From 5165238460068e53c740eaa621ebb6623dc4a50d Mon Sep 17 00:00:00 2001
From: "Zhu, Lejun" <lejun.zhu@linux.intel.com>
Date: Tue, 3 Jun 2014 13:26:02 +0800
Subject: mfd: intel_soc_pmic: Core driver

This patch provides the common I2C driver code for Intel SoC PMICs.

Signed-off-by: Yang, Bin <bin.yang@intel.com>
Signed-off-by: Zhu, Lejun <lejun.zhu@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/intel_soc_pmic.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 include/linux/mfd/intel_soc_pmic.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h
new file mode 100644
index 000000000000..abcbfcf32d10
--- /dev/null
+++ b/include/linux/mfd/intel_soc_pmic.h
@@ -0,0 +1,30 @@
+/*
+ * intel_soc_pmic.h - Intel SoC PMIC Driver
+ *
+ * Copyright (C) 2012-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Author: Yang, Bin <bin.yang@intel.com>
+ * Author: Zhu, Lejun <lejun.zhu@linux.intel.com>
+ */
+
+#ifndef __INTEL_SOC_PMIC_H__
+#define __INTEL_SOC_PMIC_H__
+
+#include <linux/regmap.h>
+
+struct intel_soc_pmic {
+	int irq;
+	struct regmap *regmap;
+	struct regmap_irq_chip_data *irq_chip_data;
+};
+
+#endif	/* __INTEL_SOC_PMIC_H__ */
-- 
cgit v1.2.3


From f15a5cf912f05b572d1f9f3772fba019643f4837 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Tue, 10 Jun 2014 18:29:39 +0800
Subject: SUNRPC/NFSD: Change to type of bool for rq_usedeferral and
 rq_splice_ok

rq_usedeferral and rq_splice_ok are used as 0 and 1, just defined to bool.

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c                | 4 ++--
 include/linux/sunrpc/svc.h        | 4 ++--
 net/sunrpc/auth_gss/svcauth_gss.c | 2 +-
 net/sunrpc/svc.c                  | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index baa3803f0811..be6734060d2a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1298,7 +1298,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	 * Don't use the deferral mechanism for NFSv4; compounds make it
 	 * too hard to avoid non-idempotency problems.
 	 */
-	rqstp->rq_usedeferral = 0;
+	rqstp->rq_usedeferral = false;
 
 	/*
 	 * According to RFC3010, this takes precedence over all other errors.
@@ -1417,7 +1417,7 @@ encode_op:
 	BUG_ON(cstate->replay_owner);
 out:
 	/* Reset deferral mechanism for RPC deferrals */
-	rqstp->rq_usedeferral = 1;
+	rqstp->rq_usedeferral = true;
 	dprintk("nfsv4 compound returned %d\n", ntohl(status));
 	return status;
 }
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 1bc7cd05b22e..cf61ecd148e0 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -236,7 +236,7 @@ struct svc_rqst {
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
 	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */
-	int			rq_usedeferral;	/* use deferral */
+	bool			rq_usedeferral;	/* use deferral */
 
 	size_t			rq_xprt_hlen;	/* xprt header len */
 	struct xdr_buf		rq_arg;
@@ -277,7 +277,7 @@ struct svc_rqst {
 	struct auth_domain *	rq_gssclient;	/* "gss/"-style peer info */
 	int			rq_cachetype;
 	struct svc_cacherep *	rq_cacherep;	/* cache info */
-	int			rq_splice_ok;   /* turned off in gss privacy
+	bool			rq_splice_ok;   /* turned off in gss privacy
 						 * to prevent encrypting page
 						 * cache pages */
 	wait_queue_head_t	rq_wait;	/* synchronization */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 4ce5eccec1f6..c548ab213f76 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
 	u32 priv_len, maj_stat;
 	int pad, saved_len, remaining_len, offset;
 
-	rqstp->rq_splice_ok = 0;
+	rqstp->rq_splice_ok = false;
 
 	priv_len = svc_getnl(&buf->head[0]);
 	if (rqstp->rq_deferred) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5de6801cd924..1db5007ddbce 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1086,9 +1086,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		goto err_short_len;
 
 	/* Will be turned off only in gss privacy case: */
-	rqstp->rq_splice_ok = 1;
+	rqstp->rq_splice_ok = true;
 	/* Will be turned off only when NFSv4 Sessions are used */
-	rqstp->rq_usedeferral = 1;
+	rqstp->rq_usedeferral = true;
 	rqstp->rq_dropme = false;
 
 	/* Setup reply header */
-- 
cgit v1.2.3


From 1e7f3a485922211b6e4a082ebc6bf05810b0b6ea Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Mon, 9 Jun 2014 11:48:33 -0400
Subject: nfs: move nfs_pgio_data and remove nfs_rw_header

nfs_rw_header was used to allocate an nfs_pgio_header along with an
nfs_pgio_data, because a _header would need at least one _data.

Now there is only ever one nfs_pgio_data for each nfs_pgio_header -- move
it to nfs_pgio_header and get rid of nfs_rw_header.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c          |  8 ++---
 fs/nfs/internal.h        |  6 ++--
 fs/nfs/pagelist.c        | 94 +++++++++++++++---------------------------------
 fs/nfs/pnfs.c            | 24 ++++++-------
 fs/nfs/read.c            |  6 ++--
 fs/nfs/write.c           | 10 +++---
 include/linux/nfs_page.h |  4 +--
 include/linux/nfs_xdr.h  | 38 +++++++++-----------
 8 files changed, 71 insertions(+), 119 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8f98138cbc43..179de67ca907 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
 {
 	struct nfs_writeverf *verfp;
 
-	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
-				      hdr->data->ds_idx);
+	verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp,
+				      hdr->data.ds_idx);
 	WARN_ON_ONCE(verfp->committed >= 0);
 	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
 	WARN_ON_ONCE(verfp->committed < 0);
@@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
 {
 	struct nfs_writeverf *verfp;
 
-	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
-					 hdr->data->ds_idx);
+	verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp,
+					 hdr->data.ds_idx);
 	if (verfp->committed < 0) {
 		nfs_direct_set_hdr_verf(dreq, hdr);
 		return 0;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 82ddbf46660e..5cda049c8f9b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -238,9 +238,9 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
 int nfs_iocounter_wait(struct nfs_io_counter *c);
 
 extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
-struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
-void nfs_rw_header_free(struct nfs_pgio_header *);
-void nfs_pgio_data_release(struct nfs_pgio_data *);
+struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
+void nfs_pgio_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_destroy(struct nfs_pgio_data *);
 int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
 int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
 		      const struct rpc_call_ops *, int, int);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b6ee3a6ee96d..e4cde476562f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -452,95 +452,61 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 }
 EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
-static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
+struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
 {
-	return container_of(hdr, struct nfs_rw_header, header);
-}
-
-/**
- * nfs_rw_header_alloc - Allocate a header for a read or write
- * @ops: Read or write function vector
- */
-struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
-{
-	struct nfs_rw_header *header = ops->rw_alloc_header();
-
-	if (header) {
-		struct nfs_pgio_header *hdr = &header->header;
+	struct nfs_pgio_header *hdr = ops->rw_alloc_header();
 
+	if (hdr) {
 		INIT_LIST_HEAD(&hdr->pages);
 		spin_lock_init(&hdr->lock);
 		atomic_set(&hdr->refcnt, 0);
 		hdr->rw_ops = ops;
 	}
-	return header;
+	return hdr;
 }
-EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
 
 /*
- * nfs_rw_header_free - Free a read or write header
+ * nfs_pgio_header_free - Free a read or write header
  * @hdr: The header to free
  */
-void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
 {
-	hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+	hdr->rw_ops->rw_free_header(hdr);
 }
-EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
 
 /**
  * nfs_pgio_data_alloc - Allocate pageio data
  * @hdr: The header making a request
  * @pagecount: Number of pages to create
  */
-static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
-						 unsigned int pagecount)
+static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr,
+			       unsigned int pagecount)
 {
-	struct nfs_pgio_data *data, *prealloc;
-
-	prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
-	if (prealloc->header == NULL)
-		data = prealloc;
-	else
-		data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		goto out;
-
-	if (nfs_pgarray_set(&data->pages, pagecount)) {
-		data->header = hdr;
+	if (nfs_pgarray_set(&hdr->data.pages, pagecount)) {
+		hdr->data.header = hdr;
 		atomic_inc(&hdr->refcnt);
-	} else {
-		if (data != prealloc)
-			kfree(data);
-		data = NULL;
+		return true;
 	}
-out:
-	return data;
+	return false;
 }
 
 /**
- * nfs_pgio_data_release - Properly free pageio data
- * @data: The data to release
+ * nfs_pgio_data_destroy - Properly free pageio data
+ * @data: The data to destroy
  */
-void nfs_pgio_data_release(struct nfs_pgio_data *data)
+void nfs_pgio_data_destroy(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
-	struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
 
 	put_nfs_open_context(data->args.context);
 	if (data->pages.pagevec != data->pages.page_array)
 		kfree(data->pages.pagevec);
-	if (data == &pageio_header->rpc_data) {
-		data->header = NULL;
-		data = NULL;
-	}
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
-	/* Note: we only free the rpc_task after callbacks are done.
-	 * See the comment in rpc_free_task() for why
-	 */
-	kfree(data);
 }
-EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
+EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
 
 /**
  * nfs_pgio_rpcsetup - Set up arguments for a pageio call
@@ -655,8 +621,7 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
 			  struct nfs_pgio_header *hdr)
 {
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
-	nfs_pgio_data_release(hdr->data);
-	hdr->data = NULL;
+	nfs_pgio_data_destroy(&hdr->data);
 	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 	return -ENOMEM;
 }
@@ -670,7 +635,7 @@ static void nfs_pgio_release(void *calldata)
 	struct nfs_pgio_data *data = calldata;
 	if (data->header->rw_ops->rw_release)
 		data->header->rw_ops->rw_release(data);
-	nfs_pgio_data_release(data);
+	nfs_pgio_data_destroy(data);
 }
 
 /**
@@ -746,11 +711,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 	struct list_head *head = &desc->pg_list;
 	struct nfs_commit_info cinfo;
 
-	data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
-							   desc->pg_count));
-	if (!data)
+	if (!nfs_pgio_data_init(hdr, nfs_page_array_len(desc->pg_base,
+			   desc->pg_count)))
 		return nfs_pgio_error(desc, hdr);
 
+	data = &hdr->data;
 	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
 	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
@@ -766,7 +731,6 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 
 	/* Set up the argument struct */
 	nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
-	hdr->data = data;
 	desc->pg_rpc_callops = &nfs_pgio_common_ops;
 	return 0;
 }
@@ -774,22 +738,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
 
 static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_rw_header *rw_hdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
-	if (!rw_hdr) {
+	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+	if (!hdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		return -ENOMEM;
 	}
-	hdr = &rw_hdr->header;
-	nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
+	nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
 	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_pgio(desc, hdr);
 	if (ret == 0)
 		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
-					hdr->data, desc->pg_rpc_callops,
+					&hdr->data, desc->pg_rpc_callops,
 					desc->pg_ioflags, 0);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6fdcd233d6f7..067104cce181 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1546,7 +1546,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
 		nfs_pageio_reset_write_mds(desc);
 		desc->pg_recoalesce = 1;
 	}
-	nfs_pgio_data_release(data);
+	nfs_pgio_data_destroy(data);
 }
 
 static enum pnfs_try_status
@@ -1575,7 +1575,7 @@ static void
 pnfs_do_write(struct nfs_pageio_descriptor *desc,
 	      struct nfs_pgio_header *hdr, int how)
 {
-	struct nfs_pgio_data *data = hdr->data;
+	struct nfs_pgio_data *data = &hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 	enum pnfs_try_status trypnfs;
@@ -1590,25 +1590,23 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
 	pnfs_put_lseg(hdr->lseg);
-	nfs_rw_header_free(hdr);
+	nfs_pgio_header_free(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_rw_header *whdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
-	if (!whdr) {
+	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+	if (!hdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 		return -ENOMEM;
 	}
-	hdr = &whdr->header;
 	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
 	atomic_inc(&hdr->refcnt);
@@ -1696,7 +1694,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
 		nfs_pageio_reset_read_mds(desc);
 		desc->pg_recoalesce = 1;
 	}
-	nfs_pgio_data_release(data);
+	nfs_pgio_data_destroy(data);
 }
 
 /*
@@ -1727,7 +1725,7 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
 static void
 pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_data *data = hdr->data;
+	struct nfs_pgio_data *data = &hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 	enum pnfs_try_status trypnfs;
@@ -1742,26 +1740,24 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
 	pnfs_put_lseg(hdr->lseg);
-	nfs_rw_header_free(hdr);
+	nfs_pgio_header_free(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_rw_header *rhdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
-	if (!rhdr) {
+	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+	if (!hdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		ret = -ENOMEM;
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 		return ret;
 	}
-	hdr = &rhdr->header;
 	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
 	atomic_inc(&hdr->refcnt);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e818a475ca64..d9df4ab3737b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-static struct nfs_rw_header *nfs_readhdr_alloc(void)
+static struct nfs_pgio_header *nfs_readhdr_alloc(void)
 {
 	return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 }
 
-static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
+static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
 {
 	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
@@ -404,7 +404,7 @@ out:
 int __init nfs_init_readpagecache(void)
 {
 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
-					     sizeof(struct nfs_rw_header),
+					     sizeof(struct nfs_pgio_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 98ff061ccaf3..d694952f0071 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -70,18 +70,18 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-static struct nfs_rw_header *nfs_writehdr_alloc(void)
+static struct nfs_pgio_header *nfs_writehdr_alloc(void)
 {
-	struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
 	if (p)
 		memset(p, 0, sizeof(*p));
 	return p;
 }
 
-static void nfs_writehdr_free(struct nfs_rw_header *whdr)
+static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
-	mempool_free(whdr, nfs_wdata_mempool);
+	mempool_free(hdr, nfs_wdata_mempool);
 }
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -1655,7 +1655,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
-					     sizeof(struct nfs_rw_header),
+					     sizeof(struct nfs_pgio_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_wdata_cachep == NULL)
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 7d9096d95d4a..43592651cd5a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -62,8 +62,8 @@ struct nfs_pageio_ops {
 
 struct nfs_rw_ops {
 	const fmode_t rw_mode;
-	struct nfs_rw_header *(*rw_alloc_header)(void);
-	void (*rw_free_header)(struct nfs_rw_header *);
+	struct nfs_pgio_header *(*rw_alloc_header)(void);
+	void (*rw_free_header)(struct nfs_pgio_header *);
 	void (*rw_release)(struct nfs_pgio_data *);
 	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *);
 	void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9a1396e70310..e1c9437e8aac 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1257,13 +1257,27 @@ enum {
 	NFS_IOHDR_NEED_RESCHED,
 };
 
-struct nfs_pgio_data;
+struct nfs_pgio_data {
+	struct nfs_pgio_header	*header;
+	struct list_head	list;
+	struct rpc_task		task;
+	struct nfs_fattr	fattr;
+	struct nfs_writeverf	verf;		/* Used for writes */
+	struct nfs_pgio_args	args;		/* argument struct */
+	struct nfs_pgio_res	res;		/* result struct */
+	unsigned long		timestamp;	/* For lease renewal */
+	int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data);
+	__u64			mds_offset;	/* Filelayout dense stripe */
+	struct nfs_page_array	pages;
+	struct nfs_client	*ds_clp;	/* pNFS data server */
+	int			ds_idx;		/* ds index if ds_clp is set */
+};
 
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
 	struct list_head	pages;
-	struct nfs_pgio_data	*data;
+	struct nfs_pgio_data	data;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
 	struct nfs_writeverf	verf;		/* Used for writes */
@@ -1283,26 +1297,6 @@ struct nfs_pgio_header {
 	unsigned long		flags;
 };
 
-struct nfs_pgio_data {
-	struct nfs_pgio_header	*header;
-	struct rpc_task		task;
-	struct nfs_fattr	fattr;
-	struct nfs_writeverf	verf;		/* Used for writes */
-	struct nfs_pgio_args	args;		/* argument struct */
-	struct nfs_pgio_res	res;		/* result struct */
-	unsigned long		timestamp;	/* For lease renewal */
-	int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data);
-	__u64			mds_offset;	/* Filelayout dense stripe */
-	struct nfs_page_array	pages;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-	int			ds_idx;		/* ds index if ds_clp is set */
-};
-
-struct nfs_rw_header {
-	struct nfs_pgio_header	header;
-	struct nfs_pgio_data	rpc_data;
-};
-
 struct nfs_mds_commit_info {
 	atomic_t rpcs_out;
 	unsigned long		ncommit;
-- 
cgit v1.2.3


From 823b0c9d9800e712374cda89ac3565bd29f6701b Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Mon, 9 Jun 2014 11:48:34 -0400
Subject: nfs: rename members of nfs_pgio_data

Rename "verf" to "writeverf" and "pages" to "page_array" to prepare for
merge of nfs_pgio_data and nfs_pgio_header.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/blocklayout/blocklayout.c | 17 ++++++++++-------
 fs/nfs/objlayout/objlayout.c     |  4 ++--
 fs/nfs/pagelist.c                | 12 ++++++------
 fs/nfs/write.c                   |  9 +++++----
 include/linux/nfs_xdr.h          |  4 ++--
 5 files changed, 25 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9b431f44fad9..36b01cef849e 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -258,7 +258,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
 	const bool is_dio = (header->dreq != NULL);
 
 	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
-	       rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
+		rdata->page_array.npages, f_offset,
+		(unsigned int)rdata->args.count);
 
 	par = alloc_parallel(rdata);
 	if (!par)
@@ -268,7 +269,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
 
 	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
 	/* Code assumes extents are page-aligned */
-	for (i = pg_index; i < rdata->pages.npages; i++) {
+	for (i = pg_index; i < rdata->page_array.npages; i++) {
 		if (!extent_length) {
 			/* We've used up the previous extent */
 			bl_put_extent(be);
@@ -317,7 +318,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
 			struct pnfs_block_extent *be_read;
 
 			be_read = (hole && cow_read) ? cow_read : be;
-			bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
+			bio = do_add_page_to_bio(bio,
+						 rdata->page_array.npages - i,
 						 READ,
 						 isect, pages[i], be_read,
 						 bl_end_io_read, par,
@@ -446,7 +448,7 @@ static void bl_end_par_io_write(void *data, int num_se)
 	}
 
 	wdata->task.tk_status = wdata->header->pnfs_error;
-	wdata->verf.committed = NFS_FILE_SYNC;
+	wdata->writeverf.committed = NFS_FILE_SYNC;
 	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
 	schedule_work(&wdata->task.u.tk_work);
 }
@@ -699,7 +701,7 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
 		dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
 		goto out_mds;
 	}
-	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
+	/* At this point, wdata->page_aray is a (sequential) list of nfs_pages.
 	 * We want to write each, and if there is an error set pnfs_error
 	 * to have it redone using nfs.
 	 */
@@ -791,7 +793,7 @@ next_page:
 
 	/* Middle pages */
 	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
-	for (i = pg_index; i < wdata->pages.npages; i++) {
+	for (i = pg_index; i < wdata->page_array.npages; i++) {
 		if (!extent_length) {
 			/* We've used up the previous extent */
 			bl_put_extent(be);
@@ -862,7 +864,8 @@ next_page:
 		}
 
 
-		bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+		bio = do_add_page_to_bio(bio, wdata->page_array.npages - i,
+					 WRITE,
 					 isect, pages[i], be,
 					 bl_end_io_write, par,
 					 pg_offset, pg_len);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 765d3f54e986..31fed91a8bac 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -329,7 +329,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 	oir->status = wdata->task.tk_status = status;
 	if (status >= 0) {
 		wdata->res.count = status;
-		wdata->verf.committed = oir->committed;
+		wdata->writeverf.committed = oir->committed;
 	} else {
 		wdata->header->pnfs_error = status;
 	}
@@ -337,7 +337,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 	/* must not use oir after this point */
 
 	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
-		status, wdata->verf.committed, sync);
+		status, wdata->writeverf.committed, sync);
 
 	if (sync)
 		pnfs_ld_write_done(wdata);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e4cde476562f..5e70918f6c95 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -484,7 +484,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
 static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr,
 			       unsigned int pagecount)
 {
-	if (nfs_pgarray_set(&hdr->data.pages, pagecount)) {
+	if (nfs_pgarray_set(&hdr->data.page_array, pagecount)) {
 		hdr->data.header = hdr;
 		atomic_inc(&hdr->refcnt);
 		return true;
@@ -501,8 +501,8 @@ void nfs_pgio_data_destroy(struct nfs_pgio_data *data)
 	struct nfs_pgio_header *hdr = data->header;
 
 	put_nfs_open_context(data->args.context);
-	if (data->pages.pagevec != data->pages.page_array)
-		kfree(data->pages.pagevec);
+	if (data->page_array.pagevec != data->page_array.page_array)
+		kfree(data->page_array.pagevec);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 }
@@ -530,7 +530,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
 	/* pnfs_set_layoutcommit needs this */
 	data->mds_offset = data->args.offset;
 	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages  = data->pages.pagevec;
+	data->args.pages  = data->page_array.pagevec;
 	data->args.count  = count;
 	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.lock_context = req->wb_lock_context;
@@ -548,7 +548,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
 	data->res.eof     = 0;
-	data->res.verf    = &data->verf;
+	data->res.verf    = &data->writeverf;
 	nfs_fattr_init(&data->fattr);
 }
 
@@ -717,7 +717,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 
 	data = &hdr->data;
 	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
-	pages = data->pages.pagevec;
+	pages = data->page_array.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d694952f0071..6afe0f679420 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -598,9 +598,9 @@ nfs_clear_request_commit(struct nfs_page *req)
 static inline
 int nfs_write_need_commit(struct nfs_pgio_data *data)
 {
-	if (data->verf.committed == NFS_DATA_SYNC)
+	if (data->writeverf.committed == NFS_DATA_SYNC)
 		return data->header->lseg == NULL;
-	return data->verf.committed != NFS_FILE_SYNC;
+	return data->writeverf.committed != NFS_FILE_SYNC;
 }
 
 #else
@@ -1095,8 +1095,9 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data)
 		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
 			; /* Do nothing */
 		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-			memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
-		else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
+			memcpy(&hdr->verf, &data->writeverf, sizeof(hdr->verf));
+		else if (memcmp(&hdr->verf, &data->writeverf,
+			 sizeof(hdr->verf)))
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e1c9437e8aac..bb18dba1aefe 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1262,13 +1262,13 @@ struct nfs_pgio_data {
 	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
-	struct nfs_writeverf	verf;		/* Used for writes */
+	struct nfs_writeverf	writeverf;	/* Used for writes */
 	struct nfs_pgio_args	args;		/* argument struct */
 	struct nfs_pgio_res	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
-	struct nfs_page_array	pages;
+	struct nfs_page_array	page_array;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 	int			ds_idx;		/* ds index if ds_clp is set */
 };
-- 
cgit v1.2.3


From d45f60c67848b9f19160692581d78e5b4757a000 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Mon, 9 Jun 2014 11:48:35 -0400
Subject: nfs: merge nfs_pgio_data into _header

struct nfs_pgio_data only exists as a member of nfs_pgio_header, but is
passed around everywhere, because there used to be multiple _data structs
per _header. Many of these functions then use the _data to find a pointer
to the _header.  This patch cleans this up by merging the nfs_pgio_data
structure into nfs_pgio_header and passing nfs_pgio_header around instead.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/blocklayout/blocklayout.c |  98 +++++++++++-----------
 fs/nfs/direct.c                  |   8 +-
 fs/nfs/filelayout/filelayout.c   | 170 +++++++++++++++++++--------------------
 fs/nfs/internal.h                |   6 +-
 fs/nfs/nfs3proc.c                |  21 ++---
 fs/nfs/nfs4_fs.h                 |   6 +-
 fs/nfs/nfs4proc.c                | 105 ++++++++++++------------
 fs/nfs/nfs4trace.h               |  28 +++----
 fs/nfs/objlayout/objio_osd.c     |  24 +++---
 fs/nfs/objlayout/objlayout.c     |  81 +++++++++----------
 fs/nfs/objlayout/objlayout.h     |   8 +-
 fs/nfs/pagelist.c                | 120 +++++++++++++--------------
 fs/nfs/pnfs.c                    |  80 ++++++++----------
 fs/nfs/pnfs.h                    |  10 +--
 fs/nfs/proc.c                    |  27 ++++---
 fs/nfs/read.c                    |  42 +++++-----
 fs/nfs/write.c                   |  56 ++++++-------
 include/linux/nfs_page.h         |   9 ++-
 include/linux/nfs_xdr.h          |  43 +++++-----
 19 files changed, 460 insertions(+), 482 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 36b01cef849e..c3ccfe440390 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
 			SetPageUptodate(bvec->bv_page);
 
 	if (err) {
-		struct nfs_pgio_data *rdata = par->data;
-		struct nfs_pgio_header *header = rdata->header;
+		struct nfs_pgio_header *header = par->data;
 
 		if (!header->pnfs_error)
 			header->pnfs_error = -EIO;
@@ -224,44 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err)
 static void bl_read_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_pgio_data *rdata;
+	struct nfs_pgio_header *hdr;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_pgio_data, task);
-	pnfs_ld_read_done(rdata);
+	hdr = container_of(task, struct nfs_pgio_header, task);
+	pnfs_ld_read_done(hdr);
 }
 
 static void
 bl_end_par_io_read(void *data, int unused)
 {
-	struct nfs_pgio_data *rdata = data;
+	struct nfs_pgio_header *hdr = data;
 
-	rdata->task.tk_status = rdata->header->pnfs_error;
-	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
-	schedule_work(&rdata->task.u.tk_work);
+	hdr->task.tk_status = hdr->pnfs_error;
+	INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
+	schedule_work(&hdr->task.u.tk_work);
 }
 
 static enum pnfs_try_status
-bl_read_pagelist(struct nfs_pgio_data *rdata)
+bl_read_pagelist(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *header = rdata->header;
+	struct nfs_pgio_header *header = hdr;
 	int i, hole;
 	struct bio *bio = NULL;
 	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
 	sector_t isect, extent_length = 0;
 	struct parallel_io *par;
-	loff_t f_offset = rdata->args.offset;
-	size_t bytes_left = rdata->args.count;
+	loff_t f_offset = hdr->args.offset;
+	size_t bytes_left = hdr->args.count;
 	unsigned int pg_offset, pg_len;
-	struct page **pages = rdata->args.pages;
-	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+	struct page **pages = hdr->args.pages;
+	int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT;
 	const bool is_dio = (header->dreq != NULL);
 
 	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
-		rdata->page_array.npages, f_offset,
-		(unsigned int)rdata->args.count);
+		hdr->page_array.npages, f_offset,
+		(unsigned int)hdr->args.count);
 
-	par = alloc_parallel(rdata);
+	par = alloc_parallel(hdr);
 	if (!par)
 		goto use_mds;
 	par->pnfs_callback = bl_end_par_io_read;
@@ -269,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
 
 	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
 	/* Code assumes extents are page-aligned */
-	for (i = pg_index; i < rdata->page_array.npages; i++) {
+	for (i = pg_index; i < hdr->page_array.npages; i++) {
 		if (!extent_length) {
 			/* We've used up the previous extent */
 			bl_put_extent(be);
@@ -319,7 +318,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
 
 			be_read = (hole && cow_read) ? cow_read : be;
 			bio = do_add_page_to_bio(bio,
-						 rdata->page_array.npages - i,
+						 hdr->page_array.npages - i,
 						 READ,
 						 isect, pages[i], be_read,
 						 bl_end_io_read, par,
@@ -334,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
 		extent_length -= PAGE_CACHE_SECTORS;
 	}
 	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
-		rdata->res.eof = 1;
-		rdata->res.count = header->inode->i_size - rdata->args.offset;
+		hdr->res.eof = 1;
+		hdr->res.count = header->inode->i_size - hdr->args.offset;
 	} else {
-		rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
+		hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset;
 	}
 out:
 	bl_put_extent(be);
@@ -392,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	}
 
 	if (unlikely(err)) {
-		struct nfs_pgio_data *data = par->data;
-		struct nfs_pgio_header *header = data->header;
+		struct nfs_pgio_header *header = par->data;
 
 		if (!header->pnfs_error)
 			header->pnfs_error = -EIO;
@@ -407,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct nfs_pgio_data *data = par->data;
-	struct nfs_pgio_header *header = data->header;
+	struct nfs_pgio_header *header = par->data;
 
 	if (!uptodate) {
 		if (!header->pnfs_error)
@@ -425,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err)
 static void bl_write_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_pgio_data *wdata;
+	struct nfs_pgio_header *hdr;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_pgio_data, task);
-	if (likely(!wdata->header->pnfs_error)) {
+	hdr = container_of(task, struct nfs_pgio_header, task);
+	if (likely(!hdr->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
-		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
-				     wdata->args.offset, wdata->args.count);
+		mark_extents_written(BLK_LSEG2EXT(hdr->lseg),
+				     hdr->args.offset, hdr->args.count);
 	}
-	pnfs_ld_write_done(wdata);
+	pnfs_ld_write_done(hdr);
 }
 
 /* Called when last of bios associated with a bl_write_pagelist call finishes */
 static void bl_end_par_io_write(void *data, int num_se)
 {
-	struct nfs_pgio_data *wdata = data;
+	struct nfs_pgio_header *hdr = data;
 
-	if (unlikely(wdata->header->pnfs_error)) {
-		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
+	if (unlikely(hdr->pnfs_error)) {
+		bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval,
 					num_se);
 	}
 
-	wdata->task.tk_status = wdata->header->pnfs_error;
-	wdata->writeverf.committed = NFS_FILE_SYNC;
-	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
-	schedule_work(&wdata->task.u.tk_work);
+	hdr->task.tk_status = hdr->pnfs_error;
+	hdr->writeverf.committed = NFS_FILE_SYNC;
+	INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
+	schedule_work(&hdr->task.u.tk_work);
 }
 
 /* FIXME STUB - mark intersection of layout and page as bad, so is not
@@ -675,18 +672,17 @@ check_page:
 }
 
 static enum pnfs_try_status
-bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_header *header, int sync)
 {
-	struct nfs_pgio_header *header = wdata->header;
 	int i, ret, npg_zero, pg_index, last = 0;
 	struct bio *bio = NULL;
 	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
 	sector_t isect, last_isect = 0, extent_length = 0;
 	struct parallel_io *par = NULL;
-	loff_t offset = wdata->args.offset;
-	size_t count = wdata->args.count;
+	loff_t offset = header->args.offset;
+	size_t count = header->args.count;
 	unsigned int pg_offset, pg_len, saved_len;
-	struct page **pages = wdata->args.pages;
+	struct page **pages = header->args.pages;
 	struct page *page;
 	pgoff_t index;
 	u64 temp;
@@ -701,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
 		dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
 		goto out_mds;
 	}
-	/* At this point, wdata->page_aray is a (sequential) list of nfs_pages.
+	/* At this point, header->page_aray is a (sequential) list of nfs_pages.
 	 * We want to write each, and if there is an error set pnfs_error
 	 * to have it redone using nfs.
 	 */
-	par = alloc_parallel(wdata);
+	par = alloc_parallel(header);
 	if (!par)
 		goto out_mds;
 	par->pnfs_callback = bl_end_par_io_write;
@@ -792,8 +788,8 @@ next_page:
 	bio = bl_submit_bio(WRITE, bio);
 
 	/* Middle pages */
-	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
-	for (i = pg_index; i < wdata->page_array.npages; i++) {
+	pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+	for (i = pg_index; i < header->page_array.npages; i++) {
 		if (!extent_length) {
 			/* We've used up the previous extent */
 			bl_put_extent(be);
@@ -864,7 +860,7 @@ next_page:
 		}
 
 
-		bio = do_add_page_to_bio(bio, wdata->page_array.npages - i,
+		bio = do_add_page_to_bio(bio, header->page_array.npages - i,
 					 WRITE,
 					 isect, pages[i], be,
 					 bl_end_io_write, par,
@@ -893,7 +889,7 @@ next_page:
 	}
 
 write_done:
-	wdata->res.count = wdata->args.count;
+	header->res.count = header->args.count;
 out:
 	bl_put_extent(be);
 	bl_put_extent(cow_read);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 179de67ca907..6c4c867ee04c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
 {
 	struct nfs_writeverf *verfp;
 
-	verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp,
-				      hdr->data.ds_idx);
+	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
+				      hdr->ds_idx);
 	WARN_ON_ONCE(verfp->committed >= 0);
 	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
 	WARN_ON_ONCE(verfp->committed < 0);
@@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
 {
 	struct nfs_writeverf *verfp;
 
-	verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp,
-					 hdr->data.ds_idx);
+	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
+					 hdr->ds_idx);
 	if (verfp->committed < 0) {
 		nfs_direct_set_hdr_verf(dreq, hdr);
 		return 0;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d2eba1c13b7e..537e7f7a0b48 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -84,19 +84,18 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
 	BUG();
 }
 
-static void filelayout_reset_write(struct nfs_pgio_data *data)
+static void filelayout_reset_write(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-	struct rpc_task *task = &data->task;
+	struct rpc_task *task = &hdr->task;
 
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
 		dprintk("%s Reset task %5u for i/o through MDS "
 			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
-			data->task.tk_pid,
+			hdr->task.tk_pid,
 			hdr->inode->i_sb->s_id,
 			(unsigned long long)NFS_FILEID(hdr->inode),
-			data->args.count,
-			(unsigned long long)data->args.offset);
+			hdr->args.count,
+			(unsigned long long)hdr->args.offset);
 
 		task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
 							&hdr->pages,
@@ -105,19 +104,18 @@ static void filelayout_reset_write(struct nfs_pgio_data *data)
 	}
 }
 
-static void filelayout_reset_read(struct nfs_pgio_data *data)
+static void filelayout_reset_read(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-	struct rpc_task *task = &data->task;
+	struct rpc_task *task = &hdr->task;
 
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
 		dprintk("%s Reset task %5u for i/o through MDS "
 			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
-			data->task.tk_pid,
+			hdr->task.tk_pid,
 			hdr->inode->i_sb->s_id,
 			(unsigned long long)NFS_FILEID(hdr->inode),
-			data->args.count,
-			(unsigned long long)data->args.offset);
+			hdr->args.count,
+			(unsigned long long)hdr->args.offset);
 
 		task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
 							&hdr->pages,
@@ -243,18 +241,17 @@ wait_on_recovery:
 /* NFS_PROTO call done callback routines */
 
 static int filelayout_read_done_cb(struct rpc_task *task,
-				struct nfs_pgio_data *data)
+				struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
 	int err;
 
-	trace_nfs4_pnfs_read(data, task->tk_status);
-	err = filelayout_async_handle_error(task, data->args.context->state,
-					    data->ds_clp, hdr->lseg);
+	trace_nfs4_pnfs_read(hdr, task->tk_status);
+	err = filelayout_async_handle_error(task, hdr->args.context->state,
+					    hdr->ds_clp, hdr->lseg);
 
 	switch (err) {
 	case -NFS4ERR_RESET_TO_MDS:
-		filelayout_reset_read(data);
+		filelayout_reset_read(hdr);
 		return task->tk_status;
 	case -EAGAIN:
 		rpc_restart_call_prepare(task);
@@ -270,15 +267,14 @@ static int filelayout_read_done_cb(struct rpc_task *task,
  * rfc5661 is not clear about which credential should be used.
  */
 static void
-filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = wdata->header;
 
 	if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
-	    wdata->res.verf->committed == NFS_FILE_SYNC)
+	    hdr->res.verf->committed == NFS_FILE_SYNC)
 		return;
 
-	pnfs_set_layoutcommit(wdata);
+	pnfs_set_layoutcommit(hdr);
 	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
 		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
@@ -305,83 +301,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
  */
 static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_pgio_data *rdata = data;
+	struct nfs_pgio_header *hdr = data;
 
-	if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
+	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
 		rpc_exit(task, -EIO);
 		return;
 	}
-	if (filelayout_reset_to_mds(rdata->header->lseg)) {
+	if (filelayout_reset_to_mds(hdr->lseg)) {
 		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
-		filelayout_reset_read(rdata);
+		filelayout_reset_read(hdr);
 		rpc_exit(task, 0);
 		return;
 	}
-	rdata->pgio_done_cb = filelayout_read_done_cb;
+	hdr->pgio_done_cb = filelayout_read_done_cb;
 
-	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
-			&rdata->args.seq_args,
-			&rdata->res.seq_res,
+	if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
+			&hdr->args.seq_args,
+			&hdr->res.seq_res,
 			task))
 		return;
-	if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context,
-			rdata->args.lock_context, FMODE_READ) == -EIO)
+	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+			hdr->args.lock_context, FMODE_READ) == -EIO)
 		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
 }
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_pgio_data *rdata = data;
+	struct nfs_pgio_header *hdr = data;
 
 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
-	if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
 	    task->tk_status == 0) {
-		nfs41_sequence_done(task, &rdata->res.seq_res);
+		nfs41_sequence_done(task, &hdr->res.seq_res);
 		return;
 	}
 
 	/* Note this may cause RPC to be resent */
-	rdata->header->mds_ops->rpc_call_done(task, data);
+	hdr->mds_ops->rpc_call_done(task, data);
 }
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_pgio_data *rdata = data;
+	struct nfs_pgio_header *hdr = data;
 
-	rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
+	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
-	struct nfs_pgio_data *rdata = data;
-	struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
+	struct nfs_pgio_header *hdr = data;
+	struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
-	nfs_put_client(rdata->ds_clp);
-	rdata->header->mds_ops->rpc_release(data);
+	nfs_put_client(hdr->ds_clp);
+	hdr->mds_ops->rpc_release(data);
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
-				struct nfs_pgio_data *data)
+				struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
 	int err;
 
-	trace_nfs4_pnfs_write(data, task->tk_status);
-	err = filelayout_async_handle_error(task, data->args.context->state,
-					    data->ds_clp, hdr->lseg);
+	trace_nfs4_pnfs_write(hdr, task->tk_status);
+	err = filelayout_async_handle_error(task, hdr->args.context->state,
+					    hdr->ds_clp, hdr->lseg);
 
 	switch (err) {
 	case -NFS4ERR_RESET_TO_MDS:
-		filelayout_reset_write(data);
+		filelayout_reset_write(hdr);
 		return task->tk_status;
 	case -EAGAIN:
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
 	}
 
-	filelayout_set_layoutcommit(data);
+	filelayout_set_layoutcommit(hdr);
 	return 0;
 }
 
@@ -419,57 +414,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 
 static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_pgio_data *wdata = data;
+	struct nfs_pgio_header *hdr = data;
 
-	if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
+	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
 		rpc_exit(task, -EIO);
 		return;
 	}
-	if (filelayout_reset_to_mds(wdata->header->lseg)) {
+	if (filelayout_reset_to_mds(hdr->lseg)) {
 		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
-		filelayout_reset_write(wdata);
+		filelayout_reset_write(hdr);
 		rpc_exit(task, 0);
 		return;
 	}
-	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
-			&wdata->args.seq_args,
-			&wdata->res.seq_res,
+	if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
+			&hdr->args.seq_args,
+			&hdr->res.seq_res,
 			task))
 		return;
-	if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context,
-			wdata->args.lock_context, FMODE_WRITE) == -EIO)
+	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+			hdr->args.lock_context, FMODE_WRITE) == -EIO)
 		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
 }
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_pgio_data *wdata = data;
+	struct nfs_pgio_header *hdr = data;
 
-	if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
 	    task->tk_status == 0) {
-		nfs41_sequence_done(task, &wdata->res.seq_res);
+		nfs41_sequence_done(task, &hdr->res.seq_res);
 		return;
 	}
 
 	/* Note this may cause RPC to be resent */
-	wdata->header->mds_ops->rpc_call_done(task, data);
+	hdr->mds_ops->rpc_call_done(task, data);
 }
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_pgio_data *wdata = data;
+	struct nfs_pgio_header *hdr = data;
 
-	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
+	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
-	struct nfs_pgio_data *wdata = data;
-	struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
+	struct nfs_pgio_header *hdr = data;
+	struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
-	nfs_put_client(wdata->ds_clp);
-	wdata->header->mds_ops->rpc_release(data);
+	nfs_put_client(hdr->ds_clp);
+	hdr->mds_ops->rpc_release(data);
 }
 
 static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -529,19 +524,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
 };
 
 static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_pgio_data *data)
+filelayout_read_pagelist(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
 	struct nfs4_pnfs_ds *ds;
 	struct rpc_clnt *ds_clnt;
-	loff_t offset = data->args.offset;
+	loff_t offset = hdr->args.offset;
 	u32 j, idx;
 	struct nfs_fh *fh;
 
 	dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
 		__func__, hdr->inode->i_ino,
-		data->args.pgbase, (size_t)data->args.count, offset);
+		hdr->args.pgbase, (size_t)hdr->args.count, offset);
 
 	/* Retrieve the correct rpc_client for the byte range */
 	j = nfs4_fl_calc_j_index(lseg, offset);
@@ -559,30 +553,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
 
 	/* No multipath support. Use first DS */
 	atomic_inc(&ds->ds_clp->cl_count);
-	data->ds_clp = ds->ds_clp;
-	data->ds_idx = idx;
+	hdr->ds_clp = ds->ds_clp;
+	hdr->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
-		data->args.fh = fh;
+		hdr->args.fh = fh;
 
-	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
-	data->mds_offset = offset;
+	hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
+	hdr->mds_offset = offset;
 
 	/* Perform an asynchronous read to ds */
-	nfs_initiate_pgio(ds_clnt, data,
+	nfs_initiate_pgio(ds_clnt, hdr,
 			    &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
 }
 
 /* Perform async writes. */
 static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 {
-	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
 	struct nfs4_pnfs_ds *ds;
 	struct rpc_clnt *ds_clnt;
-	loff_t offset = data->args.offset;
+	loff_t offset = hdr->args.offset;
 	u32 j, idx;
 	struct nfs_fh *fh;
 
@@ -598,21 +591,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 		return PNFS_NOT_ATTEMPTED;
 
 	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
-		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
+		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
 		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
-	data->pgio_done_cb = filelayout_write_done_cb;
+	hdr->pgio_done_cb = filelayout_write_done_cb;
 	atomic_inc(&ds->ds_clp->cl_count);
-	data->ds_clp = ds->ds_clp;
-	data->ds_idx = idx;
+	hdr->ds_clp = ds->ds_clp;
+	hdr->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
-		data->args.fh = fh;
-
-	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
+		hdr->args.fh = fh;
+	hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 	/* Perform an asynchronous write */
-	nfs_initiate_pgio(ds_clnt, data,
+	nfs_initiate_pgio(ds_clnt, hdr,
 				    &filelayout_write_call_ops, sync,
 				    RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5cda049c8f9b..3f3aedd2e8c9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -240,9 +240,9 @@ int nfs_iocounter_wait(struct nfs_io_counter *c);
 extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
 struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
 void nfs_pgio_header_free(struct nfs_pgio_header *);
-void nfs_pgio_data_destroy(struct nfs_pgio_data *);
+void nfs_pgio_data_destroy(struct nfs_pgio_header *);
 int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
-int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *,
 		      const struct rpc_call_ops *, int, int);
 
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
@@ -481,7 +481,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 			    const struct rpc_timeout *timeparms,
 			    const char *ip_addr);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e7daa42bbc86..854959db0e5d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return status;
 }
 
-static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	struct inode *inode = data->header->inode;
+	struct inode *inode = hdr->inode;
 
 	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 
 	nfs_invalidate_atime(inode);
-	nfs_refresh_inode(inode, &data->fattr);
+	nfs_refresh_inode(inode, &hdr->fattr);
 	return 0;
 }
 
-static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
+				 struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
-static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
+				      struct nfs_pgio_header *hdr)
 {
 	rpc_call_start(task);
 	return 0;
 }
 
-static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	struct inode *inode = data->header->inode;
+	struct inode *inode = hdr->inode;
 
 	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
 	return 0;
 }
 
-static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
+				  struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ba2affa51941..b8ea4a26998c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -337,11 +337,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
  */
 static inline void
 nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
-			 struct rpc_message *msg, struct nfs_pgio_data *wdata)
+			 struct rpc_message *msg, struct nfs_pgio_header *hdr)
 {
 	if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
 	    !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
-		wdata->args.stable = NFS_FILE_SYNC;
+		hdr->args.stable = NFS_FILE_SYNC;
 }
 #else /* CONFIG_NFS_v4_1 */
 static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
 
 static inline void
 nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
-			 struct rpc_message *msg, struct nfs_pgio_data *wdata)
+			 struct rpc_message *msg, struct nfs_pgio_header *hdr)
 {
 }
 #endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bf3d97cc5a0..b0e5705599bf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4033,24 +4033,25 @@ static bool nfs4_error_stateid_expired(int err)
 	return false;
 }
 
-void __nfs4_read_done_cb(struct nfs_pgio_data *data)
+void __nfs4_read_done_cb(struct nfs_pgio_header *hdr)
 {
-	nfs_invalidate_atime(data->header->inode);
+	nfs_invalidate_atime(hdr->inode);
 }
 
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	struct nfs_server *server = NFS_SERVER(data->header->inode);
+	struct nfs_server *server = NFS_SERVER(hdr->inode);
 
-	trace_nfs4_read(data, task->tk_status);
-	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+	trace_nfs4_read(hdr, task->tk_status);
+	if (nfs4_async_handle_error(task, server,
+				    hdr->args.context->state) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
 	}
 
-	__nfs4_read_done_cb(data);
+	__nfs4_read_done_cb(hdr);
 	if (task->tk_status > 0)
-		renew_lease(server, data->timestamp);
+		renew_lease(server, hdr->timestamp);
 	return 0;
 }
 
@@ -4068,54 +4069,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
 	return true;
 }
 
-static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
 
 	dprintk("--> %s\n", __func__);
 
-	if (!nfs4_sequence_done(task, &data->res.seq_res))
+	if (!nfs4_sequence_done(task, &hdr->res.seq_res))
 		return -EAGAIN;
-	if (nfs4_read_stateid_changed(task, &data->args))
+	if (nfs4_read_stateid_changed(task, &hdr->args))
 		return -EAGAIN;
-	return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
-				    nfs4_read_done_cb(task, data);
+	return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
+				    nfs4_read_done_cb(task, hdr);
 }
 
-static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
+				 struct rpc_message *msg)
 {
-	data->timestamp   = jiffies;
-	data->pgio_done_cb = nfs4_read_done_cb;
+	hdr->timestamp   = jiffies;
+	hdr->pgio_done_cb = nfs4_read_done_cb;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
-	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+	nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
 }
 
-static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
+				      struct nfs_pgio_header *hdr)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
-			&data->args.seq_args,
-			&data->res.seq_res,
+	if (nfs4_setup_sequence(NFS_SERVER(hdr->inode),
+			&hdr->args.seq_args,
+			&hdr->res.seq_res,
 			task))
 		return 0;
-	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-				data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
+	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+				hdr->args.lock_context,
+				hdr->rw_ops->rw_mode) == -EIO)
 		return -EIO;
-	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
+	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
 		return -EIO;
 	return 0;
 }
 
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task,
+			      struct nfs_pgio_header *hdr)
 {
-	struct inode *inode = data->header->inode;
+	struct inode *inode = hdr->inode;
 	
-	trace_nfs4_write(data, task->tk_status);
-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+	trace_nfs4_write(hdr, task->tk_status);
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode),
+				    hdr->args.context->state) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
 	}
 	if (task->tk_status >= 0) {
-		renew_lease(NFS_SERVER(inode), data->timestamp);
-		nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+		renew_lease(NFS_SERVER(inode), hdr->timestamp);
+		nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
 	}
 	return 0;
 }
@@ -4134,23 +4140,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
 	return true;
 }
 
-static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	if (!nfs4_sequence_done(task, &data->res.seq_res))
+	if (!nfs4_sequence_done(task, &hdr->res.seq_res))
 		return -EAGAIN;
-	if (nfs4_write_stateid_changed(task, &data->args))
+	if (nfs4_write_stateid_changed(task, &hdr->args))
 		return -EAGAIN;
-	return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
-		nfs4_write_done_cb(task, data);
+	return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
+		nfs4_write_done_cb(task, hdr);
 }
 
 static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
+bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
 {
-	const struct nfs_pgio_header *hdr = data->header;
-
 	/* Don't request attributes for pNFS or O_DIRECT writes */
-	if (data->ds_clp != NULL || hdr->dreq != NULL)
+	if (hdr->ds_clp != NULL || hdr->dreq != NULL)
 		return false;
 	/* Otherwise, request attributes if and only if we don't hold
 	 * a delegation
@@ -4158,23 +4162,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
 	return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
 }
 
-static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
+				  struct rpc_message *msg)
 {
-	struct nfs_server *server = NFS_SERVER(data->header->inode);
+	struct nfs_server *server = NFS_SERVER(hdr->inode);
 
-	if (!nfs4_write_need_cache_consistency_data(data)) {
-		data->args.bitmask = NULL;
-		data->res.fattr = NULL;
+	if (!nfs4_write_need_cache_consistency_data(hdr)) {
+		hdr->args.bitmask = NULL;
+		hdr->res.fattr = NULL;
 	} else
-		data->args.bitmask = server->cache_consistency_bitmask;
+		hdr->args.bitmask = server->cache_consistency_bitmask;
 
-	if (!data->pgio_done_cb)
-		data->pgio_done_cb = nfs4_write_done_cb;
-	data->res.server = server;
-	data->timestamp   = jiffies;
+	if (!hdr->pgio_done_cb)
+		hdr->pgio_done_cb = nfs4_write_done_cb;
+	hdr->res.server = server;
+	hdr->timestamp   = jiffies;
 
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
-	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+	nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1);
 }
 
 static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 0a744f3a86f6..1c32adbe728d 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
 
 DECLARE_EVENT_CLASS(nfs4_read_event,
 		TP_PROTO(
-			const struct nfs_pgio_data *data,
+			const struct nfs_pgio_header *hdr,
 			int error
 		),
 
-		TP_ARGS(data, error),
+		TP_ARGS(hdr, error),
 
 		TP_STRUCT__entry(
 			__field(dev_t, dev)
@@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 		),
 
 		TP_fast_assign(
-			const struct inode *inode = data->header->inode;
+			const struct inode *inode = hdr->inode;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = NFS_FILEID(inode);
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
-			__entry->offset = data->args.offset;
-			__entry->count = data->args.count;
+			__entry->offset = hdr->args.offset;
+			__entry->count = hdr->args.count;
 			__entry->error = error;
 		),
 
@@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 #define DEFINE_NFS4_READ_EVENT(name) \
 	DEFINE_EVENT(nfs4_read_event, name, \
 			TP_PROTO( \
-				const struct nfs_pgio_data *data, \
+				const struct nfs_pgio_header *hdr, \
 				int error \
 			), \
-			TP_ARGS(data, error))
+			TP_ARGS(hdr, error))
 DEFINE_NFS4_READ_EVENT(nfs4_read);
 #ifdef CONFIG_NFS_V4_1
 DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
@@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
 
 DECLARE_EVENT_CLASS(nfs4_write_event,
 		TP_PROTO(
-			const struct nfs_pgio_data *data,
+			const struct nfs_pgio_header *hdr,
 			int error
 		),
 
-		TP_ARGS(data, error),
+		TP_ARGS(hdr, error),
 
 		TP_STRUCT__entry(
 			__field(dev_t, dev)
@@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 		),
 
 		TP_fast_assign(
-			const struct inode *inode = data->header->inode;
+			const struct inode *inode = hdr->inode;
 			__entry->dev = inode->i_sb->s_dev;
 			__entry->fileid = NFS_FILEID(inode);
 			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
-			__entry->offset = data->args.offset;
-			__entry->count = data->args.count;
+			__entry->offset = hdr->args.offset;
+			__entry->count = hdr->args.count;
 			__entry->error = error;
 		),
 
@@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 #define DEFINE_NFS4_WRITE_EVENT(name) \
 	DEFINE_EVENT(nfs4_write_event, name, \
 			TP_PROTO( \
-				const struct nfs_pgio_data *data, \
+				const struct nfs_pgio_header *hdr, \
 				int error \
 			), \
-			TP_ARGS(data, error))
+			TP_ARGS(hdr, error))
 DEFINE_NFS4_WRITE_EVENT(nfs4_write);
 #ifdef CONFIG_NFS_V4_1
 DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 611320753db2..ae05278b3761 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private)
 	objlayout_read_done(&objios->oir, status, objios->sync);
 }
 
-int objio_read_pagelist(struct nfs_pgio_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = rdata->header;
 	struct objio_state *objios;
 	int ret;
 
 	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
-			hdr->lseg, rdata->args.pages, rdata->args.pgbase,
-			rdata->args.offset, rdata->args.count, rdata,
+			hdr->lseg, hdr->args.pages, hdr->args.pgbase,
+			hdr->args.offset, hdr->args.count, hdr,
 			GFP_KERNEL, &objios);
 	if (unlikely(ret))
 		return ret;
 
 	objios->ios->done = _read_done;
 	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
-		rdata->args.offset, rdata->args.count);
+		hdr->args.offset, hdr->args.count);
 	ret = ore_read(objios->ios);
 	if (unlikely(ret))
 		objio_free_result(&objios->oir);
@@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private)
 static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 {
 	struct objio_state *objios = priv;
-	struct nfs_pgio_data *wdata = objios->oir.rpcdata;
-	struct address_space *mapping = wdata->header->inode->i_mapping;
+	struct nfs_pgio_header *hdr = objios->oir.rpcdata;
+	struct address_space *mapping = hdr->inode->i_mapping;
 	pgoff_t index = offset / PAGE_SIZE;
 	struct page *page;
-	loff_t i_size = i_size_read(wdata->header->inode);
+	loff_t i_size = i_size_read(hdr->inode);
 
 	if (offset >= i_size) {
 		*uptodate = true;
@@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = {
 	.put_page = &__r4w_put_page,
 };
 
-int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
 {
-	struct nfs_pgio_header *hdr = wdata->header;
 	struct objio_state *objios;
 	int ret;
 
 	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
-			hdr->lseg, wdata->args.pages, wdata->args.pgbase,
-			wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
+			hdr->lseg, hdr->args.pages, hdr->args.pgbase,
+			hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
 			&objios);
 	if (unlikely(ret))
 		return ret;
@@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
 		objios->ios->done = _write_done;
 
 	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
-		wdata->args.offset, wdata->args.count);
+		hdr->args.offset, hdr->args.count);
 	ret = ore_write(objios->ios);
 	if (unlikely(ret)) {
 		objio_free_result(&objios->oir);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 31fed91a8bac..86312787cee6 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
 static void _rpc_read_complete(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_pgio_data *rdata;
+	struct nfs_pgio_header *hdr;
 
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_pgio_data, task);
+	hdr = container_of(task, struct nfs_pgio_header, task);
 
-	pnfs_ld_read_done(rdata);
+	pnfs_ld_read_done(hdr);
 }
 
 void
 objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 {
-	struct nfs_pgio_data *rdata = oir->rpcdata;
+	struct nfs_pgio_header *hdr = oir->rpcdata;
 
-	oir->status = rdata->task.tk_status = status;
+	oir->status = hdr->task.tk_status = status;
 	if (status >= 0)
-		rdata->res.count = status;
+		hdr->res.count = status;
 	else
-		rdata->header->pnfs_error = status;
+		hdr->pnfs_error = status;
 	objlayout_iodone(oir);
 	/* must not use oir after this point */
 
 	dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
-		status, rdata->res.eof, sync);
+		status, hdr->res.eof, sync);
 
 	if (sync)
-		pnfs_ld_read_done(rdata);
+		pnfs_ld_read_done(hdr);
 	else {
-		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
-		schedule_work(&rdata->task.u.tk_work);
+		INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
+		schedule_work(&hdr->task.u.tk_work);
 	}
 }
 
@@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
  * Perform sync or async reads.
  */
 enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_pgio_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = rdata->header;
 	struct inode *inode = hdr->inode;
-	loff_t offset = rdata->args.offset;
-	size_t count = rdata->args.count;
+	loff_t offset = hdr->args.offset;
+	size_t count = hdr->args.count;
 	int err;
 	loff_t eof;
 
@@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
 	if (unlikely(offset + count > eof)) {
 		if (offset >= eof) {
 			err = 0;
-			rdata->res.count = 0;
-			rdata->res.eof = 1;
+			hdr->res.count = 0;
+			hdr->res.eof = 1;
 			/*FIXME: do we need to call pnfs_ld_read_done() */
 			goto out;
 		}
 		count = eof - offset;
 	}
 
-	rdata->res.eof = (offset + count) >= eof;
-	_fix_verify_io_params(hdr->lseg, &rdata->args.pages,
-			      &rdata->args.pgbase,
-			      rdata->args.offset, rdata->args.count);
+	hdr->res.eof = (offset + count) >= eof;
+	_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
+			      &hdr->args.pgbase,
+			      hdr->args.offset, hdr->args.count);
 
 	dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
-		__func__, inode->i_ino, offset, count, rdata->res.eof);
+		__func__, inode->i_ino, offset, count, hdr->res.eof);
 
-	err = objio_read_pagelist(rdata);
+	err = objio_read_pagelist(hdr);
  out:
 	if (unlikely(err)) {
 		hdr->pnfs_error = err;
@@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
 static void _rpc_write_complete(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_pgio_data *wdata;
+	struct nfs_pgio_header *hdr;
 
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_pgio_data, task);
+	hdr = container_of(task, struct nfs_pgio_header, task);
 
-	pnfs_ld_write_done(wdata);
+	pnfs_ld_write_done(hdr);
 }
 
 void
 objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 {
-	struct nfs_pgio_data *wdata = oir->rpcdata;
+	struct nfs_pgio_header *hdr = oir->rpcdata;
 
-	oir->status = wdata->task.tk_status = status;
+	oir->status = hdr->task.tk_status = status;
 	if (status >= 0) {
-		wdata->res.count = status;
-		wdata->writeverf.committed = oir->committed;
+		hdr->res.count = status;
+		hdr->writeverf.committed = oir->committed;
 	} else {
-		wdata->header->pnfs_error = status;
+		hdr->pnfs_error = status;
 	}
 	objlayout_iodone(oir);
 	/* must not use oir after this point */
 
 	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
-		status, wdata->writeverf.committed, sync);
+		status, hdr->writeverf.committed, sync);
 
 	if (sync)
-		pnfs_ld_write_done(wdata);
+		pnfs_ld_write_done(hdr);
 	else {
-		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
-		schedule_work(&wdata->task.u.tk_work);
+		INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
+		schedule_work(&hdr->task.u.tk_work);
 	}
 }
 
@@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
  * Perform sync or async writes.
  */
 enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_pgio_data *wdata,
-			 int how)
+objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
 {
-	struct nfs_pgio_header *hdr = wdata->header;
 	int err;
 
-	_fix_verify_io_params(hdr->lseg, &wdata->args.pages,
-			      &wdata->args.pgbase,
-			      wdata->args.offset, wdata->args.count);
+	_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
+			      &hdr->args.pgbase,
+			      hdr->args.offset, hdr->args.count);
 
-	err = objio_write_pagelist(wdata, how);
+	err = objio_write_pagelist(hdr, how);
 	if (unlikely(err)) {
 		hdr->pnfs_error = err;
 		dprintk("%s: Returned Error %d\n", __func__, err);
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 01e041029a6c..fd13f1d2f136 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
  */
 extern void objio_free_result(struct objlayout_io_res *oir);
 
-extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
-extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
 
 /*
  * callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
 extern void objlayout_free_lseg(struct pnfs_layout_segment *);
 
 extern enum pnfs_try_status objlayout_read_pagelist(
-	struct nfs_pgio_data *);
+	struct nfs_pgio_header *);
 
 extern enum pnfs_try_status objlayout_write_pagelist(
-	struct nfs_pgio_data *,
+	struct nfs_pgio_header *,
 	int how);
 
 extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 5e70918f6c95..ecb3d4cdbc85 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -484,8 +484,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
 static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr,
 			       unsigned int pagecount)
 {
-	if (nfs_pgarray_set(&hdr->data.page_array, pagecount)) {
-		hdr->data.header = hdr;
+	if (nfs_pgarray_set(&hdr->page_array, pagecount)) {
 		atomic_inc(&hdr->refcnt);
 		return true;
 	}
@@ -493,16 +492,14 @@ static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr,
 }
 
 /**
- * nfs_pgio_data_destroy - Properly free pageio data
- * @data: The data to destroy
+ * nfs_pgio_data_destroy - Properly release pageio data
+ * @hdr: The header with data to destroy
  */
-void nfs_pgio_data_destroy(struct nfs_pgio_data *data)
+void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-
-	put_nfs_open_context(data->args.context);
-	if (data->page_array.pagevec != data->page_array.page_array)
-		kfree(data->page_array.pagevec);
+	put_nfs_open_context(hdr->args.context);
+	if (hdr->page_array.pagevec != hdr->page_array.page_array)
+		kfree(hdr->page_array.pagevec);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 }
@@ -510,31 +507,31 @@ EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
 
 /**
  * nfs_pgio_rpcsetup - Set up arguments for a pageio call
- * @data: The pageio data
+ * @hdr: The pageio hdr
  * @count: Number of bytes to read
  * @offset: Initial offset
  * @how: How to commit data (writes only)
  * @cinfo: Commit information for the call (writes only)
  */
-static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
+static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
 			      unsigned int count, unsigned int offset,
 			      int how, struct nfs_commit_info *cinfo)
 {
-	struct nfs_page *req = data->header->req;
+	struct nfs_page *req = hdr->req;
 
 	/* Set up the RPC argument and reply structs
-	 * NB: take care not to mess about with data->commit et al. */
+	 * NB: take care not to mess about with hdr->commit et al. */
 
-	data->args.fh     = NFS_FH(data->header->inode);
-	data->args.offset = req_offset(req) + offset;
+	hdr->args.fh     = NFS_FH(hdr->inode);
+	hdr->args.offset = req_offset(req) + offset;
 	/* pnfs_set_layoutcommit needs this */
-	data->mds_offset = data->args.offset;
-	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages  = data->page_array.pagevec;
-	data->args.count  = count;
-	data->args.context = get_nfs_open_context(req->wb_context);
-	data->args.lock_context = req->wb_lock_context;
-	data->args.stable  = NFS_UNSTABLE;
+	hdr->mds_offset = hdr->args.offset;
+	hdr->args.pgbase = req->wb_pgbase + offset;
+	hdr->args.pages  = hdr->page_array.pagevec;
+	hdr->args.count  = count;
+	hdr->args.context = get_nfs_open_context(req->wb_context);
+	hdr->args.lock_context = req->wb_lock_context;
+	hdr->args.stable  = NFS_UNSTABLE;
 	switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
 	case 0:
 		break;
@@ -542,59 +539,60 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
 		if (nfs_reqs_to_commit(cinfo))
 			break;
 	default:
-		data->args.stable = NFS_FILE_SYNC;
+		hdr->args.stable = NFS_FILE_SYNC;
 	}
 
-	data->res.fattr   = &data->fattr;
-	data->res.count   = count;
-	data->res.eof     = 0;
-	data->res.verf    = &data->writeverf;
-	nfs_fattr_init(&data->fattr);
+	hdr->res.fattr   = &hdr->fattr;
+	hdr->res.count   = count;
+	hdr->res.eof     = 0;
+	hdr->res.verf    = &hdr->writeverf;
+	nfs_fattr_init(&hdr->fattr);
 }
 
 /**
- * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * nfs_pgio_prepare - Prepare pageio hdr to go over the wire
  * @task: The current task
- * @calldata: pageio data to prepare
+ * @calldata: pageio header to prepare
  */
 static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs_pgio_data *data = calldata;
+	struct nfs_pgio_header *hdr = calldata;
 	int err;
-	err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+	err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr);
 	if (err)
 		rpc_exit(task, err);
 }
 
-int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
 		      const struct rpc_call_ops *call_ops, int how, int flags)
 {
+	struct inode *inode = hdr->inode;
 	struct rpc_task *task;
 	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->header->cred,
+		.rpc_argp = &hdr->args,
+		.rpc_resp = &hdr->res,
+		.rpc_cred = hdr->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clnt,
-		.task = &data->task,
+		.task = &hdr->task,
 		.rpc_message = &msg,
 		.callback_ops = call_ops,
-		.callback_data = data,
+		.callback_data = hdr,
 		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC | flags,
 	};
 	int ret = 0;
 
-	data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+	hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how);
 
 	dprintk("NFS: %5u initiated pgio call "
 		"(req %s/%llu, %u bytes @ offset %llu)\n",
-		data->task.tk_pid,
-		data->header->inode->i_sb->s_id,
-		(unsigned long long)NFS_FILEID(data->header->inode),
-		data->args.count,
-		(unsigned long long)data->args.offset);
+		hdr->task.tk_pid,
+		inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(inode),
+		hdr->args.count,
+		(unsigned long long)hdr->args.offset);
 
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task)) {
@@ -621,21 +619,21 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
 			  struct nfs_pgio_header *hdr)
 {
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
-	nfs_pgio_data_destroy(&hdr->data);
+	nfs_pgio_data_destroy(hdr);
 	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 	return -ENOMEM;
 }
 
 /**
  * nfs_pgio_release - Release pageio data
- * @calldata: The pageio data to release
+ * @calldata: The pageio header to release
  */
 static void nfs_pgio_release(void *calldata)
 {
-	struct nfs_pgio_data *data = calldata;
-	if (data->header->rw_ops->rw_release)
-		data->header->rw_ops->rw_release(data);
-	nfs_pgio_data_destroy(data);
+	struct nfs_pgio_header *hdr = calldata;
+	if (hdr->rw_ops->rw_release)
+		hdr->rw_ops->rw_release(hdr);
+	nfs_pgio_data_destroy(hdr);
 }
 
 /**
@@ -676,22 +674,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init);
 /**
  * nfs_pgio_result - Basic pageio error handling
  * @task: The task that ran
- * @calldata: Pageio data to check
+ * @calldata: Pageio header to check
  */
 static void nfs_pgio_result(struct rpc_task *task, void *calldata)
 {
-	struct nfs_pgio_data *data = calldata;
-	struct inode *inode = data->header->inode;
+	struct nfs_pgio_header *hdr = calldata;
+	struct inode *inode = hdr->inode;
 
 	dprintk("NFS: %s: %5u, (status %d)\n", __func__,
 		task->tk_pid, task->tk_status);
 
-	if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+	if (hdr->rw_ops->rw_done(task, hdr, inode) != 0)
 		return;
 	if (task->tk_status < 0)
-		nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+		nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset);
 	else
-		data->header->rw_ops->rw_result(task, data);
+		hdr->rw_ops->rw_result(task, hdr);
 }
 
 /*
@@ -707,7 +705,6 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_pgio_data	*data;
 	struct list_head *head = &desc->pg_list;
 	struct nfs_commit_info cinfo;
 
@@ -715,9 +712,8 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 			   desc->pg_count)))
 		return nfs_pgio_error(desc, hdr);
 
-	data = &hdr->data;
 	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
-	pages = data->page_array.pagevec;
+	pages = hdr->page_array.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
@@ -730,7 +726,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 	/* Set up the argument struct */
-	nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
+	nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
 	desc->pg_rpc_callops = &nfs_pgio_common_ops;
 	return 0;
 }
@@ -751,7 +747,7 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
 	ret = nfs_generic_pgio(desc, hdr);
 	if (ret == 0)
 		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
-					&hdr->data, desc->pg_rpc_callops,
+					hdr, desc->pg_rpc_callops,
 					desc->pg_ioflags, 0);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 067104cce181..ecc911347750 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1502,9 +1502,8 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
 
-static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
 
 	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
 	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
@@ -1512,7 +1511,7 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
 		pnfs_return_layout(hdr->inode);
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
-		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
 							&hdr->pages,
 							hdr->completion_ops,
 							hdr->dreq);
@@ -1521,41 +1520,36 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
 /*
  * Called by non rpc-based layout drivers
  */
-void pnfs_ld_write_done(struct nfs_pgio_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-
-	trace_nfs4_pnfs_write(data, hdr->pnfs_error);
+	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
 	if (!hdr->pnfs_error) {
-		pnfs_set_layoutcommit(data);
-		hdr->mds_ops->rpc_call_done(&data->task, data);
+		pnfs_set_layoutcommit(hdr);
+		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
 	} else
-		pnfs_ld_handle_write_error(data);
-	hdr->mds_ops->rpc_release(data);
+		pnfs_ld_handle_write_error(hdr);
+	hdr->mds_ops->rpc_release(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
 static void
 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
-		struct nfs_pgio_data *data)
+		struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
 		list_splice_tail_init(&hdr->pages, &desc->pg_list);
 		nfs_pageio_reset_write_mds(desc);
 		desc->pg_recoalesce = 1;
 	}
-	nfs_pgio_data_destroy(data);
+	nfs_pgio_data_destroy(hdr);
 }
 
 static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
 			const struct rpc_call_ops *call_ops,
 			struct pnfs_layout_segment *lseg,
 			int how)
 {
-	struct nfs_pgio_header *hdr = wdata->header;
 	struct inode *inode = hdr->inode;
 	enum pnfs_try_status trypnfs;
 	struct nfs_server *nfss = NFS_SERVER(inode);
@@ -1563,8 +1557,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
 	hdr->mds_ops = call_ops;
 
 	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
-		inode->i_ino, wdata->args.count, wdata->args.offset, how);
-	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
+		inode->i_ino, hdr->args.count, hdr->args.offset, how);
+	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
 	if (trypnfs != PNFS_NOT_ATTEMPTED)
 		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1575,15 +1569,14 @@ static void
 pnfs_do_write(struct nfs_pageio_descriptor *desc,
 	      struct nfs_pgio_header *hdr, int how)
 {
-	struct nfs_pgio_data *data = &hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 	enum pnfs_try_status trypnfs;
 
 	desc->pg_lseg = NULL;
-	trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
 	if (trypnfs == PNFS_NOT_ATTEMPTED)
-		pnfs_write_through_mds(desc, data);
+		pnfs_write_through_mds(desc, hdr);
 	pnfs_put_lseg(lseg);
 }
 
@@ -1650,17 +1643,15 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
 
-static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-
 	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
 	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
 	    PNFS_LAYOUTRET_ON_ERROR) {
 		pnfs_return_layout(hdr->inode);
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
-		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
 							&hdr->pages,
 							hdr->completion_ops,
 							hdr->dreq);
@@ -1669,43 +1660,38 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
 /*
  * Called by non rpc-based layout drivers
  */
-void pnfs_ld_read_done(struct nfs_pgio_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-
-	trace_nfs4_pnfs_read(data, hdr->pnfs_error);
+	trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
 	if (likely(!hdr->pnfs_error)) {
-		__nfs4_read_done_cb(data);
-		hdr->mds_ops->rpc_call_done(&data->task, data);
+		__nfs4_read_done_cb(hdr);
+		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
 	} else
-		pnfs_ld_handle_read_error(data);
-	hdr->mds_ops->rpc_release(data);
+		pnfs_ld_handle_read_error(hdr);
+	hdr->mds_ops->rpc_release(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
 static void
 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
-		struct nfs_pgio_data *data)
+		struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
 		list_splice_tail_init(&hdr->pages, &desc->pg_list);
 		nfs_pageio_reset_read_mds(desc);
 		desc->pg_recoalesce = 1;
 	}
-	nfs_pgio_data_destroy(data);
+	nfs_pgio_data_destroy(hdr);
 }
 
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
 static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
 		       const struct rpc_call_ops *call_ops,
 		       struct pnfs_layout_segment *lseg)
 {
-	struct nfs_pgio_header *hdr = rdata->header;
 	struct inode *inode = hdr->inode;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 	enum pnfs_try_status trypnfs;
@@ -1713,9 +1699,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
 	hdr->mds_ops = call_ops;
 
 	dprintk("%s: Reading ino:%lu %u@%llu\n",
-		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
+		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);
 
-	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
+	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
 	if (trypnfs != PNFS_NOT_ATTEMPTED)
 		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1725,15 +1711,14 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
 static void
 pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_data *data = &hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 	enum pnfs_try_status trypnfs;
 
 	desc->pg_lseg = NULL;
-	trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
 	if (trypnfs == PNFS_NOT_ATTEMPTED)
-		pnfs_read_through_mds(desc, data);
+		pnfs_read_through_mds(desc, hdr);
 	pnfs_put_lseg(lseg);
 }
 
@@ -1816,12 +1801,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 
 void
-pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = wdata->header;
 	struct inode *inode = hdr->inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	loff_t end_pos = wdata->mds_offset + wdata->res.count;
+	loff_t end_pos = hdr->mds_offset + hdr->res.count;
 	bool mark_as_dirty = false;
 
 	spin_lock(&inode->i_lock);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4fb309a2b4c4..a4a58be94064 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type {
 	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
 	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
 	 */
-	enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
-	enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
+	enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *);
+	enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);
 
 	void (*free_deviceid_node) (struct nfs4_deviceid_node *);
 
@@ -213,13 +213,13 @@ bool pnfs_roc(struct inode *ino);
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_header *);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
 int pnfs_commit_and_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_pgio_data *);
-void pnfs_ld_read_done(struct nfs_pgio_data *);
+void pnfs_ld_write_done(struct nfs_pgio_header *);
+void pnfs_ld_read_done(struct nfs_pgio_header *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       struct nfs_open_context *ctx,
 					       loff_t pos,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c171ce1a8a30..b09cc23d6f43 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return 0;
 }
 
-static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	struct inode *inode = data->header->inode;
+	struct inode *inode = hdr->inode;
 
 	nfs_invalidate_atime(inode);
 	if (task->tk_status >= 0) {
-		nfs_refresh_inode(inode, data->res.fattr);
+		nfs_refresh_inode(inode, hdr->res.fattr);
 		/* Emulate the eof flag, which isn't normally needed in NFSv2
 		 * as it is guaranteed to always return the file attributes
 		 */
-		if (data->args.offset + data->res.count >= data->res.fattr->size)
-			data->res.eof = 1;
+		if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
+			hdr->res.eof = 1;
 	}
 	return 0;
 }
 
-static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_header *hdr,
+				struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
-static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
+				     struct nfs_pgio_header *hdr)
 {
 	rpc_call_start(task);
 	return 0;
 }
 
-static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	struct inode *inode = data->header->inode;
+	struct inode *inode = hdr->inode;
 
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
 	return 0;
 }
 
-static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
+				 struct rpc_message *msg)
 {
 	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
-	data->args.stable = NFS_FILE_SYNC;
+	hdr->args.stable = NFS_FILE_SYNC;
 	msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index d9df4ab3737b..b1532b73fea3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -172,14 +172,15 @@ out:
 	hdr->release(hdr);
 }
 
-static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+static void nfs_initiate_read(struct nfs_pgio_header *hdr,
+			      struct rpc_message *msg,
 			      struct rpc_task_setup *task_setup_data, int how)
 {
-	struct inode *inode = data->header->inode;
+	struct inode *inode = hdr->inode;
 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
 
 	task_setup_data->flags |= swap_flags;
-	NFS_PROTO(inode)->read_setup(data, msg);
+	NFS_PROTO(inode)->read_setup(hdr, msg);
 }
 
 static void
@@ -203,14 +204,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+static int nfs_readpage_done(struct rpc_task *task,
+			     struct nfs_pgio_header *hdr,
 			     struct inode *inode)
 {
-	int status = NFS_PROTO(inode)->read_done(task, data);
+	int status = NFS_PROTO(inode)->read_done(task, hdr);
 	if (status != 0)
 		return status;
 
-	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
+	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
 
 	if (task->tk_status == -ESTALE) {
 		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@@ -219,34 +221,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
 	return 0;
 }
 
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_readpage_retry(struct rpc_task *task,
+			       struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_args *argp = &data->args;
-	struct nfs_pgio_res  *resp = &data->res;
+	struct nfs_pgio_args *argp = &hdr->args;
+	struct nfs_pgio_res  *resp = &hdr->res;
 
 	/* This is a short read! */
-	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
+	nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
 	/* Has the server at least made some progress? */
 	if (resp->count == 0) {
-		nfs_set_pgio_error(data->header, -EIO, argp->offset);
+		nfs_set_pgio_error(hdr, -EIO, argp->offset);
 		return;
 	}
-	/* Yes, so retry the read at the end of the data */
-	data->mds_offset += resp->count;
+	/* Yes, so retry the read at the end of the hdr */
+	hdr->mds_offset += resp->count;
 	argp->offset += resp->count;
 	argp->pgbase += resp->count;
 	argp->count -= resp->count;
 	rpc_restart_call_prepare(task);
 }
 
-static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_readpage_result(struct rpc_task *task,
+				struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-
-	if (data->res.eof) {
+	if (hdr->res.eof) {
 		loff_t bound;
 
-		bound = data->args.offset + data->res.count;
+		bound = hdr->args.offset + hdr->res.count;
 		spin_lock(&hdr->lock);
 		if (bound < hdr->io_start + hdr->good_bytes) {
 			set_bit(NFS_IOHDR_EOF, &hdr->flags);
@@ -254,8 +256,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat
 			hdr->good_bytes = bound - hdr->io_start;
 		}
 		spin_unlock(&hdr->lock);
-	} else if (data->res.count != data->args.count)
-		nfs_readpage_retry(task, data);
+	} else if (hdr->res.count != hdr->args.count)
+		nfs_readpage_retry(task, hdr);
 }
 
 /*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6afe0f679420..6a2d0986a3a3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -596,11 +596,11 @@ nfs_clear_request_commit(struct nfs_page *req)
 }
 
 static inline
-int nfs_write_need_commit(struct nfs_pgio_data *data)
+int nfs_write_need_commit(struct nfs_pgio_header *hdr)
 {
-	if (data->writeverf.committed == NFS_DATA_SYNC)
-		return data->header->lseg == NULL;
-	return data->writeverf.committed != NFS_FILE_SYNC;
+	if (hdr->writeverf.committed == NFS_DATA_SYNC)
+		return hdr->lseg == NULL;
+	return hdr->writeverf.committed != NFS_FILE_SYNC;
 }
 
 #else
@@ -627,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req)
 }
 
 static inline
-int nfs_write_need_commit(struct nfs_pgio_data *data)
+int nfs_write_need_commit(struct nfs_pgio_header *hdr)
 {
 	return 0;
 }
@@ -1013,17 +1013,18 @@ static int flush_task_priority(int how)
 	return RPC_PRIORITY_NORMAL;
 }
 
-static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+static void nfs_initiate_write(struct nfs_pgio_header *hdr,
+			       struct rpc_message *msg,
 			       struct rpc_task_setup *task_setup_data, int how)
 {
-	struct inode *inode = data->header->inode;
+	struct inode *inode = hdr->inode;
 	int priority = flush_task_priority(how);
 
 	task_setup_data->priority = priority;
-	NFS_PROTO(inode)->write_setup(data, msg);
+	NFS_PROTO(inode)->write_setup(hdr, msg);
 
 	nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
-				 &task_setup_data->rpc_client, msg, data);
+				 &task_setup_data->rpc_client, msg, hdr);
 }
 
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1085,19 +1086,17 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
 }
 
-static void nfs_writeback_release_common(struct nfs_pgio_data *data)
+static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_header *hdr = data->header;
-	int status = data->task.tk_status;
+	int status = hdr->task.tk_status;
 
-	if ((status >= 0) && nfs_write_need_commit(data)) {
+	if ((status >= 0) && nfs_write_need_commit(hdr)) {
 		spin_lock(&hdr->lock);
 		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
 			; /* Do nothing */
 		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-			memcpy(&hdr->verf, &data->writeverf, sizeof(hdr->verf));
-		else if (memcmp(&hdr->verf, &data->writeverf,
-			 sizeof(hdr->verf)))
+			memcpy(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf));
+		else if (memcmp(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf)))
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
@@ -1131,7 +1130,8 @@ static int nfs_should_remove_suid(const struct inode *inode)
 /*
  * This function is called when the WRITE call is complete.
  */
-static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+static int nfs_writeback_done(struct rpc_task *task,
+			      struct nfs_pgio_header *hdr,
 			      struct inode *inode)
 {
 	int status;
@@ -1143,13 +1143,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
 	 * another writer had changed the file, but some applications
 	 * depend on tighter cache coherency when writing.
 	 */
-	status = NFS_PROTO(inode)->write_done(task, data);
+	status = NFS_PROTO(inode)->write_done(task, hdr);
 	if (status != 0)
 		return status;
-	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
+	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
 
 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
-	if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
+	if (hdr->res.verf->committed < hdr->args.stable &&
+	    task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
 		 * commit data to stable storage even though we
 		 * requested it.
@@ -1165,7 +1166,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
 			dprintk("NFS:       faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
 				NFS_SERVER(inode)->nfs_client->cl_hostname,
-				data->res.verf->committed, data->args.stable);
+				hdr->res.verf->committed, hdr->args.stable);
 			complain = jiffies + 300 * HZ;
 		}
 	}
@@ -1180,16 +1181,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
 /*
  * This function is called when the WRITE call is complete.
  */
-static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_writeback_result(struct rpc_task *task,
+				 struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_args	*argp = &data->args;
-	struct nfs_pgio_res	*resp = &data->res;
+	struct nfs_pgio_args	*argp = &hdr->args;
+	struct nfs_pgio_res	*resp = &hdr->res;
 
 	if (resp->count < argp->count) {
 		static unsigned long    complain;
 
 		/* This a short write! */
-		nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
+		nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
 
 		/* Has the server at least made some progress? */
 		if (resp->count == 0) {
@@ -1199,14 +1201,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da
 				       argp->count);
 				complain = jiffies + 300 * HZ;
 			}
-			nfs_set_pgio_error(data->header, -EIO, argp->offset);
+			nfs_set_pgio_error(hdr, -EIO, argp->offset);
 			task->tk_status = -EIO;
 			return;
 		}
 		/* Was this an NFSv2 write or an NFSv3 stable write? */
 		if (resp->verf->committed != NFS_UNSTABLE) {
 			/* Resend from where the server left off */
-			data->mds_offset += resp->count;
+			hdr->mds_offset += resp->count;
 			argp->offset += resp->count;
 			argp->pgbase += resp->count;
 			argp->count -= resp->count;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 43592651cd5a..d0fae7b78252 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -64,10 +64,11 @@ struct nfs_rw_ops {
 	const fmode_t rw_mode;
 	struct nfs_pgio_header *(*rw_alloc_header)(void);
 	void (*rw_free_header)(struct nfs_pgio_header *);
-	void (*rw_release)(struct nfs_pgio_data *);
-	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *);
-	void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *);
-	void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *,
+	void (*rw_release)(struct nfs_pgio_header *);
+	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
+			struct inode *);
+	void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *);
+	void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *,
 			    struct rpc_task_setup *, int);
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bb18dba1aefe..efeaf7690b51 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1257,27 +1257,10 @@ enum {
 	NFS_IOHDR_NEED_RESCHED,
 };
 
-struct nfs_pgio_data {
-	struct nfs_pgio_header	*header;
-	struct list_head	list;
-	struct rpc_task		task;
-	struct nfs_fattr	fattr;
-	struct nfs_writeverf	writeverf;	/* Used for writes */
-	struct nfs_pgio_args	args;		/* argument struct */
-	struct nfs_pgio_res	res;		/* result struct */
-	unsigned long		timestamp;	/* For lease renewal */
-	int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data);
-	__u64			mds_offset;	/* Filelayout dense stripe */
-	struct nfs_page_array	page_array;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-	int			ds_idx;		/* ds index if ds_clp is set */
-};
-
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
 	struct list_head	pages;
-	struct nfs_pgio_data	data;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
 	struct nfs_writeverf	verf;		/* Used for writes */
@@ -1295,6 +1278,21 @@ struct nfs_pgio_header {
 	int			error;		/* merge with pnfs_error */
 	unsigned long		good_bytes;	/* boundary of good data */
 	unsigned long		flags;
+
+	/*
+	 * rpc data
+	 */
+	struct rpc_task		task;
+	struct nfs_fattr	fattr;
+	struct nfs_writeverf	writeverf;	/* Used for writes */
+	struct nfs_pgio_args	args;		/* argument struct */
+	struct nfs_pgio_res	res;		/* result struct */
+	unsigned long		timestamp;	/* For lease renewal */
+	int (*pgio_done_cb)(struct rpc_task *, struct nfs_pgio_header *);
+	__u64			mds_offset;	/* Filelayout dense stripe */
+	struct nfs_page_array	page_array;
+	struct nfs_client	*ds_clp;	/* pNFS data server */
+	int			ds_idx;		/* ds index if ds_clp is set */
 };
 
 struct nfs_mds_commit_info {
@@ -1426,11 +1424,12 @@ struct nfs_rpc_ops {
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
-	int	(*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
-	void	(*read_setup)   (struct nfs_pgio_data *, struct rpc_message *);
-	int	(*read_done)  (struct rpc_task *, struct nfs_pgio_data *);
-	void	(*write_setup)  (struct nfs_pgio_data *, struct rpc_message *);
-	int	(*write_done)  (struct rpc_task *, struct nfs_pgio_data *);
+	int	(*pgio_rpc_prepare)(struct rpc_task *,
+				    struct nfs_pgio_header *);
+	void	(*read_setup)(struct nfs_pgio_header *, struct rpc_message *);
+	int	(*read_done)(struct rpc_task *, struct nfs_pgio_header *);
+	void	(*write_setup)(struct nfs_pgio_header *, struct rpc_message *);
+	int	(*write_done)(struct rpc_task *, struct nfs_pgio_header *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
 	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
 	int	(*commit_done) (struct rpc_task *, struct nfs_commit_data *);
-- 
cgit v1.2.3


From c65e6254ca4db1584c5bf5f228ee26556477a9fd Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Mon, 9 Jun 2014 11:48:36 -0400
Subject: nfs: remove unused writeverf code

Remove duplicate writeverf structure from merge of nfs_pgio_header and
nfs_pgio_data and remove writeverf related flags and logic to handle
more than one RPC per nfs_pgio_header.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/blocklayout/blocklayout.c |  2 +-
 fs/nfs/direct.c                  | 25 ++++++++-----------------
 fs/nfs/internal.h                |  1 +
 fs/nfs/objlayout/objlayout.c     |  4 ++--
 fs/nfs/pagelist.c                |  2 +-
 fs/nfs/write.c                   | 27 +++++----------------------
 include/linux/nfs_xdr.h          |  3 ---
 7 files changed, 18 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index c3ccfe440390..04ac32b339f8 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -445,7 +445,7 @@ static void bl_end_par_io_write(void *data, int num_se)
 	}
 
 	hdr->task.tk_status = hdr->pnfs_error;
-	hdr->writeverf.committed = NFS_FILE_SYNC;
+	hdr->verf.committed = NFS_FILE_SYNC;
 	INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
 	schedule_work(&hdr->task.u.tk_work);
 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6c4c867ee04c..2a3293a5dda0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
 	struct nfs_commit_info cinfo;
-	int bit = -1;
+	bool request_commit = false;
 	struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 		dreq->flags = 0;
 		dreq->error = hdr->error;
 	}
-	if (dreq->error != 0)
-		bit = NFS_IOHDR_ERROR;
-	else {
+	if (dreq->error == 0) {
 		dreq->count += hdr->good_bytes;
-		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
-			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-			bit = NFS_IOHDR_NEED_RESCHED;
-		} else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+		if (nfs_write_need_commit(hdr)) {
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
-				bit = NFS_IOHDR_NEED_RESCHED;
+				request_commit = true;
 			else if (dreq->flags == 0) {
 				nfs_direct_set_hdr_verf(dreq, hdr);
-				bit = NFS_IOHDR_NEED_COMMIT;
+				request_commit = true;
 				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-				if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+				request_commit = true;
+				if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
 					dreq->flags =
 						NFS_ODIRECT_RESCHED_WRITES;
-					bit = NFS_IOHDR_NEED_RESCHED;
-				} else
-					bit = NFS_IOHDR_NEED_COMMIT;
 			}
 		}
 	}
@@ -760,9 +753,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 
 		req = nfs_list_entry(hdr->pages.next);
 		nfs_list_remove_request(req);
-		switch (bit) {
-		case NFS_IOHDR_NEED_RESCHED:
-		case NFS_IOHDR_NEED_COMMIT:
+		if (request_commit) {
 			kref_get(&req->wb_kref);
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			do_destroy = false;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3f3aedd2e8c9..da36257628c5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -441,6 +441,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst,
 void nfs_mark_request_commit(struct nfs_page *req,
 			     struct pnfs_layout_segment *lseg,
 			     struct nfs_commit_info *cinfo);
+int nfs_write_need_commit(struct nfs_pgio_header *);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 86312787cee6..697a16d11fac 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -328,7 +328,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 	oir->status = hdr->task.tk_status = status;
 	if (status >= 0) {
 		hdr->res.count = status;
-		hdr->writeverf.committed = oir->committed;
+		hdr->verf.committed = oir->committed;
 	} else {
 		hdr->pnfs_error = status;
 	}
@@ -336,7 +336,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 	/* must not use oir after this point */
 
 	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
-		status, hdr->writeverf.committed, sync);
+		status, hdr->verf.committed, sync);
 
 	if (sync)
 		pnfs_ld_write_done(hdr);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ecb3d4cdbc85..7dd0d5f101a4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -545,7 +545,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
 	hdr->res.fattr   = &hdr->fattr;
 	hdr->res.count   = count;
 	hdr->res.eof     = 0;
-	hdr->res.verf    = &hdr->writeverf;
+	hdr->res.verf    = &hdr->verf;
 	nfs_fattr_init(&hdr->fattr);
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6a2d0986a3a3..8534ee5c207d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -595,12 +595,11 @@ nfs_clear_request_commit(struct nfs_page *req)
 	}
 }
 
-static inline
 int nfs_write_need_commit(struct nfs_pgio_header *hdr)
 {
-	if (hdr->writeverf.committed == NFS_DATA_SYNC)
+	if (hdr->verf.committed == NFS_DATA_SYNC)
 		return hdr->lseg == NULL;
-	return hdr->writeverf.committed != NFS_FILE_SYNC;
+	return hdr->verf.committed != NFS_FILE_SYNC;
 }
 
 #else
@@ -626,7 +625,6 @@ nfs_clear_request_commit(struct nfs_page *req)
 {
 }
 
-static inline
 int nfs_write_need_commit(struct nfs_pgio_header *hdr)
 {
 	return 0;
@@ -654,11 +652,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 			nfs_context_set_write_error(req->wb_context, hdr->error);
 			goto remove_req;
 		}
-		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
-			nfs_mark_request_dirty(req);
-			goto next;
-		}
-		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+		if (nfs_write_need_commit(hdr)) {
 			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			goto next;
@@ -668,7 +662,7 @@ remove_req:
 next:
 		nfs_unlock_request(req);
 		nfs_end_page_writeback(req);
-		do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
+		do_destroy = !nfs_write_need_commit(hdr);
 		nfs_release_request(req);
 	}
 out:
@@ -1088,18 +1082,7 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 
 static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
 {
-	int status = hdr->task.tk_status;
-
-	if ((status >= 0) && nfs_write_need_commit(hdr)) {
-		spin_lock(&hdr->lock);
-		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
-			; /* Do nothing */
-		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-			memcpy(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf));
-		else if (memcmp(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf)))
-			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
-		spin_unlock(&hdr->lock);
-	}
+	/* do nothing! */
 }
 
 /*
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index efeaf7690b51..e1b7b3b7c40f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1253,8 +1253,6 @@ enum {
 	NFS_IOHDR_ERROR = 0,
 	NFS_IOHDR_EOF,
 	NFS_IOHDR_REDO,
-	NFS_IOHDR_NEED_COMMIT,
-	NFS_IOHDR_NEED_RESCHED,
 };
 
 struct nfs_pgio_header {
@@ -1284,7 +1282,6 @@ struct nfs_pgio_header {
 	 */
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
-	struct nfs_writeverf	writeverf;	/* Used for writes */
 	struct nfs_pgio_args	args;		/* argument struct */
 	struct nfs_pgio_res	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
-- 
cgit v1.2.3


From 4714fb51fd03a14d8c73001438283e7f7b752f1e Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Mon, 9 Jun 2014 11:48:37 -0400
Subject: nfs: remove pgio_header refcount, related cleanup

The refcounting on nfs_pgio_header was related to there being (possibly)
more than one nfs_pgio_data. Now that nfs_pgio_data has been merged into
nfs_pgio_header, there is no reason to do this ref counting.  Just call
the completion callback on nfs_pgio_release/nfs_pgio_error.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c       | 36 +++++++++++-------------------------
 fs/nfs/pnfs.c           |  6 ------
 include/linux/nfs_xdr.h |  1 -
 3 files changed, 11 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7dd0d5f101a4..580fc0c982e6 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -459,7 +459,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
 	if (hdr) {
 		INIT_LIST_HEAD(&hdr->pages);
 		spin_lock_init(&hdr->lock);
-		atomic_set(&hdr->refcnt, 0);
 		hdr->rw_ops = ops;
 	}
 	return hdr;
@@ -477,31 +476,18 @@ void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
 EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
 
 /**
- * nfs_pgio_data_alloc - Allocate pageio data
- * @hdr: The header making a request
- * @pagecount: Number of pages to create
- */
-static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr,
-			       unsigned int pagecount)
-{
-	if (nfs_pgarray_set(&hdr->page_array, pagecount)) {
-		atomic_inc(&hdr->refcnt);
-		return true;
-	}
-	return false;
-}
-
-/**
- * nfs_pgio_data_destroy - Properly release pageio data
- * @hdr: The header with data to destroy
+ * nfs_pgio_data_destroy - make @hdr suitable for reuse
+ *
+ * Frees memory and releases refs from nfs_generic_pgio, so that it may
+ * be called again.
+ *
+ * @hdr: A header that has had nfs_generic_pgio called
  */
 void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
 {
 	put_nfs_open_context(hdr->args.context);
 	if (hdr->page_array.pagevec != hdr->page_array.page_array)
 		kfree(hdr->page_array.pagevec);
-	if (atomic_dec_and_test(&hdr->refcnt))
-		hdr->completion_ops->completion(hdr);
 }
 EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
 
@@ -620,6 +606,7 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
 {
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	nfs_pgio_data_destroy(hdr);
+	hdr->completion_ops->completion(hdr);
 	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 	return -ENOMEM;
 }
@@ -634,6 +621,7 @@ static void nfs_pgio_release(void *calldata)
 	if (hdr->rw_ops->rw_release)
 		hdr->rw_ops->rw_release(hdr);
 	nfs_pgio_data_destroy(hdr);
+	hdr->completion_ops->completion(hdr);
 }
 
 /**
@@ -707,9 +695,10 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 	struct page		**pages;
 	struct list_head *head = &desc->pg_list;
 	struct nfs_commit_info cinfo;
+	unsigned int pagecount;
 
-	if (!nfs_pgio_data_init(hdr, nfs_page_array_len(desc->pg_base,
-			   desc->pg_count)))
+	pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
+	if (!nfs_pgarray_set(&hdr->page_array, pagecount))
 		return nfs_pgio_error(desc, hdr);
 
 	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
@@ -743,14 +732,11 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
 		return -ENOMEM;
 	}
 	nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
-	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_pgio(desc, hdr);
 	if (ret == 0)
 		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
 					hdr, desc->pg_rpc_callops,
 					desc->pg_ioflags, 0);
-	if (atomic_dec_and_test(&hdr->refcnt))
-		hdr->completion_ops->completion(hdr);
 	return ret;
 }
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ecc911347750..ecbed4632d11 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1602,15 +1602,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	}
 	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
-	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_pgio(desc, hdr);
 	if (ret != 0) {
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 	} else
 		pnfs_do_write(desc, hdr, desc->pg_ioflags);
-	if (atomic_dec_and_test(&hdr->refcnt))
-		hdr->completion_ops->completion(hdr);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
@@ -1745,15 +1742,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	}
 	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
-	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_pgio(desc, hdr);
 	if (ret != 0) {
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 	} else
 		pnfs_do_read(desc, hdr);
-	if (atomic_dec_and_test(&hdr->refcnt))
-		hdr->completion_ops->completion(hdr);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e1b7b3b7c40f..81cbbf313272 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1259,7 +1259,6 @@ struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
 	struct list_head	pages;
-	atomic_t		refcnt;
 	struct nfs_page		*req;
 	struct nfs_writeverf	verf;		/* Used for writes */
 	struct pnfs_layout_segment *lseg;
-- 
cgit v1.2.3


From 53113ad35e4b9ce82d949c7c67c7b666fad5d907 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Mon, 9 Jun 2014 11:48:38 -0400
Subject: pnfs: clean up *_resend_to_mds

Clean up pnfs_read_done_resend_to_mds and pnfs_write_done_resend_to_mds:
 - instead of passing all arguments from a nfs_pgio_header, just pass the header
 - share the common code

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/filelayout/filelayout.c | 10 ++-----
 fs/nfs/pagelist.c              | 32 +++++++++++++++++++++
 fs/nfs/pnfs.c                  | 63 ++++++------------------------------------
 fs/nfs/pnfs.h                  |  8 ++----
 include/linux/nfs_page.h       |  2 ++
 5 files changed, 47 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 537e7f7a0b48..504d58a51d35 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -97,10 +97,7 @@ static void filelayout_reset_write(struct nfs_pgio_header *hdr)
 			hdr->args.count,
 			(unsigned long long)hdr->args.offset);
 
-		task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
-							&hdr->pages,
-							hdr->completion_ops,
-							hdr->dreq);
+		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
 	}
 }
 
@@ -117,10 +114,7 @@ static void filelayout_reset_read(struct nfs_pgio_header *hdr)
 			hdr->args.count,
 			(unsigned long long)hdr->args.offset);
 
-		task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
-							&hdr->pages,
-							hdr->completion_ops,
-							hdr->dreq);
+		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
 	}
 }
 
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 580fc0c982e6..9c6c55359394 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -949,6 +949,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_add_request);
 
+/*
+ * nfs_pageio_resend - Transfer requests to new descriptor and resend
+ * @hdr - the pgio header to move request from
+ * @desc - the pageio descriptor to add requests to
+ *
+ * Try to move each request (nfs_page) from @hdr to @desc then attempt
+ * to send them.
+ *
+ * Returns 0 on success and < 0 on error.
+ */
+int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
+		      struct nfs_pgio_header *hdr)
+{
+	LIST_HEAD(failed);
+
+	desc->pg_dreq = hdr->dreq;
+	while (!list_empty(&hdr->pages)) {
+		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+		nfs_list_remove_request(req);
+		if (!nfs_pageio_add_request(desc, req))
+			nfs_list_add_request(req, &failed);
+	}
+	nfs_pageio_complete(desc);
+	if (!list_empty(&failed)) {
+		list_move(&failed, &hdr->pages);
+		return -EIO;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_resend);
+
 /**
  * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
  * @desc: pointer to io descriptor
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ecbed4632d11..83ff8a05485a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1470,35 +1470,14 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
-int pnfs_write_done_resend_to_mds(struct inode *inode,
-				struct list_head *head,
-				const struct nfs_pgio_completion_ops *compl_ops,
-				struct nfs_direct_req *dreq)
+int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
 {
 	struct nfs_pageio_descriptor pgio;
-	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
-	pgio.pg_dreq = dreq;
-	while (!list_empty(head)) {
-		struct nfs_page *req = nfs_list_entry(head->next);
-
-		nfs_list_remove_request(req);
-		if (!nfs_pageio_add_request(&pgio, req))
-			nfs_list_add_request(req, &failed);
-	}
-	nfs_pageio_complete(&pgio);
-
-	if (!list_empty(&failed)) {
-		/* For some reason our attempt to resend pages. Mark the
-		 * overall send request as having failed, and let
-		 * nfs_writeback_release_full deal with the error.
-		 */
-		list_move(&failed, head);
-		return -EIO;
-	}
-	return 0;
+	nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
+			      hdr->completion_ops);
+	return nfs_pageio_resend(&pgio, hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
 
@@ -1511,10 +1490,7 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
 		pnfs_return_layout(hdr->inode);
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
-		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
-							&hdr->pages,
-							hdr->completion_ops,
-							hdr->dreq);
+		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
 }
 
 /*
@@ -1612,31 +1588,13 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 
-int pnfs_read_done_resend_to_mds(struct inode *inode,
-				struct list_head *head,
-				const struct nfs_pgio_completion_ops *compl_ops,
-				struct nfs_direct_req *dreq)
+int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
 {
 	struct nfs_pageio_descriptor pgio;
-	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_read(&pgio, inode, true, compl_ops);
-	pgio.pg_dreq = dreq;
-	while (!list_empty(head)) {
-		struct nfs_page *req = nfs_list_entry(head->next);
-
-		nfs_list_remove_request(req);
-		if (!nfs_pageio_add_request(&pgio, req))
-			nfs_list_add_request(req, &failed);
-	}
-	nfs_pageio_complete(&pgio);
-
-	if (!list_empty(&failed)) {
-		list_move(&failed, head);
-		return -EIO;
-	}
-	return 0;
+	nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
+	return nfs_pageio_resend(&pgio, hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
 
@@ -1648,10 +1606,7 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
 		pnfs_return_layout(hdr->inode);
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
-		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
-							&hdr->pages,
-							hdr->completion_ops,
-							hdr->dreq);
+		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
 }
 
 /*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a4a58be94064..27ddecd3847f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -228,12 +228,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       gfp_t gfp_flags);
 
 void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
-int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
-			const struct nfs_pgio_completion_ops *compl_ops,
-			struct nfs_direct_req *dreq);
-int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
-			const struct nfs_pgio_completion_ops *compl_ops,
-			struct nfs_direct_req *dreq);
+int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
+int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
 
 /* nfs4_deviceid_flags */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index d0fae7b78252..4b48548e700e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -112,6 +112,8 @@ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     int how);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
 				   struct nfs_page *);
+extern  int nfs_pageio_resend(struct nfs_pageio_descriptor *,
+			      struct nfs_pgio_header *);
 extern	void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
 extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
 extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
-- 
cgit v1.2.3


From d0d480cce8f522b37c2c1de38230fc9ad15fa506 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 25 Jun 2014 10:08:44 -0700
Subject: leds: add led-class attribute-group support

Allow led-class devices to be created with optional attribute groups.

This is needed in order to allow led drivers to create custom device
attributes in a race-free manner.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/led-class.c | 5 +++--
 include/linux/leds.h     | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index f37d63cf726b..aa29198fca3e 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -210,8 +210,9 @@ static const struct dev_pm_ops leds_class_dev_pm_ops = {
  */
 int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
 {
-	led_cdev->dev = device_create(leds_class, parent, 0, led_cdev,
-				      "%s", led_cdev->name);
+	led_cdev->dev = device_create_with_groups(leds_class, parent, 0,
+					led_cdev, led_cdev->groups,
+					"%s", led_cdev->name);
 	if (IS_ERR(led_cdev->dev))
 		return PTR_ERR(led_cdev->dev);
 
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 0287ab296689..e43686472197 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -63,6 +63,8 @@ struct led_classdev {
 				     unsigned long *delay_off);
 
 	struct device		*dev;
+	const struct attribute_group	**groups;
+
 	struct list_head	 node;			/* LED Device list */
 	const char		*default_trigger;	/* Trigger to use */
 
-- 
cgit v1.2.3


From 3482f2c52b77bf6596e24aae82e204a0603eba66 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Thu, 27 Mar 2014 17:18:55 -0700
Subject: of: Create of_console_check() for selecting a console specified in
 /chosen

The devicetree has a binding for specifying the console device in the
/chosen node, but the kernel doesn't use it consistently. This change
adds an API for testing if a device node is a console, and adds a
preferred console entry if it is.

At the same time this patch removes the of_device_is_stdout_path() API
since it is unused.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 drivers/of/base.c  | 23 +++++++++++++----------
 include/linux/of.h |  6 +++---
 2 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index b9864806e9b8..df9b2bb7bb27 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -17,6 +17,7 @@
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
  */
+#include <linux/console.h>
 #include <linux/ctype.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
@@ -2180,20 +2181,22 @@ const char *of_prop_next_string(struct property *prop, const char *cur)
 EXPORT_SYMBOL_GPL(of_prop_next_string);
 
 /**
- * of_device_is_stdout_path - check if a device node matches the
- *                            linux,stdout-path property
- *
- * Check if this device node matches the linux,stdout-path property
- * in the chosen node. return true if yes, false otherwise.
+ * of_console_check() - Test and setup console for DT setup
+ * @dn - Pointer to device node
+ * @name - Name to use for preferred console without index. ex. "ttyS"
+ * @index - Index to use for preferred console.
+ *
+ * Check if the given device node matches the stdout-path property in the
+ * /chosen node. If it does then register it as the preferred console and return
+ * TRUE. Otherwise return FALSE.
  */
-int of_device_is_stdout_path(struct device_node *dn)
+bool of_console_check(struct device_node *dn, char *name, int index)
 {
-	if (!of_stdout)
+	if (!dn || dn != of_stdout || console_set_on_cmdline)
 		return false;
-
-	return of_stdout == dn;
+	return add_preferred_console(name, index, NULL);
 }
-EXPORT_SYMBOL_GPL(of_device_is_stdout_path);
+EXPORT_SYMBOL_GPL(of_console_check);
 
 /**
  *	of_find_next_cache_node - Find a node's subsidiary cache
diff --git a/include/linux/of.h b/include/linux/of.h
index 196b34c1ef4e..9d9734056e39 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -352,7 +352,7 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur,
  */
 const char *of_prop_next_string(struct property *prop, const char *cur);
 
-int of_device_is_stdout_path(struct device_node *dn);
+bool of_console_check(struct device_node *dn, char *name, int index);
 
 #else /* CONFIG_OF */
 
@@ -564,9 +564,9 @@ static inline int of_machine_is_compatible(const char *compat)
 	return 0;
 }
 
-static inline int of_device_is_stdout_path(struct device_node *dn)
+static inline bool of_console_check(const struct device_node *dn, const char *name, int index)
 {
-	return 0;
+	return false;
 }
 
 static inline const __be32 *of_prop_next_u32(struct property *prop,
-- 
cgit v1.2.3


From a752ee56ad84bf9a35b8323af1ad22b03c1df2c4 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Fri, 28 Mar 2014 08:12:18 -0700
Subject: tty: Update hypervisor tty drivers to use core stdout parsing code.

The evh_bytechan, hvc_opal and hvc_vio drivers all open code the parsing
of the stdout node in the device tree. This patch simplifies the driver
by removing the duplicated functionality.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/base.c          |  5 ++++-
 drivers/tty/ehv_bytechan.c | 43 ++++---------------------------------------
 drivers/tty/hvc/hvc_opal.c | 15 +++------------
 drivers/tty/hvc/hvc_vio.c  | 29 ++++++++++-------------------
 include/linux/of.h         |  1 +
 5 files changed, 22 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index df9b2bb7bb27..e4f95ba0a3eb 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -36,7 +36,7 @@ struct device_node *of_allnodes;
 EXPORT_SYMBOL(of_allnodes);
 struct device_node *of_chosen;
 struct device_node *of_aliases;
-static struct device_node *of_stdout;
+struct device_node *of_stdout;
 
 static struct kset *of_kset;
 
@@ -2063,9 +2063,12 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
 		of_chosen = of_find_node_by_path("/chosen@0");
 
 	if (of_chosen) {
+		/* linux,stdout-path and /aliases/stdout are for legacy compatibility */
 		const char *name = of_get_property(of_chosen, "stdout-path", NULL);
 		if (!name)
 			name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+		if (IS_ENABLED(CONFIG_PPC) && !name)
+			name = of_get_property(of_aliases, "stdout", NULL);
 		if (name)
 			of_stdout = of_find_node_by_path(name);
 	}
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 0419b69e270f..4f485e88f60c 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -108,55 +108,23 @@ static void disable_tx_interrupt(struct ehv_bc_data *bc)
  *
  * The byte channel to be used for the console is specified via a "stdout"
  * property in the /chosen node.
- *
- * For compatible with legacy device trees, we also look for a "stdout" alias.
  */
 static int find_console_handle(void)
 {
-	struct device_node *np, *np2;
+	struct device_node *np = of_stdout;
 	const char *sprop = NULL;
 	const uint32_t *iprop;
 
-	np = of_find_node_by_path("/chosen");
-	if (np)
-		sprop = of_get_property(np, "stdout-path", NULL);
-
-	if (!np || !sprop) {
-		of_node_put(np);
-		np = of_find_node_by_name(NULL, "aliases");
-		if (np)
-			sprop = of_get_property(np, "stdout", NULL);
-	}
-
-	if (!sprop) {
-		of_node_put(np);
-		return 0;
-	}
-
 	/* We don't care what the aliased node is actually called.  We only
 	 * care if it's compatible with "epapr,hv-byte-channel", because that
-	 * indicates that it's a byte channel node.  We use a temporary
-	 * variable, 'np2', because we can't release 'np' until we're done with
-	 * 'sprop'.
+	 * indicates that it's a byte channel node.
 	 */
-	np2 = of_find_node_by_path(sprop);
-	of_node_put(np);
-	np = np2;
-	if (!np) {
-		pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop);
-		return 0;
-	}
-
-	/* Is it a byte channel? */
-	if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) {
-		of_node_put(np);
+	if (!np || !of_device_is_compatible(np, "epapr,hv-byte-channel"))
 		return 0;
-	}
 
 	stdout_irq = irq_of_parse_and_map(np, 0);
 	if (stdout_irq == NO_IRQ) {
-		pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop);
-		of_node_put(np);
+		pr_err("ehv-bc: no 'interrupts' property in %s node\n", np->full_name);
 		return 0;
 	}
 
@@ -167,12 +135,9 @@ static int find_console_handle(void)
 	if (!iprop) {
 		pr_err("ehv-bc: no 'hv-handle' property in %s node\n",
 		       np->name);
-		of_node_put(np);
 		return 0;
 	}
 	stdout_bc = be32_to_cpu(*iprop);
-
-	of_node_put(np);
 	return 1;
 }
 
diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index a585079b4b38..a2cc5f834c63 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -342,22 +342,13 @@ static void udbg_init_opal_common(void)
 
 void __init hvc_opal_init_early(void)
 {
-	struct device_node *stdout_node = NULL;
+	struct device_node *stdout_node = of_node_get(of_stdout);
 	const __be32 *termno;
-	const char *name = NULL;
 	const struct hv_ops *ops;
 	u32 index;
 
-	/* find the boot console from /chosen/stdout */
-	if (of_chosen)
-		name = of_get_property(of_chosen, "linux,stdout-path", NULL);
-	if (name) {
-		stdout_node = of_find_node_by_path(name);
-		if (!stdout_node) {
-			pr_err("hvc_opal: Failed to locate default console!\n");
-			return;
-		}
-	} else {
+	/* If the console wasn't in /chosen, try /ibm,opal */
+	if (!stdout_node) {
 		struct device_node *opal, *np;
 
 		/* Current OPAL takeover doesn't provide the stdout
diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
index b594abfbf21e..5618b5fc7500 100644
--- a/drivers/tty/hvc/hvc_vio.c
+++ b/drivers/tty/hvc/hvc_vio.c
@@ -404,42 +404,35 @@ module_exit(hvc_vio_exit);
 
 void __init hvc_vio_init_early(void)
 {
-	struct device_node *stdout_node;
 	const __be32 *termno;
 	const char *name;
 	const struct hv_ops *ops;
 
 	/* find the boot console from /chosen/stdout */
-	if (!of_chosen)
+	if (!of_stdout)
 		return;
-	name = of_get_property(of_chosen, "linux,stdout-path", NULL);
-	if (name == NULL)
-		return;
-	stdout_node = of_find_node_by_path(name);
-	if (!stdout_node)
-		return;
-	name = of_get_property(stdout_node, "name", NULL);
+	name = of_get_property(of_stdout, "name", NULL);
 	if (!name) {
 		printk(KERN_WARNING "stdout node missing 'name' property!\n");
-		goto out;
+		return;
 	}
 
 	/* Check if it's a virtual terminal */
 	if (strncmp(name, "vty", 3) != 0)
-		goto out;
-	termno = of_get_property(stdout_node, "reg", NULL);
+		return;
+	termno = of_get_property(of_stdout, "reg", NULL);
 	if (termno == NULL)
-		goto out;
+		return;
 	hvterm_priv0.termno = of_read_number(termno, 1);
 	spin_lock_init(&hvterm_priv0.buf_lock);
 	hvterm_privs[0] = &hvterm_priv0;
 
 	/* Check the protocol */
-	if (of_device_is_compatible(stdout_node, "hvterm1")) {
+	if (of_device_is_compatible(of_stdout, "hvterm1")) {
 		hvterm_priv0.proto = HV_PROTOCOL_RAW;
 		ops = &hvterm_raw_ops;
 	}
-	else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) {
+	else if (of_device_is_compatible(of_stdout, "hvterm-protocol")) {
 		hvterm_priv0.proto = HV_PROTOCOL_HVSI;
 		ops = &hvterm_hvsi_ops;
 		hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars,
@@ -447,7 +440,7 @@ void __init hvc_vio_init_early(void)
 		/* HVSI, perform the handshake now */
 		hvsilib_establish(&hvterm_priv0.hvsi);
 	} else
-		goto out;
+		return;
 	udbg_putc = udbg_hvc_putc;
 	udbg_getc = udbg_hvc_getc;
 	udbg_getc_poll = udbg_hvc_getc_poll;
@@ -456,14 +449,12 @@ void __init hvc_vio_init_early(void)
 	 * backend for HVSI, only do udbg
 	 */
 	if (hvterm_priv0.proto == HV_PROTOCOL_HVSI)
-		goto out;
+		return;
 #endif
 	/* Check whether the user has requested a different console. */
 	if (!strstr(cmd_line, "console="))
 		add_preferred_console("hvc", 0, NULL);
 	hvc_instantiate(0, 0, ops);
-out:
-	of_node_put(stdout_node);
 }
 
 /* call this from early_init() for a working debug console on
diff --git a/include/linux/of.h b/include/linux/of.h
index 9d9734056e39..f0d256273c83 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -113,6 +113,7 @@ static inline void of_node_put(struct device_node *node) { }
 extern struct device_node *of_allnodes;
 extern struct device_node *of_chosen;
 extern struct device_node *of_aliases;
+extern struct device_node *of_stdout;
 extern raw_spinlock_t devtree_lock;
 
 static inline bool of_have_populated_dt(void)
-- 
cgit v1.2.3


From 7b8278358cc2b453ca6e75eedb3741cdb7e97236 Mon Sep 17 00:00:00 2001
From: Aristeu Rozanski <aris@redhat.com>
Date: Wed, 18 Jun 2014 11:05:01 -0300
Subject: edac: add DDR4 and RDDR4

Haswell memory controller can make use of DDR4 and Registered DDR4

Cc: tony.luck@intel.com
Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/edac/edac_mc_sysfs.c | 4 +++-
 include/linux/edac.h         | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 01fae8289cf0..a6cd36100663 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -108,7 +108,9 @@ static const char * const mem_types[] = {
 	[MEM_RDDR2] = "Registered-DDR2",
 	[MEM_XDR] = "XDR",
 	[MEM_DDR3] = "Unbuffered-DDR3",
-	[MEM_RDDR3] = "Registered-DDR3"
+	[MEM_RDDR3] = "Registered-DDR3",
+	[MEM_DDR4] = "Unbuffered-DDR4",
+	[MEM_RDDR4] = "Registered-DDR4"
 };
 
 static const char * const dev_types[] = {
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 8e6c20af11a2..e1e68da6f35c 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -194,6 +194,9 @@ static inline char *mc_event_error_type(const unsigned int err_type)
  * @MEM_DDR3:		DDR3 RAM
  * @MEM_RDDR3:		Registered DDR3 RAM
  *			This is a variant of the DDR3 memories.
+ * @MEM_DDR4:		DDR4 RAM
+ * @MEM_RDDR4:		Registered DDR4 RAM
+ *			This is a variant of the DDR4 memories.
  */
 enum mem_type {
 	MEM_EMPTY = 0,
@@ -213,6 +216,8 @@ enum mem_type {
 	MEM_XDR,
 	MEM_DDR3,
 	MEM_RDDR3,
+	MEM_DDR4,
+	MEM_RDDR4,
 };
 
 #define MEM_FLAG_EMPTY		BIT(MEM_EMPTY)
-- 
cgit v1.2.3


From 11c32d7b6274cb0f554943d65bd4a126c4a86dcd Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 22 May 2014 23:25:14 +0200
Subject: video: move Versatile CLCD helpers

This moves the Versatile-specific helper code and panel database
down into the drivers/video folder next to the CLCD driver
itself, preserving the config symbol but also moving the header
to platform data.

This is necessary to rid the Integrator of this final <plat/*>
inclusion dependency and get us one less user of the
plat-versatile folder.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: linux-fbdev@vger.kernel.org
Cc: Russell King <linux@arm.linux.org.uk>
Acked-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-integrator/Kconfig                   |   1 -
 arch/arm/mach-integrator/integrator_cp.c           |   3 +-
 arch/arm/mach-realview/core.c                      |   2 +-
 arch/arm/mach-versatile/core.c                     |   2 +-
 arch/arm/mach-vexpress/ct-ca9x4.c                  |   3 +-
 arch/arm/plat-versatile/Kconfig                    |   3 -
 arch/arm/plat-versatile/Makefile                   |   1 -
 arch/arm/plat-versatile/clcd.c                     | 182 ---------------------
 arch/arm/plat-versatile/include/plat/clcd.h        |   9 -
 drivers/video/fbdev/Kconfig                        |   7 +
 drivers/video/fbdev/Makefile                       |   1 +
 drivers/video/fbdev/amba-clcd-versatile.c          | 182 +++++++++++++++++++++
 include/linux/platform_data/video-clcd-versatile.h |   9 +
 13 files changed, 203 insertions(+), 202 deletions(-)
 delete mode 100644 arch/arm/plat-versatile/clcd.c
 delete mode 100644 arch/arm/plat-versatile/include/plat/clcd.h
 create mode 100644 drivers/video/fbdev/amba-clcd-versatile.c
 create mode 100644 include/linux/platform_data/video-clcd-versatile.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig
index 64f8e2564a37..c455e974bbfe 100644
--- a/arch/arm/mach-integrator/Kconfig
+++ b/arch/arm/mach-integrator/Kconfig
@@ -17,7 +17,6 @@ config ARCH_INTEGRATOR_CP
 	bool "Support Integrator/CP platform"
 	select ARCH_CINTEGRATOR
 	select ARM_TIMER_SP804
-	select PLAT_VERSATILE_CLCD
 	select SERIAL_AMBA_PL011 if TTY
 	select SERIAL_AMBA_PL011_CONSOLE if TTY
 	select SOC_BUS
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index a938242b0c95..0228165d2d64 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -18,6 +18,7 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/kmi.h>
 #include <linux/amba/clcd.h>
+#include <linux/platform_data/video-clcd-versatile.h>
 #include <linux/amba/mmci.h>
 #include <linux/io.h>
 #include <linux/irqchip/versatile-fpga.h>
@@ -36,8 +37,6 @@
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 
-#include <plat/clcd.h>
-
 #include "hardware.h"
 #include "cm.h"
 #include "common.h"
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 8c1b39a0caa0..850e506926df 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -25,6 +25,7 @@
 #include <linux/interrupt.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
+#include <linux/platform_data/video-clcd-versatile.h>
 #include <linux/io.h>
 #include <linux/smsc911x.h>
 #include <linux/ata_platform.h>
@@ -48,7 +49,6 @@
 #include <mach/irqs.h>
 #include <asm/hardware/timer-sp.h>
 
-#include <plat/clcd.h>
 #include <plat/sched_clock.h>
 
 #include "core.h"
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index be83ba25f81b..08fb8c89f414 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -28,6 +28,7 @@
 #include <linux/of_platform.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
+#include <linux/platform_data/video-clcd-versatile.h>
 #include <linux/amba/pl061.h>
 #include <linux/amba/mmci.h>
 #include <linux/amba/pl022.h>
@@ -53,7 +54,6 @@
 #include <mach/platform.h>
 #include <asm/hardware/timer-sp.h>
 
-#include <plat/clcd.h>
 #include <plat/sched_clock.h>
 
 #include "core.h"
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 86150d7a2e7d..27bea049380a 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -8,6 +8,7 @@
 #include <linux/platform_device.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
+#include <linux/platform_data/video-clcd-versatile.h>
 #include <linux/clkdev.h>
 #include <linux/vexpress.h>
 #include <linux/irqchip/arm-gic.h>
@@ -29,8 +30,6 @@
 #include <mach/motherboard.h>
 #include <mach/irqs.h>
 
-#include <plat/clcd.h>
-
 static struct map_desc ct_ca9x4_io_desc[] __initdata = {
 	{
 		.virtual        = V2T_PERIPH,
diff --git a/arch/arm/plat-versatile/Kconfig b/arch/arm/plat-versatile/Kconfig
index fce41e93b6a4..a301ca2c7d00 100644
--- a/arch/arm/plat-versatile/Kconfig
+++ b/arch/arm/plat-versatile/Kconfig
@@ -3,9 +3,6 @@ if PLAT_VERSATILE
 config PLAT_VERSATILE_CLOCK
 	bool
 
-config PLAT_VERSATILE_CLCD
-	bool
-
 config PLAT_VERSATILE_SCHED_CLOCK
 	def_bool y
 
diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile
index 2e0c472958ae..03c4900ac3f4 100644
--- a/arch/arm/plat-versatile/Makefile
+++ b/arch/arm/plat-versatile/Makefile
@@ -1,6 +1,5 @@
 ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include
 
 obj-$(CONFIG_PLAT_VERSATILE_CLOCK) += clock.o
-obj-$(CONFIG_PLAT_VERSATILE_CLCD) += clcd.o
 obj-$(CONFIG_PLAT_VERSATILE_SCHED_CLOCK) += sched-clock.o
 obj-$(CONFIG_SMP) += headsmp.o platsmp.o
diff --git a/arch/arm/plat-versatile/clcd.c b/arch/arm/plat-versatile/clcd.c
deleted file mode 100644
index 6628cc27efc5..000000000000
--- a/arch/arm/plat-versatile/clcd.c
+++ /dev/null
@@ -1,182 +0,0 @@
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/amba/bus.h>
-#include <linux/amba/clcd.h>
-#include <plat/clcd.h>
-
-static struct clcd_panel vga = {
-	.mode		= {
-		.name		= "VGA",
-		.refresh	= 60,
-		.xres		= 640,
-		.yres		= 480,
-		.pixclock	= 39721,
-		.left_margin	= 40,
-		.right_margin	= 24,
-		.upper_margin	= 32,
-		.lower_margin	= 11,
-		.hsync_len	= 96,
-		.vsync_len	= 2,
-		.sync		= 0,
-		.vmode		= FB_VMODE_NONINTERLACED,
-	},
-	.width		= -1,
-	.height		= -1,
-	.tim2		= TIM2_BCD | TIM2_IPC,
-	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
-	.caps		= CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888,
-	.bpp		= 16,
-};
-
-static struct clcd_panel xvga = {
-	.mode		= {
-		.name		= "XVGA",
-		.refresh	= 60,
-		.xres		= 1024,
-		.yres		= 768,
-		.pixclock	= 15748,
-		.left_margin	= 152,
-		.right_margin	= 48,
-		.upper_margin	= 23,
-		.lower_margin	= 3,
-		.hsync_len	= 104,
-		.vsync_len	= 4,
-		.sync		= 0,
-		.vmode		= FB_VMODE_NONINTERLACED,
-	},
-	.width		= -1,
-	.height		= -1,
-	.tim2		= TIM2_BCD | TIM2_IPC,
-	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
-	.caps		= CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888,
-	.bpp		= 16,
-};
-
-/* Sanyo TM38QV67A02A - 3.8 inch QVGA (320x240) Color TFT */
-static struct clcd_panel sanyo_tm38qv67a02a = {
-	.mode		= {
-		.name		= "Sanyo TM38QV67A02A",
-		.refresh	= 116,
-		.xres		= 320,
-		.yres		= 240,
-		.pixclock	= 100000,
-		.left_margin	= 6,
-		.right_margin	= 6,
-		.upper_margin	= 5,
-		.lower_margin	= 5,
-		.hsync_len	= 6,
-		.vsync_len	= 6,
-		.sync		= 0,
-		.vmode		= FB_VMODE_NONINTERLACED,
-	},
-	.width		= -1,
-	.height		= -1,
-	.tim2		= TIM2_BCD,
-	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
-	.caps		= CLCD_CAP_5551,
-	.bpp		= 16,
-};
-
-static struct clcd_panel sanyo_2_5_in = {
-	.mode		= {
-		.name		= "Sanyo QVGA Portrait",
-		.refresh	= 116,
-		.xres		= 240,
-		.yres		= 320,
-		.pixclock	= 100000,
-		.left_margin	= 20,
-		.right_margin	= 10,
-		.upper_margin	= 2,
-		.lower_margin	= 2,
-		.hsync_len	= 10,
-		.vsync_len	= 2,
-		.sync		= FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
-		.vmode		= FB_VMODE_NONINTERLACED,
-	},
-	.width		= -1,
-	.height		= -1,
-	.tim2		= TIM2_IVS | TIM2_IHS | TIM2_IPC,
-	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
-	.caps		= CLCD_CAP_5551,
-	.bpp		= 16,
-};
-
-/* Epson L2F50113T00 - 2.2 inch 176x220 Color TFT */
-static struct clcd_panel epson_l2f50113t00 = {
-	.mode		= {
-		.name		= "Epson L2F50113T00",
-		.refresh	= 390,
-		.xres		= 176,
-		.yres		= 220,
-		.pixclock	= 62500,
-		.left_margin	= 3,
-		.right_margin	= 2,
-		.upper_margin	= 1,
-		.lower_margin	= 0,
-		.hsync_len	= 3,
-		.vsync_len	= 2,
-		.sync		= 0,
-		.vmode		= FB_VMODE_NONINTERLACED,
-	},
-	.width		= -1,
-	.height		= -1,
-	.tim2		= TIM2_BCD | TIM2_IPC,
-	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
-	.caps		= CLCD_CAP_5551,
-	.bpp		= 16,
-};
-
-static struct clcd_panel *panels[] = {
-	&vga,
-	&xvga,
-	&sanyo_tm38qv67a02a,
-	&sanyo_2_5_in,
-	&epson_l2f50113t00,
-};
-
-struct clcd_panel *versatile_clcd_get_panel(const char *name)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(panels); i++)
-		if (strcmp(panels[i]->mode.name, name) == 0)
-			break;
-
-	if (i < ARRAY_SIZE(panels))
-		return panels[i];
-
-	pr_err("CLCD: couldn't get parameters for panel %s\n", name);
-
-	return NULL;
-}
-
-int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize)
-{
-	dma_addr_t dma;
-
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize,
-						    &dma, GFP_KERNEL);
-	if (!fb->fb.screen_base) {
-		pr_err("CLCD: unable to map framebuffer\n");
-		return -ENOMEM;
-	}
-
-	fb->fb.fix.smem_start	= dma;
-	fb->fb.fix.smem_len	= framesize;
-
-	return 0;
-}
-
-int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma)
-{
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-				     fb->fb.screen_base,
-				     fb->fb.fix.smem_start,
-				     fb->fb.fix.smem_len);
-}
-
-void versatile_clcd_remove_dma(struct clcd_fb *fb)
-{
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-			      fb->fb.screen_base, fb->fb.fix.smem_start);
-}
diff --git a/arch/arm/plat-versatile/include/plat/clcd.h b/arch/arm/plat-versatile/include/plat/clcd.h
deleted file mode 100644
index 6bb6a1d2019b..000000000000
--- a/arch/arm/plat-versatile/include/plat/clcd.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef PLAT_CLCD_H
-#define PLAT_CLCD_H
-
-struct clcd_panel *versatile_clcd_get_panel(const char *);
-int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long);
-int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *);
-void versatile_clcd_remove_dma(struct clcd_fb *);
-
-#endif
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 59c98bfd5a8a..5edc7a054e03 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -290,6 +290,13 @@ config FB_ARMCLCD
 	  here and read <file:Documentation/kbuild/modules.txt>.  The module
 	  will be called amba-clcd.
 
+# Helper logic selected only by the ARM Versatile platform family.
+config PLAT_VERSATILE_CLCD
+	depends on FB_ARMCLCD
+	depends on (PLAT_VERSATILE || ARCH_INTEGRATOR)
+	default y
+	bool
+
 config FB_ACORN
 	bool "Acorn VIDC support"
 	depends on (FB = y) && ARM && ARCH_ACORN
diff --git a/drivers/video/fbdev/Makefile b/drivers/video/fbdev/Makefile
index 0284f2a12538..0b2090d2e52e 100644
--- a/drivers/video/fbdev/Makefile
+++ b/drivers/video/fbdev/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_FB_ATMEL)		  += atmel_lcdfb.o
 obj-$(CONFIG_FB_PVR2)             += pvr2fb.o
 obj-$(CONFIG_FB_VOODOO1)          += sstfb.o
 obj-$(CONFIG_FB_ARMCLCD)	  += amba-clcd.o
+obj-$(CONFIG_PLAT_VERSATILE_CLCD) += amba-clcd-versatile.o
 obj-$(CONFIG_FB_GOLDFISH)         += goldfishfb.o
 obj-$(CONFIG_FB_68328)            += 68328fb.o
 obj-$(CONFIG_FB_GBE)              += gbefb.o
diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c
new file mode 100644
index 000000000000..7a8afcd4573e
--- /dev/null
+++ b/drivers/video/fbdev/amba-clcd-versatile.c
@@ -0,0 +1,182 @@
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/platform_data/video-clcd-versatile.h>
+
+static struct clcd_panel vga = {
+	.mode		= {
+		.name		= "VGA",
+		.refresh	= 60,
+		.xres		= 640,
+		.yres		= 480,
+		.pixclock	= 39721,
+		.left_margin	= 40,
+		.right_margin	= 24,
+		.upper_margin	= 32,
+		.lower_margin	= 11,
+		.hsync_len	= 96,
+		.vsync_len	= 2,
+		.sync		= 0,
+		.vmode		= FB_VMODE_NONINTERLACED,
+	},
+	.width		= -1,
+	.height		= -1,
+	.tim2		= TIM2_BCD | TIM2_IPC,
+	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
+	.caps		= CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888,
+	.bpp		= 16,
+};
+
+static struct clcd_panel xvga = {
+	.mode		= {
+		.name		= "XVGA",
+		.refresh	= 60,
+		.xres		= 1024,
+		.yres		= 768,
+		.pixclock	= 15748,
+		.left_margin	= 152,
+		.right_margin	= 48,
+		.upper_margin	= 23,
+		.lower_margin	= 3,
+		.hsync_len	= 104,
+		.vsync_len	= 4,
+		.sync		= 0,
+		.vmode		= FB_VMODE_NONINTERLACED,
+	},
+	.width		= -1,
+	.height		= -1,
+	.tim2		= TIM2_BCD | TIM2_IPC,
+	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
+	.caps		= CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888,
+	.bpp		= 16,
+};
+
+/* Sanyo TM38QV67A02A - 3.8 inch QVGA (320x240) Color TFT */
+static struct clcd_panel sanyo_tm38qv67a02a = {
+	.mode		= {
+		.name		= "Sanyo TM38QV67A02A",
+		.refresh	= 116,
+		.xres		= 320,
+		.yres		= 240,
+		.pixclock	= 100000,
+		.left_margin	= 6,
+		.right_margin	= 6,
+		.upper_margin	= 5,
+		.lower_margin	= 5,
+		.hsync_len	= 6,
+		.vsync_len	= 6,
+		.sync		= 0,
+		.vmode		= FB_VMODE_NONINTERLACED,
+	},
+	.width		= -1,
+	.height		= -1,
+	.tim2		= TIM2_BCD,
+	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
+	.caps		= CLCD_CAP_5551,
+	.bpp		= 16,
+};
+
+static struct clcd_panel sanyo_2_5_in = {
+	.mode		= {
+		.name		= "Sanyo QVGA Portrait",
+		.refresh	= 116,
+		.xres		= 240,
+		.yres		= 320,
+		.pixclock	= 100000,
+		.left_margin	= 20,
+		.right_margin	= 10,
+		.upper_margin	= 2,
+		.lower_margin	= 2,
+		.hsync_len	= 10,
+		.vsync_len	= 2,
+		.sync		= FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+		.vmode		= FB_VMODE_NONINTERLACED,
+	},
+	.width		= -1,
+	.height		= -1,
+	.tim2		= TIM2_IVS | TIM2_IHS | TIM2_IPC,
+	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
+	.caps		= CLCD_CAP_5551,
+	.bpp		= 16,
+};
+
+/* Epson L2F50113T00 - 2.2 inch 176x220 Color TFT */
+static struct clcd_panel epson_l2f50113t00 = {
+	.mode		= {
+		.name		= "Epson L2F50113T00",
+		.refresh	= 390,
+		.xres		= 176,
+		.yres		= 220,
+		.pixclock	= 62500,
+		.left_margin	= 3,
+		.right_margin	= 2,
+		.upper_margin	= 1,
+		.lower_margin	= 0,
+		.hsync_len	= 3,
+		.vsync_len	= 2,
+		.sync		= 0,
+		.vmode		= FB_VMODE_NONINTERLACED,
+	},
+	.width		= -1,
+	.height		= -1,
+	.tim2		= TIM2_BCD | TIM2_IPC,
+	.cntl		= CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
+	.caps		= CLCD_CAP_5551,
+	.bpp		= 16,
+};
+
+static struct clcd_panel *panels[] = {
+	&vga,
+	&xvga,
+	&sanyo_tm38qv67a02a,
+	&sanyo_2_5_in,
+	&epson_l2f50113t00,
+};
+
+struct clcd_panel *versatile_clcd_get_panel(const char *name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(panels); i++)
+		if (strcmp(panels[i]->mode.name, name) == 0)
+			break;
+
+	if (i < ARRAY_SIZE(panels))
+		return panels[i];
+
+	pr_err("CLCD: couldn't get parameters for panel %s\n", name);
+
+	return NULL;
+}
+
+int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize)
+{
+	dma_addr_t dma;
+
+	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize,
+						    &dma, GFP_KERNEL);
+	if (!fb->fb.screen_base) {
+		pr_err("CLCD: unable to map framebuffer\n");
+		return -ENOMEM;
+	}
+
+	fb->fb.fix.smem_start	= dma;
+	fb->fb.fix.smem_len	= framesize;
+
+	return 0;
+}
+
+int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma)
+{
+	return dma_mmap_writecombine(&fb->dev->dev, vma,
+				     fb->fb.screen_base,
+				     fb->fb.fix.smem_start,
+				     fb->fb.fix.smem_len);
+}
+
+void versatile_clcd_remove_dma(struct clcd_fb *fb)
+{
+	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
+			      fb->fb.screen_base, fb->fb.fix.smem_start);
+}
diff --git a/include/linux/platform_data/video-clcd-versatile.h b/include/linux/platform_data/video-clcd-versatile.h
new file mode 100644
index 000000000000..6bb6a1d2019b
--- /dev/null
+++ b/include/linux/platform_data/video-clcd-versatile.h
@@ -0,0 +1,9 @@
+#ifndef PLAT_CLCD_H
+#define PLAT_CLCD_H
+
+struct clcd_panel *versatile_clcd_get_panel(const char *);
+int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long);
+int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *);
+void versatile_clcd_remove_dma(struct clcd_fb *);
+
+#endif
-- 
cgit v1.2.3


From 5d98e61d337c181f199a6cb864569cc4e116ef4c Mon Sep 17 00:00:00 2001
From: Lan Tianyu <tianyu.lan@intel.com>
Date: Tue, 20 May 2014 20:59:23 +0800
Subject: I2C/ACPI: Add i2c ACPI operation region support

ACPI 5.0 spec(5.5.2.4.5) defines GenericSerialBus(i2c, spi, uart) operation region.
It allows ACPI aml code able to access such kind of devices to implement
some ACPI standard method.

ACPI Spec defines some access attribute to associate with i2c protocol.
AttribQuick 	       	       		Read/Write Quick Protocol
AttribSendReceive			Send/Receive Byte Protocol
AttribByte 			 	Read/Write Byte Protocol
AttribWord				Read/Write Word Protocol
AttribBlock				Read/Write Block Protocol
AttribBytes				Read/Write N-Bytes Protocol
AttribProcessCall			Process Call Protocol
AttribBlockProcessCall			Write Block-Read Block Process Call Protocol
AttribRawBytes 				Raw Read/Write N-BytesProtocol
AttribRawProcessBytes			Raw Process Call Protocol

On the Asus T100TA, Bios use GenericSerialBus operation region to access
i2c device to get battery info.

Sample code From Asus T100TA

    Scope (_SB.I2C1)
    {
        Name (UMPC, ResourceTemplate ()
        {
            I2cSerialBus (0x0066, ControllerInitiated, 0x00061A80,
                AddressingMode7Bit, "\\_SB.I2C1",
                0x00, ResourceConsumer, ,
                )
        })

	...

        OperationRegion (DVUM, GenericSerialBus, Zero, 0x0100)
        Field (DVUM, BufferAcc, NoLock, Preserve)
        {
            Connection (UMPC),
            Offset (0x81),
            AccessAs (BufferAcc, AttribBytes (0x3E)),
            FGC0,   8
        }
	...
     }

     Device (BATC)
     {
         Name (_HID, EisaId ("PNP0C0A"))  // _HID: Hardware ID
         Name (_UID, One)  // _UID: Unique ID
	 ...

            Method (_BST, 0, NotSerialized)  // _BST: Battery Status
            {
                If (LEqual (AVBL, One))
                {
                    Store (FGC0, BFFG)
                    If (LNotEqual (STAT, One))
                    {
                        ShiftRight (CHST, 0x04, Local0)
                        And (Local0, 0x03, Local0)
                        If (LOr (LEqual (Local0, One), LEqual (Local0, 0x02)))
                        {
                            Store (0x02, Local1)
                        }
	...

    }

The i2c operation region is defined under I2C1 scope. _BST method under
battery device BATC read battery status from the field "FCG0". The request
would be sent to i2c operation region handler.

This patch is to add i2c ACPI operation region support. Due to there are
only "Byte" and "Bytes" protocol access on the Asus T100TA, other protocols
have not been tested.

About RawBytes and RawProcessBytes protocol, they needs specific drivers to interpret
reference data from AML code according ACPI 5.0 SPEC(5.5.2.4.5.3.9 and 5.5.2.4.5.3.10).
So far, not found such case and will add when find real case.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/Makefile   |   5 +-
 drivers/i2c/i2c-acpi.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/i2c/i2c-core.c |   2 +
 include/linux/acpi.h   |  11 ++
 include/linux/i2c.h    |  10 ++
 5 files changed, 300 insertions(+), 1 deletion(-)
 create mode 100644 drivers/i2c/i2c-acpi.c

(limited to 'include/linux')

diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index 1722f50f2473..80db3073aa84 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -2,8 +2,11 @@
 # Makefile for the i2c core.
 #
 
+i2ccore-y := i2c-core.o
+i2ccore-$(CONFIG_ACPI)		+= i2c-acpi.o
+
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
-obj-$(CONFIG_I2C)		+= i2c-core.o
+obj-$(CONFIG_I2C)		+= i2ccore.o
 obj-$(CONFIG_I2C_SMBUS)		+= i2c-smbus.o
 obj-$(CONFIG_I2C_CHARDEV)	+= i2c-dev.o
 obj-$(CONFIG_I2C_MUX)		+= i2c-mux.o
diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c
new file mode 100644
index 000000000000..f7f4c89c09b3
--- /dev/null
+++ b/drivers/i2c/i2c-acpi.c
@@ -0,0 +1,273 @@
+/*
+ * I2C ACPI code
+ *
+ * Copyright (C) 2014 Intel Corp
+ *
+ * Author: Lan Tianyu <tianyu.lan@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#define pr_fmt(fmt) "I2C/ACPI : " fmt
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/acpi.h>
+
+struct acpi_i2c_handler_data {
+	struct acpi_connection_info info;
+	struct i2c_adapter *adapter;
+};
+
+struct gsb_buffer {
+	u8	status;
+	u8	len;
+	union {
+		u16	wdata;
+		u8	bdata;
+		u8	data[0];
+	};
+} __packed;
+
+static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
+		u8 cmd, u8 *data, u8 data_len)
+{
+
+	struct i2c_msg msgs[2];
+	int ret;
+	u8 *buffer;
+
+	buffer = kzalloc(data_len, GFP_KERNEL);
+	if (!buffer)
+		return AE_NO_MEMORY;
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags;
+	msgs[0].len = 1;
+	msgs[0].buf = &cmd;
+
+	msgs[1].addr = client->addr;
+	msgs[1].flags = client->flags | I2C_M_RD;
+	msgs[1].len = data_len;
+	msgs[1].buf = buffer;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		dev_err(&client->adapter->dev, "i2c read failed\n");
+	else
+		memcpy(data, buffer, data_len);
+
+	kfree(buffer);
+	return ret;
+}
+
+static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
+		u8 cmd, u8 *data, u8 data_len)
+{
+
+	struct i2c_msg msgs[1];
+	u8 *buffer;
+	int ret = AE_OK;
+
+	buffer = kzalloc(data_len + 1, GFP_KERNEL);
+	if (!buffer)
+		return AE_NO_MEMORY;
+
+	buffer[0] = cmd;
+	memcpy(buffer + 1, data, data_len);
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags;
+	msgs[0].len = data_len + 1;
+	msgs[0].buf = buffer;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		dev_err(&client->adapter->dev, "i2c write failed\n");
+
+	kfree(buffer);
+	return ret;
+}
+
+static acpi_status
+acpi_i2c_space_handler(u32 function, acpi_physical_address command,
+			u32 bits, u64 *value64,
+			void *handler_context, void *region_context)
+{
+	struct gsb_buffer *gsb = (struct gsb_buffer *)value64;
+	struct acpi_i2c_handler_data *data = handler_context;
+	struct acpi_connection_info *info = &data->info;
+	struct acpi_resource_i2c_serialbus *sb;
+	struct i2c_adapter *adapter = data->adapter;
+	struct i2c_client client;
+	struct acpi_resource *ares;
+	u32 accessor_type = function >> 16;
+	u8 action = function & ACPI_IO_MASK;
+	acpi_status ret = AE_OK;
+	int status;
+
+	ret = acpi_buffer_to_resource(info->connection, info->length, &ares);
+	if (ACPI_FAILURE(ret))
+		return ret;
+
+	if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) {
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	sb = &ares->data.i2c_serial_bus;
+	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	memset(&client, 0, sizeof(client));
+	client.adapter = adapter;
+	client.addr = sb->slave_address;
+	client.flags = 0;
+
+	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+		client.flags |= I2C_CLIENT_TEN;
+
+	switch (accessor_type) {
+	case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_byte(&client);
+			if (status >= 0) {
+				gsb->bdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_byte(&client, gsb->bdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_BYTE:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_byte_data(&client, command);
+			if (status >= 0) {
+				gsb->bdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_byte_data(&client, command,
+					gsb->bdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_WORD:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_word_data(&client, command);
+			if (status >= 0) {
+				gsb->wdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_word_data(&client, command,
+					gsb->wdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_BLOCK:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_block_data(&client, command,
+					gsb->data);
+			if (status >= 0) {
+				gsb->len = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_block_data(&client, command,
+					gsb->len, gsb->data);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE:
+		if (action == ACPI_READ) {
+			status = acpi_gsb_i2c_read_bytes(&client, command,
+					gsb->data, info->access_length);
+			if (status > 0)
+				status = 0;
+		} else {
+			status = acpi_gsb_i2c_write_bytes(&client, command,
+					gsb->data, info->access_length);
+		}
+		break;
+
+	default:
+		pr_info("protocol(0x%02x) is not supported.\n", accessor_type);
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	gsb->status = status;
+
+ err:
+	ACPI_FREE(ares);
+	return ret;
+}
+
+
+int acpi_i2c_install_space_handler(struct i2c_adapter *adapter)
+{
+	acpi_handle handle = ACPI_HANDLE(adapter->dev.parent);
+	struct acpi_i2c_handler_data *data;
+	acpi_status status;
+
+	if (!handle)
+		return -ENODEV;
+
+	data = kzalloc(sizeof(struct acpi_i2c_handler_data),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->adapter = adapter;
+	status = acpi_bus_attach_private_data(handle, (void *)data);
+	if (ACPI_FAILURE(status)) {
+		kfree(data);
+		return -ENOMEM;
+	}
+
+	status = acpi_install_address_space_handler(handle,
+				ACPI_ADR_SPACE_GSBUS,
+				&acpi_i2c_space_handler,
+				NULL,
+				data);
+	if (ACPI_FAILURE(status)) {
+		dev_err(&adapter->dev, "Error installing i2c space handler\n");
+		acpi_bus_detach_private_data(handle);
+		kfree(data);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
+{
+	acpi_handle handle = ACPI_HANDLE(adapter->dev.parent);
+	struct acpi_i2c_handler_data *data;
+	acpi_status status;
+
+	if (!handle)
+		return;
+
+	acpi_remove_address_space_handler(handle,
+				ACPI_ADR_SPACE_GSBUS,
+				&acpi_i2c_space_handler);
+
+	status = acpi_bus_get_private_data(handle, (void **)&data);
+	if (ACPI_SUCCESS(status))
+		kfree(data);
+
+	acpi_bus_detach_private_data(handle);
+}
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 7c7f4b856bad..e25cb84cb297 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1293,6 +1293,7 @@ exit_recovery:
 	/* create pre-declared device nodes */
 	of_i2c_register_devices(adap);
 	acpi_i2c_register_devices(adap);
+	acpi_i2c_install_space_handler(adap);
 
 	if (adap->nr < __i2c_first_dynamic_bus_num)
 		i2c_scan_static_board_info(adap);
@@ -1466,6 +1467,7 @@ void i2c_del_adapter(struct i2c_adapter *adap)
 		return;
 	}
 
+	acpi_i2c_remove_space_handler(adap);
 	/* Tell drivers about this removal */
 	mutex_lock(&core_lock);
 	bus_for_each_drv(&i2c_bus_type, NULL, adap,
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 358c01b971db..40718e91e171 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -364,6 +364,17 @@ extern bool osc_sb_apei_support_acked;
 #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL	0x00000010
 #define OSC_PCI_CONTROL_MASKS			0x0000001f
 
+#define ACPI_GSB_ACCESS_ATTRIB_QUICK		0x00000002
+#define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV         0x00000004
+#define ACPI_GSB_ACCESS_ATTRIB_BYTE		0x00000006
+#define ACPI_GSB_ACCESS_ATTRIB_WORD		0x00000008
+#define ACPI_GSB_ACCESS_ATTRIB_BLOCK		0x0000000A
+#define ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE	0x0000000B
+#define ACPI_GSB_ACCESS_ATTRIB_WORD_CALL	0x0000000C
+#define ACPI_GSB_ACCESS_ATTRIB_BLOCK_CALL	0x0000000D
+#define ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES	0x0000000E
+#define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS	0x0000000F
+
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b556e0ab946f..f7a939a2cb56 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -577,4 +577,14 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node
 }
 #endif /* CONFIG_OF */
 
+#ifdef CONFIG_ACPI
+int acpi_i2c_install_space_handler(struct i2c_adapter *adapter);
+void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter);
+#else
+static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
+{ }
+static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter)
+{ return 0; }
+#endif
+
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From da3c6647ee08711c7edc28d7fea4ad69fc5ffcca Mon Sep 17 00:00:00 2001
From: Lan Tianyu <tianyu.lan@intel.com>
Date: Tue, 20 May 2014 20:59:24 +0800
Subject: I2C/ACPI: Clean up I2C ACPI code and Add CONFIG_I2C_ACPI config

Clean up ACPI related code in the i2c core and add CONFIG_I2C_ACPI
to enable I2C ACPI code.

Current there is a race between removing I2C ACPI operation region
and ACPI AML code accessing. So make i2c core built-in if CONFIG_I2C_ACPI
is set.

Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/Kconfig    | 18 +++++++++-
 drivers/i2c/Makefile   |  2 +-
 drivers/i2c/i2c-acpi.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/i2c/i2c-core.c | 95 --------------------------------------------------
 include/linux/i2c.h    |  4 ++-
 5 files changed, 110 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index 7b7ea320a258..3e3b680dc007 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -2,7 +2,9 @@
 # I2C subsystem configuration
 #
 
-menuconfig I2C
+menu "I2C support"
+
+config I2C
 	tristate "I2C support"
 	select RT_MUTEXES
 	---help---
@@ -21,6 +23,18 @@ menuconfig I2C
 	  This I2C support can also be built as a module.  If so, the module
 	  will be called i2c-core.
 
+config I2C_ACPI
+	bool "I2C ACPI support"
+	select I2C
+	depends on ACPI
+	default y
+	help
+	  Say Y here if you want to enable ACPI I2C support. This includes support
+	  for automatic enumeration of I2C slave devices and support for ACPI I2C
+	  Operation Regions. Operation Regions allow firmware (BIOS) code to
+	  access I2C slave devices, such as smart batteries through an I2C host
+	  controller driver.
+
 if I2C
 
 config I2C_BOARDINFO
@@ -124,3 +138,5 @@ config I2C_DEBUG_BUS
 	  on.
 
 endif # I2C
+
+endmenu
diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index 80db3073aa84..a1f590cbb435 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -3,7 +3,7 @@
 #
 
 i2ccore-y := i2c-core.o
-i2ccore-$(CONFIG_ACPI)		+= i2c-acpi.o
+i2ccore-$(CONFIG_I2C_ACPI)	+= i2c-acpi.o
 
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
 obj-$(CONFIG_I2C)		+= i2ccore.o
diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c
index f7f4c89c09b3..e8b61967334b 100644
--- a/drivers/i2c/i2c-acpi.c
+++ b/drivers/i2c/i2c-acpi.c
@@ -37,6 +37,95 @@ struct gsb_buffer {
 	};
 } __packed;
 
+static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data)
+{
+	struct i2c_board_info *info = data;
+
+	if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
+		struct acpi_resource_i2c_serialbus *sb;
+
+		sb = &ares->data.i2c_serial_bus;
+		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+			info->addr = sb->slave_address;
+			if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+				info->flags |= I2C_CLIENT_TEN;
+		}
+	} else if (info->irq < 0) {
+		struct resource r;
+
+		if (acpi_dev_resource_interrupt(ares, 0, &r))
+			info->irq = r.start;
+	}
+
+	/* Tell the ACPI core to skip this resource */
+	return 1;
+}
+
+static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
+				       void *data, void **return_value)
+{
+	struct i2c_adapter *adapter = data;
+	struct list_head resource_list;
+	struct i2c_board_info info;
+	struct acpi_device *adev;
+	int ret;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+	if (acpi_bus_get_status(adev) || !adev->status.present)
+		return AE_OK;
+
+	memset(&info, 0, sizeof(info));
+	info.acpi_node.companion = adev;
+	info.irq = -1;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     acpi_i2c_add_resource, &info);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0 || !info.addr)
+		return AE_OK;
+
+	adev->power.flags.ignore_parent = true;
+	strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type));
+	if (!i2c_new_device(adapter, &info)) {
+		adev->power.flags.ignore_parent = false;
+		dev_err(&adapter->dev,
+			"failed to add I2C device %s from ACPI\n",
+			dev_name(&adev->dev));
+	}
+
+	return AE_OK;
+}
+
+/**
+ * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter
+ * @adap: pointer to adapter
+ *
+ * Enumerate all I2C slave devices behind this adapter by walking the ACPI
+ * namespace. When a device is found it will be added to the Linux device
+ * model and bound to the corresponding ACPI handle.
+ */
+void acpi_i2c_register_devices(struct i2c_adapter *adap)
+{
+	acpi_handle handle;
+	acpi_status status;
+
+	if (!adap->dev.parent)
+		return;
+
+	handle = ACPI_HANDLE(adap->dev.parent);
+	if (!handle)
+		return;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				     acpi_i2c_add_device, NULL,
+				     adap, NULL);
+	if (ACPI_FAILURE(status))
+		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
+}
+
 static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
 		u8 cmd, u8 *data, u8 data_len)
 {
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index e25cb84cb297..4ccff114b147 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1092,101 +1092,6 @@ EXPORT_SYMBOL(of_find_i2c_adapter_by_node);
 static void of_i2c_register_devices(struct i2c_adapter *adap) { }
 #endif /* CONFIG_OF */
 
-/* ACPI support code */
-
-#if IS_ENABLED(CONFIG_ACPI)
-static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data)
-{
-	struct i2c_board_info *info = data;
-
-	if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
-		struct acpi_resource_i2c_serialbus *sb;
-
-		sb = &ares->data.i2c_serial_bus;
-		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) {
-			info->addr = sb->slave_address;
-			if (sb->access_mode == ACPI_I2C_10BIT_MODE)
-				info->flags |= I2C_CLIENT_TEN;
-		}
-	} else if (info->irq < 0) {
-		struct resource r;
-
-		if (acpi_dev_resource_interrupt(ares, 0, &r))
-			info->irq = r.start;
-	}
-
-	/* Tell the ACPI core to skip this resource */
-	return 1;
-}
-
-static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
-				       void *data, void **return_value)
-{
-	struct i2c_adapter *adapter = data;
-	struct list_head resource_list;
-	struct i2c_board_info info;
-	struct acpi_device *adev;
-	int ret;
-
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-	if (acpi_bus_get_status(adev) || !adev->status.present)
-		return AE_OK;
-
-	memset(&info, 0, sizeof(info));
-	info.acpi_node.companion = adev;
-	info.irq = -1;
-
-	INIT_LIST_HEAD(&resource_list);
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     acpi_i2c_add_resource, &info);
-	acpi_dev_free_resource_list(&resource_list);
-
-	if (ret < 0 || !info.addr)
-		return AE_OK;
-
-	adev->power.flags.ignore_parent = true;
-	strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type));
-	if (!i2c_new_device(adapter, &info)) {
-		adev->power.flags.ignore_parent = false;
-		dev_err(&adapter->dev,
-			"failed to add I2C device %s from ACPI\n",
-			dev_name(&adev->dev));
-	}
-
-	return AE_OK;
-}
-
-/**
- * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter
- * @adap: pointer to adapter
- *
- * Enumerate all I2C slave devices behind this adapter by walking the ACPI
- * namespace. When a device is found it will be added to the Linux device
- * model and bound to the corresponding ACPI handle.
- */
-static void acpi_i2c_register_devices(struct i2c_adapter *adap)
-{
-	acpi_handle handle;
-	acpi_status status;
-
-	if (!adap->dev.parent)
-		return;
-
-	handle = ACPI_HANDLE(adap->dev.parent);
-	if (!handle)
-		return;
-
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
-				     acpi_i2c_add_device, NULL,
-				     adap, NULL);
-	if (ACPI_FAILURE(status))
-		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
-}
-#else
-static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) {}
-#endif /* CONFIG_ACPI */
-
 static int i2c_do_add_adapter(struct i2c_driver *driver,
 			      struct i2c_adapter *adap)
 {
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index f7a939a2cb56..ea507665896c 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -577,10 +577,12 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node
 }
 #endif /* CONFIG_OF */
 
-#ifdef CONFIG_ACPI
+#ifdef CONFIG_I2C_ACPI
 int acpi_i2c_install_space_handler(struct i2c_adapter *adapter);
 void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter);
+void acpi_i2c_register_devices(struct i2c_adapter *adap);
 #else
+static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { }
 static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
 { }
 static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter)
-- 
cgit v1.2.3


From 981409b25e2a99409b26daa67293ca1cfd5ea0a0 Mon Sep 17 00:00:00 2001
From: Archit Taneja <archit@ti.com>
Date: Fri, 16 Nov 2012 14:46:04 +0530
Subject: fbdev: arm has __raw I/O accessors, use them in fb.h

This removes the sparse warnings on arm platforms:

warning: cast removes address space of expression

Signed-off-by: Archit Taneja <archit@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: H Hartley Sweeten <hsweeten at visionengravers.com>
Cc: Alexander Shiyan <shc_work@mail.ru>
Cc: Russell King <linux@arm.linux.org.uk>
---
 include/linux/fb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index b6bfda99add3..09bb7a18d287 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -553,7 +553,7 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
 #define fb_memcpy_fromfb sbus_memcpy_fromio
 #define fb_memcpy_tofb sbus_memcpy_toio
 
-#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__)
+#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__) || defined(__arm__)
 
 #define fb_readb __raw_readb
 #define fb_readw __raw_readw
-- 
cgit v1.2.3


From 780db2071ac4d167ee4154ad9c96088f1bba044b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 1 Jul 2014 10:31:13 -0600
Subject: blk-mq: decouble blk-mq freezing from generic bypassing

blk_mq freezing is entangled with generic bypassing which bypasses
blkcg and io scheduler and lets IO requests fall through the block
layer to the drivers in FIFO order.  This allows forward progress on
IOs with the advanced features disabled so that those features can be
configured or altered without worrying about stalling IO which may
lead to deadlock through memory allocation.

However, generic bypassing doesn't quite fit blk-mq.  blk-mq currently
doesn't make use of blkcg or ioscheds and it maps bypssing to
freezing, which blocks request processing and drains all the in-flight
ones.  This causes problems as bypassing assumes that request
processing is online.  blk-mq works around this by conditionally
allowing request processing for the problem case - during queue
initialization.

Another weirdity is that except for during queue cleanup, bypassing
started on the generic side prevents blk-mq from processing new
requests but doesn't drain the in-flight ones.  This shouldn't break
anything but again highlights that something isn't quite right here.

The root cause is conflating blk-mq freezing and generic bypassing
which are two different mechanisms.  The only intersecting purpose
that they serve is during queue cleanup.  Let's properly separate
blk-mq freezing from generic bypassing and simply use it where
necessary.

* request_queue->mq_freeze_depth is added and
  blk_mq_[un]freeze_queue() now operate on this counter instead of
  ->bypass_depth.  The replacement for QUEUE_FLAG_BYPASS isn't added
  but the counter is tested directly.  This will be further updated by
  later changes.

* blk_mq_drain_queue() is dropped and "__" prefix is dropped from
  blk_mq_freeze_queue().  Queue cleanup path now calls
  blk_mq_freeze_queue() directly.

* blk_queue_enter()'s fast path condition is simplified to simply
  check @q->mq_freeze_depth.  Previously, the condition was

	!blk_queue_dying(q) &&
	    (!blk_queue_bypass(q) || !blk_queue_init_done(q))

  mq_freeze_depth is incremented right after dying is set and
  blk_queue_init_done() exception isn't necessary as blk-mq doesn't
  start frozen, which only leaves the blk_queue_bypass() test which
  can be replaced by @q->mq_freeze_depth test.

This change simplifies the code and reduces confusion in the area.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Nicholas A. Bellinger <nab@linux-iscsi.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |  2 +-
 block/blk-mq.c         | 17 ++++++-----------
 block/blk-mq.h         |  2 +-
 include/linux/blkdev.h |  1 +
 4 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 0d0bdd65b2d7..c359d72e9d76 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -514,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * prevent that q->request_fn() gets invoked after draining finished.
 	 */
 	if (q->mq_ops) {
-		blk_mq_drain_queue(q);
+		blk_mq_freeze_queue(q);
 		spin_lock_irq(lock);
 	} else {
 		spin_lock_irq(lock);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f4bdddd7ed99..1e324a123d40 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -84,15 +84,14 @@ static int blk_mq_queue_enter(struct request_queue *q)
 	smp_mb();
 
 	/* we have problems freezing the queue if it's initializing */
-	if (!blk_queue_dying(q) &&
-	    (!blk_queue_bypass(q) || !blk_queue_init_done(q)))
+	if (!q->mq_freeze_depth)
 		return 0;
 
 	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
 
 	spin_lock_irq(q->queue_lock);
 	ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
-		!blk_queue_bypass(q) || blk_queue_dying(q),
+		!q->mq_freeze_depth || blk_queue_dying(q),
 		*q->queue_lock);
 	/* inc usage with lock hold to avoid freeze_queue runs here */
 	if (!ret && !blk_queue_dying(q))
@@ -129,11 +128,10 @@ void blk_mq_drain_queue(struct request_queue *q)
  * Guarantee no request is in use, so we can change any data structure of
  * the queue afterward.
  */
-static void blk_mq_freeze_queue(struct request_queue *q)
+void blk_mq_freeze_queue(struct request_queue *q)
 {
 	spin_lock_irq(q->queue_lock);
-	q->bypass_depth++;
-	queue_flag_set(QUEUE_FLAG_BYPASS, q);
+	q->mq_freeze_depth++;
 	spin_unlock_irq(q->queue_lock);
 
 	blk_mq_drain_queue(q);
@@ -144,11 +142,8 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
 	bool wake = false;
 
 	spin_lock_irq(q->queue_lock);
-	if (!--q->bypass_depth) {
-		queue_flag_clear(QUEUE_FLAG_BYPASS, q);
-		wake = true;
-	}
-	WARN_ON_ONCE(q->bypass_depth < 0);
+	wake = !--q->mq_freeze_depth;
+	WARN_ON_ONCE(q->mq_freeze_depth < 0);
 	spin_unlock_irq(q->queue_lock);
 	if (wake)
 		wake_up_all(&q->mq_freeze_wq);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 26460884c6cd..ca4964a6295d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -28,7 +28,7 @@ struct blk_mq_ctx {
 void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_init_flush(struct request_queue *q);
-void blk_mq_drain_queue(struct request_queue *q);
+void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8699bcf5f099..c8f344ff74fe 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -470,6 +470,7 @@ struct request_queue {
 	struct mutex		sysfs_lock;
 
 	int			bypass_depth;
+	int			mq_freeze_depth;
 
 #if defined(CONFIG_BLK_DEV_BSG)
 	bsg_job_fn		*bsg_job_fn;
-- 
cgit v1.2.3


From add703fda981b9719d37f371498b9f129acbd997 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 1 Jul 2014 10:34:38 -0600
Subject: blk-mq: use percpu_ref for mq usage count

Currently, blk-mq uses a percpu_counter to keep track of how many
usages are in flight.  The percpu_counter is drained while freezing to
ensure that no usage is left in-flight after freezing is complete.
blk_mq_queue_enter/exit() and blk_mq_[un]freeze_queue() implement this
per-cpu gating mechanism.

This type of code has relatively high chance of subtle bugs which are
extremely difficult to trigger and it's way too hairy to be open coded
in blk-mq.  percpu_ref can serve the same purpose after the recent
changes.  This patch replaces the open-coded per-cpu usage counting
and draining mechanism with percpu_ref.

blk_mq_queue_enter() performs tryget_live on the ref and exit()
performs put.  blk_mq_freeze_queue() kills the ref and waits until the
reference count reaches zero.  blk_mq_unfreeze_queue() revives the ref
and wakes up the waiters.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Nicholas A. Bellinger <nab@linux-iscsi.org>
Cc: Kent Overstreet <kmo@daterainc.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 68 +++++++++++++++++++++-----------------------------
 include/linux/blkdev.h |  3 ++-
 2 files changed, 31 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 22682fb4be65..5189cb1e478a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -78,34 +78,32 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 
 static int blk_mq_queue_enter(struct request_queue *q)
 {
-	int ret;
-
-	__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
-	smp_mb();
-
-	/* we have problems freezing the queue if it's initializing */
-	if (!q->mq_freeze_depth)
-		return 0;
-
-	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+	while (true) {
+		int ret;
 
-	spin_lock_irq(q->queue_lock);
-	ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
-		!q->mq_freeze_depth || blk_queue_dying(q),
-		*q->queue_lock);
-	/* inc usage with lock hold to avoid freeze_queue runs here */
-	if (!ret && !blk_queue_dying(q))
-		__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
-	else if (blk_queue_dying(q))
-		ret = -ENODEV;
-	spin_unlock_irq(q->queue_lock);
+		if (percpu_ref_tryget_live(&q->mq_usage_counter))
+			return 0;
 
-	return ret;
+		ret = wait_event_interruptible(q->mq_freeze_wq,
+				!q->mq_freeze_depth || blk_queue_dying(q));
+		if (blk_queue_dying(q))
+			return -ENODEV;
+		if (ret)
+			return ret;
+	}
 }
 
 static void blk_mq_queue_exit(struct request_queue *q)
 {
-	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+	percpu_ref_put(&q->mq_usage_counter);
+}
+
+static void blk_mq_usage_counter_release(struct percpu_ref *ref)
+{
+	struct request_queue *q =
+		container_of(ref, struct request_queue, mq_usage_counter);
+
+	wake_up_all(&q->mq_freeze_wq);
 }
 
 /*
@@ -118,18 +116,9 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	q->mq_freeze_depth++;
 	spin_unlock_irq(q->queue_lock);
 
-	while (true) {
-		s64 count;
-
-		spin_lock_irq(q->queue_lock);
-		count = percpu_counter_sum(&q->mq_usage_counter);
-		spin_unlock_irq(q->queue_lock);
-
-		if (count == 0)
-			break;
-		blk_mq_start_hw_queues(q);
-		msleep(10);
-	}
+	percpu_ref_kill(&q->mq_usage_counter);
+	blk_mq_run_queues(q, false);
+	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
 }
 
 static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -140,8 +129,10 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
 	wake = !--q->mq_freeze_depth;
 	WARN_ON_ONCE(q->mq_freeze_depth < 0);
 	spin_unlock_irq(q->queue_lock);
-	if (wake)
+	if (wake) {
+		percpu_ref_reinit(&q->mq_usage_counter);
 		wake_up_all(&q->mq_freeze_wq);
+	}
 }
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
@@ -1785,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!q)
 		goto err_hctxs;
 
-	if (percpu_counter_init(&q->mq_usage_counter, 0))
+	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
 		goto err_map;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
@@ -1878,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q)
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 	blk_mq_free_hw_queues(q, set);
 
-	percpu_counter_destroy(&q->mq_usage_counter);
+	percpu_ref_exit(&q->mq_usage_counter);
 
 	free_percpu(q->queue_ctx);
 	kfree(q->queue_hw_ctx);
@@ -2037,8 +2028,7 @@ static int __init blk_mq_init(void)
 {
 	blk_mq_cpu_init();
 
-	/* Must be called after percpu_counter_hotcpu_callback() */
-	hotcpu_notifier(blk_mq_queue_reinit_notify, -10);
+	hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
 
 	return 0;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c8f344ff74fe..518b46555b80 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -21,6 +21,7 @@
 #include <linux/bsg.h>
 #include <linux/smp.h>
 #include <linux/rcupdate.h>
+#include <linux/percpu-refcount.h>
 
 #include <asm/scatterlist.h>
 
@@ -484,7 +485,7 @@ struct request_queue {
 #endif
 	struct rcu_head		rcu_head;
 	wait_queue_head_t	mq_freeze_wq;
-	struct percpu_counter	mq_usage_counter;
+	struct percpu_ref	mq_usage_counter;
 	struct list_head	all_q_node;
 
 	struct blk_mq_tag_set	*tag_set;
-- 
cgit v1.2.3


From cbcd1054a1fd2aa980fc11ff28e436fc4aaa2d54 Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Tue, 1 Jul 2014 10:36:47 -0600
Subject: bio-integrity: add "bip_max_vcnt" into struct bio_integrity_payload

Commit 08778795 ("block: Fix nr_vecs for inline integrity vectors") from
Martin introduces the function bip_integrity_vecs(get the useful vectors)
to fix the issue about nr_vecs for inline integrity vectors that reported
by David Milburn.

But it seems that bip_integrity_vecs() will return the wrong number if the
bio is not based on any bio_set for some reason(bio->bi_pool == NULL),
because in that case, the bip_inline_vecs[0] is malloced directly.  So
here we add the bip_max_vcnt to record the count of vector slots, and
cleanup the function bip_integrity_vecs().

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Kent Overstreet <kmo@daterainc.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c | 12 +++---------
 include/linux/bio.h   |  1 +
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 9e241063a616..bc423f7b02da 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 					  bs->bvec_integrity_pool);
 		if (!bip->bip_vec)
 			goto err;
+		bip->bip_max_vcnt = bvec_nr_vecs(idx);
 	} else {
 		bip->bip_vec = bip->bip_inline_vecs;
+		bip->bip_max_vcnt = inline_vecs;
 	}
 
 	bip->bip_slab = idx;
@@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_integrity_free);
 
-static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
-{
-	if (bip->bip_slab == BIO_POOL_NONE)
-		return BIP_INLINE_VECS;
-
-	return bvec_nr_vecs(bip->bip_slab);
-}
-
 /**
  * bio_integrity_add_page - Attach integrity metadata
  * @bio:	bio to update
@@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 	struct bio_integrity_payload *bip = bio->bi_integrity;
 	struct bio_vec *iv;
 
-	if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
+	if (bip->bip_vcnt >= bip->bip_max_vcnt) {
 		printk(KERN_ERR "%s: bip_vec full\n", __func__);
 		return 0;
 	}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d2633ee099d9..b39e5000ff58 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -308,6 +308,7 @@ struct bio_integrity_payload {
 
 	unsigned short		bip_slab;	/* slab the bip came from */
 	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
+	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
 	unsigned		bip_owns_buf:1;	/* should free bip_buf */
 
 	struct work_struct	bip_work;	/* I/O completion */
-- 
cgit v1.2.3


From 8b37e1bef5a6b60e949e28a4db3006e4b00bd758 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vdonnefort@gmail.com>
Date: Sat, 14 Jun 2014 02:21:40 -0700
Subject: leds: convert blink timer to workqueue

This patch converts the blink timer from led-core to workqueue which is more
suitable for this kind of non-priority operations. Moreover, timer may lead to
errors when a LED setting function use a scheduling function such as pinctrl
which is using mutex.

Signed-off-by: Vincent Donnefort <vdonnefort@gmail.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/led-class.c | 14 +++++++-------
 drivers/leds/led-core.c  | 11 ++++++-----
 include/linux/leds.h     |  3 +--
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index aa29198fca3e..129729d35478 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -15,10 +15,10 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/device.h>
-#include <linux/timer.h>
 #include <linux/err.h>
 #include <linux/ctype.h>
 #include <linux/leds.h>
+#include <linux/workqueue.h>
 #include "leds.h"
 
 static struct class *leds_class;
@@ -97,9 +97,10 @@ static const struct attribute_group *led_groups[] = {
 	NULL,
 };
 
-static void led_timer_function(unsigned long data)
+static void led_work_function(struct work_struct *ws)
 {
-	struct led_classdev *led_cdev = (void *)data;
+	struct led_classdev *led_cdev =
+		container_of(ws, struct led_classdev, blink_work.work);
 	unsigned long brightness;
 	unsigned long delay;
 
@@ -143,7 +144,8 @@ static void led_timer_function(unsigned long data)
 		}
 	}
 
-	mod_timer(&led_cdev->blink_timer, jiffies + msecs_to_jiffies(delay));
+	queue_delayed_work(system_wq, &led_cdev->blink_work,
+			   msecs_to_jiffies(delay));
 }
 
 static void set_brightness_delayed(struct work_struct *ws)
@@ -231,9 +233,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
 
 	INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed);
 
-	init_timer(&led_cdev->blink_timer);
-	led_cdev->blink_timer.function = led_timer_function;
-	led_cdev->blink_timer.data = (unsigned long)led_cdev;
+	INIT_DELAYED_WORK(&led_cdev->blink_work, led_work_function);
 
 #ifdef CONFIG_LEDS_TRIGGERS
 	led_trigger_set_default(led_cdev);
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 71b40d3bf776..4bb116867b88 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/rwsem.h>
 #include <linux/leds.h>
+#include <linux/workqueue.h>
 #include "leds.h"
 
 DECLARE_RWSEM(leds_list_lock);
@@ -51,7 +52,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
 		return;
 	}
 
-	mod_timer(&led_cdev->blink_timer, jiffies + 1);
+	queue_delayed_work(system_wq, &led_cdev->blink_work, 1);
 }
 
 
@@ -75,7 +76,7 @@ void led_blink_set(struct led_classdev *led_cdev,
 		   unsigned long *delay_on,
 		   unsigned long *delay_off)
 {
-	del_timer_sync(&led_cdev->blink_timer);
+	cancel_delayed_work_sync(&led_cdev->blink_work);
 
 	led_cdev->flags &= ~LED_BLINK_ONESHOT;
 	led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
@@ -90,7 +91,7 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev,
 			   int invert)
 {
 	if ((led_cdev->flags & LED_BLINK_ONESHOT) &&
-	     timer_pending(&led_cdev->blink_timer))
+	     delayed_work_pending(&led_cdev->blink_work))
 		return;
 
 	led_cdev->flags |= LED_BLINK_ONESHOT;
@@ -107,7 +108,7 @@ EXPORT_SYMBOL(led_blink_set_oneshot);
 
 void led_stop_software_blink(struct led_classdev *led_cdev)
 {
-	del_timer_sync(&led_cdev->blink_timer);
+	cancel_delayed_work_sync(&led_cdev->blink_work);
 	led_cdev->blink_delay_on = 0;
 	led_cdev->blink_delay_off = 0;
 }
@@ -116,7 +117,7 @@ EXPORT_SYMBOL_GPL(led_stop_software_blink);
 void led_set_brightness(struct led_classdev *led_cdev,
 			enum led_brightness brightness)
 {
-	/* delay brightness setting if need to stop soft-blink timer */
+	/* delay brightness setting if need to stop soft-blink work */
 	if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) {
 		led_cdev->delayed_set_value = brightness;
 		schedule_work(&led_cdev->set_brightness_work);
diff --git a/include/linux/leds.h b/include/linux/leds.h
index e43686472197..6a599dce7f9d 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -15,7 +15,6 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
-#include <linux/timer.h>
 #include <linux/workqueue.h>
 
 struct device;
@@ -69,7 +68,7 @@ struct led_classdev {
 	const char		*default_trigger;	/* Trigger to use */
 
 	unsigned long		 blink_delay_on, blink_delay_off;
-	struct timer_list	 blink_timer;
+	struct delayed_work	 blink_work;
 	int			 blink_brightness;
 
 	struct work_struct	set_brightness_work;
-- 
cgit v1.2.3


From 75f353b61342b5847c7f6d8499fd6301dce09845 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Tue, 24 Jun 2014 16:13:47 +0100
Subject: of/platform: Fix of_platform_device_destroy iteration of devices

of_platform_destroy does not work properly, since the tree
population test was iterating on all devices having as its parent
the given platform device.

The check was intended to check whether any other platform or amba
devices created by of_platform_populate were still populated, but
instead checked for every kind of device. This is wrong, since platform
devices typically create a subsystem regular device and set themselves
as parents.

Instead, go ahead and call the unregister functions for any devices
created with of_platform_populate. The driver core will take care of
unbinding drivers, and drivers are responsible for getting rid of any
child devices that weren't created by of_platform_populate.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
---
 drivers/of/platform.c       | 32 +++++++++-----------------------
 include/linux/of.h          |  1 +
 include/linux/of_platform.h |  7 ++-----
 3 files changed, 12 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 500436f9be7f..0197725e033a 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -422,6 +422,7 @@ static int of_platform_bus_create(struct device_node *bus,
 			break;
 		}
 	}
+	of_node_set_flag(bus, OF_POPULATED_BUS);
 	return rc;
 }
 
@@ -508,19 +509,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate);
 
 static int of_platform_device_destroy(struct device *dev, void *data)
 {
-	bool *children_left = data;
-
 	/* Do not touch devices not populated from the device tree */
-	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) {
-		*children_left = true;
+	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
 		return 0;
-	}
 
-	/* Recurse, but don't touch this device if it has any children left */
-	if (of_platform_depopulate(dev) != 0) {
-		*children_left = true;
-		return 0;
-	}
+	/* Recurse for any nodes that were treated as busses */
+	if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS))
+		device_for_each_child(dev, NULL, of_platform_device_destroy);
 
 	if (dev->bus == &platform_bus_type)
 		platform_device_unregister(to_platform_device(dev));
@@ -528,19 +523,15 @@ static int of_platform_device_destroy(struct device *dev, void *data)
 	else if (dev->bus == &amba_bustype)
 		amba_device_unregister(to_amba_device(dev));
 #endif
-	else {
-		*children_left = true;
-		return 0;
-	}
 
 	of_node_clear_flag(dev->of_node, OF_POPULATED);
-
+	of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
 	return 0;
 }
 
 /**
  * of_platform_depopulate() - Remove devices populated from device tree
- * @parent: device which childred will be removed
+ * @parent: device which children will be removed
  *
  * Complementary to of_platform_populate(), this function removes children
  * of the given device (and, recurrently, their children) that have been
@@ -550,14 +541,9 @@ static int of_platform_device_destroy(struct device *dev, void *data)
  * Returns 0 when all children devices have been removed or
  * -EBUSY when some children remained.
  */
-int of_platform_depopulate(struct device *parent)
+void of_platform_depopulate(struct device *parent)
 {
-	bool children_left = false;
-
-	device_for_each_child(parent, &children_left,
-			      of_platform_device_destroy);
-
-	return children_left ? -EBUSY : 0;
+	device_for_each_child(parent, NULL, of_platform_device_destroy);
 }
 EXPORT_SYMBOL_GPL(of_platform_depopulate);
 
diff --git a/include/linux/of.h b/include/linux/of.h
index 196b34c1ef4e..abf829a1f150 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -204,6 +204,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 #define OF_DYNAMIC	1 /* node and properties were allocated via kmalloc */
 #define OF_DETACHED	2 /* node has been detached from the device tree */
 #define OF_POPULATED	3 /* device already created for the node */
+#define OF_POPULATED_BUS	4 /* of_platform_populate recursed to children of this node */
 
 #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
 #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index d96e1badbee0..c2b0627a2317 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -72,7 +72,7 @@ extern int of_platform_populate(struct device_node *root,
 				const struct of_device_id *matches,
 				const struct of_dev_auxdata *lookup,
 				struct device *parent);
-extern int of_platform_depopulate(struct device *parent);
+extern void of_platform_depopulate(struct device *parent);
 #else
 static inline int of_platform_populate(struct device_node *root,
 					const struct of_device_id *matches,
@@ -81,10 +81,7 @@ static inline int of_platform_populate(struct device_node *root,
 {
 	return -ENODEV;
 }
-static inline int of_platform_depopulate(struct device *parent)
-{
-	return -ENODEV;
-}
+static inline void of_platform_depopulate(struct device *parent) { }
 #endif
 
 #endif	/* _LINUX_OF_PLATFORM_H */
-- 
cgit v1.2.3


From 1d0326b13bc9ecab5c784415165e6f78fb06ae5b Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Fri, 20 Jun 2014 14:14:41 +0400
Subject: libceph: rename ceph_osd_request::r_linger_osd to r_linger_osd_item

So that:

req->r_osd_item --> osd->o_requests list
req->r_linger_osd_item --> osd->o_linger_requests list

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 include/linux/ceph/osd_client.h | 2 +-
 net/ceph/osd_client.c           | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 94ec69672164..7490a03ac163 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -117,7 +117,7 @@ struct ceph_osd_request {
 	struct list_head r_req_lru_item;
 	struct list_head r_osd_item;
 	struct list_head r_linger_item;
-	struct list_head r_linger_osd;
+	struct list_head r_linger_osd_item;
 	struct ceph_osd *r_osd;
 	struct ceph_pg   r_pgid;
 	int              r_pg_osds[CEPH_PG_MAX_SIZE];
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 05be0c181695..d5d2be3bd113 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -364,7 +364,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	RB_CLEAR_NODE(&req->r_node);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 	INIT_LIST_HEAD(&req->r_linger_item);
-	INIT_LIST_HEAD(&req->r_linger_osd);
+	INIT_LIST_HEAD(&req->r_linger_osd_item);
 	INIT_LIST_HEAD(&req->r_req_lru_item);
 	INIT_LIST_HEAD(&req->r_osd_item);
 
@@ -916,7 +916,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
 	 * list at the end to keep things in tid order.
 	 */
 	list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
-				 r_linger_osd) {
+				 r_linger_osd_item) {
 		/*
 		 * reregister request prior to unregistering linger so
 		 * that r_osd is preserved.
@@ -1218,7 +1218,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
 	ceph_osdc_get_request(req);
 	list_add_tail(&req->r_linger_item, &osdc->req_linger);
 	if (req->r_osd)
-		list_add_tail(&req->r_linger_osd,
+		list_add_tail(&req->r_linger_osd_item,
 			      &req->r_osd->o_linger_requests);
 }
 
@@ -1228,7 +1228,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
 	dout("__unregister_linger_request %p\n", req);
 	list_del_init(&req->r_linger_item);
 	if (req->r_osd) {
-		list_del_init(&req->r_linger_osd);
+		list_del_init(&req->r_linger_osd_item);
 
 		if (list_empty(&req->r_osd->o_requests) &&
 		    list_empty(&req->r_osd->o_linger_requests)) {
-- 
cgit v1.2.3


From 0215e44bb390a968d01404aa2f35af56f9b55fc8 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Fri, 20 Jun 2014 14:14:41 +0400
Subject: libceph: move and add dout()s to ceph_msg_{get,put}()

Add dout()s to ceph_msg_{get,put}().  Also move them to .c and turn
kref release callback into a static function.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 include/linux/ceph/messenger.h | 14 ++------------
 net/ceph/messenger.c           | 31 ++++++++++++++++++++++---------
 2 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index d21f2dba0731..40ae58e3e9db 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -285,19 +285,9 @@ extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
 
 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 				     bool can_fail);
-extern void ceph_msg_kfree(struct ceph_msg *m);
 
-
-static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
-{
-	kref_get(&msg->kref);
-	return msg;
-}
-extern void ceph_msg_last_put(struct kref *kref);
-static inline void ceph_msg_put(struct ceph_msg *msg)
-{
-	kref_put(&msg->kref, ceph_msg_last_put);
-}
+extern struct ceph_msg *ceph_msg_get(struct ceph_msg *msg);
+extern void ceph_msg_put(struct ceph_msg *msg);
 
 extern void ceph_msg_dump(struct ceph_msg *msg);
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1948d592aa54..8bffa5b90fef 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -3269,24 +3269,21 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
 /*
  * Free a generically kmalloc'd message.
  */
-void ceph_msg_kfree(struct ceph_msg *m)
+static void ceph_msg_free(struct ceph_msg *m)
 {
-	dout("msg_kfree %p\n", m);
+	dout("%s %p\n", __func__, m);
 	ceph_kvfree(m->front.iov_base);
 	kmem_cache_free(ceph_msg_cache, m);
 }
 
-/*
- * Drop a msg ref.  Destroy as needed.
- */
-void ceph_msg_last_put(struct kref *kref)
+static void ceph_msg_release(struct kref *kref)
 {
 	struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
 	LIST_HEAD(data);
 	struct list_head *links;
 	struct list_head *next;
 
-	dout("ceph_msg_put last one on %p\n", m);
+	dout("%s %p\n", __func__, m);
 	WARN_ON(!list_empty(&m->list_head));
 
 	/* drop middle, data, if any */
@@ -3308,9 +3305,25 @@ void ceph_msg_last_put(struct kref *kref)
 	if (m->pool)
 		ceph_msgpool_put(m->pool, m);
 	else
-		ceph_msg_kfree(m);
+		ceph_msg_free(m);
+}
+
+struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
+{
+	dout("%s %p (was %d)\n", __func__, msg,
+	     atomic_read(&msg->kref.refcount));
+	kref_get(&msg->kref);
+	return msg;
+}
+EXPORT_SYMBOL(ceph_msg_get);
+
+void ceph_msg_put(struct ceph_msg *msg)
+{
+	dout("%s %p (was %d)\n", __func__, msg,
+	     atomic_read(&msg->kref.refcount));
+	kref_put(&msg->kref, ceph_msg_release);
 }
-EXPORT_SYMBOL(ceph_msg_last_put);
+EXPORT_SYMBOL(ceph_msg_put);
 
 void ceph_msg_dump(struct ceph_msg *msg)
 {
-- 
cgit v1.2.3


From 9e94af202afd961da39f82b55ba83edd4ad30e98 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Fri, 20 Jun 2014 14:14:42 +0400
Subject: libceph: move and add dout()s to ceph_osdc_request_{get,put}()

Add dout()s to ceph_osdc_request_{get,put}().  Also move them to .c and
turn kref release callback into a static function.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 include/linux/ceph/osd_client.h | 11 ++---------
 net/ceph/osd_client.c           | 26 ++++++++++++++++++++++----
 2 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 7490a03ac163..a8d5652f589d 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -328,15 +328,8 @@ extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
 extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
 						struct ceph_osd_request *req);
 
-static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
-{
-	kref_get(&req->r_kref);
-}
-extern void ceph_osdc_release_request(struct kref *kref);
-static inline void ceph_osdc_put_request(struct ceph_osd_request *req)
-{
-	kref_put(&req->r_kref, ceph_osdc_release_request);
-}
+extern void ceph_osdc_get_request(struct ceph_osd_request *req);
+extern void ceph_osdc_put_request(struct ceph_osd_request *req);
 
 extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 				   struct ceph_osd_request *req,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6202923b41ff..7406046212dc 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -297,12 +297,15 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
 /*
  * requests
  */
-void ceph_osdc_release_request(struct kref *kref)
+static void ceph_osdc_release_request(struct kref *kref)
 {
-	struct ceph_osd_request *req;
+	struct ceph_osd_request *req = container_of(kref,
+					    struct ceph_osd_request, r_kref);
 	unsigned int which;
 
-	req = container_of(kref, struct ceph_osd_request, r_kref);
+	dout("%s %p (r_request %p r_reply %p)\n", __func__, req,
+	     req->r_request, req->r_reply);
+
 	if (req->r_request)
 		ceph_msg_put(req->r_request);
 	if (req->r_reply) {
@@ -320,7 +323,22 @@ void ceph_osdc_release_request(struct kref *kref)
 		kmem_cache_free(ceph_osd_request_cache, req);
 
 }
-EXPORT_SYMBOL(ceph_osdc_release_request);
+
+void ceph_osdc_get_request(struct ceph_osd_request *req)
+{
+	dout("%s %p (was %d)\n", __func__, req,
+	     atomic_read(&req->r_kref.refcount));
+	kref_get(&req->r_kref);
+}
+EXPORT_SYMBOL(ceph_osdc_get_request);
+
+void ceph_osdc_put_request(struct ceph_osd_request *req)
+{
+	dout("%s %p (was %d)\n", __func__, req,
+	     atomic_read(&req->r_kref.refcount));
+	kref_put(&req->r_kref, ceph_osdc_release_request);
+}
+EXPORT_SYMBOL(ceph_osdc_put_request);
 
 struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
-- 
cgit v1.2.3


From c9f9b93ddfd76498fe36d9f550bd26533a4ee6bf Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Thu, 19 Jun 2014 11:38:13 +0400
Subject: libceph: introduce ceph_osdc_cancel_request()

Introduce ceph_osdc_cancel_request() intended for canceling requests
from the higher layers (rbd and cephfs).  Because higher layers are in
charge and are supposed to know what and when they are canceling, the
request is not completed, only unref'ed and removed from the libceph
data structures.

__cancel_request() is no longer called before __unregister_request(),
because __unregister_request() unconditionally revokes r_request and
there is no point in trying to do it twice.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 include/linux/ceph/osd_client.h |  1 +
 net/ceph/osd_client.c           | 31 +++++++++++++++++++++++++------
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index a8d5652f589d..de09cad7b7c7 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -334,6 +334,7 @@ extern void ceph_osdc_put_request(struct ceph_osd_request *req);
 extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 				   struct ceph_osd_request *req,
 				   bool nofail);
+extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
 extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
 				  struct ceph_osd_request *req);
 extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 89d7d8861d80..6c1ccf5590a3 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2467,6 +2467,25 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 }
 EXPORT_SYMBOL(ceph_osdc_start_request);
 
+/*
+ * Unregister a registered request.  The request is not completed (i.e.
+ * no callbacks or wakeups) - higher layers are supposed to know what
+ * they are canceling.
+ */
+void ceph_osdc_cancel_request(struct ceph_osd_request *req)
+{
+	struct ceph_osd_client *osdc = req->r_osdc;
+
+	mutex_lock(&osdc->request_mutex);
+	if (req->r_linger)
+		__unregister_linger_request(osdc, req);
+	__unregister_request(osdc, req);
+	mutex_unlock(&osdc->request_mutex);
+
+	dout("%s %p tid %llu canceled\n", __func__, req, req->r_tid);
+}
+EXPORT_SYMBOL(ceph_osdc_cancel_request);
+
 /*
  * wait for a request to complete
  */
@@ -2475,18 +2494,18 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
 {
 	int rc;
 
+	dout("%s %p tid %llu\n", __func__, req, req->r_tid);
+
 	rc = wait_for_completion_interruptible(&req->r_completion);
 	if (rc < 0) {
-		mutex_lock(&osdc->request_mutex);
-		__cancel_request(req);
-		__unregister_request(osdc, req);
-		mutex_unlock(&osdc->request_mutex);
+		dout("%s %p tid %llu interrupted\n", __func__, req, req->r_tid);
+		ceph_osdc_cancel_request(req);
 		complete_request(req);
-		dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
 		return rc;
 	}
 
-	dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result);
+	dout("%s %p tid %llu result %d\n", __func__, req, req->r_tid,
+	     req->r_result);
 	return req->r_result;
 }
 EXPORT_SYMBOL(ceph_osdc_wait_request);
-- 
cgit v1.2.3


From 2d05f082cbc73b837011225b165d64d25b47c940 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Tue, 24 Jun 2014 16:21:45 +0400
Subject: libceph: nuke ceph_osdc_unregister_linger_request()

Remove now unused ceph_osdc_unregister_linger_request().

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Alex Elder <elder@linaro.org>
---
 include/linux/ceph/osd_client.h |  2 --
 net/ceph/osd_client.c           | 10 ----------
 2 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index de09cad7b7c7..03aeb27fcc69 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -325,8 +325,6 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
 
 extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
 					 struct ceph_osd_request *req);
-extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
-						struct ceph_osd_request *req);
 
 extern void ceph_osdc_get_request(struct ceph_osd_request *req);
 extern void ceph_osdc_put_request(struct ceph_osd_request *req);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6c1ccf5590a3..30f6faf3584f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1281,16 +1281,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
 	ceph_osdc_put_request(req);
 }
 
-void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
-					 struct ceph_osd_request *req)
-{
-	mutex_lock(&osdc->request_mutex);
-	if (req->r_linger)
-		__unregister_linger_request(osdc, req);
-	mutex_unlock(&osdc->request_mutex);
-}
-EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
-
 void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
 				  struct ceph_osd_request *req)
 {
-- 
cgit v1.2.3


From 8471bb73ba10ed6788b4f1e9b8a0f9dc6bdb05b5 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Wed, 21 May 2014 19:06:12 -0300
Subject: mtd: Introduce mtd_block_isreserved()

In addition to mtd_block_isbad(), which checks if a block is bad or
reserved, it's needed to check if a block is reserved only (but not
bad). This commit adds an MTD interface for it, in a similar fashion to
mtd_block_isbad().

While here, fix mtd_block_isbad() so the out-of-bounds checking is done
before the callback check.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Tested-by: Pekon Gupta <pekon@ti.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdcore.c        | 14 ++++++++++++--
 drivers/mtd/mtdpart.c        |  9 +++++++++
 drivers/mtd/nand/nand_base.c | 18 ++++++++++++++++++
 drivers/mtd/nand/nand_bbt.c  | 14 ++++++++++++++
 include/linux/mtd/mtd.h      |  2 ++
 include/linux/mtd/nand.h     |  1 +
 6 files changed, 56 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 11857faa0d96..e4831b4159db 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1043,12 +1043,22 @@ int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 }
 EXPORT_SYMBOL_GPL(mtd_is_locked);
 
-int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
+int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs)
 {
-	if (!mtd->_block_isbad)
+	if (ofs < 0 || ofs > mtd->size)
+		return -EINVAL;
+	if (!mtd->_block_isreserved)
 		return 0;
+	return mtd->_block_isreserved(mtd, ofs);
+}
+EXPORT_SYMBOL_GPL(mtd_block_isreserved);
+
+int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
+{
 	if (ofs < 0 || ofs > mtd->size)
 		return -EINVAL;
+	if (!mtd->_block_isbad)
+		return 0;
 	return mtd->_block_isbad(mtd, ofs);
 }
 EXPORT_SYMBOL_GPL(mtd_block_isbad);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 1ca9aec141ff..921e8c647884 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -290,6 +290,13 @@ static void part_resume(struct mtd_info *mtd)
 	part->master->_resume(part->master);
 }
 
+static int part_block_isreserved(struct mtd_info *mtd, loff_t ofs)
+{
+	struct mtd_part *part = PART(mtd);
+	ofs += part->offset;
+	return part->master->_block_isreserved(part->master, ofs);
+}
+
 static int part_block_isbad(struct mtd_info *mtd, loff_t ofs)
 {
 	struct mtd_part *part = PART(mtd);
@@ -422,6 +429,8 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 		slave->mtd._unlock = part_unlock;
 	if (master->_is_locked)
 		slave->mtd._is_locked = part_is_locked;
+	if (master->_block_isreserved)
+		slave->mtd._block_isreserved = part_block_isreserved;
 	if (master->_block_isbad)
 		slave->mtd._block_isbad = part_block_isbad;
 	if (master->_block_markbad)
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 41167e9e991e..0c505dd1f522 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -484,6 +484,23 @@ static int nand_check_wp(struct mtd_info *mtd)
 	return (chip->read_byte(mtd) & NAND_STATUS_WP) ? 0 : 1;
 }
 
+/**
+ * nand_block_checkbad - [GENERIC] Check if a block is marked bad
+ * @mtd: MTD device structure
+ * @ofs: offset from device start
+ *
+ * Check if the block is mark as reserved.
+ */
+static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs)
+{
+	struct nand_chip *chip = mtd->priv;
+
+	if (!chip->bbt)
+		return 0;
+	/* Return info from the table */
+	return nand_isreserved_bbt(mtd, ofs);
+}
+
 /**
  * nand_block_checkbad - [GENERIC] Check if a block is marked bad
  * @mtd: MTD device structure
@@ -4111,6 +4128,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 	mtd->_unlock = NULL;
 	mtd->_suspend = nand_suspend;
 	mtd->_resume = nand_resume;
+	mtd->_block_isreserved = nand_block_isreserved;
 	mtd->_block_isbad = nand_block_isbad;
 	mtd->_block_markbad = nand_block_markbad;
 	mtd->writebufsize = mtd->writesize;
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 7f0c3b4c2a4f..443fa82cde6a 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -1310,6 +1310,20 @@ int nand_default_bbt(struct mtd_info *mtd)
 	return nand_scan_bbt(mtd, this->badblock_pattern);
 }
 
+/**
+ * nand_isreserved_bbt - [NAND Interface] Check if a block is reserved
+ * @mtd: MTD device structure
+ * @offs: offset in the device
+ */
+int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs)
+{
+	struct nand_chip *this = mtd->priv;
+	int block;
+
+	block = (int)(offs >> this->bbt_erase_shift);
+	return bbt_get_entry(this, block) == BBT_BLOCK_RESERVED;
+}
+
 /**
  * nand_isbad_bbt - [NAND Interface] Check if a block is bad
  * @mtd: MTD device structure
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a1b0b4c8fd79..031ff3a9a0bd 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -222,6 +222,7 @@ struct mtd_info {
 	int (*_lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*_unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*_is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*_block_isreserved) (struct mtd_info *mtd, loff_t ofs);
 	int (*_block_isbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*_block_markbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*_suspend) (struct mtd_info *mtd);
@@ -302,6 +303,7 @@ static inline void mtd_sync(struct mtd_info *mtd)
 int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs);
 int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs);
 int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs);
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 2f0af2891f0f..1cff329ae13d 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -810,6 +810,7 @@ extern struct nand_manufacturers nand_manuf_ids[];
 extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd);
 extern int nand_default_bbt(struct mtd_info *mtd);
 extern int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
+extern int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs);
 extern int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt);
 extern int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 			   int allowbbt);
-- 
cgit v1.2.3


From 6b32fafee2bb5fcf0b3d3d04a9762d3a0212089e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 20 Jun 2014 14:37:38 +0200
Subject: dmaengine: shdma: Add more register documentation

Also add a few definitions that were missing.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 arch/sh/include/asm/dma-register.h | 36 +++++++++++++++++++-----------------
 drivers/dma/sh/shdmac.c            | 12 ++++++------
 include/linux/sh_dma.h             | 24 +++++++++++++-----------
 3 files changed, 38 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/include/asm/dma-register.h b/arch/sh/include/asm/dma-register.h
index 51cd78feacff..c757b47e6b64 100644
--- a/arch/sh/include/asm/dma-register.h
+++ b/arch/sh/include/asm/dma-register.h
@@ -13,17 +13,17 @@
 #ifndef DMA_REGISTER_H
 #define DMA_REGISTER_H
 
-/* DMA register */
-#define SAR	0x00
-#define DAR	0x04
-#define TCR	0x08
-#define CHCR	0x0C
-#define DMAOR	0x40
+/* DMA registers */
+#define SAR	0x00	/* Source Address Register */
+#define DAR	0x04	/* Destination Address Register */
+#define TCR	0x08	/* Transfer Count Register */
+#define CHCR	0x0C	/* Channel Control Register */
+#define DMAOR	0x40	/* DMA Operation Register */
 
 /* DMAOR definitions */
-#define DMAOR_AE	0x00000004
+#define DMAOR_AE	0x00000004	/* Address Error Flag */
 #define DMAOR_NMIF	0x00000002
-#define DMAOR_DME	0x00000001
+#define DMAOR_DME	0x00000001	/* DMA Master Enable */
 
 /* Definitions for the SuperH DMAC */
 #define REQ_L	0x00000000
@@ -34,18 +34,20 @@
 #define ACK_W	0x00020000
 #define ACK_H	0x00000000
 #define ACK_L	0x00010000
-#define DM_INC	0x00004000
-#define DM_DEC	0x00008000
-#define DM_FIX	0x0000c000
-#define SM_INC	0x00001000
-#define SM_DEC	0x00002000
-#define SM_FIX	0x00003000
+#define DM_INC	0x00004000	/* Destination addresses are incremented */
+#define DM_DEC	0x00008000	/* Destination addresses are decremented */
+#define DM_FIX	0x0000c000	/* Destination address is fixed */
+#define SM_INC	0x00001000	/* Source addresses are incremented */
+#define SM_DEC	0x00002000	/* Source addresses are decremented */
+#define SM_FIX	0x00003000	/* Source address is fixed */
 #define RS_IN	0x00000200
 #define RS_OUT	0x00000300
+#define RS_AUTO	0x00000400	/* Auto Request */
+#define RS_ERS	0x00000800	/* DMA extended resource selector */
 #define TS_BLK	0x00000040
 #define TM_BUR	0x00000020
-#define CHCR_DE	0x00000001
-#define CHCR_TE	0x00000002
-#define CHCR_IE	0x00000004
+#define CHCR_DE	0x00000001	/* DMA Enable */
+#define CHCR_TE	0x00000002	/* Transfer End Flag */
+#define CHCR_IE	0x00000004	/* Interrupt Enable */
 
 #endif
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
index 146d5df926db..1a6f6595c6c1 100644
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -38,12 +38,12 @@
 #include "../dmaengine.h"
 #include "shdma.h"
 
-/* DMA register */
-#define SAR	0x00
-#define DAR	0x04
-#define TCR	0x08
-#define CHCR	0x0C
-#define DMAOR	0x40
+/* DMA registers */
+#define SAR	0x00	/* Source Address Register */
+#define DAR	0x04	/* Destination Address Register */
+#define TCR	0x08	/* Transfer Count Register */
+#define CHCR	0x0C	/* Channel Control Register */
+#define DMAOR	0x40	/* DMA Operation Register */
 
 #define TEND	0x18 /* USB-DMAC */
 
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index b7b43b82231e..56b97eed28a4 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -95,19 +95,21 @@ struct sh_dmae_pdata {
 };
 
 /* DMAOR definitions */
-#define DMAOR_AE	0x00000004
+#define DMAOR_AE	0x00000004	/* Address Error Flag */
 #define DMAOR_NMIF	0x00000002
-#define DMAOR_DME	0x00000001
+#define DMAOR_DME	0x00000001	/* DMA Master Enable */
 
 /* Definitions for the SuperH DMAC */
-#define DM_INC	0x00004000
-#define DM_DEC	0x00008000
-#define DM_FIX	0x0000c000
-#define SM_INC	0x00001000
-#define SM_DEC	0x00002000
-#define SM_FIX	0x00003000
-#define CHCR_DE	0x00000001
-#define CHCR_TE	0x00000002
-#define CHCR_IE	0x00000004
+#define DM_INC	0x00004000	/* Destination addresses are incremented */
+#define DM_DEC	0x00008000	/* Destination addresses are decremented */
+#define DM_FIX	0x0000c000	/* Destination address is fixed */
+#define SM_INC	0x00001000	/* Source addresses are incremented */
+#define SM_DEC	0x00002000	/* Source addresses are decremented */
+#define SM_FIX	0x00003000	/* Source address is fixed */
+#define RS_AUTO	0x00000400	/* Auto Request */
+#define RS_ERS	0x00000800	/* DMA extended resource selector */
+#define CHCR_DE	0x00000001	/* DMA Enable */
+#define CHCR_TE	0x00000002	/* Transfer End Flag */
+#define CHCR_IE	0x00000004	/* Interrupt Enable */
 
 #endif
-- 
cgit v1.2.3


From 3a48edc4bd68f841c07c7bc86358d2f02133f247 Mon Sep 17 00:00:00 2001
From: Tim Kryger <tim.kryger@gmail.com>
Date: Fri, 13 Jun 2014 10:13:56 -0700
Subject: mmc: sdhci: Use mmc core regulator infrastucture

Switch the common SDHCI code over to use mmc_host's regulator pointers
and remove the ones in the sdhci_host structure.  Additionally, use the
common mmc_regulator_get_supply function to get the regulators and set
the ocr_avail mask.

This change sets the ocr_avail directly based upon the voltage ranges
supported which ensures ocr_avail is set correctly while allowing the
use of regulators that can't provide exactly 1.8v, 3.0v, or 3.3v.

Signed-off-by: Tim Kryger <tim.kryger@gmail.com>
Signed-off-by: Markus Mayer <markus.mayer@linaro.org>
Reviewed-by: Matt Porter <mporter@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci.c  | 97 ++++++++++++++++++-----------------------------
 include/linux/mmc/sdhci.h |  3 --
 2 files changed, 36 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 47055f3f01b8..ee524b06db14 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1223,6 +1223,7 @@ EXPORT_SYMBOL_GPL(sdhci_set_clock);
 static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
 			    unsigned short vdd)
 {
+	struct mmc_host *mmc = host->mmc;
 	u8 pwr = 0;
 
 	if (mode != MMC_POWER_OFF) {
@@ -1284,9 +1285,9 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
 			mdelay(10);
 	}
 
-	if (host->vmmc) {
+	if (!IS_ERR(mmc->supply.vmmc)) {
 		spin_unlock_irq(&host->lock);
-		mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd);
+		mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, vdd);
 		spin_lock_irq(&host->lock);
 	}
 }
@@ -1440,13 +1441,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 {
 	unsigned long flags;
 	u8 ctrl;
+	struct mmc_host *mmc = host->mmc;
 
 	spin_lock_irqsave(&host->lock, flags);
 
 	if (host->flags & SDHCI_DEVICE_DEAD) {
 		spin_unlock_irqrestore(&host->lock, flags);
-		if (host->vmmc && ios->power_mode == MMC_POWER_OFF)
-			mmc_regulator_set_ocr(host->mmc, host->vmmc, 0);
+		if (!IS_ERR(mmc->supply.vmmc) &&
+		    ios->power_mode == MMC_POWER_OFF)
+			mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, 0);
 		return;
 	}
 
@@ -1707,6 +1710,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
 static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
 						struct mmc_ios *ios)
 {
+	struct mmc_host *mmc = host->mmc;
 	u16 ctrl;
 	int ret;
 
@@ -1725,8 +1729,9 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
 		ctrl &= ~SDHCI_CTRL_VDD_180;
 		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
 
-		if (host->vqmmc) {
-			ret = regulator_set_voltage(host->vqmmc, 2700000, 3600000);
+		if (!IS_ERR(mmc->supply.vqmmc)) {
+			ret = regulator_set_voltage(mmc->supply.vqmmc, 2700000,
+						    3600000);
 			if (ret) {
 				pr_warning("%s: Switching to 3.3V signalling voltage "
 						" failed\n", mmc_hostname(host->mmc));
@@ -1746,8 +1751,8 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
 
 		return -EAGAIN;
 	case MMC_SIGNAL_VOLTAGE_180:
-		if (host->vqmmc) {
-			ret = regulator_set_voltage(host->vqmmc,
+		if (!IS_ERR(mmc->supply.vqmmc)) {
+			ret = regulator_set_voltage(mmc->supply.vqmmc,
 					1700000, 1950000);
 			if (ret) {
 				pr_warning("%s: Switching to 1.8V signalling voltage "
@@ -1776,8 +1781,9 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
 
 		return -EAGAIN;
 	case MMC_SIGNAL_VOLTAGE_120:
-		if (host->vqmmc) {
-			ret = regulator_set_voltage(host->vqmmc, 1100000, 1300000);
+		if (!IS_ERR(mmc->supply.vqmmc)) {
+			ret = regulator_set_voltage(mmc->supply.vqmmc, 1100000,
+						    1300000);
 			if (ret) {
 				pr_warning("%s: Switching to 1.2V signalling voltage "
 						" failed\n", mmc_hostname(host->mmc));
@@ -2962,25 +2968,22 @@ int sdhci_add_host(struct sdhci_host *host)
 	    !(host->mmc->caps & MMC_CAP_NONREMOVABLE))
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
+	/* If there are external regulators, get them */
+	if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
 	/* If vqmmc regulator and no 1.8V signalling, then there's no UHS */
-	host->vqmmc = regulator_get_optional(mmc_dev(mmc), "vqmmc");
-	if (IS_ERR_OR_NULL(host->vqmmc)) {
-		if (PTR_ERR(host->vqmmc) < 0) {
-			pr_info("%s: no vqmmc regulator found\n",
-				mmc_hostname(mmc));
-			host->vqmmc = NULL;
-		}
-	} else {
-		ret = regulator_enable(host->vqmmc);
-		if (!regulator_is_supported_voltage(host->vqmmc, 1700000,
-			1950000))
+	if (!IS_ERR(mmc->supply.vqmmc)) {
+		ret = regulator_enable(mmc->supply.vqmmc);
+		if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000,
+						    1950000))
 			caps[1] &= ~(SDHCI_SUPPORT_SDR104 |
 					SDHCI_SUPPORT_SDR50 |
 					SDHCI_SUPPORT_DDR50);
 		if (ret) {
 			pr_warn("%s: Failed to enable vqmmc regulator: %d\n",
 				mmc_hostname(mmc), ret);
-			host->vqmmc = NULL;
+			mmc->supply.vqmmc = NULL;
 		}
 	}
 
@@ -3041,34 +3044,6 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	ocr_avail = 0;
 
-	host->vmmc = regulator_get_optional(mmc_dev(mmc), "vmmc");
-	if (IS_ERR_OR_NULL(host->vmmc)) {
-		if (PTR_ERR(host->vmmc) < 0) {
-			pr_info("%s: no vmmc regulator found\n",
-				mmc_hostname(mmc));
-			host->vmmc = NULL;
-		}
-	}
-
-#ifdef CONFIG_REGULATOR
-	/*
-	 * Voltage range check makes sense only if regulator reports
-	 * any voltage value.
-	 */
-	if (host->vmmc && regulator_get_voltage(host->vmmc) > 0) {
-		ret = regulator_is_supported_voltage(host->vmmc, 2700000,
-			3600000);
-		if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_330)))
-			caps[0] &= ~SDHCI_CAN_VDD_330;
-		if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_300)))
-			caps[0] &= ~SDHCI_CAN_VDD_300;
-		ret = regulator_is_supported_voltage(host->vmmc, 1700000,
-			1950000);
-		if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_180)))
-			caps[0] &= ~SDHCI_CAN_VDD_180;
-	}
-#endif /* CONFIG_REGULATOR */
-
 	/*
 	 * According to SD Host Controller spec v3.00, if the Host System
 	 * can afford more than 150mA, Host Driver should set XPC to 1. Also
@@ -3077,8 +3052,8 @@ int sdhci_add_host(struct sdhci_host *host)
 	 * value.
 	 */
 	max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT);
-	if (!max_current_caps && host->vmmc) {
-		u32 curr = regulator_get_current_limit(host->vmmc);
+	if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) {
+		u32 curr = regulator_get_current_limit(mmc->supply.vmmc);
 		if (curr > 0) {
 
 			/* convert to SDHCI_MAX_CURRENT format */
@@ -3118,8 +3093,11 @@ int sdhci_add_host(struct sdhci_host *host)
 				   SDHCI_MAX_CURRENT_MULTIPLIER;
 	}
 
+	if (mmc->ocr_avail)
+		ocr_avail &= mmc->ocr_avail;
+
 	if (host->ocr_mask)
-		ocr_avail = host->ocr_mask;
+		ocr_avail &= host->ocr_mask;
 
 	mmc->ocr_avail = ocr_avail;
 	mmc->ocr_avail_sdio = ocr_avail;
@@ -3273,6 +3251,7 @@ EXPORT_SYMBOL_GPL(sdhci_add_host);
 
 void sdhci_remove_host(struct sdhci_host *host, int dead)
 {
+	struct mmc_host *mmc = host->mmc;
 	unsigned long flags;
 
 	if (dead) {
@@ -3310,15 +3289,11 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
 	tasklet_kill(&host->finish_tasklet);
 
-	if (host->vmmc) {
-		regulator_disable(host->vmmc);
-		regulator_put(host->vmmc);
-	}
+	if (!IS_ERR(mmc->supply.vmmc))
+		regulator_disable(mmc->supply.vmmc);
 
-	if (host->vqmmc) {
-		regulator_disable(host->vqmmc);
-		regulator_put(host->vqmmc);
-	}
+	if (!IS_ERR(mmc->supply.vqmmc))
+		regulator_disable(mmc->supply.vqmmc);
 
 	if (host->adma_desc)
 		dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 08abe9941884..09ebe57d5ce9 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -104,9 +104,6 @@ struct sdhci_host {
 
 	const struct sdhci_ops *ops;	/* Low level hw interface */
 
-	struct regulator *vmmc;		/* Power regulator (vmmc) */
-	struct regulator *vqmmc;	/* Signaling regulator (vccq) */
-
 	/* Internal data */
 	struct mmc_host *mmc;	/* MMC structure */
 	u64 dma_mask;		/* custom DMA mask */
-- 
cgit v1.2.3


From 2cd3a2a54656f9c480b1c7272fc07635d575841b Mon Sep 17 00:00:00 2001
From: Andreas Fenkart <afenkart@gmail.com>
Date: Thu, 29 May 2014 10:28:00 +0200
Subject: mmc: omap_hsmmc: Enable SDIO interrupt

There have been various patches floating around for enabling
the SDIO IRQ for hsmmc, but none of them ever got merged.

Probably the reason for not merging the SDIO interrupt patches
has been the lack of wake-up path for SDIO on some omaps that
has also needed remuxing the SDIO DAT1 line to a GPIO making
the patches complex.

This patch adds the minimal SDIO IRQ support to hsmmc for
omaps that do have the wake-up path. For those omaps, the
DAT1 line need to have the wake-up enable bit set, and the
wake-up interrupt is the same as for the MMC controller.

This patch has been tested on am3730 es1.2 with mwifiex
connected to MMC3 with mwifiex waking to Ethernet traffic
from off-idle mode. Note that for omaps that do not have
the SDIO wake-up path, this patch will not work for idle
modes and further patches for remuxing DAT1 to GPIO are
needed.

Based on earlier patches [1][2] by David Vrabel
<david.vrabel@csr.com>, Steve Sakoman <steve@sakoman.com>

For now, only support SDIO interrupt if we are booted with
a separate wake-irq configued via device tree. This is
because omaps need the wake-irq for idle states, and some
omaps need special quirks. And we don't want to add new
legacy mux platform init code callbacks any longer as we
are moving to DT based booting anyways.

To use it, you need to specify the wake-irq using the
interrupts-extended property.

[1] http://www.sakoman.com/cgi-bin/gitweb.cgi?p=linux.git;a=commitdiff_plain;h=010810d22f6f49ac03da4ba384969432e0320453
[2] http://comments.gmane.org/gmane.linux.kernel.mmc/20446

Acked-by: Balaji T K <balajitk@ti.com>
Signed-off-by: Andreas Fenkart <afenkart@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 .../devicetree/bindings/mmc/ti-omap-hsmmc.txt      |   1 +
 drivers/mmc/host/omap_hsmmc.c                      | 201 +++++++++++++++++++--
 include/linux/platform_data/mmc-omap.h             |   1 +
 3 files changed, 191 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
index ce8056116fb0..0233ba7951e5 100644
--- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
+++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
@@ -12,6 +12,7 @@ Required properties:
  Should be "ti,omap3-hsmmc", for OMAP3 controllers
  Should be "ti,omap3-pre-es3-hsmmc" for OMAP3 controllers pre ES3.0
  Should be "ti,omap4-hsmmc", for OMAP4 controllers
+ Should be "ti,am33xx-hsmmc", for AM335x controllers
 - ti,hwmods: Must be "mmc<n>", n is controller instance starting 1
 
 Optional properties:
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 6b7b75585926..9446010a5dd9 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -29,6 +29,7 @@
 #include <linux/timer.h>
 #include <linux/clk.h>
 #include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/of_gpio.h>
 #include <linux/of_device.h>
 #include <linux/omap-dmaengine.h>
@@ -36,6 +37,7 @@
 #include <linux/mmc/core.h>
 #include <linux/mmc/mmc.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pinctrl/consumer.h>
@@ -106,6 +108,7 @@
 #define TC_EN			(1 << 1)
 #define BWR_EN			(1 << 4)
 #define BRR_EN			(1 << 5)
+#define CIRQ_EN			(1 << 8)
 #define ERR_EN			(1 << 15)
 #define CTO_EN			(1 << 16)
 #define CCRC_EN			(1 << 17)
@@ -140,7 +143,6 @@
 #define VDD_3V0			3000000		/* 300000 uV */
 #define VDD_165_195		(ffs(MMC_VDD_165_195) - 1)
 
-#define AUTO_CMD23		(1 << 1)	/* Auto CMD23 support */
 /*
  * One controller can have multiple slots, like on some omap boards using
  * omap.c controller driver. Luckily this is not currently done on any known
@@ -194,6 +196,7 @@ struct omap_hsmmc_host {
 	u32			sysctl;
 	u32			capa;
 	int			irq;
+	int			wake_irq;
 	int			use_dma, dma_ch;
 	struct dma_chan		*tx_chan;
 	struct dma_chan		*rx_chan;
@@ -206,6 +209,9 @@ struct omap_hsmmc_host {
 	int			req_in_progress;
 	unsigned long		clk_rate;
 	unsigned int		flags;
+#define AUTO_CMD23		(1 << 0)        /* Auto CMD23 support */
+#define HSMMC_SDIO_IRQ_ENABLED	(1 << 1)        /* SDIO irq enabled */
+#define HSMMC_WAKE_IRQ_ENABLED	(1 << 2)
 	struct omap_hsmmc_next	next_data;
 	struct	omap_mmc_platform_data	*pdata;
 };
@@ -510,27 +516,40 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host)
 static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host,
 				  struct mmc_command *cmd)
 {
-	unsigned int irq_mask;
+	u32 irq_mask = INT_EN_MASK;
+	unsigned long flags;
 
 	if (host->use_dma)
-		irq_mask = INT_EN_MASK & ~(BRR_EN | BWR_EN);
-	else
-		irq_mask = INT_EN_MASK;
+		irq_mask &= ~(BRR_EN | BWR_EN);
 
 	/* Disable timeout for erases */
 	if (cmd->opcode == MMC_ERASE)
 		irq_mask &= ~DTO_EN;
 
+	spin_lock_irqsave(&host->irq_lock, flags);
 	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 	OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+
+	/* latch pending CIRQ, but don't signal MMC core */
+	if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
+		irq_mask |= CIRQ_EN;
 	OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+	spin_unlock_irqrestore(&host->irq_lock, flags);
 }
 
 static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host)
 {
-	OMAP_HSMMC_WRITE(host->base, ISE, 0);
-	OMAP_HSMMC_WRITE(host->base, IE, 0);
+	u32 irq_mask = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->irq_lock, flags);
+	/* no transfer running but need to keep cirq if enabled */
+	if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
+		irq_mask |= CIRQ_EN;
+	OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+	OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
 	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+	spin_unlock_irqrestore(&host->irq_lock, flags);
 }
 
 /* Calculate divisor for the given clock frequency */
@@ -681,7 +700,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
 		&& time_before(jiffies, timeout))
 		;
 
-	omap_hsmmc_disable_irq(host);
+	OMAP_HSMMC_WRITE(host->base, ISE, 0);
+	OMAP_HSMMC_WRITE(host->base, IE, 0);
+	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 
 	/* Do not initialize card-specific things if the power is off */
 	if (host->power_mode == MMC_POWER_OFF)
@@ -1118,8 +1139,12 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
 	int status;
 
 	status = OMAP_HSMMC_READ(host->base, STAT);
-	while (status & INT_EN_MASK && host->req_in_progress) {
-		omap_hsmmc_do_irq(host, status);
+	while (status & (INT_EN_MASK | CIRQ_EN)) {
+		if (host->req_in_progress)
+			omap_hsmmc_do_irq(host, status);
+
+		if (status & CIRQ_EN)
+			mmc_signal_sdio_irq(host->mmc);
 
 		/* Flush posted write */
 		status = OMAP_HSMMC_READ(host->base, STAT);
@@ -1128,6 +1153,22 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id)
+{
+	struct omap_hsmmc_host *host = dev_id;
+
+	/* cirq is level triggered, disable to avoid infinite loop */
+	spin_lock(&host->irq_lock);
+	if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
+		disable_irq_nosync(host->wake_irq);
+		host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
+	}
+	spin_unlock(&host->irq_lock);
+	pm_request_resume(host->dev); /* no use counter */
+
+	return IRQ_HANDLED;
+}
+
 static void set_sd_bus_power(struct omap_hsmmc_host *host)
 {
 	unsigned long i;
@@ -1639,6 +1680,79 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card)
 		mmc_slot(host).init_card(card);
 }
 
+static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
+	u32 irq_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->irq_lock, flags);
+
+	irq_mask = OMAP_HSMMC_READ(host->base, ISE);
+	if (enable) {
+		host->flags |= HSMMC_SDIO_IRQ_ENABLED;
+		irq_mask |= CIRQ_EN;
+	} else {
+		host->flags &= ~HSMMC_SDIO_IRQ_ENABLED;
+		irq_mask &= ~CIRQ_EN;
+	}
+	OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+
+	/*
+	 * if enable, piggy back detection on current request
+	 * but always disable immediately
+	 */
+	if (!host->req_in_progress || !enable)
+		OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+
+	/* flush posted write */
+	OMAP_HSMMC_READ(host->base, IE);
+
+	spin_unlock_irqrestore(&host->irq_lock, flags);
+}
+
+static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
+{
+	struct mmc_host *mmc = host->mmc;
+	int ret;
+
+	/*
+	 * For omaps with wake-up path, wakeirq will be irq from pinctrl and
+	 * for other omaps, wakeirq will be from GPIO (dat line remuxed to
+	 * gpio). wakeirq is needed to detect sdio irq in runtime suspend state
+	 * with functional clock disabled.
+	 */
+	if (!host->dev->of_node || !host->wake_irq)
+		return -ENODEV;
+
+	/* Prevent auto-enabling of IRQ */
+	irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN);
+	ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq,
+			       IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+			       mmc_hostname(mmc), host);
+	if (ret) {
+		dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n");
+		goto err;
+	}
+
+	/*
+	 * Some omaps don't have wake-up path from deeper idle states
+	 * and need to remux SDIO DAT1 to GPIO for wake-up from idle.
+	 */
+	if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) {
+		ret = -ENODEV;
+		devm_free_irq(host->dev, host->wake_irq, host);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n");
+	host->wake_irq = 0;
+	return ret;
+}
+
 static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host)
 {
 	u32 hctl, capa, value;
@@ -1691,7 +1805,7 @@ static const struct mmc_host_ops omap_hsmmc_ops = {
 	.get_cd = omap_hsmmc_get_cd,
 	.get_ro = omap_hsmmc_get_ro,
 	.init_card = omap_hsmmc_init_card,
-	/* NYET -- enable_sdio_irq */
+	.enable_sdio_irq = omap_hsmmc_enable_sdio_irq,
 };
 
 #ifdef CONFIG_DEBUG_FS
@@ -1761,6 +1875,10 @@ static const struct omap_mmc_of_data omap3_pre_es3_mmc_of_data = {
 static const struct omap_mmc_of_data omap4_mmc_of_data = {
 	.reg_offset = 0x100,
 };
+static const struct omap_mmc_of_data am33xx_mmc_of_data = {
+	.reg_offset = 0x100,
+	.controller_flags = OMAP_HSMMC_SWAKEUP_MISSING,
+};
 
 static const struct of_device_id omap_mmc_of_match[] = {
 	{
@@ -1777,6 +1895,10 @@ static const struct of_device_id omap_mmc_of_match[] = {
 		.compatible = "ti,omap4-hsmmc",
 		.data = &omap4_mmc_of_data,
 	},
+	{
+		.compatible = "ti,am33xx-hsmmc",
+		.data = &am33xx_mmc_of_data,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, omap_mmc_of_match);
@@ -1913,6 +2035,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, host);
 
+	if (pdev->dev.of_node)
+		host->wake_irq = irq_of_parse_and_map(pdev->dev.of_node, 1);
+
 	mmc->ops	= &omap_hsmmc_ops;
 
 	mmc->f_min = OMAP_MMC_MIN_CLOCK;
@@ -2066,6 +2191,18 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 		dev_warn(&pdev->dev,
 			"pins are not configured from the driver\n");
 
+	/*
+	 * For now, only support SDIO interrupt if we have a separate
+	 * wake-up interrupt configured from device tree. This is because
+	 * the wake-up interrupt is needed for idle state and some
+	 * platforms need special quirks. And we don't want to add new
+	 * legacy mux platform init code callbacks any longer as we
+	 * are moving to DT based booting anyways.
+	 */
+	ret = omap_hsmmc_configure_wake_irq(host);
+	if (!ret)
+		mmc->caps |= MMC_CAP_SDIO_IRQ;
+
 	omap_hsmmc_protect_card(host);
 
 	mmc_add_host(mmc);
@@ -2170,11 +2307,18 @@ static int omap_hsmmc_suspend(struct device *dev)
 	pm_runtime_get_sync(host->dev);
 
 	if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) {
-		omap_hsmmc_disable_irq(host);
+		OMAP_HSMMC_WRITE(host->base, ISE, 0);
+		OMAP_HSMMC_WRITE(host->base, IE, 0);
+		OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 		OMAP_HSMMC_WRITE(host->base, HCTL,
 				OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
 	}
 
+	/* do not wake up due to sdio irq */
+	if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+	    !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
+		disable_irq(host->wake_irq);
+
 	if (host->dbclk)
 		clk_disable_unprepare(host->dbclk);
 
@@ -2200,6 +2344,10 @@ static int omap_hsmmc_resume(struct device *dev)
 
 	omap_hsmmc_protect_card(host);
 
+	if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+	    !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
+		enable_irq(host->wake_irq);
+
 	pm_runtime_mark_last_busy(host->dev);
 	pm_runtime_put_autosuspend(host->dev);
 	return 0;
@@ -2215,22 +2363,51 @@ static int omap_hsmmc_resume(struct device *dev)
 static int omap_hsmmc_runtime_suspend(struct device *dev)
 {
 	struct omap_hsmmc_host *host;
+	unsigned long flags;
 
 	host = platform_get_drvdata(to_platform_device(dev));
 	omap_hsmmc_context_save(host);
 	dev_dbg(dev, "disabled\n");
 
+	spin_lock_irqsave(&host->irq_lock, flags);
+	if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+	    (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
+		/* disable sdio irq handling to prevent race */
+		OMAP_HSMMC_WRITE(host->base, ISE, 0);
+		OMAP_HSMMC_WRITE(host->base, IE, 0);
+		OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+
+		WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED);
+		enable_irq(host->wake_irq);
+		host->flags |= HSMMC_WAKE_IRQ_ENABLED;
+	}
+	spin_unlock_irqrestore(&host->irq_lock, flags);
 	return 0;
 }
 
 static int omap_hsmmc_runtime_resume(struct device *dev)
 {
 	struct omap_hsmmc_host *host;
+	unsigned long flags;
 
 	host = platform_get_drvdata(to_platform_device(dev));
 	omap_hsmmc_context_restore(host);
 	dev_dbg(dev, "enabled\n");
 
+	spin_lock_irqsave(&host->irq_lock, flags);
+	if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+	    (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
+		/* sdio irq flag can't change while in runtime suspend */
+		if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
+			disable_irq_nosync(host->wake_irq);
+			host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
+		}
+
+		OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+		OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
+		OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
+	}
+	spin_unlock_irqrestore(&host->irq_lock, flags);
 	return 0;
 }
 
diff --git a/include/linux/platform_data/mmc-omap.h b/include/linux/platform_data/mmc-omap.h
index 2bf1b30cb5dc..51e70cf25cbc 100644
--- a/include/linux/platform_data/mmc-omap.h
+++ b/include/linux/platform_data/mmc-omap.h
@@ -28,6 +28,7 @@
  */
 #define OMAP_HSMMC_SUPPORTS_DUAL_VOLT		BIT(0)
 #define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ	BIT(1)
+#define OMAP_HSMMC_SWAKEUP_MISSING		BIT(2)
 
 struct mmc_card;
 
-- 
cgit v1.2.3


From ec38846ad59d7b780540afcec101b24139933195 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 29 May 2014 01:20:16 +0200
Subject: backlight: atmel-pwm-bl: remove obsolete driver

The atmel-pwm-bl driver is now obsolete. It is not used by any mainlined boards
and is replaced by the generic pwm_bl with the pawm-atmel driver using the
generic PWM framework.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Acked-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/video/backlight/Kconfig        |  11 --
 drivers/video/backlight/Makefile       |   1 -
 drivers/video/backlight/atmel-pwm-bl.c | 223 ---------------------------------
 include/linux/atmel-pwm-bl.h           |  43 -------
 4 files changed, 278 deletions(-)
 delete mode 100644 drivers/video/backlight/atmel-pwm-bl.c
 delete mode 100644 include/linux/atmel-pwm-bl.h

(limited to 'include/linux')

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 5d449059a556..c3c18339b8cb 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -178,17 +178,6 @@ config BACKLIGHT_ATMEL_LCDC
 	  If in doubt, it's safe to enable this option; it doesn't kick
 	  in unless the board's description says it's wired that way.
 
-config BACKLIGHT_ATMEL_PWM
-	tristate "Atmel PWM backlight control"
-	depends on ATMEL_PWM
-	help
-	  Say Y here if you want to use the PWM peripheral in Atmel AT91 and
-	  AVR32 devices. This driver will need additional platform data to know
-	  which PWM instance to use and how to configure it.
-
-	  To compile this driver as a module, choose M here: the module will be
-	  called atmel-pwm-bl.
-
 config BACKLIGHT_EP93XX
 	tristate "Cirrus EP93xx Backlight Driver"
 	depends on FB_EP93XX
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index bb820024f346..351451dbb607 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -25,7 +25,6 @@ obj-$(CONFIG_BACKLIGHT_ADP8860)		+= adp8860_bl.o
 obj-$(CONFIG_BACKLIGHT_ADP8870)		+= adp8870_bl.o
 obj-$(CONFIG_BACKLIGHT_APPLE)		+= apple_bl.o
 obj-$(CONFIG_BACKLIGHT_AS3711)		+= as3711_bl.o
-obj-$(CONFIG_BACKLIGHT_ATMEL_PWM)	+= atmel-pwm-bl.o
 obj-$(CONFIG_BACKLIGHT_BD6107)		+= bd6107.o
 obj-$(CONFIG_BACKLIGHT_CARILLO_RANCH)	+= cr_bllcd.o
 obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE)	+= backlight.o
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
deleted file mode 100644
index 261b1a4ec3d8..000000000000
--- a/drivers/video/backlight/atmel-pwm-bl.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) 2008 Atmel Corporation
- *
- * Backlight driver using Atmel PWM peripheral.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/fb.h>
-#include <linux/gpio.h>
-#include <linux/backlight.h>
-#include <linux/atmel_pwm.h>
-#include <linux/atmel-pwm-bl.h>
-#include <linux/slab.h>
-
-struct atmel_pwm_bl {
-	const struct atmel_pwm_bl_platform_data	*pdata;
-	struct backlight_device			*bldev;
-	struct platform_device			*pdev;
-	struct pwm_channel			pwmc;
-	int					gpio_on;
-};
-
-static void atmel_pwm_bl_set_gpio_on(struct atmel_pwm_bl *pwmbl, int on)
-{
-	if (!gpio_is_valid(pwmbl->gpio_on))
-		return;
-
-	gpio_set_value(pwmbl->gpio_on, on ^ pwmbl->pdata->on_active_low);
-}
-
-static int atmel_pwm_bl_set_intensity(struct backlight_device *bd)
-{
-	struct atmel_pwm_bl *pwmbl = bl_get_data(bd);
-	int intensity = bd->props.brightness;
-	int pwm_duty;
-
-	if (bd->props.power != FB_BLANK_UNBLANK)
-		intensity = 0;
-	if (bd->props.fb_blank != FB_BLANK_UNBLANK)
-		intensity = 0;
-
-	if (pwmbl->pdata->pwm_active_low)
-		pwm_duty = pwmbl->pdata->pwm_duty_min + intensity;
-	else
-		pwm_duty = pwmbl->pdata->pwm_duty_max - intensity;
-
-	if (pwm_duty > pwmbl->pdata->pwm_duty_max)
-		pwm_duty = pwmbl->pdata->pwm_duty_max;
-	if (pwm_duty < pwmbl->pdata->pwm_duty_min)
-		pwm_duty = pwmbl->pdata->pwm_duty_min;
-
-	if (!intensity) {
-		atmel_pwm_bl_set_gpio_on(pwmbl, 0);
-		pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty);
-		pwm_channel_disable(&pwmbl->pwmc);
-	} else {
-		pwm_channel_enable(&pwmbl->pwmc);
-		pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty);
-		atmel_pwm_bl_set_gpio_on(pwmbl, 1);
-	}
-
-	return 0;
-}
-
-static int atmel_pwm_bl_get_intensity(struct backlight_device *bd)
-{
-	struct atmel_pwm_bl *pwmbl = bl_get_data(bd);
-	u32 cdty;
-	u32 intensity;
-
-	cdty = pwm_channel_readl(&pwmbl->pwmc, PWM_CDTY);
-	if (pwmbl->pdata->pwm_active_low)
-		intensity = cdty - pwmbl->pdata->pwm_duty_min;
-	else
-		intensity = pwmbl->pdata->pwm_duty_max - cdty;
-
-	return intensity & 0xffff;
-}
-
-static int atmel_pwm_bl_init_pwm(struct atmel_pwm_bl *pwmbl)
-{
-	unsigned long pwm_rate = pwmbl->pwmc.mck;
-	unsigned long prescale = DIV_ROUND_UP(pwm_rate,
-			(pwmbl->pdata->pwm_frequency *
-			 pwmbl->pdata->pwm_compare_max)) - 1;
-
-	/*
-	 * Prescale must be power of two and maximum 0xf in size because of
-	 * hardware limit. PWM speed will be:
-	 *	PWM module clock speed / (2 ^ prescale).
-	 */
-	prescale = fls(prescale);
-	if (prescale > 0xf)
-		prescale = 0xf;
-
-	pwm_channel_writel(&pwmbl->pwmc, PWM_CMR, prescale);
-	pwm_channel_writel(&pwmbl->pwmc, PWM_CDTY,
-			pwmbl->pdata->pwm_duty_min +
-			pwmbl->bldev->props.brightness);
-	pwm_channel_writel(&pwmbl->pwmc, PWM_CPRD,
-			pwmbl->pdata->pwm_compare_max);
-
-	dev_info(&pwmbl->pdev->dev, "Atmel PWM backlight driver (%lu Hz)\n",
-		pwmbl->pwmc.mck / pwmbl->pdata->pwm_compare_max /
-		(1 << prescale));
-
-	return pwm_channel_enable(&pwmbl->pwmc);
-}
-
-static const struct backlight_ops atmel_pwm_bl_ops = {
-	.get_brightness = atmel_pwm_bl_get_intensity,
-	.update_status  = atmel_pwm_bl_set_intensity,
-};
-
-static int atmel_pwm_bl_probe(struct platform_device *pdev)
-{
-	struct backlight_properties props;
-	const struct atmel_pwm_bl_platform_data *pdata;
-	struct backlight_device *bldev;
-	struct atmel_pwm_bl *pwmbl;
-	unsigned long flags;
-	int retval;
-
-	pdata = dev_get_platdata(&pdev->dev);
-	if (!pdata)
-		return -ENODEV;
-
-	if (pdata->pwm_compare_max < pdata->pwm_duty_max ||
-			pdata->pwm_duty_min > pdata->pwm_duty_max ||
-			pdata->pwm_frequency == 0)
-		return -EINVAL;
-
-	pwmbl = devm_kzalloc(&pdev->dev, sizeof(struct atmel_pwm_bl),
-				GFP_KERNEL);
-	if (!pwmbl)
-		return -ENOMEM;
-
-	pwmbl->pdev = pdev;
-	pwmbl->pdata = pdata;
-	pwmbl->gpio_on = pdata->gpio_on;
-
-	retval = pwm_channel_alloc(pdata->pwm_channel, &pwmbl->pwmc);
-	if (retval)
-		return retval;
-
-	if (gpio_is_valid(pwmbl->gpio_on)) {
-		/* Turn display off by default. */
-		if (pdata->on_active_low)
-			flags = GPIOF_OUT_INIT_HIGH;
-		else
-			flags = GPIOF_OUT_INIT_LOW;
-
-		retval = devm_gpio_request_one(&pdev->dev, pwmbl->gpio_on,
-						flags, "gpio_atmel_pwm_bl");
-		if (retval)
-			goto err_free_pwm;
-	}
-
-	memset(&props, 0, sizeof(struct backlight_properties));
-	props.type = BACKLIGHT_RAW;
-	props.max_brightness = pdata->pwm_duty_max - pdata->pwm_duty_min;
-	bldev = devm_backlight_device_register(&pdev->dev, "atmel-pwm-bl",
-					&pdev->dev, pwmbl, &atmel_pwm_bl_ops,
-					&props);
-	if (IS_ERR(bldev)) {
-		retval = PTR_ERR(bldev);
-		goto err_free_pwm;
-	}
-
-	pwmbl->bldev = bldev;
-
-	platform_set_drvdata(pdev, pwmbl);
-
-	/* Power up the backlight by default at middle intesity. */
-	bldev->props.power = FB_BLANK_UNBLANK;
-	bldev->props.brightness = bldev->props.max_brightness / 2;
-
-	retval = atmel_pwm_bl_init_pwm(pwmbl);
-	if (retval)
-		goto err_free_pwm;
-
-	atmel_pwm_bl_set_intensity(bldev);
-
-	return 0;
-
-err_free_pwm:
-	pwm_channel_free(&pwmbl->pwmc);
-
-	return retval;
-}
-
-static int atmel_pwm_bl_remove(struct platform_device *pdev)
-{
-	struct atmel_pwm_bl *pwmbl = platform_get_drvdata(pdev);
-
-	atmel_pwm_bl_set_gpio_on(pwmbl, 0);
-	pwm_channel_disable(&pwmbl->pwmc);
-	pwm_channel_free(&pwmbl->pwmc);
-
-	return 0;
-}
-
-static struct platform_driver atmel_pwm_bl_driver = {
-	.driver = {
-		.name = "atmel-pwm-bl",
-	},
-	/* REVISIT add suspend() and resume() */
-	.probe = atmel_pwm_bl_probe,
-	.remove = atmel_pwm_bl_remove,
-};
-
-module_platform_driver(atmel_pwm_bl_driver);
-
-MODULE_AUTHOR("Hans-Christian egtvedt <hans-christian.egtvedt@atmel.com>");
-MODULE_DESCRIPTION("Atmel PWM backlight driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:atmel-pwm-bl");
diff --git a/include/linux/atmel-pwm-bl.h b/include/linux/atmel-pwm-bl.h
deleted file mode 100644
index 0153a47806c2..000000000000
--- a/include/linux/atmel-pwm-bl.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2007 Atmel Corporation
- *
- * Driver for the AT32AP700X PS/2 controller (PSIF).
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#ifndef __INCLUDE_ATMEL_PWM_BL_H
-#define __INCLUDE_ATMEL_PWM_BL_H
-
-/**
- * struct atmel_pwm_bl_platform_data
- * @pwm_channel: which PWM channel in the PWM module to use.
- * @pwm_frequency: PWM frequency to generate, the driver will try to be as
- *	close as the prescaler allows.
- * @pwm_compare_max: value to use in the PWM channel compare register.
- * @pwm_duty_max: maximum duty cycle value, must be less than or equal to
- *	pwm_compare_max.
- * @pwm_duty_min: minimum duty cycle value, must be less than pwm_duty_max.
- * @pwm_active_low: set to one if the low part of the PWM signal increases the
- *	brightness of the backlight.
- * @gpio_on: GPIO line to control the backlight on/off, set to -1 if not used.
- * @on_active_low: set to one if the on/off signal is on when GPIO is low.
- *
- * This struct must be added to the platform device in the board code. It is
- * used by the atmel-pwm-bl driver to setup the GPIO to control on/off and the
- * PWM device.
- */
-struct atmel_pwm_bl_platform_data {
-	unsigned int pwm_channel;
-	unsigned int pwm_frequency;
-	unsigned int pwm_compare_max;
-	unsigned int pwm_duty_max;
-	unsigned int pwm_duty_min;
-	unsigned int pwm_active_low;
-	int gpio_on;
-	unsigned int on_active_low;
-};
-
-#endif /* __INCLUDE_ATMEL_PWM_BL_H */
-- 
cgit v1.2.3


From f2a70e1fc1ccc0fcdf4ad12db7382134228fb552 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 29 May 2014 01:20:18 +0200
Subject: misc: atmel_pwm: remove obsolete driver

The misc/atmel_pwm is not used by any mainlined boards and has been replaced by
the pwm-driver using the generic PWM framework.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/misc/Kconfig      |  10 --
 drivers/misc/Makefile     |   1 -
 drivers/misc/atmel_pwm.c  | 402 ----------------------------------------------
 include/linux/atmel_pwm.h |  70 --------
 4 files changed, 483 deletions(-)
 delete mode 100644 drivers/misc/atmel_pwm.c
 delete mode 100644 include/linux/atmel_pwm.h

(limited to 'include/linux')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index ee9402324a23..b841180c7c74 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -51,16 +51,6 @@ config AD525X_DPOT_SPI
 	  To compile this driver as a module, choose M here: the
 	  module will be called ad525x_dpot-spi.
 
-config ATMEL_PWM
-	tristate "Atmel AT32/AT91 PWM support"
-	depends on HAVE_CLK
-	depends on AVR32 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91SAM9G45
-	help
-	  This option enables device driver support for the PWM channels
-	  on certain Atmel processors.  Pulse Width Modulation is used for
-	  purposes including software controlled power-efficient backlights
-	  on LCD displays, motor control, and waveform generation.
-
 config ATMEL_TCLIB
 	bool "Atmel AT32/AT91 Timer/Counter Library"
 	depends on (AVR32 || ARCH_AT91)
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index d59ce1261b38..5497d026e651 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -7,7 +7,6 @@ obj-$(CONFIG_AD525X_DPOT)	+= ad525x_dpot.o
 obj-$(CONFIG_AD525X_DPOT_I2C)	+= ad525x_dpot-i2c.o
 obj-$(CONFIG_AD525X_DPOT_SPI)	+= ad525x_dpot-spi.o
 obj-$(CONFIG_INTEL_MID_PTI)	+= pti.o
-obj-$(CONFIG_ATMEL_PWM)		+= atmel_pwm.o
 obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
 obj-$(CONFIG_ATMEL_TCLIB)	+= atmel_tclib.o
 obj-$(CONFIG_BMP085)		+= bmp085.o
diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c
deleted file mode 100644
index a6dc56e1bc58..000000000000
--- a/drivers/misc/atmel_pwm.c
+++ /dev/null
@@ -1,402 +0,0 @@
-#include <linux/module.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/atmel_pwm.h>
-
-
-/*
- * This is a simple driver for the PWM controller found in various newer
- * Atmel SOCs, including the AVR32 series and the AT91sam9263.
- *
- * Chips with current Linux ports have only 4 PWM channels, out of max 32.
- * AT32UC3A and AT32UC3B chips have 7 channels (but currently no Linux).
- * Docs are inconsistent about the width of the channel counter registers;
- * it's at least 16 bits, but several places say 20 bits.
- */
-#define	PWM_NCHAN	4		/* max 32 */
-
-struct pwm {
-	spinlock_t		lock;
-	struct platform_device	*pdev;
-	u32			mask;
-	int			irq;
-	void __iomem		*base;
-	struct clk		*clk;
-	struct pwm_channel	*channel[PWM_NCHAN];
-	void			(*handler[PWM_NCHAN])(struct pwm_channel *);
-};
-
-
-/* global PWM controller registers */
-#define PWM_MR		0x00
-#define PWM_ENA		0x04
-#define PWM_DIS		0x08
-#define PWM_SR		0x0c
-#define PWM_IER		0x10
-#define PWM_IDR		0x14
-#define PWM_IMR		0x18
-#define PWM_ISR		0x1c
-
-static inline void pwm_writel(const struct pwm *p, unsigned offset, u32 val)
-{
-	__raw_writel(val, p->base + offset);
-}
-
-static inline u32 pwm_readl(const struct pwm *p, unsigned offset)
-{
-	return __raw_readl(p->base + offset);
-}
-
-static inline void __iomem *pwmc_regs(const struct pwm *p, int index)
-{
-	return p->base + 0x200 + index * 0x20;
-}
-
-static struct pwm *pwm;
-
-static void pwm_dumpregs(struct pwm_channel *ch, char *tag)
-{
-	struct device	*dev = &pwm->pdev->dev;
-
-	dev_dbg(dev, "%s: mr %08x, sr %08x, imr %08x\n",
-		tag,
-		pwm_readl(pwm, PWM_MR),
-		pwm_readl(pwm, PWM_SR),
-		pwm_readl(pwm, PWM_IMR));
-	dev_dbg(dev,
-		"pwm ch%d - mr %08x, dty %u, prd %u, cnt %u\n",
-		ch->index,
-		pwm_channel_readl(ch, PWM_CMR),
-		pwm_channel_readl(ch, PWM_CDTY),
-		pwm_channel_readl(ch, PWM_CPRD),
-		pwm_channel_readl(ch, PWM_CCNT));
-}
-
-
-/**
- * pwm_channel_alloc - allocate an unused PWM channel
- * @index: identifies the channel
- * @ch: structure to be initialized
- *
- * Drivers allocate PWM channels according to the board's wiring, and
- * matching board-specific setup code.  Returns zero or negative errno.
- */
-int pwm_channel_alloc(int index, struct pwm_channel *ch)
-{
-	unsigned long	flags;
-	int		status = 0;
-
-	if (!pwm)
-		return -EPROBE_DEFER;
-
-	if (!(pwm->mask & 1 << index))
-		return -ENODEV;
-
-	if (index < 0 || index >= PWM_NCHAN || !ch)
-		return -EINVAL;
-	memset(ch, 0, sizeof *ch);
-
-	spin_lock_irqsave(&pwm->lock, flags);
-	if (pwm->channel[index])
-		status = -EBUSY;
-	else {
-		clk_enable(pwm->clk);
-
-		ch->regs = pwmc_regs(pwm, index);
-		ch->index = index;
-
-		/* REVISIT: ap7000 seems to go 2x as fast as we expect!! */
-		ch->mck = clk_get_rate(pwm->clk);
-
-		pwm->channel[index] = ch;
-		pwm->handler[index] = NULL;
-
-		/* channel and irq are always disabled when we return */
-		pwm_writel(pwm, PWM_DIS, 1 << index);
-		pwm_writel(pwm, PWM_IDR, 1 << index);
-	}
-	spin_unlock_irqrestore(&pwm->lock, flags);
-	return status;
-}
-EXPORT_SYMBOL(pwm_channel_alloc);
-
-static int pwmcheck(struct pwm_channel *ch)
-{
-	int		index;
-
-	if (!pwm)
-		return -ENODEV;
-	if (!ch)
-		return -EINVAL;
-	index = ch->index;
-	if (index < 0 || index >= PWM_NCHAN || pwm->channel[index] != ch)
-		return -EINVAL;
-
-	return index;
-}
-
-/**
- * pwm_channel_free - release a previously allocated channel
- * @ch: the channel being released
- *
- * The channel is completely shut down (counter and IRQ disabled),
- * and made available for re-use.  Returns zero, or negative errno.
- */
-int pwm_channel_free(struct pwm_channel *ch)
-{
-	unsigned long	flags;
-	int		t;
-
-	spin_lock_irqsave(&pwm->lock, flags);
-	t = pwmcheck(ch);
-	if (t >= 0) {
-		pwm->channel[t] = NULL;
-		pwm->handler[t] = NULL;
-
-		/* channel and irq are always disabled when we return */
-		pwm_writel(pwm, PWM_DIS, 1 << t);
-		pwm_writel(pwm, PWM_IDR, 1 << t);
-
-		clk_disable(pwm->clk);
-		t = 0;
-	}
-	spin_unlock_irqrestore(&pwm->lock, flags);
-	return t;
-}
-EXPORT_SYMBOL(pwm_channel_free);
-
-int __pwm_channel_onoff(struct pwm_channel *ch, int enabled)
-{
-	unsigned long	flags;
-	int		t;
-
-	/* OMITTED FUNCTIONALITY:  starting several channels in synch */
-
-	spin_lock_irqsave(&pwm->lock, flags);
-	t = pwmcheck(ch);
-	if (t >= 0) {
-		pwm_writel(pwm, enabled ? PWM_ENA : PWM_DIS, 1 << t);
-		t = 0;
-		pwm_dumpregs(ch, enabled ? "enable" : "disable");
-	}
-	spin_unlock_irqrestore(&pwm->lock, flags);
-
-	return t;
-}
-EXPORT_SYMBOL(__pwm_channel_onoff);
-
-/**
- * pwm_clk_alloc - allocate and configure CLKA or CLKB
- * @prescale: from 0..10, the power of two used to divide MCK
- * @div: from 1..255, the linear divisor to use
- *
- * Returns PWM_CPR_CLKA, PWM_CPR_CLKB, or negative errno.  The allocated
- * clock will run with a period of (2^prescale * div) / MCK, or twice as
- * long if center aligned PWM output is used.  The clock must later be
- * deconfigured using pwm_clk_free().
- */
-int pwm_clk_alloc(unsigned prescale, unsigned div)
-{
-	unsigned long	flags;
-	u32		mr;
-	u32		val = (prescale << 8) | div;
-	int		ret = -EBUSY;
-
-	if (prescale >= 10 || div == 0 || div > 255)
-		return -EINVAL;
-
-	spin_lock_irqsave(&pwm->lock, flags);
-	mr = pwm_readl(pwm, PWM_MR);
-	if ((mr & 0xffff) == 0) {
-		mr |= val;
-		ret = PWM_CPR_CLKA;
-	} else if ((mr & (0xffff << 16)) == 0) {
-		mr |= val << 16;
-		ret = PWM_CPR_CLKB;
-	}
-	if (ret > 0)
-		pwm_writel(pwm, PWM_MR, mr);
-	spin_unlock_irqrestore(&pwm->lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(pwm_clk_alloc);
-
-/**
- * pwm_clk_free - deconfigure and release CLKA or CLKB
- *
- * Reverses the effect of pwm_clk_alloc().
- */
-void pwm_clk_free(unsigned clk)
-{
-	unsigned long	flags;
-	u32		mr;
-
-	spin_lock_irqsave(&pwm->lock, flags);
-	mr = pwm_readl(pwm, PWM_MR);
-	if (clk == PWM_CPR_CLKA)
-		pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 0));
-	if (clk == PWM_CPR_CLKB)
-		pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 16));
-	spin_unlock_irqrestore(&pwm->lock, flags);
-}
-EXPORT_SYMBOL(pwm_clk_free);
-
-/**
- * pwm_channel_handler - manage channel's IRQ handler
- * @ch: the channel
- * @handler: the handler to use, possibly NULL
- *
- * If the handler is non-null, the handler will be called after every
- * period of this PWM channel.  If the handler is null, this channel
- * won't generate an IRQ.
- */
-int pwm_channel_handler(struct pwm_channel *ch,
-		void (*handler)(struct pwm_channel *ch))
-{
-	unsigned long	flags;
-	int		t;
-
-	spin_lock_irqsave(&pwm->lock, flags);
-	t = pwmcheck(ch);
-	if (t >= 0) {
-		pwm->handler[t] = handler;
-		pwm_writel(pwm, handler ? PWM_IER : PWM_IDR, 1 << t);
-		t = 0;
-	}
-	spin_unlock_irqrestore(&pwm->lock, flags);
-
-	return t;
-}
-EXPORT_SYMBOL(pwm_channel_handler);
-
-static irqreturn_t pwm_irq(int id, void *_pwm)
-{
-	struct pwm	*p = _pwm;
-	irqreturn_t	handled = IRQ_NONE;
-	u32		irqstat;
-	int		index;
-
-	spin_lock(&p->lock);
-
-	/* ack irqs, then handle them */
-	irqstat = pwm_readl(pwm, PWM_ISR);
-
-	while (irqstat) {
-		struct pwm_channel *ch;
-		void (*handler)(struct pwm_channel *ch);
-
-		index = ffs(irqstat) - 1;
-		irqstat &= ~(1 << index);
-		ch = pwm->channel[index];
-		handler = pwm->handler[index];
-		if (handler && ch) {
-			spin_unlock(&p->lock);
-			handler(ch);
-			spin_lock(&p->lock);
-			handled = IRQ_HANDLED;
-		}
-	}
-
-	spin_unlock(&p->lock);
-	return handled;
-}
-
-static int __init pwm_probe(struct platform_device *pdev)
-{
-	struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	int irq = platform_get_irq(pdev, 0);
-	u32 *mp = pdev->dev.platform_data;
-	struct pwm *p;
-	int status = -EIO;
-
-	if (pwm)
-		return -EBUSY;
-	if (!r || irq < 0 || !mp || !*mp)
-		return -ENODEV;
-	if (*mp & ~((1<<PWM_NCHAN)-1)) {
-		dev_warn(&pdev->dev, "mask 0x%x ... more than %d channels\n",
-			*mp, PWM_NCHAN);
-		return -EINVAL;
-	}
-
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-
-	spin_lock_init(&p->lock);
-	p->pdev = pdev;
-	p->mask = *mp;
-	p->irq = irq;
-	p->base = ioremap(r->start, resource_size(r));
-	if (!p->base)
-		goto fail;
-	p->clk = clk_get(&pdev->dev, "pwm_clk");
-	if (IS_ERR(p->clk)) {
-		status = PTR_ERR(p->clk);
-		p->clk = NULL;
-		goto fail;
-	}
-
-	status = request_irq(irq, pwm_irq, 0, pdev->name, p);
-	if (status < 0)
-		goto fail;
-
-	pwm = p;
-	platform_set_drvdata(pdev, p);
-
-	return 0;
-
-fail:
-	if (p->clk)
-		clk_put(p->clk);
-	if (p->base)
-		iounmap(p->base);
-
-	kfree(p);
-	return status;
-}
-
-static int __exit pwm_remove(struct platform_device *pdev)
-{
-	struct pwm *p = platform_get_drvdata(pdev);
-
-	if (p != pwm)
-		return -EINVAL;
-
-	clk_enable(pwm->clk);
-	pwm_writel(pwm, PWM_DIS, (1 << PWM_NCHAN) - 1);
-	pwm_writel(pwm, PWM_IDR, (1 << PWM_NCHAN) - 1);
-	clk_disable(pwm->clk);
-
-	pwm = NULL;
-
-	free_irq(p->irq, p);
-	clk_put(p->clk);
-	iounmap(p->base);
-	kfree(p);
-
-	return 0;
-}
-
-static struct platform_driver atmel_pwm_driver = {
-	.driver = {
-		.name = "atmel_pwm",
-		.owner = THIS_MODULE,
-	},
-	.remove = __exit_p(pwm_remove),
-
-	/* NOTE: PWM can keep running in AVR32 "idle" and "frozen" states;
-	 * and all AT91sam9263 states, albeit at reduced clock rate if
-	 * MCK becomes the slow clock (i.e. what Linux labels STR).
-	 */
-};
-
-module_platform_driver_probe(atmel_pwm_driver, pwm_probe);
-
-MODULE_DESCRIPTION("Driver for AT32/AT91 PWM module");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:atmel_pwm");
diff --git a/include/linux/atmel_pwm.h b/include/linux/atmel_pwm.h
deleted file mode 100644
index ea04abb3db8e..000000000000
--- a/include/linux/atmel_pwm.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef __LINUX_ATMEL_PWM_H
-#define __LINUX_ATMEL_PWM_H
-
-/**
- * struct pwm_channel - driver handle to a PWM channel
- * @regs: base of this channel's registers
- * @index: number of this channel (0..31)
- * @mck: base clock rate, which can be prescaled and maybe subdivided
- *
- * Drivers initialize a pwm_channel structure using pwm_channel_alloc().
- * Then they configure its clock rate (derived from MCK), alignment,
- * polarity, and duty cycle by writing directly to the channel registers,
- * before enabling the channel by calling pwm_channel_enable().
- *
- * After emitting a PWM signal for the desired length of time, drivers
- * may then pwm_channel_disable() or pwm_channel_free().  Both of these
- * disable the channel, but when it's freed the IRQ is deconfigured and
- * the channel must later be re-allocated and reconfigured.
- *
- * Note that if the period or duty cycle need to be changed while the
- * PWM channel is operating, drivers must use the PWM_CUPD double buffer
- * mechanism, either polling until they change or getting implicitly
- * notified through a once-per-period interrupt handler.
- */
-struct pwm_channel {
-	void __iomem	*regs;
-	unsigned	index;
-	unsigned long	mck;
-};
-
-extern int pwm_channel_alloc(int index, struct pwm_channel *ch);
-extern int pwm_channel_free(struct pwm_channel *ch);
-
-extern int pwm_clk_alloc(unsigned prescale, unsigned div);
-extern void pwm_clk_free(unsigned clk);
-
-extern int __pwm_channel_onoff(struct pwm_channel *ch, int enabled);
-
-#define pwm_channel_enable(ch)	__pwm_channel_onoff((ch), 1)
-#define pwm_channel_disable(ch)	__pwm_channel_onoff((ch), 0)
-
-/* periodic interrupts, mostly for CUPD changes to period or cycle */
-extern int pwm_channel_handler(struct pwm_channel *ch,
-		void (*handler)(struct pwm_channel *ch));
-
-/* per-channel registers (banked at pwm_channel->regs) */
-#define PWM_CMR		0x00		/* mode register */
-#define		PWM_CPR_CPD	(1 << 10)	/* set: CUPD modifies period */
-#define		PWM_CPR_CPOL	(1 << 9)	/* set: idle high */
-#define		PWM_CPR_CALG	(1 << 8)	/* set: center align */
-#define		PWM_CPR_CPRE	(0xf << 0)	/* mask: rate is mck/(2^pre) */
-#define		PWM_CPR_CLKA	(0xb << 0)	/* rate CLKA */
-#define		PWM_CPR_CLKB	(0xc << 0)	/* rate CLKB */
-#define PWM_CDTY	0x04		/* duty cycle (max of CPRD) */
-#define PWM_CPRD	0x08		/* period (count up from zero) */
-#define PWM_CCNT	0x0c		/* counter (20 bits?) */
-#define PWM_CUPD	0x10		/* update CPRD (or CDTY) next period */
-
-static inline void
-pwm_channel_writel(struct pwm_channel *pwmc, unsigned offset, u32 val)
-{
-	__raw_writel(val, pwmc->regs + offset);
-}
-
-static inline u32 pwm_channel_readl(struct pwm_channel *pwmc, unsigned offset)
-{
-	return __raw_readl(pwmc->regs + offset);
-}
-
-#endif /* __LINUX_ATMEL_PWM_H */
-- 
cgit v1.2.3


From 8cd118308a8649c649533a0133af0ce731d223bb Mon Sep 17 00:00:00 2001
From: Micky Ching <micky_ching@realsil.com.cn>
Date: Fri, 6 Jun 2014 15:05:44 +0800
Subject: mfd: rtsx: Add dma transfer function

rtsx driver using a single function for transfer data, dma map/unmap are
placed in one fix function. We need map/unmap dma in different place(for
mmc async driver), so add three function for dma map, dma transfer and
dma unmap.

Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/rtsx_pcr.c       | 76 ++++++++++++++++++++++++++++----------------
 include/linux/mfd/rtsx_pci.h |  6 ++++
 2 files changed, 54 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 1d15735f9ef9..d01b8c249231 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -337,40 +337,64 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
 int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read, int timeout)
 {
-	struct completion trans_done;
-	u8 dir;
-	int err = 0, i, count;
-	long timeleft;
-	unsigned long flags;
-	struct scatterlist *sg;
-	enum dma_data_direction dma_dir;
-	u32 val;
-	dma_addr_t addr;
-	unsigned int len;
+	int err = 0, count;
 
 	dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg);
+	count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read);
+	if (count < 1)
+		return -EINVAL;
+	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
+
+	err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout);
+
+	rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+
+int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read)
+{
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 
-	/* don't transfer data during abort processing */
 	if (pcr->remove_pci)
 		return -EINVAL;
 
 	if ((sglist == NULL) || (num_sg <= 0))
 		return -EINVAL;
 
-	if (read) {
-		dir = DEVICE_TO_HOST;
-		dma_dir = DMA_FROM_DEVICE;
-	} else {
-		dir = HOST_TO_DEVICE;
-		dma_dir = DMA_TO_DEVICE;
-	}
+	return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg);
 
-	count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
-	if (count < 1) {
-		dev_err(&(pcr->pci->dev), "scatterlist map failed\n");
+void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read)
+{
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg);
+
+int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int count, bool read, int timeout)
+{
+	struct completion trans_done;
+	struct scatterlist *sg;
+	dma_addr_t addr;
+	long timeleft;
+	unsigned long flags;
+	unsigned int len;
+	int i, err = 0;
+	u32 val;
+	u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE;
+
+	if (pcr->remove_pci)
+		return -ENODEV;
+
+	if ((sglist == NULL) || (count < 1))
 		return -EINVAL;
-	}
-	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
 
 	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
 	pcr->sgi = 0;
@@ -400,12 +424,10 @@ int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 	}
 
 	spin_lock_irqsave(&pcr->lock, flags);
-
 	if (pcr->trans_result == TRANS_RESULT_FAIL)
 		err = -EINVAL;
 	else if (pcr->trans_result == TRANS_NO_DEVICE)
 		err = -ENODEV;
-
 	spin_unlock_irqrestore(&pcr->lock, flags);
 
 out:
@@ -413,8 +435,6 @@ out:
 	pcr->done = NULL;
 	spin_unlock_irqrestore(&pcr->lock, flags);
 
-	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
-
 	if ((err < 0) && (err != -ENODEV))
 		rtsx_pci_stop_cmd(pcr);
 
@@ -423,7 +443,7 @@ out:
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer);
 
 int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
 {
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index a3835976f7c6..74346d5e7899 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -943,6 +943,12 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
 int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
 int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read, int timeout);
+int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read);
+void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read);
+int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int count, bool read, int timeout);
 int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
 int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
 int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card);
-- 
cgit v1.2.3


From bdbc736da636956a40435f5f41d2be6af544c3fb Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Mon, 2 Jun 2014 09:50:43 +0100
Subject: mfd: arizona: Lower ARIZONA_MAX_CORE_SUPPLIES to 2

There are no Arizona devices with 3 core supplies but we define a fix
array with space for 3 core supplies. Lower the ARIZONA_MAX_CORE_SUPPLIES
define to 2, to save a few bytes.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/arizona/core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index 6d9371f88875..70854d892760 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -18,7 +18,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/mfd/arizona/pdata.h>
 
-#define ARIZONA_MAX_CORE_SUPPLIES 3
+#define ARIZONA_MAX_CORE_SUPPLIES 2
 
 enum arizona_type {
 	WM5102 = 1,
-- 
cgit v1.2.3


From 10f9edaeaa30468194e1dcd0e47e59b012f4cf8b Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Wed, 18 Jun 2014 21:05:40 +0400
Subject: mfd: mc13xxx: Use regmap irq framework for interrupts

This patch convert mc13xxx MFD driver to use regmap irq framework
for interrupt registration.

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig         |   1 +
 drivers/mfd/mc13xxx-core.c  | 310 ++++++--------------------------------------
 drivers/mfd/mc13xxx.h       |  11 +-
 include/linux/mfd/mc13783.h |   1 -
 include/linux/mfd/mc13xxx.h |  23 +++-
 5 files changed, 65 insertions(+), 281 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 207c433074af..defe58d65940 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -187,6 +187,7 @@ config MFD_MC13XXX
 	tristate
 	depends on (SPI_MASTER || I2C)
 	select MFD_CORE
+	select REGMAP_IRQ
 	help
 	  Enable support for the Freescale MC13783 and MC13892 PMICs.
 	  This driver provides common support for accessing the device,
diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index acf5dd712eb2..2b6bc868cd3d 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -10,106 +10,18 @@
  * Free Software Foundation.
  */
 
-#include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/mutex.h>
-#include <linux/interrupt.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/mc13xxx.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
 
 #include "mc13xxx.h"
 
 #define MC13XXX_IRQSTAT0	0
-#define MC13XXX_IRQSTAT0_ADCDONEI	(1 << 0)
-#define MC13XXX_IRQSTAT0_ADCBISDONEI	(1 << 1)
-#define MC13XXX_IRQSTAT0_TSI		(1 << 2)
-#define MC13783_IRQSTAT0_WHIGHI		(1 << 3)
-#define MC13783_IRQSTAT0_WLOWI		(1 << 4)
-#define MC13XXX_IRQSTAT0_CHGDETI	(1 << 6)
-#define MC13783_IRQSTAT0_CHGOVI		(1 << 7)
-#define MC13XXX_IRQSTAT0_CHGREVI	(1 << 8)
-#define MC13XXX_IRQSTAT0_CHGSHORTI	(1 << 9)
-#define MC13XXX_IRQSTAT0_CCCVI		(1 << 10)
-#define MC13XXX_IRQSTAT0_CHGCURRI	(1 << 11)
-#define MC13XXX_IRQSTAT0_BPONI		(1 << 12)
-#define MC13XXX_IRQSTAT0_LOBATLI	(1 << 13)
-#define MC13XXX_IRQSTAT0_LOBATHI	(1 << 14)
-#define MC13783_IRQSTAT0_UDPI		(1 << 15)
-#define MC13783_IRQSTAT0_USBI		(1 << 16)
-#define MC13783_IRQSTAT0_IDI		(1 << 19)
-#define MC13783_IRQSTAT0_SE1I		(1 << 21)
-#define MC13783_IRQSTAT0_CKDETI		(1 << 22)
-#define MC13783_IRQSTAT0_UDMI		(1 << 23)
-
 #define MC13XXX_IRQMASK0	1
-#define MC13XXX_IRQMASK0_ADCDONEM	MC13XXX_IRQSTAT0_ADCDONEI
-#define MC13XXX_IRQMASK0_ADCBISDONEM	MC13XXX_IRQSTAT0_ADCBISDONEI
-#define MC13XXX_IRQMASK0_TSM		MC13XXX_IRQSTAT0_TSI
-#define MC13783_IRQMASK0_WHIGHM		MC13783_IRQSTAT0_WHIGHI
-#define MC13783_IRQMASK0_WLOWM		MC13783_IRQSTAT0_WLOWI
-#define MC13XXX_IRQMASK0_CHGDETM	MC13XXX_IRQSTAT0_CHGDETI
-#define MC13783_IRQMASK0_CHGOVM		MC13783_IRQSTAT0_CHGOVI
-#define MC13XXX_IRQMASK0_CHGREVM	MC13XXX_IRQSTAT0_CHGREVI
-#define MC13XXX_IRQMASK0_CHGSHORTM	MC13XXX_IRQSTAT0_CHGSHORTI
-#define MC13XXX_IRQMASK0_CCCVM		MC13XXX_IRQSTAT0_CCCVI
-#define MC13XXX_IRQMASK0_CHGCURRM	MC13XXX_IRQSTAT0_CHGCURRI
-#define MC13XXX_IRQMASK0_BPONM		MC13XXX_IRQSTAT0_BPONI
-#define MC13XXX_IRQMASK0_LOBATLM	MC13XXX_IRQSTAT0_LOBATLI
-#define MC13XXX_IRQMASK0_LOBATHM	MC13XXX_IRQSTAT0_LOBATHI
-#define MC13783_IRQMASK0_UDPM		MC13783_IRQSTAT0_UDPI
-#define MC13783_IRQMASK0_USBM		MC13783_IRQSTAT0_USBI
-#define MC13783_IRQMASK0_IDM		MC13783_IRQSTAT0_IDI
-#define MC13783_IRQMASK0_SE1M		MC13783_IRQSTAT0_SE1I
-#define MC13783_IRQMASK0_CKDETM		MC13783_IRQSTAT0_CKDETI
-#define MC13783_IRQMASK0_UDMM		MC13783_IRQSTAT0_UDMI
-
 #define MC13XXX_IRQSTAT1	3
-#define MC13XXX_IRQSTAT1_1HZI		(1 << 0)
-#define MC13XXX_IRQSTAT1_TODAI		(1 << 1)
-#define MC13783_IRQSTAT1_ONOFD1I	(1 << 3)
-#define MC13783_IRQSTAT1_ONOFD2I	(1 << 4)
-#define MC13783_IRQSTAT1_ONOFD3I	(1 << 5)
-#define MC13XXX_IRQSTAT1_SYSRSTI	(1 << 6)
-#define MC13XXX_IRQSTAT1_RTCRSTI	(1 << 7)
-#define MC13XXX_IRQSTAT1_PCI		(1 << 8)
-#define MC13XXX_IRQSTAT1_WARMI		(1 << 9)
-#define MC13XXX_IRQSTAT1_MEMHLDI	(1 << 10)
-#define MC13783_IRQSTAT1_PWRRDYI	(1 << 11)
-#define MC13XXX_IRQSTAT1_THWARNLI	(1 << 12)
-#define MC13XXX_IRQSTAT1_THWARNHI	(1 << 13)
-#define MC13XXX_IRQSTAT1_CLKI		(1 << 14)
-#define MC13783_IRQSTAT1_SEMAFI		(1 << 15)
-#define MC13783_IRQSTAT1_MC2BI		(1 << 17)
-#define MC13783_IRQSTAT1_HSDETI		(1 << 18)
-#define MC13783_IRQSTAT1_HSLI		(1 << 19)
-#define MC13783_IRQSTAT1_ALSPTHI	(1 << 20)
-#define MC13783_IRQSTAT1_AHSSHORTI	(1 << 21)
-
 #define MC13XXX_IRQMASK1	4
-#define MC13XXX_IRQMASK1_1HZM		MC13XXX_IRQSTAT1_1HZI
-#define MC13XXX_IRQMASK1_TODAM		MC13XXX_IRQSTAT1_TODAI
-#define MC13783_IRQMASK1_ONOFD1M	MC13783_IRQSTAT1_ONOFD1I
-#define MC13783_IRQMASK1_ONOFD2M	MC13783_IRQSTAT1_ONOFD2I
-#define MC13783_IRQMASK1_ONOFD3M	MC13783_IRQSTAT1_ONOFD3I
-#define MC13XXX_IRQMASK1_SYSRSTM	MC13XXX_IRQSTAT1_SYSRSTI
-#define MC13XXX_IRQMASK1_RTCRSTM	MC13XXX_IRQSTAT1_RTCRSTI
-#define MC13XXX_IRQMASK1_PCM		MC13XXX_IRQSTAT1_PCI
-#define MC13XXX_IRQMASK1_WARMM		MC13XXX_IRQSTAT1_WARMI
-#define MC13XXX_IRQMASK1_MEMHLDM	MC13XXX_IRQSTAT1_MEMHLDI
-#define MC13783_IRQMASK1_PWRRDYM	MC13783_IRQSTAT1_PWRRDYI
-#define MC13XXX_IRQMASK1_THWARNLM	MC13XXX_IRQSTAT1_THWARNLI
-#define MC13XXX_IRQMASK1_THWARNHM	MC13XXX_IRQSTAT1_THWARNHI
-#define MC13XXX_IRQMASK1_CLKM		MC13XXX_IRQSTAT1_CLKI
-#define MC13783_IRQMASK1_SEMAFM		MC13783_IRQSTAT1_SEMAFI
-#define MC13783_IRQMASK1_MC2BM		MC13783_IRQSTAT1_MC2BI
-#define MC13783_IRQMASK1_HSDETM		MC13783_IRQSTAT1_HSDETI
-#define MC13783_IRQMASK1_HSLM		MC13783_IRQSTAT1_HSLI
-#define MC13783_IRQMASK1_ALSPTHM	MC13783_IRQSTAT1_ALSPTHI
-#define MC13783_IRQMASK1_AHSSHORTM	MC13783_IRQSTAT1_AHSSHORTI
 
 #define MC13XXX_REVISION	7
 #define MC13XXX_REVISION_REVMETAL	(0x07 <<  0)
@@ -189,45 +101,21 @@ EXPORT_SYMBOL(mc13xxx_reg_rmw);
 
 int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq)
 {
-	int ret;
-	unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1;
-	u32 irqbit = 1 << (irq < 24 ? irq : irq - 24);
-	u32 mask;
-
-	if (irq < 0 || irq >= MC13XXX_NUM_IRQ)
-		return -EINVAL;
-
-	ret = mc13xxx_reg_read(mc13xxx, offmask, &mask);
-	if (ret)
-		return ret;
+	int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq);
 
-	if (mask & irqbit)
-		/* already masked */
-		return 0;
+	disable_irq_nosync(virq);
 
-	return mc13xxx_reg_write(mc13xxx, offmask, mask | irqbit);
+	return 0;
 }
 EXPORT_SYMBOL(mc13xxx_irq_mask);
 
 int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq)
 {
-	int ret;
-	unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1;
-	u32 irqbit = 1 << (irq < 24 ? irq : irq - 24);
-	u32 mask;
-
-	if (irq < 0 || irq >= MC13XXX_NUM_IRQ)
-		return -EINVAL;
+	int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq);
 
-	ret = mc13xxx_reg_read(mc13xxx, offmask, &mask);
-	if (ret)
-		return ret;
+	enable_irq(virq);
 
-	if (!(mask & irqbit))
-		/* already unmasked */
-		return 0;
-
-	return mc13xxx_reg_write(mc13xxx, offmask, mask & ~irqbit);
+	return 0;
 }
 EXPORT_SYMBOL(mc13xxx_irq_unmask);
 
@@ -239,7 +127,7 @@ int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq,
 	unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1;
 	u32 irqbit = 1 << (irq < 24 ? irq : irq - 24);
 
-	if (irq < 0 || irq >= MC13XXX_NUM_IRQ)
+	if (irq < 0 || irq >= ARRAY_SIZE(mc13xxx->irqs))
 		return -EINVAL;
 
 	if (enabled) {
@@ -266,147 +154,26 @@ int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq,
 }
 EXPORT_SYMBOL(mc13xxx_irq_status);
 
-int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq)
-{
-	unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1;
-	unsigned int val = 1 << (irq < 24 ? irq : irq - 24);
-
-	BUG_ON(irq < 0 || irq >= MC13XXX_NUM_IRQ);
-
-	return mc13xxx_reg_write(mc13xxx, offstat, val);
-}
-EXPORT_SYMBOL(mc13xxx_irq_ack);
-
-int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq,
-		irq_handler_t handler, const char *name, void *dev)
-{
-	BUG_ON(!mutex_is_locked(&mc13xxx->lock));
-	BUG_ON(!handler);
-
-	if (irq < 0 || irq >= MC13XXX_NUM_IRQ)
-		return -EINVAL;
-
-	if (mc13xxx->irqhandler[irq])
-		return -EBUSY;
-
-	mc13xxx->irqhandler[irq] = handler;
-	mc13xxx->irqdata[irq] = dev;
-
-	return 0;
-}
-EXPORT_SYMBOL(mc13xxx_irq_request_nounmask);
-
 int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq,
 		irq_handler_t handler, const char *name, void *dev)
 {
-	int ret;
+	int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq);
 
-	ret = mc13xxx_irq_request_nounmask(mc13xxx, irq, handler, name, dev);
-	if (ret)
-		return ret;
-
-	ret = mc13xxx_irq_unmask(mc13xxx, irq);
-	if (ret) {
-		mc13xxx->irqhandler[irq] = NULL;
-		mc13xxx->irqdata[irq] = NULL;
-		return ret;
-	}
-
-	return 0;
+	return devm_request_threaded_irq(mc13xxx->dev, virq, NULL, handler,
+					 0, name, dev);
 }
 EXPORT_SYMBOL(mc13xxx_irq_request);
 
 int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev)
 {
-	int ret;
-	BUG_ON(!mutex_is_locked(&mc13xxx->lock));
+	int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq);
 
-	if (irq < 0 || irq >= MC13XXX_NUM_IRQ || !mc13xxx->irqhandler[irq] ||
-			mc13xxx->irqdata[irq] != dev)
-		return -EINVAL;
-
-	ret = mc13xxx_irq_mask(mc13xxx, irq);
-	if (ret)
-		return ret;
-
-	mc13xxx->irqhandler[irq] = NULL;
-	mc13xxx->irqdata[irq] = NULL;
+	devm_free_irq(mc13xxx->dev, virq, dev);
 
 	return 0;
 }
 EXPORT_SYMBOL(mc13xxx_irq_free);
 
-static inline irqreturn_t mc13xxx_irqhandler(struct mc13xxx *mc13xxx, int irq)
-{
-	return mc13xxx->irqhandler[irq](irq, mc13xxx->irqdata[irq]);
-}
-
-/*
- * returns: number of handled irqs or negative error
- * locking: holds mc13xxx->lock
- */
-static int mc13xxx_irq_handle(struct mc13xxx *mc13xxx,
-		unsigned int offstat, unsigned int offmask, int baseirq)
-{
-	u32 stat, mask;
-	int ret = mc13xxx_reg_read(mc13xxx, offstat, &stat);
-	int num_handled = 0;
-
-	if (ret)
-		return ret;
-
-	ret = mc13xxx_reg_read(mc13xxx, offmask, &mask);
-	if (ret)
-		return ret;
-
-	while (stat & ~mask) {
-		int irq = __ffs(stat & ~mask);
-
-		stat &= ~(1 << irq);
-
-		if (likely(mc13xxx->irqhandler[baseirq + irq])) {
-			irqreturn_t handled;
-
-			handled = mc13xxx_irqhandler(mc13xxx, baseirq + irq);
-			if (handled == IRQ_HANDLED)
-				num_handled++;
-		} else {
-			dev_err(mc13xxx->dev,
-					"BUG: irq %u but no handler\n",
-					baseirq + irq);
-
-			mask |= 1 << irq;
-
-			ret = mc13xxx_reg_write(mc13xxx, offmask, mask);
-		}
-	}
-
-	return num_handled;
-}
-
-static irqreturn_t mc13xxx_irq_thread(int irq, void *data)
-{
-	struct mc13xxx *mc13xxx = data;
-	irqreturn_t ret;
-	int handled = 0;
-
-	mc13xxx_lock(mc13xxx);
-
-	ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT0,
-			MC13XXX_IRQMASK0, 0);
-	if (ret > 0)
-		handled = 1;
-
-	ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT1,
-			MC13XXX_IRQMASK1, 24);
-	if (ret > 0)
-		handled = 1;
-
-	mc13xxx_unlock(mc13xxx);
-
-	return IRQ_RETVAL(handled);
-}
-
 #define maskval(reg, mask)	(((reg) & (mask)) >> __ffs(mask))
 static void mc13xxx_print_revision(struct mc13xxx *mc13xxx, u32 revision)
 {
@@ -475,8 +242,6 @@ static irqreturn_t mc13xxx_handler_adcdone(int irq, void *data)
 {
 	struct mc13xxx_adcdone_data *adcdone_data = data;
 
-	mc13xxx_irq_ack(adcdone_data->mc13xxx, irq);
-
 	complete_all(&adcdone_data->done);
 
 	return IRQ_HANDLED;
@@ -544,7 +309,6 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
 	dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__);
 	mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE,
 			mc13xxx_handler_adcdone, __func__, &adcdone_data);
-	mc13xxx_irq_ack(mc13xxx, MC13XXX_IRQ_ADCDONE);
 
 	mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, adc0);
 	mc13xxx_reg_write(mc13xxx, MC13XXX_ADC1, adc1);
@@ -599,7 +363,8 @@ static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx,
 	if (!cell.name)
 		return -ENOMEM;
 
-	return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0, NULL);
+	return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0,
+			       regmap_irq_get_domain(mc13xxx->irq_data));
 }
 
 static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format)
@@ -640,8 +405,8 @@ int mc13xxx_common_init(struct device *dev)
 {
 	struct mc13xxx_platform_data *pdata = dev_get_platdata(dev);
 	struct mc13xxx *mc13xxx = dev_get_drvdata(dev);
-	int ret;
 	u32 revision;
+	int i, ret;
 
 	mc13xxx->dev = dev;
 
@@ -651,31 +416,32 @@ int mc13xxx_common_init(struct device *dev)
 
 	mc13xxx->variant->print_revision(mc13xxx, revision);
 
-	/* mask all irqs */
-	ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK0, 0x00ffffff);
-	if (ret)
-		return ret;
+	for (i = 0; i < ARRAY_SIZE(mc13xxx->irqs); i++) {
+		mc13xxx->irqs[i].reg_offset = i / MC13XXX_IRQ_PER_REG;
+		mc13xxx->irqs[i].mask = BIT(i % MC13XXX_IRQ_PER_REG);
+	}
 
-	ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK1, 0x00ffffff);
+	mc13xxx->irq_chip.name = dev_name(dev);
+	mc13xxx->irq_chip.status_base = MC13XXX_IRQSTAT0;
+	mc13xxx->irq_chip.mask_base = MC13XXX_IRQMASK0;
+	mc13xxx->irq_chip.ack_base = MC13XXX_IRQSTAT0;
+	mc13xxx->irq_chip.irq_reg_stride = MC13XXX_IRQSTAT1 - MC13XXX_IRQSTAT0;
+	mc13xxx->irq_chip.init_ack_masked = true;
+	mc13xxx->irq_chip.use_ack = true;
+	mc13xxx->irq_chip.num_regs = MC13XXX_IRQ_REG_CNT;
+	mc13xxx->irq_chip.irqs = mc13xxx->irqs;
+	mc13xxx->irq_chip.num_irqs = ARRAY_SIZE(mc13xxx->irqs);
+
+	ret = regmap_add_irq_chip(mc13xxx->regmap, mc13xxx->irq, IRQF_ONESHOT,
+				  0, &mc13xxx->irq_chip, &mc13xxx->irq_data);
 	if (ret)
 		return ret;
 
 	mutex_init(&mc13xxx->lock);
 
-	ret = request_threaded_irq(mc13xxx->irq, NULL, mc13xxx_irq_thread,
-			IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13xxx", mc13xxx);
-	if (ret)
-		return ret;
-
 	if (mc13xxx_probe_flags_dt(mc13xxx) < 0 && pdata)
 		mc13xxx->flags = pdata->flags;
 
-	if (mc13xxx->flags & MC13XXX_USE_ADC)
-		mc13xxx_add_subdevice(mc13xxx, "%s-adc");
-
-	if (mc13xxx->flags & MC13XXX_USE_RTC)
-		mc13xxx_add_subdevice(mc13xxx, "%s-rtc");
-
 	if (pdata) {
 		mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator",
 			&pdata->regulators, sizeof(pdata->regulators));
@@ -699,6 +465,12 @@ int mc13xxx_common_init(struct device *dev)
 			mc13xxx_add_subdevice(mc13xxx, "%s-ts");
 	}
 
+	if (mc13xxx->flags & MC13XXX_USE_ADC)
+		mc13xxx_add_subdevice(mc13xxx, "%s-adc");
+
+	if (mc13xxx->flags & MC13XXX_USE_RTC)
+		mc13xxx_add_subdevice(mc13xxx, "%s-rtc");
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mc13xxx_common_init);
@@ -707,8 +479,8 @@ int mc13xxx_common_exit(struct device *dev)
 {
 	struct mc13xxx *mc13xxx = dev_get_drvdata(dev);
 
-	free_irq(mc13xxx->irq, mc13xxx);
 	mfd_remove_devices(dev);
+	regmap_del_irq_chip(mc13xxx->irq, mc13xxx->irq_data);
 	mutex_destroy(&mc13xxx->lock);
 
 	return 0;
diff --git a/drivers/mfd/mc13xxx.h b/drivers/mfd/mc13xxx.h
index ae7f1659f5d1..33677d1dcf66 100644
--- a/drivers/mfd/mc13xxx.h
+++ b/drivers/mfd/mc13xxx.h
@@ -13,7 +13,9 @@
 #include <linux/regmap.h>
 #include <linux/mfd/mc13xxx.h>
 
-#define MC13XXX_NUMREGS 0x3f
+#define MC13XXX_NUMREGS		0x3f
+#define MC13XXX_IRQ_REG_CNT	2
+#define MC13XXX_IRQ_PER_REG	24
 
 struct mc13xxx;
 
@@ -33,13 +35,14 @@ struct mc13xxx {
 	struct device *dev;
 	const struct mc13xxx_variant *variant;
 
+	struct regmap_irq irqs[MC13XXX_IRQ_PER_REG * MC13XXX_IRQ_REG_CNT];
+	struct regmap_irq_chip irq_chip;
+	struct regmap_irq_chip_data *irq_data;
+
 	struct mutex lock;
 	int irq;
 	int flags;
 
-	irq_handler_t irqhandler[MC13XXX_NUM_IRQ];
-	void *irqdata[MC13XXX_NUM_IRQ];
-
 	int adcflags;
 };
 
diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h
index a8eeda773a7b..4ff6137d8d67 100644
--- a/include/linux/mfd/mc13783.h
+++ b/include/linux/mfd/mc13783.h
@@ -86,6 +86,5 @@
 #define MC13783_IRQ_HSL		43
 #define MC13783_IRQ_ALSPTH	44
 #define MC13783_IRQ_AHSSHORT	45
-#define MC13783_NUM_IRQ		MC13XXX_NUM_IRQ
 
 #endif /* ifndef __LINUX_MFD_MC13783_H */
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index d63b1d309106..638222e43e48 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -23,15 +23,10 @@ int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset,
 
 int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq,
 		irq_handler_t handler, const char *name, void *dev);
-int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq,
-		irq_handler_t handler, const char *name, void *dev);
 int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev);
 
-int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq);
-int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq);
 int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq,
 		int *enabled, int *pending);
-int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq);
 
 int mc13xxx_get_flags(struct mc13xxx *mc13xxx);
 
@@ -39,6 +34,22 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx,
 		unsigned int mode, unsigned int channel,
 		u8 ato, bool atox, unsigned int *sample);
 
+/* Deprecated calls */
+static inline int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq)
+{
+	return 0;
+}
+
+static inline int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq,
+					       irq_handler_t handler,
+					       const char *name, void *dev)
+{
+	return mc13xxx_irq_request(mc13xxx, irq, handler, name, dev);
+}
+
+int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq);
+int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq);
+
 #define MC13783_AUDIO_RX0	36
 #define MC13783_AUDIO_RX1	37
 #define MC13783_AUDIO_TX	38
@@ -68,8 +79,6 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx,
 #define MC13XXX_IRQ_THWARNH	37
 #define MC13XXX_IRQ_CLK		38
 
-#define MC13XXX_NUM_IRQ		46
-
 struct regulator_init_data;
 
 struct mc13xxx_regulator_init_data {
-- 
cgit v1.2.3


From 54e8827d5f0e66d152ef63e7958030ef4880cd85 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 25 Jun 2014 16:14:44 +0900
Subject: mfd: sec-core: Add support for S2MPU02 device

Add support for Samsung S2MPU02 PMIC device to the MFD sec-core driver.
The S2MPU02 device includes PMIC/RTC/Clock devices.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/sec-core.c              |  46 ++++++---
 drivers/mfd/sec-irq.c               | 110 +++++++++++++++++---
 include/linux/mfd/samsung/core.h    |   1 +
 include/linux/mfd/samsung/irq.h     |  24 +++++
 include/linux/mfd/samsung/s2mpu02.h | 201 ++++++++++++++++++++++++++++++++++++
 5 files changed, 352 insertions(+), 30 deletions(-)
 create mode 100644 include/linux/mfd/samsung/s2mpu02.h

(limited to 'include/linux')

diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index be06d0abbf19..15ba847b3d23 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -89,6 +89,15 @@ static const struct mfd_cell s2mpa01_devs[] = {
 	},
 };
 
+static const struct mfd_cell s2mpu02_devs[] = {
+	{ .name = "s2mpu02-pmic", },
+	{ .name = "s2mpu02-rtc", },
+	{
+		.name = "s2mpu02-clk",
+		.of_compatible = "samsung,s2mpu02-clk",
+	}
+};
+
 #ifdef CONFIG_OF
 static const struct of_device_id sec_dt_match[] = {
 	{	.compatible = "samsung,s5m8767-pmic",
@@ -102,6 +111,9 @@ static const struct of_device_id sec_dt_match[] = {
 	}, {
 		.compatible = "samsung,s2mpa01-pmic",
 		.data = (void *)S2MPA01,
+	}, {
+		.compatible = "samsung,s2mpu02-pmic",
+		.data = (void *)S2MPU02,
 	}, {
 		/* Sentinel */
 	},
@@ -250,9 +262,10 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 {
 	struct sec_platform_data *pdata = dev_get_platdata(&i2c->dev);
 	const struct regmap_config *regmap;
+	const struct mfd_cell *sec_devs;
 	struct sec_pmic_dev *sec_pmic;
 	unsigned long device_type;
-	int ret;
+	int ret, num_sec_devs;
 
 	sec_pmic = devm_kzalloc(&i2c->dev, sizeof(struct sec_pmic_dev),
 				GFP_KERNEL);
@@ -319,34 +332,39 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 
 	switch (sec_pmic->device_type) {
 	case S5M8751X:
-		ret = mfd_add_devices(sec_pmic->dev, -1, s5m8751_devs,
-				      ARRAY_SIZE(s5m8751_devs), NULL, 0, NULL);
+		sec_devs = s5m8751_devs;
+		num_sec_devs = ARRAY_SIZE(s5m8751_devs);
 		break;
 	case S5M8763X:
-		ret = mfd_add_devices(sec_pmic->dev, -1, s5m8763_devs,
-				      ARRAY_SIZE(s5m8763_devs), NULL, 0, NULL);
+		sec_devs = s5m8763_devs;
+		num_sec_devs = ARRAY_SIZE(s5m8763_devs);
 		break;
 	case S5M8767X:
-		ret = mfd_add_devices(sec_pmic->dev, -1, s5m8767_devs,
-				      ARRAY_SIZE(s5m8767_devs), NULL, 0, NULL);
+		sec_devs = s5m8767_devs;
+		num_sec_devs = ARRAY_SIZE(s5m8767_devs);
 		break;
 	case S2MPA01:
-		ret = mfd_add_devices(sec_pmic->dev, -1, s2mpa01_devs,
-				      ARRAY_SIZE(s2mpa01_devs), NULL, 0, NULL);
+		sec_devs = s2mpa01_devs;
+		num_sec_devs = ARRAY_SIZE(s2mpa01_devs);
 		break;
 	case S2MPS11X:
-		ret = mfd_add_devices(sec_pmic->dev, -1, s2mps11_devs,
-				      ARRAY_SIZE(s2mps11_devs), NULL, 0, NULL);
+		sec_devs = s2mps11_devs;
+		num_sec_devs = ARRAY_SIZE(s2mps11_devs);
 		break;
 	case S2MPS14X:
-		ret = mfd_add_devices(sec_pmic->dev, -1, s2mps14_devs,
-				      ARRAY_SIZE(s2mps14_devs), NULL, 0, NULL);
+		sec_devs = s2mps14_devs;
+		num_sec_devs = ARRAY_SIZE(s2mps14_devs);
+		break;
+	case S2MPU02:
+		sec_devs = s2mpu02_devs;
+		num_sec_devs = ARRAY_SIZE(s2mpu02_devs);
 		break;
 	default:
 		/* If this happens the probe function is problem */
 		BUG();
 	}
-
+	ret = mfd_add_devices(sec_pmic->dev, -1, sec_devs, num_sec_devs, NULL,
+			      0, NULL);
 	if (ret)
 		goto err_mfd;
 
diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c
index 654e2c1dbf7a..f9a57869e3ec 100644
--- a/drivers/mfd/sec-irq.c
+++ b/drivers/mfd/sec-irq.c
@@ -20,6 +20,7 @@
 #include <linux/mfd/samsung/irq.h>
 #include <linux/mfd/samsung/s2mps11.h>
 #include <linux/mfd/samsung/s2mps14.h>
+#include <linux/mfd/samsung/s2mpu02.h>
 #include <linux/mfd/samsung/s5m8763.h>
 #include <linux/mfd/samsung/s5m8767.h>
 
@@ -161,6 +162,77 @@ static const struct regmap_irq s2mps14_irqs[] = {
 	},
 };
 
+static const struct regmap_irq s2mpu02_irqs[] = {
+	[S2MPU02_IRQ_PWRONF] = {
+		.reg_offset = 0,
+		.mask = S2MPS11_IRQ_PWRONF_MASK,
+	},
+	[S2MPU02_IRQ_PWRONR] = {
+		.reg_offset = 0,
+		.mask = S2MPS11_IRQ_PWRONR_MASK,
+	},
+	[S2MPU02_IRQ_JIGONBF] = {
+		.reg_offset = 0,
+		.mask = S2MPS11_IRQ_JIGONBF_MASK,
+	},
+	[S2MPU02_IRQ_JIGONBR] = {
+		.reg_offset = 0,
+		.mask = S2MPS11_IRQ_JIGONBR_MASK,
+	},
+	[S2MPU02_IRQ_ACOKBF] = {
+		.reg_offset = 0,
+		.mask = S2MPS11_IRQ_ACOKBF_MASK,
+	},
+	[S2MPU02_IRQ_ACOKBR] = {
+		.reg_offset = 0,
+		.mask = S2MPS11_IRQ_ACOKBR_MASK,
+	},
+	[S2MPU02_IRQ_PWRON1S] = {
+		.reg_offset = 0,
+		.mask = S2MPS11_IRQ_PWRON1S_MASK,
+	},
+	[S2MPU02_IRQ_MRB] = {
+		.reg_offset = 0,
+		.mask = S2MPS11_IRQ_MRB_MASK,
+	},
+	[S2MPU02_IRQ_RTC60S] = {
+		.reg_offset = 1,
+		.mask = S2MPS11_IRQ_RTC60S_MASK,
+	},
+	[S2MPU02_IRQ_RTCA1] = {
+		.reg_offset = 1,
+		.mask = S2MPS11_IRQ_RTCA1_MASK,
+	},
+	[S2MPU02_IRQ_RTCA0] = {
+		.reg_offset = 1,
+		.mask = S2MPS11_IRQ_RTCA0_MASK,
+	},
+	[S2MPU02_IRQ_SMPL] = {
+		.reg_offset = 1,
+		.mask = S2MPS11_IRQ_SMPL_MASK,
+	},
+	[S2MPU02_IRQ_RTC1S] = {
+		.reg_offset = 1,
+		.mask = S2MPS11_IRQ_RTC1S_MASK,
+	},
+	[S2MPU02_IRQ_WTSR] = {
+		.reg_offset = 1,
+		.mask = S2MPS11_IRQ_WTSR_MASK,
+	},
+	[S2MPU02_IRQ_INT120C] = {
+		.reg_offset = 2,
+		.mask = S2MPS11_IRQ_INT120C_MASK,
+	},
+	[S2MPU02_IRQ_INT140C] = {
+		.reg_offset = 2,
+		.mask = S2MPS11_IRQ_INT140C_MASK,
+	},
+	[S2MPU02_IRQ_TSD] = {
+		.reg_offset = 2,
+		.mask = S2MPS14_IRQ_TSD_MASK,
+	},
+};
+
 static const struct regmap_irq s5m8767_irqs[] = {
 	[S5M8767_IRQ_PWRR] = {
 		.reg_offset = 0,
@@ -327,6 +399,16 @@ static const struct regmap_irq_chip s2mps14_irq_chip = {
 	.ack_base = S2MPS14_REG_INT1,
 };
 
+static const struct regmap_irq_chip s2mpu02_irq_chip = {
+	.name = "s2mpu02",
+	.irqs = s2mpu02_irqs,
+	.num_irqs = ARRAY_SIZE(s2mpu02_irqs),
+	.num_regs = 3,
+	.status_base = S2MPU02_REG_INT1,
+	.mask_base = S2MPU02_REG_INT1M,
+	.ack_base = S2MPU02_REG_INT1,
+};
+
 static const struct regmap_irq_chip s5m8767_irq_chip = {
 	.name = "s5m8767",
 	.irqs = s5m8767_irqs,
@@ -351,6 +433,7 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic)
 {
 	int ret = 0;
 	int type = sec_pmic->device_type;
+	const struct regmap_irq_chip *sec_irq_chip;
 
 	if (!sec_pmic->irq) {
 		dev_warn(sec_pmic->dev,
@@ -361,28 +444,19 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic)
 
 	switch (type) {
 	case S5M8763X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
-				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				  sec_pmic->irq_base, &s5m8763_irq_chip,
-				  &sec_pmic->irq_data);
+		sec_irq_chip = &s5m8763_irq_chip;
 		break;
 	case S5M8767X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
-				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				  sec_pmic->irq_base, &s5m8767_irq_chip,
-				  &sec_pmic->irq_data);
+		sec_irq_chip = &s5m8767_irq_chip;
 		break;
 	case S2MPS11X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
-				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				  sec_pmic->irq_base, &s2mps11_irq_chip,
-				  &sec_pmic->irq_data);
+		sec_irq_chip = &s2mps11_irq_chip;
 		break;
 	case S2MPS14X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
-				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				  sec_pmic->irq_base, &s2mps14_irq_chip,
-				  &sec_pmic->irq_data);
+		sec_irq_chip = &s2mps14_irq_chip;
+		break;
+	case S2MPU02:
+		sec_irq_chip = &s2mpu02_irq_chip;
 		break;
 	default:
 		dev_err(sec_pmic->dev, "Unknown device type %lu\n",
@@ -390,6 +464,10 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic)
 		return -EINVAL;
 	}
 
+	ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
+			  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+			  sec_pmic->irq_base, sec_irq_chip,
+			  &sec_pmic->irq_data);
 	if (ret != 0) {
 		dev_err(sec_pmic->dev, "Failed to register IRQ chip: %d\n", ret);
 		return ret;
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 47d84242940b..b5f73de81aad 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -21,6 +21,7 @@ enum sec_device_type {
 	S2MPA01,
 	S2MPS11X,
 	S2MPS14X,
+	S2MPU02,
 };
 
 /**
diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h
index 1224f447356b..f35af7361b60 100644
--- a/include/linux/mfd/samsung/irq.h
+++ b/include/linux/mfd/samsung/irq.h
@@ -129,6 +129,30 @@ enum s2mps14_irq {
 	S2MPS14_IRQ_NR,
 };
 
+enum s2mpu02_irq {
+	S2MPU02_IRQ_PWRONF,
+	S2MPU02_IRQ_PWRONR,
+	S2MPU02_IRQ_JIGONBF,
+	S2MPU02_IRQ_JIGONBR,
+	S2MPU02_IRQ_ACOKBF,
+	S2MPU02_IRQ_ACOKBR,
+	S2MPU02_IRQ_PWRON1S,
+	S2MPU02_IRQ_MRB,
+
+	S2MPU02_IRQ_RTC60S,
+	S2MPU02_IRQ_RTCA1,
+	S2MPU02_IRQ_RTCA0,
+	S2MPU02_IRQ_SMPL,
+	S2MPU02_IRQ_RTC1S,
+	S2MPU02_IRQ_WTSR,
+
+	S2MPU02_IRQ_INT120C,
+	S2MPU02_IRQ_INT140C,
+	S2MPU02_IRQ_TSD,
+
+	S2MPU02_IRQ_NR,
+};
+
 /* Masks for interrupts are the same as in s2mps11 */
 #define S2MPS14_IRQ_TSD_MASK		(1 << 2)
 
diff --git a/include/linux/mfd/samsung/s2mpu02.h b/include/linux/mfd/samsung/s2mpu02.h
new file mode 100644
index 000000000000..47ae9bc583a7
--- /dev/null
+++ b/include/linux/mfd/samsung/s2mpu02.h
@@ -0,0 +1,201 @@
+/*
+ * s2mpu02.h
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __LINUX_MFD_S2MPU02_H
+#define __LINUX_MFD_S2MPU02_H
+
+/* S2MPU02 registers */
+enum S2MPU02_reg {
+	S2MPU02_REG_ID,
+	S2MPU02_REG_INT1,
+	S2MPU02_REG_INT2,
+	S2MPU02_REG_INT3,
+	S2MPU02_REG_INT1M,
+	S2MPU02_REG_INT2M,
+	S2MPU02_REG_INT3M,
+	S2MPU02_REG_ST1,
+	S2MPU02_REG_ST2,
+	S2MPU02_REG_PWRONSRC,
+	S2MPU02_REG_OFFSRC,
+	S2MPU02_REG_BU_CHG,
+	S2MPU02_REG_RTCCTRL,
+	S2MPU02_REG_PMCTRL1,
+	S2MPU02_REG_RSVD1,
+	S2MPU02_REG_RSVD2,
+	S2MPU02_REG_RSVD3,
+	S2MPU02_REG_RSVD4,
+	S2MPU02_REG_RSVD5,
+	S2MPU02_REG_RSVD6,
+	S2MPU02_REG_RSVD7,
+	S2MPU02_REG_WRSTEN,
+	S2MPU02_REG_RSVD8,
+	S2MPU02_REG_RSVD9,
+	S2MPU02_REG_RSVD10,
+	S2MPU02_REG_B1CTRL1,
+	S2MPU02_REG_B1CTRL2,
+	S2MPU02_REG_B2CTRL1,
+	S2MPU02_REG_B2CTRL2,
+	S2MPU02_REG_B3CTRL1,
+	S2MPU02_REG_B3CTRL2,
+	S2MPU02_REG_B4CTRL1,
+	S2MPU02_REG_B4CTRL2,
+	S2MPU02_REG_B5CTRL1,
+	S2MPU02_REG_B5CTRL2,
+	S2MPU02_REG_B5CTRL3,
+	S2MPU02_REG_B5CTRL4,
+	S2MPU02_REG_B5CTRL5,
+	S2MPU02_REG_B6CTRL1,
+	S2MPU02_REG_B6CTRL2,
+	S2MPU02_REG_B7CTRL1,
+	S2MPU02_REG_B7CTRL2,
+	S2MPU02_REG_RAMP1,
+	S2MPU02_REG_RAMP2,
+	S2MPU02_REG_L1CTRL,
+	S2MPU02_REG_L2CTRL1,
+	S2MPU02_REG_L2CTRL2,
+	S2MPU02_REG_L2CTRL3,
+	S2MPU02_REG_L2CTRL4,
+	S2MPU02_REG_L3CTRL,
+	S2MPU02_REG_L4CTRL,
+	S2MPU02_REG_L5CTRL,
+	S2MPU02_REG_L6CTRL,
+	S2MPU02_REG_L7CTRL,
+	S2MPU02_REG_L8CTRL,
+	S2MPU02_REG_L9CTRL,
+	S2MPU02_REG_L10CTRL,
+	S2MPU02_REG_L11CTRL,
+	S2MPU02_REG_L12CTRL,
+	S2MPU02_REG_L13CTRL,
+	S2MPU02_REG_L14CTRL,
+	S2MPU02_REG_L15CTRL,
+	S2MPU02_REG_L16CTRL,
+	S2MPU02_REG_L17CTRL,
+	S2MPU02_REG_L18CTRL,
+	S2MPU02_REG_L19CTRL,
+	S2MPU02_REG_L20CTRL,
+	S2MPU02_REG_L21CTRL,
+	S2MPU02_REG_L22CTRL,
+	S2MPU02_REG_L23CTRL,
+	S2MPU02_REG_L24CTRL,
+	S2MPU02_REG_L25CTRL,
+	S2MPU02_REG_L26CTRL,
+	S2MPU02_REG_L27CTRL,
+	S2MPU02_REG_L28CTRL,
+	S2MPU02_REG_LDODSCH1,
+	S2MPU02_REG_LDODSCH2,
+	S2MPU02_REG_LDODSCH3,
+	S2MPU02_REG_LDODSCH4,
+	S2MPU02_REG_SELMIF,
+	S2MPU02_REG_RSVD11,
+	S2MPU02_REG_RSVD12,
+	S2MPU02_REG_RSVD13,
+	S2MPU02_REG_DVSSEL,
+	S2MPU02_REG_DVSPTR,
+	S2MPU02_REG_DVSDATA,
+};
+
+/* S2MPU02 regulator ids */
+enum S2MPU02_regulators {
+	S2MPU02_LDO1,
+	S2MPU02_LDO2,
+	S2MPU02_LDO3,
+	S2MPU02_LDO4,
+	S2MPU02_LDO5,
+	S2MPU02_LDO6,
+	S2MPU02_LDO7,
+	S2MPU02_LDO8,
+	S2MPU02_LDO9,
+	S2MPU02_LDO10,
+	S2MPU02_LDO11,
+	S2MPU02_LDO12,
+	S2MPU02_LDO13,
+	S2MPU02_LDO14,
+	S2MPU02_LDO15,
+	S2MPU02_LDO16,
+	S2MPU02_LDO17,
+	S2MPU02_LDO18,
+	S2MPU02_LDO19,
+	S2MPU02_LDO20,
+	S2MPU02_LDO21,
+	S2MPU02_LDO22,
+	S2MPU02_LDO23,
+	S2MPU02_LDO24,
+	S2MPU02_LDO25,
+	S2MPU02_LDO26,
+	S2MPU02_LDO27,
+	S2MPU02_LDO28,
+	S2MPU02_BUCK1,
+	S2MPU02_BUCK2,
+	S2MPU02_BUCK3,
+	S2MPU02_BUCK4,
+	S2MPU02_BUCK5,
+	S2MPU02_BUCK6,
+	S2MPU02_BUCK7,
+
+	S2MPU02_REGULATOR_MAX,
+};
+
+/* Regulator constraints for BUCKx */
+#define S2MPU02_BUCK1234_MIN_600MV	600000
+#define S2MPU02_BUCK5_MIN_1081_25MV	1081250
+#define S2MPU02_BUCK6_MIN_1700MV	1700000
+#define S2MPU02_BUCK7_MIN_900MV		900000
+
+#define S2MPU02_BUCK1234_STEP_6_25MV	6250
+#define S2MPU02_BUCK5_STEP_6_25MV	6250
+#define S2MPU02_BUCK6_STEP_2_50MV	2500
+#define S2MPU02_BUCK7_STEP_6_25MV	6250
+
+#define S2MPU02_BUCK1234_START_SEL	0x00
+#define S2MPU02_BUCK5_START_SEL		0x4D
+#define S2MPU02_BUCK6_START_SEL		0x28
+#define S2MPU02_BUCK7_START_SEL		0x30
+
+#define S2MPU02_BUCK_RAMP_DELAY		12500
+
+/* Regulator constraints for different types of LDOx */
+#define S2MPU02_LDO_MIN_900MV		900000
+#define S2MPU02_LDO_MIN_1050MV		1050000
+#define S2MPU02_LDO_MIN_1600MV		1600000
+#define S2MPU02_LDO_STEP_12_5MV		12500
+#define S2MPU02_LDO_STEP_25MV		25000
+#define S2MPU02_LDO_STEP_50MV		50000
+
+#define S2MPU02_LDO_GROUP1_START_SEL	0x8
+#define S2MPU02_LDO_GROUP2_START_SEL	0xA
+#define S2MPU02_LDO_GROUP3_START_SEL	0x10
+
+#define S2MPU02_LDO_VSEL_MASK		0x3F
+#define S2MPU02_BUCK_VSEL_MASK		0xFF
+#define S2MPU02_ENABLE_MASK		(0x03 << S2MPU02_ENABLE_SHIFT)
+#define S2MPU02_ENABLE_SHIFT		6
+
+/* On/Off controlled by PWREN */
+#define S2MPU02_ENABLE_SUSPEND		(0x01 << S2MPU02_ENABLE_SHIFT)
+#define S2MPU02_DISABLE_SUSPEND		(0x11 << S2MPU02_ENABLE_SHIFT)
+#define S2MPU02_LDO_N_VOLTAGES		(S2MPU02_LDO_VSEL_MASK + 1)
+#define S2MPU02_BUCK_N_VOLTAGES		(S2MPU02_BUCK_VSEL_MASK + 1)
+
+/* RAMP delay for BUCK1234*/
+#define S2MPU02_BUCK1_RAMP_SHIFT	6
+#define S2MPU02_BUCK2_RAMP_SHIFT	4
+#define S2MPU02_BUCK3_RAMP_SHIFT	2
+#define S2MPU02_BUCK4_RAMP_SHIFT	0
+#define S2MPU02_BUCK1234_RAMP_MASK	0x3
+
+#endif /*  __LINUX_MFD_S2MPU02_H */
-- 
cgit v1.2.3


From ee98662ec914a23ab826b0c83797aa9414f737bc Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 18 Jun 2014 11:13:58 -0700
Subject: mfd: cros_ec: Fix the comment on cros_ec_remove()

This comment was incorrect, so update it.

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 887ef4f7bef7..7e9fe6e98d2f 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -148,8 +148,7 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
 /**
  * cros_ec_remove - Remove a ChromeOS EC
  *
- * Call this to deregister a ChromeOS EC. After this you should call
- * cros_ec_free().
+ * Call this to deregister a ChromeOS EC, then clean up any private data.
  *
  * @ec_dev: Device to register
  * @return 0 if ok, -ve on error
-- 
cgit v1.2.3


From 2ce701ae4e351d9407ec0b30f5f9dd56b6de4292 Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 18 Jun 2014 11:13:59 -0700
Subject: mfd: cros_ec: Allow static din/dout buffers with cros_ec_register()

The lower-level driver may want to provide its own buffers. If so,
there's no need to allocate new ones.  This already happens to work
just fine (since we check for size of 0 and use devm allocation), but
it's good to document it.

[dianders: Resolved conflicts; documented that no code changes needed
on mainline]

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 7e9fe6e98d2f..2ee3190b691c 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -68,8 +68,8 @@ struct cros_ec_msg {
  * We use this alignment to keep ARM and x86 happy. Probably word
  * alignment would be OK, there might be a small performance advantage
  * to using dword.
- * @din_size: size of din buffer
- * @dout_size: size of dout buffer
+ * @din_size: size of din buffer to allocate (zero to use static din)
+ * @dout_size: size of dout buffer to allocate (zero to use static dout)
  * @command_send: send a command
  * @command_recv: receive a command
  * @ec_name: name of EC device (e.g. 'chromeos-ec')
-- 
cgit v1.2.3


From 7e6cb5b4dbbc4b1d98289c88d0bc4092cac328be Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 18 Jun 2014 11:14:00 -0700
Subject: mfd: cros_ec: Tweak struct cros_ec_device for clarity

The members of struct cros_ec_device were improperly commented, and
intermixed the private and public sections. This is just cleanup to make it
more obvious what goes with what.

[dianders: left lock in the structure but gave it the name that will
eventually be used.]

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec.c       |  2 +-
 drivers/mfd/cros_ec_i2c.c   |  4 +--
 drivers/mfd/cros_ec_spi.c   | 10 +++----
 include/linux/mfd/cros_ec.h | 65 ++++++++++++++++++++++++---------------------
 4 files changed, 43 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 38fe9bf0d169..04e053c71cc6 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -57,7 +57,7 @@ static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev,
 	msg.in_buf = in_buf;
 	msg.in_len = in_len;
 
-	return ec_dev->command_xfer(ec_dev, &msg);
+	return ec_dev->cmd_xfer(ec_dev, &msg);
 }
 
 static int cros_ec_command_recv(struct cros_ec_device *ec_dev,
diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c
index 4f71be99a183..777e529abb16 100644
--- a/drivers/mfd/cros_ec_i2c.c
+++ b/drivers/mfd/cros_ec_i2c.c
@@ -29,7 +29,7 @@ static inline struct cros_ec_device *to_ec_dev(struct device *dev)
 	return i2c_get_clientdata(client);
 }
 
-static int cros_ec_command_xfer(struct cros_ec_device *ec_dev,
+static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev,
 				struct cros_ec_msg *msg)
 {
 	struct i2c_client *client = ec_dev->priv;
@@ -136,7 +136,7 @@ static int cros_ec_i2c_probe(struct i2c_client *client,
 	ec_dev->dev = dev;
 	ec_dev->priv = client;
 	ec_dev->irq = client->irq;
-	ec_dev->command_xfer = cros_ec_command_xfer;
+	ec_dev->cmd_xfer = cros_ec_cmd_xfer_i2c;
 	ec_dev->ec_name = client->name;
 	ec_dev->phys_name = client->adapter->name;
 	ec_dev->parent = &client->dev;
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index 1fcc65ecad0e..6c3075fb5dc7 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -73,7 +73,7 @@
  *	if no record
  * @end_of_msg_delay: used to set the delay_usecs on the spi_transfer that
  *      is sent when we want to turn off CS at the end of a transaction.
- * @lock: mutex to ensure only one user of cros_ec_command_spi_xfer at a time
+ * @lock: mutex to ensure only one user of cros_ec_cmd_xfer_spi at a time
  */
 struct cros_ec_spi {
 	struct spi_device *spi;
@@ -210,13 +210,13 @@ static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev,
 }
 
 /**
- * cros_ec_command_spi_xfer - Transfer a message over SPI and receive the reply
+ * cros_ec_cmd_xfer_spi - Transfer a message over SPI and receive the reply
  *
  * @ec_dev: ChromeOS EC device
  * @ec_msg: Message to transfer
  */
-static int cros_ec_command_spi_xfer(struct cros_ec_device *ec_dev,
-				    struct cros_ec_msg *ec_msg)
+static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
+				struct cros_ec_msg *ec_msg)
 {
 	struct cros_ec_spi *ec_spi = ec_dev->priv;
 	struct spi_transfer trans;
@@ -368,7 +368,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
 	ec_dev->dev = dev;
 	ec_dev->priv = ec_spi;
 	ec_dev->irq = spi->irq;
-	ec_dev->command_xfer = cros_ec_command_spi_xfer;
+	ec_dev->cmd_xfer = cros_ec_cmd_xfer_spi;
 	ec_dev->ec_name = ec_spi->spi->modalias;
 	ec_dev->phys_name = dev_name(&ec_spi->spi->dev);
 	ec_dev->parent = &ec_spi->spi->dev;
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 2ee3190b691c..79a35857cc9e 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -16,7 +16,9 @@
 #ifndef __LINUX_MFD_CROS_EC_H
 #define __LINUX_MFD_CROS_EC_H
 
+#include <linux/notifier.h>
 #include <linux/mfd/cros_ec_commands.h>
+#include <linux/mutex.h>
 
 /*
  * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
@@ -55,34 +57,53 @@ struct cros_ec_msg {
 /**
  * struct cros_ec_device - Information about a ChromeOS EC device
  *
+ * @ec_name: name of EC device (e.g. 'chromeos-ec')
+ * @phys_name: name of physical comms layer (e.g. 'i2c-4')
+ * @dev: Device pointer
+ * @was_wake_device: true if this device was set to wake the system from
+ * sleep at the last suspend
+ * @event_notifier: interrupt event notifier for transport devices
+ * @command_send: send a command
+ * @command_recv: receive a response
+ * @command_sendrecv: send a command and receive a response
+ *
  * @name: Name of this EC interface
  * @priv: Private data
  * @irq: Interrupt to use
- * @din: input buffer (from EC)
- * @dout: output buffer (to EC)
+ * @din: input buffer (for data from EC)
+ * @dout: output buffer (for data to EC)
  * \note
  * These two buffers will always be dword-aligned and include enough
  * space for up to 7 word-alignment bytes also, so we can ensure that
  * the body of the message is always dword-aligned (64-bit).
- *
  * We use this alignment to keep ARM and x86 happy. Probably word
  * alignment would be OK, there might be a small performance advantage
  * to using dword.
  * @din_size: size of din buffer to allocate (zero to use static din)
  * @dout_size: size of dout buffer to allocate (zero to use static dout)
- * @command_send: send a command
- * @command_recv: receive a command
- * @ec_name: name of EC device (e.g. 'chromeos-ec')
- * @phys_name: name of physical comms layer (e.g. 'i2c-4')
  * @parent: pointer to parent device (e.g. i2c or spi device)
- * @dev: Device pointer
- * dev_lock: Lock to prevent concurrent access
  * @wake_enabled: true if this device can wake the system from sleep
- * @was_wake_device: true if this device was set to wake the system from
- * sleep at the last suspend
- * @event_notifier: interrupt event notifier for transport devices
+ * @lock: one transaction at a time
+ * @cmd_xfer: low-level channel to the EC
  */
 struct cros_ec_device {
+
+	/* These are used by other drivers that want to talk to the EC */
+	const char *ec_name;
+	const char *phys_name;
+	struct device *dev;
+	bool was_wake_device;
+	struct class *cros_class;
+	struct blocking_notifier_head event_notifier;
+	int (*command_send)(struct cros_ec_device *ec,
+			    uint16_t cmd, void *out_buf, int out_len);
+	int (*command_recv)(struct cros_ec_device *ec,
+			    uint16_t cmd, void *in_buf, int in_len);
+	int (*command_sendrecv)(struct cros_ec_device *ec,
+				uint16_t cmd, void *out_buf, int out_len,
+				void *in_buf, int in_len);
+
+	/* These are used to implement the platform-specific interface */
 	const char *name;
 	void *priv;
 	int irq;
@@ -90,26 +111,10 @@ struct cros_ec_device {
 	uint8_t *dout;
 	int din_size;
 	int dout_size;
-	int (*command_send)(struct cros_ec_device *ec,
-			uint16_t cmd, void *out_buf, int out_len);
-	int (*command_recv)(struct cros_ec_device *ec,
-			uint16_t cmd, void *in_buf, int in_len);
-	int (*command_sendrecv)(struct cros_ec_device *ec,
-			uint16_t cmd, void *out_buf, int out_len,
-			void *in_buf, int in_len);
-	int (*command_xfer)(struct cros_ec_device *ec,
-			struct cros_ec_msg *msg);
-
-	const char *ec_name;
-	const char *phys_name;
 	struct device *parent;
-
-	/* These are --private-- fields - do not assign */
-	struct device *dev;
-	struct mutex dev_lock;
 	bool wake_enabled;
-	bool was_wake_device;
-	struct blocking_notifier_head event_notifier;
+	struct mutex lock;
+	int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_msg *msg);
 };
 
 /**
-- 
cgit v1.2.3


From 5d4773e27e8ab37491767a6ef99ffd7100fe6341 Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 18 Jun 2014 11:14:02 -0700
Subject: mfd: cros_ec: Use struct cros_ec_command to communicate with the EC

This is some internal structure reorganization / renaming to prepare
for future patches that will add a userspace API to cros_ec.  There
should be no visible changes.

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec.c       | 28 ++++++++++++++--------------
 drivers/mfd/cros_ec_i2c.c   | 24 ++++++++++++------------
 drivers/mfd/cros_ec_spi.c   | 16 ++++++++--------
 include/linux/mfd/cros_ec.h | 35 ++++++++++++++++++-----------------
 4 files changed, 52 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 04e053c71cc6..2e86c282f0b4 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -25,22 +25,22 @@
 #include <linux/mfd/cros_ec_commands.h>
 
 int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
-		       struct cros_ec_msg *msg)
+		       struct cros_ec_command *msg)
 {
 	uint8_t *out;
 	int csum, i;
 
-	BUG_ON(msg->out_len > EC_PROTO2_MAX_PARAM_SIZE);
+	BUG_ON(msg->outsize > EC_PROTO2_MAX_PARAM_SIZE);
 	out = ec_dev->dout;
 	out[0] = EC_CMD_VERSION0 + msg->version;
-	out[1] = msg->cmd;
-	out[2] = msg->out_len;
+	out[1] = msg->command;
+	out[2] = msg->outsize;
 	csum = out[0] + out[1] + out[2];
-	for (i = 0; i < msg->out_len; i++)
-		csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->out_buf[i];
-	out[EC_MSG_TX_HEADER_BYTES + msg->out_len] = (uint8_t)(csum & 0xff);
+	for (i = 0; i < msg->outsize; i++)
+		csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->outdata[i];
+	out[EC_MSG_TX_HEADER_BYTES + msg->outsize] = (uint8_t)(csum & 0xff);
 
-	return EC_MSG_TX_PROTO_BYTES + msg->out_len;
+	return EC_MSG_TX_PROTO_BYTES + msg->outsize;
 }
 EXPORT_SYMBOL(cros_ec_prepare_tx);
 
@@ -48,14 +48,14 @@ static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev,
 		uint16_t cmd, void *out_buf, int out_len,
 		void *in_buf, int in_len)
 {
-	struct cros_ec_msg msg;
+	struct cros_ec_command msg;
 
 	msg.version = cmd >> 8;
-	msg.cmd = cmd & 0xff;
-	msg.out_buf = out_buf;
-	msg.out_len = out_len;
-	msg.in_buf = in_buf;
-	msg.in_len = in_len;
+	msg.command = cmd & 0xff;
+	msg.outdata = out_buf;
+	msg.outsize = out_len;
+	msg.indata = in_buf;
+	msg.insize = in_len;
 
 	return ec_dev->cmd_xfer(ec_dev, &msg);
 }
diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c
index 777e529abb16..37ed12f99560 100644
--- a/drivers/mfd/cros_ec_i2c.c
+++ b/drivers/mfd/cros_ec_i2c.c
@@ -30,7 +30,7 @@ static inline struct cros_ec_device *to_ec_dev(struct device *dev)
 }
 
 static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev,
-				struct cros_ec_msg *msg)
+				struct cros_ec_command *msg)
 {
 	struct i2c_client *client = ec_dev->priv;
 	int ret = -ENOMEM;
@@ -50,7 +50,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev,
 	 * allocate larger packet (one byte for checksum, one byte for
 	 * length, and one for result code)
 	 */
-	packet_len = msg->in_len + 3;
+	packet_len = msg->insize + 3;
 	in_buf = kzalloc(packet_len, GFP_KERNEL);
 	if (!in_buf)
 		goto done;
@@ -61,7 +61,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev,
 	 * allocate larger packet (one byte for checksum, one for
 	 * command code, one for length, and one for command version)
 	 */
-	packet_len = msg->out_len + 4;
+	packet_len = msg->outsize + 4;
 	out_buf = kzalloc(packet_len, GFP_KERNEL);
 	if (!out_buf)
 		goto done;
@@ -69,16 +69,16 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev,
 	i2c_msg[0].buf = (char *)out_buf;
 
 	out_buf[0] = EC_CMD_VERSION0 + msg->version;
-	out_buf[1] = msg->cmd;
-	out_buf[2] = msg->out_len;
+	out_buf[1] = msg->command;
+	out_buf[2] = msg->outsize;
 
 	/* copy message payload and compute checksum */
 	sum = out_buf[0] + out_buf[1] + out_buf[2];
-	for (i = 0; i < msg->out_len; i++) {
-		out_buf[3 + i] = msg->out_buf[i];
+	for (i = 0; i < msg->outsize; i++) {
+		out_buf[3 + i] = msg->outdata[i];
 		sum += out_buf[3 + i];
 	}
-	out_buf[3 + msg->out_len] = sum;
+	out_buf[3 + msg->outsize] = sum;
 
 	/* send command to EC and read answer */
 	ret = i2c_transfer(client->adapter, i2c_msg, 2);
@@ -94,20 +94,20 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev,
 	/* check response error code */
 	if (i2c_msg[1].buf[0]) {
 		dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n",
-			 msg->cmd, i2c_msg[1].buf[0]);
+			 msg->command, i2c_msg[1].buf[0]);
 		ret = -EINVAL;
 		goto done;
 	}
 
 	/* copy response packet payload and compute checksum */
 	sum = in_buf[0] + in_buf[1];
-	for (i = 0; i < msg->in_len; i++) {
-		msg->in_buf[i] = in_buf[2 + i];
+	for (i = 0; i < msg->insize; i++) {
+		msg->indata[i] = in_buf[2 + i];
 		sum += in_buf[2 + i];
 	}
 	dev_dbg(ec_dev->dev, "packet: %*ph, sum = %02x\n",
 		i2c_msg[1].len, in_buf, sum);
-	if (sum != in_buf[2 + msg->in_len]) {
+	if (sum != in_buf[2 + msg->insize]) {
 		dev_err(ec_dev->dev, "bad packet checksum\n");
 		ret = -EBADMSG;
 		goto done;
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index 9d45d88813b8..bef7735ecfde 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -216,7 +216,7 @@ static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev,
  * @ec_msg: Message to transfer
  */
 static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
-				struct cros_ec_msg *ec_msg)
+				struct cros_ec_command *ec_msg)
 {
 	struct cros_ec_spi *ec_spi = ec_dev->priv;
 	struct spi_transfer trans;
@@ -261,7 +261,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 	/* Get the response */
 	if (!ret) {
 		ret = cros_ec_spi_receive_response(ec_dev,
-				ec_msg->in_len + EC_MSG_TX_PROTO_BYTES);
+				ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
 	} else {
 		dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
 	}
@@ -290,21 +290,21 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 	if (ptr[0]) {
 		if (ptr[0] == EC_RES_IN_PROGRESS) {
 			dev_dbg(ec_dev->dev, "command 0x%02x in progress\n",
-				ec_msg->cmd);
+				ec_msg->command);
 			ret = -EAGAIN;
 			goto exit;
 		}
 		dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n",
-			 ec_msg->cmd, ptr[0]);
+			 ec_msg->command, ptr[0]);
 		debug_packet(ec_dev->dev, "in_err", ptr, len);
 		ret = -EINVAL;
 		goto exit;
 	}
 	len = ptr[1];
 	sum = ptr[0] + ptr[1];
-	if (len > ec_msg->in_len) {
+	if (len > ec_msg->insize) {
 		dev_err(ec_dev->dev, "packet too long (%d bytes, expected %d)",
-			len, ec_msg->in_len);
+			len, ec_msg->insize);
 		ret = -ENOSPC;
 		goto exit;
 	}
@@ -312,8 +312,8 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 	/* copy response packet payload and compute checksum */
 	for (i = 0; i < len; i++) {
 		sum += ptr[i + 2];
-		if (ec_msg->in_len)
-			ec_msg->in_buf[i] = ptr[i + 2];
+		if (ec_msg->insize)
+			ec_msg->indata[i] = ptr[i + 2];
 	}
 	sum &= 0xff;
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 79a35857cc9e..f27c03766069 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -35,23 +35,23 @@ enum {
 					EC_MSG_TX_PROTO_BYTES,
 };
 
-/**
- * struct cros_ec_msg - A message sent to the EC, and its reply
- *
+/*
  * @version: Command version number (often 0)
- * @cmd: Command to send (EC_CMD_...)
- * @out_buf: Outgoing payload (to EC)
- * @outlen: Outgoing length
- * @in_buf: Incoming payload (from EC)
- * @in_len: Incoming length
+ * @command: Command to send (EC_CMD_...)
+ * @outdata: Outgoing data to EC
+ * @outsize: Outgoing length in bytes
+ * @indata: Where to put the incoming data from EC
+ * @insize: Incoming length in bytes (filled in by EC)
+ * @result: EC's response to the command (separate from communication failure)
  */
-struct cros_ec_msg {
-	u8 version;
-	u8 cmd;
-	uint8_t *out_buf;
-	int out_len;
-	uint8_t *in_buf;
-	int in_len;
+struct cros_ec_command {
+	uint32_t version;
+	uint32_t command;
+	uint8_t *outdata;
+	uint32_t outsize;
+	uint8_t *indata;
+	uint32_t insize;
+	uint32_t result;
 };
 
 /**
@@ -114,7 +114,8 @@ struct cros_ec_device {
 	struct device *parent;
 	bool wake_enabled;
 	struct mutex lock;
-	int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_msg *msg);
+	int (*cmd_xfer)(struct cros_ec_device *ec,
+			struct cros_ec_command *msg);
 };
 
 /**
@@ -148,7 +149,7 @@ int cros_ec_resume(struct cros_ec_device *ec_dev);
  * @msg: Message to write
  */
 int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
-		       struct cros_ec_msg *msg);
+		       struct cros_ec_command *msg);
 
 /**
  * cros_ec_remove - Remove a ChromeOS EC
-- 
cgit v1.2.3


From 533cec8f34778de10412dfabac991cf458ebf3c9 Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 18 Jun 2014 11:14:03 -0700
Subject: mfd: cros_ec: cleanup: remove unused fields from struct
 cros_ec_device

struct cros_ec_device has a superfluous "name" field. We can get all the
debugging info we need from the existing ec_name and phys_name fields, so
let's take out the extra field.

The printout also has sufficient info in it without explicitly adding
the transport.  Before this change:
  cros-ec-spi spi2.0: Chrome EC (SPI)

After this change:
  cros-ec-spi spi2.0: Chrome EC device registered

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec.c       | 2 +-
 drivers/mfd/cros_ec_i2c.c   | 1 -
 drivers/mfd/cros_ec_spi.c   | 1 -
 include/linux/mfd/cros_ec.h | 2 --
 4 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 2e86c282f0b4..49ed8c340868 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -140,7 +140,7 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 		goto fail_mfd;
 	}
 
-	dev_info(dev, "Chrome EC (%s)\n", ec_dev->name);
+	dev_info(dev, "Chrome EC device registered\n");
 
 	return 0;
 
diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c
index 37ed12f99560..5bb32f5550b3 100644
--- a/drivers/mfd/cros_ec_i2c.c
+++ b/drivers/mfd/cros_ec_i2c.c
@@ -132,7 +132,6 @@ static int cros_ec_i2c_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	i2c_set_clientdata(client, ec_dev);
-	ec_dev->name = "I2C";
 	ec_dev->dev = dev;
 	ec_dev->priv = client;
 	ec_dev->irq = client->irq;
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index bef7735ecfde..6e929b5f3bd3 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -370,7 +370,6 @@ static int cros_ec_spi_probe(struct spi_device *spi)
 	cros_ec_spi_dt_probe(ec_spi, dev);
 
 	spi_set_drvdata(spi, ec_dev);
-	ec_dev->name = "SPI";
 	ec_dev->dev = dev;
 	ec_dev->priv = ec_spi;
 	ec_dev->irq = spi->irq;
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index f27c03766069..2b0c5982dbc1 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -67,7 +67,6 @@ struct cros_ec_command {
  * @command_recv: receive a response
  * @command_sendrecv: send a command and receive a response
  *
- * @name: Name of this EC interface
  * @priv: Private data
  * @irq: Interrupt to use
  * @din: input buffer (for data from EC)
@@ -104,7 +103,6 @@ struct cros_ec_device {
 				void *in_buf, int in_len);
 
 	/* These are used to implement the platform-specific interface */
-	const char *name;
 	void *priv;
 	int irq;
 	uint8_t *din;
-- 
cgit v1.2.3


From 5799f95a373a2752e5c732f531a6f40fe458b818 Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 18 Jun 2014 11:14:04 -0700
Subject: mfd: cros_ec: cleanup: Remove EC wrapper functions

Remove the three wrapper functions that talk to the EC without passing all
the desired arguments and just use the underlying communication function
that passes everything in a struct intead.

This is internal code refactoring only. Nothing should change.

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/i2c/busses/i2c-cros-ec-tunnel.c | 15 +++++++++++----
 drivers/input/keyboard/cros_ec_keyb.c   | 12 ++++++++++--
 drivers/mfd/cros_ec.c                   | 32 --------------------------------
 include/linux/mfd/cros_ec.h             | 19 ++++++-------------
 4 files changed, 27 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
index 8e7a71487bb1..dd07818d03d0 100644
--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
@@ -183,6 +183,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
 	u8 *request = NULL;
 	u8 *response = NULL;
 	int result;
+	struct cros_ec_command msg;
 
 	request_len = ec_i2c_count_message(i2c_msgs, num);
 	if (request_len < 0) {
@@ -218,9 +219,15 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
 	}
 
 	ec_i2c_construct_message(request, i2c_msgs, num, bus_num);
-	result = bus->ec->command_sendrecv(bus->ec, EC_CMD_I2C_PASSTHRU,
-					   request, request_len,
-					   response, response_len);
+
+	msg.version = 0;
+	msg.command = EC_CMD_I2C_PASSTHRU;
+	msg.outdata = request;
+	msg.outsize = request_len;
+	msg.indata = response;
+	msg.insize = response_len;
+
+	result = bus->ec->cmd_xfer(bus->ec, &msg);
 	if (result)
 		goto exit;
 
@@ -258,7 +265,7 @@ static int ec_i2c_probe(struct platform_device *pdev)
 	u32 remote_bus;
 	int err;
 
-	if (!ec->command_sendrecv) {
+	if (!ec->cmd_xfer) {
 		dev_err(dev, "Missing sendrecv\n");
 		return -EINVAL;
 	}
diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
index 408379669d3c..b8341ab99f55 100644
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -191,8 +191,16 @@ static void cros_ec_keyb_close(struct input_dev *dev)
 
 static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state)
 {
-	return ckdev->ec->command_recv(ckdev->ec, EC_CMD_MKBP_STATE,
-					  kb_state, ckdev->cols);
+	struct cros_ec_command msg = {
+		.version = 0,
+		.command = EC_CMD_MKBP_STATE,
+		.outdata = NULL,
+		.outsize = 0,
+		.indata = kb_state,
+		.insize = ckdev->cols,
+	};
+
+	return ckdev->ec->cmd_xfer(ckdev->ec, &msg);
 }
 
 static int cros_ec_keyb_work(struct notifier_block *nb,
diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 49ed8c340868..4851ed2fbe31 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -44,34 +44,6 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_prepare_tx);
 
-static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev,
-		uint16_t cmd, void *out_buf, int out_len,
-		void *in_buf, int in_len)
-{
-	struct cros_ec_command msg;
-
-	msg.version = cmd >> 8;
-	msg.command = cmd & 0xff;
-	msg.outdata = out_buf;
-	msg.outsize = out_len;
-	msg.indata = in_buf;
-	msg.insize = in_len;
-
-	return ec_dev->cmd_xfer(ec_dev, &msg);
-}
-
-static int cros_ec_command_recv(struct cros_ec_device *ec_dev,
-		uint16_t cmd, void *buf, int buf_len)
-{
-	return cros_ec_command_sendrecv(ec_dev, cmd, NULL, 0, buf, buf_len);
-}
-
-static int cros_ec_command_send(struct cros_ec_device *ec_dev,
-		uint16_t cmd, void *buf, int buf_len)
-{
-	return cros_ec_command_sendrecv(ec_dev, cmd, buf, buf_len, NULL, 0);
-}
-
 static irqreturn_t ec_irq_thread(int irq, void *data)
 {
 	struct cros_ec_device *ec_dev = data;
@@ -104,10 +76,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 
 	BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier);
 
-	ec_dev->command_send = cros_ec_command_send;
-	ec_dev->command_recv = cros_ec_command_recv;
-	ec_dev->command_sendrecv = cros_ec_command_sendrecv;
-
 	if (ec_dev->din_size) {
 		ec_dev->din = devm_kzalloc(dev, ec_dev->din_size, GFP_KERNEL);
 		if (!ec_dev->din)
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 2b0c5982dbc1..60c088055f3a 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -63,9 +63,10 @@ struct cros_ec_command {
  * @was_wake_device: true if this device was set to wake the system from
  * sleep at the last suspend
  * @event_notifier: interrupt event notifier for transport devices
- * @command_send: send a command
- * @command_recv: receive a response
- * @command_sendrecv: send a command and receive a response
+ * @cmd_xfer: send command to EC and get response
+ *     Returns 0 if the communication succeeded, but that doesn't mean the EC
+ *     was happy with the command it got. Caller should check msg.result for
+ *     the EC's result code.
  *
  * @priv: Private data
  * @irq: Interrupt to use
@@ -83,7 +84,6 @@ struct cros_ec_command {
  * @parent: pointer to parent device (e.g. i2c or spi device)
  * @wake_enabled: true if this device can wake the system from sleep
  * @lock: one transaction at a time
- * @cmd_xfer: low-level channel to the EC
  */
 struct cros_ec_device {
 
@@ -94,13 +94,8 @@ struct cros_ec_device {
 	bool was_wake_device;
 	struct class *cros_class;
 	struct blocking_notifier_head event_notifier;
-	int (*command_send)(struct cros_ec_device *ec,
-			    uint16_t cmd, void *out_buf, int out_len);
-	int (*command_recv)(struct cros_ec_device *ec,
-			    uint16_t cmd, void *in_buf, int in_len);
-	int (*command_sendrecv)(struct cros_ec_device *ec,
-				uint16_t cmd, void *out_buf, int out_len,
-				void *in_buf, int in_len);
+	int (*cmd_xfer)(struct cros_ec_device *ec,
+			struct cros_ec_command *msg);
 
 	/* These are used to implement the platform-specific interface */
 	void *priv;
@@ -112,8 +107,6 @@ struct cros_ec_device {
 	struct device *parent;
 	bool wake_enabled;
 	struct mutex lock;
-	int (*cmd_xfer)(struct cros_ec_device *ec,
-			struct cros_ec_command *msg);
 };
 
 /**
-- 
cgit v1.2.3


From 6db07b6336589ff480528173e41f8f6af3f0097f Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 18 Jun 2014 11:14:05 -0700
Subject: mfd: cros_ec: Check result code from EC messages

Just because the host was able to talk to the EC doesn't mean that the EC
was happy with what it was told. Errors in communincation are not the same
as error messages from the EC itself.

This change lets the EC report its errors separately.

[dianders: Added common function to cros_ec.c]

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec.c       | 18 ++++++++++++++++++
 drivers/mfd/cros_ec_i2c.c   |  8 +++-----
 drivers/mfd/cros_ec_spi.c   | 19 ++++++-------------
 include/linux/mfd/cros_ec.h | 12 ++++++++++++
 4 files changed, 39 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 4851ed2fbe31..83e30c663578 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -44,6 +44,24 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_prepare_tx);
 
+int cros_ec_check_result(struct cros_ec_device *ec_dev,
+			 struct cros_ec_command *msg)
+{
+	switch (msg->result) {
+	case EC_RES_SUCCESS:
+		return 0;
+	case EC_RES_IN_PROGRESS:
+		dev_dbg(ec_dev->dev, "command 0x%02x in progress\n",
+			msg->command);
+		return -EAGAIN;
+	default:
+		dev_dbg(ec_dev->dev, "command 0x%02x returned %d\n",
+			msg->command, msg->result);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(cros_ec_check_result);
+
 static irqreturn_t ec_irq_thread(int irq, void *data)
 {
 	struct cros_ec_device *ec_dev = data;
diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c
index 5bb32f5550b3..189e7d1d7742 100644
--- a/drivers/mfd/cros_ec_i2c.c
+++ b/drivers/mfd/cros_ec_i2c.c
@@ -92,12 +92,10 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev,
 	}
 
 	/* check response error code */
-	if (i2c_msg[1].buf[0]) {
-		dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n",
-			 msg->command, i2c_msg[1].buf[0]);
-		ret = -EINVAL;
+	msg->result = i2c_msg[1].buf[0];
+	ret = cros_ec_check_result(ec_dev, msg);
+	if (ret)
 		goto done;
-	}
 
 	/* copy response packet payload and compute checksum */
 	sum = in_buf[0] + in_buf[1];
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index 6e929b5f3bd3..da1da05cd546 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -285,21 +285,14 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 		goto exit;
 	}
 
-	/* check response error code */
 	ptr = ec_dev->din;
-	if (ptr[0]) {
-		if (ptr[0] == EC_RES_IN_PROGRESS) {
-			dev_dbg(ec_dev->dev, "command 0x%02x in progress\n",
-				ec_msg->command);
-			ret = -EAGAIN;
-			goto exit;
-		}
-		dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n",
-			 ec_msg->command, ptr[0]);
-		debug_packet(ec_dev->dev, "in_err", ptr, len);
-		ret = -EINVAL;
+
+	/* check response error code */
+	ec_msg->result = ptr[0];
+	ret = cros_ec_check_result(ec_dev, ec_msg);
+	if (ret)
 		goto exit;
-	}
+
 	len = ptr[1];
 	sum = ptr[0] + ptr[1];
 	if (len > ec_msg->insize) {
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 60c088055f3a..1f79f162abe4 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -142,6 +142,18 @@ int cros_ec_resume(struct cros_ec_device *ec_dev);
 int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
 		       struct cros_ec_command *msg);
 
+/**
+ * cros_ec_check_result - Check ec_msg->result
+ *
+ * This is used by ChromeOS EC drivers to check the ec_msg->result for
+ * errors and to warn about them.
+ *
+ * @ec_dev: EC device
+ * @msg: Message to check
+ */
+int cros_ec_check_result(struct cros_ec_device *ec_dev,
+			 struct cros_ec_command *msg);
+
 /**
  * cros_ec_remove - Remove a ChromeOS EC
  *
-- 
cgit v1.2.3


From 12ebc8a50bc54e3a6fe207861fc6793181f9c2dc Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 18 Jun 2014 11:14:06 -0700
Subject: mfd: cros_ec: ec_dev->cmd_xfer() returns number of bytes received
 from EC

When communicating with the EC, the cmd_xfer() function should return the
number of bytes it received from the EC, or negative on error.

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/i2c/busses/i2c-cros-ec-tunnel.c | 2 +-
 drivers/mfd/cros_ec_i2c.c               | 2 +-
 drivers/mfd/cros_ec_spi.c               | 2 +-
 include/linux/mfd/cros_ec.h             | 8 ++++----
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
index dd07818d03d0..05e033c98115 100644
--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
@@ -228,7 +228,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
 	msg.insize = response_len;
 
 	result = bus->ec->cmd_xfer(bus->ec, &msg);
-	if (result)
+	if (result < 0)
 		goto exit;
 
 	result = ec_i2c_parse_response(response, i2c_msgs, &num);
diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c
index 189e7d1d7742..fd7a546d3478 100644
--- a/drivers/mfd/cros_ec_i2c.c
+++ b/drivers/mfd/cros_ec_i2c.c
@@ -111,7 +111,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev,
 		goto done;
 	}
 
-	ret = 0;
+	ret = i2c_msg[1].buf[1];
  done:
 	kfree(in_buf);
 	kfree(out_buf);
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index da1da05cd546..ac52e3653e90 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -320,7 +320,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
 		goto exit;
 	}
 
-	ret = 0;
+	ret = len;
 exit:
 	mutex_unlock(&ec_spi->lock);
 	return ret;
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 1f79f162abe4..0ebf26fddbbb 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -41,7 +41,7 @@ enum {
  * @outdata: Outgoing data to EC
  * @outsize: Outgoing length in bytes
  * @indata: Where to put the incoming data from EC
- * @insize: Incoming length in bytes (filled in by EC)
+ * @insize: Max number of bytes to accept from EC
  * @result: EC's response to the command (separate from communication failure)
  */
 struct cros_ec_command {
@@ -64,9 +64,9 @@ struct cros_ec_command {
  * sleep at the last suspend
  * @event_notifier: interrupt event notifier for transport devices
  * @cmd_xfer: send command to EC and get response
- *     Returns 0 if the communication succeeded, but that doesn't mean the EC
- *     was happy with the command it got. Caller should check msg.result for
- *     the EC's result code.
+ *     Returns the number of bytes received if the communication succeeded, but
+ *     that doesn't mean the EC was happy with the command. The caller
+ *     should check msg.result for the EC's result code.
  *
  * @priv: Private data
  * @irq: Interrupt to use
-- 
cgit v1.2.3


From d1fd345e2087f0362c92bd3b0a1cea7fe636ac3a Mon Sep 17 00:00:00 2001
From: Andrew Bresticker <abrestic@chromium.org>
Date: Wed, 18 Jun 2014 11:14:07 -0700
Subject: mfd: cros_ec: Move EC interrupt to cros_ec_keyb

If we receive EC interrupts after the cros_ec driver has probed, but
before the cros_ec_keyb driver has probed, the cros_ec IRQ handler
will not run the cros_ec_keyb notifier and the EC will leave the IRQ
line asserted.  The cros_ec IRQ handler then returns IRQ_HANDLED and
the resulting flood of interrupts causes the machine to hang.

Since the EC interrupt is currently only used for the keyboard, move
the setup and handling of the EC interrupt to the cros_ec_keyb driver.

Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/input/keyboard/cros_ec_keyb.c | 58 ++++++++++++++++++++---------------
 drivers/mfd/cros_ec.c                 | 35 +--------------------
 include/linux/mfd/cros_ec.h           |  2 --
 3 files changed, 34 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
index b8341ab99f55..791781ade4e7 100644
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -24,8 +24,8 @@
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
+#include <linux/interrupt.h>
 #include <linux/kernel.h>
-#include <linux/notifier.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/input/matrix_keypad.h>
@@ -42,7 +42,6 @@
  * @dev: Device pointer
  * @idev: Input device
  * @ec: Top level ChromeOS device to use to talk to EC
- * @event_notifier: interrupt event notifier for transport devices
  */
 struct cros_ec_keyb {
 	unsigned int rows;
@@ -55,7 +54,6 @@ struct cros_ec_keyb {
 	struct device *dev;
 	struct input_dev *idev;
 	struct cros_ec_device *ec;
-	struct notifier_block notifier;
 };
 
 
@@ -173,22 +171,6 @@ static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev,
 	input_sync(ckdev->idev);
 }
 
-static int cros_ec_keyb_open(struct input_dev *dev)
-{
-	struct cros_ec_keyb *ckdev = input_get_drvdata(dev);
-
-	return blocking_notifier_chain_register(&ckdev->ec->event_notifier,
-						&ckdev->notifier);
-}
-
-static void cros_ec_keyb_close(struct input_dev *dev)
-{
-	struct cros_ec_keyb *ckdev = input_get_drvdata(dev);
-
-	blocking_notifier_chain_unregister(&ckdev->ec->event_notifier,
-					   &ckdev->notifier);
-}
-
 static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state)
 {
 	struct cros_ec_command msg = {
@@ -203,19 +185,41 @@ static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state)
 	return ckdev->ec->cmd_xfer(ckdev->ec, &msg);
 }
 
-static int cros_ec_keyb_work(struct notifier_block *nb,
-		     unsigned long state, void *_notify)
+static irqreturn_t cros_ec_keyb_irq(int irq, void *data)
 {
+	struct cros_ec_keyb *ckdev = data;
+	struct cros_ec_device *ec = ckdev->ec;
 	int ret;
-	struct cros_ec_keyb *ckdev = container_of(nb, struct cros_ec_keyb,
-						    notifier);
 	uint8_t kb_state[ckdev->cols];
 
+	if (device_may_wakeup(ec->dev))
+		pm_wakeup_event(ec->dev, 0);
+
 	ret = cros_ec_keyb_get_state(ckdev, kb_state);
 	if (ret >= 0)
 		cros_ec_keyb_process(ckdev, kb_state, ret);
+	else
+		dev_err(ec->dev, "failed to get keyboard state: %d\n", ret);
 
-	return NOTIFY_DONE;
+	return IRQ_HANDLED;
+}
+
+static int cros_ec_keyb_open(struct input_dev *dev)
+{
+	struct cros_ec_keyb *ckdev = input_get_drvdata(dev);
+	struct cros_ec_device *ec = ckdev->ec;
+
+	return request_threaded_irq(ec->irq, NULL, cros_ec_keyb_irq,
+					IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+					"cros_ec_keyb", ckdev);
+}
+
+static void cros_ec_keyb_close(struct input_dev *dev)
+{
+	struct cros_ec_keyb *ckdev = input_get_drvdata(dev);
+	struct cros_ec_device *ec = ckdev->ec;
+
+	free_irq(ec->irq, ckdev);
 }
 
 static int cros_ec_keyb_probe(struct platform_device *pdev)
@@ -246,8 +250,12 @@ static int cros_ec_keyb_probe(struct platform_device *pdev)
 	if (!idev)
 		return -ENOMEM;
 
+	if (!ec->irq) {
+		dev_err(dev, "no EC IRQ specified\n");
+		return -EINVAL;
+	}
+
 	ckdev->ec = ec;
-	ckdev->notifier.notifier_call = cros_ec_keyb_work;
 	ckdev->dev = dev;
 	dev_set_drvdata(&pdev->dev, ckdev);
 
diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 83e30c663578..4873f9c50452 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -62,18 +62,6 @@ int cros_ec_check_result(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_check_result);
 
-static irqreturn_t ec_irq_thread(int irq, void *data)
-{
-	struct cros_ec_device *ec_dev = data;
-
-	if (device_may_wakeup(ec_dev->dev))
-		pm_wakeup_event(ec_dev->dev, 0);
-
-	blocking_notifier_call_chain(&ec_dev->event_notifier, 1, ec_dev);
-
-	return IRQ_HANDLED;
-}
-
 static const struct mfd_cell cros_devs[] = {
 	{
 		.name = "cros-ec-keyb",
@@ -92,8 +80,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 	struct device *dev = ec_dev->dev;
 	int err = 0;
 
-	BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier);
-
 	if (ec_dev->din_size) {
 		ec_dev->din = devm_kzalloc(dev, ec_dev->din_size, GFP_KERNEL);
 		if (!ec_dev->din)
@@ -105,42 +91,23 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 			return -ENOMEM;
 	}
 
-	if (!ec_dev->irq) {
-		dev_dbg(dev, "no valid IRQ: %d\n", ec_dev->irq);
-		return err;
-	}
-
-	err = request_threaded_irq(ec_dev->irq, NULL, ec_irq_thread,
-				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				   "chromeos-ec", ec_dev);
-	if (err) {
-		dev_err(dev, "request irq %d: error %d\n", ec_dev->irq, err);
-		return err;
-	}
-
 	err = mfd_add_devices(dev, 0, cros_devs,
 			      ARRAY_SIZE(cros_devs),
 			      NULL, ec_dev->irq, NULL);
 	if (err) {
 		dev_err(dev, "failed to add mfd devices\n");
-		goto fail_mfd;
+		return err;
 	}
 
 	dev_info(dev, "Chrome EC device registered\n");
 
 	return 0;
-
-fail_mfd:
-	free_irq(ec_dev->irq, ec_dev);
-
-	return err;
 }
 EXPORT_SYMBOL(cros_ec_register);
 
 int cros_ec_remove(struct cros_ec_device *ec_dev)
 {
 	mfd_remove_devices(ec_dev->dev);
-	free_irq(ec_dev->irq, ec_dev);
 
 	return 0;
 }
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 0ebf26fddbbb..fcbe9d129a9d 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -62,7 +62,6 @@ struct cros_ec_command {
  * @dev: Device pointer
  * @was_wake_device: true if this device was set to wake the system from
  * sleep at the last suspend
- * @event_notifier: interrupt event notifier for transport devices
  * @cmd_xfer: send command to EC and get response
  *     Returns the number of bytes received if the communication succeeded, but
  *     that doesn't mean the EC was happy with the command. The caller
@@ -93,7 +92,6 @@ struct cros_ec_device {
 	struct device *dev;
 	bool was_wake_device;
 	struct class *cros_class;
-	struct blocking_notifier_head event_notifier;
 	int (*cmd_xfer)(struct cros_ec_device *ec,
 			struct cros_ec_command *msg);
 
-- 
cgit v1.2.3


From 01a0f4aaaefff9f57bb17e6cc514c84ba43a7335 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Wed, 2 Jul 2014 14:34:13 +0100
Subject: mfd: tps65910: Rid data size incompatibility warn when building for
 64bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extinguishes:

../drivers/mfd/tps65910.c: In function ‘tps65910_parse_dt’:
../drivers/mfd/tps65910.c:404:14:
	warning: cast from pointer to integer of different size

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/tps65910.c       | 10 +++++-----
 include/linux/mfd/tps65910.h |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index f9e42ea1cb1a..f243e75d28f3 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -387,7 +387,7 @@ static const struct of_device_id tps65910_of_match[] = {
 MODULE_DEVICE_TABLE(of, tps65910_of_match);
 
 static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client,
-						int *chip_id)
+						unsigned long *chip_id)
 {
 	struct device_node *np = client->dev.of_node;
 	struct tps65910_board *board_info;
@@ -401,7 +401,7 @@ static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client,
 		return NULL;
 	}
 
-	*chip_id  = (int)match->data;
+	*chip_id  = (unsigned long)match->data;
 
 	board_info = devm_kzalloc(&client->dev, sizeof(*board_info),
 			GFP_KERNEL);
@@ -431,7 +431,7 @@ static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client,
 #else
 static inline
 struct tps65910_board *tps65910_parse_dt(struct i2c_client *client,
-					 int *chip_id)
+					 unsigned long *chip_id)
 {
 	return NULL;
 }
@@ -453,14 +453,14 @@ static void tps65910_power_off(void)
 }
 
 static int tps65910_i2c_probe(struct i2c_client *i2c,
-					const struct i2c_device_id *id)
+			      const struct i2c_device_id *id)
 {
 	struct tps65910 *tps65910;
 	struct tps65910_board *pmic_plat_data;
 	struct tps65910_board *of_pmic_plat_data = NULL;
 	struct tps65910_platform_data *init_data;
+	unsigned long chip_id = id->driver_data;
 	int ret = 0;
-	int chip_id = id->driver_data;
 
 	pmic_plat_data = dev_get_platdata(&i2c->dev);
 
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 16c2335c2856..6483a6fdce59 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -892,7 +892,7 @@ struct tps65910 {
 	struct device *dev;
 	struct i2c_client *i2c_client;
 	struct regmap *regmap;
-	unsigned int id;
+	unsigned long id;
 
 	/* Client devices */
 	struct tps65910_pmic *pmic;
-- 
cgit v1.2.3


From 8fe39aac0578cbb0abf27e1be70ff581e0c1d836 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Fri, 22 Nov 2013 13:22:13 +0100
Subject: drbd: device->ldev is not guaranteed on an D_ATTACHING disk

Some parts of the code assumed that get_ldev_if_state(device, D_ATTACHING)
is sufficient to access the ldev member of the device object. That was
wrong. ldev may not be there or might be freed at any time if the device
has a disk state of D_ATTACHING.

bm_rw()
  Documented that drbd_bm_read() is only called from drbd_adm_attach.
  drbd_bm_write() is only called when a reference is held, and it is
  documented that a caller has to hold a reference before calling
  drbd_bm_write()

drbd_bm_write_page()
  Use get_ldev() instead of get_ldev_if_state(device, D_ATTACHING)

drbd_bmio_set_n_write()
  No longer use get_ldev_if_state(device, D_ATTACHING). All callers
  hold a reference to ldev now.

drbd_bmio_clear_n_write()
  All callers where holding a reference of ldev anyways. Remove the
  misleading get_ldev_if_state(device, D_ATTACHING)

drbd_reconsider_max_bio_size()
  Removed the get_ldev_if_state(device, D_ATTACHING). All callers
  now pass a struct drbd_backing_dev* when they have a proper
  reference, or a NULL pointer.
  Before this fix, the receiver could trigger a NULL pointer
  deref when in drbd_reconsider_max_bio_size()

drbd_bump_write_ordering()
  Used get_ldev_if_state(device, D_ATTACHING) with the wrong assumption.
  Remove it, and allow the caller to pass in a struct drbd_backing_dev*
  when the caller knows that accessing this bdev is safe.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_bitmap.c   |  4 +++-
 drivers/block/drbd/drbd_int.h      |  9 ++++----
 drivers/block/drbd/drbd_main.c     | 36 +++++++++++++------------------
 drivers/block/drbd/drbd_nl.c       | 41 +++++++++++++++++++++++-------------
 drivers/block/drbd/drbd_receiver.c | 43 ++++++++++++++++++++++++++------------
 include/linux/drbd.h               |  2 +-
 6 files changed, 79 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 1aa29f8fdfe1..ed310415020b 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1085,6 +1085,8 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
 		kfree(ctx);
 		return -ENODEV;
 	}
+	/* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from
+	   drbd_adm_attach(), after device->ldev was assigned. */
 
 	if (!ctx->flags)
 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
@@ -1260,7 +1262,7 @@ int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold
 		.kref = { ATOMIC_INIT(2) },
 	};
 
-	if (!get_ldev_if_state(device, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+	if (!get_ldev(device)) {  /* put is in bm_aio_ctx_destroy() */
 		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
 		kfree(ctx);
 		return -ENODEV;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 1ef2474e8f11..c87bc8e8fd82 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -984,8 +984,8 @@ extern int drbd_bitmap_io(struct drbd_device *device,
 extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
 		int (*io_fn)(struct drbd_device *),
 		char *why, enum bm_flag flags);
-extern int drbd_bmio_set_n_write(struct drbd_device *device);
-extern int drbd_bmio_clear_n_write(struct drbd_device *device);
+extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local);
+extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local);
 extern void drbd_ldev_destroy(struct drbd_device *device);
 
 /* Meta data layout
@@ -1313,7 +1313,7 @@ enum determine_dev_size {
 extern enum determine_dev_size
 drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_device *);
-extern void drbd_reconsider_max_bio_size(struct drbd_device *device);
+extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
 					enum drbd_role new_role,
 					int force);
@@ -1479,7 +1479,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
 		generic_make_request(bio);
 }
 
-void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_ordering_e wo);
+void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
+			      enum write_ordering_e wo);
 
 /* drbd_proc.c */
 extern struct proc_dir_entry *drbd_proc;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 17b9a237f2e6..a6af93528d57 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3466,23 +3466,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
  *
  * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
  */
-int drbd_bmio_set_n_write(struct drbd_device *device)
+int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
 {
 	int rv = -EIO;
 
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		drbd_md_set_flag(device, MDF_FULL_SYNC);
-		drbd_md_sync(device);
-		drbd_bm_set_all(device);
-
-		rv = drbd_bm_write(device);
+	drbd_md_set_flag(device, MDF_FULL_SYNC);
+	drbd_md_sync(device);
+	drbd_bm_set_all(device);
 
-		if (!rv) {
-			drbd_md_clear_flag(device, MDF_FULL_SYNC);
-			drbd_md_sync(device);
-		}
+	rv = drbd_bm_write(device);
 
-		put_ldev(device);
+	if (!rv) {
+		drbd_md_clear_flag(device, MDF_FULL_SYNC);
+		drbd_md_sync(device);
 	}
 
 	return rv;
@@ -3494,18 +3490,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device)
  *
  * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
  */
-int drbd_bmio_clear_n_write(struct drbd_device *device)
+int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local)
 {
-	int rv = -EIO;
-
 	drbd_resume_al(device);
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		drbd_bm_clear_all(device);
-		rv = drbd_bm_write(device);
-		put_ldev(device);
-	}
-
-	return rv;
+	drbd_bm_clear_all(device);
+	return drbd_bm_write(device);
 }
 
 static int w_bitmap_io(struct drbd_work *w, int unused)
@@ -3603,6 +3592,9 @@ static int w_go_diskless(struct drbd_work *w, int unused)
  * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
  * called from worker context. It MUST NOT be used while a previous such
  * work is still pending!
+ *
+ * Its worker function encloses the call of io_fn() by get_ldev() and
+ * put_ldev().
  */
 void drbd_queue_bitmap_io(struct drbd_device *device,
 			  int (*io_fn)(struct drbd_device *),
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 43fad2c1ba01..25f4b6f67c21 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1110,15 +1110,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
 	return 0;
 }
 
-static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
+static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
+				   unsigned int max_bio_size)
 {
 	struct request_queue * const q = device->rq_queue;
 	unsigned int max_hw_sectors = max_bio_size >> 9;
 	unsigned int max_segments = 0;
 	struct request_queue *b = NULL;
 
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		b = device->ldev->backing_bdev->bd_disk->queue;
+	if (bdev) {
+		b = bdev->backing_bdev->bd_disk->queue;
 
 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
 		rcu_read_lock();
@@ -1163,11 +1164,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
 				 b->backing_dev_info.ra_pages);
 			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
 		}
-		put_ldev(device);
 	}
 }
 
-void drbd_reconsider_max_bio_size(struct drbd_device *device)
+void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
 {
 	unsigned int now, new, local, peer;
 
@@ -1175,10 +1175,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
 	local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
 	peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
 
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
+	if (bdev) {
+		local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
 		device->local_max_bio_size = local;
-		put_ldev(device);
 	}
 	local = min(local, DRBD_MAX_BIO_SIZE);
 
@@ -1211,7 +1210,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
 	if (new != now)
 		drbd_info(device, "max BIO size = %u\n", new);
 
-	drbd_setup_queue_param(device, new);
+	drbd_setup_queue_param(device, bdev, new);
 }
 
 /* Starts the worker thread */
@@ -1399,7 +1398,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	else
 		set_bit(MD_NO_FUA, &device->flags);
 
-	drbd_bump_write_ordering(device->resource, WO_bdev_flush);
+	drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
 
 	drbd_md_sync(device);
 
@@ -1704,7 +1703,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	new_disk_conf = NULL;
 	new_plan = NULL;
 
-	drbd_bump_write_ordering(device->resource, WO_bdev_flush);
+	drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
 
 	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
 		set_bit(CRASHED_PRIMARY, &device->flags);
@@ -1720,7 +1719,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	device->read_cnt = 0;
 	device->writ_cnt = 0;
 
-	drbd_reconsider_max_bio_size(device);
+	drbd_reconsider_max_bio_size(device, device->ldev);
 
 	/* If I am currently not R_PRIMARY,
 	 * but meta data primary indicator is set,
@@ -2648,8 +2647,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
 	device = adm_ctx.device;
+	if (!get_ldev(device)) {
+		retcode = ERR_NO_DISK;
+		goto out;
+	}
+
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync.
@@ -2673,6 +2677,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
 	drbd_resume_io(device);
 	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	put_ldev(device);
 out:
 	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
@@ -2698,7 +2703,7 @@ out:
 	return 0;
 }
 
-static int drbd_bmio_set_susp_al(struct drbd_device *device)
+static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
 {
 	int rv;
 
@@ -2719,8 +2724,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
 	device = adm_ctx.device;
+	if (!get_ldev(device)) {
+		retcode = ERR_NO_DISK;
+		goto out;
+	}
+
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync.
@@ -2747,6 +2757,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
 	drbd_resume_io(device);
 	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	put_ldev(device);
 out:
 	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c7084188c2ae..be0c3761cdc6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1168,7 +1168,7 @@ static void drbd_flush(struct drbd_connection *connection)
 				/* would rather check on EOPNOTSUPP, but that is not reliable.
 				 * don't try again for ANY return value != 0
 				 * if (rv == -EOPNOTSUPP) */
-				drbd_bump_write_ordering(connection->resource, WO_drain_io);
+				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
 			}
 			put_ldev(device);
 			kref_put(&device->kref, drbd_destroy_device);
@@ -1257,14 +1257,29 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio
 	return rv;
 }
 
+static enum write_ordering_e
+max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
+{
+	struct disk_conf *dc;
+
+	dc = rcu_dereference(bdev->disk_conf);
+
+	if (wo == WO_bdev_flush && !dc->disk_flushes)
+		wo = WO_drain_io;
+	if (wo == WO_drain_io && !dc->disk_drain)
+		wo = WO_none;
+
+	return wo;
+}
+
 /**
  * drbd_bump_write_ordering() - Fall back to an other write ordering method
  * @connection:	DRBD connection.
  * @wo:		Write ordering method to try.
  */
-void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_ordering_e wo)
+void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
+			      enum write_ordering_e wo)
 {
-	struct disk_conf *dc;
 	struct drbd_device *device;
 	enum write_ordering_e pwo;
 	int vnr;
@@ -1278,17 +1293,18 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_orderin
 	wo = min(pwo, wo);
 	rcu_read_lock();
 	idr_for_each_entry(&resource->devices, device, vnr) {
-		if (!get_ldev_if_state(device, D_ATTACHING))
-			continue;
-		dc = rcu_dereference(device->ldev->disk_conf);
-
-		if (wo == WO_bdev_flush && !dc->disk_flushes)
-			wo = WO_drain_io;
-		if (wo == WO_drain_io && !dc->disk_drain)
-			wo = WO_none;
-		put_ldev(device);
+		if (get_ldev(device)) {
+			wo = max_allowed_wo(device->ldev, wo);
+			if (device->ldev == bdev)
+				bdev = NULL;
+			put_ldev(device);
+		}
 	}
 	rcu_read_unlock();
+
+	if (bdev)
+		wo = max_allowed_wo(bdev, wo);
+
 	resource->write_ordering = wo;
 	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
 		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
@@ -3709,7 +3725,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 	}
 
 	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
-	drbd_reconsider_max_bio_size(device);
 	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
 	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
 	   drbd_reconsider_max_bio_size(), we can be sure that after
@@ -3717,6 +3732,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(device)) {
+		drbd_reconsider_max_bio_size(device, device->ldev);
 		dd = drbd_determine_dev_size(device, ddsf, NULL);
 		put_ldev(device);
 		if (dd == DS_ERROR)
@@ -3724,6 +3740,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 		drbd_md_sync(device);
 	} else {
 		/* I am diskless, need to accept the peer's size. */
+		drbd_reconsider_max_bio_size(device, NULL);
 		drbd_set_my_capacity(device, p_size);
 	}
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 3dbe9bd57a09..20ec8903b1e4 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -245,7 +245,7 @@ enum drbd_disk_state {
 	D_DISKLESS,
 	D_ATTACHING,      /* In the process of reading the meta-data */
 	D_FAILED,         /* Becomes D_DISKLESS as soon as we told it the peer */
-			/* when >= D_FAILED it is legal to access mdev->bc */
+			  /* when >= D_FAILED it is legal to access mdev->ldev */
 	D_NEGOTIATING,    /* Late attaching state, we need to talk to the peer */
 	D_INCONSISTENT,
 	D_OUTDATED,
-- 
cgit v1.2.3


From aaaba34576407857f6146ff6c330f06e63fb2bf2 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 18 Mar 2014 12:30:09 +0100
Subject: drbd: implement csums-after-crash-only

Checksum based resync trades CPU cycles for network bandwidth,
in situations where we expect much of the to-be-resynced blocks
to be actually identical on both sides already.

In a "network hickup" scenario, it won't help:
all to-be-resynced blocks will typically be different.

The use case is for the resync of *potentially* different blocks
after crash recovery -- the crash recovery had marked larger areas
(those covered by the activity log) as need-to-be-resynced,
just in case. Most of those blocks will be identical.

This option makes it possible to configure checksum based resync,
but only actually use it for the first resync after primary crash.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  2 ++
 drivers/block/drbd/drbd_receiver.c |  2 ++
 drivers/block/drbd/drbd_worker.c   | 24 ++++++++++++++++++++----
 include/linux/drbd_genl.h          |  3 +++
 include/linux/drbd_limits.h        |  1 +
 5 files changed, 28 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index abf5aefd9790..fe6595a96a9a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -738,6 +738,8 @@ struct drbd_device {
 	struct rb_root read_requests;
 	struct rb_root write_requests;
 
+	/* use checksums for *this* resync */
+	bool use_csums;
 	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
 	unsigned long rs_total;
 	/* number of resync blocks that failed in this run */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 5626c5babc3f..d326af67c27e 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2555,6 +2555,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
 			peer_req->w.cb = w_e_end_csum_rs_req;
 			/* used in the sector offset progress display */
 			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
+			/* remember to report stats in drbd_resync_finished */
+			device->use_csums = true;
 		} else if (pi->cmd == P_OV_REPLY) {
 			/* track progress, we may need to throttle */
 			atomic_add(size >> 9, &device->rs_sect_in);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 2ff5fd49a3b1..6532a697cf49 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -698,8 +698,8 @@ next_sector:
 		/* adjust very last sectors, in case we are oddly sized */
 		if (sector + (size>>9) > capacity)
 			size = (capacity-sector)<<9;
-		if (connection->agreed_pro_version >= 89 &&
-		    connection->csums_tfm) {
+
+		if (device->use_csums) {
 			switch (read_for_csum(peer_device, sector, size)) {
 			case -EIO: /* Disk failure */
 				put_ldev(device);
@@ -913,7 +913,7 @@ int drbd_resync_finished(struct drbd_device *device)
 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
 			khelper_cmd = "after-resync-target";
 
-		if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
+		if (device->use_csums && device->rs_total) {
 			const unsigned long s = device->rs_same_csum;
 			const unsigned long t = device->rs_total;
 			const int ratio =
@@ -1622,6 +1622,18 @@ static void do_start_resync(struct drbd_device *device)
 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
 }
 
+static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
+{
+	bool csums_after_crash_only;
+	rcu_read_lock();
+	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
+	rcu_read_unlock();
+	return connection->agreed_pro_version >= 89 &&		/* supported? */
+		connection->csums_tfm &&			/* configured? */
+		(csums_after_crash_only == 0			/* use for each resync? */
+		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
+}
+
 /**
  * drbd_start_resync() - Start the resync process
  * @device:	DRBD device.
@@ -1756,8 +1768,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 		     drbd_conn_str(ns.conn),
 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
 		     (unsigned long) device->rs_total);
-		if (side == C_SYNC_TARGET)
+		if (side == C_SYNC_TARGET) {
 			device->bm_resync_fo = 0;
+			device->use_csums = use_checksum_based_resync(connection, device);
+		} else {
+			device->use_csums = 0;
+		}
 
 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
 		 * with w_send_oos, or the sync target will get confused as to
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 4193f5f2636c..71fc924c53fa 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -171,6 +171,9 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative)
 	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 	/* 9: __u32_field_def(30,	DRBD_GENLA_F_MANDATORY,	fencing_policy, DRBD_FENCING_DEF) */
+	/* 9: __str_field_def(31,     DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */
+	/* 9: __u32_field(32,         DRBD_F_REQUIRED | DRBD_F_INVARIANT,     peer_node_id) */
+	__flg_field_def(33, 0 /* OPTIONAL */,	csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 17e50bb00521..9d2df1d51414 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -214,6 +214,7 @@
 #define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
 #define DRBD_ALWAYS_ASBP_DEF	0
 #define DRBD_USE_RLE_DEF	1
+#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0
 
 #define DRBD_AL_STRIPES_MIN     1
 #define DRBD_AL_STRIPES_MAX     1024
-- 
cgit v1.2.3


From 5d0b17f1a29e8189d04aef447a3a53cfd05529b2 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 18 Mar 2014 14:24:35 +0100
Subject: drbd: New net configuration option socket-check-timeout

In setups involving a DRBD-proxy and connections that experience a lot of
buffer-bloat it might be necessary to set ping-timeout to an
unusual high value. By default DRBD uses the same value to wait if a newly
established TCP-connection is stable. Since the DRBD-proxy is usually located
in the same data center such a long wait time may hinder DRBD's connect process.

In such setups socket-check-timeout should be set to
at least to the round trip time between DRBD and DRBD-proxy. I.e. in most
cases to 1.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 46 +++++++++++++++++++++++++-------------
 include/linux/drbd_genl.h          |  1 +
 include/linux/drbd_limits.h        |  5 +++++
 3 files changed, 36 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7da83f3a61eb..b89e6fb468c6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -819,7 +819,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
  * drbd_socket_okay() - Free the socket if its connection is not okay
  * @sock:	pointer to the pointer to the socket.
  */
-static int drbd_socket_okay(struct socket **sock)
+static bool drbd_socket_okay(struct socket **sock)
 {
 	int rr;
 	char tb[4];
@@ -837,6 +837,30 @@ static int drbd_socket_okay(struct socket **sock)
 		return false;
 	}
 }
+
+static bool connection_established(struct drbd_connection *connection,
+				   struct socket **sock1,
+				   struct socket **sock2)
+{
+	struct net_conf *nc;
+	int timeout;
+	bool ok;
+
+	if (!*sock1 || !*sock2)
+		return false;
+
+	rcu_read_lock();
+	nc = rcu_dereference(connection->net_conf);
+	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
+	rcu_read_unlock();
+	schedule_timeout_interruptible(timeout);
+
+	ok = drbd_socket_okay(sock1);
+	ok = drbd_socket_okay(sock2) && ok;
+
+	return ok;
+}
+
 /* Gets called if a connection is established, or if a new minor gets created
    in a connection */
 int drbd_connected(struct drbd_peer_device *peer_device)
@@ -878,8 +902,8 @@ static int conn_connect(struct drbd_connection *connection)
 	struct drbd_socket sock, msock;
 	struct drbd_peer_device *peer_device;
 	struct net_conf *nc;
-	int vnr, timeout, h, ok;
-	bool discard_my_data;
+	int vnr, timeout, h;
+	bool discard_my_data, ok;
 	enum drbd_state_rv rv;
 	struct accept_wait_data ad = {
 		.connection = connection,
@@ -923,17 +947,8 @@ static int conn_connect(struct drbd_connection *connection)
 			}
 		}
 
-		if (sock.socket && msock.socket) {
-			rcu_read_lock();
-			nc = rcu_dereference(connection->net_conf);
-			timeout = nc->ping_timeo * HZ / 10;
-			rcu_read_unlock();
-			schedule_timeout_interruptible(timeout);
-			ok = drbd_socket_okay(&sock.socket);
-			ok = drbd_socket_okay(&msock.socket) && ok;
-			if (ok)
-				break;
-		}
+		if (connection_established(connection, &sock.socket, &msock.socket))
+			break;
 
 retry:
 		s = drbd_wait_for_connect(connection, &ad);
@@ -979,8 +994,7 @@ randomize:
 				goto out_release_sockets;
 		}
 
-		ok = drbd_socket_okay(&sock.socket);
-		ok = drbd_socket_okay(&msock.socket) && ok;
+		ok = connection_established(connection, &sock.socket, &msock.socket);
 	} while (!ok);
 
 	if (ad.s_listen)
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 71fc924c53fa..7b131ed8f9c6 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -174,6 +174,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	/* 9: __str_field_def(31,     DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */
 	/* 9: __u32_field(32,         DRBD_F_REQUIRED | DRBD_F_INVARIANT,     peer_node_id) */
 	__flg_field_def(33, 0 /* OPTIONAL */,	csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF)
+	__u32_field_def(34, 0 /* OPTIONAL */, sock_check_timeo, DRBD_SOCKET_CHECK_TIMEO_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 9d2df1d51414..8ac8c5d9a3ad 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -225,4 +225,9 @@
 #define DRBD_AL_STRIPE_SIZE_MAX   16777216
 #define DRBD_AL_STRIPE_SIZE_DEF   32
 #define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */
+
+#define DRBD_SOCKET_CHECK_TIMEO_MIN 0
+#define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX
+#define DRBD_SOCKET_CHECK_TIMEO_DEF 0
+#define DRBD_SOCKET_CHECK_TIMEO_SCALE '1'
 #endif
-- 
cgit v1.2.3


From bf0d6e4a1138e71cafdbbb99cde430eee50c4ff1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 6 May 2014 14:28:32 +0300
Subject: drbd: silence underflow warning in read_in_block()

My static checker warns that "data_size" could be negative and underflow
the limit check.  The code looks suspicious but I don't know if it is a
real bug.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 2 +-
 include/linux/drbd.h               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index f972988291c5..9342b8da73ab 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1592,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
 	struct drbd_peer_request *peer_req;
 	struct page *page;
 	int dgs, ds, err;
-	int data_size = pi->size;
+	unsigned int data_size = pi->size;
 	void *dig_in = peer_device->connection->int_dig_in;
 	void *dig_vv = peer_device->connection->int_dig_vv;
 	unsigned long *data;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 20ec8903b1e4..debb70d40547 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -52,7 +52,7 @@
 #endif
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.3"
+#define REL_VERSION "8.4.5"
 #define API_VERSION 1
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 101
-- 
cgit v1.2.3


From 2243a87d90b42eb38bc281957df3e57c712b5e56 Mon Sep 17 00:00:00 2001
From: Fan Wu <fwu@marvell.com>
Date: Mon, 9 Jun 2014 09:37:56 +0800
Subject: pinctrl: avoid duplicated calling enable_pinmux_setting for a pin

What the patch does:
1. Call pinmux_disable_setting ahead of pinmux_enable_setting
  each time pinctrl_select_state is called
2. Remove the HW disable operation in pinmux_disable_setting function.
3. Remove the disable ops in struct pinmux_ops
4. Remove all the disable ops users in current code base.

Notes:
1. Great thanks for the suggestion from Linus, Tony Lindgren and
   Stephen Warren and Everyone that shared comments on this patch.
2. The patch also includes comment fixes from Stephen Warren.

The reason why we do this:
1. To avoid duplicated calling of the enable_setting operation
   without disabling operation inbetween which will let the pin
   descriptor desc->mux_usecount increase monotonously.
2. The HW pin disable operation is not useful for any of the
   existing platforms.
   And this can be used to avoid the HW glitch after using the
   item #1 modification.

In the following case, the issue can be reproduced:
1. There is a driver that need to switch pin state dynamically,
   e.g. between "sleep" and "default" state
2. The pin setting configuration in a DTS node may be like this:

  component a {
	pinctrl-names = "default", "sleep";
	pinctrl-0 = <&a_grp_setting &c_grp_setting>;
	pinctrl-1 = <&b_grp_setting &c_grp_setting>;
  }

  The "c_grp_setting" config node is totally identical, maybe like
  following one:

  c_grp_setting: c_grp_setting {
	pinctrl-single,pins = <GPIO48 AF6>;
  }

3. When switching the pin state in the following official pinctrl
   sequence:
	pin = pinctrl_get();
	state = pinctrl_lookup_state(wanted_state);
	pinctrl_select_state(state);
	pinctrl_put();

Test Result:
1. The switch is completed as expected, that is: the device's
   pin configuration is changed according to the description in the
   "wanted_state" group setting
2. The "desc->mux_usecount" of the corresponding pins in "c_group"
   is increased without being decreased, because the "desc" is for
   each physical pin while the setting is for each setting node
   in the DTS.
   Thus, if the "c_grp_setting" in pinctrl-0 is not disabled ahead
   of enabling "c_grp_setting" in pinctrl-1, the desc->mux_usecount
   will keep increasing without any chance to be decreased.

According to the comments in the original code, only the setting,
in old state but not in new state, will be "disabled" (calling
pinmux_disable_setting), which is correct logic but not intact. We
still need consider case that the setting is in both old state
and new state. We can do this in the following two ways:

1. Avoid to "enable"(calling pinmux_enable_setting) the "same pin
   setting" repeatedly
2. "Disable"(calling pinmux_disable_setting) the "same pin setting",
   actually two setting instances, ahead of enabling them.

Analysis:
1. The solution #2 is better because it can avoid too much
   iteration.
2. If we disable all of the settings in the old state and one of
   the setting(s) exist in the new state, the pins mux function
   change may happen when some SoC vendors defined the
   "pinctrl-single,function-off"
   in their DTS file.
   old_setting => disabled_setting => new_setting.
3. In the pinmux framework, when a pin state is switched, the
   setting in the old state should be marked as "disabled".

Conclusion:
1. To Remove the HW disabling operation to above the glitch mentioned
   above.
2. Handle the issue mentioned above by disabling all of the settings
   in old state and then enable the all of the settings in new state.

Signed-off-by: Fan Wu <fwu@marvell.com>
Acked-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Patrice Chotard <patrice.chotard@st.com>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Maxime Coquelin <maxime.coquelin@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/core.c                | 24 +++------------
 drivers/pinctrl/pinctrl-abx500.c      | 15 ---------
 drivers/pinctrl/pinctrl-adi2.c        | 30 ------------------
 drivers/pinctrl/pinctrl-at91.c        | 21 -------------
 drivers/pinctrl/pinctrl-bcm2835.c     | 11 -------
 drivers/pinctrl/pinctrl-exynos5440.c  |  8 -----
 drivers/pinctrl/pinctrl-msm.c         | 25 ---------------
 drivers/pinctrl/pinctrl-nomadik.c     | 16 ----------
 drivers/pinctrl/pinctrl-rockchip.c    | 18 -----------
 drivers/pinctrl/pinctrl-samsung.c     |  8 -----
 drivers/pinctrl/pinctrl-single.c      | 56 ---------------------------------
 drivers/pinctrl/pinctrl-st.c          |  6 ----
 drivers/pinctrl/pinctrl-tb10x.c       | 17 ----------
 drivers/pinctrl/pinctrl-tegra.c       | 13 --------
 drivers/pinctrl/pinctrl-tz1090-pdc.c  | 28 -----------------
 drivers/pinctrl/pinctrl-tz1090.c      | 58 -----------------------------------
 drivers/pinctrl/pinctrl-u300.c        | 14 ---------
 drivers/pinctrl/pinmux.c              |  4 ---
 drivers/pinctrl/sh-pfc/pinctrl.c      | 22 -------------
 drivers/pinctrl/sirf/pinctrl-sirf.c   | 10 ------
 drivers/pinctrl/spear/pinctrl-spear.c |  7 -----
 drivers/pinctrl/vt8500/pinctrl-wmt.c  | 12 --------
 include/linux/pinctrl/pinmux.h        |  2 --
 23 files changed, 5 insertions(+), 420 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index e09474ecde23..e4f65510c87e 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -992,29 +992,15 @@ int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *state)
 
 	if (p->state) {
 		/*
-		 * The set of groups with a mux configuration in the old state
-		 * may not be identical to the set of groups with a mux setting
-		 * in the new state. While this might be unusual, it's entirely
-		 * possible for the "user"-supplied mapping table to be written
-		 * that way. For each group that was configured in the old state
-		 * but not in the new state, this code puts that group into a
-		 * safe/disabled state.
+		 * For each pinmux setting in the old state, forget SW's record
+		 * of mux owner for that pingroup. Any pingroups which are
+		 * still owned by the new state will be re-acquired by the call
+		 * to pinmux_enable_setting() in the loop below.
 		 */
 		list_for_each_entry(setting, &p->state->settings, node) {
-			bool found = false;
 			if (setting->type != PIN_MAP_TYPE_MUX_GROUP)
 				continue;
-			list_for_each_entry(setting2, &state->settings, node) {
-				if (setting2->type != PIN_MAP_TYPE_MUX_GROUP)
-					continue;
-				if (setting2->data.mux.group ==
-						setting->data.mux.group) {
-					found = true;
-					break;
-				}
-			}
-			if (!found)
-				pinmux_disable_setting(setting);
+			pinmux_disable_setting(setting);
 		}
 	}
 
diff --git a/drivers/pinctrl/pinctrl-abx500.c b/drivers/pinctrl/pinctrl-abx500.c
index 163da9c3ea0e..f3f8b24efe54 100644
--- a/drivers/pinctrl/pinctrl-abx500.c
+++ b/drivers/pinctrl/pinctrl-abx500.c
@@ -737,20 +737,6 @@ static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function,
 	return ret;
 }
 
-static void abx500_pmx_disable(struct pinctrl_dev *pctldev,
-			       unsigned function, unsigned group)
-{
-	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
-	const struct abx500_pingroup *g;
-
-	g = &pct->soc->groups[group];
-	if (g->altsetting < 0)
-		return;
-
-	/* FIXME: poke out the mux, set the pin to some default state? */
-	dev_dbg(pct->dev, "disable group %s, %u pins\n", g->name, g->npins);
-}
-
 static int abx500_gpio_request_enable(struct pinctrl_dev *pctldev,
 			       struct pinctrl_gpio_range *range,
 			       unsigned offset)
@@ -799,7 +785,6 @@ static const struct pinmux_ops abx500_pinmux_ops = {
 	.get_function_name = abx500_pmx_get_func_name,
 	.get_function_groups = abx500_pmx_get_func_groups,
 	.enable = abx500_pmx_enable,
-	.disable = abx500_pmx_disable,
 	.gpio_request_enable = abx500_gpio_request_enable,
 	.gpio_disable_free = abx500_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c
index 5c44feb54ebb..b02ee4f882c0 100644
--- a/drivers/pinctrl/pinctrl-adi2.c
+++ b/drivers/pinctrl/pinctrl-adi2.c
@@ -652,35 +652,6 @@ static int adi_pinmux_enable(struct pinctrl_dev *pctldev, unsigned func_id,
 	return 0;
 }
 
-static void adi_pinmux_disable(struct pinctrl_dev *pctldev, unsigned func_id,
-	unsigned group_id)
-{
-	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
-	struct gpio_port *port;
-	struct pinctrl_gpio_range *range;
-	unsigned long flags;
-	unsigned short *mux, pin;
-
-	mux = (unsigned short *)pinctrl->soc->groups[group_id].mux;
-
-	while (*mux) {
-		pin = P_IDENT(*mux);
-
-		range = pinctrl_find_gpio_range_from_pin(pctldev, pin);
-		if (range == NULL) /* should not happen */
-			return;
-
-		port = container_of(range->gc, struct gpio_port, chip);
-
-		spin_lock_irqsave(&port->lock, flags);
-
-		port_setup(port, pin_to_offset(range, pin), true);
-		mux++;
-
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
-}
-
 static int adi_pinmux_get_funcs_count(struct pinctrl_dev *pctldev)
 {
 	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
@@ -728,7 +699,6 @@ static int adi_pinmux_request_gpio(struct pinctrl_dev *pctldev,
 
 static struct pinmux_ops adi_pinmux_ops = {
 	.enable = adi_pinmux_enable,
-	.disable = adi_pinmux_disable,
 	.get_functions_count = adi_pinmux_get_funcs_count,
 	.get_function_name = adi_pinmux_get_func_name,
 	.get_function_groups = adi_pinmux_get_groups,
diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c
index 421493cb490c..bd57ab514aa4 100644
--- a/drivers/pinctrl/pinctrl-at91.c
+++ b/drivers/pinctrl/pinctrl-at91.c
@@ -611,26 +611,6 @@ static int at91_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector,
 	return 0;
 }
 
-static void at91_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector,
-			   unsigned group)
-{
-	struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
-	const struct at91_pmx_pin *pins_conf = info->groups[group].pins_conf;
-	const struct at91_pmx_pin *pin;
-	uint32_t npins = info->groups[group].npins;
-	int i;
-	unsigned mask;
-	void __iomem *pio;
-
-	for (i = 0; i < npins; i++) {
-		pin = &pins_conf[i];
-		at91_pin_dbg(info->dev, pin);
-		pio = pin_to_controller(info, pin->bank);
-		mask = pin_to_mask(pin->pin);
-		at91_mux_gpio_enable(pio, mask, 1);
-	}
-}
-
 static int at91_pmx_get_funcs_count(struct pinctrl_dev *pctldev)
 {
 	struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
@@ -705,7 +685,6 @@ static const struct pinmux_ops at91_pmx_ops = {
 	.get_function_name	= at91_pmx_get_func_name,
 	.get_function_groups	= at91_pmx_get_groups,
 	.enable			= at91_pmx_enable,
-	.disable		= at91_pmx_disable,
 	.gpio_request_enable	= at91_gpio_request_enable,
 	.gpio_disable_free	= at91_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-bcm2835.c b/drivers/pinctrl/pinctrl-bcm2835.c
index 3d907de9bc91..5bcfd7ace0cd 100644
--- a/drivers/pinctrl/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/pinctrl-bcm2835.c
@@ -841,16 +841,6 @@ static int bcm2835_pmx_enable(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static void bcm2835_pmx_disable(struct pinctrl_dev *pctldev,
-		unsigned func_selector,
-		unsigned group_selector)
-{
-	struct bcm2835_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev);
-
-	/* disable by setting to GPIO_IN */
-	bcm2835_pinctrl_fsel_set(pc, group_selector, BCM2835_FSEL_GPIO_IN);
-}
-
 static void bcm2835_pmx_gpio_disable_free(struct pinctrl_dev *pctldev,
 		struct pinctrl_gpio_range *range,
 		unsigned offset)
@@ -880,7 +870,6 @@ static const struct pinmux_ops bcm2835_pmx_ops = {
 	.get_function_name = bcm2835_pmx_get_function_name,
 	.get_function_groups = bcm2835_pmx_get_function_groups,
 	.enable = bcm2835_pmx_enable,
-	.disable = bcm2835_pmx_disable,
 	.gpio_disable_free = bcm2835_pmx_gpio_disable_free,
 	.gpio_set_direction = bcm2835_pmx_gpio_set_direction,
 };
diff --git a/drivers/pinctrl/pinctrl-exynos5440.c b/drivers/pinctrl/pinctrl-exynos5440.c
index 8fe2ab0a7698..4b145b5db7a6 100644
--- a/drivers/pinctrl/pinctrl-exynos5440.c
+++ b/drivers/pinctrl/pinctrl-exynos5440.c
@@ -371,13 +371,6 @@ static int exynos5440_pinmux_enable(struct pinctrl_dev *pctldev, unsigned select
 	return 0;
 }
 
-/* disable a specified pinmux by writing to registers */
-static void exynos5440_pinmux_disable(struct pinctrl_dev *pctldev,
-					unsigned selector, unsigned group)
-{
-	exynos5440_pinmux_setup(pctldev, selector, group, false);
-}
-
 /*
  * The calls to gpio_direction_output() and gpio_direction_input()
  * leads to this function call (via the pinctrl_gpio_direction_{input|output}()
@@ -395,7 +388,6 @@ static const struct pinmux_ops exynos5440_pinmux_ops = {
 	.get_function_name	= exynos5440_pinmux_get_fname,
 	.get_function_groups	= exynos5440_pinmux_get_groups,
 	.enable			= exynos5440_pinmux_enable,
-	.disable		= exynos5440_pinmux_disable,
 	.gpio_set_direction	= exynos5440_pinmux_gpio_set_direction,
 };
 
diff --git a/drivers/pinctrl/pinctrl-msm.c b/drivers/pinctrl/pinctrl-msm.c
index df6dda4ce803..bdfaba4430f2 100644
--- a/drivers/pinctrl/pinctrl-msm.c
+++ b/drivers/pinctrl/pinctrl-msm.c
@@ -165,36 +165,11 @@ static int msm_pinmux_enable(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static void msm_pinmux_disable(struct pinctrl_dev *pctldev,
-			       unsigned function,
-			       unsigned group)
-{
-	struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev);
-	const struct msm_pingroup *g;
-	unsigned long flags;
-	u32 val;
-
-	g = &pctrl->soc->groups[group];
-
-	if (WARN_ON(g->mux_bit < 0))
-		return;
-
-	spin_lock_irqsave(&pctrl->lock, flags);
-
-	/* Clear the mux bits to select gpio mode */
-	val = readl(pctrl->regs + g->ctl_reg);
-	val &= ~(0x7 << g->mux_bit);
-	writel(val, pctrl->regs + g->ctl_reg);
-
-	spin_unlock_irqrestore(&pctrl->lock, flags);
-}
-
 static const struct pinmux_ops msm_pinmux_ops = {
 	.get_functions_count	= msm_get_functions_count,
 	.get_function_name	= msm_get_function_name,
 	.get_function_groups	= msm_get_function_groups,
 	.enable			= msm_pinmux_enable,
-	.disable		= msm_pinmux_disable,
 };
 
 static int msm_config_reg(struct msm_pinctrl *pctrl,
diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c
index 8f6f16ef73f3..a564251fe093 100644
--- a/drivers/pinctrl/pinctrl-nomadik.c
+++ b/drivers/pinctrl/pinctrl-nomadik.c
@@ -1765,21 +1765,6 @@ out_glitch:
 	return ret;
 }
 
-static void nmk_pmx_disable(struct pinctrl_dev *pctldev,
-			    unsigned function, unsigned group)
-{
-	struct nmk_pinctrl *npct = pinctrl_dev_get_drvdata(pctldev);
-	const struct nmk_pingroup *g;
-
-	g = &npct->soc->groups[group];
-
-	if (g->altsetting < 0)
-		return;
-
-	/* Poke out the mux, set the pin to some default state? */
-	dev_dbg(npct->dev, "disable group %s, %u pins\n", g->name, g->npins);
-}
-
 static int nmk_gpio_request_enable(struct pinctrl_dev *pctldev,
 				   struct pinctrl_gpio_range *range,
 				   unsigned offset)
@@ -1826,7 +1811,6 @@ static const struct pinmux_ops nmk_pinmux_ops = {
 	.get_function_name = nmk_pmx_get_func_name,
 	.get_function_groups = nmk_pmx_get_func_groups,
 	.enable = nmk_pmx_enable,
-	.disable = nmk_pmx_disable,
 	.gpio_request_enable = nmk_gpio_request_enable,
 	.gpio_disable_free = nmk_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index bb805d5e9ff0..51f67a6eadcb 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -657,23 +657,6 @@ static int rockchip_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector,
 	return 0;
 }
 
-static void rockchip_pmx_disable(struct pinctrl_dev *pctldev,
-					unsigned selector, unsigned group)
-{
-	struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
-	const unsigned int *pins = info->groups[group].pins;
-	struct rockchip_pin_bank *bank;
-	int cnt;
-
-	dev_dbg(info->dev, "disable function %s group %s\n",
-		info->functions[selector].name, info->groups[group].name);
-
-	for (cnt = 0; cnt < info->groups[group].npins; cnt++) {
-		bank = pin_to_bank(info, pins[cnt]);
-		rockchip_set_mux(bank, pins[cnt] - bank->pin_base, 0);
-	}
-}
-
 /*
  * The calls to gpio_direction_output() and gpio_direction_input()
  * leads to this function call (via the pinctrl_gpio_direction_{input|output}()
@@ -716,7 +699,6 @@ static const struct pinmux_ops rockchip_pmx_ops = {
 	.get_function_name	= rockchip_pmx_get_func_name,
 	.get_function_groups	= rockchip_pmx_get_groups,
 	.enable			= rockchip_pmx_enable,
-	.disable		= rockchip_pmx_disable,
 	.gpio_set_direction	= rockchip_pmx_gpio_set_direction,
 };
 
diff --git a/drivers/pinctrl/pinctrl-samsung.c b/drivers/pinctrl/pinctrl-samsung.c
index 3e61d0f8f146..089abde35d44 100644
--- a/drivers/pinctrl/pinctrl-samsung.c
+++ b/drivers/pinctrl/pinctrl-samsung.c
@@ -333,13 +333,6 @@ static int samsung_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector,
 	return 0;
 }
 
-/* disable a specified pinmux by writing to registers */
-static void samsung_pinmux_disable(struct pinctrl_dev *pctldev,
-					unsigned selector, unsigned group)
-{
-	samsung_pinmux_setup(pctldev, selector, group, false);
-}
-
 /*
  * The calls to gpio_direction_output() and gpio_direction_input()
  * leads to this function call (via the pinctrl_gpio_direction_{input|output}()
@@ -390,7 +383,6 @@ static const struct pinmux_ops samsung_pinmux_ops = {
 	.get_function_name	= samsung_pinmux_get_fname,
 	.get_function_groups	= samsung_pinmux_get_groups,
 	.enable			= samsung_pinmux_enable,
-	.disable		= samsung_pinmux_disable,
 	.gpio_set_direction	= samsung_pinmux_gpio_set_direction,
 };
 
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 2960557bfed9..ff6a2bda52e5 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -488,61 +488,6 @@ static int pcs_enable(struct pinctrl_dev *pctldev, unsigned fselector,
 	return 0;
 }
 
-static void pcs_disable(struct pinctrl_dev *pctldev, unsigned fselector,
-					unsigned group)
-{
-	struct pcs_device *pcs;
-	struct pcs_function *func;
-	int i;
-
-	pcs = pinctrl_dev_get_drvdata(pctldev);
-	/* If function mask is null, needn't disable it. */
-	if (!pcs->fmask)
-		return;
-
-	func = radix_tree_lookup(&pcs->ftree, fselector);
-	if (!func) {
-		dev_err(pcs->dev, "%s could not find function%i\n",
-			__func__, fselector);
-		return;
-	}
-
-	/*
-	 * Ignore disable if function-off is not specified. Some hardware
-	 * does not have clearly defined disable function. For pin specific
-	 * off modes, you can use alternate named states as described in
-	 * pinctrl-bindings.txt.
-	 */
-	if (pcs->foff == PCS_OFF_DISABLED) {
-		dev_dbg(pcs->dev, "ignoring disable for %s function%i\n",
-			func->name, fselector);
-		return;
-	}
-
-	dev_dbg(pcs->dev, "disabling function%i %s\n",
-		fselector, func->name);
-
-	for (i = 0; i < func->nvals; i++) {
-		struct pcs_func_vals *vals;
-		unsigned long flags;
-		unsigned val, mask;
-
-		vals = &func->vals[i];
-		raw_spin_lock_irqsave(&pcs->lock, flags);
-		val = pcs->read(vals->reg);
-
-		if (pcs->bits_per_mux)
-			mask = vals->mask;
-		else
-			mask = pcs->fmask;
-
-		val &= ~mask;
-		val |= pcs->foff << pcs->fshift;
-		pcs->write(val, vals->reg);
-		raw_spin_unlock_irqrestore(&pcs->lock, flags);
-	}
-}
-
 static int pcs_request_gpio(struct pinctrl_dev *pctldev,
 			    struct pinctrl_gpio_range *range, unsigned pin)
 {
@@ -575,7 +520,6 @@ static const struct pinmux_ops pcs_pinmux_ops = {
 	.get_function_name = pcs_get_function_name,
 	.get_function_groups = pcs_get_function_groups,
 	.enable = pcs_enable,
-	.disable = pcs_disable,
 	.gpio_request_enable = pcs_request_gpio,
 };
 
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index 1bd6363bc95e..e1919cd43117 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -930,11 +930,6 @@ static int st_pmx_enable(struct pinctrl_dev *pctldev, unsigned fselector,
 	return 0;
 }
 
-static void st_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector,
-		unsigned group)
-{
-}
-
 static int st_pmx_set_gpio_direction(struct pinctrl_dev *pctldev,
 			struct pinctrl_gpio_range *range, unsigned gpio,
 			bool input)
@@ -957,7 +952,6 @@ static struct pinmux_ops st_pmxops = {
 	.get_function_name	= st_pmx_get_fname,
 	.get_function_groups	= st_pmx_get_groups,
 	.enable			= st_pmx_enable,
-	.disable		= st_pmx_disable,
 	.gpio_set_direction	= st_pmx_set_gpio_direction,
 };
 
diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c
index 26ca6855f478..71c5d4f0c538 100644
--- a/drivers/pinctrl/pinctrl-tb10x.c
+++ b/drivers/pinctrl/pinctrl-tb10x.c
@@ -738,22 +738,6 @@ static int tb10x_pctl_enable(struct pinctrl_dev *pctl,
 	return 0;
 }
 
-static void tb10x_pctl_disable(struct pinctrl_dev *pctl,
-			unsigned func_selector, unsigned group_selector)
-{
-	struct tb10x_pinctrl *state = pinctrl_dev_get_drvdata(pctl);
-	const struct tb10x_pinfuncgrp *grp = &state->pingroups[group_selector];
-
-	if (grp->port < 0)
-		return;
-
-	mutex_lock(&state->mutex);
-
-	state->ports[grp->port].count--;
-
-	mutex_unlock(&state->mutex);
-}
-
 static struct pinmux_ops tb10x_pinmux_ops = {
 	.get_functions_count = tb10x_get_functions_count,
 	.get_function_name = tb10x_get_function_name,
@@ -761,7 +745,6 @@ static struct pinmux_ops tb10x_pinmux_ops = {
 	.gpio_request_enable = tb10x_gpio_request_enable,
 	.gpio_disable_free = tb10x_gpio_disable_free,
 	.enable = tb10x_pctl_enable,
-	.disable = tb10x_pctl_disable,
 };
 
 static struct pinctrl_desc tb10x_pindesc = {
diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c
index 2d43bff74f59..150af5503c09 100644
--- a/drivers/pinctrl/pinctrl-tegra.c
+++ b/drivers/pinctrl/pinctrl-tegra.c
@@ -290,24 +290,11 @@ static int tegra_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function,
 	return 0;
 }
 
-static void tegra_pinctrl_disable(struct pinctrl_dev *pctldev,
-				  unsigned function, unsigned group)
-{
-	struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);
-	const struct tegra_pingroup *g;
-
-	g = &pmx->soc->groups[group];
-
-	if (WARN_ON(g->mux_reg < 0))
-		return;
-}
-
 static const struct pinmux_ops tegra_pinmux_ops = {
 	.get_functions_count = tegra_pinctrl_get_funcs_count,
 	.get_function_name = tegra_pinctrl_get_func_name,
 	.get_function_groups = tegra_pinctrl_get_func_groups,
 	.enable = tegra_pinctrl_enable,
-	.disable = tegra_pinctrl_disable,
 };
 
 static int tegra_pinconf_reg(struct tegra_pmx *pmx,
diff --git a/drivers/pinctrl/pinctrl-tz1090-pdc.c b/drivers/pinctrl/pinctrl-tz1090-pdc.c
index 5bf01c28925e..41e81a35cabb 100644
--- a/drivers/pinctrl/pinctrl-tz1090-pdc.c
+++ b/drivers/pinctrl/pinctrl-tz1090-pdc.c
@@ -574,33 +574,6 @@ static int tz1090_pdc_pinctrl_enable(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static void tz1090_pdc_pinctrl_disable(struct pinctrl_dev *pctldev,
-				       unsigned int function,
-				       unsigned int group)
-{
-	struct tz1090_pdc_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);
-	const struct tz1090_pdc_pingroup *grp = &tz1090_pdc_groups[group];
-
-	dev_dbg(pctldev->dev, "%s(func=%u (%s), group=%u (%s))\n",
-		__func__,
-		function, tz1090_pdc_functions[function].name,
-		group, tz1090_pdc_groups[group].name);
-
-	/* is it even a mux? */
-	if (grp->drv)
-		return;
-
-	/* does this group even control the function? */
-	if (function != grp->func)
-		return;
-
-	/* record the pin being unmuxed and update mux bit */
-	spin_lock(&pmx->lock);
-	pmx->mux_en &= ~BIT(grp->pins[0]);
-	tz1090_pdc_pinctrl_mux(pmx, grp);
-	spin_unlock(&pmx->lock);
-}
-
 static const struct tz1090_pdc_pingroup *find_mux_group(
 						struct tz1090_pdc_pmx *pmx,
 						unsigned int pin)
@@ -662,7 +635,6 @@ static struct pinmux_ops tz1090_pdc_pinmux_ops = {
 	.get_function_name	= tz1090_pdc_pinctrl_get_func_name,
 	.get_function_groups	= tz1090_pdc_pinctrl_get_func_groups,
 	.enable			= tz1090_pdc_pinctrl_enable,
-	.disable		= tz1090_pdc_pinctrl_disable,
 	.gpio_request_enable	= tz1090_pdc_pinctrl_gpio_request_enable,
 	.gpio_disable_free	= tz1090_pdc_pinctrl_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-tz1090.c b/drivers/pinctrl/pinctrl-tz1090.c
index bc9cd7a7602e..24082216842e 100644
--- a/drivers/pinctrl/pinctrl-tz1090.c
+++ b/drivers/pinctrl/pinctrl-tz1090.c
@@ -1478,63 +1478,6 @@ mux_pins:
 	return 0;
 }
 
-/**
- * tz1090_pinctrl_disable() - Disable a function on a pin group.
- * @pctldev:		Pin control data
- * @function:		Function index to disable
- * @group:		Group index to disable
- *
- * Disable a particular function on a group of pins. The per GPIO pin pseudo pin
- * groups can be used (in which case the pin will be taken out of peripheral
- * mode. Some convenience pin groups can also be used in which case the effect
- * is the same as enabling the function on each individual pin in the group.
- */
-static void tz1090_pinctrl_disable(struct pinctrl_dev *pctldev,
-				   unsigned int function, unsigned int group)
-{
-	struct tz1090_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);
-	struct tz1090_pingroup *grp;
-	unsigned int pin_num, mux_group, i, npins;
-	const unsigned int *pins;
-
-	/* group of pins? */
-	if (group < ARRAY_SIZE(tz1090_groups)) {
-		grp = &tz1090_groups[group];
-		npins = grp->npins;
-		pins = grp->pins;
-		/*
-		 * All pins in the group must belong to the same mux group,
-		 * which allows us to just use the mux group of the first pin.
-		 * By explicitly listing permitted pingroups for each function
-		 * the pinmux core should ensure this is always the case.
-		 */
-	} else {
-		pin_num = group - ARRAY_SIZE(tz1090_groups);
-		npins = 1;
-		pins = &pin_num;
-	}
-	mux_group = tz1090_mux_pins[*pins];
-
-	/* no mux group, but can still be individually muxed to peripheral */
-	if (mux_group >= TZ1090_MUX_GROUP_MAX) {
-		if (function == TZ1090_MUX_PERIP)
-			goto unmux_pins;
-		return;
-	}
-
-	/* mux group already set to a different function? */
-	grp = &tz1090_mux_groups[mux_group];
-	dev_dbg(pctldev->dev, "%s: unmuxing %u pin(s) in '%s' from '%s'\n",
-		__func__, npins, grp->name, tz1090_functions[function].name);
-
-	/* subtract pins from ref count and unmux individually */
-	WARN_ON(grp->func_count < npins);
-	grp->func_count -= npins;
-unmux_pins:
-	for (i = 0; i < npins; ++i)
-		tz1090_pinctrl_perip_select(pmx, pins[i], false);
-}
-
 /**
  * tz1090_pinctrl_gpio_request_enable() - Put pin in GPIO mode.
  * @pctldev:		Pin control data
@@ -1575,7 +1518,6 @@ static struct pinmux_ops tz1090_pinmux_ops = {
 	.get_function_name	= tz1090_pinctrl_get_func_name,
 	.get_function_groups	= tz1090_pinctrl_get_func_groups,
 	.enable			= tz1090_pinctrl_enable,
-	.disable		= tz1090_pinctrl_disable,
 	.gpio_request_enable	= tz1090_pinctrl_gpio_request_enable,
 	.gpio_disable_free	= tz1090_pinctrl_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c
index 209a01b8bd3b..0959bb36450f 100644
--- a/drivers/pinctrl/pinctrl-u300.c
+++ b/drivers/pinctrl/pinctrl-u300.c
@@ -970,19 +970,6 @@ static int u300_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector,
 	return 0;
 }
 
-static void u300_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector,
-			     unsigned group)
-{
-	struct u300_pmx *upmx;
-
-	/* There is nothing to do with the power pins */
-	if (selector == 0)
-		return;
-
-	upmx = pinctrl_dev_get_drvdata(pctldev);
-	u300_pmx_endisable(upmx, selector, false);
-}
-
 static int u300_pmx_get_funcs_count(struct pinctrl_dev *pctldev)
 {
 	return ARRAY_SIZE(u300_pmx_functions);
@@ -1008,7 +995,6 @@ static const struct pinmux_ops u300_pmx_ops = {
 	.get_function_name = u300_pmx_get_func_name,
 	.get_function_groups = u300_pmx_get_groups,
 	.enable = u300_pmx_enable,
-	.disable = u300_pmx_disable,
 };
 
 static int u300_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin,
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index 051e8592990e..c055daf9a80f 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -471,7 +471,6 @@ void pinmux_disable_setting(struct pinctrl_setting const *setting)
 {
 	struct pinctrl_dev *pctldev = setting->pctldev;
 	const struct pinctrl_ops *pctlops = pctldev->desc->pctlops;
-	const struct pinmux_ops *ops = pctldev->desc->pmxops;
 	int ret = 0;
 	const unsigned *pins = NULL;
 	unsigned num_pins = 0;
@@ -518,9 +517,6 @@ void pinmux_disable_setting(struct pinctrl_setting const *setting)
 				 pins[i], desc->name, gname);
 		}
 	}
-
-	if (ops->disable)
-		ops->disable(pctldev, setting->data.mux.func, setting->data.mux.group);
 }
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c
index e758af95c209..11db3ee39d40 100644
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -345,27 +345,6 @@ done:
 	return ret;
 }
 
-static void sh_pfc_func_disable(struct pinctrl_dev *pctldev, unsigned selector,
-				unsigned group)
-{
-	struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev);
-	struct sh_pfc *pfc = pmx->pfc;
-	const struct sh_pfc_pin_group *grp = &pfc->info->groups[group];
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&pfc->lock, flags);
-
-	for (i = 0; i < grp->nr_pins; ++i) {
-		int idx = sh_pfc_get_pin_index(pfc, grp->pins[i]);
-		struct sh_pfc_pin_config *cfg = &pmx->configs[idx];
-
-		cfg->type = PINMUX_TYPE_NONE;
-	}
-
-	spin_unlock_irqrestore(&pfc->lock, flags);
-}
-
 static int sh_pfc_gpio_request_enable(struct pinctrl_dev *pctldev,
 				      struct pinctrl_gpio_range *range,
 				      unsigned offset)
@@ -464,7 +443,6 @@ static const struct pinmux_ops sh_pfc_pinmux_ops = {
 	.get_function_name	= sh_pfc_get_function_name,
 	.get_function_groups	= sh_pfc_get_function_groups,
 	.enable			= sh_pfc_func_enable,
-	.disable		= sh_pfc_func_disable,
 	.gpio_request_enable	= sh_pfc_gpio_request_enable,
 	.gpio_disable_free	= sh_pfc_gpio_disable_free,
 	.gpio_set_direction	= sh_pfc_gpio_set_direction,
diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c
index 014f5b1fee55..4c1d7c68666d 100644
--- a/drivers/pinctrl/sirf/pinctrl-sirf.c
+++ b/drivers/pinctrl/sirf/pinctrl-sirf.c
@@ -186,15 +186,6 @@ static int sirfsoc_pinmux_enable(struct pinctrl_dev *pmxdev, unsigned selector,
 	return 0;
 }
 
-static void sirfsoc_pinmux_disable(struct pinctrl_dev *pmxdev, unsigned selector,
-	unsigned group)
-{
-	struct sirfsoc_pmx *spmx;
-
-	spmx = pinctrl_dev_get_drvdata(pmxdev);
-	sirfsoc_pinmux_endisable(spmx, selector, false);
-}
-
 static int sirfsoc_pinmux_get_funcs_count(struct pinctrl_dev *pmxdev)
 {
 	return sirfsoc_pmxfunc_cnt;
@@ -240,7 +231,6 @@ static int sirfsoc_pinmux_request_gpio(struct pinctrl_dev *pmxdev,
 
 static struct pinmux_ops sirfsoc_pinmux_ops = {
 	.enable = sirfsoc_pinmux_enable,
-	.disable = sirfsoc_pinmux_disable,
 	.get_functions_count = sirfsoc_pinmux_get_funcs_count,
 	.get_function_name = sirfsoc_pinmux_get_func_name,
 	.get_function_groups = sirfsoc_pinmux_get_groups,
diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c
index 58bf6867aa17..f72cc4e192bd 100644
--- a/drivers/pinctrl/spear/pinctrl-spear.c
+++ b/drivers/pinctrl/spear/pinctrl-spear.c
@@ -274,12 +274,6 @@ static int spear_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function,
 	return spear_pinctrl_endisable(pctldev, function, group, true);
 }
 
-static void spear_pinctrl_disable(struct pinctrl_dev *pctldev,
-		unsigned function, unsigned group)
-{
-	spear_pinctrl_endisable(pctldev, function, group, false);
-}
-
 /* gpio with pinmux */
 static struct spear_gpio_pingroup *get_gpio_pingroup(struct spear_pmx *pmx,
 		unsigned pin)
@@ -345,7 +339,6 @@ static const struct pinmux_ops spear_pinmux_ops = {
 	.get_function_name = spear_pinctrl_get_func_name,
 	.get_function_groups = spear_pinctrl_get_func_groups,
 	.enable = spear_pinctrl_enable,
-	.disable = spear_pinctrl_disable,
 	.gpio_request_enable = gpio_request_enable,
 	.gpio_disable_free = gpio_disable_free,
 };
diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c
index 2c61281bebd7..8c976c21eeee 100644
--- a/drivers/pinctrl/vt8500/pinctrl-wmt.c
+++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c
@@ -141,17 +141,6 @@ static int wmt_pmx_enable(struct pinctrl_dev *pctldev,
 	return wmt_set_pinmux(data, func_selector, pinnum);
 }
 
-static void wmt_pmx_disable(struct pinctrl_dev *pctldev,
-			    unsigned func_selector,
-			    unsigned group_selector)
-{
-	struct wmt_pinctrl_data *data = pinctrl_dev_get_drvdata(pctldev);
-	u32 pinnum = data->pins[group_selector].number;
-
-	/* disable by setting GPIO_IN */
-	wmt_set_pinmux(data, WMT_FSEL_GPIO_IN, pinnum);
-}
-
 static void wmt_pmx_gpio_disable_free(struct pinctrl_dev *pctldev,
 				      struct pinctrl_gpio_range *range,
 				      unsigned offset)
@@ -180,7 +169,6 @@ static struct pinmux_ops wmt_pinmux_ops = {
 	.get_function_name = wmt_pmx_get_function_name,
 	.get_function_groups = wmt_pmx_get_function_groups,
 	.enable = wmt_pmx_enable,
-	.disable = wmt_pmx_disable,
 	.gpio_disable_free = wmt_pmx_gpio_disable_free,
 	.gpio_set_direction = wmt_pmx_gpio_set_direction,
 };
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index c15395031cb3..3097aafbeb24 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -70,8 +70,6 @@ struct pinmux_ops {
 				  unsigned * const num_groups);
 	int (*enable) (struct pinctrl_dev *pctldev, unsigned func_selector,
 		       unsigned group_selector);
-	void (*disable) (struct pinctrl_dev *pctldev, unsigned func_selector,
-			 unsigned group_selector);
 	int (*gpio_request_enable) (struct pinctrl_dev *pctldev,
 				    struct pinctrl_gpio_range *range,
 				    unsigned offset);
-- 
cgit v1.2.3


From c14deddec1fbd8c9757c53a49dbfd2dc83265f21 Mon Sep 17 00:00:00 2001
From: "grmoore@altera.com" <grmoore@altera.com>
Date: Tue, 29 Apr 2014 10:29:51 -0500
Subject: mtd: spi-nor: add support for flag status register on Micron chips

Some new Micron flash chips require reading the flag status register to
determine when operations have completed.

Furthermore, chips with multi-die stacks of the 65nm 256Mb QSPI also
require reading the status register before reading the flag status
register.

This patch adds support for the flag status register in the n25q512ax3
and n25q00 Micron QSPI flash chips.

Signed-off-by: Graham Moore <grmoore@altera.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 52 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spi-nor.h   |  4 ++++
 2 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index c713c8656710..7da3a7067c35 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -47,6 +47,25 @@ static int read_sr(struct spi_nor *nor)
 	return val;
 }
 
+/*
+ * Read the flag status register, returning its value in the location
+ * Return the status register value.
+ * Returns negative if error occurred.
+ */
+static int read_fsr(struct spi_nor *nor)
+{
+	int ret;
+	u8 val;
+
+	ret = nor->read_reg(nor, SPINOR_OP_RDFSR, &val, 1);
+	if (ret < 0) {
+		pr_err("error %d reading FSR\n", ret);
+		return ret;
+	}
+
+	return val;
+}
+
 /*
  * Read configuration register, returning its value in the
  * location. Return the configuration register value.
@@ -165,6 +184,32 @@ static int spi_nor_wait_till_ready(struct spi_nor *nor)
 	return -ETIMEDOUT;
 }
 
+static int spi_nor_wait_till_fsr_ready(struct spi_nor *nor)
+{
+	unsigned long deadline;
+	int sr;
+	int fsr;
+
+	deadline = jiffies + MAX_READY_WAIT_JIFFIES;
+
+	do {
+		cond_resched();
+
+		sr = read_sr(nor);
+		if (sr < 0) {
+			break;
+		} else if (!(sr & SR_WIP)) {
+			fsr = read_fsr(nor);
+			if (fsr < 0)
+				break;
+			if (fsr & FSR_READY)
+				return 0;
+		}
+	} while (!time_after_eq(jiffies, deadline));
+
+	return -ETIMEDOUT;
+}
+
 /*
  * Service routine to read status register until ready, or timeout occurs.
  * Returns non-zero if error.
@@ -402,6 +447,7 @@ struct flash_info {
 #define	SECT_4K_PMC		0x10	/* SPINOR_OP_BE_4K_PMC works uniformly */
 #define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
 #define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
+#define	USE_FSR			0x80	/* use flag status register */
 };
 
 #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)	\
@@ -488,6 +534,8 @@ const struct spi_device_id spi_nor_ids[] = {
 	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, 0) },
 	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K) },
 	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K) },
+	{ "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, USE_FSR) },
+	{ "n25q00",      INFO(0x20ba21, 0, 64 * 1024, 2048, USE_FSR) },
 
 	/* PMC */
 	{ "pm25lv512",   INFO(0,        0, 32 * 1024,    2, SECT_4K_PMC) },
@@ -965,6 +1013,10 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 	else
 		mtd->_write = spi_nor_write;
 
+	if ((info->flags & USE_FSR) &&
+	    nor->wait_till_ready == spi_nor_wait_till_ready)
+		nor->wait_till_ready = spi_nor_wait_till_fsr_ready;
+
 	/* prefer "small sector" erase if possible */
 	if (info->flags & SECT_4K) {
 		nor->erase_opcode = SPINOR_OP_BE_4K;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 53241842a7ab..9e6294f32ba8 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -34,6 +34,7 @@
 #define SPINOR_OP_SE		0xd8	/* Sector erase (usually 64KiB) */
 #define SPINOR_OP_RDID		0x9f	/* Read JEDEC ID */
 #define SPINOR_OP_RDCR		0x35	/* Read configuration register */
+#define SPINOR_OP_RDFSR		0x70	/* Read flag status register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
 #define SPINOR_OP_READ4		0x13	/* Read data bytes (low frequency) */
@@ -66,6 +67,9 @@
 
 #define SR_QUAD_EN_MX		0x40	/* Macronix Quad I/O */
 
+/* Flag Status Register bits */
+#define FSR_READY		0x80
+
 /* Configuration Register bits. */
 #define CR_QUAD_EN_SPAN		0x2	/* Spansion Quad I/O */
 
-- 
cgit v1.2.3


From 2004c726b9d9a9670b7f837190be9c8dfa7a0e9d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Sat, 21 Jun 2014 20:52:15 -0400
Subject: auth_gss: fetch the acceptor name out of the downcall

If rpc.gssd sends us an acceptor name string trailing the context token,
stash it as part of the context.

Signed-off-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/auth_gss.h |  1 +
 net/sunrpc/auth_gss/auth_gss.c  | 20 +++++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index f1cfd4c85cd0..cbc6875fb9cf 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -71,6 +71,7 @@ struct gss_cl_ctx {
 	spinlock_t		gc_seq_lock;
 	struct gss_ctx __rcu	*gc_gss_ctx;
 	struct xdr_netobj	gc_wire_ctx;
+	struct xdr_netobj	gc_acceptor;
 	u32			gc_win;
 	unsigned long		gc_expiry;
 	struct rcu_head		gc_rcu;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index b6e440baccc3..e34af68603bd 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -262,9 +262,22 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
 		p = ERR_PTR(ret);
 		goto err;
 	}
-	dprintk("RPC:       %s Success. gc_expiry %lu now %lu timeout %u\n",
-		__func__, ctx->gc_expiry, now, timeout);
-	return q;
+
+	/* is there any trailing data? */
+	if (q == end) {
+		p = q;
+		goto done;
+	}
+
+	/* pull in acceptor name (if there is one) */
+	p = simple_get_netobj(q, end, &ctx->gc_acceptor);
+	if (IS_ERR(p))
+		goto err;
+done:
+	dprintk("RPC:       %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
+		__func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
+		ctx->gc_acceptor.data);
+	return p;
 err:
 	dprintk("RPC:       %s returns error %ld\n", __func__, -PTR_ERR(p));
 	return p;
@@ -1225,6 +1238,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx)
 
 	gss_delete_sec_context(&ctx->gc_gss_ctx);
 	kfree(ctx->gc_wire_ctx.data);
+	kfree(ctx->gc_acceptor.data);
 	kfree(ctx);
 }
 
-- 
cgit v1.2.3


From a0337d1ddb5a4bd609e3ff0955551cb240340340 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Sat, 21 Jun 2014 20:52:16 -0400
Subject: sunrpc: add a new "stringify_acceptor" rpc_credop

...and add an new rpc_auth function to call it when it exists. This
is only applicable for AUTH_GSS mechanisms, so we only specify this
for those sorts of credentials.

Signed-off-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/auth.h    |  2 ++
 net/sunrpc/auth.c              |  9 ++++++
 net/sunrpc/auth_gss/auth_gss.c | 62 ++++++++++++++++++++++++++++--------------
 3 files changed, 53 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 790be1472792..c683b9a06913 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -140,6 +140,7 @@ struct rpc_credops {
 						void *, __be32 *, void *);
 	int			(*crkey_timeout)(struct rpc_cred *);
 	bool			(*crkey_to_expire)(struct rpc_cred *);
+	char *			(*crstringify_acceptor)(struct rpc_cred *);
 };
 
 extern const struct rpc_authops	authunix_ops;
@@ -182,6 +183,7 @@ void			rpcauth_clear_credcache(struct rpc_cred_cache *);
 int			rpcauth_key_timeout_notify(struct rpc_auth *,
 						struct rpc_cred *);
 bool			rpcauth_cred_key_to_expire(struct rpc_cred *);
+char *			rpcauth_stringify_acceptor(struct rpc_cred *);
 
 static inline
 struct rpc_cred *	get_rpccred(struct rpc_cred *cred)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index f77366717420..1481efff6aa2 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -363,6 +363,15 @@ rpcauth_cred_key_to_expire(struct rpc_cred *cred)
 }
 EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
 
+char *
+rpcauth_stringify_acceptor(struct rpc_cred *cred)
+{
+	if (!cred->cr_ops->crstringify_acceptor)
+		return NULL;
+	return cred->cr_ops->crstringify_acceptor(cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor);
+
 /*
  * Destroy a list of credentials
  */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index e34af68603bd..73854314fb85 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1346,6 +1346,26 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
 	return err;
 }
 
+static char *
+gss_stringify_acceptor(struct rpc_cred *cred)
+{
+	char *string;
+	struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+	struct xdr_netobj *acceptor = &gss_cred->gc_ctx->gc_acceptor;
+
+	/* no point if there's no string */
+	if (!acceptor->len)
+		return NULL;
+
+	string = kmalloc(acceptor->len + 1, GFP_KERNEL);
+	if (!string)
+		return string;
+
+	memcpy(string, acceptor->data, acceptor->len);
+	string[acceptor->len] = '\0';
+	return string;
+}
+
 /*
  * Returns -EACCES if GSS context is NULL or will expire within the
  * timeout (miliseconds)
@@ -1923,29 +1943,31 @@ static const struct rpc_authops authgss_ops = {
 };
 
 static const struct rpc_credops gss_credops = {
-	.cr_name	= "AUTH_GSS",
-	.crdestroy	= gss_destroy_cred,
-	.cr_init	= gss_cred_init,
-	.crbind		= rpcauth_generic_bind_cred,
-	.crmatch	= gss_match,
-	.crmarshal	= gss_marshal,
-	.crrefresh	= gss_refresh,
-	.crvalidate	= gss_validate,
-	.crwrap_req	= gss_wrap_req,
-	.crunwrap_resp	= gss_unwrap_resp,
-	.crkey_timeout	= gss_key_timeout,
+	.cr_name		= "AUTH_GSS",
+	.crdestroy		= gss_destroy_cred,
+	.cr_init		= gss_cred_init,
+	.crbind			= rpcauth_generic_bind_cred,
+	.crmatch		= gss_match,
+	.crmarshal		= gss_marshal,
+	.crrefresh		= gss_refresh,
+	.crvalidate		= gss_validate,
+	.crwrap_req		= gss_wrap_req,
+	.crunwrap_resp		= gss_unwrap_resp,
+	.crkey_timeout		= gss_key_timeout,
+	.crstringify_acceptor	= gss_stringify_acceptor,
 };
 
 static const struct rpc_credops gss_nullops = {
-	.cr_name	= "AUTH_GSS",
-	.crdestroy	= gss_destroy_nullcred,
-	.crbind		= rpcauth_generic_bind_cred,
-	.crmatch	= gss_match,
-	.crmarshal	= gss_marshal,
-	.crrefresh	= gss_refresh_null,
-	.crvalidate	= gss_validate,
-	.crwrap_req	= gss_wrap_req,
-	.crunwrap_resp	= gss_unwrap_resp,
+	.cr_name		= "AUTH_GSS",
+	.crdestroy		= gss_destroy_nullcred,
+	.crbind			= rpcauth_generic_bind_cred,
+	.crmatch		= gss_match,
+	.crmarshal		= gss_marshal,
+	.crrefresh		= gss_refresh_null,
+	.crvalidate		= gss_validate,
+	.crwrap_req		= gss_wrap_req,
+	.crunwrap_resp		= gss_unwrap_resp,
+	.crstringify_acceptor	= gss_stringify_acceptor,
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
-- 
cgit v1.2.3


From f11b2a1cfbf5dd783eb55cb470509d06e20d1c78 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Sat, 21 Jun 2014 20:52:17 -0400
Subject: nfs4: copy acceptor name from context to nfs_client

The current CB_COMPOUND handling code tries to compare the principal
name of the request with the cl_hostname in the client. This is not
guaranteed to ever work, particularly if the client happened to mount
a CNAME of the server or a non-fqdn.

Fix this by instead comparing the cr_principal string with the acceptor
name that we get from gssd. In the event that gssd didn't send one
down (i.e. it was too old), then we fall back to trying to use the
cl_hostname as we do today.

Signed-off-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback.c         | 12 ++++++++++++
 fs/nfs/client.c           |  1 +
 fs/nfs/nfs4proc.c         | 33 ++++++++++++++++++++++++++++++++-
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  1 +
 5 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 073b4cf67ed9..54de482143cc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
 	if (p == NULL)
 		return 0;
 
+	/*
+	 * Did we get the acceptor from userland during the SETCLIENID
+	 * negotiation?
+	 */
+	if (clp->cl_acceptor)
+		return !strcmp(p, clp->cl_acceptor);
+
+	/*
+	 * Otherwise try to verify it using the cl_hostname. Note that this
+	 * doesn't work if a non-canonical hostname was used in the devname.
+	 */
+
 	/* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
 
 	if (memcmp(p, "nfs@", 4) != 0)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b213ee8fb012..168aa0df2658 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -252,6 +252,7 @@ void nfs_free_client(struct nfs_client *clp)
 	put_net(clp->cl_net);
 	put_nfs_version(clp->cl_nfs_mod);
 	kfree(clp->cl_hostname);
+	kfree(clp->cl_acceptor);
 	kfree(clp);
 
 	dprintk("<-- nfs_free_client()\n");
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0b8490eab486..b7babb3b8a4d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4936,6 +4936,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
 		return scnprintf(buf, len, "tcp");
 }
 
+static void nfs4_setclientid_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_setclientid *sc = calldata;
+
+	if (task->tk_status == 0)
+		sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred);
+}
+
+static const struct rpc_call_ops nfs4_setclientid_ops = {
+	.rpc_call_done = nfs4_setclientid_done,
+};
+
 /**
  * nfs4_proc_setclientid - Negotiate client ID
  * @clp: state data structure
@@ -4962,6 +4974,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 		.rpc_resp = res,
 		.rpc_cred = cred,
 	};
+	struct rpc_task *task;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clp->cl_rpcclient,
+		.rpc_message = &msg,
+		.callback_ops = &nfs4_setclientid_ops,
+		.callback_data = &setclientid,
+		.flags = RPC_TASK_TIMEOUT,
+	};
 	int status;
 
 	/* nfs_client_id4 */
@@ -4988,7 +5008,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 	dprintk("NFS call  setclientid auth=%s, '%.*s'\n",
 		clp->cl_rpcclient->cl_auth->au_ops->au_name,
 		setclientid.sc_name_len, setclientid.sc_name);
-	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task)) {
+		status = PTR_ERR(task);
+		goto out;
+	}
+	status = task->tk_status;
+	if (setclientid.sc_cred) {
+		clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
+		put_rpccred(setclientid.sc_cred);
+	}
+	rpc_put_task(task);
+out:
 	trace_nfs4_setclientid(clp, status);
 	dprintk("NFS reply setclientid: %d\n", status);
 	return status;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 1150ea41b626..922be2e050f5 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -45,6 +45,7 @@ struct nfs_client {
 	struct sockaddr_storage	cl_addr;	/* server identifier */
 	size_t			cl_addrlen;
 	char *			cl_hostname;	/* hostname of server */
+	char *			cl_acceptor;	/* GSSAPI acceptor name */
 	struct list_head	cl_share_link;	/* link in global client list */
 	struct list_head	cl_superblocks;	/* List of nfs_server structs */
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 81cbbf313272..0040629894df 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -993,6 +993,7 @@ struct nfs4_setclientid {
 	unsigned int			sc_uaddr_len;
 	char				sc_uaddr[RPCBIND_MAXUADDRLEN + 1];
 	u32				sc_cb_ident;
+	struct rpc_cred			*sc_cred;
 };
 
 struct nfs4_setclientid_res {
-- 
cgit v1.2.3


From b9ba6f94b2382ef832f97122976b73004f714714 Mon Sep 17 00:00:00 2001
From: Niu Yawei <yawei.niu@gmail.com>
Date: Wed, 4 Jun 2014 12:23:19 +0800
Subject: quota: remove dqptr_sem

Remove dqptr_sem to make quota code scalable: Remove the dqptr_sem,
accessing inode->i_dquot now protected by dquot_srcu, and changing
inode->i_dquot is now serialized by dq_data_lock.

Signed-off-by: Lai Siyao <lai.siyao@intel.com>
Signed-off-by: Niu Yawei <yawei.niu@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 114 ++++++++++++++++++++++----------------------------
 fs/super.c            |   1 -
 include/linux/quota.h |   1 -
 3 files changed, 49 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fb2d2e2a89e7..f2d0eee9d1f1 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -96,13 +96,16 @@
  * Note that some things (eg. sb pointer, type, id) doesn't change during
  * the life of the dquot structure and so needn't to be protected by a lock
  *
- * Any operation working on dquots via inode pointers must hold dqptr_sem.  If
- * operation is just reading pointers from inode (or not using them at all) the
- * read lock is enough. If pointers are altered function must hold write lock.
+ * Operation accessing dquots via inode pointers are protected by dquot_srcu.
+ * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and
+ * synchronize_srcu(&dquot_srcu) is called after clearing pointers from
+ * inode and before dropping dquot references to avoid use of dquots after
+ * they are freed. dq_data_lock is used to serialize the pointer setting and
+ * clearing operations.
  * Special care needs to be taken about S_NOQUOTA inode flag (marking that
  * inode is a quota file). Functions adding pointers from inode to dquots have
- * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
- * have to do all pointer modifications before dropping dqptr_sem. This makes
+ * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they
+ * have to do all pointer modifications before dropping dq_data_lock. This makes
  * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
  * then drops all pointers to dquots from an inode.
  *
@@ -116,21 +119,15 @@
  * spinlock to internal buffers before writing.
  *
  * Lock ordering (including related VFS locks) is the following:
- *   dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock >
- *   dqio_mutex
+ *   dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
  * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc.
- * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
- * dqptr_sem. But filesystem has to count with the fact that functions such as
- * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
- * from inside a transaction to keep filesystem consistency after a crash. Also
- * filesystems usually want to do some IO on dquot from ->mark_dirty which is
- * called with dqptr_sem held.
  */
 
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
 EXPORT_SYMBOL(dq_data_lock);
+DEFINE_STATIC_SRCU(dquot_srcu);
 
 void __quota_error(struct super_block *sb, const char *func,
 		   const char *fmt, ...)
@@ -964,7 +961,6 @@ static void add_dquot_ref(struct super_block *sb, int type)
 /*
  * Remove references to dquots from inode and add dquot to list for freeing
  * if we have the last reference to dquot
- * We can't race with anybody because we hold dqptr_sem for writing...
  */
 static void remove_inode_dquot_ref(struct inode *inode, int type,
 				   struct list_head *tofree_head)
@@ -1024,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 		 *  We have to scan also I_NEW inodes because they can already
 		 *  have quota pointer initialized. Luckily, we need to touch
 		 *  only quota pointers and these have separate locking
-		 *  (dqptr_sem).
+		 *  (dq_data_lock).
 		 */
+		spin_lock(&dq_data_lock);
 		if (!IS_NOQUOTA(inode)) {
 			if (unlikely(inode_get_rsv_space(inode) > 0))
 				reserved = 1;
 			remove_inode_dquot_ref(inode, type, tofree_head);
 		}
+		spin_unlock(&dq_data_lock);
 	}
 	spin_unlock(&inode_sb_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1048,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type)
 	LIST_HEAD(tofree_head);
 
 	if (sb->dq_op) {
-		down_write(&sb_dqopt(sb)->dqptr_sem);
 		remove_dquot_ref(sb, type, &tofree_head);
-		up_write(&sb_dqopt(sb)->dqptr_sem);
+		synchronize_srcu(&dquot_srcu);
 		put_dquot_list(&tofree_head);
 	}
 }
@@ -1381,9 +1378,6 @@ static int dquot_active(const struct inode *inode)
 /*
  * Initialize quota pointers in inode
  *
- * We do things in a bit complicated way but by that we avoid calling
- * dqget() and thus filesystem callbacks under dqptr_sem.
- *
  * It is better to call this function outside of any transaction as it
  * might need a lot of space in journal for dquot structure allocation.
  */
@@ -1394,8 +1388,6 @@ static void __dquot_initialize(struct inode *inode, int type)
 	struct super_block *sb = inode->i_sb;
 	qsize_t rsv;
 
-	/* First test before acquiring mutex - solves deadlocks when we
-         * re-enter the quota code and are already holding the mutex */
 	if (!dquot_active(inode))
 		return;
 
@@ -1429,7 +1421,7 @@ static void __dquot_initialize(struct inode *inode, int type)
 	if (!init_needed)
 		return;
 
-	down_write(&sb_dqopt(sb)->dqptr_sem);
+	spin_lock(&dq_data_lock);
 	if (IS_NOQUOTA(inode))
 		goto out_err;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type)
 			 * did a write before quota was turned on
 			 */
 			rsv = inode_get_rsv_space(inode);
-			if (unlikely(rsv)) {
-				spin_lock(&dq_data_lock);
+			if (unlikely(rsv))
 				dquot_resv_space(inode->i_dquot[cnt], rsv);
-				spin_unlock(&dq_data_lock);
-			}
 		}
 	}
 out_err:
-	up_write(&sb_dqopt(sb)->dqptr_sem);
+	spin_unlock(&dq_data_lock);
 	/* Drop unused references */
 	dqput_all(got);
 }
@@ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode)
 EXPORT_SYMBOL(dquot_initialize);
 
 /*
- * 	Release all quotas referenced by inode
+ * Release all quotas referenced by inode.
+ *
+ * This function only be called on inode free or converting
+ * a file to quota file, no other users for the i_dquot in
+ * both cases, so we needn't call synchronize_srcu() after
+ * clearing i_dquot.
  */
 static void __dquot_drop(struct inode *inode)
 {
 	int cnt;
 	struct dquot *put[MAXQUOTAS];
 
-	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		put[cnt] = inode->i_dquot[cnt];
 		inode->i_dquot[cnt] = NULL;
 	}
-	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	spin_unlock(&dq_data_lock);
 	dqput_all(put);
 }
 
@@ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
  */
 int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 {
-	int cnt, ret = 0;
+	int cnt, ret = 0, index;
 	struct dquot_warn warn[MAXQUOTAS];
 	struct dquot **dquots = inode->i_dquot;
 	int reserve = flags & DQUOT_SPACE_RESERVE;
 
-	/*
-	 * First test before acquiring mutex - solves deadlocks when we
-	 * re-enter the quota code and are already holding the mutex
-	 */
 	if (!dquot_active(inode)) {
 		inode_incr_space(inode, number, reserve);
 		goto out;
@@ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warn[cnt].w_type = QUOTA_NL_NOWARN;
 
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	index = srcu_read_lock(&dquot_srcu);
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!dquots[cnt])
@@ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 		goto out_flush_warn;
 	mark_all_dquot_dirty(dquots);
 out_flush_warn:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	srcu_read_unlock(&dquot_srcu, index);
 	flush_warnings(warn);
 out:
 	return ret;
@@ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space);
  */
 int dquot_alloc_inode(const struct inode *inode)
 {
-	int cnt, ret = 0;
+	int cnt, ret = 0, index;
 	struct dquot_warn warn[MAXQUOTAS];
 	struct dquot * const *dquots = inode->i_dquot;
 
-	/* First test before acquiring mutex - solves deadlocks when we
-         * re-enter the quota code and are already holding the mutex */
 	if (!dquot_active(inode))
 		return 0;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warn[cnt].w_type = QUOTA_NL_NOWARN;
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+
+	index = srcu_read_lock(&dquot_srcu);
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!dquots[cnt])
@@ -1685,7 +1674,7 @@ warn_put_all:
 	spin_unlock(&dq_data_lock);
 	if (ret == 0)
 		mark_all_dquot_dirty(dquots);
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	srcu_read_unlock(&dquot_srcu, index);
 	flush_warnings(warn);
 	return ret;
 }
@@ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode);
  */
 int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 {
-	int cnt;
+	int cnt, index;
 
 	if (!dquot_active(inode)) {
 		inode_claim_rsv_space(inode, number);
 		return 0;
 	}
 
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	index = srcu_read_lock(&dquot_srcu);
 	spin_lock(&dq_data_lock);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 	inode_claim_rsv_space(inode, number);
 	spin_unlock(&dq_data_lock);
 	mark_all_dquot_dirty(inode->i_dquot);
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	srcu_read_unlock(&dquot_srcu, index);
 	return 0;
 }
 EXPORT_SYMBOL(dquot_claim_space_nodirty);
@@ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
  */
 void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
 {
-	int cnt;
+	int cnt, index;
 
 	if (!dquot_active(inode)) {
 		inode_reclaim_rsv_space(inode, number);
 		return;
 	}
 
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	index = srcu_read_lock(&dquot_srcu);
 	spin_lock(&dq_data_lock);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
 	inode_reclaim_rsv_space(inode, number);
 	spin_unlock(&dq_data_lock);
 	mark_all_dquot_dirty(inode->i_dquot);
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	srcu_read_unlock(&dquot_srcu, index);
 	return;
 }
 EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
@@ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 	unsigned int cnt;
 	struct dquot_warn warn[MAXQUOTAS];
 	struct dquot **dquots = inode->i_dquot;
-	int reserve = flags & DQUOT_SPACE_RESERVE;
+	int reserve = flags & DQUOT_SPACE_RESERVE, index;
 
-	/* First test before acquiring mutex - solves deadlocks when we
-         * re-enter the quota code and are already holding the mutex */
 	if (!dquot_active(inode)) {
 		inode_decr_space(inode, number, reserve);
 		return;
 	}
 
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	index = srcu_read_lock(&dquot_srcu);
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		int wtype;
@@ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 		goto out_unlock;
 	mark_all_dquot_dirty(dquots);
 out_unlock:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	srcu_read_unlock(&dquot_srcu, index);
 	flush_warnings(warn);
 }
 EXPORT_SYMBOL(__dquot_free_space);
@@ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode)
 	unsigned int cnt;
 	struct dquot_warn warn[MAXQUOTAS];
 	struct dquot * const *dquots = inode->i_dquot;
+	int index;
 
-	/* First test before acquiring mutex - solves deadlocks when we
-         * re-enter the quota code and are already holding the mutex */
 	if (!dquot_active(inode))
 		return;
 
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	index = srcu_read_lock(&dquot_srcu);
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		int wtype;
@@ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode)
 	}
 	spin_unlock(&dq_data_lock);
 	mark_all_dquot_dirty(dquots);
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	srcu_read_unlock(&dquot_srcu, index);
 	flush_warnings(warn);
 }
 EXPORT_SYMBOL(dquot_free_inode);
@@ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode);
  * This operation can block, but only after everything is updated
  * A transaction must be started when entering this function.
  *
+ * We are holding reference on transfer_from & transfer_to, no need to
+ * protect them by srcu_read_lock().
  */
 int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 {
@@ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 	struct dquot_warn warn_from_inodes[MAXQUOTAS];
 	struct dquot_warn warn_from_space[MAXQUOTAS];
 
-	/* First test before acquiring mutex - solves deadlocks when we
-         * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return 0;
 	/* Initialize the arrays */
@@ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 		warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN;
 		warn_from_space[cnt].w_type = QUOTA_NL_NOWARN;
 	}
-	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+
+	spin_lock(&dq_data_lock);
 	if (IS_NOQUOTA(inode)) {	/* File without quota accounting? */
-		up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+		spin_unlock(&dq_data_lock);
 		return 0;
 	}
-	spin_lock(&dq_data_lock);
 	cur_space = inode_get_bytes(inode);
 	rsv_space = inode_get_rsv_space(inode);
 	space = cur_space + rsv_space;
@@ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 		inode->i_dquot[cnt] = transfer_to[cnt];
 	}
 	spin_unlock(&dq_data_lock);
-	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 
 	mark_all_dquot_dirty(transfer_from);
 	mark_all_dquot_dirty(transfer_to);
@@ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 	return 0;
 over_quota:
 	spin_unlock(&dq_data_lock);
-	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	flush_warnings(warn_to);
 	return ret;
 }
diff --git a/fs/super.c b/fs/super.c
index d20d5b11dedf..872b26bf06dd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -218,7 +218,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
 	mutex_init(&s->s_dquot.dqio_mutex);
 	mutex_init(&s->s_dquot.dqonoff_mutex);
-	init_rwsem(&s->s_dquot.dqptr_sem);
 	s->s_maxbytes = MAX_NON_LFS;
 	s->s_op = &default_op;
 	s->s_time_gran = 1000000000;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 0f3c5d38da1f..80d345a3524c 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -390,7 +390,6 @@ struct quota_info {
 	unsigned int flags;			/* Flags for diskquotas on this device */
 	struct mutex dqio_mutex;		/* lock device while I/O in progress */
 	struct mutex dqonoff_mutex;		/* Serialize quotaon & quotaoff */
-	struct rw_semaphore dqptr_sem;		/* serialize ops using quota_info struct, pointers from inode to dquots */
 	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */
 	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
 	const struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
-- 
cgit v1.2.3


From a509ea840b8e29e512764803e30b805c7ea89038 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 9 Jul 2014 17:45:10 +0200
Subject: ARM: mvebu: extend PMSU code to support dynamic frequency scaling

This commit adds the necessary code in the Marvell EBU PMSU driver to
support dynamic frequency scaling. In essence, what this new code does
is that it:

 * registers the frequency operating points supported by the CPU;

 * registers a clock notifier of the CPU clocks. The notifier function
   listens to the newly introduced APPLY_RATE_CHANGE event, and uses
   that to finalize the frequency transition by doing the part of the
   procedure that involves the PMSU;

 * registers a platform device for the cpufreq-generic driver, which
   will take care of the CPU frequency transitions.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Link: https://lkml.kernel.org/r/1404920715-19834-3-git-send-email-thomas.petazzoni@free-electrons.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 arch/arm/mach-mvebu/pmsu.c | 162 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mvebu-pmsu.h |  20 ++++++
 2 files changed, 182 insertions(+)
 create mode 100644 include/linux/mvebu-pmsu.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
index 53a55c8520bf..db7d9ab298b6 100644
--- a/arch/arm/mach-mvebu/pmsu.c
+++ b/arch/arm/mach-mvebu/pmsu.c
@@ -18,20 +18,26 @@
 
 #define pr_fmt(fmt) "mvebu-pmsu: " fmt
 
+#include <linux/clk.h>
 #include <linux/cpu_pm.h>
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/pm_opp.h>
 #include <linux/smp.h>
 #include <linux/resource.h>
+#include <linux/slab.h>
 #include <asm/cacheflush.h>
 #include <asm/cp15.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 #include "common.h"
+#include "armada-370-xp.h"
 
 static void __iomem *pmsu_mp_base;
 
@@ -57,6 +63,10 @@ static void __iomem *pmsu_mp_base;
 #define PMSU_STATUS_AND_MASK_IRQ_MASK		BIT(24)
 #define PMSU_STATUS_AND_MASK_FIQ_MASK		BIT(25)
 
+#define PMSU_EVENT_STATUS_AND_MASK(cpu)     ((cpu * 0x100) + 0x120)
+#define PMSU_EVENT_STATUS_AND_MASK_DFS_DONE        BIT(1)
+#define PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK   BIT(17)
+
 #define PMSU_BOOT_ADDR_REDIRECT_OFFSET(cpu) ((cpu * 0x100) + 0x124)
 
 /* PMSU fabric registers */
@@ -296,3 +306,155 @@ int __init armada_370_xp_cpu_pm_init(void)
 
 arch_initcall(armada_370_xp_cpu_pm_init);
 early_initcall(armada_370_xp_pmsu_init);
+
+static void mvebu_pmsu_dfs_request_local(void *data)
+{
+	u32 reg;
+	u32 cpu = smp_processor_id();
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	/* Prepare to enter idle */
+	reg = readl(pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu));
+	reg |= PMSU_STATUS_AND_MASK_CPU_IDLE_WAIT |
+	       PMSU_STATUS_AND_MASK_IRQ_MASK     |
+	       PMSU_STATUS_AND_MASK_FIQ_MASK;
+	writel(reg, pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu));
+
+	/* Request the DFS transition */
+	reg = readl(pmsu_mp_base + PMSU_CONTROL_AND_CONFIG(cpu));
+	reg |= PMSU_CONTROL_AND_CONFIG_DFS_REQ;
+	writel(reg, pmsu_mp_base + PMSU_CONTROL_AND_CONFIG(cpu));
+
+	/* The fact of entering idle will trigger the DFS transition */
+	wfi();
+
+	/*
+	 * We're back from idle, the DFS transition has completed,
+	 * clear the idle wait indication.
+	 */
+	reg = readl(pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu));
+	reg &= ~PMSU_STATUS_AND_MASK_CPU_IDLE_WAIT;
+	writel(reg, pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu));
+
+	local_irq_restore(flags);
+}
+
+int mvebu_pmsu_dfs_request(int cpu)
+{
+	unsigned long timeout;
+	int hwcpu = cpu_logical_map(cpu);
+	u32 reg;
+
+	/* Clear any previous DFS DONE event */
+	reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu));
+	reg &= ~PMSU_EVENT_STATUS_AND_MASK_DFS_DONE;
+	writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu));
+
+	/* Mask the DFS done interrupt, since we are going to poll */
+	reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu));
+	reg |= PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK;
+	writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu));
+
+	/* Trigger the DFS on the appropriate CPU */
+	smp_call_function_single(cpu, mvebu_pmsu_dfs_request_local,
+				 NULL, false);
+
+	/* Poll until the DFS done event is generated */
+	timeout = jiffies + HZ;
+	while (time_before(jiffies, timeout)) {
+		reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu));
+		if (reg & PMSU_EVENT_STATUS_AND_MASK_DFS_DONE)
+			break;
+		udelay(10);
+	}
+
+	if (time_after(jiffies, timeout))
+		return -ETIME;
+
+	/* Restore the DFS mask to its original state */
+	reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu));
+	reg &= ~PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK;
+	writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu));
+
+	return 0;
+}
+
+static int __init armada_xp_pmsu_cpufreq_init(void)
+{
+	struct device_node *np;
+	struct resource res;
+	int ret, cpu;
+
+	if (!of_machine_is_compatible("marvell,armadaxp"))
+		return 0;
+
+	/*
+	 * In order to have proper cpufreq handling, we need to ensure
+	 * that the Device Tree description of the CPU clock includes
+	 * the definition of the PMU DFS registers. If not, we do not
+	 * register the clock notifier and the cpufreq driver. This
+	 * piece of code is only for compatibility with old Device
+	 * Trees.
+	 */
+	np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock");
+	if (!np)
+		return 0;
+
+	ret = of_address_to_resource(np, 1, &res);
+	if (ret) {
+		pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n");
+		of_node_put(np);
+		return 0;
+	}
+
+	of_node_put(np);
+
+	/*
+	 * For each CPU, this loop registers the operating points
+	 * supported (which are the nominal CPU frequency and half of
+	 * it), and registers the clock notifier that will take care
+	 * of doing the PMSU part of a frequency transition.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct device *cpu_dev;
+		struct clk *clk;
+		int ret;
+
+		cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_err("Cannot get CPU %d\n", cpu);
+			continue;
+		}
+
+		clk = clk_get(cpu_dev, 0);
+		if (!clk) {
+			pr_err("Cannot get clock for CPU %d\n", cpu);
+			return -ENODEV;
+		}
+
+		/*
+		 * In case of a failure of dev_pm_opp_add(), we don't
+		 * bother with cleaning up the registered OPP (there's
+		 * no function to do so), and simply cancel the
+		 * registration of the cpufreq device.
+		 */
+		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0);
+		if (ret) {
+			clk_put(clk);
+			return ret;
+		}
+
+		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
+		if (ret) {
+			clk_put(clk);
+			return ret;
+		}
+	}
+
+	platform_device_register_simple("cpufreq-generic", -1, NULL, 0);
+	return 0;
+}
+
+device_initcall(armada_xp_pmsu_cpufreq_init);
diff --git a/include/linux/mvebu-pmsu.h b/include/linux/mvebu-pmsu.h
new file mode 100644
index 000000000000..b918d07efe23
--- /dev/null
+++ b/include/linux/mvebu-pmsu.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2012 Marvell
+ *
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __MVEBU_PMSU_H__
+#define __MVEBU_PMSU_H__
+
+#ifdef CONFIG_MACH_MVEBU_V7
+int mvebu_pmsu_dfs_request(int cpu);
+#else
+static inline int mvebu_pmsu_dfs_request(int cpu) { return -ENODEV; }
+#endif
+
+#endif /* __MVEBU_PMSU_H__ */
-- 
cgit v1.2.3


From 8fe8bc7773303e3c49be348c3180bc9785104dfc Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Wed, 9 Jul 2014 16:25:23 +0200
Subject: i2c: s6000: remove duplicate driver

It turned out that the s6000 simply has a designware IP core and should
use the designated driver for it which is way more maintained and
feature complete. There are currently no users in tree, and not even a
toolchain for s6000 seems to be available. So, simply remove this
duplicate. If someone needs assistance in converting to the designware
driver, the i2c list will be there to help.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Kconfig     |  10 -
 drivers/i2c/busses/Makefile    |   1 -
 drivers/i2c/busses/i2c-s6000.c | 404 -----------------------------------------
 drivers/i2c/busses/i2c-s6000.h |  79 --------
 include/linux/i2c/s6000.h      |  10 -
 5 files changed, 504 deletions(-)
 delete mode 100644 drivers/i2c/busses/i2c-s6000.c
 delete mode 100644 drivers/i2c/busses/i2c-s6000.h
 delete mode 100644 include/linux/i2c/s6000.h

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 9f7d5859cf65..d25dd120c011 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -700,16 +700,6 @@ config I2C_S3C2410
 	  Say Y here to include support for I2C controller in the
 	  Samsung SoCs.
 
-config I2C_S6000
-	tristate "S6000 I2C support"
-	depends on XTENSA_VARIANT_S6000
-	help
-	  This driver supports the on chip I2C device on the
-	  S6000 xtensa processor family.
-
-	  To compile this driver as a module, choose M here. The module
-	  will be called i2c-s6000.
-
 config I2C_SH7760
 	tristate "Renesas SH7760 I2C Controller"
 	depends on CPU_SUBTYPE_SH7760
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index dd9a7f8e873f..1958b490105e 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -68,7 +68,6 @@ obj-$(CONFIG_I2C_QUP)		+= i2c-qup.o
 obj-$(CONFIG_I2C_RIIC)		+= i2c-riic.o
 obj-$(CONFIG_I2C_RK3X)		+= i2c-rk3x.o
 obj-$(CONFIG_I2C_S3C2410)	+= i2c-s3c2410.o
-obj-$(CONFIG_I2C_S6000)		+= i2c-s6000.o
 obj-$(CONFIG_I2C_SH7760)	+= i2c-sh7760.o
 obj-$(CONFIG_I2C_SH_MOBILE)	+= i2c-sh_mobile.o
 obj-$(CONFIG_I2C_SIMTEC)	+= i2c-simtec.o
diff --git a/drivers/i2c/busses/i2c-s6000.c b/drivers/i2c/busses/i2c-s6000.c
deleted file mode 100644
index dd186a037684..000000000000
--- a/drivers/i2c/busses/i2c-s6000.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * drivers/i2c/busses/i2c-s6000.c
- *
- * Description: Driver for S6000 Family I2C Interface
- * Copyright (c) 2008 emlix GmbH
- * Author:	Oskar Schirmer <oskar@scara.com>
- *
- * Partially based on i2c-bfin-twi.c driver by <sonic.zhang@analog.com>
- * Copyright (c) 2005-2007 Analog Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/i2c.h>
-#include <linux/i2c/s6000.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-
-#include "i2c-s6000.h"
-
-#define DRV_NAME "i2c-s6000"
-
-#define POLL_TIMEOUT	(2 * HZ)
-
-struct s6i2c_if {
-	u8 __iomem		*reg; /* memory mapped registers */
-	int			irq;
-	spinlock_t		lock;
-	struct i2c_msg		*msgs; /* messages currently handled */
-	int			msgs_num; /* nb of msgs to do */
-	int			msgs_push; /* nb of msgs read/written */
-	int			msgs_done; /* nb of msgs finally handled */
-	unsigned		push; /* nb of bytes read/written in msg */
-	unsigned		done; /* nb of bytes finally handled */
-	int			timeout_count; /* timeout retries left */
-	struct timer_list	timeout_timer;
-	struct i2c_adapter	adap;
-	struct completion	complete;
-	struct clk		*clk;
-	struct resource		*res;
-};
-
-static inline u16 i2c_rd16(struct s6i2c_if *iface, unsigned n)
-{
-	return readw(iface->reg + (n));
-}
-
-static inline void i2c_wr16(struct s6i2c_if *iface, unsigned n, u16 v)
-{
-	writew(v, iface->reg + (n));
-}
-
-static inline u32 i2c_rd32(struct s6i2c_if *iface, unsigned n)
-{
-	return readl(iface->reg + (n));
-}
-
-static inline void i2c_wr32(struct s6i2c_if *iface, unsigned n, u32 v)
-{
-	writel(v, iface->reg + (n));
-}
-
-static struct s6i2c_if s6i2c_if;
-
-static void s6i2c_handle_interrupt(struct s6i2c_if *iface)
-{
-	if (i2c_rd16(iface, S6_I2C_INTRSTAT) & (1 << S6_I2C_INTR_TXABRT)) {
-		i2c_rd16(iface, S6_I2C_CLRTXABRT);
-		i2c_wr16(iface, S6_I2C_INTRMASK, 0);
-		complete(&iface->complete);
-		return;
-	}
-	if (iface->msgs_done >= iface->msgs_num) {
-		dev_err(&iface->adap.dev, "s6i2c: spurious I2C irq: %04x\n",
-			i2c_rd16(iface, S6_I2C_INTRSTAT));
-		i2c_wr16(iface, S6_I2C_INTRMASK, 0);
-		return;
-	}
-	while ((iface->msgs_push < iface->msgs_num)
-	    && (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_TFNF))) {
-		struct i2c_msg *m = &iface->msgs[iface->msgs_push];
-		if (!(m->flags & I2C_M_RD))
-			i2c_wr16(iface, S6_I2C_DATACMD, m->buf[iface->push]);
-		else
-			i2c_wr16(iface, S6_I2C_DATACMD,
-				 1 << S6_I2C_DATACMD_READ);
-		if (++iface->push >= m->len) {
-			iface->push = 0;
-			iface->msgs_push += 1;
-		}
-	}
-	do {
-		struct i2c_msg *m = &iface->msgs[iface->msgs_done];
-		if (!(m->flags & I2C_M_RD)) {
-			if (iface->msgs_done < iface->msgs_push)
-				iface->msgs_done += 1;
-			else
-				break;
-		} else if (i2c_rd16(iface, S6_I2C_STATUS)
-				& (1 << S6_I2C_STATUS_RFNE)) {
-			m->buf[iface->done] = i2c_rd16(iface, S6_I2C_DATACMD);
-			if (++iface->done >= m->len) {
-				iface->done = 0;
-				iface->msgs_done += 1;
-			}
-		} else{
-			break;
-		}
-	} while (iface->msgs_done < iface->msgs_num);
-	if (iface->msgs_done >= iface->msgs_num) {
-		i2c_wr16(iface, S6_I2C_INTRMASK, 1 << S6_I2C_INTR_TXABRT);
-		complete(&iface->complete);
-	} else if (iface->msgs_push >= iface->msgs_num) {
-		i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXABRT) |
-						 (1 << S6_I2C_INTR_RXFULL));
-	} else {
-		i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXABRT) |
-						 (1 << S6_I2C_INTR_TXEMPTY) |
-						 (1 << S6_I2C_INTR_RXFULL));
-	}
-}
-
-static irqreturn_t s6i2c_interrupt_entry(int irq, void *dev_id)
-{
-	struct s6i2c_if *iface = dev_id;
-	if (!(i2c_rd16(iface, S6_I2C_STATUS) & ((1 << S6_I2C_INTR_RXUNDER)
-					      | (1 << S6_I2C_INTR_RXOVER)
-					      | (1 << S6_I2C_INTR_RXFULL)
-					      | (1 << S6_I2C_INTR_TXOVER)
-					      | (1 << S6_I2C_INTR_TXEMPTY)
-					      | (1 << S6_I2C_INTR_RDREQ)
-					      | (1 << S6_I2C_INTR_TXABRT)
-					      | (1 << S6_I2C_INTR_RXDONE)
-					      | (1 << S6_I2C_INTR_ACTIVITY)
-					      | (1 << S6_I2C_INTR_STOPDET)
-					      | (1 << S6_I2C_INTR_STARTDET)
-					      | (1 << S6_I2C_INTR_GENCALL))))
-		return IRQ_NONE;
-
-	spin_lock(&iface->lock);
-	del_timer(&iface->timeout_timer);
-	s6i2c_handle_interrupt(iface);
-	spin_unlock(&iface->lock);
-	return IRQ_HANDLED;
-}
-
-static void s6i2c_timeout(unsigned long data)
-{
-	struct s6i2c_if *iface = (struct s6i2c_if *)data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&iface->lock, flags);
-	s6i2c_handle_interrupt(iface);
-	if (--iface->timeout_count > 0) {
-		iface->timeout_timer.expires = jiffies + POLL_TIMEOUT;
-		add_timer(&iface->timeout_timer);
-	} else {
-		complete(&iface->complete);
-		i2c_wr16(iface, S6_I2C_INTRMASK, 0);
-	}
-	spin_unlock_irqrestore(&iface->lock, flags);
-}
-
-static int s6i2c_master_xfer(struct i2c_adapter *adap,
-				struct i2c_msg *msgs, int num)
-{
-	struct s6i2c_if *iface = adap->algo_data;
-	int i;
-	if (num == 0)
-		return 0;
-	if (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_ACTIVITY))
-		yield();
-	i2c_wr16(iface, S6_I2C_INTRMASK, 0);
-	i2c_rd16(iface, S6_I2C_CLRINTR);
-	for (i = 0; i < num; i++) {
-		if (msgs[i].flags & I2C_M_TEN) {
-			dev_err(&adap->dev,
-				"s6i2c: 10 bits addr not supported\n");
-			return -EINVAL;
-		}
-		if (msgs[i].len == 0) {
-			dev_err(&adap->dev,
-				"s6i2c: zero length message not supported\n");
-			return -EINVAL;
-		}
-		if (msgs[i].addr != msgs[0].addr) {
-			dev_err(&adap->dev,
-				"s6i2c: multiple xfer cannot change target\n");
-			return -EINVAL;
-		}
-	}
-
-	iface->msgs = msgs;
-	iface->msgs_num = num;
-	iface->msgs_push = 0;
-	iface->msgs_done = 0;
-	iface->push = 0;
-	iface->done = 0;
-	iface->timeout_count = 10;
-	i2c_wr16(iface, S6_I2C_TAR, msgs[0].addr);
-	i2c_wr16(iface, S6_I2C_ENABLE, 1);
-	i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXEMPTY) |
-					 (1 << S6_I2C_INTR_TXABRT));
-
-	iface->timeout_timer.expires = jiffies + POLL_TIMEOUT;
-	add_timer(&iface->timeout_timer);
-	wait_for_completion(&iface->complete);
-	del_timer_sync(&iface->timeout_timer);
-	while (i2c_rd32(iface, S6_I2C_TXFLR) > 0)
-		schedule();
-	while (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_ACTIVITY))
-		schedule();
-
-	i2c_wr16(iface, S6_I2C_INTRMASK, 0);
-	i2c_wr16(iface, S6_I2C_ENABLE, 0);
-	return iface->msgs_done;
-}
-
-static u32 s6i2c_functionality(struct i2c_adapter *adap)
-{
-	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
-}
-
-static struct i2c_algorithm s6i2c_algorithm = {
-	.master_xfer   = s6i2c_master_xfer,
-	.functionality = s6i2c_functionality,
-};
-
-static u16 nanoseconds_on_clk(struct s6i2c_if *iface, u32 ns)
-{
-	u32 dividend = ((clk_get_rate(iface->clk) / 1000) * ns) / 1000000;
-	if (dividend > 0xffff)
-		return 0xffff;
-	return dividend;
-}
-
-static int s6i2c_probe(struct platform_device *dev)
-{
-	struct s6i2c_if *iface = &s6i2c_if;
-	struct i2c_adapter *p_adap;
-	const char *clock;
-	int bus_num, rc;
-	spin_lock_init(&iface->lock);
-	init_completion(&iface->complete);
-	iface->irq = platform_get_irq(dev, 0);
-	if (iface->irq < 0) {
-		rc = iface->irq;
-		goto err_out;
-	}
-	iface->res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!iface->res) {
-		rc = -ENXIO;
-		goto err_out;
-	}
-	iface->res = request_mem_region(iface->res->start,
-					resource_size(iface->res),
-					dev->dev.bus_id);
-	if (!iface->res) {
-		rc = -EBUSY;
-		goto err_out;
-	}
-	iface->reg = ioremap_nocache(iface->res->start,
-				     resource_size(iface->res));
-	if (!iface->reg) {
-		rc = -ENOMEM;
-		goto err_reg;
-	}
-
-	clock = 0;
-	bus_num = -1;
-	if (dev_get_platdata(&dev->dev)) {
-		struct s6_i2c_platform_data *pdata =
-			dev_get_platdata(&dev->dev);
-		bus_num = pdata->bus_num;
-		clock = pdata->clock;
-	}
-	iface->clk = clk_get(&dev->dev, clock);
-	if (IS_ERR(iface->clk)) {
-		rc = PTR_ERR(iface->clk);
-		goto err_map;
-	}
-	rc = clk_enable(iface->clk);
-	if (rc < 0)
-		goto err_clk_put;
-	init_timer(&iface->timeout_timer);
-	iface->timeout_timer.function = s6i2c_timeout;
-	iface->timeout_timer.data = (unsigned long)iface;
-
-	p_adap = &iface->adap;
-	strlcpy(p_adap->name, dev->name, sizeof(p_adap->name));
-	p_adap->algo = &s6i2c_algorithm;
-	p_adap->algo_data = iface;
-	p_adap->nr = bus_num;
-	p_adap->class = 0;
-	p_adap->dev.parent = &dev->dev;
-	i2c_wr16(iface, S6_I2C_INTRMASK, 0);
-	rc = request_irq(iface->irq, s6i2c_interrupt_entry,
-			 IRQF_SHARED, dev->name, iface);
-	if (rc) {
-		dev_err(&p_adap->dev, "s6i2c: can't get IRQ %d\n", iface->irq);
-		goto err_clk_dis;
-	}
-
-	i2c_wr16(iface, S6_I2C_ENABLE, 0);
-	udelay(1);
-	i2c_wr32(iface, S6_I2C_SRESET, 1 << S6_I2C_SRESET_IC_SRST);
-	i2c_wr16(iface, S6_I2C_CLRTXABRT, 1);
-	i2c_wr16(iface, S6_I2C_CON,
-			(1 << S6_I2C_CON_MASTER) |
-			(S6_I2C_CON_SPEED_NORMAL << S6_I2C_CON_SPEED) |
-			(0 << S6_I2C_CON_10BITSLAVE) |
-			(0 << S6_I2C_CON_10BITMASTER) |
-			(1 << S6_I2C_CON_RESTARTENA) |
-			(1 << S6_I2C_CON_SLAVEDISABLE));
-	i2c_wr16(iface, S6_I2C_SSHCNT, nanoseconds_on_clk(iface, 4000));
-	i2c_wr16(iface, S6_I2C_SSLCNT, nanoseconds_on_clk(iface, 4700));
-	i2c_wr16(iface, S6_I2C_FSHCNT, nanoseconds_on_clk(iface, 600));
-	i2c_wr16(iface, S6_I2C_FSLCNT, nanoseconds_on_clk(iface, 1300));
-	i2c_wr16(iface, S6_I2C_RXTL, 0);
-	i2c_wr16(iface, S6_I2C_TXTL, 0);
-
-	platform_set_drvdata(dev, iface);
-	rc = i2c_add_numbered_adapter(p_adap);
-	if (rc)
-		goto err_irq_free;
-	return 0;
-
-err_irq_free:
-	free_irq(iface->irq, iface);
-err_clk_dis:
-	clk_disable(iface->clk);
-err_clk_put:
-	clk_put(iface->clk);
-err_map:
-	iounmap(iface->reg);
-err_reg:
-	release_mem_region(iface->res->start,
-			   resource_size(iface->res));
-err_out:
-	return rc;
-}
-
-static int s6i2c_remove(struct platform_device *pdev)
-{
-	struct s6i2c_if *iface = platform_get_drvdata(pdev);
-	i2c_wr16(iface, S6_I2C_ENABLE, 0);
-	i2c_del_adapter(&iface->adap);
-	free_irq(iface->irq, iface);
-	clk_disable(iface->clk);
-	clk_put(iface->clk);
-	iounmap(iface->reg);
-	release_mem_region(iface->res->start,
-			   resource_size(iface->res));
-	return 0;
-}
-
-static struct platform_driver s6i2c_driver = {
-	.probe		= s6i2c_probe,
-	.remove		= s6i2c_remove,
-	.driver		= {
-		.name	= DRV_NAME,
-		.owner	= THIS_MODULE,
-	},
-};
-
-static int __init s6i2c_init(void)
-{
-	pr_info("I2C: S6000 I2C driver\n");
-	return platform_driver_register(&s6i2c_driver);
-}
-
-static void __exit s6i2c_exit(void)
-{
-	platform_driver_unregister(&s6i2c_driver);
-}
-
-MODULE_DESCRIPTION("I2C-Bus adapter routines for S6000 I2C");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" DRV_NAME);
-
-subsys_initcall(s6i2c_init);
-module_exit(s6i2c_exit);
diff --git a/drivers/i2c/busses/i2c-s6000.h b/drivers/i2c/busses/i2c-s6000.h
deleted file mode 100644
index 4936f9f2256f..000000000000
--- a/drivers/i2c/busses/i2c-s6000.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * drivers/i2c/busses/i2c-s6000.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2008 Emlix GmbH <info@emlix.com>
- * Author:	Oskar Schirmer <oskar@scara.com>
- */
-
-#ifndef __DRIVERS_I2C_BUSSES_I2C_S6000_H
-#define __DRIVERS_I2C_BUSSES_I2C_S6000_H
-
-#define S6_I2C_CON		0x000
-#define S6_I2C_CON_MASTER		0
-#define S6_I2C_CON_SPEED		1
-#define S6_I2C_CON_SPEED_NORMAL			1
-#define S6_I2C_CON_SPEED_FAST			2
-#define S6_I2C_CON_SPEED_MASK			3
-#define S6_I2C_CON_10BITSLAVE		3
-#define S6_I2C_CON_10BITMASTER		4
-#define S6_I2C_CON_RESTARTENA		5
-#define S6_I2C_CON_SLAVEDISABLE		6
-#define S6_I2C_TAR		0x004
-#define S6_I2C_TAR_GCORSTART		10
-#define S6_I2C_TAR_SPECIAL		11
-#define S6_I2C_SAR		0x008
-#define S6_I2C_HSMADDR		0x00C
-#define S6_I2C_DATACMD		0x010
-#define S6_I2C_DATACMD_READ		8
-#define S6_I2C_SSHCNT		0x014
-#define S6_I2C_SSLCNT		0x018
-#define S6_I2C_FSHCNT		0x01C
-#define S6_I2C_FSLCNT		0x020
-#define S6_I2C_INTRSTAT		0x02C
-#define S6_I2C_INTRMASK		0x030
-#define S6_I2C_RAWINTR		0x034
-#define S6_I2C_INTR_RXUNDER		0
-#define S6_I2C_INTR_RXOVER		1
-#define S6_I2C_INTR_RXFULL		2
-#define S6_I2C_INTR_TXOVER		3
-#define S6_I2C_INTR_TXEMPTY		4
-#define S6_I2C_INTR_RDREQ		5
-#define S6_I2C_INTR_TXABRT		6
-#define S6_I2C_INTR_RXDONE		7
-#define S6_I2C_INTR_ACTIVITY		8
-#define S6_I2C_INTR_STOPDET		9
-#define S6_I2C_INTR_STARTDET		10
-#define S6_I2C_INTR_GENCALL		11
-#define S6_I2C_RXTL		0x038
-#define S6_I2C_TXTL		0x03C
-#define S6_I2C_CLRINTR		0x040
-#define S6_I2C_CLRRXUNDER	0x044
-#define S6_I2C_CLRRXOVER	0x048
-#define S6_I2C_CLRTXOVER	0x04C
-#define S6_I2C_CLRRDREQ		0x050
-#define S6_I2C_CLRTXABRT	0x054
-#define S6_I2C_CLRRXDONE	0x058
-#define S6_I2C_CLRACTIVITY	0x05C
-#define S6_I2C_CLRSTOPDET	0x060
-#define S6_I2C_CLRSTARTDET	0x064
-#define S6_I2C_CLRGENCALL	0x068
-#define S6_I2C_ENABLE		0x06C
-#define S6_I2C_STATUS		0x070
-#define S6_I2C_STATUS_ACTIVITY		0
-#define S6_I2C_STATUS_TFNF		1
-#define S6_I2C_STATUS_TFE		2
-#define S6_I2C_STATUS_RFNE		3
-#define S6_I2C_STATUS_RFF		4
-#define S6_I2C_TXFLR		0x074
-#define S6_I2C_RXFLR		0x078
-#define S6_I2C_SRESET		0x07C
-#define S6_I2C_SRESET_IC_SRST		0
-#define S6_I2C_SRESET_IC_MASTER_SRST	1
-#define S6_I2C_SRESET_IC_SLAVE_SRST	2
-#define S6_I2C_TXABRTSOURCE	0x080
-
-#endif
diff --git a/include/linux/i2c/s6000.h b/include/linux/i2c/s6000.h
deleted file mode 100644
index d9b34bfdae76..000000000000
--- a/include/linux/i2c/s6000.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __LINUX_I2C_S6000_H
-#define __LINUX_I2C_S6000_H
-
-struct s6_i2c_platform_data {
-	const char *clock; /* the clock to use */
-	int bus_num; /* the bus number to register */
-};
-
-#endif
-
-- 
cgit v1.2.3


From 306a7f9139318a28063282a15b9f9ebacf09c9b9 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 17 Jul 2014 13:17:24 +0200
Subject: ARM: tegra: Move includes to include/soc/tegra

In order to not clutter the include/linux directory with SoC specific
headers, move the Tegra-specific headers out into a separate directory.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 arch/arm/mach-tegra/fuse.c          |   3 +-
 arch/arm/mach-tegra/pmc.c           |   3 +-
 arch/arm/mach-tegra/powergate.c     |   3 +-
 drivers/amba/tegra-ahb.c            |   3 +-
 drivers/clk/tegra/clk-periph-gate.c |   3 +-
 drivers/clk/tegra/clk-tegra30.c     |   5 +-
 drivers/clk/tegra/clk.c             |   3 +-
 drivers/gpu/drm/tegra/gr3d.c        |   3 +-
 drivers/gpu/drm/tegra/sor.c         |   3 +-
 drivers/iommu/tegra-smmu.c          |   3 +-
 drivers/pci/host/pci-tegra.c        |   5 +-
 include/linux/tegra-ahb.h           |  19 -----
 include/linux/tegra-cpuidle.h       |  25 -------
 include/linux/tegra-powergate.h     | 134 ------------------------------------
 include/linux/tegra-soc.h           |  22 ------
 include/soc/tegra/ahb.h             |  19 +++++
 include/soc/tegra/cpuidle.h         |  25 +++++++
 include/soc/tegra/fuse.h            |  22 ++++++
 include/soc/tegra/powergate.h       | 134 ++++++++++++++++++++++++++++++++++++
 19 files changed, 225 insertions(+), 212 deletions(-)
 delete mode 100644 include/linux/tegra-ahb.h
 delete mode 100644 include/linux/tegra-cpuidle.h
 delete mode 100644 include/linux/tegra-powergate.h
 delete mode 100644 include/linux/tegra-soc.h
 create mode 100644 include/soc/tegra/ahb.h
 create mode 100644 include/soc/tegra/cpuidle.h
 create mode 100644 include/soc/tegra/fuse.h
 create mode 100644 include/soc/tegra/powergate.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/fuse.c b/arch/arm/mach-tegra/fuse.c
index c9ac23b385be..930fef861227 100644
--- a/arch/arm/mach-tegra/fuse.c
+++ b/arch/arm/mach-tegra/fuse.c
@@ -23,7 +23,8 @@
 #include <linux/export.h>
 #include <linux/random.h>
 #include <linux/clk.h>
-#include <linux/tegra-soc.h>
+
+#include <soc/tegra/fuse.h>
 
 #include "fuse.h"
 #include "iomap.h"
diff --git a/arch/arm/mach-tegra/pmc.c b/arch/arm/mach-tegra/pmc.c
index 7c7123e7557b..0f457801eaca 100644
--- a/arch/arm/mach-tegra/pmc.c
+++ b/arch/arm/mach-tegra/pmc.c
@@ -20,7 +20,8 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/tegra-powergate.h>
+
+#include <soc/tegra/powergate.h>
 
 #include "flowctrl.h"
 #include "fuse.h"
diff --git a/arch/arm/mach-tegra/powergate.c b/arch/arm/mach-tegra/powergate.c
index 4cefc5cd6bed..7b148bc6c995 100644
--- a/arch/arm/mach-tegra/powergate.c
+++ b/arch/arm/mach-tegra/powergate.c
@@ -29,7 +29,8 @@
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/clk/tegra.h>
-#include <linux/tegra-powergate.h>
+
+#include <soc/tegra/powergate.h>
 
 #include "fuse.h"
 #include "iomap.h"
diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c
index 558a239954e8..d8961ef4d2e7 100644
--- a/drivers/amba/tegra-ahb.c
+++ b/drivers/amba/tegra-ahb.c
@@ -25,7 +25,8 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
-#include <linux/tegra-ahb.h>
+
+#include <soc/tegra/ahb.h>
 
 #define DRV_NAME "tegra-ahb"
 
diff --git a/drivers/clk/tegra/clk-periph-gate.c b/drivers/clk/tegra/clk-periph-gate.c
index 507015314827..0aa8830ae7cc 100644
--- a/drivers/clk/tegra/clk-periph-gate.c
+++ b/drivers/clk/tegra/clk-periph-gate.c
@@ -20,7 +20,8 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/err.h>
-#include <linux/tegra-soc.h>
+
+#include <soc/tegra/fuse.h>
 
 #include "clk.h"
 
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index 8b10c38b6e3c..5679ffdb3f8c 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -22,8 +22,11 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/clk/tegra.h>
-#include <linux/tegra-powergate.h>
+
+#include <soc/tegra/powergate.h>
+
 #include <dt-bindings/clock/tegra30-car.h>
+
 #include "clk.h"
 #include "clk-id.h"
 
diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c
index c0a7d7723510..f4503ba97400 100644
--- a/drivers/clk/tegra/clk.c
+++ b/drivers/clk/tegra/clk.c
@@ -19,7 +19,8 @@
 #include <linux/of.h>
 #include <linux/clk/tegra.h>
 #include <linux/reset-controller.h>
-#include <linux/tegra-soc.h>
+
+#include <soc/tegra/fuse.h>
 
 #include "clk.h"
 
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 30f5ba9bd6d0..69974851e564 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -12,7 +12,8 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
-#include <linux/tegra-powergate.h>
+
+#include <soc/tegra/powergate.h>
 
 #include "drm.h"
 #include "gem.h"
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 27c979b50111..eafd0b8a71d2 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -11,7 +11,8 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
-#include <linux/tegra-powergate.h>
+
+#include <soc/tegra/powergate.h>
 
 #include <drm/drm_dp_helper.h>
 
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 605b5b46a903..230d06c9328b 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -35,7 +35,8 @@
 #include <linux/of_iommu.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/tegra-ahb.h>
+
+#include <soc/tegra/ahb.h>
 
 #include <asm/page.h>
 #include <asm/cacheflush.h>
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index 083cf37ca047..a2f0f88be382 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -41,11 +41,12 @@
 #include <linux/reset.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
-#include <linux/tegra-cpuidle.h>
-#include <linux/tegra-powergate.h>
 #include <linux/vmalloc.h>
 #include <linux/regulator/consumer.h>
 
+#include <soc/tegra/cpuidle.h>
+#include <soc/tegra/powergate.h>
+
 #include <asm/mach/irq.h>
 #include <asm/mach/map.h>
 #include <asm/mach/pci.h>
diff --git a/include/linux/tegra-ahb.h b/include/linux/tegra-ahb.h
deleted file mode 100644
index f1cd075ceee1..000000000000
--- a/include/linux/tegra-ahb.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __LINUX_AHB_H__
-#define __LINUX_AHB_H__
-
-extern int tegra_ahb_enable_smmu(struct device_node *ahb);
-
-#endif	/* __LINUX_AHB_H__ */
diff --git a/include/linux/tegra-cpuidle.h b/include/linux/tegra-cpuidle.h
deleted file mode 100644
index 9c6286bbf662..000000000000
--- a/include/linux/tegra-cpuidle.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __LINUX_TEGRA_CPUIDLE_H__
-#define __LINUX_TEGRA_CPUIDLE_H__
-
-#ifdef CONFIG_CPU_IDLE
-void tegra_cpuidle_pcie_irqs_in_use(void);
-#else
-static inline void tegra_cpuidle_pcie_irqs_in_use(void)
-{
-}
-#endif
-
-#endif
diff --git a/include/linux/tegra-powergate.h b/include/linux/tegra-powergate.h
deleted file mode 100644
index 46f0a07812b4..000000000000
--- a/include/linux/tegra-powergate.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2010 Google, Inc
- *
- * Author:
- *	Colin Cross <ccross@google.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef _MACH_TEGRA_POWERGATE_H_
-#define _MACH_TEGRA_POWERGATE_H_
-
-struct clk;
-struct reset_control;
-
-#define TEGRA_POWERGATE_CPU	0
-#define TEGRA_POWERGATE_3D	1
-#define TEGRA_POWERGATE_VENC	2
-#define TEGRA_POWERGATE_PCIE	3
-#define TEGRA_POWERGATE_VDEC	4
-#define TEGRA_POWERGATE_L2	5
-#define TEGRA_POWERGATE_MPE	6
-#define TEGRA_POWERGATE_HEG	7
-#define TEGRA_POWERGATE_SATA	8
-#define TEGRA_POWERGATE_CPU1	9
-#define TEGRA_POWERGATE_CPU2	10
-#define TEGRA_POWERGATE_CPU3	11
-#define TEGRA_POWERGATE_CELP	12
-#define TEGRA_POWERGATE_3D1	13
-#define TEGRA_POWERGATE_CPU0	14
-#define TEGRA_POWERGATE_C0NC	15
-#define TEGRA_POWERGATE_C1NC	16
-#define TEGRA_POWERGATE_SOR	17
-#define TEGRA_POWERGATE_DIS	18
-#define TEGRA_POWERGATE_DISB	19
-#define TEGRA_POWERGATE_XUSBA	20
-#define TEGRA_POWERGATE_XUSBB	21
-#define TEGRA_POWERGATE_XUSBC	22
-#define TEGRA_POWERGATE_VIC	23
-#define TEGRA_POWERGATE_IRAM	24
-
-#define TEGRA_POWERGATE_3D0	TEGRA_POWERGATE_3D
-
-#define TEGRA_IO_RAIL_CSIA	0
-#define TEGRA_IO_RAIL_CSIB	1
-#define TEGRA_IO_RAIL_DSI	2
-#define TEGRA_IO_RAIL_MIPI_BIAS	3
-#define TEGRA_IO_RAIL_PEX_BIAS	4
-#define TEGRA_IO_RAIL_PEX_CLK1	5
-#define TEGRA_IO_RAIL_PEX_CLK2	6
-#define TEGRA_IO_RAIL_USB0	9
-#define TEGRA_IO_RAIL_USB1	10
-#define TEGRA_IO_RAIL_USB2	11
-#define TEGRA_IO_RAIL_USB_BIAS	12
-#define TEGRA_IO_RAIL_NAND	13
-#define TEGRA_IO_RAIL_UART	14
-#define TEGRA_IO_RAIL_BB	15
-#define TEGRA_IO_RAIL_AUDIO	17
-#define TEGRA_IO_RAIL_HSIC	19
-#define TEGRA_IO_RAIL_COMP	22
-#define TEGRA_IO_RAIL_HDMI	28
-#define TEGRA_IO_RAIL_PEX_CNTRL	32
-#define TEGRA_IO_RAIL_SDMMC1	33
-#define TEGRA_IO_RAIL_SDMMC3	34
-#define TEGRA_IO_RAIL_SDMMC4	35
-#define TEGRA_IO_RAIL_CAM	36
-#define TEGRA_IO_RAIL_RES	37
-#define TEGRA_IO_RAIL_HV	38
-#define TEGRA_IO_RAIL_DSIB	39
-#define TEGRA_IO_RAIL_DSIC	40
-#define TEGRA_IO_RAIL_DSID	41
-#define TEGRA_IO_RAIL_CSIE	44
-#define TEGRA_IO_RAIL_LVDS	57
-#define TEGRA_IO_RAIL_SYS_DDC	58
-
-#ifdef CONFIG_ARCH_TEGRA
-int tegra_powergate_is_powered(int id);
-int tegra_powergate_power_on(int id);
-int tegra_powergate_power_off(int id);
-int tegra_powergate_remove_clamping(int id);
-
-/* Must be called with clk disabled, and returns with clk enabled */
-int tegra_powergate_sequence_power_up(int id, struct clk *clk,
-				      struct reset_control *rst);
-
-int tegra_io_rail_power_on(int id);
-int tegra_io_rail_power_off(int id);
-#else
-static inline int tegra_powergate_is_powered(int id)
-{
-	return -ENOSYS;
-}
-
-static inline int tegra_powergate_power_on(int id)
-{
-	return -ENOSYS;
-}
-
-static inline int tegra_powergate_power_off(int id)
-{
-	return -ENOSYS;
-}
-
-static inline int tegra_powergate_remove_clamping(int id)
-{
-	return -ENOSYS;
-}
-
-static inline int tegra_powergate_sequence_power_up(int id, struct clk *clk,
-						    struct reset_control *rst)
-{
-	return -ENOSYS;
-}
-
-static inline int tegra_io_rail_power_on(int id)
-{
-	return -ENOSYS;
-}
-
-static inline int tegra_io_rail_power_off(int id)
-{
-	return -ENOSYS;
-}
-#endif
-
-#endif /* _MACH_TEGRA_POWERGATE_H_ */
diff --git a/include/linux/tegra-soc.h b/include/linux/tegra-soc.h
deleted file mode 100644
index 95f611d78f3a..000000000000
--- a/include/linux/tegra-soc.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __LINUX_TEGRA_SOC_H_
-#define __LINUX_TEGRA_SOC_H_
-
-u32 tegra_read_chipid(void);
-
-#endif /* __LINUX_TEGRA_SOC_H_ */
diff --git a/include/soc/tegra/ahb.h b/include/soc/tegra/ahb.h
new file mode 100644
index 000000000000..504eb6f957e5
--- /dev/null
+++ b/include/soc/tegra/ahb.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __SOC_TEGRA_AHB_H__
+#define __SOC_TEGRA_AHB_H__
+
+extern int tegra_ahb_enable_smmu(struct device_node *ahb);
+
+#endif /* __SOC_TEGRA_AHB_H__ */
diff --git a/include/soc/tegra/cpuidle.h b/include/soc/tegra/cpuidle.h
new file mode 100644
index 000000000000..ea04f4225638
--- /dev/null
+++ b/include/soc/tegra/cpuidle.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __SOC_TEGRA_CPUIDLE_H__
+#define __SOC_TEGRA_CPUIDLE_H__
+
+#ifdef CONFIG_CPU_IDLE
+void tegra_cpuidle_pcie_irqs_in_use(void);
+#else
+static inline void tegra_cpuidle_pcie_irqs_in_use(void)
+{
+}
+#endif
+
+#endif /* __SOC_TEGRA_CPUIDLE_H__ */
diff --git a/include/soc/tegra/fuse.h b/include/soc/tegra/fuse.h
new file mode 100644
index 000000000000..85f555c89ad5
--- /dev/null
+++ b/include/soc/tegra/fuse.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __SOC_TEGRA_FUSE_H__
+#define __SOC_TEGRA_FUSE_H__
+
+u32 tegra_read_chipid(void);
+
+#endif /* __SOC_TEGRA_FUSE_H__ */
diff --git a/include/soc/tegra/powergate.h b/include/soc/tegra/powergate.h
new file mode 100644
index 000000000000..c16912ed1a8d
--- /dev/null
+++ b/include/soc/tegra/powergate.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2010 Google, Inc
+ *
+ * Author:
+ *	Colin Cross <ccross@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __SOC_TEGRA_POWERGATE_H__
+#define __SOC_TEGRA_POWERGATE_H__
+
+struct clk;
+struct reset_control;
+
+#define TEGRA_POWERGATE_CPU	0
+#define TEGRA_POWERGATE_3D	1
+#define TEGRA_POWERGATE_VENC	2
+#define TEGRA_POWERGATE_PCIE	3
+#define TEGRA_POWERGATE_VDEC	4
+#define TEGRA_POWERGATE_L2	5
+#define TEGRA_POWERGATE_MPE	6
+#define TEGRA_POWERGATE_HEG	7
+#define TEGRA_POWERGATE_SATA	8
+#define TEGRA_POWERGATE_CPU1	9
+#define TEGRA_POWERGATE_CPU2	10
+#define TEGRA_POWERGATE_CPU3	11
+#define TEGRA_POWERGATE_CELP	12
+#define TEGRA_POWERGATE_3D1	13
+#define TEGRA_POWERGATE_CPU0	14
+#define TEGRA_POWERGATE_C0NC	15
+#define TEGRA_POWERGATE_C1NC	16
+#define TEGRA_POWERGATE_SOR	17
+#define TEGRA_POWERGATE_DIS	18
+#define TEGRA_POWERGATE_DISB	19
+#define TEGRA_POWERGATE_XUSBA	20
+#define TEGRA_POWERGATE_XUSBB	21
+#define TEGRA_POWERGATE_XUSBC	22
+#define TEGRA_POWERGATE_VIC	23
+#define TEGRA_POWERGATE_IRAM	24
+
+#define TEGRA_POWERGATE_3D0	TEGRA_POWERGATE_3D
+
+#define TEGRA_IO_RAIL_CSIA	0
+#define TEGRA_IO_RAIL_CSIB	1
+#define TEGRA_IO_RAIL_DSI	2
+#define TEGRA_IO_RAIL_MIPI_BIAS	3
+#define TEGRA_IO_RAIL_PEX_BIAS	4
+#define TEGRA_IO_RAIL_PEX_CLK1	5
+#define TEGRA_IO_RAIL_PEX_CLK2	6
+#define TEGRA_IO_RAIL_USB0	9
+#define TEGRA_IO_RAIL_USB1	10
+#define TEGRA_IO_RAIL_USB2	11
+#define TEGRA_IO_RAIL_USB_BIAS	12
+#define TEGRA_IO_RAIL_NAND	13
+#define TEGRA_IO_RAIL_UART	14
+#define TEGRA_IO_RAIL_BB	15
+#define TEGRA_IO_RAIL_AUDIO	17
+#define TEGRA_IO_RAIL_HSIC	19
+#define TEGRA_IO_RAIL_COMP	22
+#define TEGRA_IO_RAIL_HDMI	28
+#define TEGRA_IO_RAIL_PEX_CNTRL	32
+#define TEGRA_IO_RAIL_SDMMC1	33
+#define TEGRA_IO_RAIL_SDMMC3	34
+#define TEGRA_IO_RAIL_SDMMC4	35
+#define TEGRA_IO_RAIL_CAM	36
+#define TEGRA_IO_RAIL_RES	37
+#define TEGRA_IO_RAIL_HV	38
+#define TEGRA_IO_RAIL_DSIB	39
+#define TEGRA_IO_RAIL_DSIC	40
+#define TEGRA_IO_RAIL_DSID	41
+#define TEGRA_IO_RAIL_CSIE	44
+#define TEGRA_IO_RAIL_LVDS	57
+#define TEGRA_IO_RAIL_SYS_DDC	58
+
+#ifdef CONFIG_ARCH_TEGRA
+int tegra_powergate_is_powered(int id);
+int tegra_powergate_power_on(int id);
+int tegra_powergate_power_off(int id);
+int tegra_powergate_remove_clamping(int id);
+
+/* Must be called with clk disabled, and returns with clk enabled */
+int tegra_powergate_sequence_power_up(int id, struct clk *clk,
+				      struct reset_control *rst);
+
+int tegra_io_rail_power_on(int id);
+int tegra_io_rail_power_off(int id);
+#else
+static inline int tegra_powergate_is_powered(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_powergate_power_on(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_powergate_power_off(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_powergate_remove_clamping(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_powergate_sequence_power_up(int id, struct clk *clk,
+						    struct reset_control *rst)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_io_rail_power_on(int id)
+{
+	return -ENOSYS;
+}
+
+static inline int tegra_io_rail_power_off(int id)
+{
+	return -ENOSYS;
+}
+#endif
+
+#endif /* __SOC_TEGRA_POWERGATE_H__ */
-- 
cgit v1.2.3


From ae4b884fc6316b3190be19448cea24b020c1cad6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Tue, 15 Jul 2014 12:59:36 -0400
Subject: nfsd: silence sparse warning about accessing credentials

sparse says:

    fs/nfsd/auth.c:31:38: warning: incorrect type in argument 1 (different address spaces)
    fs/nfsd/auth.c:31:38:    expected struct cred const *cred
    fs/nfsd/auth.c:31:38:    got struct cred const [noderef] <asn:4>*real_cred

Add a new accessor for the ->real_cred and use that to fetch the
pointer. Accessing current->real_cred directly is actually quite safe
since we know that they can't go away so this is mostly a cosmetic fixup
to silence sparse.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/auth.c       | 2 +-
 include/linux/cred.h | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 72f44823adbb..9d46a0bdd9f9 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 	validate_process_creds();
 
 	/* discard any old override before preparing the new set */
-	revert_creds(get_cred(current->real_cred));
+	revert_creds(get_cred(current_real_cred()));
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
diff --git a/include/linux/cred.h b/include/linux/cred.h
index f61d6c8f5ef3..b2d0820837c4 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -258,6 +258,15 @@ static inline void put_cred(const struct cred *_cred)
 #define current_cred() \
 	rcu_dereference_protected(current->cred, 1)
 
+/**
+ * current_real_cred - Access the current task's objective credentials
+ *
+ * Access the objective credentials of the current task.  RCU-safe,
+ * since nobody else can modify it.
+ */
+#define current_real_cred() \
+	rcu_dereference_protected(current->real_cred, 1)
+
 /**
  * __task_cred - Access a task's objective credentials
  * @task: The task to query
-- 
cgit v1.2.3


From e1b243772d455f3b25b410a15a5677a9e74ffa37 Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Tue, 20 May 2014 20:43:51 +0400
Subject: ARM: i.MX: Remove registration helper for i.MX1 USB UDC

imx_udc driver was removed from the kernel of about 10 months ago.
This patch removes a registration helper for this driver and
orphaned driver header.

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/mach-imx/devices/Kconfig            |  3 --
 arch/arm/mach-imx/devices/Makefile           |  1 -
 arch/arm/mach-imx/devices/devices-common.h   | 16 ------
 arch/arm/mach-imx/devices/platform-imx_udc.c | 75 ----------------------------
 include/linux/platform_data/usb-imx_udc.h    | 23 ---------
 5 files changed, 118 deletions(-)
 delete mode 100644 arch/arm/mach-imx/devices/platform-imx_udc.c
 delete mode 100644 include/linux/platform_data/usb-imx_udc.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/devices/Kconfig b/arch/arm/mach-imx/devices/Kconfig
index 2d260a5a307c..dd7e0b45fb18 100644
--- a/arch/arm/mach-imx/devices/Kconfig
+++ b/arch/arm/mach-imx/devices/Kconfig
@@ -43,9 +43,6 @@ config IMX_HAVE_PLATFORM_IMX_SSI
 config IMX_HAVE_PLATFORM_IMX_UART
 	bool
 
-config IMX_HAVE_PLATFORM_IMX_UDC
-	bool
-
 config IMX_HAVE_PLATFORM_IPU_CORE
 	bool
 
diff --git a/arch/arm/mach-imx/devices/Makefile b/arch/arm/mach-imx/devices/Makefile
index 1cbc14cd80d1..6bb144dd680e 100644
--- a/arch/arm/mach-imx/devices/Makefile
+++ b/arch/arm/mach-imx/devices/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_KEYPAD) += platform-imx-keypad.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_PATA_IMX) += platform-pata_imx.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_SSI) += platform-imx-ssi.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UART) += platform-imx-uart.o
-obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UDC) += platform-imx_udc.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_IPU_CORE) += platform-ipu-core.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_MX1_CAMERA) += platform-mx1-camera.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_MX2_CAMERA) += platform-mx2-camera.o
diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h
index 61352a80bb59..69bafc884dfa 100644
--- a/arch/arm/mach-imx/devices/devices-common.h
+++ b/arch/arm/mach-imx/devices/devices-common.h
@@ -176,22 +176,6 @@ struct platform_device *__init imx_add_imx_uart_1irq(
 		const struct imx_imx_uart_1irq_data *data,
 		const struct imxuart_platform_data *pdata);
 
-#include <linux/platform_data/usb-imx_udc.h>
-struct imx_imx_udc_data {
-	resource_size_t iobase;
-	resource_size_t iosize;
-	resource_size_t irq0;
-	resource_size_t irq1;
-	resource_size_t irq2;
-	resource_size_t irq3;
-	resource_size_t irq4;
-	resource_size_t irq5;
-	resource_size_t irq6;
-};
-struct platform_device *__init imx_add_imx_udc(
-		const struct imx_imx_udc_data *data,
-		const struct imxusb_platform_data *pdata);
-
 #include <linux/platform_data/video-mx3fb.h>
 #include <linux/platform_data/camera-mx3.h>
 struct imx_ipu_core_data {
diff --git a/arch/arm/mach-imx/devices/platform-imx_udc.c b/arch/arm/mach-imx/devices/platform-imx_udc.c
deleted file mode 100644
index 5ced7e4e2c71..000000000000
--- a/arch/arm/mach-imx/devices/platform-imx_udc.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2010 Pengutronix
- * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation.
- */
-#include "../hardware.h"
-#include "devices-common.h"
-
-#define imx_imx_udc_data_entry_single(soc, _size)			\
-	{								\
-		.iobase = soc ## _USBD_BASE_ADDR,			\
-		.iosize = _size,					\
-		.irq0 = soc ## _INT_USBD0,				\
-		.irq1 = soc ## _INT_USBD1,				\
-		.irq2 = soc ## _INT_USBD2,				\
-		.irq3 = soc ## _INT_USBD3,				\
-		.irq4 = soc ## _INT_USBD4,				\
-		.irq5 = soc ## _INT_USBD5,				\
-		.irq6 = soc ## _INT_USBD6,				\
-	}
-
-#define imx_imx_udc_data_entry(soc, _size)				\
-	[_id] = imx_imx_udc_data_entry_single(soc, _size)
-
-#ifdef CONFIG_SOC_IMX1
-const struct imx_imx_udc_data imx1_imx_udc_data __initconst =
-	imx_imx_udc_data_entry_single(MX1, SZ_4K);
-#endif /* ifdef CONFIG_SOC_IMX1 */
-
-struct platform_device *__init imx_add_imx_udc(
-		const struct imx_imx_udc_data *data,
-		const struct imxusb_platform_data *pdata)
-{
-	struct resource res[] = {
-		{
-			.start = data->iobase,
-			.end = data->iobase + data->iosize - 1,
-			.flags = IORESOURCE_MEM,
-		}, {
-			.start = data->irq0,
-			.end = data->irq0,
-			.flags = IORESOURCE_IRQ,
-		}, {
-			.start = data->irq1,
-			.end = data->irq1,
-			.flags = IORESOURCE_IRQ,
-		}, {
-			.start = data->irq2,
-			.end = data->irq2,
-			.flags = IORESOURCE_IRQ,
-		}, {
-			.start = data->irq3,
-			.end = data->irq3,
-			.flags = IORESOURCE_IRQ,
-		}, {
-			.start = data->irq4,
-			.end = data->irq4,
-			.flags = IORESOURCE_IRQ,
-		}, {
-			.start = data->irq5,
-			.end = data->irq5,
-			.flags = IORESOURCE_IRQ,
-		}, {
-			.start = data->irq6,
-			.end = data->irq6,
-			.flags = IORESOURCE_IRQ,
-		},
-	};
-
-	return imx_add_platform_device("imx_udc", 0,
-			res, ARRAY_SIZE(res), pdata, sizeof(*pdata));
-}
diff --git a/include/linux/platform_data/usb-imx_udc.h b/include/linux/platform_data/usb-imx_udc.h
deleted file mode 100644
index be273371f34a..000000000000
--- a/include/linux/platform_data/usb-imx_udc.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *	Copyright (C) 2008 Darius Augulis <augulis.darius@gmail.com>
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License as published by
- *	the Free Software Foundation; either version 2 of the License, or
- *	(at your option) any later version.
- *
- *	This program is distributed in the hope that it will be useful,
- *	but WITHOUT ANY WARRANTY; without even the implied warranty of
- *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *	GNU General Public License for more details.
- */
-
-#ifndef __ASM_ARCH_MXC_USB
-#define __ASM_ARCH_MXC_USB
-
-struct imxusb_platform_data {
-	int (*init)(struct device *);
-	void (*exit)(struct device *);
-};
-
-#endif /* __ASM_ARCH_MXC_USB */
-- 
cgit v1.2.3


From 641dfe8b73e81aa38cbbeab72a480462a4987963 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@freescale.com>
Date: Mon, 19 May 2014 20:41:52 +0800
Subject: ARM: imx: move EHCI platform defines out of platform_data header

The platform_data header usb-ehci-mxc.h has a lot of stuff used by only
IMX platform code.  They shouldn't be really in this header but a IMX
platform local header.  Create ehci.h and move these stuff into it.

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/mach-imx/ehci-imx25.c              |  1 +
 arch/arm/mach-imx/ehci-imx27.c              |  1 +
 arch/arm/mach-imx/ehci-imx31.c              |  1 +
 arch/arm/mach-imx/ehci-imx35.c              |  1 +
 arch/arm/mach-imx/ehci-imx5.c               |  1 +
 arch/arm/mach-imx/ehci.h                    | 50 +++++++++++++++++++++++++++++
 arch/arm/mach-imx/mach-armadillo5x0.c       |  1 +
 arch/arm/mach-imx/mach-cpuimx27.c           |  1 +
 arch/arm/mach-imx/mach-cpuimx35.c           |  1 +
 arch/arm/mach-imx/mach-eukrea_cpuimx25.c    |  1 +
 arch/arm/mach-imx/mach-imx27_visstrim_m10.c |  1 +
 arch/arm/mach-imx/mach-mx25_3ds.c           |  1 +
 arch/arm/mach-imx/mach-mx27_3ds.c           |  1 +
 arch/arm/mach-imx/mach-mx31_3ds.c           |  1 +
 arch/arm/mach-imx/mach-mx31lilly.c          |  1 +
 arch/arm/mach-imx/mach-mx31lite.c           |  1 +
 arch/arm/mach-imx/mach-mx31moboard.c        |  1 +
 arch/arm/mach-imx/mach-mx35_3ds.c           |  1 +
 arch/arm/mach-imx/mach-pca100.c             |  1 +
 arch/arm/mach-imx/mach-pcm037.c             |  1 +
 arch/arm/mach-imx/mach-pcm038.c             |  1 +
 arch/arm/mach-imx/mach-pcm043.c             |  1 +
 arch/arm/mach-imx/mach-vpr200.c             |  1 +
 arch/arm/mach-imx/mx31moboard-devboard.c    |  1 +
 arch/arm/mach-imx/mx31moboard-marxbot.c     |  1 +
 arch/arm/mach-imx/mx31moboard-smartbot.c    |  1 +
 include/linux/platform_data/usb-ehci-mxc.h  | 46 --------------------------
 27 files changed, 75 insertions(+), 46 deletions(-)
 create mode 100644 arch/arm/mach-imx/ehci.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/ehci-imx25.c b/arch/arm/mach-imx/ehci-imx25.c
index 134c190e3003..42a5a3d14c5f 100644
--- a/arch/arm/mach-imx/ehci-imx25.c
+++ b/arch/arm/mach-imx/ehci-imx25.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/platform_data/usb-ehci-mxc.h>
 
+#include "ehci.h"
 #include "hardware.h"
 
 #define USBCTRL_OTGBASE_OFFSET	0x600
diff --git a/arch/arm/mach-imx/ehci-imx27.c b/arch/arm/mach-imx/ehci-imx27.c
index 448d9115539d..c56974346c16 100644
--- a/arch/arm/mach-imx/ehci-imx27.c
+++ b/arch/arm/mach-imx/ehci-imx27.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/platform_data/usb-ehci-mxc.h>
 
+#include "ehci.h"
 #include "hardware.h"
 
 #define USBCTRL_OTGBASE_OFFSET	0x600
diff --git a/arch/arm/mach-imx/ehci-imx31.c b/arch/arm/mach-imx/ehci-imx31.c
index 05de4e1e39d7..bede21d9b981 100644
--- a/arch/arm/mach-imx/ehci-imx31.c
+++ b/arch/arm/mach-imx/ehci-imx31.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/platform_data/usb-ehci-mxc.h>
 
+#include "ehci.h"
 #include "hardware.h"
 
 #define USBCTRL_OTGBASE_OFFSET	0x600
diff --git a/arch/arm/mach-imx/ehci-imx35.c b/arch/arm/mach-imx/ehci-imx35.c
index 554e7cccff53..f424a543755c 100644
--- a/arch/arm/mach-imx/ehci-imx35.c
+++ b/arch/arm/mach-imx/ehci-imx35.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/platform_data/usb-ehci-mxc.h>
 
+#include "ehci.h"
 #include "hardware.h"
 
 #define USBCTRL_OTGBASE_OFFSET	0x600
diff --git a/arch/arm/mach-imx/ehci-imx5.c b/arch/arm/mach-imx/ehci-imx5.c
index e49710b10c68..74bfdd970bfe 100644
--- a/arch/arm/mach-imx/ehci-imx5.c
+++ b/arch/arm/mach-imx/ehci-imx5.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/platform_data/usb-ehci-mxc.h>
 
+#include "ehci.h"
 #include "hardware.h"
 
 #define MXC_OTG_OFFSET			0
diff --git a/arch/arm/mach-imx/ehci.h b/arch/arm/mach-imx/ehci.h
new file mode 100644
index 000000000000..4f42ca380b26
--- /dev/null
+++ b/arch/arm/mach-imx/ehci.h
@@ -0,0 +1,50 @@
+#ifndef __MACH_IMX_EHCI_H
+#define __MACH_IMX_EHCI_H
+
+/* values for portsc field */
+#define MXC_EHCI_PHY_LOW_POWER_SUSPEND	(1 << 23)
+#define MXC_EHCI_FORCE_FS		(1 << 24)
+#define MXC_EHCI_UTMI_8BIT		(0 << 28)
+#define MXC_EHCI_UTMI_16BIT		(1 << 28)
+#define MXC_EHCI_SERIAL			(1 << 29)
+#define MXC_EHCI_MODE_UTMI		(0 << 30)
+#define MXC_EHCI_MODE_PHILIPS		(1 << 30)
+#define MXC_EHCI_MODE_ULPI		(2 << 30)
+#define MXC_EHCI_MODE_SERIAL		(3 << 30)
+
+/* values for flags field */
+#define MXC_EHCI_INTERFACE_DIFF_UNI	(0 << 0)
+#define MXC_EHCI_INTERFACE_DIFF_BI	(1 << 0)
+#define MXC_EHCI_INTERFACE_SINGLE_UNI	(2 << 0)
+#define MXC_EHCI_INTERFACE_SINGLE_BI	(3 << 0)
+#define MXC_EHCI_INTERFACE_MASK		(0xf)
+
+#define MXC_EHCI_POWER_PINS_ENABLED	(1 << 5)
+#define MXC_EHCI_PWR_PIN_ACTIVE_HIGH	(1 << 6)
+#define MXC_EHCI_OC_PIN_ACTIVE_LOW	(1 << 7)
+#define MXC_EHCI_TTL_ENABLED		(1 << 8)
+
+#define MXC_EHCI_INTERNAL_PHY		(1 << 9)
+#define MXC_EHCI_IPPUE_DOWN		(1 << 10)
+#define MXC_EHCI_IPPUE_UP		(1 << 11)
+#define MXC_EHCI_WAKEUP_ENABLED		(1 << 12)
+#define MXC_EHCI_ITC_NO_THRESHOLD	(1 << 13)
+
+#define MXC_USBCTRL_OFFSET		0
+#define MXC_USB_PHY_CTR_FUNC_OFFSET	0x8
+#define MXC_USB_PHY_CTR_FUNC2_OFFSET	0xc
+#define MXC_USBH2CTRL_OFFSET		0x14
+
+#define MX5_USBOTHER_REGS_OFFSET	0x800
+
+/* USB_PHY_CTRL_FUNC2*/
+#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_MASK		0x3
+#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_SHIFT		0
+
+int mx51_initialize_usb_hw(int port, unsigned int flags);
+int mx25_initialize_usb_hw(int port, unsigned int flags);
+int mx31_initialize_usb_hw(int port, unsigned int flags);
+int mx35_initialize_usb_hw(int port, unsigned int flags);
+int mx27_initialize_usb_hw(int port, unsigned int flags);
+
+#endif /* __MACH_IMX_EHCI_H */
diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c
index 39406b7e3228..a7e9bd26a552 100644
--- a/arch/arm/mach-imx/mach-armadillo5x0.c
+++ b/arch/arm/mach-imx/mach-armadillo5x0.c
@@ -50,6 +50,7 @@
 #include "common.h"
 #include "devices-imx31.h"
 #include "crmregs-imx3.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c
index 75b7b6aa2720..e6d4b9929571 100644
--- a/arch/arm/mach-imx/mach-cpuimx27.c
+++ b/arch/arm/mach-imx/mach-cpuimx27.c
@@ -36,6 +36,7 @@
 
 #include "common.h"
 #include "devices-imx27.h"
+#include "ehci.h"
 #include "eukrea-baseboards.h"
 #include "hardware.h"
 #include "iomux-mx27.h"
diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c
index 1ffa27169045..62a6e02f4763 100644
--- a/arch/arm/mach-imx/mach-cpuimx35.c
+++ b/arch/arm/mach-imx/mach-cpuimx35.c
@@ -39,6 +39,7 @@
 
 #include "common.h"
 #include "devices-imx35.h"
+#include "ehci.h"
 #include "eukrea-baseboards.h"
 #include "hardware.h"
 #include "iomux-mx35.h"
diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
index e978dda1434c..b2ee6e009fe4 100644
--- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
+++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
@@ -35,6 +35,7 @@
 
 #include "common.h"
 #include "devices-imx25.h"
+#include "ehci.h"
 #include "eukrea-baseboards.h"
 #include "hardware.h"
 #include "iomux-mx25.h"
diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
index b61bd8ed5568..ede2bdbb5dd5 100644
--- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
+++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
@@ -43,6 +43,7 @@
 
 #include "common.h"
 #include "devices-imx27.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx27.h"
 
diff --git a/arch/arm/mach-imx/mach-mx25_3ds.c b/arch/arm/mach-imx/mach-mx25_3ds.c
index ea1fa199c148..0d01e367b062 100644
--- a/arch/arm/mach-imx/mach-mx25_3ds.c
+++ b/arch/arm/mach-imx/mach-mx25_3ds.c
@@ -39,6 +39,7 @@
 
 #include "common.h"
 #include "devices-imx25.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx25.h"
 #include "mx25.h"
diff --git a/arch/arm/mach-imx/mach-mx27_3ds.c b/arch/arm/mach-imx/mach-mx27_3ds.c
index 435a5428a678..9ef4640f3660 100644
--- a/arch/arm/mach-imx/mach-mx27_3ds.c
+++ b/arch/arm/mach-imx/mach-mx27_3ds.c
@@ -40,6 +40,7 @@
 #include "3ds_debugboard.h"
 #include "common.h"
 #include "devices-imx27.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx27.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-mx31_3ds.c b/arch/arm/mach-imx/mach-mx31_3ds.c
index 4217871a9653..453f41a2c5a9 100644
--- a/arch/arm/mach-imx/mach-mx31_3ds.c
+++ b/arch/arm/mach-imx/mach-mx31_3ds.c
@@ -40,6 +40,7 @@
 #include "3ds_debugboard.h"
 #include "common.h"
 #include "devices-imx31.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-mx31lilly.c b/arch/arm/mach-imx/mach-mx31lilly.c
index eee042fa2768..e9549a3c0223 100644
--- a/arch/arm/mach-imx/mach-mx31lilly.c
+++ b/arch/arm/mach-imx/mach-mx31lilly.c
@@ -45,6 +45,7 @@
 #include "board-mx31lilly.h"
 #include "common.h"
 #include "devices-imx31.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-mx31lite.c b/arch/arm/mach-imx/mach-mx31lite.c
index fa15d0b6118d..57eac6f45fab 100644
--- a/arch/arm/mach-imx/mach-mx31lite.c
+++ b/arch/arm/mach-imx/mach-mx31lite.c
@@ -42,6 +42,7 @@
 #include "board-mx31lite.h"
 #include "common.h"
 #include "devices-imx31.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c
index 08730f238449..6bed57040973 100644
--- a/arch/arm/mach-imx/mach-mx31moboard.c
+++ b/arch/arm/mach-imx/mach-mx31moboard.c
@@ -47,6 +47,7 @@
 #include "board-mx31moboard.h"
 #include "common.h"
 #include "devices-imx31.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-mx35_3ds.c b/arch/arm/mach-imx/mach-mx35_3ds.c
index 4e8b184d773b..72cd77d21f63 100644
--- a/arch/arm/mach-imx/mach-mx35_3ds.c
+++ b/arch/arm/mach-imx/mach-mx35_3ds.c
@@ -50,6 +50,7 @@
 #include "3ds_debugboard.h"
 #include "common.h"
 #include "devices-imx35.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx35.h"
 
diff --git a/arch/arm/mach-imx/mach-pca100.c b/arch/arm/mach-imx/mach-pca100.c
index 12212378c672..2d1c50bd8bdf 100644
--- a/arch/arm/mach-imx/mach-pca100.c
+++ b/arch/arm/mach-imx/mach-pca100.c
@@ -36,6 +36,7 @@
 
 #include "common.h"
 #include "devices-imx27.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx27.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-pcm037.c b/arch/arm/mach-imx/mach-pcm037.c
index 81b8affb9448..8eb1570f7851 100644
--- a/arch/arm/mach-imx/mach-pcm037.c
+++ b/arch/arm/mach-imx/mach-pcm037.c
@@ -45,6 +45,7 @@
 
 #include "common.h"
 #include "devices-imx31.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "pcm037.h"
diff --git a/arch/arm/mach-imx/mach-pcm038.c b/arch/arm/mach-imx/mach-pcm038.c
index 6c56fb5553c7..ee862ad6b6fc 100644
--- a/arch/arm/mach-imx/mach-pcm038.c
+++ b/arch/arm/mach-imx/mach-pcm038.c
@@ -36,6 +36,7 @@
 #include "board-pcm038.h"
 #include "common.h"
 #include "devices-imx27.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx27.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-pcm043.c b/arch/arm/mach-imx/mach-pcm043.c
index c62b5d261345..b623bcaca76c 100644
--- a/arch/arm/mach-imx/mach-pcm043.c
+++ b/arch/arm/mach-imx/mach-pcm043.c
@@ -35,6 +35,7 @@
 
 #include "common.h"
 #include "devices-imx35.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx35.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mach-vpr200.c b/arch/arm/mach-imx/mach-vpr200.c
index 872b3c6ba408..97836e94451c 100644
--- a/arch/arm/mach-imx/mach-vpr200.c
+++ b/arch/arm/mach-imx/mach-vpr200.c
@@ -34,6 +34,7 @@
 
 #include "common.h"
 #include "devices-imx35.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx35.h"
 
diff --git a/arch/arm/mach-imx/mx31moboard-devboard.c b/arch/arm/mach-imx/mx31moboard-devboard.c
index 52d5b1574721..1e4ea1640a2a 100644
--- a/arch/arm/mach-imx/mx31moboard-devboard.c
+++ b/arch/arm/mach-imx/mx31moboard-devboard.c
@@ -24,6 +24,7 @@
 
 #include "common.h"
 #include "devices-imx31.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mx31moboard-marxbot.c b/arch/arm/mach-imx/mx31moboard-marxbot.c
index a4f43e90f3c1..699d01a4fef8 100644
--- a/arch/arm/mach-imx/mx31moboard-marxbot.c
+++ b/arch/arm/mach-imx/mx31moboard-marxbot.c
@@ -28,6 +28,7 @@
 
 #include "common.h"
 #include "devices-imx31.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "ulpi.h"
diff --git a/arch/arm/mach-imx/mx31moboard-smartbot.c b/arch/arm/mach-imx/mx31moboard-smartbot.c
index 04ae45dbfaa7..4b3d66eb8d34 100644
--- a/arch/arm/mach-imx/mx31moboard-smartbot.c
+++ b/arch/arm/mach-imx/mx31moboard-smartbot.c
@@ -28,6 +28,7 @@
 #include "board-mx31moboard.h"
 #include "common.h"
 #include "devices-imx31.h"
+#include "ehci.h"
 #include "hardware.h"
 #include "iomux-mx3.h"
 #include "ulpi.h"
diff --git a/include/linux/platform_data/usb-ehci-mxc.h b/include/linux/platform_data/usb-ehci-mxc.h
index 7eb9d1329671..157e71f79f99 100644
--- a/include/linux/platform_data/usb-ehci-mxc.h
+++ b/include/linux/platform_data/usb-ehci-mxc.h
@@ -1,46 +1,6 @@
 #ifndef __INCLUDE_ASM_ARCH_MXC_EHCI_H
 #define __INCLUDE_ASM_ARCH_MXC_EHCI_H
 
-/* values for portsc field */
-#define MXC_EHCI_PHY_LOW_POWER_SUSPEND	(1 << 23)
-#define MXC_EHCI_FORCE_FS		(1 << 24)
-#define MXC_EHCI_UTMI_8BIT		(0 << 28)
-#define MXC_EHCI_UTMI_16BIT		(1 << 28)
-#define MXC_EHCI_SERIAL			(1 << 29)
-#define MXC_EHCI_MODE_UTMI		(0 << 30)
-#define MXC_EHCI_MODE_PHILIPS		(1 << 30)
-#define MXC_EHCI_MODE_ULPI		(2 << 30)
-#define MXC_EHCI_MODE_SERIAL		(3 << 30)
-
-/* values for flags field */
-#define MXC_EHCI_INTERFACE_DIFF_UNI	(0 << 0)
-#define MXC_EHCI_INTERFACE_DIFF_BI	(1 << 0)
-#define MXC_EHCI_INTERFACE_SINGLE_UNI	(2 << 0)
-#define MXC_EHCI_INTERFACE_SINGLE_BI	(3 << 0)
-#define MXC_EHCI_INTERFACE_MASK		(0xf)
-
-#define MXC_EHCI_POWER_PINS_ENABLED	(1 << 5)
-#define MXC_EHCI_PWR_PIN_ACTIVE_HIGH	(1 << 6)
-#define MXC_EHCI_OC_PIN_ACTIVE_LOW	(1 << 7)
-#define MXC_EHCI_TTL_ENABLED		(1 << 8)
-
-#define MXC_EHCI_INTERNAL_PHY		(1 << 9)
-#define MXC_EHCI_IPPUE_DOWN		(1 << 10)
-#define MXC_EHCI_IPPUE_UP		(1 << 11)
-#define MXC_EHCI_WAKEUP_ENABLED		(1 << 12)
-#define MXC_EHCI_ITC_NO_THRESHOLD	(1 << 13)
-
-#define MXC_USBCTRL_OFFSET		0
-#define MXC_USB_PHY_CTR_FUNC_OFFSET	0x8
-#define MXC_USB_PHY_CTR_FUNC2_OFFSET	0xc
-#define MXC_USBH2CTRL_OFFSET		0x14
-
-#define MX5_USBOTHER_REGS_OFFSET	0x800
-
-/* USB_PHY_CTRL_FUNC2*/
-#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_MASK		0x3
-#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_SHIFT		0
-
 struct mxc_usbh_platform_data {
 	int (*init)(struct platform_device *pdev);
 	int (*exit)(struct platform_device *pdev);
@@ -49,11 +9,5 @@ struct mxc_usbh_platform_data {
 	struct usb_phy		*otg;
 };
 
-int mx51_initialize_usb_hw(int port, unsigned int flags);
-int mx25_initialize_usb_hw(int port, unsigned int flags);
-int mx31_initialize_usb_hw(int port, unsigned int flags);
-int mx35_initialize_usb_hw(int port, unsigned int flags);
-int mx27_initialize_usb_hw(int port, unsigned int flags);
-
 #endif /* __INCLUDE_ASM_ARCH_MXC_EHCI_H */
 
-- 
cgit v1.2.3


From 35e3bc535d0437ca5f32985a294703ce48c75d88 Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Sat, 5 Jul 2014 08:51:38 +0400
Subject: ARM: i.MX: Remove i.MX1 camera support

i.MX1 camera driver has been removed by the commit 90b055898e.
This patch removes remaining support files for this camera.

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/mach-imx/Makefile                      |  3 --
 arch/arm/mach-imx/devices/Kconfig               |  3 --
 arch/arm/mach-imx/devices/Makefile              |  1 -
 arch/arm/mach-imx/devices/devices-common.h      | 10 ------
 arch/arm/mach-imx/devices/platform-mx1-camera.c | 42 -------------------------
 arch/arm/mach-imx/mx1-camera-fiq-ksym.c         | 18 -----------
 arch/arm/mach-imx/mx1-camera-fiq.S              | 35 ---------------------
 include/linux/platform_data/camera-mx1.h        | 35 ---------------------
 8 files changed, 147 deletions(-)
 delete mode 100644 arch/arm/mach-imx/devices/platform-mx1-camera.c
 delete mode 100644 arch/arm/mach-imx/mx1-camera-fiq-ksym.c
 delete mode 100644 arch/arm/mach-imx/mx1-camera-fiq.S
 delete mode 100644 include/linux/platform_data/camera-mx1.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile
index a3d9712567c9..d1c1d38f3741 100644
--- a/arch/arm/mach-imx/Makefile
+++ b/arch/arm/mach-imx/Makefile
@@ -40,9 +40,6 @@ obj-y += ssi-fiq.o
 obj-y += ssi-fiq-ksym.o
 endif
 
-# Support for CMOS sensor interface
-obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o
-
 # i.MX1 based machines
 obj-$(CONFIG_ARCH_MX1ADS) += mach-mx1ads.o
 obj-$(CONFIG_MACH_SCB9328) += mach-scb9328.o
diff --git a/arch/arm/mach-imx/devices/Kconfig b/arch/arm/mach-imx/devices/Kconfig
index 119f699649a9..73a1dce6cac1 100644
--- a/arch/arm/mach-imx/devices/Kconfig
+++ b/arch/arm/mach-imx/devices/Kconfig
@@ -45,9 +45,6 @@ config IMX_HAVE_PLATFORM_IMX_UART
 config IMX_HAVE_PLATFORM_IPU_CORE
 	bool
 
-config IMX_HAVE_PLATFORM_MX1_CAMERA
-	bool
-
 config IMX_HAVE_PLATFORM_MX2_CAMERA
 	bool
 
diff --git a/arch/arm/mach-imx/devices/Makefile b/arch/arm/mach-imx/devices/Makefile
index 6bb144dd680e..8fdb12b4ca7e 100644
--- a/arch/arm/mach-imx/devices/Makefile
+++ b/arch/arm/mach-imx/devices/Makefile
@@ -17,7 +17,6 @@ obj-$(CONFIG_IMX_HAVE_PLATFORM_PATA_IMX) += platform-pata_imx.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_SSI) += platform-imx-ssi.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UART) += platform-imx-uart.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_IPU_CORE) += platform-ipu-core.o
-obj-$(CONFIG_IMX_HAVE_PLATFORM_MX1_CAMERA) += platform-mx1-camera.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_MX2_CAMERA) += platform-mx2-camera.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_MXC_EHCI) += platform-mxc-ehci.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_MXC_MMC) += platform-mxc-mmc.o
diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h
index 69bafc884dfa..67f7fb13050d 100644
--- a/arch/arm/mach-imx/devices/devices-common.h
+++ b/arch/arm/mach-imx/devices/devices-common.h
@@ -192,16 +192,6 @@ struct platform_device *__init imx_add_mx3_sdc_fb(
 		const struct imx_ipu_core_data *data,
 		struct mx3fb_platform_data *pdata);
 
-#include <linux/platform_data/camera-mx1.h>
-struct imx_mx1_camera_data {
-	resource_size_t iobase;
-	resource_size_t iosize;
-	resource_size_t irq;
-};
-struct platform_device *__init imx_add_mx1_camera(
-		const struct imx_mx1_camera_data *data,
-		const struct mx1_camera_pdata *pdata);
-
 #include <linux/platform_data/camera-mx2.h>
 struct imx_mx2_camera_data {
 	const char *devid;
diff --git a/arch/arm/mach-imx/devices/platform-mx1-camera.c b/arch/arm/mach-imx/devices/platform-mx1-camera.c
deleted file mode 100644
index 2c6788131080..000000000000
--- a/arch/arm/mach-imx/devices/platform-mx1-camera.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2010 Pengutronix
- * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation.
- */
-#include "../hardware.h"
-#include "devices-common.h"
-
-#define imx_mx1_camera_data_entry_single(soc, _size)			\
-	{								\
-		.iobase = soc ## _CSI ## _BASE_ADDR,			\
-		.iosize = _size,					\
-		.irq = soc ## _INT_CSI,					\
-	}
-
-#ifdef CONFIG_SOC_IMX1
-const struct imx_mx1_camera_data imx1_mx1_camera_data __initconst =
-	imx_mx1_camera_data_entry_single(MX1, 10);
-#endif /* ifdef CONFIG_SOC_IMX1 */
-
-struct platform_device *__init imx_add_mx1_camera(
-		const struct imx_mx1_camera_data *data,
-		const struct mx1_camera_pdata *pdata)
-{
-	struct resource res[] = {
-		{
-			.start = data->iobase,
-			.end = data->iobase + data->iosize - 1,
-			.flags = IORESOURCE_MEM,
-		}, {
-			.start = data->irq,
-			.end = data->irq,
-			.flags = IORESOURCE_IRQ,
-		},
-	};
-	return imx_add_platform_device_dmamask("mx1-camera", 0,
-			res, ARRAY_SIZE(res),
-			pdata, sizeof(*pdata), DMA_BIT_MASK(32));
-}
diff --git a/arch/arm/mach-imx/mx1-camera-fiq-ksym.c b/arch/arm/mach-imx/mx1-camera-fiq-ksym.c
deleted file mode 100644
index fb38436ca67f..000000000000
--- a/arch/arm/mach-imx/mx1-camera-fiq-ksym.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Exported ksyms of ARCH_MX1
- *
- * Copyright (C) 2008, Darius Augulis <augulis.darius@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/platform_device.h>
-#include <linux/module.h>
-
-#include <linux/platform_data/camera-mx1.h>
-
-/* IMX camera FIQ handler */
-EXPORT_SYMBOL(mx1_camera_sof_fiq_start);
-EXPORT_SYMBOL(mx1_camera_sof_fiq_end);
diff --git a/arch/arm/mach-imx/mx1-camera-fiq.S b/arch/arm/mach-imx/mx1-camera-fiq.S
deleted file mode 100644
index 9c69aa65bf17..000000000000
--- a/arch/arm/mach-imx/mx1-camera-fiq.S
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- *  Copyright (C) 2008 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
- *
- *  Based on linux/arch/arm/lib/floppydma.S
- *      Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-		.text
-		.global	mx1_camera_sof_fiq_end
-		.global	mx1_camera_sof_fiq_start
-mx1_camera_sof_fiq_start:
-		@ enable dma
-		ldr	r12, [r9]
-		orr	r12, r12, #0x00000001
-		str	r12, [r9]
-		@ unmask DMA interrupt
-		ldr	r12, [r8]
-		bic	r12, r12, r13
-		str	r12, [r8]
-		@ disable SOF interrupt
-		ldr	r12, [r10]
-		bic	r12, r12, #0x00010000
-		str	r12, [r10]
-		@ clear SOF flag
-		mov	r12, #0x00010000
-		str	r12, [r11]
-		@ return from FIQ
-		subs	pc, lr, #4
-mx1_camera_sof_fiq_end:
diff --git a/include/linux/platform_data/camera-mx1.h b/include/linux/platform_data/camera-mx1.h
deleted file mode 100644
index 4fd6c70314b4..000000000000
--- a/include/linux/platform_data/camera-mx1.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * mx1_camera.h - i.MX1/i.MXL camera driver header file
- *
- * Copyright (c) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
- * Copyright (C) 2009, Darius Augulis <augulis.darius@gmail.com>
- *
- * Based on PXA camera.h file:
- * Copyright (C) 2003, Intel Corporation
- * Copyright (C) 2008, Guennadi Liakhovetski <kernel@pengutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __ASM_ARCH_CAMERA_H_
-#define __ASM_ARCH_CAMERA_H_
-
-#define MX1_CAMERA_DATA_HIGH	1
-#define MX1_CAMERA_PCLK_RISING	2
-#define MX1_CAMERA_VSYNC_HIGH	4
-
-extern unsigned char mx1_camera_sof_fiq_start, mx1_camera_sof_fiq_end;
-
-/**
- * struct mx1_camera_pdata - i.MX1/i.MXL camera platform data
- * @mclk_10khz:	master clock frequency in 10kHz units
- * @flags:	MX1 camera platform flags
- */
-struct mx1_camera_pdata {
-	unsigned long mclk_10khz;
-	unsigned long flags;
-};
-
-#endif /* __ASM_ARCH_CAMERA_H_ */
-- 
cgit v1.2.3


From 3c45ddf823d679a820adddd53b52c6699c9a05ac Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 16 Jul 2014 15:38:32 -0400
Subject: svcrdma: Select NFSv4.1 backchannel transport based on forward
 channel

The current code always selects XPRT_TRANSPORT_BC_TCP for the back
channel, even when the forward channel was not TCP (eg, RDMA). When
a 4.1 mount is attempted with RDMA, the server panics in the TCP BC
code when trying to send CB_NULL.

Instead, construct the transport protocol number from the forward
channel transport or'd with XPRT_TRANSPORT_BC. Transports that do
not support bi-directional RPC will not have registered a "BC"
transport, causing create_backchannel_client() to fail immediately.

Fixes: https://bugzilla.linux-nfs.org/show_bug.cgi?id=265
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4callback.c                   | 3 ++-
 include/linux/sunrpc/svc_xprt.h          | 1 +
 net/sunrpc/svcsock.c                     | 2 ++
 net/sunrpc/xprt.c                        | 2 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 +
 5 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a88a93e09d69..564d72304613 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		clp->cl_cb_session = ses;
 		args.bc_xprt = conn->cb_xprt;
 		args.prognumber = clp->cl_cb_session->se_cb_prog;
-		args.protocol = XPRT_TRANSPORT_BC_TCP;
+		args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
+				XPRT_TRANSPORT_BC;
 		args.authflavor = ses->se_cb_sec.flavor;
 	}
 	/* Create RPC client */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 7235040a19b2..5d9d6f84b382 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -33,6 +33,7 @@ struct svc_xprt_class {
 	struct svc_xprt_ops	*xcl_ops;
 	struct list_head	xcl_list;
 	u32			xcl_max_payload;
+	int			xcl_ident;
 };
 
 /*
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b507cd327d9b..b2437ee93657 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -692,6 +692,7 @@ static struct svc_xprt_class svc_udp_class = {
 	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_udp_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
+	.xcl_ident = XPRT_TRANSPORT_UDP,
 };
 
 static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -1292,6 +1293,7 @@ static struct svc_xprt_class svc_tcp_class = {
 	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_tcp_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+	.xcl_ident = XPRT_TRANSPORT_TCP,
 };
 
 void svc_init_xprt_sock(void)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c3b2b3369e52..51c63165073c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1306,7 +1306,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 		}
 	}
 	spin_unlock(&xprt_list_lock);
-	printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
+	dprintk("RPC: transport (%d) not supported\n", args->ident);
 	return ERR_PTR(-EIO);
 
 found:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e7323fbbd348..06a5d9235107 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -92,6 +92,7 @@ struct svc_xprt_class svc_rdma_class = {
 	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_rdma_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+	.xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
 struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
-- 
cgit v1.2.3


From 4362175dd65d1816a18ac3f14107d788d5fced27 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 14 Jul 2014 18:29:16 +0100
Subject: mfd: ab8500-debugfs: BIG clean-up

When checkpatch is run on ab8500-debugfs.c it screamed blue murder!

This patch fixes up all of the errors/warnings reported:

WARNING: line over 80 characters
+		err = seq_printf(s, "  [0x%02X/0x%02X]: 0x%02X\n",

WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(...  to printk(KERN_INFO ...
+		printk(KERN_INFO" [0x%02X/0x%02X]: 0x%02X\n",

WARNING: Prefer seq_puts to seq_printf
+	seq_printf(s, AB8500_NAME_STRING " register values:\n");

WARNING: Prefer seq_puts to seq_printf
+	seq_printf(s, AB8500_NAME_STRING " register values:\n");

WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(...  to printk(KERN_INFO ...
+	printk(KERN_INFO"ab8500 register values:\n");

WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(...  to printk(KERN_INFO ...
+		printk(KERN_INFO" bank 0x%02X:\n", i);

WARNING: externs should be avoided in .c files
+extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);

WARNING: quoted string split across lines
+	pr_info("Saving all ABB registers at \"ab8500_complete_register_dump\" "
+		"for crash analyze.\n");

WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(...  to printk(KERN_ERR ...
+		printk(KERN_ERR "abx500_set_reg failed %d, %d", err, __LINE__);

WARNING: Prefer seq_puts to seq_printf
+	seq_printf(s, "name: number:  number of: wake:\n");

WARNING: line over 80 characters
+	return single_open(file, ab8500_print_modem_registers, inode->i_private);

WARNING: line over 80 characters
+	return single_open(file, ab8500_gpadc_btemp_ball_print, inode->i_private);

WARNING: line over 80 characters
+	return single_open(file, ab8500_gpadc_main_bat_v_print, inode->i_private);

WARNING: line over 80 characters
+	vbat_true_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS,

WARNING: line over 80 characters
+static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, void *p)

WARNING: line over 80 characters
+static const struct file_operations ab8540_gpadc_vbat_true_meas_and_ibat_fops = {

WARNING: line over 80 characters
+		vmain_l, vmain_h, btemp_l, btemp_h, vbat_l, vbat_h, ibat_l, ibat_h);

WARNING: quoted string split across lines
+		dev_err(dev, "debugfs error input: "
+			"should be egal to 1, 4, 8 or 16\n");

WARNING: Missing a blank line after declarations
+	char *s = b;
+	if ((*s == '0') && ((*(s+1) == 'x') || (*(s+1) == 'X'))) {

WARNING: simple_strtoul is obsolete, use kstrtoul instead
+			loc.mask = simple_strtoul(b, &b, 0);

WARNING: simple_strtol is obsolete, use kstrtol instead
+			loc.shift = simple_strtol(b, &b, 0);

WARNING: simple_strtoul is obsolete, use kstrtoul instead
+	loc.bank = simple_strtoul(b, &b, 0);

WARNING: simple_strtoul is obsolete, use kstrtoul instead
+	loc.addr = simple_strtoul(b, &b, 0);

WARNING: simple_strtoul is obsolete, use kstrtoul instead
+		val = simple_strtoul(b, &b, 0);

WARNING: quoted string split across lines
+	pr_warn("HWREG request: %s, %s, addr=0x%08X, mask=0x%X, shift=%d"
+			"value=0x%X\n", (write) ? "write" : "read",

WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(...  to printk(KERN_ERR ...
+		printk(KERN_ERR "sysfs_create_file failed %d\n", err);

WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(...  to printk(KERN_ERR ...
+		printk(KERN_ERR "request_threaded_irq failed %d, %lu\n",

ERROR: code indent should use tabs where possible
+                       err, user_val);$

WARNING: please, no spaces at the start of a line
+                       err, user_val);$

WARNING: Missing a blank line after declarations
+	struct resource *res;
+	debug_bank = AB8500_MISC;

ERROR: space required after that ',' (ctx:VxV)
+		sizeof(*dev_attr)*num_irqs,GFP_KERNEL);
 		                          ^

WARNING: return of an errno should typically be -ve (return -ENXIO)
+		return ENXIO;

WARNING: line over 80 characters
+	file = debugfs_create_file("register-bank", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+	file = debugfs_create_file("register-address", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+	file = debugfs_create_file("register-value", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+	file = debugfs_create_file("irq-subscribe", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+	file = debugfs_create_file("irq-unsubscribe", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+	file = debugfs_create_file("all-modem-registers", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+	file = debugfs_create_file("main_charger_v", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+	file = debugfs_create_file("main_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+	file = debugfs_create_file("usb_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+		file = debugfs_create_file("xtal_temp", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+			ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_xtal_temp_fops);

WARNING: line over 80 characters
+		file = debugfs_create_file("vbattruemeas", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+		file = debugfs_create_file("otp_calib", (S_IRUGO | S_IWUSR | S_IWGRP),

WARNING: line over 80 characters
+			ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_otp_calib_fops);

total: 2 errors, 44 warnings, 3230 lines checked

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/ab8500-debugfs.c      | 288 +++++++++++++++++++++++---------------
 include/linux/mfd/abx500/ab8500.h |   1 +
 2 files changed, 173 insertions(+), 116 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index f7f271c16f2c..b2c7e3b1edfa 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -135,10 +135,10 @@ struct ab8500_prcmu_ranges {
 /* hwreg- "mask" and "shift" entries ressources */
 struct hwreg_cfg {
 	u32  bank;      /* target bank */
-	u32  addr;      /* target address */
+	unsigned long addr;      /* target address */
 	uint fmt;       /* format */
-	uint mask;      /* read/write mask, applied before any bit shift */
-	int  shift;     /* bit shift (read:right shift, write:left shift */
+	unsigned long mask; /* read/write mask, applied before any bit shift */
+	long shift;     /* bit shift (read:right shift, write:left shift */
 };
 /* fmt bit #0: 0=hexa, 1=dec */
 #define REG_FMT_DEC(c) ((c)->fmt & 0x1)
@@ -1304,16 +1304,17 @@ static int ab8500_registers_print(struct device *dev, u32 bank,
 			}
 
 			if (s) {
-				err = seq_printf(s, "  [0x%02X/0x%02X]: 0x%02X\n",
-					bank, reg, value);
+				err = seq_printf(s,
+						 "  [0x%02X/0x%02X]: 0x%02X\n",
+						 bank, reg, value);
 				if (err < 0) {
 					/* Error is not returned here since
 					 * the output is wanted in any case */
 					return 0;
 				}
 			} else {
-				printk(KERN_INFO" [0x%02X/0x%02X]: 0x%02X\n",
-					bank, reg, value);
+				dev_info(dev, " [0x%02X/0x%02X]: 0x%02X\n",
+					 bank, reg, value);
 			}
 		}
 	}
@@ -1325,7 +1326,7 @@ static int ab8500_print_bank_registers(struct seq_file *s, void *p)
 	struct device *dev = s->private;
 	u32 bank = debug_bank;
 
-	seq_printf(s, AB8500_NAME_STRING " register values:\n");
+	seq_puts(s, AB8500_NAME_STRING " register values:\n");
 
 	seq_printf(s, " bank 0x%02X:\n", bank);
 
@@ -1351,7 +1352,7 @@ static int ab8500_print_all_banks(struct seq_file *s, void *p)
 	struct device *dev = s->private;
 	unsigned int i;
 
-	seq_printf(s, AB8500_NAME_STRING " register values:\n");
+	seq_puts(s, AB8500_NAME_STRING " register values:\n");
 
 	for (i = 0; i < AB8500_NUM_BANKS; i++) {
 		seq_printf(s, " bank 0x%02X:\n", i);
@@ -1366,10 +1367,10 @@ void ab8500_dump_all_banks(struct device *dev)
 {
 	unsigned int i;
 
-	printk(KERN_INFO"ab8500 register values:\n");
+	dev_info(dev, "ab8500 register values:\n");
 
 	for (i = 1; i < AB8500_NUM_BANKS; i++) {
-		printk(KERN_INFO" bank 0x%02X:\n", i);
+		dev_info(dev, " bank 0x%02X:\n", i);
 		ab8500_registers_print(dev, i, NULL);
 	}
 }
@@ -1383,8 +1384,6 @@ static struct ab8500_register_dump
 	u8 value;
 } ab8500_complete_register_dump[DUMP_MAX_REGS];
 
-extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
-
 /* This shall only be called upon kernel panic! */
 void ab8500_dump_all_banks_to_mem(void)
 {
@@ -1392,8 +1391,7 @@ void ab8500_dump_all_banks_to_mem(void)
 	u8 bank;
 	int err = 0;
 
-	pr_info("Saving all ABB registers at \"ab8500_complete_register_dump\" "
-		"for crash analyze.\n");
+	pr_info("Saving all ABB registers for crash analysis.\n");
 
 	for (bank = 0; bank < AB8500_NUM_BANKS; bank++) {
 		for (i = 0; i < debug_ranges[bank].num_ranges; i++) {
@@ -1563,7 +1561,7 @@ static ssize_t ab8500_val_write(struct file *file,
 	err = abx500_set_register_interruptible(dev,
 		(u8)debug_bank, debug_address, (u8)user_val);
 	if (err < 0) {
-		printk(KERN_ERR "abx500_set_reg failed %d, %d", err, __LINE__);
+		pr_err("abx500_set_reg failed %d, %d", err, __LINE__);
 		return -EINVAL;
 	}
 
@@ -1595,7 +1593,7 @@ static int ab8500_interrupts_print(struct seq_file *s, void *p)
 {
 	int line;
 
-	seq_printf(s, "name: number:  number of: wake:\n");
+	seq_puts(s, "name: number:  number of: wake:\n");
 
 	for (line = 0; line < num_interrupt_lines; line++) {
 		struct irq_desc *desc = irq_to_desc(line + irq_first);
@@ -1721,7 +1719,8 @@ static int ab8500_print_modem_registers(struct seq_file *s, void *p)
 
 static int ab8500_modem_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ab8500_print_modem_registers, inode->i_private);
+	return single_open(file, ab8500_print_modem_registers,
+			   inode->i_private);
 }
 
 static const struct file_operations ab8500_modem_fops = {
@@ -1750,7 +1749,8 @@ static int ab8500_gpadc_bat_ctrl_print(struct seq_file *s, void *p)
 
 static int ab8500_gpadc_bat_ctrl_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ab8500_gpadc_bat_ctrl_print, inode->i_private);
+	return single_open(file, ab8500_gpadc_bat_ctrl_print,
+			   inode->i_private);
 }
 
 static const struct file_operations ab8500_gpadc_bat_ctrl_fops = {
@@ -1780,7 +1780,8 @@ static int ab8500_gpadc_btemp_ball_print(struct seq_file *s, void *p)
 static int ab8500_gpadc_btemp_ball_open(struct inode *inode,
 					struct file *file)
 {
-	return single_open(file, ab8500_gpadc_btemp_ball_print, inode->i_private);
+	return single_open(file, ab8500_gpadc_btemp_ball_print,
+			   inode->i_private);
 }
 
 static const struct file_operations ab8500_gpadc_btemp_ball_fops = {
@@ -1961,7 +1962,8 @@ static int ab8500_gpadc_main_bat_v_print(struct seq_file *s, void *p)
 static int ab8500_gpadc_main_bat_v_open(struct inode *inode,
 					struct file *file)
 {
-	return single_open(file, ab8500_gpadc_main_bat_v_print, inode->i_private);
+	return single_open(file, ab8500_gpadc_main_bat_v_print,
+			   inode->i_private);
 }
 
 static const struct file_operations ab8500_gpadc_main_bat_v_fops = {
@@ -2081,7 +2083,8 @@ static int ab8500_gpadc_bk_bat_v_print(struct seq_file *s, void *p)
 
 static int ab8500_gpadc_bk_bat_v_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ab8500_gpadc_bk_bat_v_print, inode->i_private);
+	return single_open(file, ab8500_gpadc_bk_bat_v_print,
+			   inode->i_private);
 }
 
 static const struct file_operations ab8500_gpadc_bk_bat_v_fops = {
@@ -2110,7 +2113,8 @@ static int ab8500_gpadc_die_temp_print(struct seq_file *s, void *p)
 
 static int ab8500_gpadc_die_temp_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ab8500_gpadc_die_temp_print, inode->i_private);
+	return single_open(file, ab8500_gpadc_die_temp_print,
+			   inode->i_private);
 }
 
 static const struct file_operations ab8500_gpadc_die_temp_fops = {
@@ -2189,8 +2193,9 @@ static int ab8540_gpadc_vbat_true_meas_print(struct seq_file *s, void *p)
 	gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
 	vbat_true_meas_raw = ab8500_gpadc_read_raw(gpadc, VBAT_TRUE_MEAS,
 		avg_sample, trig_edge, trig_timer, conv_type);
-	vbat_true_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS,
-		vbat_true_meas_raw);
+	vbat_true_meas_convert =
+		ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS,
+					   vbat_true_meas_raw);
 
 	return seq_printf(s, "%d,0x%X\n",
 		vbat_true_meas_convert, vbat_true_meas_raw);
@@ -2284,7 +2289,8 @@ static const struct file_operations ab8540_gpadc_vbat_meas_and_ibat_fops = {
 	.owner = THIS_MODULE,
 };
 
-static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, void *p)
+static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s,
+						      void *p)
 {
 	int vbat_true_meas_raw;
 	int vbat_true_meas_convert;
@@ -2313,7 +2319,8 @@ static int ab8540_gpadc_vbat_true_meas_and_ibat_open(struct inode *inode,
 		inode->i_private);
 }
 
-static const struct file_operations ab8540_gpadc_vbat_true_meas_and_ibat_fops = {
+static const struct file_operations
+ab8540_gpadc_vbat_true_meas_and_ibat_fops = {
 	.open = ab8540_gpadc_vbat_true_meas_and_ibat_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -2367,14 +2374,15 @@ static int ab8540_gpadc_otp_cal_print(struct seq_file *s, void *p)
 	ab8540_gpadc_get_otp(gpadc, &vmain_l, &vmain_h, &btemp_l, &btemp_h,
 			&vbat_l, &vbat_h, &ibat_l, &ibat_h);
 	return seq_printf(s, "VMAIN_L:0x%X\n"
-		"VMAIN_H:0x%X\n"
-		"BTEMP_L:0x%X\n"
-		"BTEMP_H:0x%X\n"
-		"VBAT_L:0x%X\n"
-		"VBAT_H:0x%X\n"
-		"IBAT_L:0x%X\n"
-		"IBAT_H:0x%X\n",
-		vmain_l, vmain_h, btemp_l, btemp_h, vbat_l, vbat_h, ibat_l, ibat_h);
+			  "VMAIN_H:0x%X\n"
+			  "BTEMP_L:0x%X\n"
+			  "BTEMP_H:0x%X\n"
+			  "VBAT_L:0x%X\n"
+			  "VBAT_H:0x%X\n"
+			  "IBAT_L:0x%X\n"
+			  "IBAT_H:0x%X\n",
+			  vmain_l, vmain_h, btemp_l, btemp_h,
+			  vbat_l, vbat_h, ibat_l, ibat_h);
 }
 
 static int ab8540_gpadc_otp_cal_open(struct inode *inode, struct file *file)
@@ -2418,8 +2426,8 @@ static ssize_t ab8500_gpadc_avg_sample_write(struct file *file,
 			|| (user_avg_sample == SAMPLE_16)) {
 		avg_sample = (u8) user_avg_sample;
 	} else {
-		dev_err(dev, "debugfs error input: "
-			"should be egal to 1, 4, 8 or 16\n");
+		dev_err(dev,
+			"debugfs err input: should be egal to 1, 4, 8 or 16\n");
 		return -EINVAL;
 	}
 
@@ -2578,6 +2586,7 @@ static const struct file_operations ab8500_gpadc_conv_type_fops = {
 static int strval_len(char *b)
 {
 	char *s = b;
+
 	if ((*s == '0') && ((*(s+1) == 'x') || (*(s+1) == 'X'))) {
 		s += 2;
 		for (; *s && (*s != ' ') && (*s != '\n'); s++) {
@@ -2642,13 +2651,17 @@ static ssize_t hwreg_common_write(char *b, struct hwreg_cfg *cfg,
 			b += (*(b+2) == ' ') ? 3 : 6;
 			if (strval_len(b) == 0)
 				return -EINVAL;
-			loc.mask = simple_strtoul(b, &b, 0);
+			ret = kstrtoul(b, 0, &loc.mask);
+			if (ret)
+				return ret;
 		} else if ((!strncmp(b, "-s ", 3)) ||
 				(!strncmp(b, "-shift ", 7))) {
 			b += (*(b+2) == ' ') ? 3 : 7;
 			if (strval_len(b) == 0)
 				return -EINVAL;
-			loc.shift = simple_strtol(b, &b, 0);
+			ret = kstrtol(b, 0, &loc.shift);
+			if (ret)
+				return ret;
 		} else {
 			return -EINVAL;
 		}
@@ -2656,29 +2669,36 @@ static ssize_t hwreg_common_write(char *b, struct hwreg_cfg *cfg,
 	/* get arg BANK and ADDRESS */
 	if (strval_len(b) == 0)
 		return -EINVAL;
-	loc.bank = simple_strtoul(b, &b, 0);
+	ret = kstrtouint(b, 0, &loc.bank);
+	if (ret)
+		return ret;
 	while (*b == ' ')
 		b++;
 	if (strval_len(b) == 0)
 		return -EINVAL;
-	loc.addr = simple_strtoul(b, &b, 0);
+	ret = kstrtoul(b, 0, &loc.addr);
+	if (ret)
+		return ret;
 
 	if (write) {
 		while (*b == ' ')
 			b++;
 		if (strval_len(b) == 0)
 			return -EINVAL;
-		val = simple_strtoul(b, &b, 0);
+		ret = kstrtouint(b, 0, &val);
+		if (ret)
+			return ret;
 	}
 
 	/* args are ok, update target cfg (mainly for read) */
 	*cfg = loc;
 
 #ifdef ABB_HWREG_DEBUG
-	pr_warn("HWREG request: %s, %s, addr=0x%08X, mask=0x%X, shift=%d"
-			"value=0x%X\n", (write) ? "write" : "read",
-			REG_FMT_DEC(cfg) ? "decimal" : "hexa",
-			cfg->addr, cfg->mask, cfg->shift, val);
+	pr_warn("HWREG request: %s, %s,\n"
+		"  addr=0x%08X, mask=0x%X, shift=%d" "value=0x%X\n",
+		(write) ? "write" : "read",
+		REG_FMT_DEC(cfg) ? "decimal" : "hexa",
+		cfg->addr, cfg->mask, cfg->shift, val);
 #endif
 
 	if (!write)
@@ -2814,7 +2834,7 @@ static ssize_t ab8500_subscribe_write(struct file *file,
 	dev_attr[irq_index]->attr.mode = S_IRUGO;
 	err = sysfs_create_file(&dev->kobj, &dev_attr[irq_index]->attr);
 	if (err < 0) {
-		printk(KERN_ERR "sysfs_create_file failed %d\n", err);
+		pr_info("sysfs_create_file failed %d\n", err);
 		return err;
 	}
 
@@ -2822,8 +2842,8 @@ static ssize_t ab8500_subscribe_write(struct file *file,
 				   IRQF_SHARED | IRQF_NO_SUSPEND,
 				   "ab8500-debug", &dev->kobj);
 	if (err < 0) {
-		printk(KERN_ERR "request_threaded_irq failed %d, %lu\n",
-                       err, user_val);
+		pr_info("request_threaded_irq failed %d, %lu\n",
+			err, user_val);
 		sysfs_remove_file(&dev->kobj, &dev_attr[irq_index]->attr);
 		return err;
 	}
@@ -2945,6 +2965,7 @@ static int ab8500_debug_probe(struct platform_device *plf)
 	struct dentry *file;
 	struct ab8500 *ab8500;
 	struct resource *res;
+
 	debug_bank = AB8500_MISC;
 	debug_address = AB8500_REV_REG & 0x00FF;
 
@@ -2957,7 +2978,7 @@ static int ab8500_debug_probe(struct platform_device *plf)
 		return -ENOMEM;
 
 	dev_attr = devm_kzalloc(&plf->dev,
-				sizeof(*dev_attr)*num_irqs,GFP_KERNEL);
+				sizeof(*dev_attr)*num_irqs, GFP_KERNEL);
 	if (!dev_attr)
 		return -ENOMEM;
 
@@ -2968,23 +2989,20 @@ static int ab8500_debug_probe(struct platform_device *plf)
 
 	res = platform_get_resource_byname(plf, 0, "IRQ_AB8500");
 	if (!res) {
-		dev_err(&plf->dev, "AB8500 irq not found, err %d\n",
-			irq_first);
-		return ENXIO;
+		dev_err(&plf->dev, "AB8500 irq not found, err %d\n", irq_first);
+		return -ENXIO;
 	}
 	irq_ab8500 = res->start;
 
 	irq_first = platform_get_irq_byname(plf, "IRQ_FIRST");
 	if (irq_first < 0) {
-		dev_err(&plf->dev, "First irq not found, err %d\n",
-			irq_first);
+		dev_err(&plf->dev, "First irq not found, err %d\n", irq_first);
 		return irq_first;
 	}
 
 	irq_last = platform_get_irq_byname(plf, "IRQ_LAST");
 	if (irq_last < 0) {
-		dev_err(&plf->dev, "Last irq not found, err %d\n",
-			irq_last);
+		dev_err(&plf->dev, "Last irq not found, err %d\n", irq_last);
 		return irq_last;
 	}
 
@@ -2993,37 +3011,41 @@ static int ab8500_debug_probe(struct platform_device *plf)
 		goto err;
 
 	ab8500_gpadc_dir = debugfs_create_dir(AB8500_ADC_NAME_STRING,
-		ab8500_dir);
+					      ab8500_dir);
 	if (!ab8500_gpadc_dir)
 		goto err;
 
-	file = debugfs_create_file("all-bank-registers", S_IRUGO,
-		ab8500_dir, &plf->dev, &ab8500_registers_fops);
+	file = debugfs_create_file("all-bank-registers", S_IRUGO, ab8500_dir,
+				   &plf->dev, &ab8500_registers_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("all-banks", S_IRUGO,
-		ab8500_dir, &plf->dev, &ab8500_all_banks_fops);
+	file = debugfs_create_file("all-banks", S_IRUGO, ab8500_dir,
+				   &plf->dev, &ab8500_all_banks_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("register-bank", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_dir, &plf->dev, &ab8500_bank_fops);
+	file = debugfs_create_file("register-bank",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_dir, &plf->dev, &ab8500_bank_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("register-address", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_dir, &plf->dev, &ab8500_address_fops);
+	file = debugfs_create_file("register-address",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_dir, &plf->dev, &ab8500_address_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("register-value", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_dir, &plf->dev, &ab8500_val_fops);
+	file = debugfs_create_file("register-value",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_dir, &plf->dev, &ab8500_val_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("irq-subscribe", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_dir, &plf->dev, &ab8500_subscribe_fops);
+	file = debugfs_create_file("irq-subscribe",
+				   (S_IRUGO | S_IWUSR | S_IWGRP), ab8500_dir,
+				   &plf->dev, &ab8500_subscribe_fops);
 	if (!file)
 		goto err;
 
@@ -3041,150 +3063,184 @@ static int ab8500_debug_probe(struct platform_device *plf)
 		num_interrupt_lines = AB8540_NR_IRQS;
 	}
 
-	file = debugfs_create_file("interrupts", (S_IRUGO),
-		ab8500_dir, &plf->dev, &ab8500_interrupts_fops);
+	file = debugfs_create_file("interrupts", (S_IRUGO), ab8500_dir,
+				   &plf->dev, &ab8500_interrupts_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("irq-unsubscribe", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_dir, &plf->dev, &ab8500_unsubscribe_fops);
+	file = debugfs_create_file("irq-unsubscribe",
+				   (S_IRUGO | S_IWUSR | S_IWGRP), ab8500_dir,
+				   &plf->dev, &ab8500_unsubscribe_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("hwreg", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_dir, &plf->dev, &ab8500_hwreg_fops);
+				   ab8500_dir, &plf->dev, &ab8500_hwreg_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("all-modem-registers", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_dir, &plf->dev, &ab8500_modem_fops);
+	file = debugfs_create_file("all-modem-registers",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_dir, &plf->dev, &ab8500_modem_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("bat_ctrl", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_bat_ctrl_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_bat_ctrl_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("btemp_ball", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_btemp_ball_fops);
+				   ab8500_gpadc_dir,
+				   &plf->dev, &ab8500_gpadc_btemp_ball_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("main_charger_v", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_charger_v_fops);
+	file = debugfs_create_file("main_charger_v",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_main_charger_v_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("acc_detect1", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_acc_detect1_fops);
+	file = debugfs_create_file("acc_detect1",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_acc_detect1_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("acc_detect2", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_acc_detect2_fops);
+	file = debugfs_create_file("acc_detect2",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_acc_detect2_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("adc_aux1", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_aux1_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_aux1_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("adc_aux2", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_aux2_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_aux2_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("main_bat_v", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_bat_v_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_main_bat_v_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("vbus_v", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_vbus_v_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_vbus_v_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("main_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_charger_c_fops);
+	file = debugfs_create_file("main_charger_c",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_main_charger_c_fops);
 	if (!file)
 		goto err;
 
-	file = debugfs_create_file("usb_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_usb_charger_c_fops);
+	file = debugfs_create_file("usb_charger_c",
+				   (S_IRUGO | S_IWUSR | S_IWGRP),
+				   ab8500_gpadc_dir,
+				   &plf->dev, &ab8500_gpadc_usb_charger_c_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("bk_bat_v", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_bk_bat_v_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_bk_bat_v_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("die_temp", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_die_temp_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_die_temp_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("usb_id", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_usb_id_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_usb_id_fops);
 	if (!file)
 		goto err;
 
 	if (is_ab8540(ab8500)) {
-		file = debugfs_create_file("xtal_temp", (S_IRUGO | S_IWUSR | S_IWGRP),
-			ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_xtal_temp_fops);
+		file = debugfs_create_file("xtal_temp",
+					   (S_IRUGO | S_IWUSR | S_IWGRP),
+					   ab8500_gpadc_dir, &plf->dev,
+					   &ab8540_gpadc_xtal_temp_fops);
 		if (!file)
 			goto err;
-		file = debugfs_create_file("vbattruemeas", (S_IRUGO | S_IWUSR | S_IWGRP),
-			ab8500_gpadc_dir, &plf->dev,
-			&ab8540_gpadc_vbat_true_meas_fops);
+		file = debugfs_create_file("vbattruemeas",
+					   (S_IRUGO | S_IWUSR | S_IWGRP),
+					   ab8500_gpadc_dir, &plf->dev,
+					   &ab8540_gpadc_vbat_true_meas_fops);
 		if (!file)
 			goto err;
 		file = debugfs_create_file("batctrl_and_ibat",
-			(S_IRUGO | S_IWUGO), ab8500_gpadc_dir,
-			&plf->dev, &ab8540_gpadc_bat_ctrl_and_ibat_fops);
+					(S_IRUGO | S_IWUGO),
+					ab8500_gpadc_dir,
+					&plf->dev,
+					&ab8540_gpadc_bat_ctrl_and_ibat_fops);
 		if (!file)
 			goto err;
 		file = debugfs_create_file("vbatmeas_and_ibat",
-			(S_IRUGO | S_IWUGO), ab8500_gpadc_dir,
-			&plf->dev,
-			&ab8540_gpadc_vbat_meas_and_ibat_fops);
+					(S_IRUGO | S_IWUGO),
+					ab8500_gpadc_dir, &plf->dev,
+					&ab8540_gpadc_vbat_meas_and_ibat_fops);
 		if (!file)
 			goto err;
 		file = debugfs_create_file("vbattruemeas_and_ibat",
-			(S_IRUGO | S_IWUGO), ab8500_gpadc_dir,
-			&plf->dev,
-			&ab8540_gpadc_vbat_true_meas_and_ibat_fops);
+				(S_IRUGO | S_IWUGO),
+				ab8500_gpadc_dir,
+				&plf->dev,
+				&ab8540_gpadc_vbat_true_meas_and_ibat_fops);
 		if (!file)
 			goto err;
 		file = debugfs_create_file("battemp_and_ibat",
-			(S_IRUGO | S_IWUGO), ab8500_gpadc_dir,
+			(S_IRUGO | S_IWUGO),
+			ab8500_gpadc_dir,
 			&plf->dev, &ab8540_gpadc_bat_temp_and_ibat_fops);
 		if (!file)
 			goto err;
-		file = debugfs_create_file("otp_calib", (S_IRUGO | S_IWUSR | S_IWGRP),
-			ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_otp_calib_fops);
+		file = debugfs_create_file("otp_calib",
+				(S_IRUGO | S_IWUSR | S_IWGRP),
+				ab8500_gpadc_dir,
+				&plf->dev, &ab8540_gpadc_otp_calib_fops);
 		if (!file)
 			goto err;
 	}
 	file = debugfs_create_file("avg_sample", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_avg_sample_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_avg_sample_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("trig_edge", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_trig_edge_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_trig_edge_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("trig_timer", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_trig_timer_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_trig_timer_fops);
 	if (!file)
 		goto err;
 
 	file = debugfs_create_file("conv_type", (S_IRUGO | S_IWUSR | S_IWGRP),
-		ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_conv_type_fops);
+				   ab8500_gpadc_dir, &plf->dev,
+				   &ab8500_gpadc_conv_type_fops);
 	if (!file)
 		goto err;
 
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 4e7fe7417fc9..9475fee2bfc5 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -505,6 +505,7 @@ static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab)
 void ab8500_override_turn_on_stat(u8 mask, u8 set);
 
 #ifdef CONFIG_AB8500_DEBUG
+extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
 void ab8500_dump_all_banks(struct device *dev);
 void ab8500_debug_register_interrupt(int line);
 #else
-- 
cgit v1.2.3


From 6f1c1e71d933f58a6248f1681aededdd407f32a8 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Date: Fri, 4 Jul 2014 22:24:04 +0200
Subject: mfd: max77686: Convert to use regmap_irq

By using the generic IRQ support in the Register map API, it
is possible to get rid max77686-irq.c and simplify the code.

Suggested-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Reviewed-by: Doug Anderson <dianders@chromium.org>
Tested-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig                  |   1 +
 drivers/mfd/Makefile                 |   2 +-
 drivers/mfd/max77686-irq.c           | 319 -----------------------------------
 drivers/mfd/max77686.c               |  97 ++++++++++-
 drivers/rtc/rtc-max77686.c           |  27 +--
 include/linux/mfd/max77686-private.h |  31 +++-
 include/linux/mfd/max77686.h         |   2 -
 7 files changed, 123 insertions(+), 356 deletions(-)
 delete mode 100644 drivers/mfd/max77686-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b8d9ca0b68e2..30102042dcaf 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -384,6 +384,7 @@ config MFD_MAX77686
 	depends on I2C=y
 	select MFD_CORE
 	select REGMAP_I2C
+	select REGMAP_IRQ
 	select IRQ_DOMAIN
 	help
 	  Say yes here to add support for Maxim Semiconductor MAX77686.
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 4e2bc255b8b0..f00148782d9b 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -115,7 +115,7 @@ da9063-objs			:= da9063-core.o da9063-irq.o da9063-i2c.o
 obj-$(CONFIG_MFD_DA9063)	+= da9063.o
 
 obj-$(CONFIG_MFD_MAX14577)	+= max14577.o
-obj-$(CONFIG_MFD_MAX77686)	+= max77686.o max77686-irq.o
+obj-$(CONFIG_MFD_MAX77686)	+= max77686.o
 obj-$(CONFIG_MFD_MAX77693)	+= max77693.o
 obj-$(CONFIG_MFD_MAX8907)	+= max8907.o
 max8925-objs			:= max8925-core.o max8925-i2c.o
diff --git a/drivers/mfd/max77686-irq.c b/drivers/mfd/max77686-irq.c
deleted file mode 100644
index cdc3280e2ec7..000000000000
--- a/drivers/mfd/max77686-irq.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * max77686-irq.c - Interrupt controller support for MAX77686
- *
- * Copyright (C) 2012 Samsung Electronics Co.Ltd
- * Chiwoong Byun <woong.byun@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * This driver is based on max8997-irq.c
- */
-
-#include <linux/err.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/gpio.h>
-#include <linux/mfd/max77686.h>
-#include <linux/mfd/max77686-private.h>
-#include <linux/irqdomain.h>
-#include <linux/regmap.h>
-
-enum {
-	MAX77686_DEBUG_IRQ_INFO = 1 << 0,
-	MAX77686_DEBUG_IRQ_MASK = 1 << 1,
-	MAX77686_DEBUG_IRQ_INT = 1 << 2,
-};
-
-static int debug_mask = 0;
-module_param(debug_mask, int, 0);
-MODULE_PARM_DESC(debug_mask, "Set debug_mask : 0x0=off 0x1=IRQ_INFO  0x2=IRQ_MASK 0x4=IRQ_INI)");
-
-static const u8 max77686_mask_reg[] = {
-	[PMIC_INT1] = MAX77686_REG_INT1MSK,
-	[PMIC_INT2] = MAX77686_REG_INT2MSK,
-	[RTC_INT] = MAX77686_RTC_INTM,
-};
-
-static struct regmap *max77686_get_regmap(struct max77686_dev *max77686,
-				enum max77686_irq_source src)
-{
-	switch (src) {
-	case PMIC_INT1 ... PMIC_INT2:
-		return max77686->regmap;
-	case RTC_INT:
-		return max77686->rtc_regmap;
-	default:
-		return ERR_PTR(-EINVAL);
-	}
-}
-
-struct max77686_irq_data {
-	int mask;
-	enum max77686_irq_source group;
-};
-
-#define DECLARE_IRQ(idx, _group, _mask)		\
-	[(idx)] = { .group = (_group), .mask = (_mask) }
-static const struct max77686_irq_data max77686_irqs[] = {
-	DECLARE_IRQ(MAX77686_PMICIRQ_PWRONF,	PMIC_INT1, 1 << 0),
-	DECLARE_IRQ(MAX77686_PMICIRQ_PWRONR,	PMIC_INT1, 1 << 1),
-	DECLARE_IRQ(MAX77686_PMICIRQ_JIGONBF,	PMIC_INT1, 1 << 2),
-	DECLARE_IRQ(MAX77686_PMICIRQ_JIGONBR,	PMIC_INT1, 1 << 3),
-	DECLARE_IRQ(MAX77686_PMICIRQ_ACOKBF,	PMIC_INT1, 1 << 4),
-	DECLARE_IRQ(MAX77686_PMICIRQ_ACOKBR,	PMIC_INT1, 1 << 5),
-	DECLARE_IRQ(MAX77686_PMICIRQ_ONKEY1S,	PMIC_INT1, 1 << 6),
-	DECLARE_IRQ(MAX77686_PMICIRQ_MRSTB,		PMIC_INT1, 1 << 7),
-	DECLARE_IRQ(MAX77686_PMICIRQ_140C,		PMIC_INT2, 1 << 0),
-	DECLARE_IRQ(MAX77686_PMICIRQ_120C,		PMIC_INT2, 1 << 1),
-	DECLARE_IRQ(MAX77686_RTCIRQ_RTC60S,		RTC_INT, 1 << 0),
-	DECLARE_IRQ(MAX77686_RTCIRQ_RTCA1,		RTC_INT, 1 << 1),
-	DECLARE_IRQ(MAX77686_RTCIRQ_RTCA2,		RTC_INT, 1 << 2),
-	DECLARE_IRQ(MAX77686_RTCIRQ_SMPL,		RTC_INT, 1 << 3),
-	DECLARE_IRQ(MAX77686_RTCIRQ_RTC1S,		RTC_INT, 1 << 4),
-	DECLARE_IRQ(MAX77686_RTCIRQ_WTSR,		RTC_INT, 1 << 5),
-};
-
-static void max77686_irq_lock(struct irq_data *data)
-{
-	struct max77686_dev *max77686 = irq_get_chip_data(data->irq);
-
-	if (debug_mask & MAX77686_DEBUG_IRQ_MASK)
-		pr_info("%s\n", __func__);
-
-	mutex_lock(&max77686->irqlock);
-}
-
-static void max77686_irq_sync_unlock(struct irq_data *data)
-{
-	struct max77686_dev *max77686 = irq_get_chip_data(data->irq);
-	int i;
-
-	for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++) {
-		u8 mask_reg = max77686_mask_reg[i];
-		struct regmap *map = max77686_get_regmap(max77686, i);
-
-		if (debug_mask & MAX77686_DEBUG_IRQ_MASK)
-			pr_debug("%s: mask_reg[%d]=0x%x, cur=0x%x\n",
-			__func__, i, mask_reg, max77686->irq_masks_cur[i]);
-
-		if (mask_reg == MAX77686_REG_INVALID ||
-				IS_ERR_OR_NULL(map))
-			continue;
-
-		max77686->irq_masks_cache[i] = max77686->irq_masks_cur[i];
-
-		regmap_write(map, max77686_mask_reg[i],
-				max77686->irq_masks_cur[i]);
-	}
-
-	mutex_unlock(&max77686->irqlock);
-}
-
-static const inline struct max77686_irq_data *to_max77686_irq(int irq)
-{
-	struct irq_data *data = irq_get_irq_data(irq);
-	return &max77686_irqs[data->hwirq];
-}
-
-static void max77686_irq_mask(struct irq_data *data)
-{
-	struct max77686_dev *max77686 = irq_get_chip_data(data->irq);
-	const struct max77686_irq_data *irq_data = to_max77686_irq(data->irq);
-
-	max77686->irq_masks_cur[irq_data->group] |= irq_data->mask;
-
-	if (debug_mask & MAX77686_DEBUG_IRQ_MASK)
-		pr_info("%s: group=%d, cur=0x%x\n",
-			__func__, irq_data->group,
-			max77686->irq_masks_cur[irq_data->group]);
-}
-
-static void max77686_irq_unmask(struct irq_data *data)
-{
-	struct max77686_dev *max77686 = irq_get_chip_data(data->irq);
-	const struct max77686_irq_data *irq_data = to_max77686_irq(data->irq);
-
-	max77686->irq_masks_cur[irq_data->group] &= ~irq_data->mask;
-
-	if (debug_mask & MAX77686_DEBUG_IRQ_MASK)
-		pr_info("%s: group=%d, cur=0x%x\n",
-			__func__, irq_data->group,
-			max77686->irq_masks_cur[irq_data->group]);
-}
-
-static struct irq_chip max77686_irq_chip = {
-	.name			= "max77686",
-	.irq_bus_lock		= max77686_irq_lock,
-	.irq_bus_sync_unlock	= max77686_irq_sync_unlock,
-	.irq_mask		= max77686_irq_mask,
-	.irq_unmask		= max77686_irq_unmask,
-};
-
-static irqreturn_t max77686_irq_thread(int irq, void *data)
-{
-	struct max77686_dev *max77686 = data;
-	unsigned int irq_reg[MAX77686_IRQ_GROUP_NR] = {};
-	unsigned int irq_src;
-	int ret;
-	int i, cur_irq;
-
-	ret = regmap_read(max77686->regmap,  MAX77686_REG_INTSRC, &irq_src);
-	if (ret < 0) {
-		dev_err(max77686->dev, "Failed to read interrupt source: %d\n",
-				ret);
-		return IRQ_NONE;
-	}
-
-	if (debug_mask & MAX77686_DEBUG_IRQ_INT)
-		pr_info("%s: irq_src=0x%x\n", __func__, irq_src);
-
-	if (irq_src == MAX77686_IRQSRC_PMIC) {
-		ret = regmap_bulk_read(max77686->regmap,
-					 MAX77686_REG_INT1, irq_reg, 2);
-		if (ret < 0) {
-			dev_err(max77686->dev, "Failed to read interrupt source: %d\n",
-					ret);
-			return IRQ_NONE;
-		}
-
-		if (debug_mask & MAX77686_DEBUG_IRQ_INT)
-			pr_info("%s: int1=0x%x, int2=0x%x\n", __func__,
-				 irq_reg[PMIC_INT1], irq_reg[PMIC_INT2]);
-	}
-
-	if (irq_src & MAX77686_IRQSRC_RTC) {
-		ret = regmap_read(max77686->rtc_regmap,
-					MAX77686_RTC_INT, &irq_reg[RTC_INT]);
-		if (ret < 0) {
-			dev_err(max77686->dev, "Failed to read interrupt source: %d\n",
-					ret);
-			return IRQ_NONE;
-		}
-
-		if (debug_mask & MAX77686_DEBUG_IRQ_INT)
-			pr_info("%s: rtc int=0x%x\n", __func__,
-							 irq_reg[RTC_INT]);
-
-	}
-
-	for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++)
-		irq_reg[i] &= ~max77686->irq_masks_cur[i];
-
-	for (i = 0; i < MAX77686_IRQ_NR; i++) {
-		if (irq_reg[max77686_irqs[i].group] & max77686_irqs[i].mask) {
-			cur_irq = irq_find_mapping(max77686->irq_domain, i);
-			if (cur_irq)
-				handle_nested_irq(cur_irq);
-		}
-	}
-
-	return IRQ_HANDLED;
-}
-
-static int max77686_irq_domain_map(struct irq_domain *d, unsigned int irq,
-					irq_hw_number_t hw)
-{
-	struct max77686_dev *max77686 = d->host_data;
-
-	irq_set_chip_data(irq, max77686);
-	irq_set_chip_and_handler(irq, &max77686_irq_chip, handle_edge_irq);
-	irq_set_nested_thread(irq, 1);
-#ifdef CONFIG_ARM
-	set_irq_flags(irq, IRQF_VALID);
-#else
-	irq_set_noprobe(irq);
-#endif
-	return 0;
-}
-
-static struct irq_domain_ops max77686_irq_domain_ops = {
-	.map = max77686_irq_domain_map,
-};
-
-int max77686_irq_init(struct max77686_dev *max77686)
-{
-	struct irq_domain *domain;
-	int i;
-	int ret;
-	int val;
-	struct regmap *map;
-
-	mutex_init(&max77686->irqlock);
-
-	if (max77686->irq_gpio && !max77686->irq) {
-		max77686->irq = gpio_to_irq(max77686->irq_gpio);
-
-		if (debug_mask & MAX77686_DEBUG_IRQ_INT) {
-			ret = gpio_request(max77686->irq_gpio, "pmic_irq");
-			if (ret < 0) {
-				dev_err(max77686->dev,
-					"Failed to request gpio %d with ret:"
-					"%d\n",	max77686->irq_gpio, ret);
-				return IRQ_NONE;
-			}
-
-			gpio_direction_input(max77686->irq_gpio);
-			val = gpio_get_value(max77686->irq_gpio);
-			gpio_free(max77686->irq_gpio);
-			pr_info("%s: gpio_irq=%x\n", __func__, val);
-		}
-	}
-
-	if (!max77686->irq) {
-		dev_err(max77686->dev, "irq is not specified\n");
-		return -ENODEV;
-	}
-
-	/* Mask individual interrupt sources */
-	for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++) {
-		max77686->irq_masks_cur[i] = 0xff;
-		max77686->irq_masks_cache[i] = 0xff;
-		map = max77686_get_regmap(max77686, i);
-
-		if (IS_ERR_OR_NULL(map))
-			continue;
-		if (max77686_mask_reg[i] == MAX77686_REG_INVALID)
-			continue;
-
-		regmap_write(map, max77686_mask_reg[i], 0xff);
-	}
-	domain = irq_domain_add_linear(NULL, MAX77686_IRQ_NR,
-					&max77686_irq_domain_ops, max77686);
-	if (!domain) {
-		dev_err(max77686->dev, "could not create irq domain\n");
-		return -ENODEV;
-	}
-	max77686->irq_domain = domain;
-
-	ret = request_threaded_irq(max77686->irq, NULL, max77686_irq_thread,
-				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				   "max77686-irq", max77686);
-
-	if (ret)
-		dev_err(max77686->dev, "Failed to request IRQ %d: %d\n",
-			max77686->irq, ret);
-
-
-	if (debug_mask & MAX77686_DEBUG_IRQ_INFO)
-		pr_info("%s-\n", __func__);
-
-	return 0;
-}
-
-void max77686_irq_exit(struct max77686_dev *max77686)
-{
-	if (max77686->irq)
-		free_irq(max77686->irq, max77686);
-}
diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c
index ce869acf27ae..3cb41d02cd3d 100644
--- a/drivers/mfd/max77686.c
+++ b/drivers/mfd/max77686.c
@@ -25,6 +25,8 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/module.h>
 #include <linux/mfd/core.h>
@@ -46,6 +48,54 @@ static struct regmap_config max77686_regmap_config = {
 	.val_bits = 8,
 };
 
+static struct regmap_config max77686_rtc_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+static const struct regmap_irq max77686_irqs[] = {
+	/* INT1 interrupts */
+	{ .reg_offset = 0, .mask = MAX77686_INT1_PWRONF_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_INT1_PWRONR_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_INT1_JIGONBF_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_INT1_JIGONBR_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_INT1_ACOKBF_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_INT1_ACOKBR_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_INT1_ONKEY1S_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_INT1_MRSTB_MSK, },
+	/* INT2 interrupts */
+	{ .reg_offset = 1, .mask = MAX77686_INT2_140C_MSK, },
+	{ .reg_offset = 1, .mask = MAX77686_INT2_120C_MSK, },
+};
+
+static const struct regmap_irq_chip max77686_irq_chip = {
+	.name			= "max77686-pmic",
+	.status_base		= MAX77686_REG_INT1,
+	.mask_base		= MAX77686_REG_INT1MSK,
+	.num_regs		= 2,
+	.irqs			= max77686_irqs,
+	.num_irqs		= ARRAY_SIZE(max77686_irqs),
+};
+
+static const struct regmap_irq max77686_rtc_irqs[] = {
+	/* RTC interrupts */
+	{ .reg_offset = 0, .mask = MAX77686_RTCINT_RTC60S_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA1_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA2_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_RTCINT_SMPL_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_RTCINT_RTC1S_MSK, },
+	{ .reg_offset = 0, .mask = MAX77686_RTCINT_WTSR_MSK, },
+};
+
+static const struct regmap_irq_chip max77686_rtc_irq_chip = {
+	.name			= "max77686-rtc",
+	.status_base		= MAX77686_RTC_INT,
+	.mask_base		= MAX77686_RTC_INTM,
+	.num_regs		= 1,
+	.irqs			= max77686_rtc_irqs,
+	.num_irqs		= ARRAY_SIZE(max77686_rtc_irqs),
+};
+
 #ifdef CONFIG_OF
 static const struct of_device_id max77686_pmic_dt_match[] = {
 	{.compatible = "maxim,max77686", .data = NULL},
@@ -101,7 +151,6 @@ static int max77686_i2c_probe(struct i2c_client *i2c,
 	max77686->type = id->driver_data;
 
 	max77686->wakeup = pdata->wakeup;
-	max77686->irq_gpio = pdata->irq_gpio;
 	max77686->irq = i2c->irq;
 
 	max77686->regmap = devm_regmap_init_i2c(i2c, &max77686_regmap_config);
@@ -117,8 +166,7 @@ static int max77686_i2c_probe(struct i2c_client *i2c,
 		dev_err(max77686->dev,
 			"device not found on this channel (this is not an error)\n");
 		return -ENODEV;
-	} else
-		dev_info(max77686->dev, "device found\n");
+	}
 
 	max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC);
 	if (!max77686->rtc) {
@@ -127,15 +175,48 @@ static int max77686_i2c_probe(struct i2c_client *i2c,
 	}
 	i2c_set_clientdata(max77686->rtc, max77686);
 
-	max77686_irq_init(max77686);
+	max77686->rtc_regmap = devm_regmap_init_i2c(max77686->rtc,
+						    &max77686_rtc_regmap_config);
+	if (IS_ERR(max77686->rtc_regmap)) {
+		ret = PTR_ERR(max77686->rtc_regmap);
+		dev_err(max77686->dev, "failed to allocate RTC regmap: %d\n",
+			ret);
+		goto err_unregister_i2c;
+	}
+
+	ret = regmap_add_irq_chip(max77686->regmap, max77686->irq,
+				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
+				  IRQF_SHARED, 0, &max77686_irq_chip,
+				  &max77686->irq_data);
+	if (ret != 0) {
+		dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret);
+		goto err_unregister_i2c;
+	}
+	ret = regmap_add_irq_chip(max77686->rtc_regmap, max77686->irq,
+				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
+				  IRQF_SHARED, 0, &max77686_rtc_irq_chip,
+				  &max77686->rtc_irq_data);
+	if (ret != 0) {
+		dev_err(&i2c->dev, "failed to add RTC irq chip: %d\n", ret);
+		goto err_del_irqc;
+	}
 
 	ret = mfd_add_devices(max77686->dev, -1, max77686_devs,
 			      ARRAY_SIZE(max77686_devs), NULL, 0, NULL);
 	if (ret < 0) {
-		mfd_remove_devices(max77686->dev);
-		i2c_unregister_device(max77686->rtc);
+		dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret);
+		goto err_del_rtc_irqc;
 	}
 
+	return 0;
+
+err_del_rtc_irqc:
+	regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data);
+err_del_irqc:
+	regmap_del_irq_chip(max77686->irq, max77686->irq_data);
+err_unregister_i2c:
+	i2c_unregister_device(max77686->rtc);
+
 	return ret;
 }
 
@@ -144,6 +225,10 @@ static int max77686_i2c_remove(struct i2c_client *i2c)
 	struct max77686_dev *max77686 = i2c_get_clientdata(i2c);
 
 	mfd_remove_devices(max77686->dev);
+
+	regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data);
+	regmap_del_irq_chip(max77686->irq, max77686->irq_data);
+
 	i2c_unregister_device(max77686->rtc);
 
 	return 0;
diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c
index 9efe118a28ba..d20a7f0786eb 100644
--- a/drivers/rtc/rtc-max77686.c
+++ b/drivers/rtc/rtc-max77686.c
@@ -492,16 +492,11 @@ static int max77686_rtc_init_reg(struct max77686_rtc_info *info)
 	return ret;
 }
 
-static struct regmap_config max77686_rtc_regmap_config = {
-	.reg_bits = 8,
-	.val_bits = 8,
-};
-
 static int max77686_rtc_probe(struct platform_device *pdev)
 {
 	struct max77686_dev *max77686 = dev_get_drvdata(pdev->dev.parent);
 	struct max77686_rtc_info *info;
-	int ret, virq;
+	int ret;
 
 	dev_info(&pdev->dev, "%s\n", __func__);
 
@@ -514,14 +509,7 @@ static int max77686_rtc_probe(struct platform_device *pdev)
 	info->dev = &pdev->dev;
 	info->max77686 = max77686;
 	info->rtc = max77686->rtc;
-	info->max77686->rtc_regmap = devm_regmap_init_i2c(info->max77686->rtc,
-					 &max77686_rtc_regmap_config);
-	if (IS_ERR(info->max77686->rtc_regmap)) {
-		ret = PTR_ERR(info->max77686->rtc_regmap);
-		dev_err(info->max77686->dev, "Failed to allocate register map: %d\n",
-				ret);
-		return ret;
-	}
+
 	platform_set_drvdata(pdev, info);
 
 	ret = max77686_rtc_init_reg(info);
@@ -550,15 +538,16 @@ static int max77686_rtc_probe(struct platform_device *pdev)
 			ret = -EINVAL;
 		goto err_rtc;
 	}
-	virq = irq_create_mapping(max77686->irq_domain, MAX77686_RTCIRQ_RTCA1);
-	if (!virq) {
+
+	info->virq = regmap_irq_get_virq(max77686->rtc_irq_data,
+					 MAX77686_RTCIRQ_RTCA1);
+	if (!info->virq) {
 		ret = -ENXIO;
 		goto err_rtc;
 	}
-	info->virq = virq;
 
-	ret = devm_request_threaded_irq(&pdev->dev, virq, NULL,
-				max77686_rtc_alarm_irq, 0, "rtc-alarm0", info);
+	ret = devm_request_threaded_irq(&pdev->dev, info->virq, NULL,
+				max77686_rtc_alarm_irq, 0, "rtc-alarm1", info);
 	if (ret < 0)
 		dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
 			info->virq, ret);
diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h
index 8c75a9c8dfab..8e177806cba1 100644
--- a/include/linux/mfd/max77686-private.h
+++ b/include/linux/mfd/max77686-private.h
@@ -181,9 +181,6 @@ enum max77686_rtc_reg {
 	MAX77686_ALARM2_DATE		= 0x1B,
 };
 
-#define MAX77686_IRQSRC_PMIC	(0)
-#define MAX77686_IRQSRC_RTC		(1 << 0)
-
 enum max77686_irq_source {
 	PMIC_INT1 = 0,
 	PMIC_INT2,
@@ -205,16 +202,33 @@ enum max77686_irq {
 	MAX77686_PMICIRQ_140C,
 	MAX77686_PMICIRQ_120C,
 
-	MAX77686_RTCIRQ_RTC60S,
+	MAX77686_RTCIRQ_RTC60S = 0,
 	MAX77686_RTCIRQ_RTCA1,
 	MAX77686_RTCIRQ_RTCA2,
 	MAX77686_RTCIRQ_SMPL,
 	MAX77686_RTCIRQ_RTC1S,
 	MAX77686_RTCIRQ_WTSR,
-
-	MAX77686_IRQ_NR,
 };
 
+#define MAX77686_INT1_PWRONF_MSK	BIT(0)
+#define MAX77686_INT1_PWRONR_MSK	BIT(1)
+#define MAX77686_INT1_JIGONBF_MSK	BIT(2)
+#define MAX77686_INT1_JIGONBR_MSK	BIT(3)
+#define MAX77686_INT1_ACOKBF_MSK	BIT(4)
+#define MAX77686_INT1_ACOKBR_MSK	BIT(5)
+#define MAX77686_INT1_ONKEY1S_MSK	BIT(6)
+#define MAX77686_INT1_MRSTB_MSK		BIT(7)
+
+#define MAX77686_INT2_140C_MSK		BIT(0)
+#define MAX77686_INT2_120C_MSK		BIT(1)
+
+#define MAX77686_RTCINT_RTC60S_MSK	BIT(0)
+#define MAX77686_RTCINT_RTCA1_MSK	BIT(1)
+#define MAX77686_RTCINT_RTCA2_MSK	BIT(2)
+#define MAX77686_RTCINT_SMPL_MSK	BIT(3)
+#define MAX77686_RTCINT_RTC1S_MSK	BIT(4)
+#define MAX77686_RTCINT_WTSR_MSK	BIT(5)
+
 struct max77686_dev {
 	struct device *dev;
 	struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */
@@ -224,11 +238,10 @@ struct max77686_dev {
 
 	struct regmap *regmap;		/* regmap for mfd */
 	struct regmap *rtc_regmap;	/* regmap for rtc */
-
-	struct irq_domain *irq_domain;
+	struct regmap_irq_chip_data *irq_data;
+	struct regmap_irq_chip_data *rtc_irq_data;
 
 	int irq;
-	int irq_gpio;
 	bool wakeup;
 	struct mutex irqlock;
 	int irq_masks_cur[MAX77686_IRQ_GROUP_NR];
diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h
index 46c0f320ed76..4cbcc13e8a2a 100644
--- a/include/linux/mfd/max77686.h
+++ b/include/linux/mfd/max77686.h
@@ -89,8 +89,6 @@ struct max77686_opmode_data {
 };
 
 struct max77686_platform_data {
-	/* IRQ */
-	int irq_gpio;
 	int ono;
 	int wakeup;
 
-- 
cgit v1.2.3


From c708a98f01068fe07f77448031f9f5317423e777 Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Wed, 25 Jun 2014 11:00:12 +0100
Subject: thermal: document struct thermal_zone_device and thermal_governor

Document struct thermal_zone_device and struct thermal_governor fields
and their use by the thermal framework code.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 include/linux/thermal.h | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index f7e11c7ea7d9..0305cde21a74 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -158,6 +158,42 @@ struct thermal_attr {
 	char name[THERMAL_NAME_LENGTH];
 };
 
+/**
+ * struct thermal_zone_device - structure for a thermal zone
+ * @id:		unique id number for each thermal zone
+ * @type:	the thermal zone device type
+ * @device:	&struct device for this thermal zone
+ * @trip_temp_attrs:	attributes for trip points for sysfs: trip temperature
+ * @trip_type_attrs:	attributes for trip points for sysfs: trip type
+ * @trip_hyst_attrs:	attributes for trip points for sysfs: trip hysteresis
+ * @devdata:	private pointer for device private data
+ * @trips:	number of trip points the thermal zone supports
+ * @passive_delay:	number of milliseconds to wait between polls when
+ *			performing passive cooling.  Currenty only used by the
+ *			step-wise governor
+ * @polling_delay:	number of milliseconds to wait between polls when
+ *			checking whether trip points have been crossed (0 for
+ *			interrupt driven systems)
+ * @temperature:	current temperature.  This is only for core code,
+ *			drivers should use thermal_zone_get_temp() to get the
+ *			current temperature
+ * @last_temperature:	previous temperature read
+ * @emul_temperature:	emulated temperature when using CONFIG_THERMAL_EMULATION
+ * @passive:		1 if you've crossed a passive trip point, 0 otherwise.
+ *			Currenty only used by the step-wise governor.
+ * @forced_passive:	If > 0, temperature at which to switch on all ACPI
+ *			processor cooling devices.  Currently only used by the
+ *			step-wise governor.
+ * @ops:	operations this &thermal_zone_device supports
+ * @tzp:	thermal zone parameters
+ * @governor:	pointer to the governor for this thermal zone
+ * @thermal_instances:	list of &struct thermal_instance of this thermal zone
+ * @idr:	&struct idr to generate unique id for this zone's cooling
+ *		devices
+ * @lock:	lock to protect thermal_instances list
+ * @node:	node in thermal_tz_list (in thermal_core.c)
+ * @poll_queue:	delayed work for polling
+ */
 struct thermal_zone_device {
 	int id;
 	char type[THERMAL_NAME_LENGTH];
@@ -179,12 +215,18 @@ struct thermal_zone_device {
 	struct thermal_governor *governor;
 	struct list_head thermal_instances;
 	struct idr idr;
-	struct mutex lock; /* protect thermal_instances list */
+	struct mutex lock;
 	struct list_head node;
 	struct delayed_work poll_queue;
 };
 
-/* Structure that holds thermal governor information */
+/**
+ * struct thermal_governor - structure that holds thermal governor information
+ * @name:	name of the governor
+ * @throttle:	callback called for every trip point even if temperature is
+ *		below the trip point temperature
+ * @governor_list:	node in thermal_governor_list (in thermal_core.c)
+ */
 struct thermal_governor {
 	char name[THERMAL_NAME_LENGTH];
 	int (*throttle)(struct thermal_zone_device *tz, int trip);
-- 
cgit v1.2.3


From bb5fd0b6daaf0da0b1e78c699b8582984373d3f4 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Fri, 11 Jul 2014 09:49:41 +0200
Subject: mtd: nand: define struct nand_timings

Define a struct containing the standard NAND timings as described in NAND
datasheets.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/nand.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 1cff329ae13d..cdda207c16e1 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -948,4 +948,53 @@ static inline int jedec_feature(struct nand_chip *chip)
 	return chip->jedec_version ? le16_to_cpu(chip->jedec_params.features)
 		: 0;
 }
+
+/**
+ * struct nand_sdr_timings - SDR NAND chip timings
+ *
+ * This struct defines the timing requirements of a SDR NAND chip.
+ * These informations can be found in every NAND datasheets and the timings
+ * meaning are described in the ONFI specifications:
+ * www.onfi.org/~/media/ONFI/specs/onfi_3_1_spec.pdf (chapter 4.15 Timing
+ * Parameters)
+ *
+ * All these timings are expressed in picoseconds.
+ */
+
+struct nand_sdr_timings {
+	u32 tALH_min;
+	u32 tADL_min;
+	u32 tALS_min;
+	u32 tAR_min;
+	u32 tCEA_max;
+	u32 tCEH_min;
+	u32 tCH_min;
+	u32 tCHZ_max;
+	u32 tCLH_min;
+	u32 tCLR_min;
+	u32 tCLS_min;
+	u32 tCOH_min;
+	u32 tCS_min;
+	u32 tDH_min;
+	u32 tDS_min;
+	u32 tFEAT_max;
+	u32 tIR_min;
+	u32 tITC_max;
+	u32 tRC_min;
+	u32 tREA_max;
+	u32 tREH_min;
+	u32 tRHOH_min;
+	u32 tRHW_min;
+	u32 tRHZ_max;
+	u32 tRLOH_min;
+	u32 tRP_min;
+	u32 tRR_min;
+	u64 tRST_max;
+	u32 tWB_max;
+	u32 tWC_min;
+	u32 tWH_min;
+	u32 tWHR_min;
+	u32 tWP_min;
+	u32 tWW_min;
+};
 #endif /* __LINUX_MTD_NAND_H */
-- 
cgit v1.2.3


From 974647ea8a13021a91d558df61d598bcabf73439 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Fri, 11 Jul 2014 09:49:42 +0200
Subject: mtd: nand: add ONFI timing mode to nand_timings converter

Add a converter to retrieve NAND timings from an ONFI NAND timing mode.
At the moment, only SDR NAND timings are supported.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/Makefile       |   2 +-
 drivers/mtd/nand/nand_timings.c | 253 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h        |   3 +
 3 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/nand_timings.c

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 542b5689eb63..a035e7cc6d46 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -50,4 +50,4 @@ obj-$(CONFIG_MTD_NAND_GPMI_NAND)	+= gpmi-nand/
 obj-$(CONFIG_MTD_NAND_XWAY)		+= xway_nand.o
 obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
 
-nand-objs := nand_base.o nand_bbt.o
+nand-objs := nand_base.o nand_bbt.o nand_timings.o
diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c
new file mode 100644
index 000000000000..8b36253420fa
--- /dev/null
+++ b/drivers/mtd/nand/nand_timings.c
@@ -0,0 +1,253 @@
+/*
+ *  Copyright (C) 2014 Free Electrons
+ *
+ *  Author: Boris BREZILLON <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/mtd/nand.h>
+
+static const struct nand_sdr_timings onfi_sdr_timings[] = {
+	/* Mode 0 */
+	{
+		.tADL_min = 200000,
+		.tALH_min = 20000,
+		.tALS_min = 50000,
+		.tAR_min = 25000,
+		.tCEA_max = 100000,
+		.tCEH_min = 20000,
+		.tCH_min = 20000,
+		.tCHZ_max = 100000,
+		.tCLH_min = 20000,
+		.tCLR_min = 20000,
+		.tCLS_min = 50000,
+		.tCOH_min = 0,
+		.tCS_min = 70000,
+		.tDH_min = 20000,
+		.tDS_min = 40000,
+		.tFEAT_max = 1000000,
+		.tIR_min = 10000,
+		.tITC_max = 1000000,
+		.tRC_min = 100000,
+		.tREA_max = 40000,
+		.tREH_min = 30000,
+		.tRHOH_min = 0,
+		.tRHW_min = 200000,
+		.tRHZ_max = 200000,
+		.tRLOH_min = 0,
+		.tRP_min = 50000,
+		.tRST_max = 250000000000,
+		.tWB_max = 200000,
+		.tRR_min = 40000,
+		.tWC_min = 100000,
+		.tWH_min = 30000,
+		.tWHR_min = 120000,
+		.tWP_min = 50000,
+		.tWW_min = 100000,
+	},
+	/* Mode 1 */
+	{
+		.tADL_min = 100000,
+		.tALH_min = 10000,
+		.tALS_min = 25000,
+		.tAR_min = 10000,
+		.tCEA_max = 45000,
+		.tCEH_min = 20000,
+		.tCH_min = 10000,
+		.tCHZ_max = 50000,
+		.tCLH_min = 10000,
+		.tCLR_min = 10000,
+		.tCLS_min = 25000,
+		.tCOH_min = 15000,
+		.tCS_min = 35000,
+		.tDH_min = 10000,
+		.tDS_min = 20000,
+		.tFEAT_max = 1000000,
+		.tIR_min = 0,
+		.tITC_max = 1000000,
+		.tRC_min = 50000,
+		.tREA_max = 30000,
+		.tREH_min = 15000,
+		.tRHOH_min = 15000,
+		.tRHW_min = 100000,
+		.tRHZ_max = 100000,
+		.tRLOH_min = 0,
+		.tRP_min = 25000,
+		.tRR_min = 20000,
+		.tRST_max = 500000000,
+		.tWB_max = 100000,
+		.tWC_min = 45000,
+		.tWH_min = 15000,
+		.tWHR_min = 80000,
+		.tWP_min = 25000,
+		.tWW_min = 100000,
+	},
+	/* Mode 2 */
+	{
+		.tADL_min = 100000,
+		.tALH_min = 10000,
+		.tALS_min = 15000,
+		.tAR_min = 10000,
+		.tCEA_max = 30000,
+		.tCEH_min = 20000,
+		.tCH_min = 10000,
+		.tCHZ_max = 50000,
+		.tCLH_min = 10000,
+		.tCLR_min = 10000,
+		.tCLS_min = 15000,
+		.tCOH_min = 15000,
+		.tCS_min = 25000,
+		.tDH_min = 5000,
+		.tDS_min = 15000,
+		.tFEAT_max = 1000000,
+		.tIR_min = 0,
+		.tITC_max = 1000000,
+		.tRC_min = 35000,
+		.tREA_max = 25000,
+		.tREH_min = 15000,
+		.tRHOH_min = 15000,
+		.tRHW_min = 100000,
+		.tRHZ_max = 100000,
+		.tRLOH_min = 0,
+		.tRR_min = 20000,
+		.tRST_max = 500000000,
+		.tWB_max = 100000,
+		.tRP_min = 17000,
+		.tWC_min = 35000,
+		.tWH_min = 15000,
+		.tWHR_min = 80000,
+		.tWP_min = 17000,
+		.tWW_min = 100000,
+	},
+	/* Mode 3 */
+	{
+		.tADL_min = 100000,
+		.tALH_min = 5000,
+		.tALS_min = 10000,
+		.tAR_min = 10000,
+		.tCEA_max = 25000,
+		.tCEH_min = 20000,
+		.tCH_min = 5000,
+		.tCHZ_max = 50000,
+		.tCLH_min = 5000,
+		.tCLR_min = 10000,
+		.tCLS_min = 10000,
+		.tCOH_min = 15000,
+		.tCS_min = 25000,
+		.tDH_min = 5000,
+		.tDS_min = 10000,
+		.tFEAT_max = 1000000,
+		.tIR_min = 0,
+		.tITC_max = 1000000,
+		.tRC_min = 30000,
+		.tREA_max = 20000,
+		.tREH_min = 10000,
+		.tRHOH_min = 15000,
+		.tRHW_min = 100000,
+		.tRHZ_max = 100000,
+		.tRLOH_min = 0,
+		.tRP_min = 15000,
+		.tRR_min = 20000,
+		.tRST_max = 500000000,
+		.tWB_max = 100000,
+		.tWC_min = 30000,
+		.tWH_min = 10000,
+		.tWHR_min = 80000,
+		.tWP_min = 15000,
+		.tWW_min = 100000,
+	},
+	/* Mode 4 */
+	{
+		.tADL_min = 70000,
+		.tALH_min = 5000,
+		.tALS_min = 10000,
+		.tAR_min = 10000,
+		.tCEA_max = 25000,
+		.tCEH_min = 20000,
+		.tCH_min = 5000,
+		.tCHZ_max = 30000,
+		.tCLH_min = 5000,
+		.tCLR_min = 10000,
+		.tCLS_min = 10000,
+		.tCOH_min = 15000,
+		.tCS_min = 20000,
+		.tDH_min = 5000,
+		.tDS_min = 10000,
+		.tFEAT_max = 1000000,
+		.tIR_min = 0,
+		.tITC_max = 1000000,
+		.tRC_min = 25000,
+		.tREA_max = 20000,
+		.tREH_min = 10000,
+		.tRHOH_min = 15000,
+		.tRHW_min = 100000,
+		.tRHZ_max = 100000,
+		.tRLOH_min = 5000,
+		.tRP_min = 12000,
+		.tRR_min = 20000,
+		.tRST_max = 500000000,
+		.tWB_max = 100000,
+		.tWC_min = 25000,
+		.tWH_min = 10000,
+		.tWHR_min = 80000,
+		.tWP_min = 12000,
+		.tWW_min = 100000,
+	},
+	/* Mode 5 */
+	{
+		.tADL_min = 70000,
+		.tALH_min = 5000,
+		.tALS_min = 10000,
+		.tAR_min = 10000,
+		.tCEA_max = 25000,
+		.tCEH_min = 20000,
+		.tCH_min = 5000,
+		.tCHZ_max = 30000,
+		.tCLH_min = 5000,
+		.tCLR_min = 10000,
+		.tCLS_min = 10000,
+		.tCOH_min = 15000,
+		.tCS_min = 15000,
+		.tDH_min = 5000,
+		.tDS_min = 7000,
+		.tFEAT_max = 1000000,
+		.tIR_min = 0,
+		.tITC_max = 1000000,
+		.tRC_min = 20000,
+		.tREA_max = 16000,
+		.tREH_min = 7000,
+		.tRHOH_min = 15000,
+		.tRHW_min = 100000,
+		.tRHZ_max = 100000,
+		.tRLOH_min = 5000,
+		.tRP_min = 10000,
+		.tRR_min = 20000,
+		.tRST_max = 500000000,
+		.tWB_max = 100000,
+		.tWC_min = 20000,
+		.tWH_min = 7000,
+		.tWHR_min = 80000,
+		.tWP_min = 10000,
+		.tWW_min = 100000,
+	},
+};
+
+/**
+ * onfi_async_timing_mode_to_sdr_timings - [NAND Interface] Retrieve NAND
+ * timings according to the given ONFI timing mode
+ * @mode: ONFI timing mode
+ */
+const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode)
+{
+	if (mode < 0 || mode >= ARRAY_SIZE(onfi_sdr_timings))
+		return ERR_PTR(-EINVAL);
+
+	return &onfi_sdr_timings[mode];
+}
+EXPORT_SYMBOL(onfi_async_timing_mode_to_sdr_timings);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index cdda207c16e1..3083c53e0270 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -997,4 +997,7 @@ struct nand_sdr_timings {
 	u32 tWP_min;
 	u32 tWW_min;
 };
+
+/* get timing characteristics from ONFI timing mode. */
+const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode);
 #endif /* __LINUX_MTD_NAND_H */
-- 
cgit v1.2.3


From 76be4a54157ab0059fb29d8d516db46d239812e2 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Thu, 12 Jun 2014 17:15:22 +0530
Subject: ARM: OMAP2+: DMA: remove requirement of irq for platform-dma driver

we have currently 2 DMA drivers that try to co-exist.
drivers/dma/omap-dma.c which registers it's own IRQ and is device tree
aware and uses arch/arm/plat-omap/dma.c instance created by
arch/arm/mach-omap2/dma.c to maintain channel usage (omap_request_dma).

Currently both try to register interrupts and mach-omap2/plat-omap dma.c
attempts to use the IRQ number registered by hwmod to register it's own
interrupt handler.

Now, there is no reasonable way of static allocating DMA irq in GIC
SPI when we use crossbar. However, since the dma_chan structure is
freed as a result of IRQ not being present due to devm allocation,
maintaining information of channel by platform code fails at a later
point in time when that region of memory is reused.

So, if hwmod does not indicate an IRQ number, then, assume that
dma-engine will take care of the interrupt handling.

Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/dma.c | 3 +++
 arch/arm/plat-omap/dma.c  | 5 +++--
 include/linux/omap-dma.h  | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c
index a6d2cf1f8d02..e1a56d87599e 100644
--- a/arch/arm/mach-omap2/dma.c
+++ b/arch/arm/mach-omap2/dma.c
@@ -259,6 +259,9 @@ static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused)
 	if (cpu_is_omap34xx() && (omap_type() != OMAP2_DEVICE_TYPE_GP))
 		d->dev_caps |= HS_CHANNELS_RESERVED;
 
+	if (platform_get_irq_byname(pdev, "0") < 0)
+		d->dev_caps |= DMA_ENGINE_HANDLE_IRQ;
+
 	/* Check the capabilities register for descriptor loading feature */
 	if (dma_read(CAPS_0, 0) & DMA_HAS_DESCRIPTOR_CAPS)
 		dma_common_ch_end = CCDN;
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index b5608b1f9fbd..7aae0e5b188c 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -2100,7 +2100,7 @@ static int omap_system_dma_probe(struct platform_device *pdev)
 		omap_dma_set_global_params(DMA_DEFAULT_ARB_RATE,
 				DMA_DEFAULT_FIFO_DEPTH, 0);
 
-	if (dma_omap2plus()) {
+	if (dma_omap2plus() && !(d->dev_caps & DMA_ENGINE_HANDLE_IRQ)) {
 		strcpy(irq_name, "0");
 		dma_irq = platform_get_irq_byname(pdev, irq_name);
 		if (dma_irq < 0) {
@@ -2145,7 +2145,8 @@ static int omap_system_dma_remove(struct platform_device *pdev)
 		char irq_name[4];
 		strcpy(irq_name, "0");
 		dma_irq = platform_get_irq_byname(pdev, irq_name);
-		remove_irq(dma_irq, &omap24xx_dma_irq);
+		if (dma_irq >= 0)
+			remove_irq(dma_irq, &omap24xx_dma_irq);
 	} else {
 		int irq_rel = 0;
 		for ( ; irq_rel < dma_chan_count; irq_rel++) {
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 88e6ea4a5d36..6f06f8bc612c 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -130,6 +130,7 @@
 #define IS_WORD_16			BIT(0xd)
 #define ENABLE_16XX_MODE		BIT(0xe)
 #define HS_CHANNELS_RESERVED		BIT(0xf)
+#define DMA_ENGINE_HANDLE_IRQ		BIT(0x10)
 
 /* Defines for DMA Capabilities */
 #define DMA_HAS_TRANSPARENT_CAPS	(0x1 << 18)
-- 
cgit v1.2.3


From 14c8a620ba436511b1347c592633befa49535176 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 23 Jul 2014 10:47:49 +0200
Subject: gpio: drop retval check enforcing from gpiochip_remove()

As we start to decomission the return value from gpiochip_remove()
the compilers emit warnings due to the function being tagged
__must_check. So drop this until we remove the return value
altogether.

Cc: Abdoulaye Berthe <berthe.ab@gmail.com>
Suggested-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 573e4f3243d0..ca3024554a2d 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -141,7 +141,7 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip,
 
 /* add/remove chips */
 extern int gpiochip_add(struct gpio_chip *chip);
-extern int __must_check gpiochip_remove(struct gpio_chip *chip);
+extern int gpiochip_remove(struct gpio_chip *chip);
 extern struct gpio_chip *gpiochip_find(void *data,
 			      int (*match)(struct gpio_chip *chip, void *data));
 
-- 
cgit v1.2.3


From ec4c4d877becf1c224f45347f4fc0016765e00d0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 15 Jul 2014 09:58:06 +0200
Subject: video: fix up versatile CLCD helper move

commit 11c32d7b6274cb0f ("video: move Versatile CLCD helpers")
moved files out of the plat-versatile directory but in the process
got a few of the dependencies wrong:

- If CONFIG_FB is not set, the file no longer gets built, resulting
  in a link error
- If CONFIG_FB or CONFIG_FB_ARMCLCD are disabled, we also get a
  Kconfig warning for incorrect dependencies due to the symbol
  being 'select'ed from the platform Kconfig.
- When the file is not built, we also get a link error for missing
  symbols.

This patch should fix all three, by removing the 'select' statements,
changing the Kconfig description of the symbol to be enabled in
exactly the right configurations, and adding inline stub functions
for the case when the framebuffer driver is disabled.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/Kconfig                                   |  2 --
 arch/arm/mach-vexpress/Kconfig                     |  1 -
 drivers/video/fbdev/Kconfig                        |  6 ++----
 include/linux/platform_data/video-clcd-versatile.h | 18 ++++++++++++++++++
 4 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c32064de77d8..11f18a04c066 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -332,7 +332,6 @@ config ARCH_REALVIEW
 	select ICST
 	select NEED_MACH_MEMORY_H
 	select PLAT_VERSATILE
-	select PLAT_VERSATILE_CLCD
 	help
 	  This enables support for ARM Ltd RealView boards.
 
@@ -347,7 +346,6 @@ config ARCH_VERSATILE
 	select HAVE_MACH_CLKDEV
 	select ICST
 	select PLAT_VERSATILE
-	select PLAT_VERSATILE_CLCD
 	select PLAT_VERSATILE_CLOCK
 	select VERSATILE_FPGA_IRQ
 	help
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index d8b9330f896a..e9166dfc4756 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -13,7 +13,6 @@ menuconfig ARCH_VEXPRESS
 	select ICST
 	select NO_IOPORT_MAP
 	select PLAT_VERSATILE
-	select PLAT_VERSATILE_CLCD
 	select POWER_RESET
 	select POWER_RESET_VEXPRESS
 	select POWER_SUPPLY
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 92026d31bb48..bdf463072247 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -292,10 +292,8 @@ config FB_ARMCLCD
 
 # Helper logic selected only by the ARM Versatile platform family.
 config PLAT_VERSATILE_CLCD
-	depends on FB_ARMCLCD
-	depends on (PLAT_VERSATILE || ARCH_INTEGRATOR)
-	default y
-	bool
+	def_bool ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS
+	depends on FB_ARMCLCD && FB=y
 
 config FB_ACORN
 	bool "Acorn VIDC support"
diff --git a/include/linux/platform_data/video-clcd-versatile.h b/include/linux/platform_data/video-clcd-versatile.h
index 6bb6a1d2019b..09ccf182af4d 100644
--- a/include/linux/platform_data/video-clcd-versatile.h
+++ b/include/linux/platform_data/video-clcd-versatile.h
@@ -1,9 +1,27 @@
 #ifndef PLAT_CLCD_H
 #define PLAT_CLCD_H
 
+#ifdef CONFIG_PLAT_VERSATILE_CLCD
 struct clcd_panel *versatile_clcd_get_panel(const char *);
 int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long);
 int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *);
 void versatile_clcd_remove_dma(struct clcd_fb *);
+#else
+static inline struct clcd_panel *versatile_clcd_get_panel(const char *s)
+{
+	return NULL;
+}
+static inline int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize)
+{
+	return -ENODEV;
+}
+static inline int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vm)
+{
+	return -ENODEV;
+}
+static inline void versatile_clcd_remove_dma(struct clcd_fb *fb)
+{
+}
+#endif
 
 #endif
-- 
cgit v1.2.3


From 1bd6b601fe196b6fbce2c93536ce0f3f53577cec Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Tue, 22 Jul 2014 16:17:41 +0900
Subject: gpio: make gpiochip_get_desc() gpiolib-private

As GPIO descriptors are not going to remain unique anymore, having this
function public is not safe. Restrain its use to gpiolib since we have
no user outside of it.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-of.c   | 2 +-
 drivers/gpio/gpiolib.c      | 1 -
 drivers/gpio/gpiolib.h      | 2 ++
 include/linux/gpio/driver.h | 3 ---
 4 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 3e2fae205bee..7cfdc2278905 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -23,7 +23,7 @@
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/slab.h>
 
-struct gpio_desc;
+#include "gpiolib.h"
 
 /* Private data structure for of_gpiochip_find_and_xlate */
 struct gg_data {
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index c5509359ba88..38d176e31379 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -82,7 +82,6 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip,
 
 	return &chip->desc[hwnum];
 }
-EXPORT_SYMBOL_GPL(gpiochip_get_desc);
 
 /**
  * Convert a GPIO descriptor to the integer namespace.
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 98020c393eb3..acbb9335f08c 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -51,6 +51,8 @@ void gpiochip_free_own_desc(struct gpio_desc *desc);
 struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
 		   const char *list_name, int index, enum of_gpio_flags *flags);
 
+struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum);
+
 extern struct spinlock gpio_lock;
 extern struct list_head gpio_chips;
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index ca3024554a2d..88f92dfae545 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -151,9 +151,6 @@ void gpiod_unlock_as_irq(struct gpio_desc *desc);
 
 struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
 
-struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip,
-				    u16 hwnum);
-
 enum gpio_lookup_flags {
 	GPIO_ACTIVE_HIGH = (0 << 0),
 	GPIO_ACTIVE_LOW = (1 << 0),
-- 
cgit v1.2.3


From d74be6dfea1b96cfb4bd79d9254fa9d21ed5f131 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Tue, 22 Jul 2014 16:17:42 +0900
Subject: gpio: remove gpiod_lock/unlock_as_irq()

gpio_lock/unlock_as_irq() are working with (chip, offset) arguments and
are thus not using the old integer namespace. Therefore, there is no
reason to have gpiod variants of these functions working with
descriptors, especially since the (chip, offset) tuple is more suitable
to the users of these functions (GPIO drivers, whereas GPIO descriptors
are targeted at GPIO consumers).

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/gpio/driver.txt |  4 ++--
 drivers/gpio/gpiolib-acpi.c   |  6 +++---
 drivers/gpio/gpiolib-legacy.c | 12 ------------
 drivers/gpio/gpiolib-sysfs.c  |  4 ++--
 drivers/gpio/gpiolib.c        | 30 ++++++++++++++++--------------
 include/asm-generic/gpio.h    |  3 ---
 include/linux/gpio/driver.h   |  4 ++--
 7 files changed, 25 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt
index fa9a0a8b3734..224dbbcd1804 100644
--- a/Documentation/gpio/driver.txt
+++ b/Documentation/gpio/driver.txt
@@ -157,12 +157,12 @@ Locking IRQ usage
 Input GPIOs can be used as IRQ signals. When this happens, a driver is requested
 to mark the GPIO as being used as an IRQ:
 
-	int gpiod_lock_as_irq(struct gpio_desc *desc)
+	int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
 
 This will prevent the use of non-irq related GPIO APIs until the GPIO IRQ lock
 is released:
 
-	void gpiod_unlock_as_irq(struct gpio_desc *desc)
+	void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
 
 When implementing an irqchip inside a GPIO driver, these two functions should
 typically be called in the .startup() and .shutdown() callbacks from the
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 4a987917c186..d2e8600df02c 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -157,7 +157,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 
 	gpiod_direction_input(desc);
 
-	ret = gpiod_lock_as_irq(desc);
+	ret = gpio_lock_as_irq(chip, pin);
 	if (ret) {
 		dev_err(chip->dev, "Failed to lock GPIO as interrupt\n");
 		goto fail_free_desc;
@@ -212,7 +212,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 fail_free_event:
 	kfree(event);
 fail_unlock_irq:
-	gpiod_unlock_as_irq(desc);
+	gpio_unlock_as_irq(chip, pin);
 fail_free_desc:
 	gpiochip_free_own_desc(desc);
 
@@ -263,7 +263,7 @@ static void acpi_gpiochip_free_interrupts(struct acpi_gpio_chip *acpi_gpio)
 		desc = gpiochip_get_desc(chip, event->pin);
 		if (WARN_ON(IS_ERR(desc)))
 			continue;
-		gpiod_unlock_as_irq(desc);
+		gpio_unlock_as_irq(chip, event->pin);
 		gpiochip_free_own_desc(desc);
 		list_del(&event->node);
 		kfree(event);
diff --git a/drivers/gpio/gpiolib-legacy.c b/drivers/gpio/gpiolib-legacy.c
index c684d94cdbb4..078ae6c2df79 100644
--- a/drivers/gpio/gpiolib-legacy.c
+++ b/drivers/gpio/gpiolib-legacy.c
@@ -100,15 +100,3 @@ void gpio_free_array(const struct gpio *array, size_t num)
 		gpio_free((array++)->gpio);
 }
 EXPORT_SYMBOL_GPL(gpio_free_array);
-
-int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
-{
-	return gpiod_lock_as_irq(gpiochip_get_desc(chip, offset));
-}
-EXPORT_SYMBOL_GPL(gpio_lock_as_irq);
-
-void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
-{
-	return gpiod_unlock_as_irq(gpiochip_get_desc(chip, offset));
-}
-EXPORT_SYMBOL_GPL(gpio_unlock_as_irq);
diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
index f150aa288fa1..be45a9283c28 100644
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -161,7 +161,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
 	desc->flags &= ~GPIO_TRIGGER_MASK;
 
 	if (!gpio_flags) {
-		gpiod_unlock_as_irq(desc);
+		gpio_unlock_as_irq(desc->chip, gpio_chip_hwgpio(desc));
 		ret = 0;
 		goto free_id;
 	}
@@ -200,7 +200,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
 	if (ret < 0)
 		goto free_id;
 
-	ret = gpiod_lock_as_irq(desc);
+	ret = gpio_lock_as_irq(desc->chip, gpio_chip_hwgpio(desc));
 	if (ret < 0) {
 		gpiod_warn(desc, "failed to flag the GPIO for IRQ\n");
 		goto free_id;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 38d176e31379..7582207c92e7 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1428,44 +1428,46 @@ int gpiod_to_irq(const struct gpio_desc *desc)
 EXPORT_SYMBOL_GPL(gpiod_to_irq);
 
 /**
- * gpiod_lock_as_irq() - lock a GPIO to be used as IRQ
- * @gpio: the GPIO line to lock as used for IRQ
+ * gpio_lock_as_irq() - lock a GPIO to be used as IRQ
+ * @chip: the chip the GPIO to lock belongs to
+ * @offset: the offset of the GPIO to lock as IRQ
  *
  * This is used directly by GPIO drivers that want to lock down
  * a certain GPIO line to be used for IRQs.
  */
-int gpiod_lock_as_irq(struct gpio_desc *desc)
+int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
 {
-	if (!desc)
+	if (offset >= chip->ngpio)
 		return -EINVAL;
 
-	if (test_bit(FLAG_IS_OUT, &desc->flags)) {
-		gpiod_err(desc,
+	if (test_bit(FLAG_IS_OUT, &chip->desc[offset].flags)) {
+		chip_err(chip,
 			  "%s: tried to flag a GPIO set as output for IRQ\n",
 			  __func__);
 		return -EIO;
 	}
 
-	set_bit(FLAG_USED_AS_IRQ, &desc->flags);
+	set_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(gpiod_lock_as_irq);
+EXPORT_SYMBOL_GPL(gpio_lock_as_irq);
 
 /**
- * gpiod_unlock_as_irq() - unlock a GPIO used as IRQ
- * @gpio: the GPIO line to unlock from IRQ usage
+ * gpio_unlock_as_irq() - unlock a GPIO used as IRQ
+ * @chip: the chip the GPIO to lock belongs to
+ * @offset: the offset of the GPIO to lock as IRQ
  *
  * This is used directly by GPIO drivers that want to indicate
  * that a certain GPIO is no longer used exclusively for IRQ.
  */
-void gpiod_unlock_as_irq(struct gpio_desc *desc)
+void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
 {
-	if (!desc)
+	if (offset >= chip->ngpio)
 		return;
 
-	clear_bit(FLAG_USED_AS_IRQ, &desc->flags);
+	clear_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags);
 }
-EXPORT_SYMBOL_GPL(gpiod_unlock_as_irq);
+EXPORT_SYMBOL_GPL(gpio_unlock_as_irq);
 
 /**
  * gpiod_get_raw_value_cansleep() - return a gpio's raw value
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 23e364538ab5..c1d4105e1c1d 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -110,9 +110,6 @@ static inline int __gpio_to_irq(unsigned gpio)
 	return gpiod_to_irq(gpio_to_desc(gpio));
 }
 
-extern int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
-extern void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
-
 extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
 extern int gpio_request_array(const struct gpio *array, size_t num);
 extern void gpio_free_array(const struct gpio *array, size_t num);
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 88f92dfae545..c66c91682d9e 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -146,8 +146,8 @@ extern struct gpio_chip *gpiochip_find(void *data,
 			      int (*match)(struct gpio_chip *chip, void *data));
 
 /* lock/unlock as IRQ */
-int gpiod_lock_as_irq(struct gpio_desc *desc);
-void gpiod_unlock_as_irq(struct gpio_desc *desc);
+int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
+void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
 
 struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
 
-- 
cgit v1.2.3


From d9bb5a43277d2dcc514fa693f741bbc38e2e2271 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Jul 2014 17:48:04 -0400
Subject: svcrdma: Double the default credit limit

The RDMA credit limit controls how many concurrent RPCs are allowed
per connection.

An NFS/RDMA client and server exchange their credit limits in the
RPC/RDMA headers. The Linux client and the Solaris client and server
allow 32 credits. The Linux server allows only 16, which limits its
performance.

Set the server's default credit limit to 32, like the other well-
known implementations, so the out-of-the-shrinkwrap performance of
the Linux server is better.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 5cf99a016368..975da754c778 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -174,8 +174,7 @@ struct svcxprt_rdma {
  * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
 #define RPCRDMA_ORD             (64/4)
 #define RPCRDMA_SQ_DEPTH_MULT   8
-#define RPCRDMA_MAX_THREADS     16
-#define RPCRDMA_MAX_REQUESTS    16
+#define RPCRDMA_MAX_REQUESTS    32
 #define RPCRDMA_MAX_REQ_SIZE    4096
 
 /* svc_rdma_marshal.c */
-- 
cgit v1.2.3


From 50a77c658b80e7e3303e3bcec195b30e2b62d513 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Wed, 23 Jul 2014 12:38:48 -0700
Subject: Input: atmel_mxt_ts - download device config using firmware loader

The existing implementation which encodes the configuration as a binary
blob in platform data is unsatisfactory since it requires a kernel
recompile for the configuration to be changed, and it doesn't deal well
with firmware changes that move values around on the chip.

Atmel define an ASCII format for the configuration which can be exported
from their tools. This patch implements a parser for that format which
loads the configuration via the firmware loader and sends it to the MXT
chip.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Benson Leung <bleung@chromium.org>
Acked-by: Yufeng Shen <miletus@chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/atmel_mxt_ts.c  | 278 ++++++++++++++++++++++--------
 drivers/platform/chrome/chromeos_laptop.c |   4 -
 include/linux/i2c/atmel_mxt_ts.h          |   3 -
 3 files changed, 204 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 4317273c2138..c6b5e7dbd942 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -2,6 +2,7 @@
  * Atmel maXTouch Touchscreen driver
  *
  * Copyright (C) 2010 Samsung Electronics Co.Ltd
+ * Copyright (C) 2011-2014 Atmel Corporation
  * Copyright (C) 2012 Google, Inc.
  *
  * Author: Joonyoung Shim <jy0922.shim@samsung.com>
@@ -30,8 +31,10 @@
 #define MXT_VER_21		21
 #define MXT_VER_22		22
 
-/* Firmware */
+/* Firmware files */
 #define MXT_FW_NAME		"maxtouch.fw"
+#define MXT_CFG_NAME		"maxtouch.cfg"
+#define MXT_CFG_MAGIC		"OBP_RAW V1"
 
 /* Registers */
 #define MXT_INFO		0x00
@@ -298,37 +301,6 @@ static bool mxt_object_readable(unsigned int type)
 	}
 }
 
-static bool mxt_object_writable(unsigned int type)
-{
-	switch (type) {
-	case MXT_GEN_COMMAND_T6:
-	case MXT_GEN_POWER_T7:
-	case MXT_GEN_ACQUIRE_T8:
-	case MXT_TOUCH_MULTI_T9:
-	case MXT_TOUCH_KEYARRAY_T15:
-	case MXT_TOUCH_PROXIMITY_T23:
-	case MXT_TOUCH_PROXKEY_T52:
-	case MXT_PROCI_GRIPFACE_T20:
-	case MXT_PROCG_NOISE_T22:
-	case MXT_PROCI_ONETOUCH_T24:
-	case MXT_PROCI_TWOTOUCH_T27:
-	case MXT_PROCI_GRIP_T40:
-	case MXT_PROCI_PALM_T41:
-	case MXT_PROCI_TOUCHSUPPRESSION_T42:
-	case MXT_PROCI_STYLUS_T47:
-	case MXT_PROCG_NOISESUPPRESSION_T48:
-	case MXT_SPT_COMMSCONFIG_T18:
-	case MXT_SPT_GPIOPWM_T19:
-	case MXT_SPT_SELFTEST_T25:
-	case MXT_SPT_CTECONFIG_T28:
-	case MXT_SPT_DIGITIZER_T43:
-	case MXT_SPT_CTECONFIG_T46:
-		return true;
-	default:
-		return false;
-	}
-}
-
 static void mxt_dump_message(struct device *dev,
 			     struct mxt_message *message)
 {
@@ -606,7 +578,7 @@ mxt_get_object(struct mxt_data *data, u8 type)
 			return object;
 	}
 
-	dev_err(&data->client->dev, "Invalid object type T%u\n", type);
+	dev_warn(&data->client->dev, "Invalid object type T%u\n", type);
 	return NULL;
 }
 
@@ -877,58 +849,197 @@ static void mxt_update_crc(struct mxt_data *data, u8 cmd, u8 value)
 	mxt_wait_for_completion(data, &data->crc_completion, MXT_CRC_TIMEOUT);
 }
 
-static int mxt_check_reg_init(struct mxt_data *data)
+/*
+ * mxt_update_cfg - download configuration to chip
+ *
+ * Atmel Raw Config File Format
+ *
+ * The first four lines of the raw config file contain:
+ *  1) Version
+ *  2) Chip ID Information (first 7 bytes of device memory)
+ *  3) Chip Information Block 24-bit CRC Checksum
+ *  4) Chip Configuration 24-bit CRC Checksum
+ *
+ * The rest of the file consists of one line per object instance:
+ *   <TYPE> <INSTANCE> <SIZE> <CONTENTS>
+ *
+ *   <TYPE> - 2-byte object type as hex
+ *   <INSTANCE> - 2-byte object instance number as hex
+ *   <SIZE> - 2-byte object size as hex
+ *   <CONTENTS> - array of <SIZE> 1-byte hex values
+ */
+static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg)
 {
-	const struct mxt_platform_data *pdata = data->pdata;
-	struct mxt_object *object;
 	struct device *dev = &data->client->dev;
-	int index = 0;
-	int i, size;
+	struct mxt_info cfg_info;
+	struct mxt_object *object;
 	int ret;
+	int offset;
+	int pos;
+	int i;
+	u32 info_crc, config_crc;
+	unsigned int type, instance, size;
+	u8 val;
+	u16 reg;
 
-	if (!pdata->config) {
-		dev_dbg(dev, "No cfg data defined, skipping reg init\n");
-		return 0;
+	mxt_update_crc(data, MXT_COMMAND_REPORTALL, 1);
+
+	if (strncmp(cfg->data, MXT_CFG_MAGIC, strlen(MXT_CFG_MAGIC))) {
+		dev_err(dev, "Unrecognised config file\n");
+		ret = -EINVAL;
+		goto release;
 	}
 
-	mxt_update_crc(data, MXT_COMMAND_REPORTALL, 1);
+	pos = strlen(MXT_CFG_MAGIC);
+
+	/* Load information block and check */
+	for (i = 0; i < sizeof(struct mxt_info); i++) {
+		ret = sscanf(cfg->data + pos, "%hhx%n",
+			     (unsigned char *)&cfg_info + i,
+			     &offset);
+		if (ret != 1) {
+			dev_err(dev, "Bad format\n");
+			ret = -EINVAL;
+			goto release;
+		}
 
-	if (data->config_crc == pdata->config_crc) {
-		dev_info(dev, "Config CRC 0x%06X: OK\n", data->config_crc);
-		return 0;
+		pos += offset;
+	}
+
+	if (cfg_info.family_id != data->info.family_id) {
+		dev_err(dev, "Family ID mismatch!\n");
+		ret = -EINVAL;
+		goto release;
 	}
 
-	dev_info(dev, "Config CRC 0x%06X: does not match 0x%06X\n",
-		 data->config_crc, pdata->config_crc);
+	if (cfg_info.variant_id != data->info.variant_id) {
+		dev_err(dev, "Variant ID mismatch!\n");
+		ret = -EINVAL;
+		goto release;
+	}
 
-	for (i = 0; i < data->info.object_num; i++) {
-		object = data->object_table + i;
+	if (cfg_info.version != data->info.version)
+		dev_err(dev, "Warning: version mismatch!\n");
 
-		if (!mxt_object_writable(object->type))
+	if (cfg_info.build != data->info.build)
+		dev_err(dev, "Warning: build num mismatch!\n");
+
+	ret = sscanf(cfg->data + pos, "%x%n", &info_crc, &offset);
+	if (ret != 1) {
+		dev_err(dev, "Bad format: failed to parse Info CRC\n");
+		ret = -EINVAL;
+		goto release;
+	}
+	pos += offset;
+
+	/* Check config CRC */
+	ret = sscanf(cfg->data + pos, "%x%n", &config_crc, &offset);
+	if (ret != 1) {
+		dev_err(dev, "Bad format: failed to parse Config CRC\n");
+		ret = -EINVAL;
+		goto release;
+	}
+	pos += offset;
+
+	if (data->config_crc == config_crc) {
+		dev_dbg(dev, "Config CRC 0x%06X: OK\n", config_crc);
+		ret = 0;
+		goto release;
+	}
+
+	dev_info(dev, "Config CRC 0x%06X: does not match file 0x%06X\n",
+		 data->config_crc, config_crc);
+
+	while (pos < cfg->size) {
+		/* Read type, instance, length */
+		ret = sscanf(cfg->data + pos, "%x %x %x%n",
+			     &type, &instance, &size, &offset);
+		if (ret == 0) {
+			/* EOF */
+			ret = 1;
+			goto release;
+		} else if (ret != 3) {
+			dev_err(dev, "Bad format: failed to parse object\n");
+			ret = -EINVAL;
+			goto release;
+		}
+		pos += offset;
+
+		object = mxt_get_object(data, type);
+		if (!object) {
+			/* Skip object */
+			for (i = 0; i < size; i++) {
+				ret = sscanf(cfg->data + pos, "%hhx%n",
+					     &val,
+					     &offset);
+				pos += offset;
+			}
 			continue;
+		}
 
-		size = mxt_obj_size(object) * mxt_obj_instances(object);
-		if (index + size > pdata->config_length) {
-			dev_err(dev, "Not enough config data!\n");
-			return -EINVAL;
+		if (size > mxt_obj_size(object)) {
+			dev_err(dev, "Discarding %zu byte(s) in T%u\n",
+				size - mxt_obj_size(object), type);
 		}
 
-		ret = __mxt_write_reg(data->client, object->start_address,
-				size, &pdata->config[index]);
-		if (ret)
-			return ret;
-		index += size;
+		if (instance >= mxt_obj_instances(object)) {
+			dev_err(dev, "Object instances exceeded!\n");
+			ret = -EINVAL;
+			goto release;
+		}
+
+		reg = object->start_address + mxt_obj_size(object) * instance;
+
+		for (i = 0; i < size; i++) {
+			ret = sscanf(cfg->data + pos, "%hhx%n",
+				     &val,
+				     &offset);
+			if (ret != 1) {
+				dev_err(dev, "Bad format in T%d\n", type);
+				ret = -EINVAL;
+				goto release;
+			}
+			pos += offset;
+
+			if (i > mxt_obj_size(object))
+				continue;
+
+			ret = mxt_write_reg(data->client, reg + i, val);
+			if (ret)
+				goto release;
+
+		}
+
+		/*
+		 * If firmware is upgraded, new bytes may be added to end of
+		 * objects. It is generally forward compatible to zero these
+		 * bytes - previous behaviour will be retained. However
+		 * this does invalidate the CRC and will force a config
+		 * download every time until the configuration is updated.
+		 */
+		if (size < mxt_obj_size(object)) {
+			dev_info(dev, "Zeroing %zu byte(s) in T%d\n",
+				 mxt_obj_size(object) - size, type);
+
+			for (i = size + 1; i < mxt_obj_size(object); i++) {
+				ret = mxt_write_reg(data->client, reg + i, 0);
+				if (ret)
+					goto release;
+			}
+		}
 	}
 
 	mxt_update_crc(data, MXT_COMMAND_BACKUPNV, MXT_BACKUP_VALUE);
 
 	ret = mxt_soft_reset(data);
 	if (ret)
-		return ret;
+		goto release;
 
 	dev_info(dev, "Config successfully updated\n");
 
-	return 0;
+release:
+	release_firmware(cfg);
+	return ret;
 }
 
 static int mxt_make_highchg(struct mxt_data *data)
@@ -1204,10 +1315,17 @@ err_free_mem:
 	return error;
 }
 
+static int mxt_configure_objects(struct mxt_data *data,
+				 const struct firmware *cfg);
+
+static void mxt_config_cb(const struct firmware *cfg, void *ctx)
+{
+	mxt_configure_objects(ctx, cfg);
+}
+
 static int mxt_initialize(struct mxt_data *data)
 {
 	struct i2c_client *client = data->client;
-	struct mxt_info *info = &data->info;
 	int error;
 
 	error = mxt_get_info(data);
@@ -1225,28 +1343,40 @@ static int mxt_initialize(struct mxt_data *data)
 	if (error)
 		goto err_free_object_table;
 
-	/* Check register init values */
-	error = mxt_check_reg_init(data);
-	if (error) {
-		dev_err(&client->dev, "Error %d initializing configuration\n",
-			error);
-		goto err_free_object_table;
+	request_firmware_nowait(THIS_MODULE, true, MXT_CFG_NAME,
+				&data->client->dev, GFP_KERNEL, data,
+				mxt_config_cb);
+
+	return 0;
+
+err_free_object_table:
+	mxt_free_object_table(data);
+	return error;
+}
+
+static int mxt_configure_objects(struct mxt_data *data,
+				 const struct firmware *cfg)
+{
+	struct device *dev = &data->client->dev;
+	struct mxt_info *info = &data->info;
+	int error;
+
+	if (cfg) {
+		error = mxt_update_cfg(data, cfg);
+		if (error)
+			dev_warn(dev, "Error %d updating config\n", error);
 	}
 
 	error = mxt_initialize_t9_input_device(data);
 	if (error)
-		goto err_free_object_table;
+		return error;
 
-	dev_info(&client->dev,
+	dev_info(dev,
 		 "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n",
 		 info->family_id, info->variant_id, info->version >> 4,
 		 info->version & 0xf, info->build, info->object_num);
 
 	return 0;
-
-err_free_object_table:
-	mxt_free_object_table(data);
-	return error;
 }
 
 /* Firmware Version is returned as Major.Minor.Build */
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 7f1a2e2711bd..67b316b2a2bd 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -97,8 +97,6 @@ static struct mxt_platform_data atmel_224s_tp_platform_data = {
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.t19_num_keys		= ARRAY_SIZE(mxt_t19_keys),
 	.t19_keymap		= mxt_t19_keys,
-	.config			= NULL,
-	.config_length		= 0,
 };
 
 static struct i2c_board_info atmel_224s_tp_device = {
@@ -109,8 +107,6 @@ static struct i2c_board_info atmel_224s_tp_device = {
 
 static struct mxt_platform_data atmel_1664s_platform_data = {
 	.irqflags		= IRQF_TRIGGER_FALLING,
-	.config			= NULL,
-	.config_length		= 0,
 };
 
 static struct i2c_board_info atmel_1664s_device = {
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index 3891dc1de21c..02bf6ea31701 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -17,9 +17,6 @@
 
 /* The platform data for the Atmel maXTouch touchscreen driver */
 struct mxt_platform_data {
-	const u8 *config;
-	size_t config_length;
-	u32 config_crc;
 	unsigned long irqflags;
 	u8 t19_num_keys;
 	const unsigned int *t19_keymap;
-- 
cgit v1.2.3


From 8a2b22a2595bf89d4396530edf8388159fad9d83 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Wed, 23 Jul 2014 17:05:06 -0600
Subject: of: Make devicetree sysfs update functions consistent.

All of the DT modification functions are split into two parts, the first
part manipulates the DT data structure, and the second part updates
sysfs, but the code isn't very consistent about how the second half is
called. They don't all enforce the same rules about when it is valid to
update sysfs, and there isn't any clarity on locking.

The transactional DT modification feature that is coming also needs
access to these functions so that it can perform all the structure
changes together, and then all the sysfs updates as a second stage
instead of doing each one at a time.

Fix up the second have by creating a separate __of_*_sysfs() function
for each of the helpers. The new functions have consistent naming (ie.
of_node_add() becomes __of_attach_node_sysfs()) and all of them now
defer if of_init hasn't been called yet.

Callers of the new functions must hold the of_mutex to ensure there are
no race conditions with of_init(). The mutex ensures that there will
only ever be one writer to the tree at any given time. There can still
be any number of readers and the raw_spin_lock is still used to make
sure access to the data structure is still consistent.

Finally, put the function prototypes into of_private.h so they are
accessible to the transaction code.

Signed-off-by: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
[grant.likely: Changed suffix from _post to _sysfs to match existing code]
[grant.likely: Reorganized to eliminate trivial wrappers]
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/base.c       | 96 +++++++++++++++++++++++++------------------------
 drivers/of/dynamic.c    | 12 +++++--
 drivers/of/of_private.h | 10 ++++++
 include/linux/of.h      |  2 --
 4 files changed, 69 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index b403f9d98461..ad4929cbd876 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -37,10 +37,13 @@ struct device_node *of_chosen;
 struct device_node *of_aliases;
 static struct device_node *of_stdout;
 
-static struct kset *of_kset;
+struct kset *of_kset;
 
 /*
- * Used to protect the of_aliases, to hold off addition of nodes to sysfs
+ * Used to protect the of_aliases, to hold off addition of nodes to sysfs.
+ * This mutex must be held whenever modifications are being made to the
+ * device tree. The of_{attach,detach}_node() and
+ * of_{add,remove,update}_property() helpers make sure this happens.
  */
 DEFINE_MUTEX(of_mutex);
 
@@ -127,13 +130,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name)
 	return name;
 }
 
-static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
+int __of_add_property_sysfs(struct device_node *np, struct property *pp)
 {
 	int rc;
 
 	/* Important: Don't leak passwords */
 	bool secure = strncmp(pp->name, "security-", 9) == 0;
 
+	if (!of_kset || !of_node_is_attached(np))
+		return 0;
+
 	sysfs_bin_attr_init(&pp->attr);
 	pp->attr.attr.name = safe_name(&np->kobj, pp->name);
 	pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO;
@@ -145,12 +151,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
 	return rc;
 }
 
-static int __of_node_add(struct device_node *np)
+int __of_attach_node_sysfs(struct device_node *np)
 {
 	const char *name;
 	struct property *pp;
 	int rc;
 
+	if (!of_kset)
+		return 0;
+
 	np->kobj.kset = of_kset;
 	if (!np->parent) {
 		/* Nodes without parents are new top level trees */
@@ -172,26 +181,6 @@ static int __of_node_add(struct device_node *np)
 	return 0;
 }
 
-int of_node_add(struct device_node *np)
-{
-	int rc = 0;
-
-	BUG_ON(!of_node_is_initialized(np));
-
-	/*
-	 * Grab the mutex here so that in a race condition between of_init() and
-	 * of_node_add(), node addition will still be consistent.
-	 */
-	mutex_lock(&of_mutex);
-	if (of_kset)
-		rc = __of_node_add(np);
-	else
-		/* This scenario may be perfectly valid, but report it anyway */
-		pr_info("of_node_add(%s) before of_init()\n", np->full_name);
-	mutex_unlock(&of_mutex);
-	return rc;
-}
-
 static int __init of_init(void)
 {
 	struct device_node *np;
@@ -204,7 +193,7 @@ static int __init of_init(void)
 		return -ENOMEM;
 	}
 	for_each_of_allnodes(np)
-		__of_node_add(np);
+		__of_attach_node_sysfs(np);
 	mutex_unlock(&of_mutex);
 
 	/* Symlink in /proc as required by userspace ABI */
@@ -1689,15 +1678,17 @@ int of_add_property(struct device_node *np, struct property *prop)
 	if (rc)
 		return rc;
 
+	mutex_lock(&of_mutex);
+
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	rc = __of_add_property(np, prop);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
-	if (rc)
-		return rc;
 
-	if (of_node_is_attached(np))
+	if (!rc)
 		__of_add_property_sysfs(np, prop);
 
+	mutex_unlock(&of_mutex);
+
 	return rc;
 }
 
@@ -1720,6 +1711,13 @@ int __of_remove_property(struct device_node *np, struct property *prop)
 	return 0;
 }
 
+void __of_remove_property_sysfs(struct device_node *np, struct property *prop)
+{
+	/* at early boot, bail here and defer setup to of_init() */
+	if (of_kset && of_node_is_attached(np))
+		sysfs_remove_bin_file(&np->kobj, &prop->attr);
+}
+
 /**
  * of_remove_property - Remove a property from a node.
  *
@@ -1737,20 +1735,18 @@ int of_remove_property(struct device_node *np, struct property *prop)
 	if (rc)
 		return rc;
 
+	mutex_lock(&of_mutex);
+
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	rc = __of_remove_property(np, prop);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-	if (rc)
-		return rc;
-
-	/* at early boot, bail hear and defer setup to of_init() */
-	if (!of_kset)
-		return 0;
+	if (!rc)
+		__of_remove_property_sysfs(np, prop);
 
-	sysfs_remove_bin_file(&np->kobj, &prop->attr);
+	mutex_unlock(&of_mutex);
 
-	return 0;
+	return rc;
 }
 
 int __of_update_property(struct device_node *np, struct property *newprop,
@@ -1779,6 +1775,18 @@ int __of_update_property(struct device_node *np, struct property *newprop,
 	return 0;
 }
 
+void __of_update_property_sysfs(struct device_node *np, struct property *newprop,
+		struct property *oldprop)
+{
+	/* At early boot, bail out and defer setup to of_init() */
+	if (!of_kset)
+		return;
+
+	if (oldprop)
+		sysfs_remove_bin_file(&np->kobj, &oldprop->attr);
+	__of_add_property_sysfs(np, newprop);
+}
+
 /*
  * of_update_property - Update a property in a node, if the property does
  * not exist, add it.
@@ -1801,22 +1809,18 @@ int of_update_property(struct device_node *np, struct property *newprop)
 	if (rc)
 		return rc;
 
+	mutex_lock(&of_mutex);
+
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	rc = __of_update_property(np, newprop, &oldprop);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
-	if (rc)
-		return rc;
 
-	/* At early boot, bail out and defer setup to of_init() */
-	if (!of_kset)
-		return 0;
+	if (!rc)
+		__of_update_property_sysfs(np, newprop, oldprop);
 
-	/* Update the sysfs attribute */
-	if (oldprop)
-		sysfs_remove_bin_file(&np->kobj, &oldprop->attr);
-	__of_add_property_sysfs(np, newprop);
+	mutex_unlock(&of_mutex);
 
-	return 0;
+	return rc;
 }
 
 static void of_alias_add(struct alias_prop *ap, struct device_node *np,
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 75fcc66fcefd..c875787fa394 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -41,11 +41,13 @@ void of_node_put(struct device_node *node)
 }
 EXPORT_SYMBOL(of_node_put);
 
-static void of_node_remove(struct device_node *np)
+void __of_detach_node_sysfs(struct device_node *np)
 {
 	struct property *pp;
 
 	BUG_ON(!of_node_is_initialized(np));
+	if (!of_kset)
+		return;
 
 	/* only remove properties if on sysfs */
 	if (of_node_is_attached(np)) {
@@ -115,11 +117,13 @@ int of_attach_node(struct device_node *np)
 	if (rc)
 		return rc;
 
+	mutex_lock(&of_mutex);
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	__of_attach_node(np);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-	of_node_add(np);
+	__of_attach_node_sysfs(np);
+	mutex_unlock(&of_mutex);
 	return 0;
 }
 
@@ -174,11 +178,13 @@ int of_detach_node(struct device_node *np)
 	if (rc)
 		return rc;
 
+	mutex_lock(&of_mutex);
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	__of_detach_node(np);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-	of_node_remove(np);
+	__of_detach_node_sysfs(np);
+	mutex_unlock(&of_mutex);
 	return rc;
 }
 
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 0f6089722af9..0d99ba8caeed 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -33,6 +33,8 @@ struct alias_prop {
 
 extern struct mutex of_mutex;
 extern struct list_head aliases_lookup;
+extern struct kset *of_kset;
+
 
 static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
 {
@@ -62,11 +64,19 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags);
 struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags);
 
 extern int __of_add_property(struct device_node *np, struct property *prop);
+extern int __of_add_property_sysfs(struct device_node *np,
+		struct property *prop);
 extern int __of_remove_property(struct device_node *np, struct property *prop);
+extern void __of_remove_property_sysfs(struct device_node *np,
+		struct property *prop);
 extern int __of_update_property(struct device_node *np,
 		struct property *newprop, struct property **oldprop);
+extern void __of_update_property_sysfs(struct device_node *np,
+		struct property *newprop, struct property *oldprop);
 
 extern void __of_attach_node(struct device_node *np);
+extern int __of_attach_node_sysfs(struct device_node *np);
 extern void __of_detach_node(struct device_node *np);
+extern void __of_detach_node_sysfs(struct device_node *np);
 
 #endif /* _LINUX_OF_PRIVATE_H */
diff --git a/include/linux/of.h b/include/linux/of.h
index abf829a1f150..705fa12fca7f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -74,8 +74,6 @@ struct of_phandle_args {
 	uint32_t args[MAX_PHANDLE_ARGS];
 };
 
-extern int of_node_add(struct device_node *node);
-
 /* initialize a node */
 extern struct kobj_type of_node_ktype;
 static inline void of_node_init(struct device_node *node)
-- 
cgit v1.2.3


From 259092a35c7e11f1d4616b0f5b3ba7b851fe4fa6 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Wed, 16 Jul 2014 12:48:23 -0600
Subject: of: Reorder device tree changes and notifiers

Currently, devicetree reconfig notifiers get emitted before the change
is applied to the tree, but that behaviour is problematic if the
receiver wants the determine the new state of the tree. The current
users don't care, but the changeset code to follow will be making
multiple changes at once. Reorder notifiers to get emitted after the
change has been applied to the tree so that callbacks see the new tree
state.

At the same time, fixup the existing callbacks to expect the new order.
There are a few callbacks that compare the old and new values of a
changed property. Put both property pointers into the of_prop_reconfig
structure.

The current notifiers also allow the notifier callback to fail and
cancel the change to the tree, but that feature isn't actually used.
It really isn't valid to ignore a tree modification provided by firmware
anyway, so remove the ability to cancel a change to the tree.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Cc: Nathan Fontenot <nfont@austin.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |  2 +-
 drivers/crypto/nx/nx-842.c                      | 30 +++++++------------------
 drivers/of/base.c                               | 21 ++++++++---------
 drivers/of/dynamic.c                            | 18 +++++++--------
 drivers/of/of_private.h                         |  4 ++--
 include/linux/of.h                              |  1 +
 6 files changed, 29 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 7995135170a3..ac01e188faef 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -194,7 +194,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
 	if (!memblock_size)
 		return -EINVAL;
 
-	p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL);
+	p = (u32 *) pr->old_prop->value;
 	if (!p)
 		return -EINVAL;
 
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 502edf0a2933..c897c3a5ee17 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop)
 		goto error_out;
 	}
 
-	/* Set ptr to new property if provided */
-	if (new_prop) {
-		/* Single property */
-		if (!strncmp(new_prop->name, "status", new_prop->length)) {
-			status = new_prop;
-
-		} else if (!strncmp(new_prop->name, "ibm,max-sg-len",
-					new_prop->length)) {
-			maxsglen = new_prop;
-
-		} else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
-					new_prop->length)) {
-			maxsyncop = new_prop;
-
-		} else {
-			/*
-			 * Skip the update, the property being updated
-			 * has no impact.
-			 */
-			goto out;
-		}
-	}
+	/*
+	 * If this is a property update, there are only certain properties that
+	 * we care about. Bail if it isn't in the below list
+	 */
+	if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
+		         strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
+		         strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
+		goto out;
 
 	/* Perform property updates */
 	ret = nx842_OF_upd_status(new_devdata, status);
diff --git a/drivers/of/base.c b/drivers/of/base.c
index ededf8e33145..a7ad1013edfa 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1674,10 +1674,6 @@ int of_add_property(struct device_node *np, struct property *prop)
 	unsigned long flags;
 	int rc;
 
-	rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop);
-	if (rc)
-		return rc;
-
 	mutex_lock(&of_mutex);
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
@@ -1689,6 +1685,9 @@ int of_add_property(struct device_node *np, struct property *prop)
 
 	mutex_unlock(&of_mutex);
 
+	if (!rc)
+		of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL);
+
 	return rc;
 }
 
@@ -1731,10 +1730,6 @@ int of_remove_property(struct device_node *np, struct property *prop)
 	unsigned long flags;
 	int rc;
 
-	rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop);
-	if (rc)
-		return rc;
-
 	mutex_lock(&of_mutex);
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
@@ -1746,6 +1741,9 @@ int of_remove_property(struct device_node *np, struct property *prop)
 
 	mutex_unlock(&of_mutex);
 
+	if (!rc)
+		of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL);
+
 	return rc;
 }
 
@@ -1805,10 +1803,6 @@ int of_update_property(struct device_node *np, struct property *newprop)
 	if (!newprop->name)
 		return -EINVAL;
 
-	rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop);
-	if (rc)
-		return rc;
-
 	mutex_lock(&of_mutex);
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
@@ -1820,6 +1814,9 @@ int of_update_property(struct device_node *np, struct property *newprop)
 
 	mutex_unlock(&of_mutex);
 
+	if (!rc)
+		of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop);
+
 	return rc;
 }
 
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 7c020b9a3317..7bd5501736a6 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -83,7 +83,7 @@ int of_reconfig_notify(unsigned long action, void *p)
 }
 
 int of_property_notify(int action, struct device_node *np,
-		       struct property *prop)
+		       struct property *prop, struct property *oldprop)
 {
 	struct of_prop_reconfig pr;
 
@@ -93,6 +93,7 @@ int of_property_notify(int action, struct device_node *np,
 
 	pr.dn = np;
 	pr.prop = prop;
+	pr.old_prop = oldprop;
 	return of_reconfig_notify(action, &pr);
 }
 
@@ -125,11 +126,6 @@ void __of_attach_node(struct device_node *np)
 int of_attach_node(struct device_node *np)
 {
 	unsigned long flags;
-	int rc;
-
-	rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
-	if (rc)
-		return rc;
 
 	mutex_lock(&of_mutex);
 	raw_spin_lock_irqsave(&devtree_lock, flags);
@@ -138,6 +134,9 @@ int of_attach_node(struct device_node *np)
 
 	__of_attach_node_sysfs(np);
 	mutex_unlock(&of_mutex);
+
+	of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
+
 	return 0;
 }
 
@@ -188,10 +187,6 @@ int of_detach_node(struct device_node *np)
 	unsigned long flags;
 	int rc = 0;
 
-	rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
-	if (rc)
-		return rc;
-
 	mutex_lock(&of_mutex);
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	__of_detach_node(np);
@@ -199,6 +194,9 @@ int of_detach_node(struct device_node *np)
 
 	__of_detach_node_sysfs(np);
 	mutex_unlock(&of_mutex);
+
+	of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
+
 	return rc;
 }
 
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 8129c0e58d70..f69ccb1fa308 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -43,11 +43,11 @@ static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
 
 #if defined(CONFIG_OF_DYNAMIC)
 extern int of_property_notify(int action, struct device_node *np,
-			      struct property *prop);
+			      struct property *prop, struct property *old_prop);
 extern void of_node_release(struct kobject *kobj);
 #else /* CONFIG_OF_DYNAMIC */
 static inline int of_property_notify(int action, struct device_node *np,
-				     struct property *prop)
+				     struct property *prop, struct property *old_prop)
 {
 	return 0;
 }
diff --git a/include/linux/of.h b/include/linux/of.h
index 705fa12fca7f..400f18cb4fff 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -321,6 +321,7 @@ extern int of_update_property(struct device_node *np, struct property *newprop);
 struct of_prop_reconfig {
 	struct device_node	*dn;
 	struct property		*prop;
+	struct property		*old_prop;
 };
 
 extern int of_reconfig_notifier_register(struct notifier_block *);
-- 
cgit v1.2.3


From 201c910bd6898d81d4ac6685d0f421b7e10f3c5d Mon Sep 17 00:00:00 2001
From: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
Date: Fri, 4 Jul 2014 19:58:49 +0300
Subject: of: Transactional DT support.

Introducing DT transactional support.

A DT transaction is a method which allows one to apply changes
in the live tree, in such a way that either the full set of changes
take effect, or the state of the tree can be rolled-back to the
state it was before it was attempted. An applied transaction
can be rolled-back at any time.

Documentation is in
	Documentation/devicetree/changesets.txt

Signed-off-by: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
[glikely: Removed device notifiers and reworked to be more consistent]
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 Documentation/devicetree/changesets.txt |  40 ++++
 drivers/of/dynamic.c                    | 344 ++++++++++++++++++++++++++++++++
 drivers/of/of_private.h                 |   9 +
 drivers/of/selftest.c                   |  51 +++++
 drivers/of/testcase-data/testcases.dtsi |  10 +
 include/linux/of.h                      |  76 +++++++
 6 files changed, 530 insertions(+)
 create mode 100644 Documentation/devicetree/changesets.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/changesets.txt b/Documentation/devicetree/changesets.txt
new file mode 100644
index 000000000000..935ba5acc34e
--- /dev/null
+++ b/Documentation/devicetree/changesets.txt
@@ -0,0 +1,40 @@
+A DT changeset is a method which allows one to apply changes
+in the live tree in such a way that either the full set of changes
+will be applied, or none of them will be. If an error occurs partway
+through applying the changeset, then the tree will be rolled back to the
+previous state. A changeset can also be removed after it has been
+applied.
+
+When a changeset is applied, all of the changes get applied to the tree
+at once before emitting OF_RECONFIG notifiers. This is so that the
+receiver sees a complete and consistent state of the tree when it
+receives the notifier.
+
+The sequence of a changeset is as follows.
+
+1. of_changeset_init() - initializes a changeset
+
+2. A number of DT tree change calls, of_changeset_attach_node(),
+of_changeset_detach_node(), of_changeset_add_property(),
+of_changeset_remove_property, of_changeset_update_property() to prepare
+a set of changes. No changes to the active tree are made at this point.
+All the change operations are recorded in the of_changeset 'entries'
+list.
+
+3. mutex_lock(of_mutex) - starts a changeset; The global of_mutex
+ensures there can only be one editor at a time.
+
+4. of_changeset_apply() - Apply the changes to the tree. Either the
+entire changeset will get applied, or if there is an error the tree will
+be restored to the previous state
+
+5. mutex_unlock(of_mutex) - All operations complete, release the mutex
+
+If a successfully applied changeset needs to be removed, it can be done
+with the following sequence.
+
+1. mutex_lock(of_mutex)
+
+2. of_changeset_revert()
+
+3. mutex_unlock(of_mutex)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 7bd5501736a6..c1002b7be786 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -314,3 +314,347 @@ struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags)
 	kfree(node);
 	return NULL;
 }
+
+static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
+{
+	of_node_put(ce->np);
+	list_del(&ce->node);
+	kfree(ce);
+}
+
+#ifdef DEBUG
+static void __of_changeset_entry_dump(struct of_changeset_entry *ce)
+{
+	switch (ce->action) {
+	case OF_RECONFIG_ADD_PROPERTY:
+		pr_debug("%p: %s %s/%s\n",
+			ce, "ADD_PROPERTY   ", ce->np->full_name,
+			ce->prop->name);
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		pr_debug("%p: %s %s/%s\n",
+			ce, "REMOVE_PROPERTY", ce->np->full_name,
+			ce->prop->name);
+		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		pr_debug("%p: %s %s/%s\n",
+			ce, "UPDATE_PROPERTY", ce->np->full_name,
+			ce->prop->name);
+		break;
+	case OF_RECONFIG_ATTACH_NODE:
+		pr_debug("%p: %s %s\n",
+			ce, "ATTACH_NODE    ", ce->np->full_name);
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		pr_debug("%p: %s %s\n",
+			ce, "DETACH_NODE    ", ce->np->full_name);
+		break;
+	}
+}
+#else
+static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce)
+{
+	/* empty */
+}
+#endif
+
+static void __of_changeset_entry_invert(struct of_changeset_entry *ce,
+					  struct of_changeset_entry *rce)
+{
+	memcpy(rce, ce, sizeof(*rce));
+
+	switch (ce->action) {
+	case OF_RECONFIG_ATTACH_NODE:
+		rce->action = OF_RECONFIG_DETACH_NODE;
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		rce->action = OF_RECONFIG_ATTACH_NODE;
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+		rce->action = OF_RECONFIG_REMOVE_PROPERTY;
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		rce->action = OF_RECONFIG_ADD_PROPERTY;
+		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		rce->old_prop = ce->prop;
+		rce->prop = ce->old_prop;
+		break;
+	}
+}
+
+static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert)
+{
+	struct of_changeset_entry ce_inverted;
+	int ret;
+
+	if (revert) {
+		__of_changeset_entry_invert(ce, &ce_inverted);
+		ce = &ce_inverted;
+	}
+
+	switch (ce->action) {
+	case OF_RECONFIG_ATTACH_NODE:
+	case OF_RECONFIG_DETACH_NODE:
+		ret = of_reconfig_notify(ce->action, ce->np);
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+	case OF_RECONFIG_REMOVE_PROPERTY:
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop);
+		break;
+	default:
+		pr_err("%s: invalid devicetree changeset action: %i\n", __func__,
+			(int)ce->action);
+		return;
+	}
+
+	if (ret)
+		pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name);
+}
+
+static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
+{
+	struct property *old_prop, **propp;
+	unsigned long flags;
+	int ret = 0;
+
+	__of_changeset_entry_dump(ce);
+
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	switch (ce->action) {
+	case OF_RECONFIG_ATTACH_NODE:
+		__of_attach_node(ce->np);
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		__of_detach_node(ce->np);
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+		/* If the property is in deadprops then it must be removed */
+		for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
+			if (*propp == ce->prop) {
+				*propp = ce->prop->next;
+				ce->prop->next = NULL;
+				break;
+			}
+		}
+
+		ret = __of_add_property(ce->np, ce->prop);
+		if (ret) {
+			pr_err("%s: add_property failed @%s/%s\n",
+				__func__, ce->np->full_name,
+				ce->prop->name);
+			break;
+		}
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		ret = __of_remove_property(ce->np, ce->prop);
+		if (ret) {
+			pr_err("%s: remove_property failed @%s/%s\n",
+				__func__, ce->np->full_name,
+				ce->prop->name);
+			break;
+		}
+		break;
+
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		/* If the property is in deadprops then it must be removed */
+		for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
+			if (*propp == ce->prop) {
+				*propp = ce->prop->next;
+				ce->prop->next = NULL;
+				break;
+			}
+		}
+
+		ret = __of_update_property(ce->np, ce->prop, &old_prop);
+		if (ret) {
+			pr_err("%s: update_property failed @%s/%s\n",
+				__func__, ce->np->full_name,
+				ce->prop->name);
+			break;
+		}
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+	if (ret)
+		return ret;
+
+	switch (ce->action) {
+	case OF_RECONFIG_ATTACH_NODE:
+		__of_attach_node_sysfs(ce->np);
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		__of_detach_node_sysfs(ce->np);
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+		/* ignore duplicate names */
+		__of_add_property_sysfs(ce->np, ce->prop);
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		__of_remove_property_sysfs(ce->np, ce->prop);
+		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		__of_update_property_sysfs(ce->np, ce->prop, ce->old_prop);
+		break;
+	}
+
+	return 0;
+}
+
+static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce)
+{
+	struct of_changeset_entry ce_inverted;
+
+	__of_changeset_entry_invert(ce, &ce_inverted);
+	return __of_changeset_entry_apply(&ce_inverted);
+}
+
+/**
+ * of_changeset_init - Initialize a changeset for use
+ *
+ * @ocs:	changeset pointer
+ *
+ * Initialize a changeset structure
+ */
+void of_changeset_init(struct of_changeset *ocs)
+{
+	memset(ocs, 0, sizeof(*ocs));
+	INIT_LIST_HEAD(&ocs->entries);
+}
+
+/**
+ * of_changeset_destroy - Destroy a changeset
+ *
+ * @ocs:	changeset pointer
+ *
+ * Destroys a changeset. Note that if a changeset is applied,
+ * its changes to the tree cannot be reverted.
+ */
+void of_changeset_destroy(struct of_changeset *ocs)
+{
+	struct of_changeset_entry *ce, *cen;
+
+	list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
+		__of_changeset_entry_destroy(ce);
+}
+
+/**
+ * of_changeset_apply - Applies a changeset
+ *
+ * @ocs:	changeset pointer
+ *
+ * Applies a changeset to the live tree.
+ * Any side-effects of live tree state changes are applied here on
+ * sucess, like creation/destruction of devices and side-effects
+ * like creation of sysfs properties and directories.
+ * Returns 0 on success, a negative error value in case of an error.
+ * On error the partially applied effects are reverted.
+ */
+int of_changeset_apply(struct of_changeset *ocs)
+{
+	struct of_changeset_entry *ce;
+	int ret;
+
+	/* perform the rest of the work */
+	pr_debug("of_changeset: applying...\n");
+	list_for_each_entry(ce, &ocs->entries, node) {
+		ret = __of_changeset_entry_apply(ce);
+		if (ret) {
+			pr_err("%s: Error applying changeset (%d)\n", __func__, ret);
+			list_for_each_entry_continue_reverse(ce, &ocs->entries, node)
+				__of_changeset_entry_revert(ce);
+			return ret;
+		}
+	}
+	pr_debug("of_changeset: applied, emitting notifiers.\n");
+
+	/* drop the global lock while emitting notifiers */
+	mutex_unlock(&of_mutex);
+	list_for_each_entry(ce, &ocs->entries, node)
+		__of_changeset_entry_notify(ce, 0);
+	mutex_lock(&of_mutex);
+	pr_debug("of_changeset: notifiers sent.\n");
+
+	return 0;
+}
+
+/**
+ * of_changeset_revert - Reverts an applied changeset
+ *
+ * @ocs:	changeset pointer
+ *
+ * Reverts a changeset returning the state of the tree to what it
+ * was before the application.
+ * Any side-effects like creation/destruction of devices and
+ * removal of sysfs properties and directories are applied.
+ * Returns 0 on success, a negative error value in case of an error.
+ */
+int of_changeset_revert(struct of_changeset *ocs)
+{
+	struct of_changeset_entry *ce;
+	int ret;
+
+	pr_debug("of_changeset: reverting...\n");
+	list_for_each_entry_reverse(ce, &ocs->entries, node) {
+		ret = __of_changeset_entry_revert(ce);
+		if (ret) {
+			pr_err("%s: Error reverting changeset (%d)\n", __func__, ret);
+			list_for_each_entry_continue(ce, &ocs->entries, node)
+				__of_changeset_entry_apply(ce);
+			return ret;
+		}
+	}
+	pr_debug("of_changeset: reverted, emitting notifiers.\n");
+
+	/* drop the global lock while emitting notifiers */
+	mutex_unlock(&of_mutex);
+	list_for_each_entry_reverse(ce, &ocs->entries, node)
+		__of_changeset_entry_notify(ce, 1);
+	mutex_lock(&of_mutex);
+	pr_debug("of_changeset: notifiers sent.\n");
+
+	return 0;
+}
+
+/**
+ * of_changeset_action - Perform a changeset action
+ *
+ * @ocs:	changeset pointer
+ * @action:	action to perform
+ * @np:		Pointer to device node
+ * @prop:	Pointer to property
+ *
+ * On action being one of:
+ * + OF_RECONFIG_ATTACH_NODE
+ * + OF_RECONFIG_DETACH_NODE,
+ * + OF_RECONFIG_ADD_PROPERTY
+ * + OF_RECONFIG_REMOVE_PROPERTY,
+ * + OF_RECONFIG_UPDATE_PROPERTY
+ * Returns 0 on success, a negative error value in case of an error.
+ */
+int of_changeset_action(struct of_changeset *ocs, unsigned long action,
+		struct device_node *np, struct property *prop)
+{
+	struct of_changeset_entry *ce;
+
+	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		pr_err("%s: Failed to allocate\n", __func__);
+		return -ENOMEM;
+	}
+	/* get a reference to the node */
+	ce->action = action;
+	ce->np = of_node_get(np);
+	ce->prop = prop;
+
+	if (action == OF_RECONFIG_UPDATE_PROPERTY && prop)
+		ce->old_prop = of_find_property(np, prop->name, NULL);
+
+	/* add it to the list */
+	list_add_tail(&ce->node, &ocs->entries);
+	return 0;
+}
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index f69ccb1fa308..858e0a5d9a11 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -81,4 +81,13 @@ extern int __of_attach_node_sysfs(struct device_node *np);
 extern void __of_detach_node(struct device_node *np);
 extern void __of_detach_node_sysfs(struct device_node *np);
 
+/* iterators for transactions, used for overlays */
+/* forward iterator */
+#define for_each_transaction_entry(_oft, _te) \
+	list_for_each_entry(_te, &(_oft)->te_list, node)
+
+/* reverse iterator */
+#define for_each_transaction_entry_reverse(_oft, _te) \
+	list_for_each_entry_reverse(_te, &(_oft)->te_list, node)
+
 #endif /* _LINUX_OF_PRIVATE_H */
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index ee2166f0f36a..04e39a183e53 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -293,6 +293,56 @@ static void __init of_selftest_property_copy(void)
 #endif
 }
 
+static void __init of_selftest_changeset(void)
+{
+#ifdef CONFIG_OF_DYNAMIC
+	struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" };
+	struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" };
+	struct property *ppremove;
+	struct device_node *n1, *n2, *n21, *nremove, *parent;
+	struct of_changeset chgset;
+
+	of_changeset_init(&chgset);
+	n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL);
+	selftest(n1, "testcase setup failure\n");
+	n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL);
+	selftest(n2, "testcase setup failure\n");
+	n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL);
+	selftest(n21, "testcase setup failure %p\n", n21);
+	nremove = of_find_node_by_path("/testcase-data/changeset/node-remove");
+	selftest(nremove, "testcase setup failure\n");
+	ppadd = __of_prop_dup(&padd, GFP_KERNEL);
+	selftest(ppadd, "testcase setup failure\n");
+	ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL);
+	selftest(ppupdate, "testcase setup failure\n");
+	parent = nremove->parent;
+	n1->parent = parent;
+	n2->parent = parent;
+	n21->parent = n2;
+	n2->child = n21;
+	ppremove = of_find_property(parent, "prop-remove", NULL);
+	selftest(ppremove, "failed to find removal prop");
+
+	of_changeset_init(&chgset);
+	selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n");
+	selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n");
+	selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n");
+	selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n");
+	selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n");
+	selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n");
+	selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n");
+	mutex_lock(&of_mutex);
+	selftest(!of_changeset_apply(&chgset), "apply failed\n");
+	mutex_unlock(&of_mutex);
+
+	mutex_lock(&of_mutex);
+	selftest(!of_changeset_revert(&chgset), "revert failed\n");
+	mutex_unlock(&of_mutex);
+
+	of_changeset_destroy(&chgset);
+#endif
+}
+
 static void __init of_selftest_parse_interrupts(void)
 {
 	struct device_node *np;
@@ -561,6 +611,7 @@ static int __init of_selftest(void)
 	of_selftest_parse_phandle_with_args();
 	of_selftest_property_match_string();
 	of_selftest_property_copy();
+	of_selftest_changeset();
 	of_selftest_parse_interrupts();
 	of_selftest_parse_interrupts_extended();
 	of_selftest_match_node();
diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi
index 6d8d980ac858..669bb07df142 100644
--- a/drivers/of/testcase-data/testcases.dtsi
+++ b/drivers/of/testcase-data/testcases.dtsi
@@ -1,3 +1,13 @@
+/ {
+	testcase-data {
+		changeset {
+			prop-update = "hello";
+			prop-remove = "world";
+			node-remove {
+			};
+		};
+	};
+};
 #include "tests-phandle.dtsi"
 #include "tests-interrupts.dtsi"
 #include "tests-match.dtsi"
diff --git a/include/linux/of.h b/include/linux/of.h
index 400f18cb4fff..bc91fbb13ce8 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -786,4 +786,80 @@ typedef void (*of_init_fn_1)(struct device_node *);
 #define OF_DECLARE_2(table, name, compat, fn) \
 		_OF_DECLARE(table, name, compat, fn, of_init_fn_2)
 
+/**
+ * struct of_changeset_entry	- Holds a changeset entry
+ *
+ * @node:	list_head for the log list
+ * @action:	notifier action
+ * @np:		pointer to the device node affected
+ * @prop:	pointer to the property affected
+ * @old_prop:	hold a pointer to the original property
+ *
+ * Every modification of the device tree during a changeset
+ * is held in a list of of_changeset_entry structures.
+ * That way we can recover from a partial application, or we can
+ * revert the changeset
+ */
+struct of_changeset_entry {
+	struct list_head node;
+	unsigned long action;
+	struct device_node *np;
+	struct property *prop;
+	struct property *old_prop;
+};
+
+/**
+ * struct of_changeset - changeset tracker structure
+ *
+ * @entries:	list_head for the changeset entries
+ *
+ * changesets are a convenient way to apply bulk changes to the
+ * live tree. In case of an error, changes are rolled-back.
+ * changesets live on after initial application, and if not
+ * destroyed after use, they can be reverted in one single call.
+ */
+struct of_changeset {
+	struct list_head entries;
+};
+
+#ifdef CONFIG_OF_DYNAMIC
+extern void of_changeset_init(struct of_changeset *ocs);
+extern void of_changeset_destroy(struct of_changeset *ocs);
+extern int of_changeset_apply(struct of_changeset *ocs);
+extern int of_changeset_revert(struct of_changeset *ocs);
+extern int of_changeset_action(struct of_changeset *ocs,
+		unsigned long action, struct device_node *np,
+		struct property *prop);
+
+static inline int of_changeset_attach_node(struct of_changeset *ocs,
+		struct device_node *np)
+{
+	return of_changeset_action(ocs, OF_RECONFIG_ATTACH_NODE, np, NULL);
+}
+
+static inline int of_changeset_detach_node(struct of_changeset *ocs,
+		struct device_node *np)
+{
+	return of_changeset_action(ocs, OF_RECONFIG_DETACH_NODE, np, NULL);
+}
+
+static inline int of_changeset_add_property(struct of_changeset *ocs,
+		struct device_node *np, struct property *prop)
+{
+	return of_changeset_action(ocs, OF_RECONFIG_ADD_PROPERTY, np, prop);
+}
+
+static inline int of_changeset_remove_property(struct of_changeset *ocs,
+		struct device_node *np, struct property *prop)
+{
+	return of_changeset_action(ocs, OF_RECONFIG_REMOVE_PROPERTY, np, prop);
+}
+
+static inline int of_changeset_update_property(struct of_changeset *ocs,
+		struct device_node *np, struct property *prop)
+{
+	return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop);
+}
+#endif
+
 #endif /* _LINUX_OF_H */
-- 
cgit v1.2.3


From f7d4ad98fdd08932ffda2354c62e2e2ee059adcc Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 22 Jul 2014 08:01:01 -0700
Subject: gpiolib: Export gpiochip_request_own_desc and gpiochip_free_own_desc

Both functions were introduced to let gpio drivers request their own
gpio pins. Without exporting the functions, this can however only be
used by gpio drivers built into the kernel.

Secondary impact is that the functions can not currently be used by
platform initialization code associated with the gpio-pca953x driver.
This code permits auto-export of gpio pins through platform data, but
if this functionality is used, the module can no longer be unloaded due
to the problem solved with the introduction of gpiochip_request_own_desc
and gpiochip_free_own_desc.

Export both function so they can be used from modules and from
platform initialization code.

Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/gpio/driver.txt | 21 +++++++++++++++++++++
 drivers/gpio/gpiolib.c        |  2 ++
 drivers/gpio/gpiolib.h        |  3 ---
 include/linux/gpio/driver.h   |  3 +++
 4 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt
index 224dbbcd1804..18790c237977 100644
--- a/Documentation/gpio/driver.txt
+++ b/Documentation/gpio/driver.txt
@@ -167,3 +167,24 @@ is released:
 When implementing an irqchip inside a GPIO driver, these two functions should
 typically be called in the .startup() and .shutdown() callbacks from the
 irqchip.
+
+
+Requesting self-owned GPIO pins
+-------------------------------
+
+Sometimes it is useful to allow a GPIO chip driver to request its own GPIO
+descriptors through the gpiolib API. Using gpio_request() for this purpose
+does not help since it pins the module to the kernel forever (it calls
+try_module_get()). A GPIO driver can use the following functions instead
+to request and free descriptors without being pinned to the kernel forever.
+
+	int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label)
+
+	void gpiochip_free_own_desc(struct gpio_desc *desc)
+
+Descriptors requested with gpiochip_request_own_desc() must be released with
+gpiochip_free_own_desc().
+
+These functions must be used with care since they do not affect module use
+count. Do not use the functions to request gpio descriptors not owned by the
+calling driver.
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 412d64e93cfb..768f0831db18 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -897,6 +897,7 @@ int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label)
 
 	return __gpiod_request(desc, label);
 }
+EXPORT_SYMBOL_GPL(gpiochip_request_own_desc);
 
 /**
  * gpiochip_free_own_desc - Free GPIO requested by the chip driver
@@ -910,6 +911,7 @@ void gpiochip_free_own_desc(struct gpio_desc *desc)
 	if (desc)
 		__gpiod_free(desc);
 }
+EXPORT_SYMBOL_GPL(gpiochip_free_own_desc);
 
 /* Drivers MUST set GPIO direction before making get/set calls.  In
  * some cases this is done in early boot, before IRQs are enabled.
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index acbb9335f08c..7fcb645ded4c 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -45,9 +45,6 @@ acpi_get_gpiod_by_index(struct device *dev, int index,
 }
 #endif
 
-int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label);
-void gpiochip_free_own_desc(struct gpio_desc *desc);
-
 struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
 		   const char *list_name, int index, enum of_gpio_flags *flags);
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index c66c91682d9e..4c463fb0155e 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -220,6 +220,9 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
 
 #endif /* CONFIG_GPIO_IRQCHIP */
 
+int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label);
+void gpiochip_free_own_desc(struct gpio_desc *desc);
+
 #else /* CONFIG_GPIOLIB */
 
 static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
-- 
cgit v1.2.3


From f892afb07eeecf575179c4747952644a82a92a36 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicoleotsuka@gmail.com>
Date: Mon, 16 Jun 2014 11:31:05 +0800
Subject: dmaengine: imx-sdma: Add a new DMATYPE for Shared Peripheral ASRC

Shared Peripheral ASRC, running on SPBA, needs to use shp sciprts for
DMA transfer. So this patch just adds a new DMATYPE for it.

Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt | 1 +
 drivers/dma/imx-sdma.c                                 | 5 +++++
 include/linux/platform_data/dma-imx.h                  | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
index e577196a12c0..4659fd952301 100644
--- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -47,6 +47,7 @@ The full ID of peripheral types can be found below.
 	20	ASRC
 	21	ESAI
 	22	SSI Dual FIFO	(needs firmware ver >= 2)
+	23	Shared ASRC
 
 The third cell specifies the transfer priority as below.
 
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 8269c200b53b..de584e605db5 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -750,6 +750,11 @@ static void sdma_get_pc(struct sdma_channel *sdmac,
 		emi_2_per = sdma->script_addrs->asrc_2_mcu_addr;
 		per_2_per = sdma->script_addrs->per_2_per_addr;
 		break;
+	case IMX_DMATYPE_ASRC_SP:
+		per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		per_2_per = sdma->script_addrs->per_2_per_addr;
+		break;
 	case IMX_DMATYPE_MSHC:
 		per_2_emi = sdma->script_addrs->mshc_2_mcu_addr;
 		emi_2_per = sdma->script_addrs->mcu_2_mshc_addr;
diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h
index bcbc6c3c14c0..7aa0e89d1bcc 100644
--- a/include/linux/platform_data/dma-imx.h
+++ b/include/linux/platform_data/dma-imx.h
@@ -40,6 +40,7 @@ enum sdma_peripheral_type {
 	IMX_DMATYPE_ASRC,	/* ASRC */
 	IMX_DMATYPE_ESAI,	/* ESAI */
 	IMX_DMATYPE_SSI_DUAL,	/* SSI Dual FIFO */
+	IMX_DMATYPE_ASRC_SP,	/* Shared ASRC */
 };
 
 enum imx_dma_prio {
-- 
cgit v1.2.3


From 0c9dbebdb6611d2cd75d025ec09035c3e8ce2160 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 11 Jul 2014 18:18:26 +0200
Subject: dmaengine: Remove unused definition of DMA_MAX_COOKIE

As of commit commit f04cd40701deace2efb9edd7120e59366bda2118 ("fsldma: fix
controller lockups"), its last (and only ever) user is gone.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index d2c5cc7c583c..4eb2f82aed1d 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -37,7 +37,6 @@
  */
 typedef s32 dma_cookie_t;
 #define DMA_MIN_COOKIE	1
-#define DMA_MAX_COOKIE	INT_MAX
 
 static inline int dma_submit_error(dma_cookie_t cookie)
 {
-- 
cgit v1.2.3


From a259f3896a39ec7cbcd5f630a6ec95bdcbc080d2 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Date: Thu, 24 Jul 2014 14:39:24 +0200
Subject: mfd: max77686: Add Maxim 77802 PMIC support

Maxim MAX77802 is a power management chip that contains 10 high
efficiency Buck regulators, 32 Low-dropout (LDO) regulators used
to power up application processors and peripherals, a 2-channel
32kHz clock outputs, a Real-Time-Clock (RTC) and a I2C interface
to program the individual regulators, clocks outputs and the RTC.

This patch adds support for MAX77802 to the MAX77686 driver and is
based on a driver added to the Chrome OS kernel 3.8 by Simon Glass.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig                  |   6 +-
 drivers/mfd/max77686.c               | 197 ++++++++++++++++++++++++++++-----
 include/linux/mfd/max77686-private.h | 208 ++++++++++++++++++++++++++++++++++-
 include/linux/mfd/max77686.h         |  57 +++++++++-
 4 files changed, 436 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 30102042dcaf..de5abf244746 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -380,15 +380,15 @@ config MFD_MAX14577
 	  of the device.
 
 config MFD_MAX77686
-	bool "Maxim Semiconductor MAX77686 PMIC Support"
+	bool "Maxim Semiconductor MAX77686/802 PMIC Support"
 	depends on I2C=y
 	select MFD_CORE
 	select REGMAP_I2C
 	select REGMAP_IRQ
 	select IRQ_DOMAIN
 	help
-	  Say yes here to add support for Maxim Semiconductor MAX77686.
-	  This is a Power Management IC with RTC on chip.
+	  Say yes here to add support for Maxim Semiconductor MAX77686 and
+	  MAX77802 which are Power Management IC with an RTC on chip.
 	  This driver provides common support for accessing the device;
 	  additional drivers must be enabled in order to use the functionality
 	  of the device.
diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c
index f2bd69915987..c65332291bb4 100644
--- a/drivers/mfd/max77686.c
+++ b/drivers/mfd/max77686.c
@@ -1,5 +1,5 @@
 /*
- * max77686.c - mfd core driver for the Maxim 77686
+ * max77686.c - mfd core driver for the Maxim 77686/802
  *
  * Copyright (C) 2012 Samsung Electronics
  * Chiwoong Byun <woong.byun@smasung.com>
@@ -43,6 +43,74 @@ static const struct mfd_cell max77686_devs[] = {
 	{ .name = "max77686-clk", },
 };
 
+static const struct mfd_cell max77802_devs[] = {
+	{ .name = "max77802-pmic", },
+	{ .name = "max77802-clk", },
+	{ .name = "max77802-rtc", },
+};
+
+static bool max77802_pmic_is_accessible_reg(struct device *dev,
+					    unsigned int reg)
+{
+	return (reg >= MAX77802_REG_DEVICE_ID && reg < MAX77802_REG_PMIC_END);
+}
+
+static bool max77802_rtc_is_accessible_reg(struct device *dev,
+					   unsigned int reg)
+{
+	return (reg >= MAX77802_RTC_INT && reg < MAX77802_RTC_END);
+}
+
+static bool max77802_is_accessible_reg(struct device *dev, unsigned int reg)
+{
+	return (max77802_pmic_is_accessible_reg(dev, reg) ||
+		max77802_rtc_is_accessible_reg(dev, reg));
+}
+
+static bool max77802_pmic_is_precious_reg(struct device *dev, unsigned int reg)
+{
+	return (reg == MAX77802_REG_INTSRC || reg == MAX77802_REG_INT1 ||
+		reg == MAX77802_REG_INT2);
+}
+
+static bool max77802_rtc_is_precious_reg(struct device *dev, unsigned int reg)
+{
+	return (reg == MAX77802_RTC_INT ||
+		reg == MAX77802_RTC_UPDATE0 ||
+		reg == MAX77802_RTC_UPDATE1);
+}
+
+static bool max77802_is_precious_reg(struct device *dev, unsigned int reg)
+{
+	return (max77802_pmic_is_precious_reg(dev, reg) ||
+		max77802_rtc_is_precious_reg(dev, reg));
+}
+
+static bool max77802_pmic_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+	return (max77802_is_precious_reg(dev, reg) ||
+		reg == MAX77802_REG_STATUS1 || reg == MAX77802_REG_STATUS2 ||
+		reg == MAX77802_REG_PWRON);
+}
+
+static bool max77802_rtc_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+	return (max77802_rtc_is_precious_reg(dev, reg) ||
+		reg == MAX77802_RTC_SEC ||
+		reg == MAX77802_RTC_MIN ||
+		reg == MAX77802_RTC_HOUR ||
+		reg == MAX77802_RTC_WEEKDAY ||
+		reg == MAX77802_RTC_MONTH ||
+		reg == MAX77802_RTC_YEAR ||
+		reg == MAX77802_RTC_DATE);
+}
+
+static bool max77802_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+	return (max77802_pmic_is_volatile_reg(dev, reg) ||
+		max77802_rtc_is_volatile_reg(dev, reg));
+}
+
 static struct regmap_config max77686_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -53,6 +121,17 @@ static struct regmap_config max77686_rtc_regmap_config = {
 	.val_bits = 8,
 };
 
+static struct regmap_config max77802_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.writeable_reg = max77802_is_accessible_reg,
+	.readable_reg = max77802_is_accessible_reg,
+	.precious_reg = max77802_is_precious_reg,
+	.volatile_reg = max77802_is_volatile_reg,
+	.name = "max77802-pmic",
+	.cache_type = REGCACHE_RBTREE,
+};
+
 static const struct regmap_irq max77686_irqs[] = {
 	/* INT1 interrupts */
 	{ .reg_offset = 0, .mask = MAX77686_INT1_PWRONF_MSK, },
@@ -96,9 +175,34 @@ static const struct regmap_irq_chip max77686_rtc_irq_chip = {
 	.num_irqs		= ARRAY_SIZE(max77686_rtc_irqs),
 };
 
+static const struct regmap_irq_chip max77802_irq_chip = {
+	.name			= "max77802-pmic",
+	.status_base		= MAX77802_REG_INT1,
+	.mask_base		= MAX77802_REG_INT1MSK,
+	.num_regs		= 2,
+	.irqs			= max77686_irqs, /* same masks as 77686 */
+	.num_irqs		= ARRAY_SIZE(max77686_irqs),
+};
+
+static const struct regmap_irq_chip max77802_rtc_irq_chip = {
+	.name			= "max77802-rtc",
+	.status_base		= MAX77802_RTC_INT,
+	.mask_base		= MAX77802_RTC_INTM,
+	.num_regs		= 1,
+	.irqs			= max77686_rtc_irqs, /* same masks as 77686 */
+	.num_irqs		= ARRAY_SIZE(max77686_rtc_irqs),
+};
+
 static const struct of_device_id max77686_pmic_dt_match[] = {
-	{.compatible = "maxim,max77686", .data = NULL},
-	{},
+	{
+		.compatible = "maxim,max77686",
+		.data = (void *)TYPE_MAX77686,
+	},
+	{
+		.compatible = "maxim,max77802",
+		.data = (void *)TYPE_MAX77802,
+	},
+	{ },
 };
 
 static struct max77686_platform_data *max77686_i2c_parse_dt_pdata(struct device
@@ -119,8 +223,15 @@ static int max77686_i2c_probe(struct i2c_client *i2c,
 {
 	struct max77686_dev *max77686 = NULL;
 	struct max77686_platform_data *pdata = dev_get_platdata(&i2c->dev);
+	const struct of_device_id *match;
 	unsigned int data;
 	int ret = 0;
+	const struct regmap_config *config;
+	const struct regmap_irq_chip *irq_chip;
+	const struct regmap_irq_chip *rtc_irq_chip;
+	struct regmap **rtc_regmap;
+	const struct mfd_cell *cells;
+	int n_devs;
 
 	if (IS_ENABLED(CONFIG_OF) && i2c->dev.of_node && !pdata)
 		pdata = max77686_i2c_parse_dt_pdata(&i2c->dev);
@@ -135,15 +246,40 @@ static int max77686_i2c_probe(struct i2c_client *i2c,
 	if (!max77686)
 		return -ENOMEM;
 
+	if (i2c->dev.of_node) {
+		match = of_match_node(max77686_pmic_dt_match, i2c->dev.of_node);
+		if (!match)
+			return -EINVAL;
+
+		max77686->type = (int)match->data;
+	} else {
+		max77686->type = id->driver_data;
+	}
+
 	i2c_set_clientdata(i2c, max77686);
 	max77686->dev = &i2c->dev;
 	max77686->i2c = i2c;
-	max77686->type = id->driver_data;
 
 	max77686->wakeup = pdata->wakeup;
 	max77686->irq = i2c->irq;
 
-	max77686->regmap = devm_regmap_init_i2c(i2c, &max77686_regmap_config);
+	if (max77686->type == TYPE_MAX77686) {
+		config = &max77686_regmap_config;
+		irq_chip = &max77686_irq_chip;
+		rtc_irq_chip = &max77686_rtc_irq_chip;
+		rtc_regmap = &max77686->rtc_regmap;
+		cells =  max77686_devs;
+		n_devs = ARRAY_SIZE(max77686_devs);
+	} else {
+		config = &max77802_regmap_config;
+		irq_chip = &max77802_irq_chip;
+		rtc_irq_chip = &max77802_rtc_irq_chip;
+		rtc_regmap = &max77686->regmap;
+		cells =  max77802_devs;
+		n_devs = ARRAY_SIZE(max77802_devs);
+	}
+
+	max77686->regmap = devm_regmap_init_i2c(i2c, config);
 	if (IS_ERR(max77686->regmap)) {
 		ret = PTR_ERR(max77686->regmap);
 		dev_err(max77686->dev, "Failed to allocate register map: %d\n",
@@ -158,41 +294,46 @@ static int max77686_i2c_probe(struct i2c_client *i2c,
 		return -ENODEV;
 	}
 
-	max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC);
-	if (!max77686->rtc) {
-		dev_err(max77686->dev, "Failed to allocate I2C device for RTC\n");
-		return -ENODEV;
-	}
-	i2c_set_clientdata(max77686->rtc, max77686);
-
-	max77686->rtc_regmap = devm_regmap_init_i2c(max77686->rtc,
-						&max77686_rtc_regmap_config);
-	if (IS_ERR(max77686->rtc_regmap)) {
-		ret = PTR_ERR(max77686->rtc_regmap);
-		dev_err(max77686->dev, "failed to allocate RTC regmap: %d\n",
-			ret);
-		goto err_unregister_i2c;
+	if (max77686->type == TYPE_MAX77686) {
+		max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC);
+		if (!max77686->rtc) {
+			dev_err(max77686->dev,
+				"Failed to allocate I2C device for RTC\n");
+			return -ENODEV;
+		}
+		i2c_set_clientdata(max77686->rtc, max77686);
+
+		max77686->rtc_regmap =
+			devm_regmap_init_i2c(max77686->rtc,
+					     &max77686_rtc_regmap_config);
+		if (IS_ERR(max77686->rtc_regmap)) {
+			ret = PTR_ERR(max77686->rtc_regmap);
+			dev_err(max77686->dev,
+				"failed to allocate RTC regmap: %d\n",
+				ret);
+			goto err_unregister_i2c;
+		}
 	}
 
 	ret = regmap_add_irq_chip(max77686->regmap, max77686->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
-				  IRQF_SHARED, 0, &max77686_irq_chip,
+				  IRQF_SHARED, 0, irq_chip,
 				  &max77686->irq_data);
 	if (ret) {
 		dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret);
 		goto err_unregister_i2c;
 	}
-	ret = regmap_add_irq_chip(max77686->rtc_regmap, max77686->irq,
+
+	ret = regmap_add_irq_chip(*rtc_regmap, max77686->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
-				  IRQF_SHARED, 0, &max77686_rtc_irq_chip,
+				  IRQF_SHARED, 0, rtc_irq_chip,
 				  &max77686->rtc_irq_data);
 	if (ret) {
 		dev_err(&i2c->dev, "failed to add RTC irq chip: %d\n", ret);
 		goto err_del_irqc;
 	}
 
-	ret = mfd_add_devices(max77686->dev, -1, max77686_devs,
-			      ARRAY_SIZE(max77686_devs), NULL, 0, NULL);
+	ret = mfd_add_devices(max77686->dev, -1, cells, n_devs, NULL, 0, NULL);
 	if (ret < 0) {
 		dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret);
 		goto err_del_rtc_irqc;
@@ -205,7 +346,8 @@ err_del_rtc_irqc:
 err_del_irqc:
 	regmap_del_irq_chip(max77686->irq, max77686->irq_data);
 err_unregister_i2c:
-	i2c_unregister_device(max77686->rtc);
+	if (max77686->type == TYPE_MAX77686)
+		i2c_unregister_device(max77686->rtc);
 
 	return ret;
 }
@@ -219,7 +361,8 @@ static int max77686_i2c_remove(struct i2c_client *i2c)
 	regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data);
 	regmap_del_irq_chip(max77686->irq, max77686->irq_data);
 
-	i2c_unregister_device(max77686->rtc);
+	if (max77686->type == TYPE_MAX77686)
+		i2c_unregister_device(max77686->rtc);
 
 	return 0;
 }
@@ -294,6 +437,6 @@ static void __exit max77686_i2c_exit(void)
 }
 module_exit(max77686_i2c_exit);
 
-MODULE_DESCRIPTION("MAXIM 77686 multi-function core driver");
+MODULE_DESCRIPTION("MAXIM 77686/802 multi-function core driver");
 MODULE_AUTHOR("Chiwoong Byun <woong.byun@samsung.com>");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h
index 8e177806cba1..0d60b38e5b5c 100644
--- a/include/linux/mfd/max77686-private.h
+++ b/include/linux/mfd/max77686-private.h
@@ -1,5 +1,5 @@
 /*
- * max77686-private.h - Voltage regulator driver for the Maxim 77686
+ * max77686-private.h - Voltage regulator driver for the Maxim 77686/802
  *
  *  Copyright (C) 2012 Samsung Electrnoics
  *  Chiwoong Byun <woong.byun@samsung.com>
@@ -28,6 +28,7 @@
 
 #define MAX77686_REG_INVALID		(0xff)
 
+/* MAX77686 PMIC registers */
 enum max77686_pmic_reg {
 	MAX77686_REG_DEVICE_ID		= 0x00,
 	MAX77686_REG_INTSRC		= 0x01,
@@ -181,6 +182,210 @@ enum max77686_rtc_reg {
 	MAX77686_ALARM2_DATE		= 0x1B,
 };
 
+/* MAX77802 PMIC registers */
+enum max77802_pmic_reg {
+	MAX77802_REG_DEVICE_ID		= 0x00,
+	MAX77802_REG_INTSRC		= 0x01,
+	MAX77802_REG_INT1		= 0x02,
+	MAX77802_REG_INT2		= 0x03,
+
+	MAX77802_REG_INT1MSK		= 0x04,
+	MAX77802_REG_INT2MSK		= 0x05,
+
+	MAX77802_REG_STATUS1		= 0x06,
+	MAX77802_REG_STATUS2		= 0x07,
+
+	MAX77802_REG_PWRON		= 0x08,
+	/* Reserved: 0x09 */
+	MAX77802_REG_MRSTB		= 0x0A,
+	MAX77802_REG_EPWRHOLD		= 0x0B,
+	/* Reserved: 0x0C-0x0D */
+	MAX77802_REG_BOOSTCTRL		= 0x0E,
+	MAX77802_REG_BOOSTOUT		= 0x0F,
+
+	MAX77802_REG_BUCK1CTRL		= 0x10,
+	MAX77802_REG_BUCK1DVS1		= 0x11,
+	MAX77802_REG_BUCK1DVS2		= 0x12,
+	MAX77802_REG_BUCK1DVS3		= 0x13,
+	MAX77802_REG_BUCK1DVS4		= 0x14,
+	MAX77802_REG_BUCK1DVS5		= 0x15,
+	MAX77802_REG_BUCK1DVS6		= 0x16,
+	MAX77802_REG_BUCK1DVS7		= 0x17,
+	MAX77802_REG_BUCK1DVS8		= 0x18,
+	/* Reserved: 0x19 */
+	MAX77802_REG_BUCK2CTRL1		= 0x1A,
+	MAX77802_REG_BUCK2CTRL2		= 0x1B,
+	MAX77802_REG_BUCK2PHTRAN	= 0x1C,
+	MAX77802_REG_BUCK2DVS1		= 0x1D,
+	MAX77802_REG_BUCK2DVS2		= 0x1E,
+	MAX77802_REG_BUCK2DVS3		= 0x1F,
+	MAX77802_REG_BUCK2DVS4		= 0x20,
+	MAX77802_REG_BUCK2DVS5		= 0x21,
+	MAX77802_REG_BUCK2DVS6		= 0x22,
+	MAX77802_REG_BUCK2DVS7		= 0x23,
+	MAX77802_REG_BUCK2DVS8		= 0x24,
+	/* Reserved: 0x25-0x26 */
+	MAX77802_REG_BUCK3CTRL1		= 0x27,
+	MAX77802_REG_BUCK3DVS1		= 0x28,
+	MAX77802_REG_BUCK3DVS2		= 0x29,
+	MAX77802_REG_BUCK3DVS3		= 0x2A,
+	MAX77802_REG_BUCK3DVS4		= 0x2B,
+	MAX77802_REG_BUCK3DVS5		= 0x2C,
+	MAX77802_REG_BUCK3DVS6		= 0x2D,
+	MAX77802_REG_BUCK3DVS7		= 0x2E,
+	MAX77802_REG_BUCK3DVS8		= 0x2F,
+	/* Reserved: 0x30-0x36 */
+	MAX77802_REG_BUCK4CTRL1		= 0x37,
+	MAX77802_REG_BUCK4DVS1		= 0x38,
+	MAX77802_REG_BUCK4DVS2		= 0x39,
+	MAX77802_REG_BUCK4DVS3		= 0x3A,
+	MAX77802_REG_BUCK4DVS4		= 0x3B,
+	MAX77802_REG_BUCK4DVS5		= 0x3C,
+	MAX77802_REG_BUCK4DVS6		= 0x3D,
+	MAX77802_REG_BUCK4DVS7		= 0x3E,
+	MAX77802_REG_BUCK4DVS8		= 0x3F,
+	/* Reserved: 0x40 */
+	MAX77802_REG_BUCK5CTRL		= 0x41,
+	MAX77802_REG_BUCK5OUT		= 0x42,
+	/* Reserved: 0x43 */
+	MAX77802_REG_BUCK6CTRL		= 0x44,
+	MAX77802_REG_BUCK6DVS1		= 0x45,
+	MAX77802_REG_BUCK6DVS2		= 0x46,
+	MAX77802_REG_BUCK6DVS3		= 0x47,
+	MAX77802_REG_BUCK6DVS4		= 0x48,
+	MAX77802_REG_BUCK6DVS5		= 0x49,
+	MAX77802_REG_BUCK6DVS6		= 0x4A,
+	MAX77802_REG_BUCK6DVS7		= 0x4B,
+	MAX77802_REG_BUCK6DVS8		= 0x4C,
+	/* Reserved: 0x4D */
+	MAX77802_REG_BUCK7CTRL		= 0x4E,
+	MAX77802_REG_BUCK7OUT		= 0x4F,
+	/* Reserved: 0x50 */
+	MAX77802_REG_BUCK8CTRL		= 0x51,
+	MAX77802_REG_BUCK8OUT		= 0x52,
+	/* Reserved: 0x53 */
+	MAX77802_REG_BUCK9CTRL		= 0x54,
+	MAX77802_REG_BUCK9OUT		= 0x55,
+	/* Reserved: 0x56 */
+	MAX77802_REG_BUCK10CTRL		= 0x57,
+	MAX77802_REG_BUCK10OUT		= 0x58,
+
+	/* Reserved: 0x59-0x5F */
+
+	MAX77802_REG_LDO1CTRL1		= 0x60,
+	MAX77802_REG_LDO2CTRL1		= 0x61,
+	MAX77802_REG_LDO3CTRL1		= 0x62,
+	MAX77802_REG_LDO4CTRL1		= 0x63,
+	MAX77802_REG_LDO5CTRL1		= 0x64,
+	MAX77802_REG_LDO6CTRL1		= 0x65,
+	MAX77802_REG_LDO7CTRL1		= 0x66,
+	MAX77802_REG_LDO8CTRL1		= 0x67,
+	MAX77802_REG_LDO9CTRL1		= 0x68,
+	MAX77802_REG_LDO10CTRL1		= 0x69,
+	MAX77802_REG_LDO11CTRL1		= 0x6A,
+	MAX77802_REG_LDO12CTRL1		= 0x6B,
+	MAX77802_REG_LDO13CTRL1		= 0x6C,
+	MAX77802_REG_LDO14CTRL1		= 0x6D,
+	MAX77802_REG_LDO15CTRL1		= 0x6E,
+	/* Reserved: 0x6F */
+	MAX77802_REG_LDO17CTRL1		= 0x70,
+	MAX77802_REG_LDO18CTRL1		= 0x71,
+	MAX77802_REG_LDO19CTRL1		= 0x72,
+	MAX77802_REG_LDO20CTRL1		= 0x73,
+	MAX77802_REG_LDO21CTRL1		= 0x74,
+	MAX77802_REG_LDO22CTRL1		= 0x75,
+	MAX77802_REG_LDO23CTRL1		= 0x76,
+	MAX77802_REG_LDO24CTRL1		= 0x77,
+	MAX77802_REG_LDO25CTRL1		= 0x78,
+	MAX77802_REG_LDO26CTRL1		= 0x79,
+	MAX77802_REG_LDO27CTRL1		= 0x7A,
+	MAX77802_REG_LDO28CTRL1		= 0x7B,
+	MAX77802_REG_LDO29CTRL1		= 0x7C,
+	MAX77802_REG_LDO30CTRL1		= 0x7D,
+	/* Reserved: 0x7E */
+	MAX77802_REG_LDO32CTRL1		= 0x7F,
+	MAX77802_REG_LDO33CTRL1		= 0x80,
+	MAX77802_REG_LDO34CTRL1		= 0x81,
+	MAX77802_REG_LDO35CTRL1		= 0x82,
+	/* Reserved: 0x83-0x8F */
+	MAX77802_REG_LDO1CTRL2		= 0x90,
+	MAX77802_REG_LDO2CTRL2		= 0x91,
+	MAX77802_REG_LDO3CTRL2		= 0x92,
+	MAX77802_REG_LDO4CTRL2		= 0x93,
+	MAX77802_REG_LDO5CTRL2		= 0x94,
+	MAX77802_REG_LDO6CTRL2		= 0x95,
+	MAX77802_REG_LDO7CTRL2		= 0x96,
+	MAX77802_REG_LDO8CTRL2		= 0x97,
+	MAX77802_REG_LDO9CTRL2		= 0x98,
+	MAX77802_REG_LDO10CTRL2		= 0x99,
+	MAX77802_REG_LDO11CTRL2		= 0x9A,
+	MAX77802_REG_LDO12CTRL2		= 0x9B,
+	MAX77802_REG_LDO13CTRL2		= 0x9C,
+	MAX77802_REG_LDO14CTRL2		= 0x9D,
+	MAX77802_REG_LDO15CTRL2		= 0x9E,
+	/* Reserved: 0x9F */
+	MAX77802_REG_LDO17CTRL2		= 0xA0,
+	MAX77802_REG_LDO18CTRL2		= 0xA1,
+	MAX77802_REG_LDO19CTRL2		= 0xA2,
+	MAX77802_REG_LDO20CTRL2		= 0xA3,
+	MAX77802_REG_LDO21CTRL2		= 0xA4,
+	MAX77802_REG_LDO22CTRL2		= 0xA5,
+	MAX77802_REG_LDO23CTRL2		= 0xA6,
+	MAX77802_REG_LDO24CTRL2		= 0xA7,
+	MAX77802_REG_LDO25CTRL2		= 0xA8,
+	MAX77802_REG_LDO26CTRL2		= 0xA9,
+	MAX77802_REG_LDO27CTRL2		= 0xAA,
+	MAX77802_REG_LDO28CTRL2		= 0xAB,
+	MAX77802_REG_LDO29CTRL2		= 0xAC,
+	MAX77802_REG_LDO30CTRL2		= 0xAD,
+	/* Reserved: 0xAE */
+	MAX77802_REG_LDO32CTRL2		= 0xAF,
+	MAX77802_REG_LDO33CTRL2		= 0xB0,
+	MAX77802_REG_LDO34CTRL2		= 0xB1,
+	MAX77802_REG_LDO35CTRL2		= 0xB2,
+	/* Reserved: 0xB3 */
+
+	MAX77802_REG_BBAT_CHG		= 0xB4,
+	MAX77802_REG_32KHZ		= 0xB5,
+
+	MAX77802_REG_PMIC_END		= 0xB6,
+};
+
+enum max77802_rtc_reg {
+	MAX77802_RTC_INT		= 0xC0,
+	MAX77802_RTC_INTM		= 0xC1,
+	MAX77802_RTC_CONTROLM		= 0xC2,
+	MAX77802_RTC_CONTROL		= 0xC3,
+	MAX77802_RTC_UPDATE0		= 0xC4,
+	MAX77802_RTC_UPDATE1		= 0xC5,
+	MAX77802_WTSR_SMPL_CNTL		= 0xC6,
+	MAX77802_RTC_SEC		= 0xC7,
+	MAX77802_RTC_MIN		= 0xC8,
+	MAX77802_RTC_HOUR		= 0xC9,
+	MAX77802_RTC_WEEKDAY		= 0xCA,
+	MAX77802_RTC_MONTH		= 0xCB,
+	MAX77802_RTC_YEAR		= 0xCC,
+	MAX77802_RTC_DATE		= 0xCD,
+	MAX77802_RTC_AE1		= 0xCE,
+	MAX77802_ALARM1_SEC		= 0xCF,
+	MAX77802_ALARM1_MIN		= 0xD0,
+	MAX77802_ALARM1_HOUR		= 0xD1,
+	MAX77802_ALARM1_WEEKDAY		= 0xD2,
+	MAX77802_ALARM1_MONTH		= 0xD3,
+	MAX77802_ALARM1_YEAR		= 0xD4,
+	MAX77802_ALARM1_DATE		= 0xD5,
+	MAX77802_RTC_AE2		= 0xD6,
+	MAX77802_ALARM2_SEC		= 0xD7,
+	MAX77802_ALARM2_MIN		= 0xD8,
+	MAX77802_ALARM2_HOUR		= 0xD9,
+	MAX77802_ALARM2_WEEKDAY		= 0xDA,
+	MAX77802_ALARM2_MONTH		= 0xDB,
+	MAX77802_ALARM2_YEAR		= 0xDC,
+	MAX77802_ALARM2_DATE		= 0xDD,
+
+	MAX77802_RTC_END		= 0xDF,
+};
+
 enum max77686_irq_source {
 	PMIC_INT1 = 0,
 	PMIC_INT2,
@@ -250,6 +455,7 @@ struct max77686_dev {
 
 enum max77686_types {
 	TYPE_MAX77686,
+	TYPE_MAX77802,
 };
 
 extern int max77686_irq_init(struct max77686_dev *max77686);
diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h
index 4cbcc13e8a2a..7e6dc4b2b795 100644
--- a/include/linux/mfd/max77686.h
+++ b/include/linux/mfd/max77686.h
@@ -1,5 +1,5 @@
 /*
- * max77686.h - Driver for the Maxim 77686
+ * max77686.h - Driver for the Maxim 77686/802
  *
  *  Copyright (C) 2012 Samsung Electrnoics
  *  Chiwoong Byun <woong.byun@samsung.com>
@@ -71,6 +71,54 @@ enum max77686_regulators {
 	MAX77686_REG_MAX,
 };
 
+/* MAX77802 regulator IDs */
+enum max77802_regulators {
+	MAX77802_BUCK1 = 0,
+	MAX77802_BUCK2,
+	MAX77802_BUCK3,
+	MAX77802_BUCK4,
+	MAX77802_BUCK5,
+	MAX77802_BUCK6,
+	MAX77802_BUCK7,
+	MAX77802_BUCK8,
+	MAX77802_BUCK9,
+	MAX77802_BUCK10,
+	MAX77802_LDO1,
+	MAX77802_LDO2,
+	MAX77802_LDO3,
+	MAX77802_LDO4,
+	MAX77802_LDO5,
+	MAX77802_LDO6,
+	MAX77802_LDO7,
+	MAX77802_LDO8,
+	MAX77802_LDO9,
+	MAX77802_LDO10,
+	MAX77802_LDO11,
+	MAX77802_LDO12,
+	MAX77802_LDO13,
+	MAX77802_LDO14,
+	MAX77802_LDO15,
+	MAX77802_LDO17,
+	MAX77802_LDO18,
+	MAX77802_LDO19,
+	MAX77802_LDO20,
+	MAX77802_LDO21,
+	MAX77802_LDO23,
+	MAX77802_LDO24,
+	MAX77802_LDO25,
+	MAX77802_LDO26,
+	MAX77802_LDO27,
+	MAX77802_LDO28,
+	MAX77802_LDO29,
+	MAX77802_LDO30,
+	MAX77802_LDO32,
+	MAX77802_LDO33,
+	MAX77802_LDO34,
+	MAX77802_LDO35,
+
+	MAX77802_REG_MAX,
+};
+
 struct max77686_regulator_data {
 	int id;
 	struct regulator_init_data *initdata;
@@ -83,6 +131,13 @@ enum max77686_opmode {
 	MAX77686_OPMODE_STANDBY,
 };
 
+enum max77802_opmode {
+	MAX77802_OPMODE_OFF,
+	MAX77802_OPMODE_STANDBY,
+	MAX77802_OPMODE_LP,
+	MAX77802_OPMODE_NORMAL,
+};
+
 struct max77686_opmode_data {
 	int id;
 	int mode;
-- 
cgit v1.2.3


From ec8bd56699cb4371994437583a285b855b6f5e3a Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 24 Jul 2014 17:07:16 +0100
Subject: mfd: max77686: Ensure device type IDs are architecture agnostic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extinguishes:

../drivers/mfd/max77686.c: In function ‘max77686_i2c_probe’:
../drivers/mfd/max77686.c:254:20:
	warning: cast from pointer to integer of different size

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max77686.c               | 5 ++---
 include/linux/mfd/max77686-private.h | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c
index c65332291bb4..86e552348db4 100644
--- a/drivers/mfd/max77686.c
+++ b/drivers/mfd/max77686.c
@@ -251,10 +251,9 @@ static int max77686_i2c_probe(struct i2c_client *i2c,
 		if (!match)
 			return -EINVAL;
 
-		max77686->type = (int)match->data;
-	} else {
+		max77686->type = (unsigned long)match->data;
+	} else
 		max77686->type = id->driver_data;
-	}
 
 	i2c_set_clientdata(i2c, max77686);
 	max77686->dev = &i2c->dev;
diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h
index 0d60b38e5b5c..960b92ad450d 100644
--- a/include/linux/mfd/max77686-private.h
+++ b/include/linux/mfd/max77686-private.h
@@ -439,7 +439,7 @@ struct max77686_dev {
 	struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */
 	struct i2c_client *rtc; /* slave addr 0x0c */
 
-	int type;
+	unsigned long type;
 
 	struct regmap *regmap;		/* regmap for mfd */
 	struct regmap *rtc_regmap;	/* regmap for rtc */
-- 
cgit v1.2.3


From 16369efb1f6006ec79babe53f388eed431533596 Mon Sep 17 00:00:00 2001
From: Alexander Popov <a13xp0p0v88@gmail.com>
Date: Wed, 25 Jun 2014 14:52:59 +0400
Subject: dmaengine: of: add common xlate function for matching by channel id

This patch adds a new common OF dma xlate callback function which will match a
channel by it's id. The binding expects one integer argument which it will use to
lookup the channel by the id.

Unlike of_dma_simple_xlate this function is able to handle a system with
multiple DMA controllers. When registering the of dma provider with
of_dma_controller_register a pointer to the dma_device struct which is
associated with the dt node needs to passed as the data parameter.
New function will use this pointer to match only channels which belong to the
specified DMA controller.

Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/of-dma.c   | 35 +++++++++++++++++++++++++++++++++++
 include/linux/of_dma.h |  4 ++++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index e8fe9dc455f4..d5fbeaa1e7ba 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -218,3 +218,38 @@ struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 			&dma_spec->args[0]);
 }
 EXPORT_SYMBOL_GPL(of_dma_simple_xlate);
+
+/**
+ * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data
+ *
+ * This function can be used as the of xlate callback for DMA driver which wants
+ * to match the channel based on the channel id. When using this xlate function
+ * the #dma-cells propety of the DMA controller dt node needs to be set to 1.
+ * The data parameter of of_dma_controller_register must be a pointer to the
+ * dma_device struct the function should match upon.
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct dma_device *dev = ofdma->of_dma_data;
+	struct dma_chan *chan, *candidate = NULL;
+
+	if (!dev || dma_spec->args_count != 1)
+		return NULL;
+
+	list_for_each_entry(chan, &dev->channels, device_node)
+		if (chan->chan_id == dma_spec->args[0]) {
+			candidate = chan;
+			break;
+		}
+
+	if (!candidate)
+		return NULL;
+
+	return dma_get_slave_channel(candidate);
+}
+EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id);
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index ae36298ba076..56bc026c143f 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -41,6 +41,8 @@ extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name);
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
+extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+		struct of_dma *ofdma);
 #else
 static inline int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
@@ -66,6 +68,8 @@ static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_s
 	return NULL;
 }
 
+#define of_dma_xlate_by_chan_id NULL
+
 #endif
 
 #endif /* __LINUX_OF_DMA_H */
-- 
cgit v1.2.3


From 29b4739134c73a2873adec93346f09bb76d6a794 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 24 Jul 2014 12:52:23 -0700
Subject: Input: wacom - switch from an USB driver to a HID driver

All USB Wacom tablets are actually HID devices.
For historical reasons, they are handled as plain USB devices.
The current code makes more and more reference to the HID subsystem
like implementing its own HID report descriptor parser to handle new
devices.

From the user point of view, we can transparently switch from this state
to a driver handled in the HID subsystem and clean up a lot of USB specific
code in the wacom.ko driver.

The other benefit once the USB dependecies have been removed is that we can
use a tool like uhid to make regression tests and allow further cleanup or
new implementations without risking breaking current behaviors.

To match the current handling of devices in wacom_wac.c, we rely on the
hid_type set by usbhid. usbhid sets the hid_type to HID_TYPE_USBMOUSE when
it sees a USB boot mouse protocol declared and HID_TYPE_USBNONE when the
device is plain HID. There is thus a one to one matching between the list
of supported devices before and after the switch from USB to HID.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: Jason Gerecke <killertofu@gmail.com>
Tested-by: Jason Gerecke <killertofu@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/hid/hid-core.c           |  10 +-
 drivers/hid/hid-wacom.c          |   2 +-
 drivers/input/tablet/wacom.h     |   6 +-
 drivers/input/tablet/wacom_sys.c | 222 ++++++++++++++-------------------------
 drivers/input/tablet/wacom_wac.c |  80 +++++++-------
 drivers/input/tablet/wacom_wac.h |   4 +-
 include/linux/hid.h              |   5 +
 7 files changed, 141 insertions(+), 188 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 8ed66fd1ea87..1ce751db5a7a 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -787,6 +787,15 @@ static int hid_scan_report(struct hid_device *hid)
 		/* hid-rmi should take care of them, not hid-generic */
 		hid->group = HID_GROUP_RMI;
 
+	/*
+	 * Vendor specific handlings
+	 */
+	switch (hid->vendor) {
+	case USB_VENDOR_ID_WACOM:
+		hid->group = HID_GROUP_WACOM;
+		break;
+	}
+
 	vfree(parser);
 	return 0;
 }
@@ -2339,7 +2348,6 @@ static const struct hid_device_id hid_ignore_list[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_SKIP) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_CYCLOPS) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LCSPEC) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WACOM, HID_ANY_ID) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_4_PHIDGETSERVO_20) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_1_PHIDGETSERVO_20) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_8_8_4_IF_KIT) },
diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c
index 902013ec041b..4874f4ec43f5 100644
--- a/drivers/hid/hid-wacom.c
+++ b/drivers/hid/hid-wacom.c
@@ -960,7 +960,7 @@ static const struct hid_device_id wacom_devices[] = {
 MODULE_DEVICE_TABLE(hid, wacom_devices);
 
 static struct hid_driver wacom_driver = {
-	.name = "wacom",
+	.name = "hid-wacom",
 	.id_table = wacom_devices,
 	.probe = wacom_probe,
 	.remove = wacom_remove,
diff --git a/drivers/input/tablet/wacom.h b/drivers/input/tablet/wacom.h
index caa59cab2c5f..b9212390db71 100644
--- a/drivers/input/tablet/wacom.h
+++ b/drivers/input/tablet/wacom.h
@@ -106,14 +106,12 @@ MODULE_LICENSE(DRIVER_LICENSE);
 #define USB_VENDOR_ID_LENOVO	0x17ef
 
 struct wacom {
-	dma_addr_t data_dma;
 	struct usb_device *usbdev;
 	struct usb_interface *intf;
-	struct urb *irq;
 	struct wacom_wac wacom_wac;
+	struct hid_device *hdev;
 	struct mutex lock;
 	struct work_struct work;
-	bool open;
 	char phys[32];
 	struct wacom_led {
 		u8 select[2]; /* status led selector (0..3) */
@@ -130,7 +128,7 @@ static inline void wacom_schedule_work(struct wacom_wac *wacom_wac)
 	schedule_work(&wacom->work);
 }
 
-extern const struct usb_device_id wacom_ids[];
+extern const struct hid_device_id wacom_ids[];
 
 void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len);
 void wacom_setup_device_quirks(struct wacom_features *features);
diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index c34791e7f0b9..5ceeab6e95f1 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -83,86 +83,40 @@ static int wacom_set_report(struct usb_interface *intf, u8 type, u8 id,
 	return retval;
 }
 
-static void wacom_sys_irq(struct urb *urb)
+static int wacom_raw_event(struct hid_device *hdev, struct hid_report *report,
+		u8 *raw_data, int size)
 {
-	struct wacom *wacom = urb->context;
-	struct device *dev = &wacom->intf->dev;
-	int retval;
+	struct wacom *wacom = hid_get_drvdata(hdev);
 
-	switch (urb->status) {
-	case 0:
-		/* success */
-		break;
-	case -ECONNRESET:
-	case -ENOENT:
-	case -ESHUTDOWN:
-		/* this urb is terminated, clean up */
-		dev_dbg(dev, "%s - urb shutting down with status: %d\n",
-			__func__, urb->status);
-		return;
-	default:
-		dev_dbg(dev, "%s - nonzero urb status received: %d\n",
-			__func__, urb->status);
-		goto exit;
-	}
+	if (size > WACOM_PKGLEN_MAX)
+		return 1;
 
-	wacom_wac_irq(&wacom->wacom_wac, urb->actual_length);
+	memcpy(wacom->wacom_wac.data, raw_data, size);
 
- exit:
-	usb_mark_last_busy(wacom->usbdev);
-	retval = usb_submit_urb(urb, GFP_ATOMIC);
-	if (retval)
-		dev_err(dev, "%s - usb_submit_urb failed with result %d\n",
-			__func__, retval);
+	wacom_wac_irq(&wacom->wacom_wac, size);
+
+	return 0;
 }
 
 static int wacom_open(struct input_dev *dev)
 {
 	struct wacom *wacom = input_get_drvdata(dev);
-	int retval = 0;
-
-	if (usb_autopm_get_interface(wacom->intf) < 0)
-		return -EIO;
+	int retval;
 
 	mutex_lock(&wacom->lock);
-
-	if (wacom->open)
-		goto out;
-
-	if (usb_submit_urb(wacom->irq, GFP_KERNEL)) {
-		retval = -EIO;
-		goto out;
-	}
-
-	wacom->open = true;
-	wacom->intf->needs_remote_wakeup = 1;
-
-out:
+	retval = hid_hw_open(wacom->hdev);
 	mutex_unlock(&wacom->lock);
-	usb_autopm_put_interface(wacom->intf);
+
 	return retval;
 }
 
 static void wacom_close(struct input_dev *dev)
 {
 	struct wacom *wacom = input_get_drvdata(dev);
-	int autopm_error;
-
-	autopm_error = usb_autopm_get_interface(wacom->intf);
 
 	mutex_lock(&wacom->lock);
-	if (!wacom->open)
-		goto out;
-
-	usb_kill_urb(wacom->irq);
-	wacom->open = false;
-	wacom->intf->needs_remote_wakeup = 0;
-
-out:
+	hid_hw_close(wacom->hdev);
 	mutex_unlock(&wacom->lock);
-
-	if (!autopm_error)
-		usb_autopm_put_interface(wacom->intf);
 }
 
 /*
@@ -807,7 +761,8 @@ out:
 static ssize_t wacom_led_select_store(struct device *dev, int set_id,
 				      const char *buf, size_t count)
 {
-	struct wacom *wacom = dev_get_drvdata(dev);
+	struct hid_device *hdev = dev_get_drvdata(dev);
+	struct wacom *wacom = hid_get_drvdata(hdev);
 	unsigned int id;
 	int err;
 
@@ -834,7 +789,8 @@ static ssize_t wacom_led##SET_ID##_select_store(struct device *dev,	\
 static ssize_t wacom_led##SET_ID##_select_show(struct device *dev,	\
 	struct device_attribute *attr, char *buf)			\
 {									\
-	struct wacom *wacom = dev_get_drvdata(dev);			\
+	struct hid_device *hdev = dev_get_drvdata(dev);			\
+	struct wacom *wacom = hid_get_drvdata(hdev);			\
 	return snprintf(buf, 2, "%d\n", wacom->led.select[SET_ID]);	\
 }									\
 static DEVICE_ATTR(status_led##SET_ID##_select, S_IWUSR | S_IRUSR,	\
@@ -868,7 +824,8 @@ static ssize_t wacom_luminance_store(struct wacom *wacom, u8 *dest,
 static ssize_t wacom_##name##_luminance_store(struct device *dev,	\
 	struct device_attribute *attr, const char *buf, size_t count)	\
 {									\
-	struct wacom *wacom = dev_get_drvdata(dev);			\
+	struct hid_device *hdev = dev_get_drvdata(dev);			\
+	struct wacom *wacom = hid_get_drvdata(hdev);			\
 									\
 	return wacom_luminance_store(wacom, &wacom->led.field,		\
 				     buf, count);			\
@@ -883,7 +840,8 @@ DEVICE_LUMINANCE_ATTR(buttons, img_lum);
 static ssize_t wacom_button_image_store(struct device *dev, int button_id,
 					const char *buf, size_t count)
 {
-	struct wacom *wacom = dev_get_drvdata(dev);
+	struct hid_device *hdev = dev_get_drvdata(dev);
+	struct wacom *wacom = hid_get_drvdata(hdev);
 	int err;
 
 	if (count != 1024)
@@ -1201,6 +1159,7 @@ static void wacom_wireless_work(struct work_struct *work)
 	struct wacom *wacom = container_of(work, struct wacom, work);
 	struct usb_device *usbdev = wacom->usbdev;
 	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	struct hid_device *hdev1, *hdev2;
 	struct wacom *wacom1, *wacom2;
 	struct wacom_wac *wacom_wac1, *wacom_wac2;
 	int error;
@@ -1213,32 +1172,34 @@ static void wacom_wireless_work(struct work_struct *work)
 	wacom_destroy_battery(wacom);
 
 	/* Stylus interface */
-	wacom1 = usb_get_intfdata(usbdev->config->interface[1]);
+	hdev1 = usb_get_intfdata(usbdev->config->interface[1]);
+	wacom1 = hid_get_drvdata(hdev1);
 	wacom_wac1 = &(wacom1->wacom_wac);
 	wacom_unregister_inputs(wacom1);
 
 	/* Touch interface */
-	wacom2 = usb_get_intfdata(usbdev->config->interface[2]);
+	hdev2 = usb_get_intfdata(usbdev->config->interface[2]);
+	wacom2 = hid_get_drvdata(hdev2);
 	wacom_wac2 = &(wacom2->wacom_wac);
 	wacom_unregister_inputs(wacom2);
 
 	if (wacom_wac->pid == 0) {
 		dev_info(&wacom->intf->dev, "wireless tablet disconnected\n");
 	} else {
-		const struct usb_device_id *id = wacom_ids;
+		const struct hid_device_id *id = wacom_ids;
 
 		dev_info(&wacom->intf->dev,
 			 "wireless tablet connected with PID %x\n",
 			 wacom_wac->pid);
 
-		while (id->match_flags) {
-			if (id->idVendor == USB_VENDOR_ID_WACOM &&
-			    id->idProduct == wacom_wac->pid)
+		while (id->bus) {
+			if (id->vendor == USB_VENDOR_ID_WACOM &&
+			    id->product == wacom_wac->pid)
 				break;
 			id++;
 		}
 
-		if (!id->match_flags) {
+		if (!id->bus) {
 			dev_info(&wacom->intf->dev,
 				 "ignoring unknown PID.\n");
 			return;
@@ -1246,7 +1207,7 @@ static void wacom_wireless_work(struct work_struct *work)
 
 		/* Stylus interface */
 		wacom_wac1->features =
-			*((struct wacom_features *)id->driver_info);
+			*((struct wacom_features *)id->driver_data);
 		wacom_wac1->features.device_type = BTN_TOOL_PEN;
 		snprintf(wacom_wac1->name, WACOM_NAME_MAX, "%s (WL) Pen",
 			 wacom_wac1->features.name);
@@ -1262,7 +1223,7 @@ static void wacom_wireless_work(struct work_struct *work)
 		if (wacom_wac1->features.touch_max ||
 		    wacom_wac1->features.type == INTUOSHT) {
 			wacom_wac2->features =
-				*((struct wacom_features *)id->driver_info);
+				*((struct wacom_features *)id->driver_data);
 			wacom_wac2->features.pktlen = WACOM_PKGLEN_BBTOUCH3;
 			wacom_wac2->features.device_type = BTN_TOOL_FINGER;
 			wacom_wac2->features.x_max = wacom_wac2->features.y_max = 4096;
@@ -1323,8 +1284,10 @@ static void wacom_calculate_res(struct wacom_features *features)
 						    features->unitExpo);
 }
 
-static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *id)
+static int wacom_probe(struct hid_device *hdev,
+		const struct hid_device_id *id)
 {
+	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
 	struct usb_device *dev = interface_to_usbdev(intf);
 	struct usb_endpoint_descriptor *endpoint;
 	struct wacom *wacom;
@@ -1332,34 +1295,29 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i
 	struct wacom_features *features;
 	int error;
 
-	if (!id->driver_info)
+	if (!id->driver_data)
 		return -EINVAL;
 
 	wacom = kzalloc(sizeof(struct wacom), GFP_KERNEL);
 	if (!wacom)
 		return -ENOMEM;
 
+	hid_set_drvdata(hdev, wacom);
+	wacom->hdev = hdev;
+
 	wacom_wac = &wacom->wacom_wac;
-	wacom_wac->features = *((struct wacom_features *)id->driver_info);
+	wacom_wac->features = *((struct wacom_features *)id->driver_data);
 	features = &wacom_wac->features;
 	if (features->pktlen > WACOM_PKGLEN_MAX) {
 		error = -EINVAL;
 		goto fail1;
 	}
 
-	wacom_wac->data = usb_alloc_coherent(dev, WACOM_PKGLEN_MAX,
-					     GFP_KERNEL, &wacom->data_dma);
-	if (!wacom_wac->data) {
-		error = -ENOMEM;
+	if (features->check_for_hid_type && features->hid_type != hdev->type) {
+		error = -ENODEV;
 		goto fail1;
 	}
 
-	wacom->irq = usb_alloc_urb(0, GFP_KERNEL);
-	if (!wacom->irq) {
-		error = -ENOMEM;
-		goto fail2;
-	}
-
 	wacom->usbdev = dev;
 	wacom->intf = intf;
 	mutex_init(&wacom->lock);
@@ -1375,7 +1333,7 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i
 	/* Retrieve the physical and logical size for touch devices */
 	error = wacom_retrieve_hid_descriptor(intf, features);
 	if (error)
-		goto fail3;
+		goto fail1;
 
 	/*
 	 * Intuos5 has no useful data about its touch interface in its
@@ -1423,38 +1381,38 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i
 			other_dev = dev;
 		error = wacom_add_shared_data(wacom_wac, other_dev);
 		if (error)
-			goto fail3;
+			goto fail1;
 	}
 
-	usb_fill_int_urb(wacom->irq, dev,
-			 usb_rcvintpipe(dev, endpoint->bEndpointAddress),
-			 wacom_wac->data, features->pktlen,
-			 wacom_sys_irq, wacom, endpoint->bInterval);
-	wacom->irq->transfer_dma = wacom->data_dma;
-	wacom->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
-
 	error = wacom_initialize_leds(wacom);
 	if (error)
-		goto fail4;
+		goto fail2;
 
 	if (!(features->quirks & WACOM_QUIRK_NO_INPUT)) {
 		error = wacom_register_inputs(wacom);
 		if (error)
-			goto fail5;
+			goto fail3;
 	}
 
 	/* Note that if query fails it is not a hard failure */
 	wacom_query_tablet_data(intf, features);
 
-	usb_set_intfdata(intf, wacom);
+	/* Regular HID work starts now */
+	error = hid_parse(hdev);
+	if (error) {
+		hid_err(hdev, "parse failed\n");
+		goto fail4;
+	}
 
-	if (features->quirks & WACOM_QUIRK_MONITOR) {
-		if (usb_submit_urb(wacom->irq, GFP_KERNEL)) {
-			error = -EIO;
-			goto fail5;
-		}
+	error = hid_hw_start(hdev, HID_CONNECT_HIDRAW);
+	if (error) {
+		hid_err(hdev, "hw start failed\n");
+		goto fail4;
 	}
 
+	if (features->quirks & WACOM_QUIRK_MONITOR)
+		error = hid_hw_open(hdev);
+
 	if (wacom_wac->features.type == INTUOSHT && wacom_wac->features.touch_max) {
 		if (wacom_wac->features.device_type == BTN_TOOL_FINGER)
 			wacom_wac->shared->touch_input = wacom_wac->input;
@@ -1462,78 +1420,60 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i
 
 	return 0;
 
- fail5: wacom_destroy_leds(wacom);
- fail4:	wacom_remove_shared_data(wacom_wac);
- fail3:	usb_free_urb(wacom->irq);
- fail2:	usb_free_coherent(dev, WACOM_PKGLEN_MAX, wacom_wac->data, wacom->data_dma);
+ fail4:	wacom_unregister_inputs(wacom);
+ fail3:	wacom_destroy_leds(wacom);
+ fail2:	wacom_remove_shared_data(wacom_wac);
  fail1:	kfree(wacom);
+	hid_set_drvdata(hdev, NULL);
 	return error;
 }
 
-static void wacom_disconnect(struct usb_interface *intf)
+static void wacom_remove(struct hid_device *hdev)
 {
-	struct wacom *wacom = usb_get_intfdata(intf);
+	struct wacom *wacom = hid_get_drvdata(hdev);
 
-	usb_set_intfdata(intf, NULL);
+	hid_hw_stop(hdev);
 
-	usb_kill_urb(wacom->irq);
 	cancel_work_sync(&wacom->work);
 	wacom_unregister_inputs(wacom);
 	wacom_destroy_battery(wacom);
 	wacom_destroy_leds(wacom);
-	usb_free_urb(wacom->irq);
-	usb_free_coherent(interface_to_usbdev(intf), WACOM_PKGLEN_MAX,
-			wacom->wacom_wac.data, wacom->data_dma);
 	wacom_remove_shared_data(&wacom->wacom_wac);
-	kfree(wacom);
-}
-
-static int wacom_suspend(struct usb_interface *intf, pm_message_t message)
-{
-	struct wacom *wacom = usb_get_intfdata(intf);
-
-	mutex_lock(&wacom->lock);
-	usb_kill_urb(wacom->irq);
-	mutex_unlock(&wacom->lock);
 
-	return 0;
+	hid_set_drvdata(hdev, NULL);
+	kfree(wacom);
 }
 
-static int wacom_resume(struct usb_interface *intf)
+static int wacom_resume(struct hid_device *hdev)
 {
-	struct wacom *wacom = usb_get_intfdata(intf);
+	struct wacom *wacom = hid_get_drvdata(hdev);
 	struct wacom_features *features = &wacom->wacom_wac.features;
-	int rv = 0;
 
 	mutex_lock(&wacom->lock);
 
 	/* switch to wacom mode first */
-	wacom_query_tablet_data(intf, features);
+	wacom_query_tablet_data(wacom->intf, features);
 	wacom_led_control(wacom);
 
-	if ((wacom->open || (features->quirks & WACOM_QUIRK_MONITOR)) &&
-	    usb_submit_urb(wacom->irq, GFP_NOIO) < 0)
-		rv = -EIO;
-
 	mutex_unlock(&wacom->lock);
 
-	return rv;
+	return 0;
 }
 
-static int wacom_reset_resume(struct usb_interface *intf)
+static int wacom_reset_resume(struct hid_device *hdev)
 {
-	return wacom_resume(intf);
+	return wacom_resume(hdev);
 }
 
-static struct usb_driver wacom_driver = {
+static struct hid_driver wacom_driver = {
 	.name =		"wacom",
 	.id_table =	wacom_ids,
 	.probe =	wacom_probe,
-	.disconnect =	wacom_disconnect,
-	.suspend =	wacom_suspend,
+	.remove =	wacom_remove,
+#ifdef CONFIG_PM
 	.resume =	wacom_resume,
 	.reset_resume =	wacom_reset_resume,
-	.supports_autosuspend = 1,
+#endif
+	.raw_event =	wacom_raw_event,
 };
-
-module_usb_driver(wacom_driver);
+module_hid_driver(wacom_driver);
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index f170277b6f28..1c95ce78d749 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -2197,15 +2197,18 @@ static const struct wacom_features wacom_features_0x2A =
 static const struct wacom_features wacom_features_0x314 =
 	{ "Wacom Intuos Pro S", WACOM_PKGLEN_INTUOS,  31496, 19685, 2047,
 	  63, INTUOSPS, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
-	  .touch_max = 16 };
+	  .touch_max = 16,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0x315 =
 	{ "Wacom Intuos Pro M", WACOM_PKGLEN_INTUOS,  44704, 27940, 2047,
 	  63, INTUOSPM, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
-	  .touch_max = 16 };
+	  .touch_max = 16,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0x317 =
 	{ "Wacom Intuos Pro L", WACOM_PKGLEN_INTUOS,  65024, 40640, 2047,
 	  63, INTUOSPL, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
-	  .touch_max = 16 };
+	  .touch_max = 16,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0xF4 =
 	{ "Wacom Cintiq 24HD",       WACOM_PKGLEN_INTUOS,   104280, 65400, 2047,
 	  63, WACOM_24HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 };
@@ -2215,7 +2218,8 @@ static const struct wacom_features wacom_features_0xF8 =
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0xf6 };
 static const struct wacom_features wacom_features_0xF6 =
 	{ "Wacom Cintiq 24HD touch", .type = WACOM_24HDT, /* Touch */
-	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0xf8, .touch_max = 10 };
+	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0xf8, .touch_max = 10,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0x3F =
 	{ "Wacom Cintiq 21UX",    WACOM_PKGLEN_INTUOS,    87200, 65600, 1023,
 	  63, CINTIQ, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
@@ -2233,7 +2237,8 @@ static const struct wacom_features wacom_features_0xC7 =
 	  0, PL, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0xCE =
 	{ "Wacom DTU2231",        WACOM_PKGLEN_GRAPHIRE,  47864, 27011,  511,
-	  0, DTU, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+	  0, DTU, WACOM_INTUOS_RES, WACOM_INTUOS_RES,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBMOUSE };
 static const struct wacom_features wacom_features_0xF0 =
 	{ "Wacom DTU1631",        WACOM_PKGLEN_GRAPHIRE,  34623, 19553,  511,
 	  0, DTU, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -2249,7 +2254,8 @@ static const struct wacom_features wacom_features_0x59 = /* Pen */
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5D };
 static const struct wacom_features wacom_features_0x5D = /* Touch */
 	{ "Wacom DTH2242",       .type = WACOM_24HDT,
-	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x59, .touch_max = 10 };
+	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x59, .touch_max = 10,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0xCC =
 	{ "Wacom Cintiq 21UX2",   WACOM_PKGLEN_INTUOS,    87000, 65400, 2047,
 	  63, WACOM_21UX2, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 };
@@ -2262,7 +2268,8 @@ static const struct wacom_features wacom_features_0x5B =
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5e };
 static const struct wacom_features wacom_features_0x5E =
 	{ "Wacom Cintiq 22HDT", .type = WACOM_24HDT,
-	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5b, .touch_max = 10 };
+	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5b, .touch_max = 10,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0x90 =
 	{ "Wacom ISDv4 90",       WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  255,
 	  0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -2400,14 +2407,17 @@ static const struct wacom_features wacom_features_0x301 =
 static const struct wacom_features wacom_features_0x302 =
 	{ "Wacom Intuos PT S",     WACOM_PKGLEN_BBPEN,    15200,  9500, 1023,
 	  31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES,
-	  .touch_max = 16 };
+	  .touch_max = 16,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0x303 =
 	{ "Wacom Intuos PT M",     WACOM_PKGLEN_BBPEN,    21600, 13500, 1023,
 	  31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES,
-	  .touch_max = 16 };
+	  .touch_max = 16,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0x30E =
 	{ "Wacom Intuos S",        WACOM_PKGLEN_BBPEN,    15200,  9500, 1023,
-	  31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+	  31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0x6004 =
 	{ "ISD-V4",               WACOM_PKGLEN_GRAPHIRE,  12800,  8000,  255,
 	  0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -2417,22 +2427,18 @@ static const struct wacom_features wacom_features_0x0307 =
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x309 };
 static const struct wacom_features wacom_features_0x0309 =
 	{ "Wacom ISDv5 309", .type = WACOM_24HDT, /* Touch */
-	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x0307, .touch_max = 10 };
-
-#define USB_DEVICE_WACOM(prod)					\
-	USB_DEVICE(USB_VENDOR_ID_WACOM, prod),			\
-	.driver_info = (kernel_ulong_t)&wacom_features_##prod
+	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x0307, .touch_max = 10,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 
-#define USB_DEVICE_DETAILED(prod, class, sub, proto)			\
-	USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_WACOM, prod, class,	\
-				      sub, proto),			\
-	.driver_info = (kernel_ulong_t)&wacom_features_##prod
+#define USB_DEVICE_WACOM(prod)						\
+	HID_DEVICE(BUS_USB, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\
+	.driver_data = (kernel_ulong_t)&wacom_features_##prod
 
 #define USB_DEVICE_LENOVO(prod)					\
-	USB_DEVICE(USB_VENDOR_ID_LENOVO, prod),			\
-	.driver_info = (kernel_ulong_t)&wacom_features_##prod
+	HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, prod),			\
+	.driver_data = (kernel_ulong_t)&wacom_features_##prod
 
-const struct usb_device_id wacom_ids[] = {
+const struct hid_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0x00) },
 	{ USB_DEVICE_WACOM(0x10) },
 	{ USB_DEVICE_WACOM(0x11) },
@@ -2478,9 +2484,9 @@ const struct usb_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0x45) },
 	{ USB_DEVICE_WACOM(0x57) },
 	{ USB_DEVICE_WACOM(0x59) },
-	{ USB_DEVICE_DETAILED(0x5D, USB_CLASS_HID, 0, 0) },
+	{ USB_DEVICE_WACOM(0x5D) },
 	{ USB_DEVICE_WACOM(0x5B) },
-	{ USB_DEVICE_DETAILED(0x5E, USB_CLASS_HID, 0, 0) },
+	{ USB_DEVICE_WACOM(0x5E) },
 	{ USB_DEVICE_WACOM(0xB0) },
 	{ USB_DEVICE_WACOM(0xB1) },
 	{ USB_DEVICE_WACOM(0xB2) },
@@ -2502,13 +2508,7 @@ const struct usb_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0xC5) },
 	{ USB_DEVICE_WACOM(0xC6) },
 	{ USB_DEVICE_WACOM(0xC7) },
-	/*
-	 * DTU-2231 has two interfaces on the same configuration,
-	 * only one is used.
-	 */
-	{ USB_DEVICE_DETAILED(0xCE, USB_CLASS_HID,
-			      USB_INTERFACE_SUBCLASS_BOOT,
-			      USB_INTERFACE_PROTOCOL_MOUSE) },
+	{ USB_DEVICE_WACOM(0xCE) },
 	{ USB_DEVICE_WACOM(0x84) },
 	{ USB_DEVICE_WACOM(0xD0) },
 	{ USB_DEVICE_WACOM(0xD1) },
@@ -2546,13 +2546,13 @@ const struct usb_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0x116) },
 	{ USB_DEVICE_WACOM(0x300) },
 	{ USB_DEVICE_WACOM(0x301) },
-	{ USB_DEVICE_DETAILED(0x302, USB_CLASS_HID, 0, 0) },
-	{ USB_DEVICE_DETAILED(0x303, USB_CLASS_HID, 0, 0) },
-	{ USB_DEVICE_DETAILED(0x30E, USB_CLASS_HID, 0, 0) },
+	{ USB_DEVICE_WACOM(0x302) },
+	{ USB_DEVICE_WACOM(0x303) },
+	{ USB_DEVICE_WACOM(0x30E) },
 	{ USB_DEVICE_WACOM(0x304) },
-	{ USB_DEVICE_DETAILED(0x314, USB_CLASS_HID, 0, 0) },
-	{ USB_DEVICE_DETAILED(0x315, USB_CLASS_HID, 0, 0) },
-	{ USB_DEVICE_DETAILED(0x317, USB_CLASS_HID, 0, 0) },
+	{ USB_DEVICE_WACOM(0x314) },
+	{ USB_DEVICE_WACOM(0x315) },
+	{ USB_DEVICE_WACOM(0x317) },
 	{ USB_DEVICE_WACOM(0x4001) },
 	{ USB_DEVICE_WACOM(0x4004) },
 	{ USB_DEVICE_WACOM(0x5000) },
@@ -2560,12 +2560,12 @@ const struct usb_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0x47) },
 	{ USB_DEVICE_WACOM(0xF4) },
 	{ USB_DEVICE_WACOM(0xF8) },
-	{ USB_DEVICE_DETAILED(0xF6, USB_CLASS_HID, 0, 0) },
+	{ USB_DEVICE_WACOM(0xF6) },
 	{ USB_DEVICE_WACOM(0xFA) },
 	{ USB_DEVICE_WACOM(0xFB) },
 	{ USB_DEVICE_WACOM(0x0307) },
-	{ USB_DEVICE_DETAILED(0x0309, USB_CLASS_HID, 0, 0) },
+	{ USB_DEVICE_WACOM(0x0309) },
 	{ USB_DEVICE_LENOVO(0x6004) },
 	{ }
 };
-MODULE_DEVICE_TABLE(usb, wacom_ids);
+MODULE_DEVICE_TABLE(hid, wacom_ids);
diff --git a/drivers/input/tablet/wacom_wac.h b/drivers/input/tablet/wacom_wac.h
index f48164c780f4..8821a518abf6 100644
--- a/drivers/input/tablet/wacom_wac.h
+++ b/drivers/input/tablet/wacom_wac.h
@@ -137,6 +137,8 @@ struct wacom_features {
 	unsigned touch_max;
 	int oVid;
 	int oPid;
+	bool check_for_hid_type;
+	int hid_type;
 };
 
 struct wacom_shared {
@@ -151,7 +153,7 @@ struct wacom_shared {
 struct wacom_wac {
 	char name[WACOM_NAME_MAX];
 	char pad_name[WACOM_NAME_MAX];
-	unsigned char *data;
+	unsigned char data[WACOM_PKGLEN_MAX];
 	int tool[2];
 	int id[2];
 	__u32 serial[2];
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 77632cf159c0..07fa80671db0 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -310,6 +310,11 @@ struct hid_item {
  */
 #define HID_GROUP_RMI				0x0100
 
+/*
+ * Vendor specific HID device groups
+ */
+#define HID_GROUP_WACOM				0x0101
+
 /*
  * This is the global environment of the parser. This information is
  * persistent for main-items. The global environment can be saved and
-- 
cgit v1.2.3


From 3a611c3cfba2106aed3187b90903855e776e2761 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sun, 27 Jul 2014 07:23:01 +0930
Subject: modules: Fix build error in moduleloader.h

Fengguang Wu's build bot detected that if moduleloader.h is included in
a C file (used by ftrace and kprobes to access module_alloc() when
available), that it can fail to build if CONFIG_MODULES and
CONFIG_MODULES_USE_ELF_REL is not defined.

This is because there's a printk() that dereferences struct module to
print the name of the module. But as struct module does not exist when
CONFIG_MODULES is not defined we get this error:

   include/linux/moduleloader.h: In function 'apply_relocate':
>> include/linux/moduleloader.h:48:63: error: dereferencing pointer to
>> incomplete type
     printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
								  ^
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Based-on-the-true-story-by: Steven Rostedt <rostedt@goodmis.org>
Confirms-rustys-story-ends-the-same-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleloader.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index 560ca53a75fa..7eeb9bbfb816 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -45,7 +45,8 @@ static inline int apply_relocate(Elf_Shdr *sechdrs,
 				 unsigned int relsec,
 				 struct module *me)
 {
-	printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
+	printk(KERN_ERR "module %s: REL relocation unsupported\n",
+	       module_name(me));
 	return -ENOEXEC;
 }
 #endif
@@ -67,7 +68,8 @@ static inline int apply_relocate_add(Elf_Shdr *sechdrs,
 				     unsigned int relsec,
 				     struct module *me)
 {
-	printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
+	printk(KERN_ERR "module %s: REL relocation unsupported\n",
+	       module_name(me));
 	return -ENOEXEC;
 }
 #endif
-- 
cgit v1.2.3


From 9b20a352d78a7651aa68a9220f77ccb03009d892 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.cz>
Date: Sun, 27 Jul 2014 07:24:01 +0930
Subject: module: add within_module() function

It is just a small optimization that allows to replace few
occurrences of within_module_init() || within_module_core()
with a single call.

Signed-off-by: Petr Mladek <pmladek@suse.cz>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h |  5 +++++
 kernel/module.c        | 12 ++++--------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index f520a767c86c..61d8fb2d0873 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -408,6 +408,11 @@ static inline int within_module_init(unsigned long addr, const struct module *mo
 	       addr < (unsigned long)mod->module_init + mod->init_size;
 }
 
+static inline int within_module(unsigned long addr, const struct module *mod)
+{
+	return within_module_init(addr, mod) || within_module_core(addr, mod);
+}
+
 /* Search for module by name: must hold module_mutex. */
 struct module *find_module(const char *name);
 
diff --git a/kernel/module.c b/kernel/module.c
index 81e727cf6df9..e87fdd2fc3c2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3448,8 +3448,7 @@ const char *module_address_lookup(unsigned long addr,
 	list_for_each_entry_rcu(mod, &modules, list) {
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
-		if (within_module_init(addr, mod) ||
-		    within_module_core(addr, mod)) {
+		if (within_module(addr, mod)) {
 			if (modname)
 				*modname = mod->name;
 			ret = get_ksymbol(mod, addr, size, offset);
@@ -3473,8 +3472,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
 	list_for_each_entry_rcu(mod, &modules, list) {
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
-		if (within_module_init(addr, mod) ||
-		    within_module_core(addr, mod)) {
+		if (within_module(addr, mod)) {
 			const char *sym;
 
 			sym = get_ksymbol(mod, addr, NULL, NULL);
@@ -3499,8 +3497,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
 	list_for_each_entry_rcu(mod, &modules, list) {
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
-		if (within_module_init(addr, mod) ||
-		    within_module_core(addr, mod)) {
+		if (within_module(addr, mod)) {
 			const char *sym;
 
 			sym = get_ksymbol(mod, addr, size, offset);
@@ -3764,8 +3761,7 @@ struct module *__module_address(unsigned long addr)
 	list_for_each_entry_rcu(mod, &modules, list) {
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
-		if (within_module_core(addr, mod)
-		    || within_module_init(addr, mod))
+		if (within_module(addr, mod))
 			return mod;
 	}
 	return NULL;
-- 
cgit v1.2.3


From 76681c8faa07f9e07caa3cc69f235c8719b2a6ea Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.cz>
Date: Sun, 27 Jul 2014 07:25:01 +0930
Subject: module: return bool from within_module*()

The within_module*() functions return only true or false. Let's use bool as
the return type.

Note that it should not change kABI because these are inline functions.

Signed-off-by: Petr Mladek <pmladek@suse.cz>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 61d8fb2d0873..71f282a4e307 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -396,19 +396,21 @@ bool is_module_address(unsigned long addr);
 bool is_module_percpu_address(unsigned long addr);
 bool is_module_text_address(unsigned long addr);
 
-static inline int within_module_core(unsigned long addr, const struct module *mod)
+static inline bool within_module_core(unsigned long addr,
+				      const struct module *mod)
 {
 	return (unsigned long)mod->module_core <= addr &&
 	       addr < (unsigned long)mod->module_core + mod->core_size;
 }
 
-static inline int within_module_init(unsigned long addr, const struct module *mod)
+static inline bool within_module_init(unsigned long addr,
+				      const struct module *mod)
 {
 	return (unsigned long)mod->module_init <= addr &&
 	       addr < (unsigned long)mod->module_init + mod->init_size;
 }
 
-static inline int within_module(unsigned long addr, const struct module *mod)
+static inline bool within_module(unsigned long addr, const struct module *mod)
 {
 	return within_module_init(addr, mod) || within_module_core(addr, mod);
 }
-- 
cgit v1.2.3


From 37549e94c77a94a9c32b5ae3313a3801cb66adf9 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sun, 27 Jul 2014 07:26:01 +0930
Subject: sysfs: disallow world-writable files.

This check was introduced in 2006 by Alexey Dobriyan (9774a1f54f173)
for module parameters; we removed it when we unified the check into
VERIFY_OCTAL_PERMISSIONS() as sysfs didn't have the same requirement.
Now all those users are fixed, reintroduce it.

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/kernel.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4c52907a6d8b..43e1c6a9683e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -849,5 +849,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 	 /* User perms >= group perms >= other perms */			\
 	 BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) +	\
 	 BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) +	\
+	 /* Other writable?  Generally considered a bad idea. */	\
+	 BUILD_BUG_ON_ZERO((perms) & 2) +				\
 	 (perms))
 #endif
-- 
cgit v1.2.3


From 9cb42e2a8ed06e91ce9d2c59fbae8d1185ebe2f7 Mon Sep 17 00:00:00 2001
From: "Opensource [Steve Twiss]" <stwiss.opensource@diasemi.com>
Date: Mon, 21 Jul 2014 11:39:33 +0100
Subject: mfd: da9063: Add support for AD silicon variant

Add register definitions for DA9063 AD (0x3) silicon variant ID
the ability to choose the silicon variant at run-time using regmap
configuration. This patch also adds RTC support for the AD silicon
changes.

It adds both BB and AD support as regmap ranges and then makes the
distinction between the two tables at run-time. This allows both AD
and BB silicon variants to be supported at the same time.

Suggested-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Opensource [Steve Twiss] <stwiss.opensource@diasemi.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c            |   6 +-
 drivers/mfd/da9063-i2c.c             | 134 ++++++++++++++++++++++++++++-------
 drivers/rtc/rtc-da9063.c             |  54 +++++++++-----
 include/linux/mfd/da9063/core.h      |   3 +-
 include/linux/mfd/da9063/registers.h | 129 +++++++++++++++++++++------------
 5 files changed, 236 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index e70ae315abc7..93db8bb8c8f0 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -153,9 +153,9 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 		 "Device detected (chip-ID: 0x%02X, var-ID: 0x%02X)\n",
 		 model, variant_id);
 
-	if (variant_code != PMIC_DA9063_BB) {
-		dev_err(da9063->dev, "Unknown chip variant code: 0x%02X\n",
-				variant_code);
+	if (variant_code < PMIC_DA9063_BB && variant_code != PMIC_DA9063_AD) {
+		dev_err(da9063->dev,
+			"Cannot support variant code: 0x%02X\n", variant_code);
 		return -ENODEV;
 	}
 
diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c
index 8db5c805c64f..21fd8d9a217b 100644
--- a/drivers/mfd/da9063-i2c.c
+++ b/drivers/mfd/da9063-i2c.c
@@ -25,10 +25,10 @@
 #include <linux/mfd/da9063/pdata.h>
 #include <linux/mfd/da9063/registers.h>
 
-static const struct regmap_range da9063_readable_ranges[] = {
+static const struct regmap_range da9063_ad_readable_ranges[] = {
 	{
 		.range_min = DA9063_REG_PAGE_CON,
-		.range_max = DA9063_REG_SECOND_D,
+		.range_max = DA9063_AD_REG_SECOND_D,
 	}, {
 		.range_min = DA9063_REG_SEQ,
 		.range_max = DA9063_REG_ID_32_31,
@@ -37,14 +37,14 @@ static const struct regmap_range da9063_readable_ranges[] = {
 		.range_max = DA9063_REG_AUTO3_LOW,
 	}, {
 		.range_min = DA9063_REG_T_OFFSET,
-		.range_max = DA9063_REG_GP_ID_19,
+		.range_max = DA9063_AD_REG_GP_ID_19,
 	}, {
 		.range_min = DA9063_REG_CHIP_ID,
 		.range_max = DA9063_REG_CHIP_VARIANT,
 	},
 };
 
-static const struct regmap_range da9063_writeable_ranges[] = {
+static const struct regmap_range da9063_ad_writeable_ranges[] = {
 	{
 		.range_min = DA9063_REG_PAGE_CON,
 		.range_max = DA9063_REG_PAGE_CON,
@@ -53,7 +53,7 @@ static const struct regmap_range da9063_writeable_ranges[] = {
 		.range_max = DA9063_REG_VSYS_MON,
 	}, {
 		.range_min = DA9063_REG_COUNT_S,
-		.range_max = DA9063_REG_ALARM_Y,
+		.range_max = DA9063_AD_REG_ALARM_Y,
 	}, {
 		.range_min = DA9063_REG_SEQ,
 		.range_max = DA9063_REG_ID_32_31,
@@ -62,14 +62,14 @@ static const struct regmap_range da9063_writeable_ranges[] = {
 		.range_max = DA9063_REG_AUTO3_LOW,
 	}, {
 		.range_min = DA9063_REG_CONFIG_I,
-		.range_max = DA9063_REG_MON_REG_4,
+		.range_max = DA9063_AD_REG_MON_REG_4,
 	}, {
-		.range_min = DA9063_REG_GP_ID_0,
-		.range_max = DA9063_REG_GP_ID_19,
+		.range_min = DA9063_AD_REG_GP_ID_0,
+		.range_max = DA9063_AD_REG_GP_ID_19,
 	},
 };
 
-static const struct regmap_range da9063_volatile_ranges[] = {
+static const struct regmap_range da9063_ad_volatile_ranges[] = {
 	{
 		.range_min = DA9063_REG_STATUS_A,
 		.range_max = DA9063_REG_EVENT_D,
@@ -81,26 +81,104 @@ static const struct regmap_range da9063_volatile_ranges[] = {
 		.range_max = DA9063_REG_ADC_MAN,
 	}, {
 		.range_min = DA9063_REG_ADC_RES_L,
-		.range_max = DA9063_REG_SECOND_D,
+		.range_max = DA9063_AD_REG_SECOND_D,
 	}, {
-		.range_min = DA9063_REG_MON_REG_5,
-		.range_max = DA9063_REG_MON_REG_6,
+		.range_min = DA9063_AD_REG_MON_REG_5,
+		.range_max = DA9063_AD_REG_MON_REG_6,
 	},
 };
 
-static const struct regmap_access_table da9063_readable_table = {
-	.yes_ranges = da9063_readable_ranges,
-	.n_yes_ranges = ARRAY_SIZE(da9063_readable_ranges),
+static const struct regmap_access_table da9063_ad_readable_table = {
+	.yes_ranges = da9063_ad_readable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(da9063_ad_readable_ranges),
 };
 
-static const struct regmap_access_table da9063_writeable_table = {
-	.yes_ranges = da9063_writeable_ranges,
-	.n_yes_ranges = ARRAY_SIZE(da9063_writeable_ranges),
+static const struct regmap_access_table da9063_ad_writeable_table = {
+	.yes_ranges = da9063_ad_writeable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(da9063_ad_writeable_ranges),
 };
 
-static const struct regmap_access_table da9063_volatile_table = {
-	.yes_ranges = da9063_volatile_ranges,
-	.n_yes_ranges = ARRAY_SIZE(da9063_volatile_ranges),
+static const struct regmap_access_table da9063_ad_volatile_table = {
+	.yes_ranges = da9063_ad_volatile_ranges,
+	.n_yes_ranges = ARRAY_SIZE(da9063_ad_volatile_ranges),
+};
+
+static const struct regmap_range da9063_bb_readable_ranges[] = {
+	{
+		.range_min = DA9063_REG_PAGE_CON,
+		.range_max = DA9063_BB_REG_SECOND_D,
+	}, {
+		.range_min = DA9063_REG_SEQ,
+		.range_max = DA9063_REG_ID_32_31,
+	}, {
+		.range_min = DA9063_REG_SEQ_A,
+		.range_max = DA9063_REG_AUTO3_LOW,
+	}, {
+		.range_min = DA9063_REG_T_OFFSET,
+		.range_max = DA9063_BB_REG_GP_ID_19,
+	}, {
+		.range_min = DA9063_REG_CHIP_ID,
+		.range_max = DA9063_REG_CHIP_VARIANT,
+	},
+};
+
+static const struct regmap_range da9063_bb_writeable_ranges[] = {
+	{
+		.range_min = DA9063_REG_PAGE_CON,
+		.range_max = DA9063_REG_PAGE_CON,
+	}, {
+		.range_min = DA9063_REG_FAULT_LOG,
+		.range_max = DA9063_REG_VSYS_MON,
+	}, {
+		.range_min = DA9063_REG_COUNT_S,
+		.range_max = DA9063_BB_REG_ALARM_Y,
+	}, {
+		.range_min = DA9063_REG_SEQ,
+		.range_max = DA9063_REG_ID_32_31,
+	}, {
+		.range_min = DA9063_REG_SEQ_A,
+		.range_max = DA9063_REG_AUTO3_LOW,
+	}, {
+		.range_min = DA9063_REG_CONFIG_I,
+		.range_max = DA9063_BB_REG_MON_REG_4,
+	}, {
+		.range_min = DA9063_BB_REG_GP_ID_0,
+		.range_max = DA9063_BB_REG_GP_ID_19,
+	},
+};
+
+static const struct regmap_range da9063_bb_volatile_ranges[] = {
+	{
+		.range_min = DA9063_REG_STATUS_A,
+		.range_max = DA9063_REG_EVENT_D,
+	}, {
+		.range_min = DA9063_REG_CONTROL_F,
+		.range_max = DA9063_REG_CONTROL_F,
+	}, {
+		.range_min = DA9063_REG_ADC_MAN,
+		.range_max = DA9063_REG_ADC_MAN,
+	}, {
+		.range_min = DA9063_REG_ADC_RES_L,
+		.range_max = DA9063_BB_REG_SECOND_D,
+	}, {
+		.range_min = DA9063_BB_REG_MON_REG_5,
+		.range_max = DA9063_BB_REG_MON_REG_6,
+	},
+};
+
+static const struct regmap_access_table da9063_bb_readable_table = {
+	.yes_ranges = da9063_bb_readable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(da9063_bb_readable_ranges),
+};
+
+static const struct regmap_access_table da9063_bb_writeable_table = {
+	.yes_ranges = da9063_bb_writeable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(da9063_bb_writeable_ranges),
+};
+
+static const struct regmap_access_table da9063_bb_volatile_table = {
+	.yes_ranges = da9063_bb_volatile_ranges,
+	.n_yes_ranges = ARRAY_SIZE(da9063_bb_volatile_ranges),
 };
 
 static const struct regmap_range_cfg da9063_range_cfg[] = {
@@ -123,10 +201,6 @@ static struct regmap_config da9063_regmap_config = {
 	.max_register = DA9063_REG_CHIP_VARIANT,
 
 	.cache_type = REGCACHE_RBTREE,
-
-	.rd_table = &da9063_readable_table,
-	.wr_table = &da9063_writeable_table,
-	.volatile_table = &da9063_volatile_table,
 };
 
 static int da9063_i2c_probe(struct i2c_client *i2c,
@@ -143,6 +217,16 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 	da9063->dev = &i2c->dev;
 	da9063->chip_irq = i2c->irq;
 
+	if (da9063->variant_code == PMIC_DA9063_AD) {
+		da9063_regmap_config.rd_table = &da9063_ad_readable_table;
+		da9063_regmap_config.wr_table = &da9063_ad_writeable_table;
+		da9063_regmap_config.volatile_table = &da9063_ad_volatile_table;
+	} else {
+		da9063_regmap_config.rd_table = &da9063_bb_readable_table;
+		da9063_regmap_config.wr_table = &da9063_bb_writeable_table;
+		da9063_regmap_config.volatile_table = &da9063_bb_volatile_table;
+	}
+
 	da9063->regmap = devm_regmap_init_i2c(i2c, &da9063_regmap_config);
 	if (IS_ERR(da9063->regmap)) {
 		ret = PTR_ERR(da9063->regmap);
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index 595393098b09..731ed1a97f59 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -29,6 +29,8 @@
 #define YEARS_FROM_DA9063(year)		((year) + 100)
 #define MONTHS_FROM_DA9063(month)	((month) - 1)
 
+#define RTC_ALARM_DATA_LEN (DA9063_AD_REG_ALARM_Y - DA9063_AD_REG_ALARM_MI + 1)
+
 #define RTC_DATA_LEN	(DA9063_REG_COUNT_Y - DA9063_REG_COUNT_S + 1)
 #define RTC_SEC		0
 #define RTC_MIN		1
@@ -42,6 +44,10 @@ struct da9063_rtc {
 	struct da9063		*hw;
 	struct rtc_time		alarm_time;
 	bool			rtc_sync;
+	int			alarm_year;
+	int			alarm_start;
+	int			alarm_len;
+	int			data_start;
 };
 
 static void da9063_data_to_tm(u8 *data, struct rtc_time *tm)
@@ -83,7 +89,7 @@ static int da9063_rtc_stop_alarm(struct device *dev)
 {
 	struct da9063_rtc *rtc = dev_get_drvdata(dev);
 
-	return regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y,
+	return regmap_update_bits(rtc->hw->regmap, rtc->alarm_year,
 				  DA9063_ALARM_ON, 0);
 }
 
@@ -91,7 +97,7 @@ static int da9063_rtc_start_alarm(struct device *dev)
 {
 	struct da9063_rtc *rtc = dev_get_drvdata(dev);
 
-	return regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y,
+	return regmap_update_bits(rtc->hw->regmap, rtc->alarm_year,
 				  DA9063_ALARM_ON, DA9063_ALARM_ON);
 }
 
@@ -151,8 +157,9 @@ static int da9063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	int ret;
 	unsigned int val;
 
-	ret = regmap_bulk_read(rtc->hw->regmap, DA9063_REG_ALARM_S,
-			       &data[RTC_SEC], RTC_DATA_LEN);
+	data[RTC_SEC] = 0;
+	ret = regmap_bulk_read(rtc->hw->regmap, rtc->alarm_start,
+			       &data[rtc->data_start], rtc->alarm_len);
 	if (ret < 0)
 		return ret;
 
@@ -186,14 +193,14 @@ static int da9063_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		return ret;
 	}
 
-	ret = regmap_bulk_write(rtc->hw->regmap, DA9063_REG_ALARM_S,
-				data, RTC_DATA_LEN);
+	ret = regmap_bulk_write(rtc->hw->regmap, rtc->alarm_start,
+			       &data[rtc->data_start], rtc->alarm_len);
 	if (ret < 0) {
 		dev_err(dev, "Failed to write alarm: %d\n", ret);
 		return ret;
 	}
 
-	rtc->alarm_time = alrm->time;
+	da9063_data_to_tm(data, &rtc->alarm_time);
 
 	if (alrm->enabled) {
 		ret = da9063_rtc_start_alarm(dev);
@@ -218,7 +225,7 @@ static irqreturn_t da9063_alarm_event(int irq, void *data)
 {
 	struct da9063_rtc *rtc = data;
 
-	regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y,
+	regmap_update_bits(rtc->hw->regmap, rtc->alarm_year,
 			   DA9063_ALARM_ON, 0);
 
 	rtc->rtc_sync = true;
@@ -257,7 +264,23 @@ static int da9063_rtc_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_S,
+	rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+	if (!rtc)
+		return -ENOMEM;
+
+	if (da9063->variant_code == PMIC_DA9063_AD) {
+		rtc->alarm_year = DA9063_AD_REG_ALARM_Y;
+		rtc->alarm_start = DA9063_AD_REG_ALARM_MI;
+		rtc->alarm_len = RTC_ALARM_DATA_LEN;
+		rtc->data_start = RTC_MIN;
+	} else {
+		rtc->alarm_year = DA9063_BB_REG_ALARM_Y;
+		rtc->alarm_start = DA9063_BB_REG_ALARM_S;
+		rtc->alarm_len = RTC_DATA_LEN;
+		rtc->data_start = RTC_SEC;
+	}
+
+	ret = regmap_update_bits(da9063->regmap, rtc->alarm_start,
 			DA9063_ALARM_STATUS_TICK | DA9063_ALARM_STATUS_ALARM,
 			0);
 	if (ret < 0) {
@@ -265,7 +288,7 @@ static int da9063_rtc_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_S,
+	ret = regmap_update_bits(da9063->regmap, rtc->alarm_start,
 				 DA9063_ALARM_STATUS_ALARM,
 				 DA9063_ALARM_STATUS_ALARM);
 	if (ret < 0) {
@@ -273,25 +296,22 @@ static int da9063_rtc_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_Y,
+	ret = regmap_update_bits(da9063->regmap, rtc->alarm_year,
 				 DA9063_TICK_ON, 0);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to disable TICKs\n");
 		goto err;
 	}
 
-	ret = regmap_bulk_read(da9063->regmap, DA9063_REG_ALARM_S,
-			       data, RTC_DATA_LEN);
+	data[RTC_SEC] = 0;
+	ret = regmap_bulk_read(da9063->regmap, rtc->alarm_start,
+			       &data[rtc->data_start], rtc->alarm_len);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to read initial alarm data: %d\n",
 			ret);
 		goto err;
 	}
 
-	rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
-	if (!rtc)
-		return -ENOMEM;
-
 	platform_set_drvdata(pdev, rtc);
 
 	irq_alarm = platform_get_irq_byname(pdev, "ALARM");
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 00a9aac5d1e8..b92a3262f8f6 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -34,7 +34,8 @@ enum da9063_models {
 };
 
 enum da9063_variant_codes {
-	PMIC_DA9063_BB = 0x5
+	PMIC_DA9063_AD = 0x3,
+	PMIC_DA9063_BB = 0x5,
 };
 
 /* Interrupts */
diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h
index 09a85c699da1..2e0ba6d5fbc3 100644
--- a/include/linux/mfd/da9063/registers.h
+++ b/include/linux/mfd/da9063/registers.h
@@ -104,16 +104,27 @@
 #define	DA9063_REG_COUNT_D		0x43
 #define	DA9063_REG_COUNT_MO		0x44
 #define	DA9063_REG_COUNT_Y		0x45
-#define	DA9063_REG_ALARM_S		0x46
-#define	DA9063_REG_ALARM_MI		0x47
-#define	DA9063_REG_ALARM_H		0x48
-#define	DA9063_REG_ALARM_D		0x49
-#define	DA9063_REG_ALARM_MO		0x4A
-#define	DA9063_REG_ALARM_Y		0x4B
-#define	DA9063_REG_SECOND_A		0x4C
-#define	DA9063_REG_SECOND_B		0x4D
-#define	DA9063_REG_SECOND_C		0x4E
-#define	DA9063_REG_SECOND_D		0x4F
+
+#define	DA9063_AD_REG_ALARM_MI		0x46
+#define	DA9063_AD_REG_ALARM_H		0x47
+#define	DA9063_AD_REG_ALARM_D		0x48
+#define	DA9063_AD_REG_ALARM_MO		0x49
+#define	DA9063_AD_REG_ALARM_Y		0x4A
+#define	DA9063_AD_REG_SECOND_A		0x4B
+#define	DA9063_AD_REG_SECOND_B		0x4C
+#define	DA9063_AD_REG_SECOND_C		0x4D
+#define	DA9063_AD_REG_SECOND_D		0x4E
+
+#define	DA9063_BB_REG_ALARM_S		0x46
+#define	DA9063_BB_REG_ALARM_MI		0x47
+#define	DA9063_BB_REG_ALARM_H		0x48
+#define	DA9063_BB_REG_ALARM_D		0x49
+#define	DA9063_BB_REG_ALARM_MO		0x4A
+#define	DA9063_BB_REG_ALARM_Y		0x4B
+#define	DA9063_BB_REG_SECOND_A		0x4C
+#define	DA9063_BB_REG_SECOND_B		0x4D
+#define	DA9063_BB_REG_SECOND_C		0x4E
+#define	DA9063_BB_REG_SECOND_D		0x4F
 
 /* Sequencer Control Registers */
 #define	DA9063_REG_SEQ			0x81
@@ -223,37 +234,67 @@
 #define	DA9063_REG_CONFIG_J		0x10F
 #define	DA9063_REG_CONFIG_K		0x110
 #define	DA9063_REG_CONFIG_L		0x111
-#define	DA9063_REG_CONFIG_M		0x112
-#define	DA9063_REG_CONFIG_N		0x113
-
-#define	DA9063_REG_MON_REG_1		0x114
-#define	DA9063_REG_MON_REG_2		0x115
-#define	DA9063_REG_MON_REG_3		0x116
-#define	DA9063_REG_MON_REG_4		0x117
-#define	DA9063_REG_MON_REG_5		0x11E
-#define	DA9063_REG_MON_REG_6		0x11F
-#define	DA9063_REG_TRIM_CLDR		0x120
+
+#define	DA9063_AD_REG_MON_REG_1		0x112
+#define	DA9063_AD_REG_MON_REG_2		0x113
+#define	DA9063_AD_REG_MON_REG_3		0x114
+#define	DA9063_AD_REG_MON_REG_4		0x115
+#define	DA9063_AD_REG_MON_REG_5		0x116
+#define	DA9063_AD_REG_MON_REG_6		0x117
+#define	DA9063_AD_REG_TRIM_CLDR		0x118
+
+#define	DA9063_AD_REG_GP_ID_0		0x119
+#define	DA9063_AD_REG_GP_ID_1		0x11A
+#define	DA9063_AD_REG_GP_ID_2		0x11B
+#define	DA9063_AD_REG_GP_ID_3		0x11C
+#define	DA9063_AD_REG_GP_ID_4		0x11D
+#define	DA9063_AD_REG_GP_ID_5		0x11E
+#define	DA9063_AD_REG_GP_ID_6		0x11F
+#define	DA9063_AD_REG_GP_ID_7		0x120
+#define	DA9063_AD_REG_GP_ID_8		0x121
+#define	DA9063_AD_REG_GP_ID_9		0x122
+#define	DA9063_AD_REG_GP_ID_10		0x123
+#define	DA9063_AD_REG_GP_ID_11		0x124
+#define	DA9063_AD_REG_GP_ID_12		0x125
+#define	DA9063_AD_REG_GP_ID_13		0x126
+#define	DA9063_AD_REG_GP_ID_14		0x127
+#define	DA9063_AD_REG_GP_ID_15		0x128
+#define	DA9063_AD_REG_GP_ID_16		0x129
+#define	DA9063_AD_REG_GP_ID_17		0x12A
+#define	DA9063_AD_REG_GP_ID_18		0x12B
+#define	DA9063_AD_REG_GP_ID_19		0x12C
+
+#define	DA9063_BB_REG_CONFIG_M		0x112
+#define	DA9063_BB_REG_CONFIG_N		0x113
+
+#define	DA9063_BB_REG_MON_REG_1		0x114
+#define	DA9063_BB_REG_MON_REG_2		0x115
+#define	DA9063_BB_REG_MON_REG_3		0x116
+#define	DA9063_BB_REG_MON_REG_4		0x117
+#define	DA9063_BB_REG_MON_REG_5		0x11E
+#define	DA9063_BB_REG_MON_REG_6		0x11F
+#define	DA9063_BB_REG_TRIM_CLDR		0x120
 /* General Purpose Registers */
-#define	DA9063_REG_GP_ID_0		0x121
-#define	DA9063_REG_GP_ID_1		0x122
-#define	DA9063_REG_GP_ID_2		0x123
-#define	DA9063_REG_GP_ID_3		0x124
-#define	DA9063_REG_GP_ID_4		0x125
-#define	DA9063_REG_GP_ID_5		0x126
-#define	DA9063_REG_GP_ID_6		0x127
-#define	DA9063_REG_GP_ID_7		0x128
-#define	DA9063_REG_GP_ID_8		0x129
-#define	DA9063_REG_GP_ID_9		0x12A
-#define	DA9063_REG_GP_ID_10		0x12B
-#define	DA9063_REG_GP_ID_11		0x12C
-#define	DA9063_REG_GP_ID_12		0x12D
-#define	DA9063_REG_GP_ID_13		0x12E
-#define	DA9063_REG_GP_ID_14		0x12F
-#define	DA9063_REG_GP_ID_15		0x130
-#define	DA9063_REG_GP_ID_16		0x131
-#define	DA9063_REG_GP_ID_17		0x132
-#define	DA9063_REG_GP_ID_18		0x133
-#define	DA9063_REG_GP_ID_19		0x134
+#define	DA9063_BB_REG_GP_ID_0		0x121
+#define	DA9063_BB_REG_GP_ID_1		0x122
+#define	DA9063_BB_REG_GP_ID_2		0x123
+#define	DA9063_BB_REG_GP_ID_3		0x124
+#define	DA9063_BB_REG_GP_ID_4		0x125
+#define	DA9063_BB_REG_GP_ID_5		0x126
+#define	DA9063_BB_REG_GP_ID_6		0x127
+#define	DA9063_BB_REG_GP_ID_7		0x128
+#define	DA9063_BB_REG_GP_ID_8		0x129
+#define	DA9063_BB_REG_GP_ID_9		0x12A
+#define	DA9063_BB_REG_GP_ID_10		0x12B
+#define	DA9063_BB_REG_GP_ID_11		0x12C
+#define	DA9063_BB_REG_GP_ID_12		0x12D
+#define	DA9063_BB_REG_GP_ID_13		0x12E
+#define	DA9063_BB_REG_GP_ID_14		0x12F
+#define	DA9063_BB_REG_GP_ID_15		0x130
+#define	DA9063_BB_REG_GP_ID_16		0x131
+#define	DA9063_BB_REG_GP_ID_17		0x132
+#define	DA9063_BB_REG_GP_ID_18		0x133
+#define	DA9063_BB_REG_GP_ID_19		0x134
 
 /* Chip ID and variant */
 #define	DA9063_REG_CHIP_ID		0x181
@@ -404,10 +445,10 @@
 /* DA9063_REG_CONTROL_B (addr=0x0F) */
 #define	DA9063_CHG_SEL				0x01
 #define	DA9063_WATCHDOG_PD			0x02
-#define	DA9063_RESET_BLINKING			0x04
+#define	DA9063_BB_RESET_BLINKING		0x04
 #define	DA9063_NRES_MODE			0x08
 #define	DA9063_NONKEY_LOCK			0x10
-#define	DA9063_BUCK_SLOWSTART			0x80
+#define	DA9063_BB_BUCK_SLOWSTART		0x80
 
 /* DA9063_REG_CONTROL_C (addr=0x10) */
 #define	DA9063_DEBOUNCING_MASK			0x07
@@ -467,7 +508,7 @@
 #define	DA9063_GPADC_PAUSE			0x02
 #define	DA9063_PMIF_DIS				0x04
 #define	DA9063_HS2WIRE_DIS			0x08
-#define	DA9063_CLDR_PAUSE			0x10
+#define	DA9063_BB_CLDR_PAUSE			0x10
 #define	DA9063_BBAT_DIS				0x20
 #define	DA9063_OUT_32K_PAUSE			0x40
 #define	DA9063_PMCONT_DIS			0x80
@@ -844,7 +885,7 @@
 #define DA9063_MONITOR				0x40
 
 /* DA9063_REG_ALARM_S (addr=0x46) */
-#define DA9063_ALARM_S_MASK			0x3F
+#define DA9063_BB_ALARM_S_MASK			0x3F
 #define DA9063_ALARM_STATUS_ALARM		0x80
 #define DA9063_ALARM_STATUS_TICK		0x40
 /* DA9063_REG_ALARM_MI (addr=0x47) */
-- 
cgit v1.2.3


From 3d2108dae4e1768c06718cdce19f8f0089ce310e Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 15 Jul 2014 11:21:46 +0100
Subject: mfd: wm5110: Add in the output done interrupts

wm5110 has interrupts to signal that an output has fully enabled. This
patch adds in these interrupts although use is not made of them yet.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm5110-tables.c           |  18 +++++
 include/linux/mfd/arizona/core.h      |  10 ++-
 include/linux/mfd/arizona/registers.h | 120 ++++++++++++++++++++++++++++++++++
 3 files changed, 146 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c
index b26be6e5e3f4..4729007055d8 100644
--- a/drivers/mfd/wm5110-tables.c
+++ b/drivers/mfd/wm5110-tables.c
@@ -416,6 +416,24 @@ static const struct regmap_irq wm5110_irqs[ARIZONA_NUM_IRQ] = {
 	[ARIZONA_IRQ_ISRC2_CFG_ERR] = {
 		.reg_offset = 3, .mask = ARIZONA_ISRC2_CFG_ERR_EINT1
 	},
+	[ARIZONA_IRQ_HP3R_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP3R_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP3L_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP3L_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP2R_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP2R_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP2L_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP2L_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP1R_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP1R_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP1L_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP1L_DONE_EINT1
+	},
 
 	[ARIZONA_IRQ_BOOT_DONE] = {
 		.reg_offset = 4, .mask = ARIZONA_BOOT_DONE_EINT1
diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index 70854d892760..6a62fc99f399 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -78,8 +78,14 @@ enum arizona_type {
 #define ARIZONA_IRQ_FLL1_CLOCK_OK         49
 #define ARIZONA_IRQ_MICD_CLAMP_RISE	  50
 #define ARIZONA_IRQ_MICD_CLAMP_FALL	  51
-
-#define ARIZONA_NUM_IRQ                   52
+#define ARIZONA_IRQ_HP3R_DONE             52
+#define ARIZONA_IRQ_HP3L_DONE             53
+#define ARIZONA_IRQ_HP2R_DONE             54
+#define ARIZONA_IRQ_HP2L_DONE             55
+#define ARIZONA_IRQ_HP1R_DONE             56
+#define ARIZONA_IRQ_HP1L_DONE             57
+
+#define ARIZONA_NUM_IRQ                   58
 
 struct snd_soc_dapm_context;
 
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 7204d8138b24..df93563ea8c5 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -4795,6 +4795,30 @@
 #define ARIZONA_ISRC2_CFG_ERR_EINT1_MASK         0x0040  /* ISRC2_CFG_ERR_EINT1 */
 #define ARIZONA_ISRC2_CFG_ERR_EINT1_SHIFT             6  /* ISRC2_CFG_ERR_EINT1 */
 #define ARIZONA_ISRC2_CFG_ERR_EINT1_WIDTH             1  /* ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_HP3R_DONE_EINT1                  0x0020  /* HP3R_DONE_EINT1 */
+#define ARIZONA_HP3R_DONE_EINT1_MASK             0x0020  /* HP3R_DONE_EINT1 */
+#define ARIZONA_HP3R_DONE_EINT1_SHIFT                 5  /* HP3R_DONE_EINT1 */
+#define ARIZONA_HP3R_DONE_EINT1_WIDTH                 1  /* HP3R_DONE_EINT1 */
+#define ARIZONA_HP3L_DONE_EINT1                  0x0010  /* HP3L_DONE_EINT1 */
+#define ARIZONA_HP3L_DONE_EINT1_MASK             0x0010  /* HP3L_DONE_EINT1 */
+#define ARIZONA_HP3L_DONE_EINT1_SHIFT                 4  /* HP3L_DONE_EINT1 */
+#define ARIZONA_HP3L_DONE_EINT1_WIDTH                 1  /* HP3L_DONE_EINT1 */
+#define ARIZONA_HP2R_DONE_EINT1                  0x0008  /* HP2R_DONE_EINT1 */
+#define ARIZONA_HP2R_DONE_EINT1_MASK             0x0008  /* HP2R_DONE_EINT1 */
+#define ARIZONA_HP2R_DONE_EINT1_SHIFT                 3  /* HP2R_DONE_EINT1 */
+#define ARIZONA_HP2R_DONE_EINT1_WIDTH                 1  /* HP2R_DONE_EINT1 */
+#define ARIZONA_HP2L_DONE_EINT1                  0x0004  /* HP2L_DONE_EINT1 */
+#define ARIZONA_HP2L_DONE_EINT1_MASK             0x0004  /* HP2L_DONE_EINT1 */
+#define ARIZONA_HP2L_DONE_EINT1_SHIFT                 2  /* HP2L_DONE_EINT1 */
+#define ARIZONA_HP2L_DONE_EINT1_WIDTH                 1  /* HP2L_DONE_EINT1 */
+#define ARIZONA_HP1R_DONE_EINT1                  0x0002  /* HP1R_DONE_EINT1 */
+#define ARIZONA_HP1R_DONE_EINT1_MASK             0x0002  /* HP1R_DONE_EINT1 */
+#define ARIZONA_HP1R_DONE_EINT1_SHIFT                 1  /* HP1R_DONE_EINT1 */
+#define ARIZONA_HP1R_DONE_EINT1_WIDTH                 1  /* HP1R_DONE_EINT1 */
+#define ARIZONA_HP1L_DONE_EINT1                  0x0001  /* HP1L_DONE_EINT1 */
+#define ARIZONA_HP1L_DONE_EINT1_MASK             0x0001  /* HP1L_DONE_EINT1 */
+#define ARIZONA_HP1L_DONE_EINT1_SHIFT                 0  /* HP1L_DONE_EINT1 */
+#define ARIZONA_HP1L_DONE_EINT1_WIDTH                 1  /* HP1L_DONE_EINT1 */
 
 /*
  * R3332 (0xD04) - Interrupt Status 5
@@ -4963,6 +4987,30 @@
 #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_MASK      0x0040  /* IM_ISRC2_CFG_ERR_EINT1 */
 #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_SHIFT          6  /* IM_ISRC2_CFG_ERR_EINT1 */
 #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_WIDTH          1  /* IM_ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_IM_HP3R_DONE_EINT1               0x0020  /* IM_HP3R_DONE_EINT1 */
+#define ARIZONA_IM_HP3R_DONE_EINT1_MASK          0x0020  /* IM_HP3R_DONE_EINT1 */
+#define ARIZONA_IM_HP3R_DONE_EINT1_SHIFT              5  /* IM_HP3R_DONE_EINT1 */
+#define ARIZONA_IM_HP3R_DONE_EINT1_WIDTH              1  /* IM_HP3R_DONE_EINT1 */
+#define ARIZONA_IM_HP3L_DONE_EINT1               0x0010  /* IM_HP3L_DONE_EINT1 */
+#define ARIZONA_IM_HP3L_DONE_EINT1_MASK          0x0010  /* IM_HP3L_DONE_EINT1 */
+#define ARIZONA_IM_HP3L_DONE_EINT1_SHIFT              4  /* IM_HP3L_DONE_EINT1 */
+#define ARIZONA_IM_HP3L_DONE_EINT1_WIDTH              1  /* IM_HP3L_DONE_EINT1 */
+#define ARIZONA_IM_HP2R_DONE_EINT1               0x0008  /* IM_HP2R_DONE_EINT1 */
+#define ARIZONA_IM_HP2R_DONE_EINT1_MASK          0x0008  /* IM_HP2R_DONE_EINT1 */
+#define ARIZONA_IM_HP2R_DONE_EINT1_SHIFT              3  /* IM_HP2R_DONE_EINT1 */
+#define ARIZONA_IM_HP2R_DONE_EINT1_WIDTH              1  /* IM_HP2R_DONE_EINT1 */
+#define ARIZONA_IM_HP2L_DONE_EINT1               0x0004  /* IM_HP2L_DONE_EINT1 */
+#define ARIZONA_IM_HP2L_DONE_EINT1_MASK          0x0004  /* IM_HP2L_DONE_EINT1 */
+#define ARIZONA_IM_HP2L_DONE_EINT1_SHIFT              2  /* IM_HP2L_DONE_EINT1 */
+#define ARIZONA_IM_HP2L_DONE_EINT1_WIDTH              1  /* IM_HP2L_DONE_EINT1 */
+#define ARIZONA_IM_HP1R_DONE_EINT1               0x0002  /* IM_HP1R_DONE_EINT1 */
+#define ARIZONA_IM_HP1R_DONE_EINT1_MASK          0x0002  /* IM_HP1R_DONE_EINT1 */
+#define ARIZONA_IM_HP1R_DONE_EINT1_SHIFT              1  /* IM_HP1R_DONE_EINT1 */
+#define ARIZONA_IM_HP1R_DONE_EINT1_WIDTH              1  /* IM_HP1R_DONE_EINT1 */
+#define ARIZONA_IM_HP1L_DONE_EINT1               0x0001  /* IM_HP1L_DONE_EINT1 */
+#define ARIZONA_IM_HP1L_DONE_EINT1_MASK          0x0001  /* IM_HP1L_DONE_EINT1 */
+#define ARIZONA_IM_HP1L_DONE_EINT1_SHIFT              0  /* IM_HP1L_DONE_EINT1 */
+#define ARIZONA_IM_HP1L_DONE_EINT1_WIDTH              1  /* IM_HP1L_DONE_EINT1 */
 
 /*
  * R3340 (0xD0C) - Interrupt Status 5 Mask
@@ -5139,6 +5187,30 @@
 #define ARIZONA_ISRC2_CFG_ERR_EINT2_MASK         0x0040  /* ISRC2_CFG_ERR_EINT2 */
 #define ARIZONA_ISRC2_CFG_ERR_EINT2_SHIFT             6  /* ISRC2_CFG_ERR_EINT2 */
 #define ARIZONA_ISRC2_CFG_ERR_EINT2_WIDTH             1  /* ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_HP3R_DONE_EINT2                  0x0020  /* HP3R_DONE_EINT2 */
+#define ARIZONA_HP3R_DONE_EINT2_MASK             0x0020  /* HP3R_DONE_EINT2 */
+#define ARIZONA_HP3R_DONE_EINT2_SHIFT                 5  /* HP3R_DONE_EINT2 */
+#define ARIZONA_HP3R_DONE_EINT2_WIDTH                 1  /* HP3R_DONE_EINT2 */
+#define ARIZONA_HP3L_DONE_EINT2                  0x0010  /* HP3L_DONE_EINT2 */
+#define ARIZONA_HP3L_DONE_EINT2_MASK             0x0010  /* HP3L_DONE_EINT2 */
+#define ARIZONA_HP3L_DONE_EINT2_SHIFT                 4  /* HP3L_DONE_EINT2 */
+#define ARIZONA_HP3L_DONE_EINT2_WIDTH                 1  /* HP3L_DONE_EINT2 */
+#define ARIZONA_HP2R_DONE_EINT2                  0x0008  /* HP2R_DONE_EINT2 */
+#define ARIZONA_HP2R_DONE_EINT2_MASK             0x0008  /* HP2R_DONE_EINT2 */
+#define ARIZONA_HP2R_DONE_EINT2_SHIFT                 3  /* HP2R_DONE_EINT2 */
+#define ARIZONA_HP2R_DONE_EINT2_WIDTH                 1  /* HP2R_DONE_EINT2 */
+#define ARIZONA_HP2L_DONE_EINT2                  0x0004  /* HP2L_DONE_EINT2 */
+#define ARIZONA_HP2L_DONE_EINT2_MASK             0x0004  /* HP2L_DONE_EINT2 */
+#define ARIZONA_HP2L_DONE_EINT2_SHIFT                 2  /* HP2L_DONE_EINT2 */
+#define ARIZONA_HP2L_DONE_EINT2_WIDTH                 1  /* HP2L_DONE_EINT2 */
+#define ARIZONA_HP1R_DONE_EINT2                  0x0002  /* HP1R_DONE_EINT2 */
+#define ARIZONA_HP1R_DONE_EINT2_MASK             0x0002  /* HP1R_DONE_EINT2 */
+#define ARIZONA_HP1R_DONE_EINT2_SHIFT                 1  /* HP1R_DONE_EINT2 */
+#define ARIZONA_HP1R_DONE_EINT2_WIDTH                 1  /* HP1R_DONE_EINT2 */
+#define ARIZONA_HP1L_DONE_EINT2                  0x0001  /* HP1L_DONE_EINT2 */
+#define ARIZONA_HP1L_DONE_EINT2_MASK             0x0001  /* HP1L_DONE_EINT2 */
+#define ARIZONA_HP1L_DONE_EINT2_SHIFT                 0  /* HP1L_DONE_EINT2 */
+#define ARIZONA_HP1L_DONE_EINT2_WIDTH                 1  /* HP1L_DONE_EINT2 */
 
 /*
  * R3348 (0xD14) - IRQ2 Status 5
@@ -5307,6 +5379,30 @@
 #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_MASK      0x0040  /* IM_ISRC2_CFG_ERR_EINT2 */
 #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_SHIFT          6  /* IM_ISRC2_CFG_ERR_EINT2 */
 #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_WIDTH          1  /* IM_ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_IM_HP3R_DONE_EINT2               0x0020  /* IM_HP3R_DONE_EINT2 */
+#define ARIZONA_IM_HP3R_DONE_EINT2_MASK          0x0020  /* IM_HP3R_DONE_EINT2 */
+#define ARIZONA_IM_HP3R_DONE_EINT2_SHIFT              5  /* IM_HP3R_DONE_EINT2 */
+#define ARIZONA_IM_HP3R_DONE_EINT2_WIDTH              1  /* IM_HP3R_DONE_EINT2 */
+#define ARIZONA_IM_HP3L_DONE_EINT2               0x0010  /* IM_HP3L_DONE_EINT2 */
+#define ARIZONA_IM_HP3L_DONE_EINT2_MASK          0x0010  /* IM_HP3L_DONE_EINT2 */
+#define ARIZONA_IM_HP3L_DONE_EINT2_SHIFT              4  /* IM_HP3L_DONE_EINT2 */
+#define ARIZONA_IM_HP3L_DONE_EINT2_WIDTH              1  /* IM_HP3L_DONE_EINT2 */
+#define ARIZONA_IM_HP2R_DONE_EINT2               0x0008  /* IM_HP2R_DONE_EINT2 */
+#define ARIZONA_IM_HP2R_DONE_EINT2_MASK          0x0008  /* IM_HP2R_DONE_EINT2 */
+#define ARIZONA_IM_HP2R_DONE_EINT2_SHIFT              3  /* IM_HP2R_DONE_EINT2 */
+#define ARIZONA_IM_HP2R_DONE_EINT2_WIDTH              1  /* IM_HP2R_DONE_EINT2 */
+#define ARIZONA_IM_HP2L_DONE_EINT2               0x0004  /* IM_HP2L_DONE_EINT2 */
+#define ARIZONA_IM_HP2L_DONE_EINT2_MASK          0x0004  /* IM_HP2L_DONE_EINT2 */
+#define ARIZONA_IM_HP2L_DONE_EINT2_SHIFT              2  /* IM_HP2L_DONE_EINT2 */
+#define ARIZONA_IM_HP2L_DONE_EINT2_WIDTH              1  /* IM_HP2L_DONE_EINT2 */
+#define ARIZONA_IM_HP1R_DONE_EINT2               0x0002  /* IM_HP1R_DONE_EINT2 */
+#define ARIZONA_IM_HP1R_DONE_EINT2_MASK          0x0002  /* IM_HP1R_DONE_EINT2 */
+#define ARIZONA_IM_HP1R_DONE_EINT2_SHIFT              1  /* IM_HP1R_DONE_EINT2 */
+#define ARIZONA_IM_HP1R_DONE_EINT2_WIDTH              1  /* IM_HP1R_DONE_EINT2 */
+#define ARIZONA_IM_HP1L_DONE_EINT2               0x0001  /* IM_HP1L_DONE_EINT2 */
+#define ARIZONA_IM_HP1L_DONE_EINT2_MASK          0x0001  /* IM_HP1L_DONE_EINT2 */
+#define ARIZONA_IM_HP1L_DONE_EINT2_SHIFT              0  /* IM_HP1L_DONE_EINT2 */
+#define ARIZONA_IM_HP1L_DONE_EINT2_WIDTH              1  /* IM_HP1L_DONE_EINT2 */
 
 /*
  * R3356 (0xD1C) - IRQ2 Status 5 Mask
@@ -5464,6 +5560,30 @@
 #define ARIZONA_ISRC2_CFG_ERR_STS_MASK           0x0040  /* ISRC2_CFG_ERR_STS */
 #define ARIZONA_ISRC2_CFG_ERR_STS_SHIFT               6  /* ISRC2_CFG_ERR_STS */
 #define ARIZONA_ISRC2_CFG_ERR_STS_WIDTH               1  /* ISRC2_CFG_ERR_STS */
+#define ARIZONA_HP3R_DONE_STS                    0x0020  /* HP3R_DONE_STS */
+#define ARIZONA_HP3R_DONE_STS_MASK               0x0020  /* HP3R_DONE_STS */
+#define ARIZONA_HP3R_DONE_STS_SHIFT                   5  /* HP3R_DONE_STS */
+#define ARIZONA_HP3R_DONE_STS_WIDTH                   1  /* HP3R_DONE_STS */
+#define ARIZONA_HP3L_DONE_STS                    0x0010  /* HP3L_DONE_STS */
+#define ARIZONA_HP3L_DONE_STS_MASK               0x0010  /* HP3L_DONE_STS */
+#define ARIZONA_HP3L_DONE_STS_SHIFT                   4  /* HP3L_DONE_STS */
+#define ARIZONA_HP3L_DONE_STS_WIDTH                   1  /* HP3L_DONE_STS */
+#define ARIZONA_HP2R_DONE_STS                    0x0008  /* HP2R_DONE_STS */
+#define ARIZONA_HP2R_DONE_STS_MASK               0x0008  /* HP2R_DONE_STS */
+#define ARIZONA_HP2R_DONE_STS_SHIFT                   3  /* HP2R_DONE_STS */
+#define ARIZONA_HP2R_DONE_STS_WIDTH                   1  /* HP2R_DONE_STS */
+#define ARIZONA_HP2L_DONE_STS                    0x0004  /* HP2L_DONE_STS */
+#define ARIZONA_HP2L_DONE_STS_MASK               0x0004  /* HP2L_DONE_STS */
+#define ARIZONA_HP2L_DONE_STS_SHIFT                   2  /* HP2L_DONE_STS */
+#define ARIZONA_HP2L_DONE_STS_WIDTH                   1  /* HP2L_DONE_STS */
+#define ARIZONA_HP1R_DONE_STS                    0x0002  /* HP1R_DONE_STS */
+#define ARIZONA_HP1R_DONE_STS_MASK               0x0002  /* HP1R_DONE_STS */
+#define ARIZONA_HP1R_DONE_STS_SHIFT                   1  /* HP1R_DONE_STS */
+#define ARIZONA_HP1R_DONE_STS_WIDTH                   1  /* HP1R_DONE_STS */
+#define ARIZONA_HP1L_DONE_STS                    0x0001  /* HP1L_DONE_STS */
+#define ARIZONA_HP1L_DONE_STS_MASK               0x0001  /* HP1L_DONE_STS */
+#define ARIZONA_HP1L_DONE_STS_SHIFT                   0  /* HP1L_DONE_STS */
+#define ARIZONA_HP1L_DONE_STS_WIDTH                   1  /* HP1L_DONE_STS */
 
 /*
  * R3363 (0xD23) - Interrupt Raw Status 5
-- 
cgit v1.2.3


From c0fe2c5b3f730e3d56d37f7b731a5b1191a4e8bf Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 15 Jul 2014 11:21:47 +0100
Subject: mfd: arizona: Rename thermal shutdown interrupt

Newer versions of the IP introduce short circuit protection which will
also shutdown the speaker. Rename the interrupt and associated register
bits associated with thermal events to better fit the function and avoid
conflict with future interrupt additions.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm5102-tables.c           |  8 ++--
 drivers/mfd/wm5110-tables.c           |  8 ++--
 drivers/mfd/wm8997-tables.c           |  8 ++--
 include/linux/mfd/arizona/core.h      |  4 +-
 include/linux/mfd/arizona/registers.h | 80 +++++++++++++++++------------------
 sound/soc/codecs/arizona.c            | 10 ++---
 6 files changed, 59 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index c8a993bd17ae..fb4d4bb0f47d 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -138,11 +138,11 @@ static const struct regmap_irq wm5102_irqs[ARIZONA_NUM_IRQ] = {
 		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1
 	},
 
-	[ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = {
-		.reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1
+	[ARIZONA_IRQ_SPK_OVERHEAT_WARN] = {
+		.reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1
 	},
-	[ARIZONA_IRQ_SPK_SHUTDOWN] = {
-		.reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1
+	[ARIZONA_IRQ_SPK_OVERHEAT] = {
+		.reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1
 	},
 	[ARIZONA_IRQ_HPDET] = {
 		.reg_offset = 2, .mask = ARIZONA_HPDET_EINT1
diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c
index 4729007055d8..2822768f2df1 100644
--- a/drivers/mfd/wm5110-tables.c
+++ b/drivers/mfd/wm5110-tables.c
@@ -340,11 +340,11 @@ static const struct regmap_irq wm5110_irqs[ARIZONA_NUM_IRQ] = {
 		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1
 	},
 
-	[ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = {
-		.reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1
+	[ARIZONA_IRQ_SPK_OVERHEAT_WARN] = {
+		.reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1
 	},
-	[ARIZONA_IRQ_SPK_SHUTDOWN] = {
-		.reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1
+	[ARIZONA_IRQ_SPK_OVERHEAT] = {
+		.reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1
 	},
 	[ARIZONA_IRQ_HPDET] = {
 		.reg_offset = 2, .mask = ARIZONA_HPDET_EINT1
diff --git a/drivers/mfd/wm8997-tables.c b/drivers/mfd/wm8997-tables.c
index 04bc6d5ff285..510da3b52324 100644
--- a/drivers/mfd/wm8997-tables.c
+++ b/drivers/mfd/wm8997-tables.c
@@ -65,11 +65,11 @@ static const struct regmap_irq wm8997_irqs[ARIZONA_NUM_IRQ] = {
 	[ARIZONA_IRQ_GP2] = { .reg_offset = 0, .mask = ARIZONA_GP2_EINT1 },
 	[ARIZONA_IRQ_GP1] = { .reg_offset = 0, .mask = ARIZONA_GP1_EINT1 },
 
-	[ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = {
-		.reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1
+	[ARIZONA_IRQ_SPK_OVERHEAT_WARN] = {
+		.reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1
 	},
-	[ARIZONA_IRQ_SPK_SHUTDOWN] = {
-		.reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1
+	[ARIZONA_IRQ_SPK_OVERHEAT] = {
+		.reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1
 	},
 	[ARIZONA_IRQ_HPDET] = {
 		.reg_offset = 2, .mask = ARIZONA_HPDET_EINT1
diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index 6a62fc99f399..819edf5d1edf 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -46,8 +46,8 @@ enum arizona_type {
 #define ARIZONA_IRQ_DSP_IRQ6              17
 #define ARIZONA_IRQ_DSP_IRQ7              18
 #define ARIZONA_IRQ_DSP_IRQ8              19
-#define ARIZONA_IRQ_SPK_SHUTDOWN_WARN     20
-#define ARIZONA_IRQ_SPK_SHUTDOWN          21
+#define ARIZONA_IRQ_SPK_OVERHEAT_WARN     20
+#define ARIZONA_IRQ_SPK_OVERHEAT          21
 #define ARIZONA_IRQ_MICDET                22
 #define ARIZONA_IRQ_HPDET                 23
 #define ARIZONA_IRQ_WSEQ_DONE             24
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index df93563ea8c5..f7d6f9e91da1 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -4691,14 +4691,14 @@
 /*
  * R3330 (0xD02) - Interrupt Status 3
  */
-#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1          0x8000  /* SPK_SHUTDOWN_WARN_EINT1 */
-#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_MASK     0x8000  /* SPK_SHUTDOWN_WARN_EINT1 */
-#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_SHIFT        15  /* SPK_SHUTDOWN_WARN_EINT1 */
-#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_WIDTH         1  /* SPK_SHUTDOWN_WARN_EINT1 */
-#define ARIZONA_SPK_SHUTDOWN_EINT1               0x4000  /* SPK_SHUTDOWN_EINT1 */
-#define ARIZONA_SPK_SHUTDOWN_EINT1_MASK          0x4000  /* SPK_SHUTDOWN_EINT1 */
-#define ARIZONA_SPK_SHUTDOWN_EINT1_SHIFT             14  /* SPK_SHUTDOWN_EINT1 */
-#define ARIZONA_SPK_SHUTDOWN_EINT1_WIDTH              1  /* SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_SPK_OVERHEAT_WARN_EINT1          0x8000  /* SPK_OVERHEAT_WARN_EINT1 */
+#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_MASK     0x8000  /* SPK_OVERHEAD_WARN_EINT1 */
+#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_SHIFT        15  /* SPK_OVERHEAT_WARN_EINT1 */
+#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_WIDTH         1  /* SPK_OVERHEAT_WARN_EINT1 */
+#define ARIZONA_SPK_OVERHEAT_EINT1               0x4000  /* SPK_OVERHEAT_EINT1 */
+#define ARIZONA_SPK_OVERHEAT_EINT1_MASK          0x4000  /* SPK_OVERHEAT_EINT1 */
+#define ARIZONA_SPK_OVERHEAT_EINT1_SHIFT             14  /* SPK_OVERHEAT_EINT1 */
+#define ARIZONA_SPK_OVERHEAT_EINT1_WIDTH              1  /* SPK_OVERHEAT_EINT1 */
 #define ARIZONA_HPDET_EINT1                      0x2000  /* HPDET_EINT1 */
 #define ARIZONA_HPDET_EINT1_MASK                 0x2000  /* HPDET_EINT1 */
 #define ARIZONA_HPDET_EINT1_SHIFT                    13  /* HPDET_EINT1 */
@@ -4883,14 +4883,14 @@
 /*
  * R3338 (0xD0A) - Interrupt Status 3 Mask
  */
-#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1       0x8000  /* IM_SPK_SHUTDOWN_WARN_EINT1 */
-#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_MASK  0x8000  /* IM_SPK_SHUTDOWN_WARN_EINT1 */
-#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_SHIFT     15  /* IM_SPK_SHUTDOWN_WARN_EINT1 */
-#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_WIDTH      1  /* IM_SPK_SHUTDOWN_WARN_EINT1 */
-#define ARIZONA_IM_SPK_SHUTDOWN_EINT1            0x4000  /* IM_SPK_SHUTDOWN_EINT1 */
-#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_MASK       0x4000  /* IM_SPK_SHUTDOWN_EINT1 */
-#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_SHIFT          14  /* IM_SPK_SHUTDOWN_EINT1 */
-#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_WIDTH           1  /* IM_SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1       0x8000  /* IM_SPK_OVERHEAT_WARN_EINT1 */
+#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_MASK  0x8000  /* IM_SPK_OVERHEAT_WARN_EINT1 */
+#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_SHIFT     15  /* IM_SPK_OVERHEAT_WARN_EINT1 */
+#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_WIDTH      1  /* IM_SPK_OVERHEAT_WARN_EINT1 */
+#define ARIZONA_IM_SPK_OVERHEAT_EINT1            0x4000  /* IM_SPK_OVERHEAT_EINT1 */
+#define ARIZONA_IM_SPK_OVERHEAT_EINT1_MASK       0x4000  /* IM_SPK_OVERHEAT_EINT1 */
+#define ARIZONA_IM_SPK_OVERHEAT_EINT1_SHIFT          14  /* IM_SPK_OVERHEAT_EINT1 */
+#define ARIZONA_IM_SPK_OVERHEAT_EINT1_WIDTH           1  /* IM_SPK_OVERHEAT_EINT1 */
 #define ARIZONA_IM_HPDET_EINT1                   0x2000  /* IM_HPDET_EINT1 */
 #define ARIZONA_IM_HPDET_EINT1_MASK              0x2000  /* IM_HPDET_EINT1 */
 #define ARIZONA_IM_HPDET_EINT1_SHIFT                 13  /* IM_HPDET_EINT1 */
@@ -5083,14 +5083,14 @@
 /*
  * R3346 (0xD12) - IRQ2 Status 3
  */
-#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2          0x8000  /* SPK_SHUTDOWN_WARN_EINT2 */
-#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_MASK     0x8000  /* SPK_SHUTDOWN_WARN_EINT2 */
-#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_SHIFT        15  /* SPK_SHUTDOWN_WARN_EINT2 */
-#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_WIDTH         1  /* SPK_SHUTDOWN_WARN_EINT2 */
-#define ARIZONA_SPK_SHUTDOWN_EINT2               0x4000  /* SPK_SHUTDOWN_EINT2 */
-#define ARIZONA_SPK_SHUTDOWN_EINT2_MASK          0x4000  /* SPK_SHUTDOWN_EINT2 */
-#define ARIZONA_SPK_SHUTDOWN_EINT2_SHIFT             14  /* SPK_SHUTDOWN_EINT2 */
-#define ARIZONA_SPK_SHUTDOWN_EINT2_WIDTH              1  /* SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_SPK_OVERHEAT_WARN_EINT2          0x8000  /* SPK_OVERHEAT_WARN_EINT2 */
+#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_MASK     0x8000  /* SPK_OVERHEAT_WARN_EINT2 */
+#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_SHIFT        15  /* SPK_OVERHEAT_WARN_EINT2 */
+#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_WIDTH         1  /* SPK_OVERHEAT_WARN_EINT2 */
+#define ARIZONA_SPK_OVERHEAT_EINT2               0x4000  /* SPK_OVERHEAT_EINT2 */
+#define ARIZONA_SPK_OVERHEAT_EINT2_MASK          0x4000  /* SPK_OVERHEAT_EINT2 */
+#define ARIZONA_SPK_OVERHEAT_EINT2_SHIFT             14  /* SPK_OVERHEAT_EINT2 */
+#define ARIZONA_SPK_OVERHEAT_EINT2_WIDTH              1  /* SPK_OVERHEAT_EINT2 */
 #define ARIZONA_HPDET_EINT2                      0x2000  /* HPDET_EINT2 */
 #define ARIZONA_HPDET_EINT2_MASK                 0x2000  /* HPDET_EINT2 */
 #define ARIZONA_HPDET_EINT2_SHIFT                    13  /* HPDET_EINT2 */
@@ -5275,14 +5275,14 @@
 /*
  * R3354 (0xD1A) - IRQ2 Status 3 Mask
  */
-#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2       0x8000  /* IM_SPK_SHUTDOWN_WARN_EINT2 */
-#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_MASK  0x8000  /* IM_SPK_SHUTDOWN_WARN_EINT2 */
-#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_SHIFT     15  /* IM_SPK_SHUTDOWN_WARN_EINT2 */
-#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_WIDTH      1  /* IM_SPK_SHUTDOWN_WARN_EINT2 */
-#define ARIZONA_IM_SPK_SHUTDOWN_EINT2            0x4000  /* IM_SPK_SHUTDOWN_EINT2 */
-#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_MASK       0x4000  /* IM_SPK_SHUTDOWN_EINT2 */
-#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_SHIFT          14  /* IM_SPK_SHUTDOWN_EINT2 */
-#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_WIDTH           1  /* IM_SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2       0x8000  /* IM_SPK_OVERHEAT_WARN_EINT2 */
+#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_MASK  0x8000  /* IM_SPK_OVERHEAT_WARN_EINT2 */
+#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_SHIFT     15  /* IM_SPK_OVERHEAT_WARN_EINT2 */
+#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_WIDTH      1  /* IM_SPK_OVERHEAT_WARN_EINT2 */
+#define ARIZONA_IM_SPK_OVERHEAT_EINT2            0x4000  /* IM_SPK_OVERHEAT_EINT2 */
+#define ARIZONA_IM_SPK_OVERHEAT_EINT2_MASK       0x4000  /* IM_SPK_OVERHEAT_EINT2 */
+#define ARIZONA_IM_SPK_OVERHEAT_EINT2_SHIFT          14  /* IM_SPK_OVERHEAT_EINT2 */
+#define ARIZONA_IM_SPK_OVERHEAT_EINT2_WIDTH           1  /* IM_SPK_OVERHEAT_EINT2 */
 #define ARIZONA_IM_HPDET_EINT2                   0x2000  /* IM_HPDET_EINT2 */
 #define ARIZONA_IM_HPDET_EINT2_MASK              0x2000  /* IM_HPDET_EINT2 */
 #define ARIZONA_IM_HPDET_EINT2_SHIFT                 13  /* IM_HPDET_EINT2 */
@@ -5456,14 +5456,14 @@
 /*
  * R3361 (0xD21) - Interrupt Raw Status 3
  */
-#define ARIZONA_SPK_SHUTDOWN_WARN_STS            0x8000  /* SPK_SHUTDOWN_WARN_STS */
-#define ARIZONA_SPK_SHUTDOWN_WARN_STS_MASK       0x8000  /* SPK_SHUTDOWN_WARN_STS */
-#define ARIZONA_SPK_SHUTDOWN_WARN_STS_SHIFT          15  /* SPK_SHUTDOWN_WARN_STS */
-#define ARIZONA_SPK_SHUTDOWN_WARN_STS_WIDTH           1  /* SPK_SHUTDOWN_WARN_STS */
-#define ARIZONA_SPK_SHUTDOWN_STS                 0x4000  /* SPK_SHUTDOWN_STS */
-#define ARIZONA_SPK_SHUTDOWN_STS_MASK            0x4000  /* SPK_SHUTDOWN_STS */
-#define ARIZONA_SPK_SHUTDOWN_STS_SHIFT               14  /* SPK_SHUTDOWN_STS */
-#define ARIZONA_SPK_SHUTDOWN_STS_WIDTH                1  /* SPK_SHUTDOWN_STS */
+#define ARIZONA_SPK_OVERHEAT_WARN_STS            0x8000  /* SPK_OVERHEAT_WARN_STS */
+#define ARIZONA_SPK_OVERHEAT_WARN_STS_MASK       0x8000  /* SPK_OVERHEAT_WARN_STS */
+#define ARIZONA_SPK_OVERHEAT_WARN_STS_SHIFT          15  /* SPK_OVERHEAT_WARN_STS */
+#define ARIZONA_SPK_OVERHEAT_WARN_STS_WIDTH           1  /* SPK_OVERHEAT_WARN_STS */
+#define ARIZONA_SPK_OVERHEAT_STS                 0x4000  /* SPK_OVERHEAT_STS */
+#define ARIZONA_SPK_OVERHEAT_STS_MASK            0x4000  /* SPK_OVERHEAT_STS */
+#define ARIZONA_SPK_OVERHEAT_STS_SHIFT               14  /* SPK_OVERHEAT_STS */
+#define ARIZONA_SPK_OVERHEAT_STS_WIDTH                1  /* SPK_OVERHEAT_STS */
 #define ARIZONA_HPDET_STS                        0x2000  /* HPDET_STS */
 #define ARIZONA_HPDET_STS_MASK                   0x2000  /* HPDET_STS */
 #define ARIZONA_HPDET_STS_SHIFT                      13  /* HPDET_STS */
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index 29e198f57d4c..1b14105d8da3 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -107,7 +107,7 @@ static int arizona_spk_ev(struct snd_soc_dapm_widget *w,
 		break;
 	case SND_SOC_DAPM_POST_PMU:
 		val = snd_soc_read(codec, ARIZONA_INTERRUPT_RAW_STATUS_3);
-		if (val & ARIZONA_SPK_SHUTDOWN_STS) {
+		if (val & ARIZONA_SPK_OVERHEAT_STS) {
 			dev_crit(arizona->dev,
 				 "Speaker not enabled due to temperature\n");
 			return -EBUSY;
@@ -159,7 +159,7 @@ static irqreturn_t arizona_thermal_warn(int irq, void *data)
 	if (ret != 0) {
 		dev_err(arizona->dev, "Failed to read thermal status: %d\n",
 			ret);
-	} else if (val & ARIZONA_SPK_SHUTDOWN_WARN_STS) {
+	} else if (val & ARIZONA_SPK_OVERHEAT_WARN_STS) {
 		dev_crit(arizona->dev, "Thermal warning\n");
 	}
 
@@ -177,7 +177,7 @@ static irqreturn_t arizona_thermal_shutdown(int irq, void *data)
 	if (ret != 0) {
 		dev_err(arizona->dev, "Failed to read thermal status: %d\n",
 			ret);
-	} else if (val & ARIZONA_SPK_SHUTDOWN_STS) {
+	} else if (val & ARIZONA_SPK_OVERHEAT_STS) {
 		dev_crit(arizona->dev, "Thermal shutdown\n");
 		ret = regmap_update_bits(arizona->regmap,
 					 ARIZONA_OUTPUT_ENABLES_1,
@@ -223,7 +223,7 @@ int arizona_init_spk(struct snd_soc_codec *codec)
 		break;
 	}
 
-	ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_SHUTDOWN_WARN,
+	ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_OVERHEAT_WARN,
 				  "Thermal warning", arizona_thermal_warn,
 				  arizona);
 	if (ret != 0)
@@ -231,7 +231,7 @@ int arizona_init_spk(struct snd_soc_codec *codec)
 			"Failed to get thermal warning IRQ: %d\n",
 			ret);
 
-	ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_SHUTDOWN,
+	ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_OVERHEAT,
 				  "Thermal shutdown", arizona_thermal_shutdown,
 				  arizona);
 	if (ret != 0)
-- 
cgit v1.2.3


From 3215501fc90e109c7b854423e02eb05bc638b555 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 15 Jul 2014 11:21:48 +0100
Subject: mfd: wm5110: Add new interrupt register definitions

Newer versions of the IP have a lot of new interrupts and move several
existing interrupts. This patch adds the register definitions and regmap
hookup for these interrupts.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/arizona-irq.c             |  10 +-
 drivers/mfd/arizona.h                 |   1 +
 drivers/mfd/wm5110-tables.c           | 213 +++++++++++++
 include/linux/mfd/arizona/core.h      |  21 +-
 include/linux/mfd/arizona/registers.h | 585 ++++++++++++++++++++++++++++++++++
 5 files changed, 827 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index 17102f589100..e780bc40165d 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -203,7 +203,15 @@ int arizona_irq_init(struct arizona *arizona)
 #ifdef CONFIG_MFD_WM5110
 	case WM5110:
 		aod = &wm5110_aod;
-		irq = &wm5110_irq;
+
+		switch (arizona->rev) {
+		case 0 ... 2:
+			irq = &wm5110_irq;
+			break;
+		default:
+			irq = &wm5110_revd_irq;
+			break;
+		}
 
 		ctrlif_error = false;
 		break;
diff --git a/drivers/mfd/arizona.h b/drivers/mfd/arizona.h
index 2951498ab9a1..fbe2843271c5 100644
--- a/drivers/mfd/arizona.h
+++ b/drivers/mfd/arizona.h
@@ -36,6 +36,7 @@ extern const struct regmap_irq_chip wm5102_irq;
 
 extern const struct regmap_irq_chip wm5110_aod;
 extern const struct regmap_irq_chip wm5110_irq;
+extern const struct regmap_irq_chip wm5110_revd_irq;
 
 extern const struct regmap_irq_chip wm8997_aod;
 extern const struct regmap_irq_chip wm8997_irq;
diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c
index 2822768f2df1..9b98ee559188 100644
--- a/drivers/mfd/wm5110-tables.c
+++ b/drivers/mfd/wm5110-tables.c
@@ -457,6 +457,209 @@ const struct regmap_irq_chip wm5110_irq = {
 };
 EXPORT_SYMBOL_GPL(wm5110_irq);
 
+static const struct regmap_irq wm5110_revd_irqs[ARIZONA_NUM_IRQ] = {
+	[ARIZONA_IRQ_GP4] = { .reg_offset = 0, .mask = ARIZONA_GP4_EINT1 },
+	[ARIZONA_IRQ_GP3] = { .reg_offset = 0, .mask = ARIZONA_GP3_EINT1 },
+	[ARIZONA_IRQ_GP2] = { .reg_offset = 0, .mask = ARIZONA_GP2_EINT1 },
+	[ARIZONA_IRQ_GP1] = { .reg_offset = 0, .mask = ARIZONA_GP1_EINT1 },
+
+	[ARIZONA_IRQ_DSP4_RAM_RDY] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP4_RAM_RDY_EINT1
+	},
+	[ARIZONA_IRQ_DSP3_RAM_RDY] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP3_RAM_RDY_EINT1
+	},
+	[ARIZONA_IRQ_DSP2_RAM_RDY] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP2_RAM_RDY_EINT1
+	},
+	[ARIZONA_IRQ_DSP1_RAM_RDY] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP1_RAM_RDY_EINT1
+	},
+	[ARIZONA_IRQ_DSP_IRQ8] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ8_EINT1
+	},
+	[ARIZONA_IRQ_DSP_IRQ7] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ7_EINT1
+	},
+	[ARIZONA_IRQ_DSP_IRQ6] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ6_EINT1
+	},
+	[ARIZONA_IRQ_DSP_IRQ5] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ5_EINT1
+	},
+	[ARIZONA_IRQ_DSP_IRQ4] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ4_EINT1
+	},
+	[ARIZONA_IRQ_DSP_IRQ3] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ3_EINT1
+	},
+	[ARIZONA_IRQ_DSP_IRQ2] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ2_EINT1
+	},
+	[ARIZONA_IRQ_DSP_IRQ1] = {
+		.reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1
+	},
+
+	[ARIZONA_IRQ_SPK_OVERHEAT_WARN] = {
+		.reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1
+	},
+	[ARIZONA_IRQ_SPK_OVERHEAT] = {
+		.reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1
+	},
+	[ARIZONA_IRQ_HPDET] = {
+		.reg_offset = 2, .mask = ARIZONA_HPDET_EINT1
+	},
+	[ARIZONA_IRQ_MICDET] = {
+		.reg_offset = 2, .mask = ARIZONA_MICDET_EINT1
+	},
+	[ARIZONA_IRQ_WSEQ_DONE] = {
+		.reg_offset = 2, .mask = ARIZONA_WSEQ_DONE_EINT1
+	},
+	[ARIZONA_IRQ_DRC2_SIG_DET] = {
+		.reg_offset = 2, .mask = ARIZONA_DRC2_SIG_DET_EINT1
+	},
+	[ARIZONA_IRQ_DRC1_SIG_DET] = {
+		.reg_offset = 2, .mask = ARIZONA_DRC1_SIG_DET_EINT1
+	},
+	[ARIZONA_IRQ_ASRC2_LOCK] = {
+		.reg_offset = 2, .mask = ARIZONA_ASRC2_LOCK_EINT1
+	},
+	[ARIZONA_IRQ_ASRC1_LOCK] = {
+		.reg_offset = 2, .mask = ARIZONA_ASRC1_LOCK_EINT1
+	},
+	[ARIZONA_IRQ_UNDERCLOCKED] = {
+		.reg_offset = 2, .mask = ARIZONA_UNDERCLOCKED_EINT1
+	},
+	[ARIZONA_IRQ_OVERCLOCKED] = {
+		.reg_offset = 2, .mask = ARIZONA_OVERCLOCKED_EINT1
+	},
+	[ARIZONA_IRQ_FLL2_LOCK] = {
+		.reg_offset = 2, .mask = ARIZONA_FLL2_LOCK_EINT1
+	},
+	[ARIZONA_IRQ_FLL1_LOCK] = {
+		.reg_offset = 2, .mask = ARIZONA_FLL1_LOCK_EINT1
+	},
+	[ARIZONA_IRQ_CLKGEN_ERR] = {
+		.reg_offset = 2, .mask = ARIZONA_CLKGEN_ERR_EINT1
+	},
+	[ARIZONA_IRQ_CLKGEN_ERR_ASYNC] = {
+		.reg_offset = 2, .mask = ARIZONA_CLKGEN_ERR_ASYNC_EINT1
+	},
+
+	[ARIZONA_IRQ_CTRLIF_ERR] = {
+		.reg_offset = 3, .mask = ARIZONA_V2_CTRLIF_ERR_EINT1
+	},
+	[ARIZONA_IRQ_MIXER_DROPPED_SAMPLES] = {
+		.reg_offset = 3, .mask = ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1
+	},
+	[ARIZONA_IRQ_ASYNC_CLK_ENA_LOW] = {
+		.reg_offset = 3, .mask = ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1
+	},
+	[ARIZONA_IRQ_SYSCLK_ENA_LOW] = {
+		.reg_offset = 3, .mask = ARIZONA_V2_SYSCLK_ENA_LOW_EINT1
+	},
+	[ARIZONA_IRQ_ISRC1_CFG_ERR] = {
+		.reg_offset = 3, .mask = ARIZONA_V2_ISRC1_CFG_ERR_EINT1
+	},
+	[ARIZONA_IRQ_ISRC2_CFG_ERR] = {
+		.reg_offset = 3, .mask = ARIZONA_V2_ISRC2_CFG_ERR_EINT1
+	},
+	[ARIZONA_IRQ_ISRC3_CFG_ERR] = {
+		.reg_offset = 3, .mask = ARIZONA_V2_ISRC3_CFG_ERR_EINT1
+	},
+	[ARIZONA_IRQ_HP3R_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP3R_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP3L_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP3L_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP2R_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP2R_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP2L_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP2L_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP1R_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP1R_DONE_EINT1
+	},
+	[ARIZONA_IRQ_HP1L_DONE] = {
+		.reg_offset = 3, .mask = ARIZONA_HP1L_DONE_EINT1
+	},
+
+	[ARIZONA_IRQ_BOOT_DONE] = {
+		.reg_offset = 4, .mask = ARIZONA_BOOT_DONE_EINT1
+	},
+	[ARIZONA_IRQ_ASRC_CFG_ERR] = {
+		.reg_offset = 4, .mask = ARIZONA_V2_ASRC_CFG_ERR_EINT1
+	},
+	[ARIZONA_IRQ_FLL2_CLOCK_OK] = {
+		.reg_offset = 4, .mask = ARIZONA_FLL2_CLOCK_OK_EINT1
+	},
+	[ARIZONA_IRQ_FLL1_CLOCK_OK] = {
+		.reg_offset = 4, .mask = ARIZONA_FLL1_CLOCK_OK_EINT1
+	},
+
+	[ARIZONA_IRQ_DSP_SHARED_WR_COLL] = {
+		.reg_offset = 5, .mask = ARIZONA_DSP_SHARED_WR_COLL_EINT1
+	},
+	[ARIZONA_IRQ_SPK_SHUTDOWN] = {
+		.reg_offset = 5, .mask = ARIZONA_SPK_SHUTDOWN_EINT1
+	},
+	[ARIZONA_IRQ_SPK1R_SHORT] = {
+		.reg_offset = 5, .mask = ARIZONA_SPK1R_SHORT_EINT1
+	},
+	[ARIZONA_IRQ_SPK1L_SHORT] = {
+		.reg_offset = 5, .mask = ARIZONA_SPK1L_SHORT_EINT1
+	},
+	[ARIZONA_IRQ_HP3R_SC_NEG] = {
+		.reg_offset = 5, .mask = ARIZONA_HP3R_SC_NEG_EINT1
+	},
+	[ARIZONA_IRQ_HP3R_SC_POS] = {
+		.reg_offset = 5, .mask = ARIZONA_HP3R_SC_POS_EINT1
+	},
+	[ARIZONA_IRQ_HP3L_SC_NEG] = {
+		.reg_offset = 5, .mask = ARIZONA_HP3L_SC_NEG_EINT1
+	},
+	[ARIZONA_IRQ_HP3L_SC_POS] = {
+		.reg_offset = 5, .mask = ARIZONA_HP3L_SC_POS_EINT1
+	},
+	[ARIZONA_IRQ_HP2R_SC_NEG] = {
+		.reg_offset = 5, .mask = ARIZONA_HP2R_SC_NEG_EINT1
+	},
+	[ARIZONA_IRQ_HP2R_SC_POS] = {
+		.reg_offset = 5, .mask = ARIZONA_HP2R_SC_POS_EINT1
+	},
+	[ARIZONA_IRQ_HP2L_SC_NEG] = {
+		.reg_offset = 5, .mask = ARIZONA_HP2L_SC_NEG_EINT1
+	},
+	[ARIZONA_IRQ_HP2L_SC_POS] = {
+		.reg_offset = 5, .mask = ARIZONA_HP2L_SC_POS_EINT1
+	},
+	[ARIZONA_IRQ_HP1R_SC_NEG] = {
+		.reg_offset = 5, .mask = ARIZONA_HP1R_SC_NEG_EINT1
+	},
+	[ARIZONA_IRQ_HP1R_SC_POS] = {
+		.reg_offset = 5, .mask = ARIZONA_HP1R_SC_POS_EINT1
+	},
+	[ARIZONA_IRQ_HP1L_SC_NEG] = {
+		.reg_offset = 5, .mask = ARIZONA_HP1L_SC_NEG_EINT1
+	},
+	[ARIZONA_IRQ_HP1L_SC_POS] = {
+		.reg_offset = 5, .mask = ARIZONA_HP1L_SC_POS_EINT1
+	},
+};
+
+const struct regmap_irq_chip wm5110_revd_irq = {
+	.name = "wm5110 IRQ",
+	.status_base = ARIZONA_INTERRUPT_STATUS_1,
+	.mask_base = ARIZONA_INTERRUPT_STATUS_1_MASK,
+	.ack_base = ARIZONA_INTERRUPT_STATUS_1,
+	.num_regs = 6,
+	.irqs = wm5110_revd_irqs,
+	.num_irqs = ARRAY_SIZE(wm5110_revd_irqs),
+};
+EXPORT_SYMBOL_GPL(wm5110_revd_irq);
+
 static const struct reg_default wm5110_reg_default[] = {
 	{ 0x00000008, 0x0019 },    /* R8     - Ctrl IF SPI CFG 1 */
 	{ 0x00000009, 0x0001 },    /* R9     - Ctrl IF I2C1 CFG 1 */
@@ -1286,12 +1489,14 @@ static const struct reg_default wm5110_reg_default[] = {
 	{ 0x00000D0A, 0xFFFF },    /* R3338  - Interrupt Status 3 Mask */
 	{ 0x00000D0B, 0xFFFF },    /* R3339  - Interrupt Status 4 Mask */
 	{ 0x00000D0C, 0xFEFF },    /* R3340  - Interrupt Status 5 Mask */
+	{ 0x00000D0D, 0xFFFF },    /* R3341  - Interrupt Status 6 Mask */
 	{ 0x00000D0F, 0x0000 },    /* R3343  - Interrupt Control */
 	{ 0x00000D18, 0xFFFF },    /* R3352  - IRQ2 Status 1 Mask */
 	{ 0x00000D19, 0xFFFF },    /* R3353  - IRQ2 Status 2 Mask */
 	{ 0x00000D1A, 0xFFFF },    /* R3354  - IRQ2 Status 3 Mask */
 	{ 0x00000D1B, 0xFFFF },    /* R3355  - IRQ2 Status 4 Mask */
 	{ 0x00000D1C, 0xFFFF },    /* R3356  - IRQ2 Status 5 Mask */
+	{ 0x00000D1D, 0xFFFF },    /* R3357  - IRQ2 Status 6 Mask */
 	{ 0x00000D1F, 0x0000 },    /* R3359  - IRQ2 Control */
 	{ 0x00000D53, 0xFFFF },    /* R3411  - AOD IRQ Mask IRQ1 */
 	{ 0x00000D54, 0xFFFF },    /* R3412  - AOD IRQ Mask IRQ2 */
@@ -2323,22 +2528,26 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_INTERRUPT_STATUS_3:
 	case ARIZONA_INTERRUPT_STATUS_4:
 	case ARIZONA_INTERRUPT_STATUS_5:
+	case ARIZONA_INTERRUPT_STATUS_6:
 	case ARIZONA_INTERRUPT_STATUS_1_MASK:
 	case ARIZONA_INTERRUPT_STATUS_2_MASK:
 	case ARIZONA_INTERRUPT_STATUS_3_MASK:
 	case ARIZONA_INTERRUPT_STATUS_4_MASK:
 	case ARIZONA_INTERRUPT_STATUS_5_MASK:
+	case ARIZONA_INTERRUPT_STATUS_6_MASK:
 	case ARIZONA_INTERRUPT_CONTROL:
 	case ARIZONA_IRQ2_STATUS_1:
 	case ARIZONA_IRQ2_STATUS_2:
 	case ARIZONA_IRQ2_STATUS_3:
 	case ARIZONA_IRQ2_STATUS_4:
 	case ARIZONA_IRQ2_STATUS_5:
+	case ARIZONA_IRQ2_STATUS_6:
 	case ARIZONA_IRQ2_STATUS_1_MASK:
 	case ARIZONA_IRQ2_STATUS_2_MASK:
 	case ARIZONA_IRQ2_STATUS_3_MASK:
 	case ARIZONA_IRQ2_STATUS_4_MASK:
 	case ARIZONA_IRQ2_STATUS_5_MASK:
+	case ARIZONA_IRQ2_STATUS_6_MASK:
 	case ARIZONA_IRQ2_CONTROL:
 	case ARIZONA_INTERRUPT_RAW_STATUS_2:
 	case ARIZONA_INTERRUPT_RAW_STATUS_3:
@@ -2347,6 +2556,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_INTERRUPT_RAW_STATUS_6:
 	case ARIZONA_INTERRUPT_RAW_STATUS_7:
 	case ARIZONA_INTERRUPT_RAW_STATUS_8:
+	case ARIZONA_INTERRUPT_RAW_STATUS_9:
 	case ARIZONA_IRQ_PIN_STATUS:
 	case ARIZONA_AOD_WKUP_AND_TRIG:
 	case ARIZONA_AOD_IRQ1:
@@ -2622,11 +2832,13 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg)
 	case ARIZONA_INTERRUPT_STATUS_3:
 	case ARIZONA_INTERRUPT_STATUS_4:
 	case ARIZONA_INTERRUPT_STATUS_5:
+	case ARIZONA_INTERRUPT_STATUS_6:
 	case ARIZONA_IRQ2_STATUS_1:
 	case ARIZONA_IRQ2_STATUS_2:
 	case ARIZONA_IRQ2_STATUS_3:
 	case ARIZONA_IRQ2_STATUS_4:
 	case ARIZONA_IRQ2_STATUS_5:
+	case ARIZONA_IRQ2_STATUS_6:
 	case ARIZONA_INTERRUPT_RAW_STATUS_2:
 	case ARIZONA_INTERRUPT_RAW_STATUS_3:
 	case ARIZONA_INTERRUPT_RAW_STATUS_4:
@@ -2634,6 +2846,7 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg)
 	case ARIZONA_INTERRUPT_RAW_STATUS_6:
 	case ARIZONA_INTERRUPT_RAW_STATUS_7:
 	case ARIZONA_INTERRUPT_RAW_STATUS_8:
+	case ARIZONA_INTERRUPT_RAW_STATUS_9:
 	case ARIZONA_IRQ_PIN_STATUS:
 	case ARIZONA_AOD_WKUP_AND_TRIG:
 	case ARIZONA_AOD_IRQ1:
diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index 819edf5d1edf..8bc7601cca68 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -84,8 +84,25 @@ enum arizona_type {
 #define ARIZONA_IRQ_HP2L_DONE             55
 #define ARIZONA_IRQ_HP1R_DONE             56
 #define ARIZONA_IRQ_HP1L_DONE             57
-
-#define ARIZONA_NUM_IRQ                   58
+#define ARIZONA_IRQ_ISRC3_CFG_ERR         58
+#define ARIZONA_IRQ_DSP_SHARED_WR_COLL    59
+#define ARIZONA_IRQ_SPK_SHUTDOWN          60
+#define ARIZONA_IRQ_SPK1R_SHORT           61
+#define ARIZONA_IRQ_SPK1L_SHORT           62
+#define ARIZONA_IRQ_HP3R_SC_NEG           63
+#define ARIZONA_IRQ_HP3R_SC_POS           64
+#define ARIZONA_IRQ_HP3L_SC_NEG           65
+#define ARIZONA_IRQ_HP3L_SC_POS           66
+#define ARIZONA_IRQ_HP2R_SC_NEG           67
+#define ARIZONA_IRQ_HP2R_SC_POS           68
+#define ARIZONA_IRQ_HP2L_SC_NEG           69
+#define ARIZONA_IRQ_HP2L_SC_POS           70
+#define ARIZONA_IRQ_HP1R_SC_NEG           71
+#define ARIZONA_IRQ_HP1R_SC_POS           72
+#define ARIZONA_IRQ_HP1L_SC_NEG           73
+#define ARIZONA_IRQ_HP1L_SC_POS           74
+
+#define ARIZONA_NUM_IRQ                   75
 
 struct snd_soc_dapm_context;
 
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index f7d6f9e91da1..dbd23c36de21 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -878,22 +878,26 @@
 #define ARIZONA_INTERRUPT_STATUS_3               0xD02
 #define ARIZONA_INTERRUPT_STATUS_4               0xD03
 #define ARIZONA_INTERRUPT_STATUS_5               0xD04
+#define ARIZONA_INTERRUPT_STATUS_6               0xD05
 #define ARIZONA_INTERRUPT_STATUS_1_MASK          0xD08
 #define ARIZONA_INTERRUPT_STATUS_2_MASK          0xD09
 #define ARIZONA_INTERRUPT_STATUS_3_MASK          0xD0A
 #define ARIZONA_INTERRUPT_STATUS_4_MASK          0xD0B
 #define ARIZONA_INTERRUPT_STATUS_5_MASK          0xD0C
+#define ARIZONA_INTERRUPT_STATUS_6_MASK          0xD0D
 #define ARIZONA_INTERRUPT_CONTROL                0xD0F
 #define ARIZONA_IRQ2_STATUS_1                    0xD10
 #define ARIZONA_IRQ2_STATUS_2                    0xD11
 #define ARIZONA_IRQ2_STATUS_3                    0xD12
 #define ARIZONA_IRQ2_STATUS_4                    0xD13
 #define ARIZONA_IRQ2_STATUS_5                    0xD14
+#define ARIZONA_IRQ2_STATUS_6                    0xD15
 #define ARIZONA_IRQ2_STATUS_1_MASK               0xD18
 #define ARIZONA_IRQ2_STATUS_2_MASK               0xD19
 #define ARIZONA_IRQ2_STATUS_3_MASK               0xD1A
 #define ARIZONA_IRQ2_STATUS_4_MASK               0xD1B
 #define ARIZONA_IRQ2_STATUS_5_MASK               0xD1C
+#define ARIZONA_IRQ2_STATUS_6_MASK               0xD1D
 #define ARIZONA_IRQ2_CONTROL                     0xD1F
 #define ARIZONA_INTERRUPT_RAW_STATUS_2           0xD20
 #define ARIZONA_INTERRUPT_RAW_STATUS_3           0xD21
@@ -902,6 +906,7 @@
 #define ARIZONA_INTERRUPT_RAW_STATUS_6           0xD24
 #define ARIZONA_INTERRUPT_RAW_STATUS_7           0xD25
 #define ARIZONA_INTERRUPT_RAW_STATUS_8           0xD26
+#define ARIZONA_INTERRUPT_RAW_STATUS_9           0xD28
 #define ARIZONA_IRQ_PIN_STATUS                   0xD40
 #define ARIZONA_ADSP2_IRQ0                       0xD41
 #define ARIZONA_AOD_WKUP_AND_TRIG                0xD50
@@ -4820,6 +4825,53 @@
 #define ARIZONA_HP1L_DONE_EINT1_SHIFT                 0  /* HP1L_DONE_EINT1 */
 #define ARIZONA_HP1L_DONE_EINT1_WIDTH                 1  /* HP1L_DONE_EINT1 */
 
+/*
+ * R3331 (0xD03) - Interrupt Status 4 (Alternate layout)
+ *
+ * Alternate layout used on later devices, note only fields that have moved
+ * are specified
+ */
+#define ARIZONA_V2_AIF3_ERR_EINT1                  0x8000  /* AIF3_ERR_EINT1 */
+#define ARIZONA_V2_AIF3_ERR_EINT1_MASK             0x8000  /* AIF3_ERR_EINT1 */
+#define ARIZONA_V2_AIF3_ERR_EINT1_SHIFT                15  /* AIF3_ERR_EINT1 */
+#define ARIZONA_V2_AIF3_ERR_EINT1_WIDTH                 1  /* AIF3_ERR_EINT1 */
+#define ARIZONA_V2_AIF2_ERR_EINT1                  0x4000  /* AIF2_ERR_EINT1 */
+#define ARIZONA_V2_AIF2_ERR_EINT1_MASK             0x4000  /* AIF2_ERR_EINT1 */
+#define ARIZONA_V2_AIF2_ERR_EINT1_SHIFT                14  /* AIF2_ERR_EINT1 */
+#define ARIZONA_V2_AIF2_ERR_EINT1_WIDTH                 1  /* AIF2_ERR_EINT1 */
+#define ARIZONA_V2_AIF1_ERR_EINT1                  0x2000  /* AIF1_ERR_EINT1 */
+#define ARIZONA_V2_AIF1_ERR_EINT1_MASK             0x2000  /* AIF1_ERR_EINT1 */
+#define ARIZONA_V2_AIF1_ERR_EINT1_SHIFT                13  /* AIF1_ERR_EINT1 */
+#define ARIZONA_V2_AIF1_ERR_EINT1_WIDTH                 1  /* AIF1_ERR_EINT1 */
+#define ARIZONA_V2_CTRLIF_ERR_EINT1                0x1000  /* CTRLIF_ERR_EINT1 */
+#define ARIZONA_V2_CTRLIF_ERR_EINT1_MASK           0x1000  /* CTRLIF_ERR_EINT1 */
+#define ARIZONA_V2_CTRLIF_ERR_EINT1_SHIFT              12  /* CTRLIF_ERR_EINT1 */
+#define ARIZONA_V2_CTRLIF_ERR_EINT1_WIDTH               1  /* CTRLIF_ERR_EINT1 */
+#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1      0x0800  /* MIXER_DROPPED_SAMPLE_EINT1 */
+#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_MASK 0x0800  /* MIXER_DROPPED_SAMPLE_EINT1 */
+#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_SHIFT    11  /* MIXER_DROPPED_SAMPLE_EINT1 */
+#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_WIDTH     1  /* MIXER_DROPPED_SAMPLE_EINT1 */
+#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1         0x0400  /* ASYNC_CLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_MASK    0x0400  /* ASYNC_CLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_SHIFT       10  /* ASYNC_CLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_WIDTH        1  /* ASYNC_CLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1            0x0200  /* SYSCLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_MASK       0x0200  /* SYSCLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_SHIFT           9  /* SYSCLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_WIDTH           1  /* SYSCLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1             0x0100  /* ISRC1_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_MASK        0x0100  /* ISRC1_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_SHIFT            8  /* ISRC1_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_WIDTH            1  /* ISRC1_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1             0x0080  /* ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_MASK        0x0080  /* ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_SHIFT            7  /* ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_WIDTH            1  /* ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1             0x0040  /* ISRC3_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_MASK        0x0040  /* ISRC3_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_SHIFT            6  /* ISRC3_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_WIDTH            1  /* ISRC3_CFG_ERR_EINT1 */
+
 /*
  * R3332 (0xD04) - Interrupt Status 5
  */
@@ -4844,6 +4896,85 @@
 #define ARIZONA_FLL1_CLOCK_OK_EINT1_SHIFT             0  /* FLL1_CLOCK_OK_EINT1 */
 #define ARIZONA_FLL1_CLOCK_OK_EINT1_WIDTH             1  /* FLL1_CLOCK_OK_EINT1 */
 
+/*
+ * R3332 (0xD05) - Interrupt Status 5 (Alternate layout)
+ *
+ * Alternate layout used on later devices, note only fields that have moved
+ * are specified
+ */
+#define ARIZONA_V2_ASRC_CFG_ERR_EINT1            0x0008  /* ASRC_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_MASK       0x0008  /* ASRC_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_SHIFT           3  /* ASRC_CFG_ERR_EINT1 */
+#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_WIDTH           1  /* ASRC_CFG_ERR_EINT1 */
+
+/*
+ * R3333 (0xD05) - Interrupt Status 6
+ */
+#define ARIZONA_DSP_SHARED_WR_COLL_EINT1         0x8000  /* DSP_SHARED_WR_COLL_EINT1 */
+#define ARIZONA_DSP_SHARED_WR_COLL_EINT1_MASK    0x8000  /* DSP_SHARED_WR_COLL_EINT1 */
+#define ARIZONA_DSP_SHARED_WR_COLL_EINT1_SHIFT       15  /* DSP_SHARED_WR_COLL_EINT1 */
+#define ARIZONA_DSP_SHARED_WR_COLL_EINT1_WIDTH        1  /* DSP_SHARED_WR_COLL_EINT1 */
+#define ARIZONA_SPK_SHUTDOWN_EINT1               0x4000  /* SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_SPK_SHUTDOWN_EINT1_MASK          0x4000  /* SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_SPK_SHUTDOWN_EINT1_SHIFT             14  /* SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_SPK_SHUTDOWN_EINT1_WIDTH              1  /* SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_SPK1R_SHORT_EINT1                0x2000  /* SPK1R_SHORT_EINT1 */
+#define ARIZONA_SPK1R_SHORT_EINT1_MASK           0x2000  /* SPK1R_SHORT_EINT1 */
+#define ARIZONA_SPK1R_SHORT_EINT1_SHIFT              13  /* SPK1R_SHORT_EINT1 */
+#define ARIZONA_SPK1R_SHORT_EINT1_WIDTH               1  /* SPK1R_SHORT_EINT1 */
+#define ARIZONA_SPK1L_SHORT_EINT1                0x1000  /* SPK1L_SHORT_EINT1 */
+#define ARIZONA_SPK1L_SHORT_EINT1_MASK           0x1000  /* SPK1L_SHORT_EINT1 */
+#define ARIZONA_SPK1L_SHORT_EINT1_SHIFT              12  /* SPK1L_SHORT_EINT1 */
+#define ARIZONA_SPK1L_SHORT_EINT1_WIDTH               1  /* SPK1L_SHORT_EINT1 */
+#define ARIZONA_HP3R_SC_NEG_EINT1                0x0800  /* HP3R_SC_NEG_EINT1 */
+#define ARIZONA_HP3R_SC_NEG_EINT1_MASK           0x0800  /* HP3R_SC_NEG_EINT1 */
+#define ARIZONA_HP3R_SC_NEG_EINT1_SHIFT              11  /* HP3R_SC_NEG_EINT1 */
+#define ARIZONA_HP3R_SC_NEG_EINT1_WIDTH               1  /* HP3R_SC_NEG_EINT1 */
+#define ARIZONA_HP3R_SC_POS_EINT1                0x0400  /* HP3R_SC_POS_EINT1 */
+#define ARIZONA_HP3R_SC_POS_EINT1_MASK           0x0400  /* HP3R_SC_POS_EINT1 */
+#define ARIZONA_HP3R_SC_POS_EINT1_SHIFT              10  /* HP3R_SC_POS_EINT1 */
+#define ARIZONA_HP3R_SC_POS_EINT1_WIDTH               1  /* HP3R_SC_POS_EINT1 */
+#define ARIZONA_HP3L_SC_NEG_EINT1                0x0200  /* HP3L_SC_NEG_EINT1 */
+#define ARIZONA_HP3L_SC_NEG_EINT1_MASK           0x0200  /* HP3L_SC_NEG_EINT1 */
+#define ARIZONA_HP3L_SC_NEG_EINT1_SHIFT               9  /* HP3L_SC_NEG_EINT1 */
+#define ARIZONA_HP3L_SC_NEG_EINT1_WIDTH               1  /* HP3L_SC_NEG_EINT1 */
+#define ARIZONA_HP3L_SC_POS_EINT1                0x0100  /* HP3L_SC_POS_EINT1 */
+#define ARIZONA_HP3L_SC_POS_EINT1_MASK           0x0100  /* HP3L_SC_POS_EINT1 */
+#define ARIZONA_HP3L_SC_POS_EINT1_SHIFT               8  /* HP3L_SC_POS_EINT1 */
+#define ARIZONA_HP3L_SC_POS_EINT1_WIDTH               1  /* HP3L_SC_POS_EINT1 */
+#define ARIZONA_HP2R_SC_NEG_EINT1                0x0080  /* HP2R_SC_NEG_EINT1 */
+#define ARIZONA_HP2R_SC_NEG_EINT1_MASK           0x0080  /* HP2R_SC_NEG_EINT1 */
+#define ARIZONA_HP2R_SC_NEG_EINT1_SHIFT               7  /* HP2R_SC_NEG_EINT1 */
+#define ARIZONA_HP2R_SC_NEG_EINT1_WIDTH               1  /* HP2R_SC_NEG_EINT1 */
+#define ARIZONA_HP2R_SC_POS_EINT1                0x0040  /* HP2R_SC_POS_EINT1 */
+#define ARIZONA_HP2R_SC_POS_EINT1_MASK           0x0040  /* HP2R_SC_POS_EINT1 */
+#define ARIZONA_HP2R_SC_POS_EINT1_SHIFT               6  /* HP2R_SC_POS_EINT1 */
+#define ARIZONA_HP2R_SC_POS_EINT1_WIDTH               1  /* HP2R_SC_POS_EINT1 */
+#define ARIZONA_HP2L_SC_NEG_EINT1                0x0020  /* HP2L_SC_NEG_EINT1 */
+#define ARIZONA_HP2L_SC_NEG_EINT1_MASK           0x0020  /* HP2L_SC_NEG_EINT1 */
+#define ARIZONA_HP2L_SC_NEG_EINT1_SHIFT               5  /* HP2L_SC_NEG_EINT1 */
+#define ARIZONA_HP2L_SC_NEG_EINT1_WIDTH               1  /* HP2L_SC_NEG_EINT1 */
+#define ARIZONA_HP2L_SC_POS_EINT1                0x0010  /* HP2L_SC_POS_EINT1 */
+#define ARIZONA_HP2L_SC_POS_EINT1_MASK           0x0010  /* HP2L_SC_POS_EINT1 */
+#define ARIZONA_HP2L_SC_POS_EINT1_SHIFT               4  /* HP2L_SC_POS_EINT1 */
+#define ARIZONA_HP2L_SC_POS_EINT1_WIDTH               1  /* HP2L_SC_POS_EINT1 */
+#define ARIZONA_HP1R_SC_NEG_EINT1                0x0008  /* HP1R_SC_NEG_EINT1 */
+#define ARIZONA_HP1R_SC_NEG_EINT1_MASK           0x0008  /* HP1R_SC_NEG_EINT1 */
+#define ARIZONA_HP1R_SC_NEG_EINT1_SHIFT               3  /* HP1R_SC_NEG_EINT1 */
+#define ARIZONA_HP1R_SC_NEG_EINT1_WIDTH               1  /* HP1R_SC_NEG_EINT1 */
+#define ARIZONA_HP1R_SC_POS_EINT1                0x0004  /* HP1R_SC_POS_EINT1 */
+#define ARIZONA_HP1R_SC_POS_EINT1_MASK           0x0004  /* HP1R_SC_POS_EINT1 */
+#define ARIZONA_HP1R_SC_POS_EINT1_SHIFT               2  /* HP1R_SC_POS_EINT1 */
+#define ARIZONA_HP1R_SC_POS_EINT1_WIDTH               1  /* HP1R_SC_POS_EINT1 */
+#define ARIZONA_HP1L_SC_NEG_EINT1                0x0002  /* HP1L_SC_NEG_EINT1 */
+#define ARIZONA_HP1L_SC_NEG_EINT1_MASK           0x0002  /* HP1L_SC_NEG_EINT1 */
+#define ARIZONA_HP1L_SC_NEG_EINT1_SHIFT               1  /* HP1L_SC_NEG_EINT1 */
+#define ARIZONA_HP1L_SC_NEG_EINT1_WIDTH               1  /* HP1L_SC_NEG_EINT1 */
+#define ARIZONA_HP1L_SC_POS_EINT1                0x0001  /* HP1L_SC_POS_EINT1 */
+#define ARIZONA_HP1L_SC_POS_EINT1_MASK           0x0001  /* HP1L_SC_POS_EINT1 */
+#define ARIZONA_HP1L_SC_POS_EINT1_SHIFT               0  /* HP1L_SC_POS_EINT1 */
+#define ARIZONA_HP1L_SC_POS_EINT1_WIDTH               1  /* HP1L_SC_POS_EINT1 */
+
 /*
  * R3336 (0xD08) - Interrupt Status 1 Mask
  */
@@ -5012,6 +5143,53 @@
 #define ARIZONA_IM_HP1L_DONE_EINT1_SHIFT              0  /* IM_HP1L_DONE_EINT1 */
 #define ARIZONA_IM_HP1L_DONE_EINT1_WIDTH              1  /* IM_HP1L_DONE_EINT1 */
 
+/*
+ * R3339 (0xD0B) - Interrupt Status 4 Mask (Alternate layout)
+ *
+ * Alternate layout used on later devices, note only fields that have moved
+ * are specified
+ */
+#define ARIZONA_V2_IM_AIF3_ERR_EINT1                  0x8000  /* IM_AIF3_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF3_ERR_EINT1_MASK             0x8000  /* IM_AIF3_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF3_ERR_EINT1_SHIFT                15  /* IM_AIF3_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF3_ERR_EINT1_WIDTH                 1  /* IM_AIF3_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF2_ERR_EINT1                  0x4000  /* IM_AIF2_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF2_ERR_EINT1_MASK             0x4000  /* IM_AIF2_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF2_ERR_EINT1_SHIFT                14  /* IM_AIF2_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF2_ERR_EINT1_WIDTH                 1  /* IM_AIF2_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF1_ERR_EINT1                  0x2000  /* IM_AIF1_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF1_ERR_EINT1_MASK             0x2000  /* IM_AIF1_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF1_ERR_EINT1_SHIFT                13  /* IM_AIF1_ERR_EINT1 */
+#define ARIZONA_V2_IM_AIF1_ERR_EINT1_WIDTH                 1  /* IM_AIF1_ERR_EINT1 */
+#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1                0x1000  /* IM_CTRLIF_ERR_EINT1 */
+#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_MASK           0x1000  /* IM_CTRLIF_ERR_EINT1 */
+#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_SHIFT              12  /* IM_CTRLIF_ERR_EINT1 */
+#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_WIDTH               1  /* IM_CTRLIF_ERR_EINT1 */
+#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1      0x0800  /* IM_MIXER_DROPPED_SAMPLE_EINT1 */
+#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_MASK 0x0800  /* IM_MIXER_DROPPED_SAMPLE_EINT1 */
+#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_SHIFT    11  /* IM_MIXER_DROPPED_SAMPLE_EINT1 */
+#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_WIDTH     1  /* IM_MIXER_DROPPED_SAMPLE_EINT1 */
+#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1         0x0400  /* IM_ASYNC_CLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_MASK    0x0400  /* IM_ASYNC_CLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_SHIFT       10  /* IM_ASYNC_CLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_WIDTH        1  /* IM_ASYNC_CLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1            0x0200  /* IM_SYSCLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_MASK       0x0200  /* IM_SYSCLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_SHIFT           9  /* IM_SYSCLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_WIDTH           1  /* IM_SYSCLK_ENA_LOW_EINT1 */
+#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1             0x0100  /* IM_ISRC1_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_MASK        0x0100  /* IM_ISRC1_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_SHIFT            8  /* IM_ISRC1_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_WIDTH            1  /* IM_ISRC1_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1             0x0080  /* IM_ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_MASK        0x0080  /* IM_ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_SHIFT            7  /* IM_ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_WIDTH            1  /* IM_ISRC2_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1             0x0040  /* IM_ISRC3_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_MASK        0x0040  /* IM_ISRC3_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_SHIFT            6  /* IM_ISRC3_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_WIDTH            1  /* IM_ISRC3_CFG_ERR_EINT1 */
+
 /*
  * R3340 (0xD0C) - Interrupt Status 5 Mask
  */
@@ -5036,6 +5214,85 @@
 #define ARIZONA_IM_FLL1_CLOCK_OK_EINT1_SHIFT          0  /* IM_FLL1_CLOCK_OK_EINT1 */
 #define ARIZONA_IM_FLL1_CLOCK_OK_EINT1_WIDTH          1  /* IM_FLL1_CLOCK_OK_EINT1 */
 
+/*
+ * R3340 (0xD0C) - Interrupt Status 5 Mask (Alternate layout)
+ *
+ * Alternate layout used on later devices, note only fields that have moved
+ * are specified
+ */
+#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1         0x0008  /* IM_ASRC_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_MASK    0x0008  /* IM_ASRC_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_SHIFT        3  /* IM_ASRC_CFG_ERR_EINT1 */
+#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_WIDTH        1  /* IM_ASRC_CFG_ERR_EINT1 */
+
+/*
+ * R3341 (0xD0D) - Interrupt Status 6 Mask
+ */
+#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1      0x8000  /* IM_DSP_SHARED_WR_COLL_EINT1 */
+#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_MASK 0x8000  /* IM_DSP_SHARED_WR_COLL_EINT1 */
+#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_SHIFT    15  /* IM_DSP_SHARED_WR_COLL_EINT1 */
+#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_WIDTH     1  /* IM_DSP_SHARED_WR_COLL_EINT1 */
+#define ARIZONA_IM_SPK_SHUTDOWN_EINT1            0x4000  /* IM_SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_MASK       0x4000  /* IM_SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_SHIFT          14  /* IM_SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_WIDTH           1  /* IM_SPK_SHUTDOWN_EINT1 */
+#define ARIZONA_IM_SPK1R_SHORT_EINT1             0x2000  /* IM_SPK1R_SHORT_EINT1 */
+#define ARIZONA_IM_SPK1R_SHORT_EINT1_MASK        0x2000  /* IM_SPK1R_SHORT_EINT1 */
+#define ARIZONA_IM_SPK1R_SHORT_EINT1_SHIFT           13  /* IM_SPK1R_SHORT_EINT1 */
+#define ARIZONA_IM_SPK1R_SHORT_EINT1_WIDTH            1  /* IM_SPK1R_SHORT_EINT1 */
+#define ARIZONA_IM_SPK1L_SHORT_EINT1             0x1000  /* IM_SPK1L_SHORT_EINT1 */
+#define ARIZONA_IM_SPK1L_SHORT_EINT1_MASK        0x1000  /* IM_SPK1L_SHORT_EINT1 */
+#define ARIZONA_IM_SPK1L_SHORT_EINT1_SHIFT           12  /* IM_SPK1L_SHORT_EINT1 */
+#define ARIZONA_IM_SPK1L_SHORT_EINT1_WIDTH            1  /* IM_SPK1L_SHORT_EINT1 */
+#define ARIZONA_IM_HP3R_SC_NEG_EINT1             0x0800  /* IM_HP3R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP3R_SC_NEG_EINT1_MASK        0x0800  /* IM_HP3R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP3R_SC_NEG_EINT1_SHIFT           11  /* IM_HP3R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP3R_SC_NEG_EINT1_WIDTH            1  /* IM_HP3R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP3R_SC_POS_EINT1             0x0400  /* IM_HP3R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP3R_SC_POS_EINT1_MASK        0x0400  /* IM_HP3R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP3R_SC_POS_EINT1_SHIFT           10  /* IM_HP3R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP3R_SC_POS_EINT1_WIDTH            1  /* IM_HP3R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP3L_SC_NEG_EINT1             0x0200  /* IM_HP3L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP3L_SC_NEG_EINT1_MASK        0x0200  /* IM_HP3L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP3L_SC_NEG_EINT1_SHIFT            9  /* IM_HP3L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP3L_SC_NEG_EINT1_WIDTH            1  /* IM_HP3L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP3L_SC_POS_EINT1             0x0100  /* IM_HP3L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP3L_SC_POS_EINT1_MASK        0x0100  /* IM_HP3L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP3L_SC_POS_EINT1_SHIFT            8  /* IM_HP3L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP3L_SC_POS_EINT1_WIDTH            1  /* IM_HP3L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP2R_SC_NEG_EINT1             0x0080  /* IM_HP2R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP2R_SC_NEG_EINT1_MASK        0x0080  /* IM_HP2R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP2R_SC_NEG_EINT1_SHIFT            7  /* IM_HP2R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP2R_SC_NEG_EINT1_WIDTH            1  /* IM_HP2R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP2R_SC_POS_EINT1             0x0040  /* IM_HP2R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP2R_SC_POS_EINT1_MASK        0x0040  /* IM_HP2R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP2R_SC_POS_EINT1_SHIFT            6  /* IM_HP2R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP2R_SC_POS_EINT1_WIDTH            1  /* IM_HP2R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP2L_SC_NEG_EINT1             0x0020  /* IM_HP2L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP2L_SC_NEG_EINT1_MASK        0x0020  /* IM_HP2L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP2L_SC_NEG_EINT1_SHIFT            5  /* IM_HP2L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP2L_SC_NEG_EINT1_WIDTH            1  /* IM_HP2L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP2L_SC_POS_EINT1             0x0010  /* IM_HP2L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP2L_SC_POS_EINT1_MASK        0x0010  /* IM_HP2L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP2L_SC_POS_EINT1_SHIFT            4  /* IM_HP2L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP2L_SC_POS_EINT1_WIDTH            1  /* IM_HP2L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP1R_SC_NEG_EINT1             0x0008  /* IM_HP1R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP1R_SC_NEG_EINT1_MASK        0x0008  /* IM_HP1R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP1R_SC_NEG_EINT1_SHIFT            3  /* IM_HP1R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP1R_SC_NEG_EINT1_WIDTH            1  /* IM_HP1R_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP1R_SC_POS_EINT1             0x0004  /* IM_HP1R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP1R_SC_POS_EINT1_MASK        0x0004  /* IM_HP1R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP1R_SC_POS_EINT1_SHIFT            2  /* IM_HP1R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP1R_SC_POS_EINT1_WIDTH            1  /* IM_HP1R_SC_POS_EINT1 */
+#define ARIZONA_IM_HP1L_SC_NEG_EINT1             0x0002  /* IM_HP1L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP1L_SC_NEG_EINT1_MASK        0x0002  /* IM_HP1L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP1L_SC_NEG_EINT1_SHIFT            1  /* IM_HP1L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP1L_SC_NEG_EINT1_WIDTH            1  /* IM_HP1L_SC_NEG_EINT1 */
+#define ARIZONA_IM_HP1L_SC_POS_EINT1             0x0001  /* IM_HP1L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP1L_SC_POS_EINT1_MASK        0x0001  /* IM_HP1L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP1L_SC_POS_EINT1_SHIFT            0  /* IM_HP1L_SC_POS_EINT1 */
+#define ARIZONA_IM_HP1L_SC_POS_EINT1_WIDTH            1  /* IM_HP1L_SC_POS_EINT1 */
+
 /*
  * R3343 (0xD0F) - Interrupt Control
  */
@@ -5212,6 +5469,53 @@
 #define ARIZONA_HP1L_DONE_EINT2_SHIFT                 0  /* HP1L_DONE_EINT2 */
 #define ARIZONA_HP1L_DONE_EINT2_WIDTH                 1  /* HP1L_DONE_EINT2 */
 
+/*
+ * R3347 (0xD13) - IRQ2 Status 4 (Alternate layout)
+ *
+ * Alternate layout used on later devices, note only fields that have moved
+ * are specified
+ */
+#define ARIZONA_V2_AIF3_ERR_EINT2                  0x8000  /* AIF3_ERR_EINT2 */
+#define ARIZONA_V2_AIF3_ERR_EINT2_MASK             0x8000  /* AIF3_ERR_EINT2 */
+#define ARIZONA_V2_AIF3_ERR_EINT2_SHIFT                15  /* AIF3_ERR_EINT2 */
+#define ARIZONA_V2_AIF3_ERR_EINT2_WIDTH                 1  /* AIF3_ERR_EINT2 */
+#define ARIZONA_V2_AIF2_ERR_EINT2                  0x4000  /* AIF2_ERR_EINT2 */
+#define ARIZONA_V2_AIF2_ERR_EINT2_MASK             0x4000  /* AIF2_ERR_EINT2 */
+#define ARIZONA_V2_AIF2_ERR_EINT2_SHIFT                14  /* AIF2_ERR_EINT2 */
+#define ARIZONA_V2_AIF2_ERR_EINT2_WIDTH                 1  /* AIF2_ERR_EINT2 */
+#define ARIZONA_V2_AIF1_ERR_EINT2                  0x2000  /* AIF1_ERR_EINT2 */
+#define ARIZONA_V2_AIF1_ERR_EINT2_MASK             0x2000  /* AIF1_ERR_EINT2 */
+#define ARIZONA_V2_AIF1_ERR_EINT2_SHIFT                13  /* AIF1_ERR_EINT2 */
+#define ARIZONA_V2_AIF1_ERR_EINT2_WIDTH                 1  /* AIF1_ERR_EINT2 */
+#define ARIZONA_V2_CTRLIF_ERR_EINT2                0x1000  /* CTRLIF_ERR_EINT2 */
+#define ARIZONA_V2_CTRLIF_ERR_EINT2_MASK           0x1000  /* CTRLIF_ERR_EINT2 */
+#define ARIZONA_V2_CTRLIF_ERR_EINT2_SHIFT              12  /* CTRLIF_ERR_EINT2 */
+#define ARIZONA_V2_CTRLIF_ERR_EINT2_WIDTH               1  /* CTRLIF_ERR_EINT2 */
+#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2      0x0800  /* MIXER_DROPPED_SAMPLE_EINT2 */
+#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_MASK 0x0800  /* MIXER_DROPPED_SAMPLE_EINT2 */
+#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_SHIFT    11  /* MIXER_DROPPED_SAMPLE_EINT2 */
+#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_WIDTH     1  /* MIXER_DROPPED_SAMPLE_EINT2 */
+#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2         0x0400  /* ASYNC_CLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_MASK    0x0400  /* ASYNC_CLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_SHIFT       10  /* ASYNC_CLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_WIDTH        1  /* ASYNC_CLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2            0x0200  /* SYSCLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_MASK       0x0200  /* SYSCLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_SHIFT           9  /* SYSCLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_WIDTH           1  /* SYSCLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2             0x0100  /* ISRC1_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_MASK        0x0100  /* ISRC1_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_SHIFT            8  /* ISRC1_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_WIDTH            1  /* ISRC1_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2             0x0080  /* ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_MASK        0x0080  /* ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_SHIFT            7  /* ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_WIDTH            1  /* ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2             0x0040  /* ISRC3_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_MASK        0x0040  /* ISRC3_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_SHIFT            6  /* ISRC3_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_WIDTH            1  /* ISRC3_CFG_ERR_EINT2 */
+
 /*
  * R3348 (0xD14) - IRQ2 Status 5
  */
@@ -5236,6 +5540,85 @@
 #define ARIZONA_FLL1_CLOCK_OK_EINT2_SHIFT             0  /* FLL1_CLOCK_OK_EINT2 */
 #define ARIZONA_FLL1_CLOCK_OK_EINT2_WIDTH             1  /* FLL1_CLOCK_OK_EINT2 */
 
+/*
+ * R3348 (0xD14) - IRQ2 Status 5 (Alternate layout)
+ *
+ * Alternate layout used on later devices, note only fields that have moved
+ * are specified
+ */
+#define ARIZONA_V2_ASRC_CFG_ERR_EINT2            0x0008  /* ASRC_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_MASK       0x0008  /* ASRC_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_SHIFT           3  /* ASRC_CFG_ERR_EINT2 */
+#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_WIDTH           1  /* ASRC_CFG_ERR_EINT2 */
+
+/*
+ * R3349 (0xD15) - IRQ2 Status 6
+ */
+#define ARIZONA_DSP_SHARED_WR_COLL_EINT2         0x8000  /* DSP_SHARED_WR_COLL_EINT2 */
+#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_MASK    0x8000  /* DSP_SHARED_WR_COLL_EINT2 */
+#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_SHIFT       15  /* DSP_SHARED_WR_COLL_EINT2 */
+#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_WIDTH        1  /* DSP_SHARED_WR_COLL_EINT2 */
+#define ARIZONA_SPK_SHUTDOWN_EINT2               0x4000  /* SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_SPK_SHUTDOWN_EINT2_MASK          0x4000  /* SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_SPK_SHUTDOWN_EINT2_SHIFT             14  /* SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_SPK_SHUTDOWN_EINT2_WIDTH              1  /* SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_SPK1R_SHORT_EINT2                0x2000  /* SPK1R_SHORT_EINT2 */
+#define ARIZONA_SPK1R_SHORT_EINT2_MASK           0x2000  /* SPK1R_SHORT_EINT2 */
+#define ARIZONA_SPK1R_SHORT_EINT2_SHIFT              13  /* SPK1R_SHORT_EINT2 */
+#define ARIZONA_SPK1R_SHORT_EINT2_WIDTH               1  /* SPK1R_SHORT_EINT2 */
+#define ARIZONA_SPK1L_SHORT_EINT2                0x1000  /* SPK1L_SHORT_EINT2 */
+#define ARIZONA_SPK1L_SHORT_EINT2_MASK           0x1000  /* SPK1L_SHORT_EINT2 */
+#define ARIZONA_SPK1L_SHORT_EINT2_SHIFT              12  /* SPK1L_SHORT_EINT2 */
+#define ARIZONA_SPK1L_SHORT_EINT2_WIDTH               1  /* SPK1L_SHORT_EINT2 */
+#define ARIZONA_HP3R_SC_NEG_EINT2                0x0800  /* HP3R_SC_NEG_EINT2 */
+#define ARIZONA_HP3R_SC_NEG_EINT2_MASK           0x0800  /* HP3R_SC_NEG_EINT2 */
+#define ARIZONA_HP3R_SC_NEG_EINT2_SHIFT              11  /* HP3R_SC_NEG_EINT2 */
+#define ARIZONA_HP3R_SC_NEG_EINT2_WIDTH               1  /* HP3R_SC_NEG_EINT2 */
+#define ARIZONA_HP3R_SC_POS_EINT2                0x0400  /* HP3R_SC_POS_EINT2 */
+#define ARIZONA_HP3R_SC_POS_EINT2_MASK           0x0400  /* HP3R_SC_POS_EINT2 */
+#define ARIZONA_HP3R_SC_POS_EINT2_SHIFT              10  /* HP3R_SC_POS_EINT2 */
+#define ARIZONA_HP3R_SC_POS_EINT2_WIDTH               1  /* HP3R_SC_POS_EINT2 */
+#define ARIZONA_HP3L_SC_NEG_EINT2                0x0200  /* HP3L_SC_NEG_EINT2 */
+#define ARIZONA_HP3L_SC_NEG_EINT2_MASK           0x0200  /* HP3L_SC_NEG_EINT2 */
+#define ARIZONA_HP3L_SC_NEG_EINT2_SHIFT               9  /* HP3L_SC_NEG_EINT2 */
+#define ARIZONA_HP3L_SC_NEG_EINT2_WIDTH               1  /* HP3L_SC_NEG_EINT2 */
+#define ARIZONA_HP3L_SC_POS_EINT2                0x0100  /* HP3L_SC_POS_EINT2 */
+#define ARIZONA_HP3L_SC_POS_EINT2_MASK           0x0100  /* HP3L_SC_POS_EINT2 */
+#define ARIZONA_HP3L_SC_POS_EINT2_SHIFT               8  /* HP3L_SC_POS_EINT2 */
+#define ARIZONA_HP3L_SC_POS_EINT2_WIDTH               1  /* HP3L_SC_POS_EINT2 */
+#define ARIZONA_HP2R_SC_NEG_EINT2                0x0080  /* HP2R_SC_NEG_EINT2 */
+#define ARIZONA_HP2R_SC_NEG_EINT2_MASK           0x0080  /* HP2R_SC_NEG_EINT2 */
+#define ARIZONA_HP2R_SC_NEG_EINT2_SHIFT               7  /* HP2R_SC_NEG_EINT2 */
+#define ARIZONA_HP2R_SC_NEG_EINT2_WIDTH               1  /* HP2R_SC_NEG_EINT2 */
+#define ARIZONA_HP2R_SC_POS_EINT2                0x0040  /* HP2R_SC_POS_EINT2 */
+#define ARIZONA_HP2R_SC_POS_EINT2_MASK           0x0040  /* HP2R_SC_POS_EINT2 */
+#define ARIZONA_HP2R_SC_POS_EINT2_SHIFT               6  /* HP2R_SC_POS_EINT2 */
+#define ARIZONA_HP2R_SC_POS_EINT2_WIDTH               1  /* HP2R_SC_POS_EINT2 */
+#define ARIZONA_HP2L_SC_NEG_EINT2                0x0020  /* HP2L_SC_NEG_EINT2 */
+#define ARIZONA_HP2L_SC_NEG_EINT2_MASK           0x0020  /* HP2L_SC_NEG_EINT2 */
+#define ARIZONA_HP2L_SC_NEG_EINT2_SHIFT               5  /* HP2L_SC_NEG_EINT2 */
+#define ARIZONA_HP2L_SC_NEG_EINT2_WIDTH               1  /* HP2L_SC_NEG_EINT2 */
+#define ARIZONA_HP2L_SC_POS_EINT2                0x0010  /* HP2L_SC_POS_EINT2 */
+#define ARIZONA_HP2L_SC_POS_EINT2_MASK           0x0010  /* HP2L_SC_POS_EINT2 */
+#define ARIZONA_HP2L_SC_POS_EINT2_SHIFT               4  /* HP2L_SC_POS_EINT2 */
+#define ARIZONA_HP2L_SC_POS_EINT2_WIDTH               1  /* HP2L_SC_POS_EINT2 */
+#define ARIZONA_HP1R_SC_NEG_EINT2                0x0008  /* HP1R_SC_NEG_EINT2 */
+#define ARIZONA_HP1R_SC_NEG_EINT2_MASK           0x0008  /* HP1R_SC_NEG_EINT2 */
+#define ARIZONA_HP1R_SC_NEG_EINT2_SHIFT               3  /* HP1R_SC_NEG_EINT2 */
+#define ARIZONA_HP1R_SC_NEG_EINT2_WIDTH               1  /* HP1R_SC_NEG_EINT2 */
+#define ARIZONA_HP1R_SC_POS_EINT2                0x0004  /* HP1R_SC_POS_EINT2 */
+#define ARIZONA_HP1R_SC_POS_EINT2_MASK           0x0004  /* HP1R_SC_POS_EINT2 */
+#define ARIZONA_HP1R_SC_POS_EINT2_SHIFT               2  /* HP1R_SC_POS_EINT2 */
+#define ARIZONA_HP1R_SC_POS_EINT2_WIDTH               1  /* HP1R_SC_POS_EINT2 */
+#define ARIZONA_HP1L_SC_NEG_EINT2                0x0002  /* HP1L_SC_NEG_EINT2 */
+#define ARIZONA_HP1L_SC_NEG_EINT2_MASK           0x0002  /* HP1L_SC_NEG_EINT2 */
+#define ARIZONA_HP1L_SC_NEG_EINT2_SHIFT               1  /* HP1L_SC_NEG_EINT2 */
+#define ARIZONA_HP1L_SC_NEG_EINT2_WIDTH               1  /* HP1L_SC_NEG_EINT2 */
+#define ARIZONA_HP1L_SC_POS_EINT2                0x0001  /* HP1L_SC_POS_EINT2 */
+#define ARIZONA_HP1L_SC_POS_EINT2_MASK           0x0001  /* HP1L_SC_POS_EINT2 */
+#define ARIZONA_HP1L_SC_POS_EINT2_SHIFT               0  /* HP1L_SC_POS_EINT2 */
+#define ARIZONA_HP1L_SC_POS_EINT2_WIDTH               1  /* HP1L_SC_POS_EINT2 */
+
 /*
  * R3352 (0xD18) - IRQ2 Status 1 Mask
  */
@@ -5404,6 +5787,53 @@
 #define ARIZONA_IM_HP1L_DONE_EINT2_SHIFT              0  /* IM_HP1L_DONE_EINT2 */
 #define ARIZONA_IM_HP1L_DONE_EINT2_WIDTH              1  /* IM_HP1L_DONE_EINT2 */
 
+/*
+ * R3355 (0xD1B) - IRQ2 Status 4 Mask (Alternate layout)
+ *
+ * Alternate layout used on later devices, note only fields that have moved
+ * are specified
+ */
+#define ARIZONA_V2_IM_AIF3_ERR_EINT2                  0x8000  /* IM_AIF3_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF3_ERR_EINT2_MASK             0x8000  /* IM_AIF3_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF3_ERR_EINT2_SHIFT                15  /* IM_AIF3_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF3_ERR_EINT2_WIDTH                 1  /* IM_AIF3_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF2_ERR_EINT2                  0x4000  /* IM_AIF2_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF2_ERR_EINT2_MASK             0x4000  /* IM_AIF2_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF2_ERR_EINT2_SHIFT                14  /* IM_AIF2_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF2_ERR_EINT2_WIDTH                 1  /* IM_AIF2_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF1_ERR_EINT2                  0x2000  /* IM_AIF1_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF1_ERR_EINT2_MASK             0x2000  /* IM_AIF1_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF1_ERR_EINT2_SHIFT                13  /* IM_AIF1_ERR_EINT2 */
+#define ARIZONA_V2_IM_AIF1_ERR_EINT2_WIDTH                 1  /* IM_AIF1_ERR_EINT2 */
+#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2                0x1000  /* IM_CTRLIF_ERR_EINT2 */
+#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_MASK           0x1000  /* IM_CTRLIF_ERR_EINT2 */
+#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_SHIFT              12  /* IM_CTRLIF_ERR_EINT2 */
+#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_WIDTH               1  /* IM_CTRLIF_ERR_EINT2 */
+#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2      0x0800  /* IM_MIXER_DROPPED_SAMPLE_EINT2 */
+#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_MASK 0x0800  /* IM_MIXER_DROPPED_SAMPLE_EINT2 */
+#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_SHIFT    11  /* IM_MIXER_DROPPED_SAMPLE_EINT2 */
+#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_WIDTH     1  /* IM_MIXER_DROPPED_SAMPLE_EINT2 */
+#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2         0x0400  /* IM_ASYNC_CLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_MASK    0x0400  /* IM_ASYNC_CLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_SHIFT       10  /* IM_ASYNC_CLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_WIDTH        1  /* IM_ASYNC_CLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2            0x0200  /* IM_SYSCLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_MASK       0x0200  /* IM_SYSCLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_SHIFT           9  /* IM_SYSCLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_WIDTH           1  /* IM_SYSCLK_ENA_LOW_EINT2 */
+#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2             0x0100  /* IM_ISRC1_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_MASK        0x0100  /* IM_ISRC1_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_SHIFT            8  /* IM_ISRC1_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_WIDTH            1  /* IM_ISRC1_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2             0x0080  /* IM_ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_MASK        0x0080  /* IM_ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_SHIFT            7  /* IM_ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_WIDTH            1  /* IM_ISRC2_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2             0x0040  /* IM_ISRC3_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_MASK        0x0040  /* IM_ISRC3_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_SHIFT            6  /* IM_ISRC3_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_WIDTH            1  /* IM_ISRC3_CFG_ERR_EINT2 */
+
 /*
  * R3356 (0xD1C) - IRQ2 Status 5 Mask
  */
@@ -5429,6 +5859,85 @@
 #define ARIZONA_IM_FLL1_CLOCK_OK_EINT2_SHIFT          0  /* IM_FLL1_CLOCK_OK_EINT2 */
 #define ARIZONA_IM_FLL1_CLOCK_OK_EINT2_WIDTH          1  /* IM_FLL1_CLOCK_OK_EINT2 */
 
+/*
+ * R3340 (0xD0C) - Interrupt Status 5 Mask (Alternate layout)
+ *
+ * Alternate layout used on later devices, note only fields that have moved
+ * are specified
+ */
+#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2         0x0008  /* IM_ASRC_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_MASK    0x0008  /* IM_ASRC_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_SHIFT        3  /* IM_ASRC_CFG_ERR_EINT2 */
+#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_WIDTH        1  /* IM_ASRC_CFG_ERR_EINT2 */
+
+/*
+ * R3357 (0xD1D) - IRQ2 Status 6 Mask
+ */
+#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2      0x8000  /* IM_DSP_SHARED_WR_COLL_EINT2 */
+#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_MASK 0x8000  /* IM_DSP_SHARED_WR_COLL_EINT2 */
+#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_SHIFT    15  /* IM_DSP_SHARED_WR_COLL_EINT2 */
+#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_WIDTH     1  /* IM_DSP_SHARED_WR_COLL_EINT2 */
+#define ARIZONA_IM_SPK_SHUTDOWN_EINT2            0x4000  /* IM_SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_MASK       0x4000  /* IM_SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_SHIFT          14  /* IM_SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_WIDTH           1  /* IM_SPK_SHUTDOWN_EINT2 */
+#define ARIZONA_IM_SPK1R_SHORT_EINT2             0x2000  /* IM_SPK1R_SHORT_EINT2 */
+#define ARIZONA_IM_SPK1R_SHORT_EINT2_MASK        0x2000  /* IM_SPK1R_SHORT_EINT2 */
+#define ARIZONA_IM_SPK1R_SHORT_EINT2_SHIFT           13  /* IM_SPK1R_SHORT_EINT2 */
+#define ARIZONA_IM_SPK1R_SHORT_EINT2_WIDTH            1  /* IM_SPK1R_SHORT_EINT2 */
+#define ARIZONA_IM_SPK1L_SHORT_EINT2             0x1000  /* IM_SPK1L_SHORT_EINT2 */
+#define ARIZONA_IM_SPK1L_SHORT_EINT2_MASK        0x1000  /* IM_SPK1L_SHORT_EINT2 */
+#define ARIZONA_IM_SPK1L_SHORT_EINT2_SHIFT           12  /* IM_SPK1L_SHORT_EINT2 */
+#define ARIZONA_IM_SPK1L_SHORT_EINT2_WIDTH            1  /* IM_SPK1L_SHORT_EINT2 */
+#define ARIZONA_IM_HP3R_SC_NEG_EINT2             0x0800  /* IM_HP3R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP3R_SC_NEG_EINT2_MASK        0x0800  /* IM_HP3R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP3R_SC_NEG_EINT2_SHIFT           11  /* IM_HP3R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP3R_SC_NEG_EINT2_WIDTH            1  /* IM_HP3R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP3R_SC_POS_EINT2             0x0400  /* IM_HP3R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP3R_SC_POS_EINT2_MASK        0x0400  /* IM_HP3R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP3R_SC_POS_EINT2_SHIFT           10  /* IM_HP3R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP3R_SC_POS_EINT2_WIDTH            1  /* IM_HP3R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP3L_SC_NEG_EINT2             0x0200  /* IM_HP3L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP3L_SC_NEG_EINT2_MASK        0x0200  /* IM_HP3L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP3L_SC_NEG_EINT2_SHIFT            9  /* IM_HP3L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP3L_SC_NEG_EINT2_WIDTH            1  /* IM_HP3L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP3L_SC_POS_EINT2             0x0100  /* IM_HP3L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP3L_SC_POS_EINT2_MASK        0x0100  /* IM_HP3L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP3L_SC_POS_EINT2_SHIFT            8  /* IM_HP3L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP3L_SC_POS_EINT2_WIDTH            1  /* IM_HP3L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP2R_SC_NEG_EINT2             0x0080  /* IM_HP2R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP2R_SC_NEG_EINT2_MASK        0x0080  /* IM_HP2R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP2R_SC_NEG_EINT2_SHIFT            7  /* IM_HP2R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP2R_SC_NEG_EINT2_WIDTH            1  /* IM_HP2R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP2R_SC_POS_EINT2             0x0040  /* IM_HP2R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP2R_SC_POS_EINT2_MASK        0x0040  /* IM_HP2R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP2R_SC_POS_EINT2_SHIFT            6  /* IM_HP2R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP2R_SC_POS_EINT2_WIDTH            1  /* IM_HP2R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP2L_SC_NEG_EINT2             0x0020  /* IM_HP2L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP2L_SC_NEG_EINT2_MASK        0x0020  /* IM_HP2L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP2L_SC_NEG_EINT2_SHIFT            5  /* IM_HP2L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP2L_SC_NEG_EINT2_WIDTH            1  /* IM_HP2L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP2L_SC_POS_EINT2             0x0010  /* IM_HP2L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP2L_SC_POS_EINT2_MASK        0x0010  /* IM_HP2L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP2L_SC_POS_EINT2_SHIFT            4  /* IM_HP2L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP2L_SC_POS_EINT2_WIDTH            1  /* IM_HP2L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP1R_SC_NEG_EINT2             0x0008  /* IM_HP1R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP1R_SC_NEG_EINT2_MASK        0x0008  /* IM_HP1R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP1R_SC_NEG_EINT2_SHIFT            3  /* IM_HP1R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP1R_SC_NEG_EINT2_WIDTH            1  /* IM_HP1R_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP1R_SC_POS_EINT2             0x0004  /* IM_HP1R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP1R_SC_POS_EINT2_MASK        0x0004  /* IM_HP1R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP1R_SC_POS_EINT2_SHIFT            2  /* IM_HP1R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP1R_SC_POS_EINT2_WIDTH            1  /* IM_HP1R_SC_POS_EINT2 */
+#define ARIZONA_IM_HP1L_SC_NEG_EINT2             0x0002  /* IM_HP1L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP1L_SC_NEG_EINT2_MASK        0x0002  /* IM_HP1L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP1L_SC_NEG_EINT2_SHIFT            1  /* IM_HP1L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP1L_SC_NEG_EINT2_WIDTH            1  /* IM_HP1L_SC_NEG_EINT2 */
+#define ARIZONA_IM_HP1L_SC_POS_EINT2             0x0001  /* IM_HP1L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP1L_SC_POS_EINT2_MASK        0x0001  /* IM_HP1L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP1L_SC_POS_EINT2_SHIFT            0  /* IM_HP1L_SC_POS_EINT2 */
+#define ARIZONA_IM_HP1L_SC_POS_EINT2_WIDTH            1  /* IM_HP1L_SC_POS_EINT2 */
+
 /*
  * R3359 (0xD1F) - IRQ2 Control
  */
@@ -5700,6 +6209,10 @@
 #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_MASK     0x0008  /* ADSP2_1_OVERCLOCKED_STS */
 #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_SHIFT         3  /* ADSP2_1_OVERCLOCKED_STS */
 #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_WIDTH         1  /* ADSP2_1_OVERCLOCKED_STS */
+#define ARIZONA_ISRC3_OVERCLOCKED_STS            0x0004  /* ISRC3_OVERCLOCKED_STS */
+#define ARIZONA_ISRC3_OVERCLOCKED_STS_MASK       0x0004  /* ISRC3_OVERCLOCKED_STS */
+#define ARIZONA_ISRC3_OVERCLOCKED_STS_SHIFT           2  /* ISRC3_OVERCLOCKED_STS */
+#define ARIZONA_ISRC3_OVERCLOCKED_STS_WIDTH           1  /* ISRC3_OVERCLOCKED_STS */
 #define ARIZONA_ISRC2_OVERCLOCKED_STS            0x0002  /* ISRC2_OVERCLOCKED_STS */
 #define ARIZONA_ISRC2_OVERCLOCKED_STS_MASK       0x0002  /* ISRC2_OVERCLOCKED_STS */
 #define ARIZONA_ISRC2_OVERCLOCKED_STS_SHIFT           1  /* ISRC2_OVERCLOCKED_STS */
@@ -5724,6 +6237,10 @@
 #define ARIZONA_AIF1_UNDERCLOCKED_STS_MASK       0x0100  /* AIF1_UNDERCLOCKED_STS */
 #define ARIZONA_AIF1_UNDERCLOCKED_STS_SHIFT           8  /* AIF1_UNDERCLOCKED_STS */
 #define ARIZONA_AIF1_UNDERCLOCKED_STS_WIDTH           1  /* AIF1_UNDERCLOCKED_STS */
+#define ARIZONA_ISRC3_UNDERCLOCKED_STS           0x0080  /* ISRC3_UNDERCLOCKED_STS */
+#define ARIZONA_ISRC3_UNDERCLOCKED_STS_MASK      0x0080  /* ISRC3_UNDERCLOCKED_STS */
+#define ARIZONA_ISRC3_UNDERCLOCKED_STS_SHIFT          7  /* ISRC3_UNDERCLOCKED_STS */
+#define ARIZONA_ISRC3_UNDERCLOCKED_STS_WIDTH          1  /* ISRC3_UNDERCLOCKED_STS */
 #define ARIZONA_ISRC2_UNDERCLOCKED_STS           0x0040  /* ISRC2_UNDERCLOCKED_STS */
 #define ARIZONA_ISRC2_UNDERCLOCKED_STS_MASK      0x0040  /* ISRC2_UNDERCLOCKED_STS */
 #define ARIZONA_ISRC2_UNDERCLOCKED_STS_SHIFT          6  /* ISRC2_UNDERCLOCKED_STS */
@@ -5753,6 +6270,74 @@
 #define ARIZONA_MIXER_UNDERCLOCKED_STS_SHIFT          0  /* MIXER_UNDERCLOCKED_STS */
 #define ARIZONA_MIXER_UNDERCLOCKED_STS_WIDTH          1  /* MIXER_UNDERCLOCKED_STS */
 
+/*
+ * R3368 (0xD28) - Interrupt Raw Status 9
+ */
+#define ARIZONA_DSP_SHARED_WR_COLL_STS           0x8000  /* DSP_SHARED_WR_COLL_STS */
+#define ARIZONA_DSP_SHARED_WR_COLL_STS_MASK      0x8000  /* DSP_SHARED_WR_COLL_STS */
+#define ARIZONA_DSP_SHARED_WR_COLL_STS_SHIFT         15  /* DSP_SHARED_WR_COLL_STS */
+#define ARIZONA_DSP_SHARED_WR_COLL_STS_WIDTH          1  /* DSP_SHARED_WR_COLL_STS */
+#define ARIZONA_SPK_SHUTDOWN_STS                 0x4000  /* SPK_SHUTDOWN_STS */
+#define ARIZONA_SPK_SHUTDOWN_STS_MASK            0x4000  /* SPK_SHUTDOWN_STS */
+#define ARIZONA_SPK_SHUTDOWN_STS_SHIFT               14  /* SPK_SHUTDOWN_STS */
+#define ARIZONA_SPK_SHUTDOWN_STS_WIDTH                1  /* SPK_SHUTDOWN_STS */
+#define ARIZONA_SPK1R_SHORT_STS                  0x2000  /* SPK1R_SHORT_STS */
+#define ARIZONA_SPK1R_SHORT_STS_MASK             0x2000  /* SPK1R_SHORT_STS */
+#define ARIZONA_SPK1R_SHORT_STS_SHIFT                13  /* SPK1R_SHORT_STS */
+#define ARIZONA_SPK1R_SHORT_STS_WIDTH                 1  /* SPK1R_SHORT_STS */
+#define ARIZONA_SPK1L_SHORT_STS                  0x1000  /* SPK1L_SHORT_STS */
+#define ARIZONA_SPK1L_SHORT_STS_MASK             0x1000  /* SPK1L_SHORT_STS */
+#define ARIZONA_SPK1L_SHORT_STS_SHIFT                12  /* SPK1L_SHORT_STS */
+#define ARIZONA_SPK1L_SHORT_STS_WIDTH                 1  /* SPK1L_SHORT_STS */
+#define ARIZONA_HP3R_SC_NEG_STS                  0x0800  /* HP3R_SC_NEG_STS */
+#define ARIZONA_HP3R_SC_NEG_STS_MASK             0x0800  /* HP3R_SC_NEG_STS */
+#define ARIZONA_HP3R_SC_NEG_STS_SHIFT                11  /* HP3R_SC_NEG_STS */
+#define ARIZONA_HP3R_SC_NEG_STS_WIDTH                 1  /* HP3R_SC_NEG_STS */
+#define ARIZONA_HP3R_SC_POS_STS                  0x0400  /* HP3R_SC_POS_STS */
+#define ARIZONA_HP3R_SC_POS_STS_MASK             0x0400  /* HP3R_SC_POS_STS */
+#define ARIZONA_HP3R_SC_POS_STS_SHIFT                10  /* HP3R_SC_POS_STS */
+#define ARIZONA_HP3R_SC_POS_STS_WIDTH                 1  /* HP3R_SC_POS_STS */
+#define ARIZONA_HP3L_SC_NEG_STS                  0x0200  /* HP3L_SC_NEG_STS */
+#define ARIZONA_HP3L_SC_NEG_STS_MASK             0x0200  /* HP3L_SC_NEG_STS */
+#define ARIZONA_HP3L_SC_NEG_STS_SHIFT                 9  /* HP3L_SC_NEG_STS */
+#define ARIZONA_HP3L_SC_NEG_STS_WIDTH                 1  /* HP3L_SC_NEG_STS */
+#define ARIZONA_HP3L_SC_POS_STS                  0x0100  /* HP3L_SC_POS_STS */
+#define ARIZONA_HP3L_SC_POS_STS_MASK             0x0100  /* HP3L_SC_POS_STS */
+#define ARIZONA_HP3L_SC_POS_STS_SHIFT                 8  /* HP3L_SC_POS_STS */
+#define ARIZONA_HP3L_SC_POS_STS_WIDTH                 1  /* HP3L_SC_POS_STS */
+#define ARIZONA_HP2R_SC_NEG_STS                  0x0080  /* HP2R_SC_NEG_STS */
+#define ARIZONA_HP2R_SC_NEG_STS_MASK             0x0080  /* HP2R_SC_NEG_STS */
+#define ARIZONA_HP2R_SC_NEG_STS_SHIFT                 7  /* HP2R_SC_NEG_STS */
+#define ARIZONA_HP2R_SC_NEG_STS_WIDTH                 1  /* HP2R_SC_NEG_STS */
+#define ARIZONA_HP2R_SC_POS_STS                  0x0040  /* HP2R_SC_POS_STS */
+#define ARIZONA_HP2R_SC_POS_STS_MASK             0x0040  /* HP2R_SC_POS_STS */
+#define ARIZONA_HP2R_SC_POS_STS_SHIFT                 6  /* HP2R_SC_POS_STS */
+#define ARIZONA_HP2R_SC_POS_STS_WIDTH                 1  /* HP2R_SC_POS_STS */
+#define ARIZONA_HP2L_SC_NEG_STS                  0x0020  /* HP2L_SC_NEG_STS */
+#define ARIZONA_HP2L_SC_NEG_STS_MASK             0x0020  /* HP2L_SC_NEG_STS */
+#define ARIZONA_HP2L_SC_NEG_STS_SHIFT                 5  /* HP2L_SC_NEG_STS */
+#define ARIZONA_HP2L_SC_NEG_STS_WIDTH                 1  /* HP2L_SC_NEG_STS */
+#define ARIZONA_HP2L_SC_POS_STS                  0x0010  /* HP2L_SC_POS_STS */
+#define ARIZONA_HP2L_SC_POS_STS_MASK             0x0010  /* HP2L_SC_POS_STS */
+#define ARIZONA_HP2L_SC_POS_STS_SHIFT                 4  /* HP2L_SC_POS_STS */
+#define ARIZONA_HP2L_SC_POS_STS_WIDTH                 1  /* HP2L_SC_POS_STS */
+#define ARIZONA_HP1R_SC_NEG_STS                  0x0008  /* HP1R_SC_NEG_STS */
+#define ARIZONA_HP1R_SC_NEG_STS_MASK             0x0008  /* HP1R_SC_NEG_STS */
+#define ARIZONA_HP1R_SC_NEG_STS_SHIFT                 3  /* HP1R_SC_NEG_STS */
+#define ARIZONA_HP1R_SC_NEG_STS_WIDTH                 1  /* HP1R_SC_NEG_STS */
+#define ARIZONA_HP1R_SC_POS_STS                  0x0004  /* HP1R_SC_POS_STS */
+#define ARIZONA_HP1R_SC_POS_STS_MASK             0x0004  /* HP1R_SC_POS_STS */
+#define ARIZONA_HP1R_SC_POS_STS_SHIFT                 2  /* HP1R_SC_POS_STS */
+#define ARIZONA_HP1R_SC_POS_STS_WIDTH                 1  /* HP1R_SC_POS_STS */
+#define ARIZONA_HP1L_SC_NEG_STS                  0x0002  /* HP1L_SC_NEG_STS */
+#define ARIZONA_HP1L_SC_NEG_STS_MASK             0x0002  /* HP1L_SC_NEG_STS */
+#define ARIZONA_HP1L_SC_NEG_STS_SHIFT                 1  /* HP1L_SC_NEG_STS */
+#define ARIZONA_HP1L_SC_NEG_STS_WIDTH                 1  /* HP1L_SC_NEG_STS */
+#define ARIZONA_HP1L_SC_POS_STS                  0x0001  /* HP1L_SC_POS_STS */
+#define ARIZONA_HP1L_SC_POS_STS_MASK             0x0001  /* HP1L_SC_POS_STS */
+#define ARIZONA_HP1L_SC_POS_STS_SHIFT                 0  /* HP1L_SC_POS_STS */
+#define ARIZONA_HP1L_SC_POS_STS_WIDTH                 1  /* HP1L_SC_POS_STS */
+
 /*
  * R3392 (0xD40) - IRQ Pin Status
  */
-- 
cgit v1.2.3


From 30a2af3a320d5c0598cde08ba6e5d22a724f82e4 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 15 Jul 2014 11:21:50 +0100
Subject: mfd: arizona: Only free the CTRLIF_ERR IRQ if we requested it

We only request the control interface error IRQ if we set ctrlif_error,
as such we should only free it in that situation. Otherwise we will
attempt to free an IRQ we never requested and get a warning from the IRQ
core.

This patch moves the ctrlif_error variable into the arizona structure
and checks it in all cases we free the control interface error IRQ.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/arizona-irq.c        | 19 ++++++++++++-------
 include/linux/mfd/arizona/core.h |  2 ++
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index e780bc40165d..d420dbc0e2b0 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -188,16 +188,17 @@ int arizona_irq_init(struct arizona *arizona)
 	int flags = IRQF_ONESHOT;
 	int ret, i;
 	const struct regmap_irq_chip *aod, *irq;
-	bool ctrlif_error = true;
 	struct irq_data *irq_data;
 
+	arizona->ctrlif_error = true;
+
 	switch (arizona->type) {
 #ifdef CONFIG_MFD_WM5102
 	case WM5102:
 		aod = &wm5102_aod;
 		irq = &wm5102_irq;
 
-		ctrlif_error = false;
+		arizona->ctrlif_error = false;
 		break;
 #endif
 #ifdef CONFIG_MFD_WM5110
@@ -213,7 +214,7 @@ int arizona_irq_init(struct arizona *arizona)
 			break;
 		}
 
-		ctrlif_error = false;
+		arizona->ctrlif_error = false;
 		break;
 #endif
 #ifdef CONFIG_MFD_WM8997
@@ -221,7 +222,7 @@ int arizona_irq_init(struct arizona *arizona)
 		aod = &wm8997_aod;
 		irq = &wm8997_irq;
 
-		ctrlif_error = false;
+		arizona->ctrlif_error = false;
 		break;
 #endif
 	default:
@@ -308,7 +309,7 @@ int arizona_irq_init(struct arizona *arizona)
 	}
 
 	/* Handle control interface errors in the core */
-	if (ctrlif_error) {
+	if (arizona->ctrlif_error) {
 		i = arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR);
 		ret = request_threaded_irq(i, NULL, arizona_ctrlif_err,
 					   IRQF_ONESHOT,
@@ -353,7 +354,9 @@ int arizona_irq_init(struct arizona *arizona)
 	return 0;
 
 err_main_irq:
-	free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), arizona);
+	if (arizona->ctrlif_error)
+		free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR),
+			 arizona);
 err_ctrlif:
 	free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_BOOT_DONE), arizona);
 err_boot_done:
@@ -369,7 +372,9 @@ err:
 
 int arizona_irq_exit(struct arizona *arizona)
 {
-	free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), arizona);
+	if (arizona->ctrlif_error)
+		free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR),
+			 arizona);
 	free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_BOOT_DONE), arizona);
 	regmap_del_irq_chip(irq_create_mapping(arizona->virq, 1),
 			    arizona->irq_chip);
diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index 8bc7601cca68..fdd8b7b82db5 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -132,6 +132,8 @@ struct arizona {
 	struct mutex clk_lock;
 	int clk32k_ref;
 
+	bool ctrlif_error;
+
 	struct snd_soc_dapm_context *dapm;
 };
 
-- 
cgit v1.2.3


From 0a6d315827eedc733d404ecff3cd4cc0e6437865 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 24 Jul 2014 20:08:55 +0200
Subject: gpio: split gpiod board registration into machine header

As per example from the regulator subsystem: put all defines and
functions related to registering board info for GPIO descriptors
into a separate <linux/gpio/machine.h> header.

Cc: Andrew Victor <linux@maxim.org.za>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Thierry Reding <thierry.reding@gmail.com>
Acked-by: Stephen Warren <swarren@wwwdotorg.org>
Reviewed-by: Alexandre Courbot <gnurou@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/gpio/board.txt            |  2 +-
 arch/arm/mach-at91/at91rm9200_devices.c |  2 +-
 arch/arm/mach-tegra/board-paz00.c       |  2 +-
 arch/mips/jz4740/board-qi_lb60.c        |  1 +
 drivers/gpio/gpiolib.c                  |  1 +
 include/linux/gpio/driver.h             | 54 ------------------------------
 include/linux/gpio/machine.h            | 58 +++++++++++++++++++++++++++++++++
 7 files changed, 63 insertions(+), 57 deletions(-)
 create mode 100644 include/linux/gpio/machine.h

(limited to 'include/linux')

diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt
index ba169faad5c6..4452786225b8 100644
--- a/Documentation/gpio/board.txt
+++ b/Documentation/gpio/board.txt
@@ -60,7 +60,7 @@ Platform Data
 Finally, GPIOs can be bound to devices and functions using platform data. Board
 files that desire to do so need to include the following header:
 
-	#include <linux/gpio/driver.h>
+	#include <linux/gpio/machine.h>
 
 GPIOs are mapped by the means of tables of lookups, containing instances of the
 gpiod_lookup structure. Two macros are defined to help declaring such mappings:
diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c
index 3f4bb58aea54..74f1eaf97801 100644
--- a/arch/arm/mach-at91/at91rm9200_devices.c
+++ b/arch/arm/mach-at91/at91rm9200_devices.c
@@ -15,7 +15,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/gpio.h>
-#include <linux/gpio/driver.h>
+#include <linux/gpio/machine.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 
diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c
index 9c6029ba526f..91fd858ced0d 100644
--- a/arch/arm/mach-tegra/board-paz00.c
+++ b/arch/arm/mach-tegra/board-paz00.c
@@ -18,7 +18,7 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/gpio/driver.h>
+#include <linux/gpio/machine.h>
 #include <linux/rfkill-gpio.h>
 #include "board.h"
 
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 088e92a79ae6..c454525e7695 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 
 #include <linux/input.h>
 #include <linux/gpio_keys.h>
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 768f0831db18..18b069e6ba03 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/gpio/driver.h>
+#include <linux/gpio/machine.h>
 
 #include "gpiolib.h"
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 4c463fb0155e..e78a2373e374 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -151,60 +151,6 @@ void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
 
 struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
 
-enum gpio_lookup_flags {
-	GPIO_ACTIVE_HIGH = (0 << 0),
-	GPIO_ACTIVE_LOW = (1 << 0),
-	GPIO_OPEN_DRAIN = (1 << 1),
-	GPIO_OPEN_SOURCE = (1 << 2),
-};
-
-/**
- * struct gpiod_lookup - lookup table
- * @chip_label: name of the chip the GPIO belongs to
- * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO
- * @con_id: name of the GPIO from the device's point of view
- * @idx: index of the GPIO in case several GPIOs share the same name
- * @flags: mask of GPIO_* values
- *
- * gpiod_lookup is a lookup table for associating GPIOs to specific devices and
- * functions using platform data.
- */
-struct gpiod_lookup {
-	const char *chip_label;
-	u16 chip_hwnum;
-	const char *con_id;
-	unsigned int idx;
-	enum gpio_lookup_flags flags;
-};
-
-struct gpiod_lookup_table {
-	struct list_head list;
-	const char *dev_id;
-	struct gpiod_lookup table[];
-};
-
-/*
- * Simple definition of a single GPIO under a con_id
- */
-#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _con_id, _flags) \
-	GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, 0, _flags)
-
-/*
- * Use this macro if you need to have several GPIOs under the same con_id.
- * Each GPIO needs to use a different index and can be accessed using
- * gpiod_get_index()
- */
-#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, _idx, _flags)  \
-{                                                                         \
-	.chip_label = _chip_label,                                        \
-	.chip_hwnum = _chip_hwnum,                                        \
-	.con_id = _con_id,                                                \
-	.idx = _idx,                                                      \
-	.flags = _flags,                                                  \
-}
-
-void gpiod_add_lookup_table(struct gpiod_lookup_table *table);
-
 #ifdef CONFIG_GPIOLIB_IRQCHIP
 
 void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
new file mode 100644
index 000000000000..b8ad87fab4ce
--- /dev/null
+++ b/include/linux/gpio/machine.h
@@ -0,0 +1,58 @@
+#ifndef __LINUX_GPIO_MACHINE_H
+#define __LINUX_GPIO_MACHINE_H
+
+enum gpio_lookup_flags {
+	GPIO_ACTIVE_HIGH = (0 << 0),
+	GPIO_ACTIVE_LOW = (1 << 0),
+	GPIO_OPEN_DRAIN = (1 << 1),
+	GPIO_OPEN_SOURCE = (1 << 2),
+};
+
+/**
+ * struct gpiod_lookup - lookup table
+ * @chip_label: name of the chip the GPIO belongs to
+ * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO
+ * @con_id: name of the GPIO from the device's point of view
+ * @idx: index of the GPIO in case several GPIOs share the same name
+ * @flags: mask of GPIO_* values
+ *
+ * gpiod_lookup is a lookup table for associating GPIOs to specific devices and
+ * functions using platform data.
+ */
+struct gpiod_lookup {
+	const char *chip_label;
+	u16 chip_hwnum;
+	const char *con_id;
+	unsigned int idx;
+	enum gpio_lookup_flags flags;
+};
+
+struct gpiod_lookup_table {
+	struct list_head list;
+	const char *dev_id;
+	struct gpiod_lookup table[];
+};
+
+/*
+ * Simple definition of a single GPIO under a con_id
+ */
+#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _con_id, _flags) \
+	GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, 0, _flags)
+
+/*
+ * Use this macro if you need to have several GPIOs under the same con_id.
+ * Each GPIO needs to use a different index and can be accessed using
+ * gpiod_get_index()
+ */
+#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, _idx, _flags)  \
+{                                                                         \
+	.chip_label = _chip_label,                                        \
+	.chip_hwnum = _chip_hwnum,                                        \
+	.con_id = _con_id,                                                \
+	.idx = _idx,                                                      \
+	.flags = _flags,                                                  \
+}
+
+void gpiod_add_lookup_table(struct gpiod_lookup_table *table);
+
+#endif /* __LINUX_GPIO_MACHINE_H */
-- 
cgit v1.2.3


From 39b2bbe3d715cf5013b5c48695ccdd25bd3bf120 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Fri, 25 Jul 2014 23:38:36 +0900
Subject: gpio: add flags argument to gpiod_get*() functions

The huge majority of GPIOs have their direction and initial value set
right after being obtained by one of the gpiod_get() functions. The
integer GPIO API had gpio_request_one() that took a convenience flags
parameter allowing to specify an direction and value applied to the
returned GPIO. This feature greatly simplifies client code and ensures
errors are always handled properly.

A similar feature has been requested for the gpiod API. Since setting
the direction of a GPIO is so often the very next action done after
obtaining its descriptor, we prefer to extend the existing functions
instead of introducing new functions that would raise the
number of gpiod getters to 16 (!).

The drawback of this approach is that all gpiod clients need to be
updated. To limit the pain, temporary macros are introduced that allow
gpiod_get*() to be called with or without the extra flags argument. They
will be removed once all consumer code has been updated.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/gpio/consumer.txt | 26 ++++++++++---
 drivers/gpio/devres.c           | 40 ++++++++++++--------
 drivers/gpio/gpiolib.c          | 67 +++++++++++++++++++++++-----------
 include/linux/gpio/consumer.h   | 81 +++++++++++++++++++++++++++++++++--------
 4 files changed, 155 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt
index d8abfc31abbe..76546324e968 100644
--- a/Documentation/gpio/consumer.txt
+++ b/Documentation/gpio/consumer.txt
@@ -29,13 +29,24 @@ gpiod_get() functions. Like many other kernel subsystems, gpiod_get() takes the
 device that will use the GPIO and the function the requested GPIO is supposed to
 fulfill:
 
-	struct gpio_desc *gpiod_get(struct device *dev, const char *con_id)
+	struct gpio_desc *gpiod_get(struct device *dev, const char *con_id,
+				    enum gpiod_flags flags)
 
 If a function is implemented by using several GPIOs together (e.g. a simple LED
 device that displays digits), an additional index argument can be specified:
 
 	struct gpio_desc *gpiod_get_index(struct device *dev,
-					  const char *con_id, unsigned int idx)
+					  const char *con_id, unsigned int idx,
+					  enum gpiod_flags flags)
+
+The flags parameter is used to optionally specify a direction and initial value
+for the GPIO. Values can be:
+
+* GPIOD_ASIS or 0 to not initialize the GPIO at all. The direction must be set
+  later with one of the dedicated functions.
+* GPIOD_IN to initialize the GPIO as input.
+* GPIOD_OUT_LOW to initialize the GPIO as output with a value of 0.
+* GPIOD_OUT_HIGH to initialize the GPIO as output with a value of 1.
 
 Both functions return either a valid GPIO descriptor, or an error code checkable
 with IS_ERR() (they will never return a NULL pointer). -ENOENT will be returned
@@ -46,11 +57,13 @@ errors and an absence of GPIO for optional GPIO parameters.
 
 Device-managed variants of these functions are also defined:
 
-	struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id)
+	struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id,
+					 enum gpiod_flags flags)
 
 	struct gpio_desc *devm_gpiod_get_index(struct device *dev,
 					       const char *con_id,
-					       unsigned int idx)
+					       unsigned int idx,
+					       enum gpiod_flags flags)
 
 A GPIO descriptor can be disposed of using the gpiod_put() function:
 
@@ -67,8 +80,9 @@ Using GPIOs
 
 Setting Direction
 -----------------
-The first thing a driver must do with a GPIO is setting its direction. This is
-done by invoking one of the gpiod_direction_*() functions:
+The first thing a driver must do with a GPIO is setting its direction. If no
+direction-setting flags have been given to gpiod_get*(), this is done by
+invoking one of the gpiod_direction_*() functions:
 
 	int gpiod_direction_input(struct gpio_desc *desc)
 	int gpiod_direction_output(struct gpio_desc *desc, int value)
diff --git a/drivers/gpio/devres.c b/drivers/gpio/devres.c
index 65978cf85f79..41b2f40578d5 100644
--- a/drivers/gpio/devres.c
+++ b/drivers/gpio/devres.c
@@ -39,47 +39,53 @@ static int devm_gpiod_match(struct device *dev, void *res, void *data)
  * devm_gpiod_get - Resource-managed gpiod_get()
  * @dev:	GPIO consumer
  * @con_id:	function within the GPIO consumer
+ * @flags:	optional GPIO initialization flags
  *
  * Managed gpiod_get(). GPIO descriptors returned from this function are
  * automatically disposed on driver detach. See gpiod_get() for detailed
  * information about behavior and return values.
  */
-struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
-					      const char *con_id)
+struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev,
+					      const char *con_id,
+					      enum gpiod_flags flags)
 {
-	return devm_gpiod_get_index(dev, con_id, 0);
+	return devm_gpiod_get_index(dev, con_id, 0, flags);
 }
-EXPORT_SYMBOL(devm_gpiod_get);
+EXPORT_SYMBOL(__devm_gpiod_get);
 
 /**
  * devm_gpiod_get_optional - Resource-managed gpiod_get_optional()
  * @dev: GPIO consumer
  * @con_id: function within the GPIO consumer
+ * @flags: optional GPIO initialization flags
  *
  * Managed gpiod_get_optional(). GPIO descriptors returned from this function
  * are automatically disposed on driver detach. See gpiod_get_optional() for
  * detailed information about behavior and return values.
  */
-struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev,
-						       const char *con_id)
+struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev,
+						       const char *con_id,
+						       enum gpiod_flags flags)
 {
-	return devm_gpiod_get_index_optional(dev, con_id, 0);
+	return devm_gpiod_get_index_optional(dev, con_id, 0, flags);
 }
-EXPORT_SYMBOL(devm_gpiod_get_optional);
+EXPORT_SYMBOL(__devm_gpiod_get_optional);
 
 /**
  * devm_gpiod_get_index - Resource-managed gpiod_get_index()
  * @dev:	GPIO consumer
  * @con_id:	function within the GPIO consumer
  * @idx:	index of the GPIO to obtain in the consumer
+ * @flags:	optional GPIO initialization flags
  *
  * Managed gpiod_get_index(). GPIO descriptors returned from this function are
  * automatically disposed on driver detach. See gpiod_get_index() for detailed
  * information about behavior and return values.
  */
-struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
+struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev,
 						    const char *con_id,
-						    unsigned int idx)
+						    unsigned int idx,
+						    enum gpiod_flags flags)
 {
 	struct gpio_desc **dr;
 	struct gpio_desc *desc;
@@ -89,7 +95,7 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
 	if (!dr)
 		return ERR_PTR(-ENOMEM);
 
-	desc = gpiod_get_index(dev, con_id, idx);
+	desc = gpiod_get_index(dev, con_id, idx, flags);
 	if (IS_ERR(desc)) {
 		devres_free(dr);
 		return desc;
@@ -100,26 +106,28 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
 
 	return desc;
 }
-EXPORT_SYMBOL(devm_gpiod_get_index);
+EXPORT_SYMBOL(__devm_gpiod_get_index);
 
 /**
  * devm_gpiod_get_index_optional - Resource-managed gpiod_get_index_optional()
  * @dev: GPIO consumer
  * @con_id: function within the GPIO consumer
  * @index: index of the GPIO to obtain in the consumer
+ * @flags: optional GPIO initialization flags
  *
  * Managed gpiod_get_index_optional(). GPIO descriptors returned from this
  * function are automatically disposed on driver detach. See
  * gpiod_get_index_optional() for detailed information about behavior and
  * return values.
  */
-struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev,
+struct gpio_desc *__must_check __devm_gpiod_get_index_optional(struct device *dev,
 							     const char *con_id,
-							     unsigned int index)
+							     unsigned int index,
+							 enum gpiod_flags flags)
 {
 	struct gpio_desc *desc;
 
-	desc = devm_gpiod_get_index(dev, con_id, index);
+	desc = devm_gpiod_get_index(dev, con_id, index, flags);
 	if (IS_ERR(desc)) {
 		if (PTR_ERR(desc) == -ENOENT)
 			return NULL;
@@ -127,7 +135,7 @@ struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev,
 
 	return desc;
 }
-EXPORT_SYMBOL(devm_gpiod_get_index_optional);
+EXPORT_SYMBOL(__devm_gpiod_get_index_optional);
 
 /**
  * devm_gpiod_put - Resource-managed gpiod_put()
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 330227581a25..15cc0bb65dda 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1582,38 +1582,43 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id,
  * gpiod_get - obtain a GPIO for a given GPIO function
  * @dev:	GPIO consumer, can be NULL for system-global GPIOs
  * @con_id:	function within the GPIO consumer
+ * @flags:	optional GPIO initialization flags
  *
  * Return the GPIO descriptor corresponding to the function con_id of device
  * dev, -ENOENT if no GPIO has been assigned to the requested function, or
  * another IS_ERR() code if an error occured while trying to acquire the GPIO.
  */
-struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id)
+struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id,
+					 enum gpiod_flags flags)
 {
-	return gpiod_get_index(dev, con_id, 0);
+	return gpiod_get_index(dev, con_id, 0, flags);
 }
-EXPORT_SYMBOL_GPL(gpiod_get);
+EXPORT_SYMBOL_GPL(__gpiod_get);
 
 /**
  * gpiod_get_optional - obtain an optional GPIO for a given GPIO function
  * @dev: GPIO consumer, can be NULL for system-global GPIOs
  * @con_id: function within the GPIO consumer
+ * @flags: optional GPIO initialization flags
  *
  * This is equivalent to gpiod_get(), except that when no GPIO was assigned to
  * the requested function it will return NULL. This is convenient for drivers
  * that need to handle optional GPIOs.
  */
-struct gpio_desc *__must_check gpiod_get_optional(struct device *dev,
-						  const char *con_id)
+struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev,
+						  const char *con_id,
+						  enum gpiod_flags flags)
 {
-	return gpiod_get_index_optional(dev, con_id, 0);
+	return gpiod_get_index_optional(dev, con_id, 0, flags);
 }
-EXPORT_SYMBOL_GPL(gpiod_get_optional);
+EXPORT_SYMBOL_GPL(__gpiod_get_optional);
 
 /**
  * gpiod_get_index - obtain a GPIO from a multi-index GPIO function
  * @dev:	GPIO consumer, can be NULL for system-global GPIOs
  * @con_id:	function within the GPIO consumer
  * @idx:	index of the GPIO to obtain in the consumer
+ * @flags:	optional GPIO initialization flags
  *
  * This variant of gpiod_get() allows to access GPIOs other than the first
  * defined one for functions that define several GPIOs.
@@ -1622,23 +1627,24 @@ EXPORT_SYMBOL_GPL(gpiod_get_optional);
  * requested function and/or index, or another IS_ERR() code if an error
  * occured while trying to acquire the GPIO.
  */
-struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
+struct gpio_desc *__must_check __gpiod_get_index(struct device *dev,
 					       const char *con_id,
-					       unsigned int idx)
+					       unsigned int idx,
+					       enum gpiod_flags flags)
 {
 	struct gpio_desc *desc = NULL;
 	int status;
-	enum gpio_lookup_flags flags = 0;
+	enum gpio_lookup_flags lookupflags = 0;
 
 	dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id);
 
 	/* Using device tree? */
 	if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node) {
 		dev_dbg(dev, "using device tree for GPIO lookup\n");
-		desc = of_find_gpio(dev, con_id, idx, &flags);
+		desc = of_find_gpio(dev, con_id, idx, &lookupflags);
 	} else if (IS_ENABLED(CONFIG_ACPI) && dev && ACPI_HANDLE(dev)) {
 		dev_dbg(dev, "using ACPI for GPIO lookup\n");
-		desc = acpi_find_gpio(dev, con_id, idx, &flags);
+		desc = acpi_find_gpio(dev, con_id, idx, &lookupflags);
 	}
 
 	/*
@@ -1647,7 +1653,7 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 	 */
 	if (!desc || desc == ERR_PTR(-ENOENT)) {
 		dev_dbg(dev, "using lookup tables for GPIO lookup");
-		desc = gpiod_find(dev, con_id, idx, &flags);
+		desc = gpiod_find(dev, con_id, idx, &lookupflags);
 	}
 
 	if (IS_ERR(desc)) {
@@ -1660,16 +1666,33 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 	if (status < 0)
 		return ERR_PTR(status);
 
-	if (flags & GPIO_ACTIVE_LOW)
+	if (lookupflags & GPIO_ACTIVE_LOW)
 		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
-	if (flags & GPIO_OPEN_DRAIN)
+	if (lookupflags & GPIO_OPEN_DRAIN)
 		set_bit(FLAG_OPEN_DRAIN, &desc->flags);
-	if (flags & GPIO_OPEN_SOURCE)
+	if (lookupflags & GPIO_OPEN_SOURCE)
 		set_bit(FLAG_OPEN_SOURCE, &desc->flags);
 
+	/* No particular flag request, return here... */
+	if (flags & GPIOD_FLAGS_BIT_DIR_SET)
+		return desc;
+
+	/* Process flags */
+	if (flags & GPIOD_FLAGS_BIT_DIR_OUT)
+		status = gpiod_direction_output(desc,
+					      flags & GPIOD_FLAGS_BIT_DIR_VAL);
+	else
+		status = gpiod_direction_input(desc);
+
+	if (status < 0) {
+		dev_dbg(dev, "setup of GPIO %s failed\n", con_id);
+		gpiod_put(desc);
+		return ERR_PTR(status);
+	}
+
 	return desc;
 }
-EXPORT_SYMBOL_GPL(gpiod_get_index);
+EXPORT_SYMBOL_GPL(__gpiod_get_index);
 
 /**
  * gpiod_get_index_optional - obtain an optional GPIO from a multi-index GPIO
@@ -1677,18 +1700,20 @@ EXPORT_SYMBOL_GPL(gpiod_get_index);
  * @dev: GPIO consumer, can be NULL for system-global GPIOs
  * @con_id: function within the GPIO consumer
  * @index: index of the GPIO to obtain in the consumer
+ * @flags: optional GPIO initialization flags
  *
  * This is equivalent to gpiod_get_index(), except that when no GPIO with the
  * specified index was assigned to the requested function it will return NULL.
  * This is convenient for drivers that need to handle optional GPIOs.
  */
-struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev,
+struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev,
 							const char *con_id,
-							unsigned int index)
+							unsigned int index,
+							enum gpiod_flags flags)
 {
 	struct gpio_desc *desc;
 
-	desc = gpiod_get_index(dev, con_id, index);
+	desc = gpiod_get_index(dev, con_id, index, flags);
 	if (IS_ERR(desc)) {
 		if (PTR_ERR(desc) == -ENOENT)
 			return NULL;
@@ -1696,7 +1721,7 @@ struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev,
 
 	return desc;
 }
-EXPORT_SYMBOL_GPL(gpiod_get_index_optional);
+EXPORT_SYMBOL_GPL(__gpiod_get_index_optional);
 
 /**
  * gpiod_put - dispose of a GPIO descriptor
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 05e53ccb708b..b7ce0c64c6f3 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -18,30 +18,79 @@ struct gpio_desc;
 
 #ifdef CONFIG_GPIOLIB
 
+#define GPIOD_FLAGS_BIT_DIR_SET		BIT(0)
+#define GPIOD_FLAGS_BIT_DIR_OUT		BIT(1)
+#define GPIOD_FLAGS_BIT_DIR_VAL		BIT(2)
+
+/**
+ * Optional flags that can be passed to one of gpiod_* to configure direction
+ * and output value. These values cannot be OR'd.
+ */
+enum gpiod_flags {
+	GPIOD_ASIS	= 0,
+	GPIOD_IN	= GPIOD_FLAGS_BIT_DIR_SET,
+	GPIOD_OUT_LOW	= GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT,
+	GPIOD_OUT_HIGH	= GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT |
+			  GPIOD_FLAGS_BIT_DIR_VAL,
+};
+
 /* Acquire and dispose GPIOs */
-struct gpio_desc *__must_check gpiod_get(struct device *dev,
-					 const char *con_id);
-struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
+struct gpio_desc *__must_check __gpiod_get(struct device *dev,
+					 const char *con_id,
+					 enum gpiod_flags flags);
+#define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags)
+#define gpiod_get(varargs...) __gpiod_get(varargs, 0)
+struct gpio_desc *__must_check __gpiod_get_index(struct device *dev,
 					       const char *con_id,
-					       unsigned int idx);
-struct gpio_desc *__must_check gpiod_get_optional(struct device *dev,
-						  const char *con_id);
-struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev,
+					       unsigned int idx,
+					       enum gpiod_flags flags);
+#define __gpiod_get_index(dev, con_id, index, flags, ...)		\
+	__gpiod_get_index(dev, con_id, index, flags)
+#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, 0)
+struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev,
+						  const char *con_id,
+						  enum gpiod_flags flags);
+#define __gpiod_get_optional(dev, con_id, flags, ...)			\
+	__gpiod_get_optional(dev, con_id, flags)
+#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, 0)
+struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev,
 							const char *con_id,
-							unsigned int index);
+							unsigned int index,
+							enum gpiod_flags flags);
+#define __gpiod_get_index_optional(dev, con_id, index, flags, ...)	\
+	__gpiod_get_index_optional(dev, con_id, index, flags)
+#define gpiod_get_index_optional(varargs...)				\
+	__gpiod_get_index_optional(varargs, 0)
 
 void gpiod_put(struct gpio_desc *desc);
 
-struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
-					      const char *con_id);
-struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
+struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev,
+					      const char *con_id,
+					      enum gpiod_flags flags);
+#define __devm_gpiod_get(dev, con_id, flags, ...)			\
+	__devm_gpiod_get(dev, con_id, flags)
+#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, 0)
+struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev,
 						    const char *con_id,
-						    unsigned int idx);
-struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev,
-						       const char *con_id);
+						    unsigned int idx,
+						    enum gpiod_flags flags);
+#define __devm_gpiod_get_index(dev, con_id, index, flags, ...)		\
+	__devm_gpiod_get_index(dev, con_id, index, flags)
+#define devm_gpiod_get_index(varargs...) __devm_gpiod_get_index(varargs, 0)
+struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev,
+						       const char *con_id,
+						       enum gpiod_flags flags);
+#define __devm_gpiod_get_optional(dev, con_id, flags, ...)		\
+	__devm_gpiod_get_optional(dev, con_id, flags)
+#define devm_gpiod_get_optional(varargs...)				\
+	__devm_gpiod_get_optional(varargs, 0)
 struct gpio_desc *__must_check
-devm_gpiod_get_index_optional(struct device *dev, const char *con_id,
-			      unsigned int index);
+__devm_gpiod_get_index_optional(struct device *dev, const char *con_id,
+			      unsigned int index, enum gpiod_flags flags);
+#define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...)	\
+	__devm_gpiod_get_index_optional(dev, con_id, index, flags)
+#define devm_gpiod_get_index_optional(varargs...)			\
+	__devm_gpiod_get_index_optional(varargs, 0)
 
 void devm_gpiod_put(struct device *dev, struct gpio_desc *desc);
 
-- 
cgit v1.2.3


From eb3fe7def66511120766c8fc05ee9631cce7fe6f Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 8 Jul 2014 13:46:37 +0300
Subject: ARM: edma: Add edma_assign_channel_eventq() to move channel to a give
 queue

In some cases it is desired to move a channel to a specific event queue.
Such a use case is audio, where it is preferred that it is served with
highest priority compared to other DMA clients.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/common/edma.c             | 28 ++++++++++++++++++++++++++++
 include/linux/platform_data/edma.h |  2 ++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index f834aae7720f..88099175fc56 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -1414,6 +1414,34 @@ void edma_clear_event(unsigned channel)
 }
 EXPORT_SYMBOL(edma_clear_event);
 
+/*
+ * edma_assign_channel_eventq - move given channel to desired eventq
+ * Arguments:
+ *	channel - channel number
+ *	eventq_no - queue to move the channel
+ *
+ * Can be used to move a channel to a selected event queue.
+ */
+void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no)
+{
+	unsigned ctlr;
+
+	ctlr = EDMA_CTLR(channel);
+	channel = EDMA_CHAN_SLOT(channel);
+
+	if (channel >= edma_cc[ctlr]->num_channels)
+		return;
+
+	/* default to low priority queue */
+	if (eventq_no == EVENTQ_DEFAULT)
+		eventq_no = edma_cc[ctlr]->default_queue;
+	if (eventq_no >= edma_cc[ctlr]->num_tc)
+		return;
+
+	map_dmach_queue(ctlr, channel, eventq_no);
+}
+EXPORT_SYMBOL(edma_assign_channel_eventq);
+
 static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata,
 			      struct edma *edma_cc)
 {
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index eb8d5627d080..bdb2710e2aab 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -150,6 +150,8 @@ void edma_clear_event(unsigned channel);
 void edma_pause(unsigned channel);
 void edma_resume(unsigned channel);
 
+void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no);
+
 struct edma_rsv_info {
 
 	const s16	(*rsv_chans)[2];
-- 
cgit v1.2.3


From 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 18:27:35 +0200
Subject: KVM: Rename and add argument to check_extension

In preparation to make the check_extension function available to VM scope
we add a struct kvm * argument to the function header and rename the function
accordingly. It will still be called from the /dev/kvm fd, but with a NULL
argument for struct kvm *.

Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/kvm/arm.c         | 2 +-
 arch/ia64/kvm/kvm-ia64.c   | 2 +-
 arch/mips/kvm/mips.c       | 2 +-
 arch/powerpc/kvm/powerpc.c | 2 +-
 arch/s390/kvm/kvm-s390.c   | 2 +-
 arch/x86/kvm/x86.c         | 2 +-
 include/linux/kvm_host.h   | 2 +-
 virt/kvm/kvm_main.c        | 6 +++---
 8 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37c0f9e..cb77f999badd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -184,7 +184,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	}
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 	switch (ext) {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 6a4309bb821a..0729ba6acddf 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn)
 	*(int *)rtn = 0;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 
 	int r;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index d687c6e3258d..3ca79aa011df 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -885,7 +885,7 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8e0356835960..d870bacc2f75 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -391,7 +391,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
 {
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 	/* FIXME!!
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2f3e14fe91a4..00268cacdf4c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -146,7 +146,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5a8691b0ed76..5a62d91c96e7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2616,7 +2616,7 @@ out:
 	return r;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec4e3bd83d47..5065b953e6e8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -602,7 +602,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg);
 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
 
-int kvm_dev_ioctl_check_extension(long ext);
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 
 int kvm_get_dirty_log(struct kvm *kvm,
 			struct kvm_dirty_log *log, int *is_dirty);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4b6c01b477f9..e28f3caa539d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2571,7 +2571,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	return r;
 }
 
-static long kvm_dev_ioctl_check_extension_generic(long arg)
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 {
 	switch (arg) {
 	case KVM_CAP_USER_MEMORY:
@@ -2595,7 +2595,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	default:
 		break;
 	}
-	return kvm_dev_ioctl_check_extension(arg);
+	return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
 static long kvm_dev_ioctl(struct file *filp,
@@ -2614,7 +2614,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm(arg);
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension_generic(arg);
+		r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;
-- 
cgit v1.2.3


From 36874c7e219fa080141d49fd7bb9bbbdad0507c5 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Mon, 28 Jul 2014 10:01:07 -0700
Subject: Input: pixcir_i2c_ts - support up to 5 fingers and hardware tracking
 IDs

Some variants of the Pixcir touch controller support up to 5 simultaneous
fingers and hardware tracking IDs. Prepare the driver for that.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/pixcir_i2c_ts.c | 74 ++++++++++++++++++++++++-------
 include/linux/input/pixcir_ts.h           | 12 +++++
 2 files changed, 69 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
index 0b016814e6b6..eddda520a1a7 100644
--- a/drivers/input/touchscreen/pixcir_i2c_ts.c
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c
@@ -27,18 +27,20 @@
 #include <linux/input/pixcir_ts.h>
 #include <linux/gpio.h>
 
-#define PIXCIR_MAX_SLOTS       2
+#define PIXCIR_MAX_SLOTS       5 /* Max fingers supported by driver */
 
 struct pixcir_i2c_ts_data {
 	struct i2c_client *client;
 	struct input_dev *input;
-	const struct pixcir_ts_platform_data *chip;
+	const struct pixcir_ts_platform_data *pdata;
 	bool running;
+	int max_fingers;	/* Max fingers supported in this instance */
 };
 
 struct pixcir_touch {
 	int x;
 	int y;
+	int id;
 };
 
 struct pixcir_report_data {
@@ -49,13 +51,21 @@ struct pixcir_report_data {
 static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata,
 			    struct pixcir_report_data *report)
 {
-	u8 rdbuf[10], wrbuf[1] = { 0 };
+	u8 rdbuf[2 + PIXCIR_MAX_SLOTS * 5];
+	u8 wrbuf[1] = { 0 };
 	u8 *bufptr;
 	u8 touch;
 	int ret, i;
+	int readsize;
+	const struct pixcir_i2c_chip_data *chip = &tsdata->pdata->chip;
 
 	memset(report, 0, sizeof(struct pixcir_report_data));
 
+	i = chip->has_hw_ids ? 1 : 0;
+	readsize = 2 + tsdata->max_fingers * (4 + i);
+	if (readsize > sizeof(rdbuf))
+		readsize = sizeof(rdbuf);
+
 	ret = i2c_master_send(tsdata->client, wrbuf, sizeof(wrbuf));
 	if (ret != sizeof(wrbuf)) {
 		dev_err(&tsdata->client->dev,
@@ -64,7 +74,7 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata,
 		return;
 	}
 
-	ret = i2c_master_recv(tsdata->client, rdbuf, sizeof(rdbuf));
+	ret = i2c_master_recv(tsdata->client, rdbuf, readsize);
 	if (ret != sizeof(rdbuf)) {
 		dev_err(&tsdata->client->dev,
 			"%s: i2c_master_recv failed(), ret=%d\n",
@@ -73,8 +83,8 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata,
 	}
 
 	touch = rdbuf[0] & 0x7;
-	if (touch > PIXCIR_MAX_SLOTS)
-		touch = PIXCIR_MAX_SLOTS;
+	if (touch > tsdata->max_fingers)
+		touch = tsdata->max_fingers;
 
 	report->num_touches = touch;
 	bufptr = &rdbuf[2];
@@ -83,7 +93,12 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata,
 		report->touches[i].x = (bufptr[1] << 8) | bufptr[0];
 		report->touches[i].y = (bufptr[3] << 8) | bufptr[2];
 
-		bufptr = bufptr + 4;
+		if (chip->has_hw_ids) {
+			report->touches[i].id = bufptr[4];
+			bufptr = bufptr + 5;
+		} else {
+			bufptr = bufptr + 4;
+		}
 	}
 }
 
@@ -95,22 +110,35 @@ static void pixcir_ts_report(struct pixcir_i2c_ts_data *ts,
 	struct pixcir_touch *touch;
 	int n, i, slot;
 	struct device *dev = &ts->client->dev;
+	const struct pixcir_i2c_chip_data *chip = &ts->pdata->chip;
 
 	n = report->num_touches;
 	if (n > PIXCIR_MAX_SLOTS)
 		n = PIXCIR_MAX_SLOTS;
 
-	for (i = 0; i < n; i++) {
-		touch = &report->touches[i];
-		pos[i].x = touch->x;
-		pos[i].y = touch->y;
-	}
+	if (!chip->has_hw_ids) {
+		for (i = 0; i < n; i++) {
+			touch = &report->touches[i];
+			pos[i].x = touch->x;
+			pos[i].y = touch->y;
+		}
 
-	input_mt_assign_slots(ts->input, slots, pos, n);
+		input_mt_assign_slots(ts->input, slots, pos, n);
+	}
 
 	for (i = 0; i < n; i++) {
 		touch = &report->touches[i];
-		slot = slots[i];
+
+		if (chip->has_hw_ids) {
+			slot = input_mt_get_slot_by_key(ts->input, touch->id);
+			if (slot < 0) {
+				dev_dbg(dev, "no free slot for id 0x%x\n",
+					touch->id);
+				continue;
+			}
+		} else {
+			slot = slots[i];
+		}
 
 		input_mt_slot(ts->input, slot);
 		input_mt_report_slot_state(ts->input,
@@ -130,7 +158,7 @@ static void pixcir_ts_report(struct pixcir_i2c_ts_data *ts,
 static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
 {
 	struct pixcir_i2c_ts_data *tsdata = dev_id;
-	const struct pixcir_ts_platform_data *pdata = tsdata->chip;
+	const struct pixcir_ts_platform_data *pdata = tsdata->pdata;
 	struct pixcir_report_data report;
 
 	while (tsdata->running) {
@@ -399,6 +427,11 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 		return -EINVAL;
 	}
 
+	if (!pdata->chip.max_fingers) {
+		dev_err(dev, "Invalid max_fingers in pdata\n");
+		return -EINVAL;
+	}
+
 	tsdata = devm_kzalloc(dev, sizeof(*tsdata), GFP_KERNEL);
 	if (!tsdata)
 		return -ENOMEM;
@@ -411,7 +444,7 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 
 	tsdata->client = client;
 	tsdata->input = input;
-	tsdata->chip = pdata;
+	tsdata->pdata = pdata;
 
 	input->name = client->name;
 	input->id.bustype = BUS_I2C;
@@ -427,7 +460,14 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 	input_set_abs_params(input, ABS_MT_POSITION_X, 0, pdata->x_max, 0, 0);
 	input_set_abs_params(input, ABS_MT_POSITION_Y, 0, pdata->y_max, 0, 0);
 
-	error = input_mt_init_slots(input, PIXCIR_MAX_SLOTS,
+	tsdata->max_fingers = tsdata->pdata->chip.max_fingers;
+	if (tsdata->max_fingers > PIXCIR_MAX_SLOTS) {
+		tsdata->max_fingers = PIXCIR_MAX_SLOTS;
+		dev_info(dev, "Limiting maximum fingers to %d\n",
+			 tsdata->max_fingers);
+	}
+
+	error = input_mt_init_slots(input, tsdata->max_fingers,
 				    INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
 	if (error) {
 		dev_err(dev, "Error initializing Multi-Touch slots\n");
diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h
index 160cf353aa39..7bae83b7c396 100644
--- a/include/linux/input/pixcir_ts.h
+++ b/include/linux/input/pixcir_ts.h
@@ -43,10 +43,22 @@ enum pixcir_int_mode {
 #define PIXCIR_INT_ENABLE	(1UL << 3)
 #define PIXCIR_INT_POL_HIGH	(1UL << 2)
 
+/**
+ * struct pixcir_irc_chip_data - chip related data
+ * @max_fingers:	Max number of fingers reported simultaneously by h/w
+ * @has_hw_ids:		Hardware supports finger tracking IDs
+ *
+ */
+struct pixcir_i2c_chip_data {
+	u8 max_fingers;
+	bool has_hw_ids;
+};
+
 struct pixcir_ts_platform_data {
 	int x_max;
 	int y_max;
 	int gpio_attb;		/* GPIO connected to ATTB line */
+	struct pixcir_i2c_chip_data chip;
 };
 
 #endif
-- 
cgit v1.2.3


From 518776800c094a518ae6d303660b57f1400eb1eb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 24 Jul 2014 23:59:33 -0400
Subject: SUNRPC: Allow svc_reserve() to notify TCP socket that space has been
 freed

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 +
 net/sunrpc/svc_xprt.c           | 2 ++
 net/sunrpc/svcsock.c            | 9 +++++++++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 5d9d6f84b382..ce6e4182a5b2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -25,6 +25,7 @@ struct svc_xprt_ops {
 	void		(*xpo_detach)(struct svc_xprt *);
 	void		(*xpo_free)(struct svc_xprt *);
 	int		(*xpo_secure_port)(struct svc_rqst *);
+	void		(*xpo_adjust_wspace)(struct svc_xprt *);
 };
 
 struct svc_xprt_class {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 9cfa391e2bd0..6666c6745858 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -448,6 +448,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
 		rqstp->rq_reserved = space;
 
+		if (xprt->xpt_ops->xpo_adjust_wspace)
+			xprt->xpt_ops->xpo_adjust_wspace(xprt);
 		svc_xprt_enqueue(xprt);
 	}
 }
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 88db211d4264..c24a8ff33f8f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -475,6 +475,14 @@ static void svc_tcp_write_space(struct sock *sk)
 	svc_write_space(sk);
 }
 
+static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
+{
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+	if (svc_tcp_has_wspace(xprt))
+		clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+}
+
 /*
  * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
  */
@@ -1289,6 +1297,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_has_wspace = svc_tcp_has_wspace,
 	.xpo_accept = svc_tcp_accept,
 	.xpo_secure_port = svc_sock_secure_port,
+	.xpo_adjust_wspace = svc_tcp_adjust_wspace,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
-- 
cgit v1.2.3


From 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 3 Feb 2014 19:13:49 -0800
Subject: namespaces: Use task_lock and not rcu to protect nsproxy

The synchronous syncrhonize_rcu in switch_task_namespaces makes setns
a sufficiently expensive system call that people have complained.

Upon inspect nsproxy no longer needs rcu protection for remote reads.
remote reads are rare.  So optimize for same process reads and write
by switching using rask_lock instead.

This yields a simpler to understand lock, and a faster setns system call.

In particular this fixes a performance regression observed
by Rafael David Tinoco <rafael.tinoco@canonical.com>.

This is effectively a revert of Pavel Emelyanov's commit
cf7b708c8d1d7a27736771bcf4c457b332b0f818 Make access to task's nsproxy lighter
from 2007.  The race this originialy fixed no longer exists as
do_notify_parent uses task_active_pid_ns(parent) instead of
parent->nsproxy.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c           |  6 +++---
 fs/proc/proc_net.c       |  4 +++-
 fs/proc_namespace.c      |  8 +++-----
 include/linux/nsproxy.h  | 16 ++++++----------
 ipc/namespace.c          |  6 +++---
 kernel/nsproxy.c         | 15 ++++-----------
 kernel/utsname.c         |  6 +++---
 net/core/net_namespace.c | 10 ++++++----
 8 files changed, 31 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41cd887..7187d01329c3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2972,13 +2972,13 @@ static void *mntns_get(struct task_struct *task)
 	struct mnt_namespace *ns = NULL;
 	struct nsproxy *nsproxy;
 
-	rcu_read_lock();
-	nsproxy = task_nsproxy(task);
+	task_lock(task);
+	nsproxy = task->nsproxy;
 	if (nsproxy) {
 		ns = nsproxy->mnt_ns;
 		get_mnt_ns(ns);
 	}
-	rcu_read_unlock();
+	task_unlock(task);
 
 	return ns;
 }
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4677bb7dc7c2..a63af3e0a612 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir)
 	rcu_read_lock();
 	task = pid_task(proc_pid(dir), PIDTYPE_PID);
 	if (task != NULL) {
-		ns = task_nsproxy(task);
+		task_lock(task);
+		ns = task->nsproxy;
 		if (ns != NULL)
 			net = get_net(ns->net_ns);
+		task_unlock(task);
 	}
 	rcu_read_unlock();
 
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 1a81373947f3..73ca1740d839 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file,
 	if (!task)
 		goto err;
 
-	rcu_read_lock();
-	nsp = task_nsproxy(task);
+	task_lock(task);
+	nsp = task->nsproxy;
 	if (!nsp || !nsp->mnt_ns) {
-		rcu_read_unlock();
+		task_unlock(task);
 		put_task_struct(task);
 		goto err;
 	}
 	ns = nsp->mnt_ns;
 	get_mnt_ns(ns);
-	rcu_read_unlock();
-	task_lock(task);
 	if (!task->fs) {
 		task_unlock(task);
 		put_task_struct(task);
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index b4ec59d159ac..35fa08fd7739 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -40,32 +40,28 @@ extern struct nsproxy init_nsproxy;
  * the namespaces access rules are:
  *
  *  1. only current task is allowed to change tsk->nsproxy pointer or
- *     any pointer on the nsproxy itself
+ *     any pointer on the nsproxy itself.  Current must hold the task_lock
+ *     when changing tsk->nsproxy.
  *
  *  2. when accessing (i.e. reading) current task's namespaces - no
  *     precautions should be taken - just dereference the pointers
  *
  *  3. the access to other task namespaces is performed like this
- *     rcu_read_lock();
- *     nsproxy = task_nsproxy(tsk);
+ *     task_lock(task);
+ *     nsproxy = task->nsproxy;
  *     if (nsproxy != NULL) {
  *             / *
  *               * work with the namespaces here
  *               * e.g. get the reference on one of them
  *               * /
  *     } / *
- *         * NULL task_nsproxy() means that this task is
+ *         * NULL task->nsproxy means that this task is
  *         * almost dead (zombie)
  *         * /
- *     rcu_read_unlock();
+ *     task_unlock(task);
  *
  */
 
-static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
-{
-	return rcu_dereference(tsk->nsproxy);
-}
-
 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
 void exit_task_namespaces(struct task_struct *tsk);
 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 59451c1e214d..b54468e48e32 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -154,11 +154,11 @@ static void *ipcns_get(struct task_struct *task)
 	struct ipc_namespace *ns = NULL;
 	struct nsproxy *nsproxy;
 
-	rcu_read_lock();
-	nsproxy = task_nsproxy(task);
+	task_lock(task);
+	nsproxy = task->nsproxy;
 	if (nsproxy)
 		ns = get_ipc_ns(nsproxy->ipc_ns);
-	rcu_read_unlock();
+	task_unlock(task);
 
 	return ns;
 }
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 8e7811086b82..ef42d0ab3115 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -204,20 +204,13 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
 
 	might_sleep();
 
+	task_lock(p);
 	ns = p->nsproxy;
+	p->nsproxy = new;
+	task_unlock(p);
 
-	rcu_assign_pointer(p->nsproxy, new);
-
-	if (ns && atomic_dec_and_test(&ns->count)) {
-		/*
-		 * wait for others to get what they want from this nsproxy.
-		 *
-		 * cannot release this nsproxy via the call_rcu() since
-		 * put_mnt_ns() will want to sleep
-		 */
-		synchronize_rcu();
+	if (ns && atomic_dec_and_test(&ns->count))
 		free_nsproxy(ns);
-	}
 }
 
 void exit_task_namespaces(struct task_struct *p)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index fd393124e507..883aaaa7de8a 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -93,13 +93,13 @@ static void *utsns_get(struct task_struct *task)
 	struct uts_namespace *ns = NULL;
 	struct nsproxy *nsproxy;
 
-	rcu_read_lock();
-	nsproxy = task_nsproxy(task);
+	task_lock(task);
+	nsproxy = task->nsproxy;
 	if (nsproxy) {
 		ns = nsproxy->uts_ns;
 		get_uts_ns(ns);
 	}
-	rcu_read_unlock();
+	task_unlock(task);
 
 	return ns;
 }
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 85b62691f4f2..7c6b51a58968 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -373,9 +373,11 @@ struct net *get_net_ns_by_pid(pid_t pid)
 	tsk = find_task_by_vpid(pid);
 	if (tsk) {
 		struct nsproxy *nsproxy;
-		nsproxy = task_nsproxy(tsk);
+		task_lock(tsk);
+		nsproxy = tsk->nsproxy;
 		if (nsproxy)
 			net = get_net(nsproxy->net_ns);
+		task_unlock(tsk);
 	}
 	rcu_read_unlock();
 	return net;
@@ -632,11 +634,11 @@ static void *netns_get(struct task_struct *task)
 	struct net *net = NULL;
 	struct nsproxy *nsproxy;
 
-	rcu_read_lock();
-	nsproxy = task_nsproxy(task);
+	task_lock(task);
+	nsproxy = task->nsproxy;
 	if (nsproxy)
 		net = get_net(nsproxy->net_ns);
-	rcu_read_unlock();
+	task_unlock(task);
 
 	return net;
 }
-- 
cgit v1.2.3


From a779ca5fa766e270b9e11c162d877295e2904f4e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 29 Jul 2014 17:26:04 -0400
Subject: xprtrdma: Remove RPCRDMA_PERSISTENT_REGISTRATION macro

Clean up.

RPCRDMA_PERSISTENT_REGISTRATION was a compile-time switch between
RPCRDMA_REGISTER mode and RPCRDMA_ALLPHYSICAL mode.  Since
RPCRDMA_REGISTER has been removed, there's no need for the extra
conditional compilation.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Tested-by: Shirley Ma <shirley.ma@oracle.com>
Tested-by: Devesh Sharma <devesh.sharma@emulex.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xprtrdma.h |  2 --
 net/sunrpc/xprtrdma/verbs.c     | 13 -------------
 2 files changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index c2f04e1ae159..64a0a0a97b23 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -62,8 +62,6 @@
 #define RPCRDMA_INLINE_PAD_THRESH  (512)/* payload threshold to pad (bytes) */
 
 /* memory registration strategies */
-#define RPCRDMA_PERSISTENT_REGISTRATION (1)
-
 enum rpcrdma_memreg {
 	RPCRDMA_BOUNCEBUFFERS = 0,
 	RPCRDMA_REGISTER,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1208ab2e655f..c2253d4c64df 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -561,12 +561,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 		if (!ia->ri_id->device->alloc_fmr) {
 			dprintk("RPC:       %s: MTHCAFMR registration "
 				"not supported by HCA\n", __func__);
-#if RPCRDMA_PERSISTENT_REGISTRATION
 			memreg = RPCRDMA_ALLPHYSICAL;
-#else
-			rc = -ENOMEM;
-			goto out2;
-#endif
 		}
 	}
 
@@ -581,20 +576,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	switch (memreg) {
 	case RPCRDMA_FRMR:
 		break;
-#if RPCRDMA_PERSISTENT_REGISTRATION
 	case RPCRDMA_ALLPHYSICAL:
 		mem_priv = IB_ACCESS_LOCAL_WRITE |
 				IB_ACCESS_REMOTE_WRITE |
 				IB_ACCESS_REMOTE_READ;
 		goto register_setup;
-#endif
 	case RPCRDMA_MTHCAFMR:
 		if (ia->ri_have_dma_lkey)
 			break;
 		mem_priv = IB_ACCESS_LOCAL_WRITE;
-#if RPCRDMA_PERSISTENT_REGISTRATION
 	register_setup:
-#endif
 		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
 		if (IS_ERR(ia->ri_bind_mem)) {
 			printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -1905,7 +1896,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 
 	switch (ia->ri_memreg_strategy) {
 
-#if RPCRDMA_PERSISTENT_REGISTRATION
 	case RPCRDMA_ALLPHYSICAL:
 		rpcrdma_map_one(ia, seg, writing);
 		seg->mr_rkey = ia->ri_bind_mem->rkey;
@@ -1913,7 +1903,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 		seg->mr_nsegs = 1;
 		nsegs = 1;
 		break;
-#endif
 
 	/* Registration using frmr registration */
 	case RPCRDMA_FRMR:
@@ -1943,13 +1932,11 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
 
 	switch (ia->ri_memreg_strategy) {
 
-#if RPCRDMA_PERSISTENT_REGISTRATION
 	case RPCRDMA_ALLPHYSICAL:
 		read_lock(&ia->ri_qplock);
 		rpcrdma_unmap_one(ia, seg);
 		read_unlock(&ia->ri_qplock);
 		break;
-#endif
 
 	case RPCRDMA_FRMR:
 		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
-- 
cgit v1.2.3


From a6138db815df5ee542d848318e5dae681590fccd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 28 Jul 2014 16:26:53 -0700
Subject: mnt: Only change user settable mount flags in remount

Kenton Varda <kenton@sandstorm.io> discovered that by remounting a
read-only bind mount read-only in a user namespace the
MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user
to the remount a read-only mount read-write.

Correct this by replacing the mask of mount flags to preserve
with a mask of mount flags that may be changed, and preserve
all others.   This ensures that any future bugs with this mask and
remount will fail in an easy to detect way where new mount flags
simply won't change.

Cc: stable@vger.kernel.org
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c        | 2 +-
 include/linux/mount.h | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7187d01329c3..cb40449ea0df 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1937,7 +1937,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 		err = do_remount_sb(sb, flags, data, 0);
 	if (!err) {
 		lock_mount_hash();
-		mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
+		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
 		mnt->mnt.mnt_flags = mnt_flags;
 		touch_mnt_namespace(mnt->mnt_ns);
 		unlock_mount_hash();
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 839bac270904..b637a89e1fae 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -42,7 +42,9 @@ struct mnt_namespace;
  * flag, consider how it interacts with shared mounts.
  */
 #define MNT_SHARED_MASK	(MNT_UNBINDABLE)
-#define MNT_PROPAGATION_MASK	(MNT_SHARED | MNT_UNBINDABLE)
+#define MNT_USER_SETTABLE_MASK  (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
+				 | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
+				 | MNT_READONLY)
 
 #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
 			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
-- 
cgit v1.2.3


From 9566d6742852c527bf5af38af5cbb878dad75705 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 28 Jul 2014 17:26:07 -0700
Subject: mnt: Correct permission checks in do_remount

While invesgiating the issue where in "mount --bind -oremount,ro ..."
would result in later "mount --bind -oremount,rw" succeeding even if
the mount started off locked I realized that there are several
additional mount flags that should be locked and are not.

In particular MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, and the atime
flags in addition to MNT_READONLY should all be locked.  These
flags are all per superblock, can all be changed with MS_BIND,
and should not be changable if set by a more privileged user.

The following additions to the current logic are added in this patch.
- nosuid may not be clearable by a less privileged user.
- nodev  may not be clearable by a less privielged user.
- noexec may not be clearable by a less privileged user.
- atime flags may not be changeable by a less privileged user.

The logic with atime is that always setting atime on access is a
global policy and backup software and auditing software could break if
atime bits are not updated (when they are configured to be updated),
and serious performance degradation could result (DOS attack) if atime
updates happen when they have been explicitly disabled.  Therefore an
unprivileged user should not be able to mess with the atime bits set
by a more privileged user.

The additional restrictions are implemented with the addition of
MNT_LOCK_NOSUID, MNT_LOCK_NODEV, MNT_LOCK_NOEXEC, and MNT_LOCK_ATIME
mnt flags.

Taken together these changes and the fixes for MNT_LOCK_READONLY
should make it safe for an unprivileged user to create a user
namespace and to call "mount --bind -o remount,... ..." without
the danger of mount flags being changed maliciously.

Cc: stable@vger.kernel.org
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c        | 36 +++++++++++++++++++++++++++++++++---
 include/linux/mount.h |  5 +++++
 2 files changed, 38 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 1105a577a14f..dd9c93b5a9d5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -890,8 +890,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 
 	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
 	/* Don't allow unprivileged users to change mount flags */
-	if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
-		mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+	if (flag & CL_UNPRIVILEGED) {
+		mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
+
+		if (mnt->mnt.mnt_flags & MNT_READONLY)
+			mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+
+		if (mnt->mnt.mnt_flags & MNT_NODEV)
+			mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
+
+		if (mnt->mnt.mnt_flags & MNT_NOSUID)
+			mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
+
+		if (mnt->mnt.mnt_flags & MNT_NOEXEC)
+			mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
+	}
 
 	/* Don't allow unprivileged users to reveal what is under a mount */
 	if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
@@ -1931,6 +1944,23 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	    !(mnt_flags & MNT_READONLY)) {
 		return -EPERM;
 	}
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
+	    !(mnt_flags & MNT_NODEV)) {
+		return -EPERM;
+	}
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
+	    !(mnt_flags & MNT_NOSUID)) {
+		return -EPERM;
+	}
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
+	    !(mnt_flags & MNT_NOEXEC)) {
+		return -EPERM;
+	}
+	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
+	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+		return -EPERM;
+	}
+
 	err = security_sb_remount(sb, data);
 	if (err)
 		return err;
@@ -2129,7 +2159,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
 		 */
 		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
 			flags |= MS_NODEV;
-			mnt_flags |= MNT_NODEV;
+			mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
 		}
 	}
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index b637a89e1fae..b0c1e6574e7f 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -45,12 +45,17 @@ struct mnt_namespace;
 #define MNT_USER_SETTABLE_MASK  (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
 				 | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
 				 | MNT_READONLY)
+#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
 
 #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
 			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
 
 #define MNT_INTERNAL	0x4000
 
+#define MNT_LOCK_ATIME		0x040000
+#define MNT_LOCK_NOEXEC		0x080000
+#define MNT_LOCK_NOSUID		0x100000
+#define MNT_LOCK_NODEV		0x200000
 #define MNT_LOCK_READONLY	0x400000
 #define MNT_LOCKED		0x800000
 #define MNT_DOOMED		0x1000000
-- 
cgit v1.2.3


From 9dcfee01930e6cc1e84d28c232664f0c19a1f86c Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 14 Jul 2014 10:28:04 +0200
Subject: drivers: of: add automated assignment of reserved regions to client
 devices

This patch adds code for automated assignment of reserved memory regions
to struct device. reserved_mem->ops->device_init()/device_cleanup()
callbacks are called to perform reserved memory driver specific
initialization and cleanup

Based on previous code provided by Josh Cartwright <joshc@codeaurora.org>

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/of_reserved_mem.c    | 70 +++++++++++++++++++++++++++++++++++++++++
 include/linux/of_reserved_mem.h |  7 +++++
 2 files changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 632aae861375..59fb12e84e6b 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -206,8 +206,16 @@ void __init fdt_init_reserved_mem(void)
 	for (i = 0; i < reserved_mem_count; i++) {
 		struct reserved_mem *rmem = &reserved_mem[i];
 		unsigned long node = rmem->fdt_node;
+		int len;
+		const __be32 *prop;
 		int err = 0;
 
+		prop = of_get_flat_dt_prop(node, "phandle", &len);
+		if (!prop)
+			prop = of_get_flat_dt_prop(node, "linux,phandle", &len);
+		if (prop)
+			rmem->phandle = of_read_number(prop, len/4);
+
 		if (rmem->size == 0)
 			err = __reserved_mem_alloc_size(node, rmem->name,
 						 &rmem->base, &rmem->size);
@@ -215,3 +223,65 @@ void __init fdt_init_reserved_mem(void)
 			__reserved_mem_init_node(rmem);
 	}
 }
+
+static inline struct reserved_mem *__find_rmem(struct device_node *node)
+{
+	unsigned int i;
+
+	if (!node->phandle)
+		return NULL;
+
+	for (i = 0; i < reserved_mem_count; i++)
+		if (reserved_mem[i].phandle == node->phandle)
+			return &reserved_mem[i];
+	return NULL;
+}
+
+/**
+ * of_reserved_mem_device_init() - assign reserved memory region to given device
+ *
+ * This function assign memory region pointed by "memory-region" device tree
+ * property to the given device.
+ */
+void of_reserved_mem_device_init(struct device *dev)
+{
+	struct reserved_mem *rmem;
+	struct device_node *np;
+
+	np = of_parse_phandle(dev->of_node, "memory-region", 0);
+	if (!np)
+		return;
+
+	rmem = __find_rmem(np);
+	of_node_put(np);
+
+	if (!rmem || !rmem->ops || !rmem->ops->device_init)
+		return;
+
+	rmem->ops->device_init(rmem, dev);
+	dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
+}
+
+/**
+ * of_reserved_mem_device_release() - release reserved memory device structures
+ *
+ * This function releases structures allocated for memory region handling for
+ * the given device.
+ */
+void of_reserved_mem_device_release(struct device *dev)
+{
+	struct reserved_mem *rmem;
+	struct device_node *np;
+
+	np = of_parse_phandle(dev->of_node, "memory-region", 0);
+	if (!np)
+		return;
+
+	rmem = __find_rmem(np);
+	of_node_put(np);
+
+	if (!rmem || !rmem->ops || !rmem->ops->device_release)
+		return;
+
+	rmem->ops->device_release(rmem, dev);
+}
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 4669ddfdd5af..5b5efae09135 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -8,6 +8,7 @@ struct reserved_mem_ops;
 struct reserved_mem {
 	const char			*name;
 	unsigned long			fdt_node;
+	unsigned long			phandle;
 	const struct reserved_mem_ops	*ops;
 	phys_addr_t			base;
 	phys_addr_t			size;
@@ -27,10 +28,16 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
 	_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
 
 #ifdef CONFIG_OF_RESERVED_MEM
+void of_reserved_mem_device_init(struct device *dev);
+void of_reserved_mem_device_release(struct device *dev);
+
 void fdt_init_reserved_mem(void);
 void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
 			       phys_addr_t base, phys_addr_t size);
 #else
+static inline void of_reserved_mem_device_init(struct device *dev) { }
+static inline void of_reserved_mem_device_release(struct device *pdev) { }
+
 static inline void fdt_init_reserved_mem(void) { }
 static inline void fdt_reserved_mem_save_node(unsigned long node,
 		const char *uname, phys_addr_t base, phys_addr_t size) { }
-- 
cgit v1.2.3


From e630664c8383f300c4146d7613d61e5a8eb1f8e3 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 31 Jul 2014 11:01:29 +0300
Subject: mlx4_core: Add helper functions to support MR re-registration

Add few helper functions to support a mechanism of getting an MPT,
modifying it and updating the HCA with the modified object.

The code takes 2 paths, one for directly changing the MPT (and
sometimes its related MTTs) and another one which queries the MPT and
updates the HCA via fw command SW2HW_MPT. The first path is used in
native mode; the second path is slower and is used only in SRIOV.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |   2 +
 drivers/net/ethernet/mellanox/mlx4/mr.c            | 160 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  26 +++-
 include/linux/mlx4/device.h                        |  16 +++
 4 files changed, 202 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 1d8af7336807..b40d587974fa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -279,6 +279,8 @@ struct mlx4_icm_table {
 #define MLX4_MPT_FLAG_PHYSICAL	    (1 <<  9)
 #define MLX4_MPT_FLAG_REGION	    (1 <<  8)
 
+#define MLX4_MPT_PD_MASK	    (0x1FFFFUL)
+#define MLX4_MPT_PD_VF_MASK	    (0xFE0000UL)
 #define MLX4_MPT_PD_FLAG_FAST_REG   (1 << 27)
 #define MLX4_MPT_PD_FLAG_RAE	    (1 << 28)
 #define MLX4_MPT_PD_FLAG_EN_INV	    (3 << 24)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 2839abb878a6..7d717eccb7b0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 			    MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 }
 
+int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+		       struct mlx4_mpt_entry ***mpt_entry)
+{
+	int err;
+	int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+	struct mlx4_cmd_mailbox *mailbox = NULL;
+
+	/* Make sure that at this point we have single-threaded access only */
+
+	if (mmr->enabled != MLX4_MPT_EN_HW)
+		return -EINVAL;
+
+	err = mlx4_HW2SW_MPT(dev, NULL, key);
+
+	if (err) {
+		mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
+		mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
+		return err;
+	}
+
+	mmr->enabled = MLX4_MPT_EN_SW;
+
+	if (!mlx4_is_mfunc(dev)) {
+		**mpt_entry = mlx4_table_find(
+				&mlx4_priv(dev)->mr_table.dmpt_table,
+				key, NULL);
+	} else {
+		mailbox = mlx4_alloc_cmd_mailbox(dev);
+		if (IS_ERR_OR_NULL(mailbox))
+			return PTR_ERR(mailbox);
+
+		err = mlx4_cmd_box(dev, 0, mailbox->dma, key,
+				   0, MLX4_CMD_QUERY_MPT,
+				   MLX4_CMD_TIME_CLASS_B,
+				   MLX4_CMD_WRAPPED);
+
+		if (err)
+			goto free_mailbox;
+
+		*mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf;
+	}
+
+	if (!(*mpt_entry) || !(**mpt_entry)) {
+		err = -ENOMEM;
+		goto free_mailbox;
+	}
+
+	return 0;
+
+free_mailbox:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt);
+
+int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+			 struct mlx4_mpt_entry **mpt_entry)
+{
+	int err;
+
+	if (!mlx4_is_mfunc(dev)) {
+		/* Make sure any changes to this entry are flushed */
+		wmb();
+
+		*(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW;
+
+		/* Make sure the new status is written */
+		wmb();
+
+		err = mlx4_SYNC_TPT(dev);
+	} else {
+		int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+
+		struct mlx4_cmd_mailbox *mailbox =
+			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+				     buf);
+
+		err = mlx4_SW2HW_MPT(dev, mailbox, key);
+	}
+
+	mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
+	if (!err)
+		mmr->enabled = MLX4_MPT_EN_HW;
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);
+
+void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
+			struct mlx4_mpt_entry **mpt_entry)
+{
+	if (mlx4_is_mfunc(dev)) {
+		struct mlx4_cmd_mailbox *mailbox =
+			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+				     buf);
+		mlx4_free_cmd_mailbox(dev, mailbox);
+	}
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);
+
+int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
+			 u32 pdn)
+{
+	u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags);
+	/* The wrapper function will put the slave's id here */
+	if (mlx4_is_mfunc(dev))
+		pd_flags &= ~MLX4_MPT_PD_VF_MASK;
+	mpt_entry->pd_flags = cpu_to_be32((pd_flags &  ~MLX4_MPT_PD_MASK) |
+					  (pdn & MLX4_MPT_PD_MASK)
+					  | MLX4_MPT_PD_FLAG_EN_INV);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd);
+
+int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
+			     struct mlx4_mpt_entry *mpt_entry,
+			     u32 access)
+{
+	u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) |
+		    (access & MLX4_PERM_MASK);
+
+	mpt_entry->flags = cpu_to_be32(flags);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access);
+
 static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
 			   u64 iova, u64 size, u32 access, int npages,
 			   int page_shift, struct mlx4_mr *mr)
@@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_free);
 
+void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+	mlx4_mtt_cleanup(dev, &mr->mtt);
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
+
+int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
+			    u64 iova, u64 size, int npages,
+			    int page_shift, struct mlx4_mpt_entry *mpt_entry)
+{
+	int err;
+
+	mpt_entry->start       = cpu_to_be64(mr->iova);
+	mpt_entry->length      = cpu_to_be64(mr->size);
+	mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
+	err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+	if (err)
+		return err;
+
+	if (mr->mtt.order < 0) {
+		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
+		mpt_entry->mtt_addr = 0;
+	} else {
+		mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+						  &mr->mtt));
+		if (mr->mtt.page_shift == 0)
+			mpt_entry->mtt_sz    = cpu_to_be32(1 << mr->mtt.order);
+	}
+	mr->enabled = MLX4_MPT_EN_SW;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write);
+
 int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 {
 	struct mlx4_cmd_mailbox *mailbox;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 0efc1368e5a8..1089367fed22 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
 	if (err)
 		return err;
 
-	if (mpt->com.from_state != RES_MPT_HW) {
+	if (mpt->com.from_state == RES_MPT_MAPPED) {
+		/* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do
+		 * that, the VF must read the MPT. But since the MPT entry memory is not
+		 * in the VF's virtual memory space, it must use QUERY_MPT to obtain the
+		 * entry contents. To guarantee that the MPT cannot be changed, the driver
+		 * must perform HW2SW_MPT before this query and return the MPT entry to HW
+		 * ownership fofollowing the change. The change here allows the VF to
+		 * perform QUERY_MPT also when the entry is in SW ownership.
+		 */
+		struct mlx4_mpt_entry *mpt_entry = mlx4_table_find(
+					&mlx4_priv(dev)->mr_table.dmpt_table,
+					mpt->key, NULL);
+
+		if (NULL == mpt_entry || NULL == outbox->buf) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry));
+
+		err = 0;
+	} else if (mpt->com.from_state == RES_MPT_HW) {
+		err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+	} else {
 		err = -EBUSY;
 		goto out;
 	}
 
-	err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
 
 out:
 	put_res(dev, slave, id, RES_MPT);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 35b51e7af886..bac002167ace 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -262,6 +262,7 @@ enum {
 	MLX4_PERM_REMOTE_WRITE	= 1 << 13,
 	MLX4_PERM_ATOMIC	= 1 << 14,
 	MLX4_PERM_BIND_MW	= 1 << 15,
+	MLX4_PERM_MASK		= 0xFC00
 };
 
 enum {
@@ -1243,4 +1244,19 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
 int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
 int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
 				 int enable);
+int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+		       struct mlx4_mpt_entry ***mpt_entry);
+int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+			 struct mlx4_mpt_entry **mpt_entry);
+int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
+			 u32 pdn);
+int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
+			     struct mlx4_mpt_entry *mpt_entry,
+			     u32 access);
+void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
+			struct mlx4_mpt_entry **mpt_entry);
+void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr);
+int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
+			    u64 iova, u64 size, int npages,
+			    int page_shift, struct mlx4_mpt_entry *mpt_entry);
 #endif /* MLX4_DEVICE_H */
-- 
cgit v1.2.3


From a3b255717fed1cad0dd4ed5be77114d32ef22a6d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Wed, 16 Jul 2014 06:52:18 -0400
Subject: sunrpc: remove __rcu annotation from struct gss_cl_ctx->gc_gss_ctx

Commit 5b22216e11f7 (nfs: __rcu annotations) added a __rcu annotation to
the gc_gss_ctx field. I see no rationale for adding that though, as that
field does not seem to be managed via RCU at all.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/auth_gss.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index cbc6875fb9cf..36eebc451b41 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -69,7 +69,7 @@ struct gss_cl_ctx {
 	enum rpc_gss_proc	gc_proc;
 	u32			gc_seq;
 	spinlock_t		gc_seq_lock;
-	struct gss_ctx __rcu	*gc_gss_ctx;
+	struct gss_ctx		*gc_gss_ctx;
 	struct xdr_netobj	gc_wire_ctx;
 	struct xdr_netobj	gc_acceptor;
 	u32			gc_win;
-- 
cgit v1.2.3


From ec25422c669d38f4e8a83da7f77950094349de48 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Wed, 16 Jul 2014 06:52:22 -0400
Subject: sunrpc: remove "ec" argument from encrypt_v2 operation

It's always 0.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/gss_krb5.h       | 4 ++--
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 9 ++-------
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 2 +-
 3 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 5af2931cf58d..df02a4188487 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -81,7 +81,7 @@ struct gss_krb5_enctype {
 		       struct xdr_netobj *in,
 		       struct xdr_netobj *out);	/* complete key generation */
 	u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset,
-			   struct xdr_buf *buf, int ec,
+			   struct xdr_buf *buf,
 			   struct page **pages); /* v2 encryption function */
 	u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset,
 			   struct xdr_buf *buf, u32 *headskip,
@@ -310,7 +310,7 @@ gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
 
 u32
 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
-		     struct xdr_buf *buf, int ec,
+		     struct xdr_buf *buf,
 		     struct page **pages);
 
 u32
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 0f43e894bc0a..f5ed9f6ece06 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -641,7 +641,7 @@ out:
 
 u32
 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
-		     struct xdr_buf *buf, int ec, struct page **pages)
+		     struct xdr_buf *buf, struct page **pages)
 {
 	u32 err;
 	struct xdr_netobj hmac;
@@ -684,13 +684,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 		ecptr = buf->tail[0].iov_base;
 	}
 
-	memset(ecptr, 'X', ec);
-	buf->tail[0].iov_len += ec;
-	buf->len += ec;
-
 	/* copy plaintext gss token header after filler (if any) */
-	memcpy(ecptr + ec, buf->head[0].iov_base + offset,
-						GSS_KRB5_TOK_HDR_LEN);
+	memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN);
 	buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
 	buf->len += GSS_KRB5_TOK_HDR_LEN;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 88cd24aacddc..4b614c604fe0 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -483,7 +483,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
 	*be64ptr = cpu_to_be64(kctx->seq_send64++);
 	spin_unlock(&krb5_seq_lock);
 
-	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, 0, pages);
+	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3


From e7029206ff43f6cf7d6fcb741adb126f47200516 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 17 Jul 2014 20:42:15 -0400
Subject: nfs: check wait_on_bit_lock err in page_group_lock

Return errors from wait_on_bit_lock from nfs_page_group_lock.

Add a bool argument @wait to nfs_page_group_lock. If true, loop over
wait_on_bit_lock until it returns cleanly. If false, return the error
from wait_on_bit_lock.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c        | 29 +++++++++++++++++++++++------
 fs/nfs/write.c           |  6 ++++--
 include/linux/nfs_page.h |  2 +-
 3 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e76a40e298f2..9425118e91d7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -147,17 +147,25 @@ static int nfs_wait_bit_uninterruptible(void *word)
  * @req - request in group that is to be locked
  *
  * this lock must be held if modifying the page group list
+ *
+ * returns result from wait_on_bit_lock: 0 on success, < 0 on error
  */
-void
-nfs_page_group_lock(struct nfs_page *req)
+int
+nfs_page_group_lock(struct nfs_page *req, bool wait)
 {
 	struct nfs_page *head = req->wb_head;
+	int ret;
 
 	WARN_ON_ONCE(head != head->wb_head);
 
-	wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+	do {
+		ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
 			nfs_wait_bit_uninterruptible,
 			TASK_UNINTERRUPTIBLE);
+	} while (wait && ret != 0);
+
+	WARN_ON_ONCE(ret > 0);
+	return ret;
 }
 
 /*
@@ -218,7 +226,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
 {
 	bool ret;
 
-	nfs_page_group_lock(req);
+	nfs_page_group_lock(req, true);
 	ret = nfs_page_group_sync_on_bit_locked(req, bit);
 	nfs_page_group_unlock(req);
 
@@ -858,8 +866,13 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 	struct nfs_page *subreq;
 	unsigned int bytes_left = 0;
 	unsigned int offset, pgbase;
+	int ret;
 
-	nfs_page_group_lock(req);
+	ret = nfs_page_group_lock(req, false);
+	if (ret < 0) {
+		desc->pg_error = ret;
+		return 0;
+	}
 
 	subreq = req;
 	bytes_left = subreq->wb_bytes;
@@ -881,7 +894,11 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 			if (desc->pg_recoalesce)
 				return 0;
 			/* retry add_request for this subreq */
-			nfs_page_group_lock(req);
+			ret = nfs_page_group_lock(req, false);
+			if (ret < 0) {
+				desc->pg_error = ret;
+				return 0;
+			}
 			continue;
 		}
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d357728ed8ba..8d1ed2b9c16c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -216,7 +216,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
 	unsigned int pos = 0;
 	unsigned int len = nfs_page_length(req->wb_page);
 
-	nfs_page_group_lock(req);
+	nfs_page_group_lock(req, true);
 
 	do {
 		tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -456,7 +456,9 @@ try_again:
 	}
 
 	/* lock each request in the page group */
-	nfs_page_group_lock(head);
+	ret = nfs_page_group_lock(head, false);
+	if (ret < 0)
+		return ERR_PTR(ret);
 	subreq = head;
 	do {
 		/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 4b48548e700e..291924ca9517 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -122,7 +122,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	void nfs_unlock_and_release_request(struct nfs_page *);
-extern void nfs_page_group_lock(struct nfs_page *);
+extern int nfs_page_group_lock(struct nfs_page *, bool);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
 
-- 
cgit v1.2.3


From b412ddf0661e11485876a202c48868143e3a01cf Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 17 Jul 2014 20:42:16 -0400
Subject: nfs: fix comment and add warn_on for PG_INODE_REF

Fix the comment in nfs_page.h for PG_INODE_REF to reflect that it's no longer
set only on head requests. Also add a WARN_ON_ONCE in nfs_inode_remove_request
as PG_INODE_REF should always be set.

Suggested-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/write.c           | 2 ++
 include/linux/nfs_page.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8d1ed2b9c16c..e6bc5b51f325 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -707,6 +707,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 
 	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
 		nfs_release_request(req);
+	else
+		WARN_ON_ONCE(1);
 }
 
 static void
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 291924ca9517..6ad2bbcad405 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -26,7 +26,7 @@ enum {
 	PG_MAPPED,		/* page private set for buffered io */
 	PG_CLEAN,		/* write succeeded */
 	PG_COMMIT_TO_DS,	/* used by pnfs layouts */
-	PG_INODE_REF,		/* extra ref held by inode (head req only) */
+	PG_INODE_REF,		/* extra ref held by inode when in writeback */
 	PG_HEADLOCK,		/* page group lock of wb_head */
 	PG_TEARDOWN,		/* page group sync for destroy */
 	PG_UNLOCKPAGE,		/* page group sync bit in read path */
-- 
cgit v1.2.3


From bd95608053b7f7813351b0defc0e3e7ef8cf2803 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 14 Jul 2014 11:28:20 +1000
Subject: sunrpc/auth: allow lockless (rcu) lookup of credential cache.

The new flag RPCAUTH_LOOKUP_RCU to credential lookup avoids locking,
does not take a reference on the returned credential, and returns
-ECHILD if a simple lookup was not possible.

The returned value can only be used within an rcu_read_lock protected
region.

The main user of this is the new rpc_lookup_cred_nonblock() which
returns a pointer to the current credential which is only rcu-safe (no
ref-count held), and might return -ECHILD if allocation was required.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/auth.h |  2 ++
 net/sunrpc/auth.c           | 17 +++++++++++++++--
 net/sunrpc/auth_generic.c   |  6 ++++++
 net/sunrpc/auth_null.c      |  2 ++
 4 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index c683b9a06913..8e030075fe79 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -103,6 +103,7 @@ struct rpc_auth_create_args {
 
 /* Flags for rpcauth_lookupcred() */
 #define RPCAUTH_LOOKUP_NEW		0x01	/* Accept an uninitialised cred */
+#define RPCAUTH_LOOKUP_RCU		0x02	/* lock-less lookup */
 
 /*
  * Client authentication ops
@@ -154,6 +155,7 @@ void			rpc_destroy_generic_auth(void);
 void 			rpc_destroy_authunix(void);
 
 struct rpc_cred *	rpc_lookup_cred(void);
+struct rpc_cred *	rpc_lookup_cred_nonblock(void);
 struct rpc_cred *	rpc_lookup_machine_cred(const char *service_name);
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 360decdddc78..24fcbd23ae6c 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -557,6 +557,12 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 	hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
+		if (flags & RPCAUTH_LOOKUP_RCU) {
+			if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
+			    !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
+				cred = entry;
+			break;
+		}
 		spin_lock(&cache->lock);
 		if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
 			spin_unlock(&cache->lock);
@@ -571,6 +577,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 	if (cred != NULL)
 		goto found;
 
+	if (flags & RPCAUTH_LOOKUP_RCU)
+		return ERR_PTR(-ECHILD);
+
 	new = auth->au_ops->crcreate(auth, acred, flags);
 	if (IS_ERR(new)) {
 		cred = new;
@@ -621,10 +630,14 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
 	memset(&acred, 0, sizeof(acred));
 	acred.uid = cred->fsuid;
 	acred.gid = cred->fsgid;
-	acred.group_info = get_group_info(((struct cred *)cred)->group_info);
+	if (flags & RPCAUTH_LOOKUP_RCU)
+		acred.group_info = rcu_dereference(cred->group_info);
+	else
+		acred.group_info = get_group_info(((struct cred *)cred)->group_info);
 
 	ret = auth->au_ops->lookup_cred(auth, &acred, flags);
-	put_group_info(acred.group_info);
+	if (!(flags & RPCAUTH_LOOKUP_RCU))
+		put_group_info(acred.group_info);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index ed04869b2d4f..6f6b829c9e8e 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -38,6 +38,12 @@ struct rpc_cred *rpc_lookup_cred(void)
 }
 EXPORT_SYMBOL_GPL(rpc_lookup_cred);
 
+struct rpc_cred *rpc_lookup_cred_nonblock(void)
+{
+	return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
+}
+EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
+
 /*
  * Public call interface for looking up machine creds.
  */
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index f0ebe07978a2..712c123e04e9 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -35,6 +35,8 @@ nul_destroy(struct rpc_auth *auth)
 static struct rpc_cred *
 nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 {
+	if (flags & RPCAUTH_LOOKUP_RCU)
+		return &null_cred;
 	return get_rpccred(&null_cred);
 }
 
-- 
cgit v1.2.3


From 912a108da767ae75cc929d2854e698aff527ec5d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 14 Jul 2014 11:28:20 +1000
Subject: NFS: teach nfs_neg_need_reval to understand LOOKUP_RCU

This requires nfs_check_verifier to take an rcu_walk flag, and requires
an rcu version of nfs_revalidate_inode which returns -ECHILD rather
than making an RPC call.

With this, nfs_lookup_revalidate can call nfs_neg_need_reval in
RCU-walk mode.

We can also move the LOOKUP_RCU check past the nfs_check_verifier()
call in nfs_lookup_revalidate.

If RCU_WALK prevents nfs_check_verifier or nfs_neg_need_reval from
doing a full check, they return a status indicating that a revalidation
is required.  As this revalidation will not be possible in RCU_WALK
mode, -ECHILD will ultimately be returned, which is the desired result.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c           | 59 +++++++++++++++++++++++++++++++++++---------------
 fs/nfs/inode.c         |  9 ++++++++
 include/linux/nfs_fs.h |  1 +
 3 files changed, 52 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8a3c36984fc4..dcd4fe5831d6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
  * A check for whether or not the parent directory has changed.
  * In the case it has, we assume that the dentries are untrustworthy
  * and may need to be looked up again.
+ * If rcu_walk prevents us from performing a full check, return 0.
  */
-static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
+			      int rcu_walk)
 {
+	int ret;
+
 	if (IS_ROOT(dentry))
 		return 1;
 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
 	if (!nfs_verify_change_attribute(dir, dentry->d_time))
 		return 0;
 	/* Revalidate nfsi->cache_change_attribute before we declare a match */
-	if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+	if (rcu_walk)
+		ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
+	else
+		ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+	if (ret < 0)
 		return 0;
 	if (!nfs_verify_change_attribute(dir, dentry->d_time))
 		return 0;
@@ -1054,6 +1062,9 @@ out_force:
  *
  * If parent mtime has changed, we revalidate, else we wait for a
  * period corresponding to the parent's attribute cache timeout value.
+ *
+ * If LOOKUP_RCU prevents us from performing a full check, return 1
+ * suggesting a reval is needed.
  */
 static inline
 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@@ -1064,7 +1075,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
 		return 0;
 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
 		return 1;
-	return !nfs_check_verifier(dir, dentry);
+	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
 }
 
 /*
@@ -1101,11 +1112,11 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 	inode = dentry->d_inode;
 
 	if (!inode) {
-		if (flags & LOOKUP_RCU)
-			return -ECHILD;
-
-		if (nfs_neg_need_reval(dir, dentry, flags))
+		if (nfs_neg_need_reval(dir, dentry, flags)) {
+			if (flags & LOOKUP_RCU)
+				return -ECHILD;
 			goto out_bad;
+		}
 		goto out_valid_noent;
 	}
 
@@ -1120,16 +1131,21 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
 		goto out_set_verifier;
 
-	if (flags & LOOKUP_RCU)
-		return -ECHILD;
-
 	/* Force a full look up iff the parent directory has changed */
-	if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) {
+	if (!nfs_is_exclusive_create(dir, flags) &&
+	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
+
+		if (flags & LOOKUP_RCU)
+			return -ECHILD;
+
 		if (nfs_lookup_verify_inode(inode, flags))
 			goto out_zap_parent;
 		goto out_valid;
 	}
 
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	if (NFS_STALE(inode))
 		goto out_bad;
 
@@ -1566,14 +1582,23 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 		struct dentry *parent;
 		struct inode *dir;
 
-		if (flags & LOOKUP_RCU)
-			return -ECHILD;
-
-		parent = dget_parent(dentry);
-		dir = parent->d_inode;
+		if (flags & LOOKUP_RCU) {
+			parent = rcu_dereference(dentry);
+			dir = ACCESS_ONCE(parent->d_inode);
+			if (!dir)
+				return -ECHILD;
+		} else {
+			parent = dget_parent(dentry);
+			dir = parent->d_inode;
+		}
 		if (!nfs_neg_need_reval(dir, dentry, flags))
 			ret = 1;
-		dput(parent);
+		else if (flags & LOOKUP_RCU)
+			ret = -ECHILD;
+		if (!(flags & LOOKUP_RCU))
+			dput(parent);
+		else if (parent != rcu_dereference(dentry))
+			return -ECHILD;
 		goto out;
 	}
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9927913c97c2..147fd17e7920 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
 
+int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
+{
+	if (!(NFS_I(inode)->cache_validity &
+			(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
+			&& !nfs_attribute_cache_expired(inode))
+		return NFS_STALE(inode) ? -ESTALE : 0;
+	return -ECHILD;
+}
+
 static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index e30f6059ecd6..60cd9e377926 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -352,6 +352,7 @@ extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_attribute_cache_expired(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
+extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
-- 
cgit v1.2.3


From f682a398b2e24ae0a775ddf37cced83b897198ee Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 14 Jul 2014 11:28:20 +1000
Subject: NFS: allow lockless access to access_cache

The access cache is used during RCU-walk path lookups, so it is best
to avoid locking if possible as taking a lock kills concurrency.

The rbtree is not rcu-safe and cannot easily be made so.
Instead we simply check the last (i.e. most recent) entry on the LRU
list.  If this doesn't match, then we return -ECHILD and retry in
lock/refcount mode.

This requires freeing the nfs_access_entry struct with rcu, and
requires using rcu access primatives when adding entries to the lru, and
when examining the last entry.

Calling put_rpccred before kfree_rcu looks a bit odd, but as
put_rpccred already provides rcu protection, we know that the cred will
not actually be freed until the next grace period, so any concurrent
access will be safe.

This patch provides about 5% performance improvement on a stat-heavy
synthetic work load with 4 threads on a 2-core CPU.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c           | 43 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/nfs_fs.h |  1 +
 2 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2bfbde0f7176..1b5f38f48dab 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2079,7 +2079,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt
 static void nfs_access_free_entry(struct nfs_access_entry *entry)
 {
 	put_rpccred(entry->cred);
-	kfree(entry);
+	kfree_rcu(entry, rcu_head);
 	smp_mb__before_atomic();
 	atomic_long_dec(&nfs_access_nr_entries);
 	smp_mb__after_atomic();
@@ -2257,6 +2257,38 @@ out_zap:
 	return -ENOENT;
 }
 
+static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+	/* Only check the most recently returned cache entry,
+	 * but do it without locking.
+	 */
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_access_entry *cache;
+	int err = -ECHILD;
+	struct list_head *lh;
+
+	rcu_read_lock();
+	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+		goto out;
+	lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
+	cache = list_entry(lh, struct nfs_access_entry, lru);
+	if (lh == &nfsi->access_cache_entry_lru ||
+	    cred != cache->cred)
+		cache = NULL;
+	if (cache == NULL)
+		goto out;
+	if (!nfs_have_delegated_attributes(inode) &&
+	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+		goto out;
+	res->jiffies = cache->jiffies;
+	res->cred = cache->cred;
+	res->mask = cache->mask;
+	err = 0;
+out:
+	rcu_read_unlock();
+	return err;
+}
+
 static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -2300,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 	cache->cred = get_rpccred(set->cred);
 	cache->mask = set->mask;
 
+	/* The above field assignments must be visible
+	 * before this item appears on the lru.  We cannot easily
+	 * use rcu_assign_pointer, so just force the memory barrier.
+	 */
+	smp_wmb();
 	nfs_access_add_rbtree(inode, cache);
 
 	/* Update accounting */
@@ -2339,7 +2376,9 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 
 	trace_nfs_access_enter(inode);
 
-	status = nfs_access_get_cached(inode, cred, &cache);
+	status = nfs_access_get_cached_rcu(inode, cred, &cache);
+	if (status != 0)
+		status = nfs_access_get_cached(inode, cred, &cache);
 	if (status == 0)
 		goto out_cached;
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 60cd9e377926..5180a7ededec 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -52,6 +52,7 @@ struct nfs_access_entry {
 	unsigned long		jiffies;
 	struct rpc_cred *	cred;
 	int			mask;
+	struct rcu_head		rcu_head;
 };
 
 struct nfs_lockowner {
-- 
cgit v1.2.3


From 961e3beae3b29ae9463631415342244cdaf1cd47 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 10 Jun 2014 10:25:00 +0200
Subject: drm/tegra: Make job submission 64-bit safe

Job submission currently relies on the fact that struct drm_tegra_reloc
and struct host1x_reloc are the same size and uses a simple call to the
copy_from_user() function to copy them to kernel space. This causes the
handle to be stored in the buffer object field, which then needs a cast
to a 32 bit integer to resolve it to a proper buffer object pointer and
store it back in the buffer object field.

On 64-bit architectures that will no longer work, since pointers are 64
bits wide whereas handles will remain 32 bits. This causes the sizes of
both structures to because different and copying will no longer work.

Fix this by adding a new function, host1x_reloc_get_user(), that copies
the structures field by field.

While at it, use substructures for the command and target buffers in
struct host1x_reloc for better readability. Also use unsized types to
make it more obvious that this isn't part of userspace ABI.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 62 ++++++++++++++++++++++++++++++++-------------
 drivers/gpu/host1x/job.c    | 22 ++++++++--------
 include/linux/host1x.h      | 15 ++++++-----
 3 files changed, 64 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 5cba5e736130..59736bb810cd 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -132,6 +132,45 @@ host1x_bo_lookup(struct drm_device *drm, struct drm_file *file, u32 handle)
 	return &bo->base;
 }
 
+static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,
+				       struct drm_tegra_reloc __user *src,
+				       struct drm_device *drm,
+				       struct drm_file *file)
+{
+	u32 cmdbuf, target;
+	int err;
+
+	err = get_user(cmdbuf, &src->cmdbuf.handle);
+	if (err < 0)
+		return err;
+
+	err = get_user(dest->cmdbuf.offset, &src->cmdbuf.offset);
+	if (err < 0)
+		return err;
+
+	err = get_user(target, &src->target.handle);
+	if (err < 0)
+		return err;
+
+	err = get_user(dest->target.offset, &src->cmdbuf.offset);
+	if (err < 0)
+		return err;
+
+	err = get_user(dest->shift, &src->shift);
+	if (err < 0)
+		return err;
+
+	dest->cmdbuf.bo = host1x_bo_lookup(drm, file, cmdbuf);
+	if (!dest->cmdbuf.bo)
+		return -ENOENT;
+
+	dest->target.bo = host1x_bo_lookup(drm, file, target);
+	if (!dest->target.bo)
+		return -ENOENT;
+
+	return 0;
+}
+
 int tegra_drm_submit(struct tegra_drm_context *context,
 		     struct drm_tegra_submit *args, struct drm_device *drm,
 		     struct drm_file *file)
@@ -184,26 +223,13 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		cmdbufs++;
 	}
 
-	if (copy_from_user(job->relocarray, relocs,
-			   sizeof(*relocs) * num_relocs)) {
-		err = -EFAULT;
-		goto fail;
-	}
-
+	/* copy and resolve relocations from submit */
 	while (num_relocs--) {
-		struct host1x_reloc *reloc = &job->relocarray[num_relocs];
-		struct host1x_bo *cmdbuf, *target;
-
-		cmdbuf = host1x_bo_lookup(drm, file, (u32)reloc->cmdbuf);
-		target = host1x_bo_lookup(drm, file, (u32)reloc->target);
-
-		reloc->cmdbuf = cmdbuf;
-		reloc->target = target;
-
-		if (!reloc->target || !reloc->cmdbuf) {
-			err = -ENOENT;
+		err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs],
+						  &relocs[num_relocs], drm,
+						  file);
+		if (err < 0)
 			goto fail;
-		}
 	}
 
 	if (copy_from_user(job->waitchk, waitchks,
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 112f27e51bc7..63bd63f3c7df 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -185,16 +185,16 @@ static unsigned int pin_job(struct host1x_job *job)
 		struct sg_table *sgt;
 		dma_addr_t phys_addr;
 
-		reloc->target = host1x_bo_get(reloc->target);
-		if (!reloc->target)
+		reloc->target.bo = host1x_bo_get(reloc->target.bo);
+		if (!reloc->target.bo)
 			goto unpin;
 
-		phys_addr = host1x_bo_pin(reloc->target, &sgt);
+		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
 		if (!phys_addr)
 			goto unpin;
 
 		job->addr_phys[job->num_unpins] = phys_addr;
-		job->unpins[job->num_unpins].bo = reloc->target;
+		job->unpins[job->num_unpins].bo = reloc->target.bo;
 		job->unpins[job->num_unpins].sgt = sgt;
 		job->num_unpins++;
 	}
@@ -235,21 +235,21 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
 	for (i = 0; i < job->num_relocs; i++) {
 		struct host1x_reloc *reloc = &job->relocarray[i];
 		u32 reloc_addr = (job->reloc_addr_phys[i] +
-			reloc->target_offset) >> reloc->shift;
+				  reloc->target.offset) >> reloc->shift;
 		u32 *target;
 
 		/* skip all other gathers */
-		if (cmdbuf != reloc->cmdbuf)
+		if (cmdbuf != reloc->cmdbuf.bo)
 			continue;
 
-		if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) {
+		if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
 			if (cmdbuf_page_addr)
 				host1x_bo_kunmap(cmdbuf, last_page,
 						 cmdbuf_page_addr);
 
 			cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
-					reloc->cmdbuf_offset >> PAGE_SHIFT);
-			last_page = reloc->cmdbuf_offset >> PAGE_SHIFT;
+					reloc->cmdbuf.offset >> PAGE_SHIFT);
+			last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;
 
 			if (unlikely(!cmdbuf_page_addr)) {
 				pr_err("Could not map cmdbuf for relocation\n");
@@ -257,7 +257,7 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
 			}
 		}
 
-		target = cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK);
+		target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
 		*target = reloc_addr;
 	}
 
@@ -272,7 +272,7 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
 {
 	offset *= sizeof(u32);
 
-	if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset)
+	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
 		return false;
 
 	return true;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index d2b52999e771..bb9840fd1e18 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -164,12 +164,15 @@ int host1x_job_submit(struct host1x_job *job);
  */
 
 struct host1x_reloc {
-	struct host1x_bo *cmdbuf;
-	u32 cmdbuf_offset;
-	struct host1x_bo *target;
-	u32 target_offset;
-	u32 shift;
-	u32 pad;
+	struct {
+		struct host1x_bo *bo;
+		unsigned long offset;
+	} cmdbuf;
+	struct {
+		struct host1x_bo *bo;
+		unsigned long offset;
+	} target;
+	unsigned long shift;
 };
 
 struct host1x_job {
-- 
cgit v1.2.3


From 31c1e5a1350ae8d1bc2018f5de8264266d9773e1 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Fri, 1 Aug 2014 12:20:10 +0200
Subject: dmaengine: Remove the context argument to the prep_dma_cyclic
 operation

The argument is always set to NULL and never used. Remove it.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/amba-pl08x.c      | 2 +-
 drivers/dma/at_hdmac.c        | 3 +--
 drivers/dma/bcm2835-dma.c     | 2 +-
 drivers/dma/dma-jz4740.c      | 2 +-
 drivers/dma/edma.c            | 2 +-
 drivers/dma/ep93xx_dma.c      | 4 +---
 drivers/dma/fsl-edma.c        | 2 +-
 drivers/dma/imx-dma.c         | 2 +-
 drivers/dma/imx-sdma.c        | 2 +-
 drivers/dma/mmp_pdma.c        | 2 +-
 drivers/dma/mmp_tdma.c        | 2 +-
 drivers/dma/mxs-dma.c         | 2 +-
 drivers/dma/omap-dma.c        | 3 +--
 drivers/dma/pl330.c           | 2 +-
 drivers/dma/s3c24xx-dma.c     | 3 +--
 drivers/dma/sa11x0-dma.c      | 2 +-
 drivers/dma/sh/shdma-base.c   | 2 +-
 drivers/dma/sirf-dma.c        | 2 +-
 drivers/dma/ste_dma40.c       | 3 +--
 drivers/dma/tegra20-apb-dma.c | 2 +-
 include/linux/dmaengine.h     | 4 ++--
 21 files changed, 22 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 8114731a1c62..48ec81fe1eac 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1653,7 +1653,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c13a3bb0f594..d20ab1b73a3a 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -893,12 +893,11 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
  * @period_len: number of bytes for each period
  * @direction: transfer direction, to or from device
  * @flags: tx descriptor status flags
- * @context: transfer context (ignored)
  */
 static struct dma_async_tx_descriptor *
 atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index a03602164e3e..68007974961a 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -335,7 +335,7 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan)
 static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long flags, void *context)
+	unsigned long flags)
 {
 	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
 	enum dma_slave_buswidth dev_width;
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index bfbce6b07902..6a9d89c93b1f 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -433,7 +433,7 @@ static struct dma_async_tx_descriptor *jz4740_dma_prep_slave_sg(
 static struct dma_async_tx_descriptor *jz4740_dma_prep_dma_cyclic(
 	struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long flags, void *context)
+	unsigned long flags)
 {
 	struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
 	struct jz4740_dma_desc *desc;
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index a13f37f719ed..d566650abf62 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -598,7 +598,7 @@ struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
 static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long tx_flags, void *context)
+	unsigned long tx_flags)
 {
 	struct edma_chan *echan = to_edma_chan(chan);
 	struct device *dev = chan->device->dev;
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index cb4bf682a708..7650470196c4 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -1092,7 +1092,6 @@ fail:
  * @period_len: length of a single period
  * @dir: direction of the operation
  * @flags: tx descriptor status flags
- * @context: operation context (ignored)
  *
  * Prepares a descriptor for cyclic DMA operation. This means that once the
  * descriptor is submitted, we will be submitting in a @period_len sized
@@ -1105,8 +1104,7 @@ fail:
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			   size_t buf_len, size_t period_len,
-			   enum dma_transfer_direction dir, unsigned long flags,
-			   void *context)
+			   enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index 24ab3d371954..3c5711d5fe97 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -517,7 +517,7 @@ err:
 static struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
 	struct fsl_edma_desc *fsl_desc;
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 286660a12cc6..9d2c9e7374dc 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -866,7 +866,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index de584e605db5..f7626e37d0b8 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1125,7 +1125,7 @@ err_out:
 static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index a7b186d536b3..a1a4db5721b8 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -601,7 +601,7 @@ static struct dma_async_tx_descriptor *
 mmp_pdma_prep_dma_cyclic(struct dma_chan *dchan,
 			 dma_addr_t buf_addr, size_t len, size_t period_len,
 			 enum dma_transfer_direction direction,
-			 unsigned long flags, void *context)
+			 unsigned long flags)
 {
 	struct mmp_pdma_chan *chan;
 	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index 724f7f4c9720..6ad30e2c5038 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -389,7 +389,7 @@ struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac)
 static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
 	struct mmp_tdma_desc *desc;
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index dc1dba78e529..5ea61201dbf0 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -589,7 +589,7 @@ err_out:
 static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index b19f04f4390b..4cf7d9a950d7 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -853,8 +853,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
 
 static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-	size_t period_len, enum dma_transfer_direction dir, unsigned long flags,
-	void *context)
+	size_t period_len, enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
 	struct omap_chan *c = to_omap_dma_chan(chan);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index a55d75498098..d5149aacd2fe 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2362,7 +2362,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
 static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct dma_pl330_desc *desc = NULL, *first = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 012520c9fd79..7416572d1e40 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -889,8 +889,7 @@ static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
 
 static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
-	enum dma_transfer_direction direction, unsigned long flags,
-	void *context)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
 	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
index 5ebdfbc1051e..4b0ef043729a 100644
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -612,7 +612,7 @@ static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg(
 
 static struct dma_async_tx_descriptor *sa11x0_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
-	enum dma_transfer_direction dir, unsigned long flags, void *context)
+	enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
 	struct sa11x0_dma_desc *txd;
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index e427a03a0e8b..42d497416196 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -668,7 +668,7 @@ static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
 static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long flags, void *context)
+	unsigned long flags)
 {
 	struct shdma_chan *schan = to_shdma_chan(chan);
 	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 03f7820fa333..aac03ab10c54 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -580,7 +580,7 @@ err_dir:
 static struct dma_async_tx_descriptor *
 sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
 	size_t buf_len, size_t period_len,
-	enum dma_transfer_direction direction, unsigned long flags, void *context)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
 	struct sirfsoc_dma_desc *sdesc = NULL;
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index c7984459ede7..5fe59335e247 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2531,8 +2531,7 @@ d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 static struct dma_async_tx_descriptor *
 dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 		     size_t buf_len, size_t period_len,
-		     enum dma_transfer_direction direction, unsigned long flags,
-		     void *context)
+		     enum dma_transfer_direction direction, unsigned long flags)
 {
 	unsigned int periods = buf_len / period_len;
 	struct dma_async_tx_descriptor *txd;
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 03ad64ecaaf0..16efa603ff65 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -1055,7 +1055,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
 static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
 	struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long flags, void *context)
+	unsigned long flags)
 {
 	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
 	struct tegra_dma_desc *dma_desc = NULL;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 4eb2f82aed1d..94ddccd706fc 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -669,7 +669,7 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context);
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
 		struct dma_chan *chan, struct dma_interleaved_template *xt,
 		unsigned long flags);
@@ -744,7 +744,7 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic(
 		unsigned long flags)
 {
 	return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len,
-						period_len, dir, flags, NULL);
+						period_len, dir, flags);
 }
 
 static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
-- 
cgit v1.2.3


From 0097875bd41528922fb3bb5f348c53f17e00e2fd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 31 Jul 2014 03:10:50 -0700
Subject: proc: Implement /proc/thread-self to point at the directory of the
 current thread

/proc/thread-self is derived from /proc/self.  /proc/thread-self
points to the directory in proc containing information about the
current thread.

This funtionality has been missing for a long time, and is tricky to
implement in userspace as gettid() is not exported by glibc.  More
importantly this allows fixing defects in /proc/mounts and /proc/net
where in a threaded application today they wind up being empty files
when only the initial pthread has exited, causing problems for other
threads.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/Makefile              |  1 +
 fs/proc/base.c                | 15 +++++---
 fs/proc/inode.c               |  7 +++-
 fs/proc/internal.h            |  6 +++
 fs/proc/root.c                |  3 ++
 fs/proc/thread_self.c         | 85 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/pid_namespace.h |  1 +
 7 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 fs/proc/thread_self.c

(limited to 'include/linux')

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 239493ec718e..7151ea428041 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -23,6 +23,7 @@ proc-y	+= version.o
 proc-y	+= softirqs.o
 proc-y	+= namespaces.o
 proc-y	+= self.o
+proc-y	+= thread_self.o
 proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
 proc-$(CONFIG_NET)		+= proc_net.o
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ed34e405c6b9..0131156ce7c9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2847,7 +2847,7 @@ retry:
 	return iter;
 }
 
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
 
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file *file, struct dir_context *ctx)
@@ -2859,14 +2859,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
 		return 0;
 
-	if (pos == TGID_OFFSET - 1) {
+	if (pos == TGID_OFFSET - 2) {
 		struct inode *inode = ns->proc_self->d_inode;
 		if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
 			return 0;
-		iter.tgid = 0;
-	} else {
-		iter.tgid = pos - TGID_OFFSET;
+		ctx->pos = pos = pos + 1;
+	}
+	if (pos == TGID_OFFSET - 1) {
+		struct inode *inode = ns->proc_thread_self->d_inode;
+		if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
+			return 0;
+		ctx->pos = pos = pos + 1;
 	}
+	iter.tgid = pos - TGID_OFFSET;
 	iter.task = NULL;
 	for (iter = next_tgid(ns, iter);
 	     iter.task;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 0adbc02d60e3..333080d7a671 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 int proc_fill_super(struct super_block *s)
 {
 	struct inode *root_inode;
+	int ret;
 
 	s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
 	s->s_blocksize = 1024;
@@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s)
 		return -ENOMEM;
 	}
 
-	return proc_setup_self(s);
+	ret = proc_setup_self(s);
+	if (ret) {
+		return ret;
+	}
+	return proc_setup_thread_self(s);
 }
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3ab6d14e71c5..ee04619173b2 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -233,6 +233,12 @@ static inline int proc_net_init(void) { return 0; }
  */
 extern int proc_setup_self(struct super_block *);
 
+/*
+ * proc_thread_self.c
+ */
+extern int proc_setup_thread_self(struct super_block *);
+extern void proc_thread_self_init(void);
+
 /*
  * proc_sysctl.c
  */
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5dbadecb234d..48f1c03bc7ed 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb)
 	ns = (struct pid_namespace *)sb->s_fs_info;
 	if (ns->proc_self)
 		dput(ns->proc_self);
+	if (ns->proc_thread_self)
+		dput(ns->proc_thread_self);
 	kill_anon_super(sb);
 	put_pid_ns(ns);
 }
@@ -170,6 +172,7 @@ void __init proc_root_init(void)
 		return;
 
 	proc_self_init();
+	proc_thread_self_init();
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	proc_net_init();
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
new file mode 100644
index 000000000000..59075b509df3
--- /dev/null
+++ b/fs/proc/thread_self.c
@@ -0,0 +1,85 @@
+#include <linux/sched.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+#include <linux/pid_namespace.h>
+#include "internal.h"
+
+/*
+ * /proc/thread_self:
+ */
+static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
+			      int buflen)
+{
+	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+	pid_t tgid = task_tgid_nr_ns(current, ns);
+	pid_t pid = task_pid_nr_ns(current, ns);
+	char tmp[PROC_NUMBUF + 6 + PROC_NUMBUF];
+	if (!pid)
+		return -ENOENT;
+	sprintf(tmp, "%d/task/%d", tgid, pid);
+	return readlink_copy(buffer, buflen, tmp);
+}
+
+static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+	pid_t tgid = task_tgid_nr_ns(current, ns);
+	pid_t pid = task_pid_nr_ns(current, ns);
+	char *name = ERR_PTR(-ENOENT);
+	if (pid) {
+		name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
+		if (!name)
+			name = ERR_PTR(-ENOMEM);
+		else
+			sprintf(name, "%d/task/%d", tgid, pid);
+	}
+	nd_set_link(nd, name);
+	return NULL;
+}
+
+static const struct inode_operations proc_thread_self_inode_operations = {
+	.readlink	= proc_thread_self_readlink,
+	.follow_link	= proc_thread_self_follow_link,
+	.put_link	= kfree_put_link,
+};
+
+static unsigned thread_self_inum;
+
+int proc_setup_thread_self(struct super_block *s)
+{
+	struct inode *root_inode = s->s_root->d_inode;
+	struct pid_namespace *ns = s->s_fs_info;
+	struct dentry *thread_self;
+
+	mutex_lock(&root_inode->i_mutex);
+	thread_self = d_alloc_name(s->s_root, "thread-self");
+	if (thread_self) {
+		struct inode *inode = new_inode_pseudo(s);
+		if (inode) {
+			inode->i_ino = thread_self_inum;
+			inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+			inode->i_mode = S_IFLNK | S_IRWXUGO;
+			inode->i_uid = GLOBAL_ROOT_UID;
+			inode->i_gid = GLOBAL_ROOT_GID;
+			inode->i_op = &proc_thread_self_inode_operations;
+			d_add(thread_self, inode);
+		} else {
+			dput(thread_self);
+			thread_self = ERR_PTR(-ENOMEM);
+		}
+	} else {
+		thread_self = ERR_PTR(-ENOMEM);
+	}
+	mutex_unlock(&root_inode->i_mutex);
+	if (IS_ERR(thread_self)) {
+		pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
+		return PTR_ERR(thread_self);
+	}
+	ns->proc_thread_self = thread_self;
+	return 0;
+}
+
+void __init proc_thread_self_init(void)
+{
+	proc_alloc_inum(&thread_self_inum);
+}
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 7246ef3d4455..1997ffc295a7 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -33,6 +33,7 @@ struct pid_namespace {
 #ifdef CONFIG_PROC_FS
 	struct vfsmount *proc_mnt;
 	struct dentry *proc_self;
+	struct dentry *proc_thread_self;
 #endif
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct bsd_acct_struct *bacct;
-- 
cgit v1.2.3


From 1b69be5e8afc634f39ad695a6ab6aad0cf0975c7 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Tue, 10 Jun 2014 11:41:57 +1000
Subject: drivers/vfio: EEH support for VFIO PCI device

The patch adds new IOCTL commands for sPAPR VFIO container device
to support EEH functionality for PCI devices, which have been passed
through from host to somebody else via VFIO.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Acked-by: Alexander Graf <agraf@suse.de>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 Documentation/vfio.txt              | 87 +++++++++++++++++++++++++++++++++++--
 drivers/vfio/Makefile               |  1 +
 drivers/vfio/pci/vfio_pci.c         | 18 ++++++--
 drivers/vfio/vfio_iommu_spapr_tce.c | 17 +++++++-
 drivers/vfio/vfio_spapr_eeh.c       | 87 +++++++++++++++++++++++++++++++++++++
 include/linux/vfio.h                | 23 ++++++++++
 include/uapi/linux/vfio.h           | 34 +++++++++++++++
 7 files changed, 259 insertions(+), 8 deletions(-)
 create mode 100644 drivers/vfio/vfio_spapr_eeh.c

(limited to 'include/linux')

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index b9ca02370d46..96978eced341 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides
 an excellent performance which has limitations such as inability to do
 locked pages accounting in real time.
 
-So 3 additional ioctls have been added:
+4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
+subtree that can be treated as a unit for the purposes of partitioning and
+error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
+function of a multi-function IOA, or multiple IOAs (possibly including switch
+and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors
+and recover from them via EEH RTAS services, which works on the basis of
+additional ioctl commands.
+
+So 4 additional ioctls have been added:
 
 	VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start
 		of the DMA window on the PCI bus.
@@ -316,9 +324,12 @@ So 3 additional ioctls have been added:
 
 	VFIO_IOMMU_DISABLE - disables the container.
 
+	VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery.
 
 The code flow from the example above should be slightly changed:
 
+	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+
 	.....
 	/* Add the group to the container */
 	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
@@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed:
 	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
 	/* Check here is .iova/.size are within DMA window from spapr_iommu_info */
-
 	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
-	.....
+
+	/* Get a file descriptor for the device */
+	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+	....
+
+	/* Gratuitous device reset and go... */
+	ioctl(device, VFIO_DEVICE_RESET);
+
+	/* Make sure EEH is supported */
+	ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
+
+	/* Enable the EEH functionality on the device */
+	pe_op.op = VFIO_EEH_PE_ENABLE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* You're suggested to create additional data struct to represent
+	 * PE, and put child devices belonging to same IOMMU group to the
+	 * PE instance for later reference.
+	 */
+
+	/* Check the PE's state and make sure it's in functional state */
+	pe_op.op = VFIO_EEH_PE_GET_STATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Save device state using pci_save_state().
+	 * EEH should be enabled on the specified device.
+	 */
+
+	....
+
+	/* When 0xFF's returned from reading PCI config space or IO BARs
+	 * of the PCI device. Check the PE's state to see if that has been
+	 * frozen.
+	 */
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Waiting for pending PCI transactions to be completed and don't
+	 * produce any more PCI traffic from/to the affected PE until
+	 * recovery is finished.
+	 */
+
+	/* Enable IO for the affected PE and collect logs. Usually, the
+	 * standard part of PCI config space, AER registers are dumped
+	 * as logs for further analysis.
+	 */
+	pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/*
+	 * Issue PE reset: hot or fundamental reset. Usually, hot reset
+	 * is enough. However, the firmware of some PCI adapters would
+	 * require fundamental reset.
+	 */
+	pe_op.op = VFIO_EEH_PE_RESET_HOT;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Configure the PCI bridges for the affected PE */
+	pe_op.op = VFIO_EEH_PE_CONFIGURE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Restored state we saved at initialization time. pci_restore_state()
+	 * is good enough as an example.
+	 */
+
+	/* Hopefully, error is recovered successfully. Now, you can resume to
+	 * start PCI traffic to/from the affected PE.
+	 */
+
+	....
 
 -------------------------------------------------------------------------------
 
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 72bfabc8629e..50e30bc75e85 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
+obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 010e0f8b8e4f..e2ee80f36e3e 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
 
-	if (atomic_dec_and_test(&vdev->refcnt))
+	if (atomic_dec_and_test(&vdev->refcnt)) {
+		vfio_spapr_pci_eeh_release(vdev->pdev);
 		vfio_pci_disable(vdev);
+	}
 
 	module_put(THIS_MODULE);
 }
@@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data)
 static int vfio_pci_open(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
+	int ret;
 
 	if (!try_module_get(THIS_MODULE))
 		return -ENODEV;
 
 	if (atomic_inc_return(&vdev->refcnt) == 1) {
-		int ret = vfio_pci_enable(vdev);
+		ret = vfio_pci_enable(vdev);
+		if (ret)
+			goto error;
+
+		ret = vfio_spapr_pci_eeh_open(vdev->pdev);
 		if (ret) {
-			module_put(THIS_MODULE);
-			return ret;
+			vfio_pci_disable(vdev);
+			goto error;
 		}
 	}
 
 	return 0;
+error:
+	module_put(THIS_MODULE);
+	return ret;
 }
 
 static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index a84788ba662c..730b4ef3e0cc 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 	switch (cmd) {
 	case VFIO_CHECK_EXTENSION:
-		return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0;
+		switch (arg) {
+		case VFIO_SPAPR_TCE_IOMMU:
+			ret = 1;
+			break;
+		default:
+			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
+			break;
+		}
+
+		return (ret < 0) ? 0 : ret;
 
 	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
 		struct vfio_iommu_spapr_tce_info info;
@@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data,
 		tce_iommu_disable(container);
 		mutex_unlock(&container->lock);
 		return 0;
+	case VFIO_EEH_PE_OP:
+		if (!container->tbl || !container->tbl->it_group)
+			return -ENODEV;
+
+		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
+						  cmd, arg);
 	}
 
 	return -ENOTTY;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
new file mode 100644
index 000000000000..f834b4ce1431
--- /dev/null
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -0,0 +1,87 @@
+/*
+ * EEH functionality support for VFIO devices. The feature is only
+ * available on sPAPR compatible platforms.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <asm/eeh.h>
+
+/* We might build address mapping here for "fast" path later */
+int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+	return eeh_dev_open(pdev);
+}
+
+void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+	eeh_dev_release(pdev);
+}
+
+long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+				unsigned int cmd, unsigned long arg)
+{
+	struct eeh_pe *pe;
+	struct vfio_eeh_pe_op op;
+	unsigned long minsz;
+	long ret = -EINVAL;
+
+	switch (cmd) {
+	case VFIO_CHECK_EXTENSION:
+		if (arg == VFIO_EEH)
+			ret = eeh_enabled() ? 1 : 0;
+		else
+			ret = 0;
+		break;
+	case VFIO_EEH_PE_OP:
+		pe = eeh_iommu_group_to_pe(group);
+		if (!pe)
+			return -ENODEV;
+
+		minsz = offsetofend(struct vfio_eeh_pe_op, op);
+		if (copy_from_user(&op, (void __user *)arg, minsz))
+			return -EFAULT;
+		if (op.argsz < minsz || op.flags)
+			return -EINVAL;
+
+		switch (op.op) {
+		case VFIO_EEH_PE_DISABLE:
+			ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE);
+			break;
+		case VFIO_EEH_PE_ENABLE:
+			ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE);
+			break;
+		case VFIO_EEH_PE_UNFREEZE_IO:
+			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
+			break;
+		case VFIO_EEH_PE_UNFREEZE_DMA:
+			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
+			break;
+		case VFIO_EEH_PE_GET_STATE:
+			ret = eeh_pe_get_state(pe);
+			break;
+		case VFIO_EEH_PE_RESET_DEACTIVATE:
+			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+			break;
+		case VFIO_EEH_PE_RESET_HOT:
+			ret = eeh_pe_reset(pe, EEH_RESET_HOT);
+			break;
+		case VFIO_EEH_PE_RESET_FUNDAMENTAL:
+			ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL);
+			break;
+		case VFIO_EEH_PE_CONFIGURE:
+			ret = eeh_pe_configure(pe);
+			break;
+		default:
+			ret = -EINVAL;
+		}
+	}
+
+	return ret;
+}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 8ec980b5e3af..25a0fbd4b998 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
 
+#ifdef CONFIG_EEH
+extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
+extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+				       unsigned int cmd,
+				       unsigned long arg);
+#else
+static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+	return 0;
+}
+
+static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+}
+
+static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+					      unsigned int cmd,
+					      unsigned long arg)
+{
+	return -ENOTTY;
+}
+#endif /* CONFIG_EEH */
 #endif /* VFIO_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index cb9023d4f063..6612974c64bf 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -30,6 +30,9 @@
  */
 #define VFIO_DMA_CC_IOMMU		4
 
+/* Check if EEH is supported */
+#define VFIO_EEH			5
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
@@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info {
 
 #define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
 
+/*
+ * EEH PE operation struct provides ways to:
+ * - enable/disable EEH functionality;
+ * - unfreeze IO/DMA for frozen PE;
+ * - read PE state;
+ * - reset PE;
+ * - configure PE.
+ */
+struct vfio_eeh_pe_op {
+	__u32 argsz;
+	__u32 flags;
+	__u32 op;
+};
+
+#define VFIO_EEH_PE_DISABLE		0	/* Disable EEH functionality */
+#define VFIO_EEH_PE_ENABLE		1	/* Enable EEH functionality  */
+#define VFIO_EEH_PE_UNFREEZE_IO		2	/* Enable IO for frozen PE   */
+#define VFIO_EEH_PE_UNFREEZE_DMA	3	/* Enable DMA for frozen PE  */
+#define VFIO_EEH_PE_GET_STATE		4	/* PE state retrieval        */
+#define  VFIO_EEH_PE_STATE_NORMAL	0	/* PE in functional state    */
+#define  VFIO_EEH_PE_STATE_RESET	1	/* PE reset in progress      */
+#define  VFIO_EEH_PE_STATE_STOPPED	2	/* Stopped DMA and IO        */
+#define  VFIO_EEH_PE_STATE_STOPPED_DMA	4	/* Stopped DMA only          */
+#define  VFIO_EEH_PE_STATE_UNAVAIL	5	/* State unavailable         */
+#define VFIO_EEH_PE_RESET_DEACTIVATE	5	/* Deassert PE reset         */
+#define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
+#define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
+#define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
+
+#define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)
+
 /* ***************************************************************** */
 
 #endif /* _UAPIVFIO_H */
-- 
cgit v1.2.3


From 8ba918d488caded2c4368b0b922eb905fe3bb101 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:10 +1000
Subject: KVM: irqchip: Provide and use accessors for irq routing table

This provides accessor functions for the KVM interrupt mappings, in
order to reduce the amount of code that accesses the fields of the
kvm_irq_routing_table struct, and restrict that code to one file,
virt/kvm/irqchip.c.  The new functions are kvm_irq_map_gsi(), which
maps from a global interrupt number to a set of IRQ routing entries,
and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to
a global interrupt number.

This also moves the update of kvm_irq_routing_table::chip[][]
into irqchip.c, out of the various kvm_set_routing_entry
implementations.  That means that none of the kvm_set_routing_entry
implementations need the kvm_irq_routing_table argument anymore,
so this removes it.

This does not change any locking or data lifetime rules.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/powerpc/kvm/mpic.c   |  4 +---
 arch/s390/kvm/interrupt.c |  3 +--
 include/linux/kvm_host.h  |  8 ++++++--
 virt/kvm/eventfd.c        | 10 ++++++----
 virt/kvm/irq_comm.c       | 20 +++++++++-----------
 virt/kvm/irqchip.c        | 42 ++++++++++++++++++++++++++++++++++--------
 6 files changed, 57 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index b68d0dc9479a..39b3a8f816f2 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1826,8 +1826,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return 0;
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -1839,7 +1838,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 		e->irqchip.pin = ue->u.irqchip.pin;
 		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
 			goto out;
-		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 92528a0bdda6..f4c819bfc193 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1556,8 +1556,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
 	return ret;
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int ret;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5065b953e6e8..4956149e962a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -752,6 +752,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
+int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
+		    struct kvm_irq_routing_table *irq_rt, int gsi);
+int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
+			 unsigned irqchip, unsigned pin);
+
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status);
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
@@ -942,8 +947,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
 			unsigned flags);
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index bae593a545c5..15fa9488b2d0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 			 struct kvm_irq_routing_table *irq_rt)
 {
 	struct kvm_kernel_irq_routing_entry *e;
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+	int i, n_entries;
+
+	n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
 
 	irqfd->irq_entry.type = 0;
-	if (irqfd->gsi >= irq_rt->nr_rt_entries)
-		goto out;
 
-	hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
+	e = entries;
+	for (i = 0; i < n_entries; ++i, ++e) {
 		/* Only fast-path MSI. */
 		if (e->type == KVM_IRQ_ROUTING_MSI)
 			irqfd->irq_entry = *e;
 	}
 
- out:
 	write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a228ee82bad2..175844593243 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
  */
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 {
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
@@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
-			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
-				ret = kvm_set_msi_inatomic(e, kvm);
-			else
-				ret = -EWOULDBLOCK;
-			break;
-		}
+	if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) {
+		e = &entries[0];
+		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+			ret = kvm_set_msi_inatomic(e, kvm);
+		else
+			ret = -EWOULDBLOCK;
+	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return ret;
 }
@@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
 		if (e->irqchip.pin >= max_pin)
 			goto out;
-		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index b43c275775cd..f4648dd94888 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,13 +31,37 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
+int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
+		    struct kvm_irq_routing_table *irq_rt, int gsi)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	int n = 0;
+
+	if (gsi < irq_rt->nr_rt_entries) {
+		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
+			entries[n] = *e;
+			++n;
+		}
+	}
+
+	return n;
+}
+
+int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
+			 unsigned irqchip, unsigned pin)
+{
+	return irq_rt->chip[irqchip][pin];
+}
+
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status)
 {
-	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0, idx;
+	struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
+	int ret = -1, i, idx;
 	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link)
-			irq_set[i++] = *e;
+	i = kvm_irq_map_gsi(irq_set, irq_rt, irq);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
@@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 
 	e->gsi = ue->gsi;
 	e->type = ue->type;
-	r = kvm_set_routing_entry(rt, e, ue);
+	r = kvm_set_routing_entry(e, ue);
 	if (r)
 		goto out;
+	if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
+		rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
 
 	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;
-- 
cgit v1.2.3


From 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:11 +1000
Subject: KVM: Move all accesses to kvm::irq_routing into irqchip.c

Now that struct _irqfd does not keep a reference to storage pointed
to by the irq_routing field of struct kvm, we can move the statement
that updates it out from under the irqfds.lock and put it in
kvm_set_irq_routing() instead.  That means we then have to take a
srcu_read_lock on kvm->irq_srcu around the irqfd_update call in
kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer
ensures that that the routing can't change.

Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin()
to take a struct kvm * argument instead of the pointer to the routing
table, this allows us to to move all references to kvm->irq_routing
into irqchip.c.  That in turn allows us to move the definition of the
kvm_irq_routing_table struct into irqchip.c as well.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 35 +++++++----------------------------
 virt/kvm/eventfd.c       | 22 +++++++++-------------
 virt/kvm/irq_comm.c      |  6 ++----
 virt/kvm/irqchip.c       | 39 +++++++++++++++++++++++++--------------
 4 files changed, 43 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4956149e962a..ddd33e1aeee1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -325,24 +325,7 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-
-struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-	struct kvm_kernel_irq_routing_entry *rt_entries;
-	u32 nr_rt_entries;
-	/*
-	 * Array indexed by gsi. Each entry contains list of irq chips
-	 * the gsi is connected to.
-	 */
-	struct hlist_head map[0];
-};
-
-#else
-
-struct kvm_irq_routing_table {};
-
-#endif
+struct kvm_irq_routing_table;
 
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
@@ -401,8 +384,7 @@ struct kvm {
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	/*
-	 * Update side is protected by irq_lock and,
-	 * if configured, irqfds.lock.
+	 * Update side is protected by irq_lock.
 	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
@@ -752,10 +734,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
-int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
-		    struct kvm_irq_routing_table *irq_rt, int gsi);
-int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
-			 unsigned irqchip, unsigned pin);
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi);
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
 
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status);
@@ -967,7 +948,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
-void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
+void kvm_irq_routing_update(struct kvm *);
 #else
 static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 {
@@ -989,10 +970,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-static inline void kvm_irq_routing_update(struct kvm *kvm,
-					  struct kvm_irq_routing_table *irq_rt)
+static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
-	rcu_assign_pointer(kvm->irq_routing, irq_rt);
 }
 #endif
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 15fa9488b2d0..f0075ffb0c35 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 }
 
 /* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
-			 struct kvm_irq_routing_table *irq_rt)
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	int i, n_entries;
 
-	n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi);
+	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
 
@@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct _irqfd *irqfd, *tmp;
 	struct fd f;
 	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
 	int ret;
 	unsigned int events;
+	int idx;
 
 	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
 	if (!irqfd)
@@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		goto fail;
 	}
 
-	irq_rt = rcu_dereference_protected(kvm->irq_routing,
-					   lockdep_is_held(&kvm->irqfds.lock));
-	irqfd_update(kvm, irqfd, irq_rt);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irqfd_update(kvm, irqfd);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
 
@@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm)
 }
 
 /*
- * Change irq_routing and irqfd.
+ * Take note of a change in irq routing.
  * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  */
-void kvm_irq_routing_update(struct kvm *kvm,
-			    struct kvm_irq_routing_table *irq_rt)
+void kvm_irq_routing_update(struct kvm *kvm)
 {
 	struct _irqfd *irqfd;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
-	rcu_assign_pointer(kvm->irq_routing, irq_rt);
-
 	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
-		irqfd_update(kvm, irqfd, irq_rt);
+		irqfd_update(kvm, irqfd);
 
 	spin_unlock_irq(&kvm->irqfds.lock);
 }
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 175844593243..963b8995a9e8 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
-	struct kvm_irq_routing_table *irq_rt;
 	int idx;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 * which is limited to 1:1 GSI mapping.
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) {
+	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
 		e = &entries[0];
 		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
 			ret = kvm_set_msi_inatomic(e, kvm);
@@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	int idx, gsi;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index f4648dd94888..04faac50cef5 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,12 +31,26 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
-int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
-		    struct kvm_irq_routing_table *irq_rt, int gsi)
+struct kvm_irq_routing_table {
+	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+	struct kvm_kernel_irq_routing_entry *rt_entries;
+	u32 nr_rt_entries;
+	/*
+	 * Array indexed by gsi. Each entry contains list of irq chips
+	 * the gsi is connected to.
+	 */
+	struct hlist_head map[0];
+};
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_kernel_irq_routing_entry *e;
 	int n = 0;
 
+	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
+					lockdep_is_held(&kvm->irq_lock));
 	if (gsi < irq_rt->nr_rt_entries) {
 		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
 			entries[n] = *e;
@@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
 	return n;
 }
 
-int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
-			 unsigned irqchip, unsigned pin)
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
+
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	return irq_rt->chip[irqchip][pin];
 }
 
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 {
 	struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
 	int ret = -1, i, idx;
-	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 * writes to the unused one.
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	i = kvm_irq_map_gsi(irq_set, irq_rt, irq);
+	i = kvm_irq_map_gsi(kvm, irq_set, irq);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
@@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
-	kvm_irq_routing_update(kvm, new);
+	rcu_assign_pointer(kvm->irq_routing, new);
+	kvm_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
 	synchronize_srcu_expedited(&kvm->irq_srcu);
-- 
cgit v1.2.3


From 297e21053a52f060944e9f0de4c64fad9bcd72fc Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:13 +1000
Subject: KVM: Give IRQFD its own separate enabling Kconfig option

Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING.
So that we can have the IRQFD code compiled in without having the
IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes
the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING,
and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING
also select HAVE_KVM_IRQFD.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/ia64/kvm/Kconfig    | 1 +
 arch/powerpc/kvm/Kconfig | 1 +
 arch/s390/kvm/Kconfig    | 1 +
 arch/x86/kvm/Kconfig     | 1 +
 include/linux/kvm_host.h | 8 ++++----
 virt/kvm/Kconfig         | 3 +++
 virt/kvm/eventfd.c       | 6 +++---
 virt/kvm/kvm_main.c      | 2 +-
 8 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 990b86420cc6..3d50ea955c4c 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -25,6 +25,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select KVM_APIC_ARCHITECTURE
 	select KVM_MMIO
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 8f104a6879f0..d4741dba91af 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -158,6 +158,7 @@ config KVM_MPIC
 	bool "KVM in-kernel MPIC emulation"
 	depends on KVM && E500
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_MSI
 	help
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 10d529ac9821..646db9c467d1 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
 	select KVM_ASYNC_PF
 	select KVM_ASYNC_PF_SYNC
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 287e4c85fff9..f9d16ff56c6b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ddd33e1aeee1..8593d2e61cbf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -437,7 +437,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd_init(void);
 void kvm_irqfd_exit(void);
 #else
@@ -932,20 +932,20 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-
 #else
 
 static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #endif
 
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
 void kvm_irq_routing_update(struct kvm *);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 13f2d19793e3..fc0c5e603eb4 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@ config HAVE_KVM
 config HAVE_KVM_IRQCHIP
        bool
 
+config HAVE_KVM_IRQFD
+       bool
+
 config HAVE_KVM_IRQ_ROUTING
        bool
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 99957df69cf2..f5f61548f60d 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -39,7 +39,7 @@
 #include "irq.h"
 #include "iodev.h"
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 /*
  * --------------------------------------------------------------------
  * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -450,7 +450,7 @@ out:
 void
 kvm_eventfd_init(struct kvm *kvm)
 {
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 /*
  * shutdown any irqfd's that match fd+gsi
  */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1b95cc926cfc..a69a623938b8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2337,7 +2337,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #ifdef CONFIG_HAVE_KVM_MSI
 	case KVM_CAP_SIGNAL_MSI:
 #endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	case KVM_CAP_IRQFD_RESAMPLE:
 #endif
 	case KVM_CAP_CHECK_EXTENSION_VM:
-- 
cgit v1.2.3


From 114840c3d29b9cbd867faa69595a2aee6f6b54a2 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 1 Jun 2014 11:53:50 +0300
Subject: mlx4_core: Add support for secure-host and SMP firewall

Secure-host is the general term for the capability of a device
to protect itself and the subnet from malicious host software.

This is achieved by:
1. Not allowing un-trusted entities to access device configuration
   registers, directly (through pci_cr or pci_conf) and indirectly
   (through MADs).

2. Hiding M_Key from untrusted entities.

3. Preventing the modification of GUID0 by un-trusted entities

4. Not allowing drivers on untrusted hosts to receive nor to transmit
   packets over QP0 (SMP Firewall).

The secure-host capability depends on firmware handling all QP0
packets, and not passing these packets up to the driver. Any information
required by the driver for proper operation (e.g., SM lid) is passed
via events generated by the firmware while processing QP0 MADs.

Driver support mainly requires using the MAD_DEMUX FW command at startup,
where the feature is enabled/disabled through a procedure described in
the Mellanox HCA tools package.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>

[ Fix error path in mlx4_setup_hca to go to err_mcg_table_free. - Roland ]

Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c  |  9 +++
 drivers/net/ethernet/mellanox/mlx4/fw.c   | 91 ++++++++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/main.c |  5 ++
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  1 +
 include/linux/mlx4/cmd.h                  |  7 +++
 include/linux/mlx4/device.h               |  1 +
 6 files changed, 113 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 5d940a26055c..65a4a0f88ea0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1310,6 +1310,15 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_MAD_IFC_wrapper
 	},
+	{
+		.opcode = MLX4_CMD_MAD_DEMUX,
+		.has_inbox = false,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_CMD_EPERM_wrapper
+	},
 	{
 		.opcode = MLX4_CMD_QUERY_IF_STAT,
 		.has_inbox = false,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 688e1eabab29..494753e44ae3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[7] = "FSM (MAC anti-spoofing) support",
 		[8] = "Dynamic QP updates support",
 		[9] = "Device managed flow steering IPoIB support",
-		[10] = "TCP/IP offloads/flow-steering for VXLAN support"
+		[10] = "TCP/IP offloads/flow-steering for VXLAN support",
+		[11] = "MAD DEMUX (Secure-Host) support"
 	};
 	int i;
 
@@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET		0xa0
 #define QUERY_DEV_CAP_FW_REASSIGN_MAC		0x9d
 #define QUERY_DEV_CAP_VXLAN			0x9e
+#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET		0xb0
 
 	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		MLX4_GET(dev_cap->max_counters, outbox,
 			 QUERY_DEV_CAP_MAX_COUNTERS_OFFSET);
 
+	MLX4_GET(field32, outbox,
+		 QUERY_DEV_CAP_MAD_DEMUX_OFFSET);
+	if (field32 & (1 << 0))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX;
+
 	MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 	if (field32 & (1 << 16))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
@@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work)
 out:
 	mlx4_free_cmd_mailbox(dev, mailbox);
 }
+
+static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev,
+					  struct mlx4_cmd_mailbox *mailbox)
+{
+#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET		0x10
+#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET		0x20
+#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET		0x40
+#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET	0x70
+
+	u32 set_attr_mask, getresp_attr_mask;
+	u32 trap_attr_mask, traprepress_attr_mask;
+
+	MLX4_GET(set_attr_mask, mailbox->buf,
+		 MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET);
+	mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n",
+		 set_attr_mask);
+
+	MLX4_GET(getresp_attr_mask, mailbox->buf,
+		 MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET);
+	mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n",
+		 getresp_attr_mask);
+
+	MLX4_GET(trap_attr_mask, mailbox->buf,
+		 MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET);
+	mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n",
+		 trap_attr_mask);
+
+	MLX4_GET(traprepress_attr_mask, mailbox->buf,
+		 MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET);
+	mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n",
+		 traprepress_attr_mask);
+
+	if (set_attr_mask && getresp_attr_mask && trap_attr_mask &&
+	    traprepress_attr_mask)
+		return 1;
+
+	return 0;
+}
+
+int mlx4_config_mad_demux(struct mlx4_dev *dev)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	int secure_host_active;
+	int err;
+
+	/* Check if mad_demux is supported */
+	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX))
+		return 0;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox)) {
+		mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX");
+		return -ENOMEM;
+	}
+
+	/* Query mad_demux to find out which MADs are handled by internal sma */
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */,
+			   MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX,
+			   MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	if (err) {
+		mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n",
+			  err);
+		goto out;
+	}
+
+	secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox);
+
+	/* Config mad_demux to handle all MADs returned by the query above */
+	err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */,
+		       MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	if (err) {
+		mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err);
+		goto out;
+	}
+
+	if (secure_host_active)
+		mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n");
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 82ab427290c3..f2c8e8ba23fe 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1831,6 +1831,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 			mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
 			goto err_mr_table_free;
 		}
+		err = mlx4_config_mad_demux(dev);
+		if (err) {
+			mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
+			goto err_mcg_table_free;
+		}
 	}
 
 	err = mlx4_init_eq_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 1d8af7336807..310b30b4682c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1311,5 +1311,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev);
 int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
 /* Returns the VF index of slave */
 int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
+int mlx4_config_mad_demux(struct mlx4_dev *dev);
 
 #endif /* MLX4_H */
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index c8450366c130..379c02648ab3 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -116,6 +116,7 @@ enum {
 	/* special QP and management commands */
 	MLX4_CMD_CONF_SPECIAL_QP = 0x23,
 	MLX4_CMD_MAD_IFC	 = 0x24,
+	MLX4_CMD_MAD_DEMUX	 = 0x203,
 
 	/* multicast commands */
 	MLX4_CMD_READ_MCG	 = 0x25,
@@ -185,6 +186,12 @@ enum {
 	MLX4_SET_PORT_VXLAN	= 0xB
 };
 
+enum {
+	MLX4_CMD_MAD_DEMUX_CONFIG	= 0,
+	MLX4_CMD_MAD_DEMUX_QUERY_STATE	= 1,
+	MLX4_CMD_MAD_DEMUX_QUERY_RESTR	= 2, /* Query mad demux restrictions */
+};
+
 enum {
 	MLX4_CMD_WRAPPED,
 	MLX4_CMD_NATIVE
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 35b51e7af886..cee9561e8ef6 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -172,6 +172,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_UPDATE_QP		= 1LL <<  8,
 	MLX4_DEV_CAP_FLAG2_DMFS_IPOIB		= 1LL <<  9,
 	MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS	= 1LL <<  10,
+	MLX4_DEV_CAP_FLAG2_MAD_DEMUX		= 1LL <<  11,
 };
 
 enum {
-- 
cgit v1.2.3


From df5601f9c3d831b4c478b004a1ed90a18643adbe Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 7 Oct 2013 15:37:19 +0200
Subject: tracehook_signal_handler: Remove sig, info, ka and regs

These parameters are nowhere used, so we can remove them.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 include/linux/tracehook.h | 8 +-------
 kernel/signal.c           | 2 +-
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 6f8ab7da27c4..84d497297c5f 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -133,10 +133,6 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
 
 /**
  * tracehook_signal_handler - signal handler setup is complete
- * @sig:		number of signal being delivered
- * @info:		siginfo_t of signal being delivered
- * @ka:			sigaction setting that chose the handler
- * @regs:		user register state
  * @stepping:		nonzero if debugger single-step or block-step in use
  *
  * Called by the arch code after a signal handler has been set up.
@@ -146,9 +142,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
  * Called without locks, shortly before returning to user mode
  * (or handling more signals).
  */
-static inline void tracehook_signal_handler(int sig, siginfo_t *info,
-					    const struct k_sigaction *ka,
-					    struct pt_regs *regs, int stepping)
+static inline void tracehook_signal_handler(int stepping)
 {
 	if (stepping)
 		ptrace_notify(SIGTRAP);
diff --git a/kernel/signal.c b/kernel/signal.c
index a4077e90f19f..c4d47661cc86 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2379,7 +2379,7 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka,
 	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&blocked, sig);
 	set_current_blocked(&blocked);
-	tracehook_signal_handler(sig, info, ka, regs, stepping);
+	tracehook_signal_handler(stepping);
 }
 
 void signal_setup_done(int failed, struct ksignal *ksig, int stepping)
-- 
cgit v1.2.3


From 10b1c7ac8bfed429cf3dcb0225482c8dc1485d8e Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sun, 13 Jul 2014 13:36:04 +0200
Subject: Clean up signal_delivered()

 - Pass a ksignal struct to it
 - Remove unused regs parameter
 - Make it private as it's nowhere outside of kernel/signal.c is used

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 include/linux/signal.h |  1 -
 kernel/signal.c        | 21 ++++++++-------------
 2 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index c9e65360c49a..b005cc3dc1dc 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -282,7 +282,6 @@ struct ksignal {
 
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
-extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 extern void kernel_sigaction(int, __sighandler_t);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index c4d47661cc86..0d75cf875d44 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2353,19 +2353,15 @@ relock:
 
 /**
  * signal_delivered - 
- * @sig:		number of signal being delivered
- * @info:		siginfo_t of signal being delivered
- * @ka:			sigaction setting that chose the handler
- * @regs:		user register state
+ * @ksig:		kernel signal struct
  * @stepping:		nonzero if debugger single-step or block-step in use
  *
  * This function should be called when a signal has successfully been
- * delivered. It updates the blocked signals accordingly (@ka->sa.sa_mask
+ * delivered. It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask
  * is always blocked, and the signal itself is blocked unless %SA_NODEFER
- * is set in @ka->sa.sa_flags.  Tracing is notified.
+ * is set in @ksig->ka.sa.sa_flags.  Tracing is notified.
  */
-void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka,
-			struct pt_regs *regs, int stepping)
+static void signal_delivered(struct ksignal *ksig, int stepping)
 {
 	sigset_t blocked;
 
@@ -2375,9 +2371,9 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka,
 	   simply clear the restore sigmask flag.  */
 	clear_restore_sigmask();
 
-	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&blocked, sig);
+	sigorsets(&blocked, &current->blocked, &ksig->ka.sa.sa_mask);
+	if (!(ksig->ka.sa.sa_flags & SA_NODEFER))
+		sigaddset(&blocked, ksig->sig);
 	set_current_blocked(&blocked);
 	tracehook_signal_handler(stepping);
 }
@@ -2387,8 +2383,7 @@ void signal_setup_done(int failed, struct ksignal *ksig, int stepping)
 	if (failed)
 		force_sigsegv(ksig->sig, current);
 	else
-		signal_delivered(ksig->sig, &ksig->info, &ksig->ka,
-			signal_pt_regs(), stepping);
+		signal_delivered(ksig, stepping);
 }
 
 /*
-- 
cgit v1.2.3


From 828b1f65d23cf8a68795739f6dd08fc8abd9ee64 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 7 Oct 2013 15:26:57 +0200
Subject: Rip out get_signal_to_deliver()

Now we can turn get_signal() to the main function.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 include/linux/signal.h | 14 +-------------
 kernel/signal.c        | 23 ++++++++++++-----------
 2 files changed, 13 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index b005cc3dc1dc..750196fcc0a5 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -280,7 +280,7 @@ struct ksignal {
 	int sig;
 };
 
-extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
+extern int get_signal(struct ksignal *ksig);
 extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 extern void kernel_sigaction(int, __sighandler_t);
@@ -300,18 +300,6 @@ static inline void disallow_signal(int sig)
 	kernel_sigaction(sig, SIG_IGN);
 }
 
-/*
- * Eventually that'll replace get_signal_to_deliver(); macro for now,
- * to avoid nastiness with include order.
- */
-#define get_signal(ksig)					\
-({								\
-	struct ksignal *p = (ksig);				\
-	p->sig = get_signal_to_deliver(&p->info, &p->ka,	\
-					signal_pt_regs(), NULL);\
-	p->sig > 0;						\
-})
-
 extern struct kmem_cache *sighand_cachep;
 
 int unhandled_signal(struct task_struct *tsk, int sig);
diff --git a/kernel/signal.c b/kernel/signal.c
index 0d75cf875d44..5c6020040388 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2166,8 +2166,7 @@ static int ptrace_signal(int signr, siginfo_t *info)
 	return signr;
 }
 
-int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
-			  struct pt_regs *regs, void *cookie)
+int get_signal(struct ksignal *ksig)
 {
 	struct sighand_struct *sighand = current->sighand;
 	struct signal_struct *signal = current->signal;
@@ -2237,13 +2236,13 @@ relock:
 			goto relock;
 		}
 
-		signr = dequeue_signal(current, &current->blocked, info);
+		signr = dequeue_signal(current, &current->blocked, &ksig->info);
 
 		if (!signr)
 			break; /* will return 0 */
 
 		if (unlikely(current->ptrace) && signr != SIGKILL) {
-			signr = ptrace_signal(signr, info);
+			signr = ptrace_signal(signr, &ksig->info);
 			if (!signr)
 				continue;
 		}
@@ -2251,13 +2250,13 @@ relock:
 		ka = &sighand->action[signr-1];
 
 		/* Trace actually delivered signals. */
-		trace_signal_deliver(signr, info, ka);
+		trace_signal_deliver(signr, &ksig->info, ka);
 
 		if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
 			continue;
 		if (ka->sa.sa_handler != SIG_DFL) {
 			/* Run the handler.  */
-			*return_ka = *ka;
+			ksig->ka = *ka;
 
 			if (ka->sa.sa_flags & SA_ONESHOT)
 				ka->sa.sa_handler = SIG_DFL;
@@ -2307,7 +2306,7 @@ relock:
 				spin_lock_irq(&sighand->siglock);
 			}
 
-			if (likely(do_signal_stop(info->si_signo))) {
+			if (likely(do_signal_stop(ksig->info.si_signo))) {
 				/* It released the siglock.  */
 				goto relock;
 			}
@@ -2328,7 +2327,7 @@ relock:
 
 		if (sig_kernel_coredump(signr)) {
 			if (print_fatal_signals)
-				print_fatal_signal(info->si_signo);
+				print_fatal_signal(ksig->info.si_signo);
 			proc_coredump_connector(current);
 			/*
 			 * If it was able to dump core, this kills all
@@ -2338,17 +2337,19 @@ relock:
 			 * first and our do_group_exit call below will use
 			 * that value and ignore the one we pass it.
 			 */
-			do_coredump(info);
+			do_coredump(&ksig->info);
 		}
 
 		/*
 		 * Death signals, no core dump.
 		 */
-		do_group_exit(info->si_signo);
+		do_group_exit(ksig->info.si_signo);
 		/* NOTREACHED */
 	}
 	spin_unlock_irq(&sighand->siglock);
-	return signr;
+
+	ksig->sig = signr;
+	return ksig->sig > 0;
 }
 
 /**
-- 
cgit v1.2.3


From 72f15c03977acc8f06080e6c8a91d93bfc655a65 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 5 Mar 2014 15:15:22 +0100
Subject: sas_ss_flags: Remove nested ternary if

...to make it readable.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 include/linux/sched.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0376b054a0d0..795ea2bc3d4f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2360,8 +2360,10 @@ static inline int on_sig_stack(unsigned long sp)
 
 static inline int sas_ss_flags(unsigned long sp)
 {
-	return (current->sas_ss_size == 0 ? SS_DISABLE
-		: on_sig_stack(sp) ? SS_ONSTACK : 0);
+	if (!current->sas_ss_size)
+		return SS_DISABLE;
+
+	return on_sig_stack(sp) ? SS_ONSTACK : 0;
 }
 
 static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
-- 
cgit v1.2.3


From c77dcacb397519b6ade8f08201a4a90a7f4f751e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Aug 2014 14:24:45 +0200
Subject: KVM: Move more code under CONFIG_HAVE_KVM_IRQFD

Commits e4d57e1ee1ab (KVM: Move irq notifier implementation into
eventfd.c, 2014-06-30) included the irq notifier code unconditionally
in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before.

Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling
Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_IRQ_ROUTING
to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be
under CONFIG_HAVE_KVM_IRQCHIP.

Together, this broke compilation without CONFIG_KVM_XICS.  Fix by adding
or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h   |   2 +
 include/trace/events/kvm.h |   8 +--
 virt/kvm/eventfd.c         | 122 ++++++++++++++++++++++-----------------------
 virt/kvm/kvm_main.c        |   2 +
 4 files changed, 69 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8593d2e61cbf..a4c33b34fe3f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -388,6 +388,8 @@ struct kvm {
 	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	struct hlist_head irq_ack_notifier_list;
 #endif
 
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 131a0bda7aec..908925ace776 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
 		  __entry->errno < 0 ? -__entry->errno : __entry->reason)
 );
 
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
 TRACE_EVENT(kvm_set_irq,
 	TP_PROTO(unsigned int gsi, int level, int irq_source_id),
 	TP_ARGS(gsi, level, irq_source_id),
@@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq,
 	TP_printk("gsi %u level %d source %d",
 		  __entry->gsi, __entry->level, __entry->irq_source_id)
 );
-#endif
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
 
 #if defined(__KVM_HAVE_IOAPIC)
 #define kvm_deliver_mode		\
@@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq,
 
 #endif /* defined(__KVM_HAVE_IOAPIC) */
 
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
 
 TRACE_EVENT(kvm_ack_irq,
 	TP_PROTO(unsigned int irqchip, unsigned int pin),
@@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq,
 #endif
 );
 
-#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
 
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index f5f61548f60d..3c5981c87c3f 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -445,6 +445,67 @@ out:
 	kfree(irqfd);
 	return ret;
 }
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				srcu_read_unlock(&kvm->irq_srcu, idx);
+				return true;
+			}
+
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	trace_kvm_ack_irq(irqchip, pin);
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_init_rcu(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_srcu(&kvm->irq_srcu);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
 #endif
 
 void
@@ -867,64 +928,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	return kvm_assign_ioeventfd(kvm, args);
 }
-
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi) {
-				srcu_read_unlock(&kvm->irq_srcu, idx);
-				return true;
-			}
-
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	trace_kvm_ack_irq(irqchip, pin);
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				    struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
-	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a69a623938b8..33712fb26eb1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -465,6 +465,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
-- 
cgit v1.2.3


From 372ba8cb46b271a7662b92cbefedee56725f6bd0 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 6 Aug 2014 14:19:21 +0100
Subject: cpuidle: menu: Lookup CPU runqueues less

The menu governer makes separate lookups of the CPU runqueue to get
load and number of IO waiters but it can be done with a single lookup.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c | 17 +++++++----------
 include/linux/sched.h            |  3 +--
 kernel/sched/core.c              |  7 +++++++
 kernel/sched/proc.c              |  7 -------
 4 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index f55d8260ec43..27702742b319 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -134,12 +134,9 @@ struct menu_device {
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
-static int get_loadavg(void)
+static inline int get_loadavg(unsigned long load)
 {
-	unsigned long this = this_cpu_load();
-
-
-	return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10;
+	return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;
 }
 
 static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
@@ -175,13 +172,13 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters
  * to be, the higher this multiplier, and thus the higher
  * the barrier to go to an expensive C state.
  */
-static inline int performance_multiplier(unsigned long nr_iowaiters)
+static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load)
 {
 	int mult = 1;
 
 	/* for higher loadavg, we are more reluctant */
 
-	mult += 2 * get_loadavg();
+	mult += 2 * get_loadavg(load);
 
 	/* for IO wait tasks (per cpu!) we add 5x each */
 	mult += 10 * nr_iowaiters;
@@ -296,7 +293,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	int i;
 	unsigned int interactivity_req;
-	unsigned long nr_iowaiters;
+	unsigned long nr_iowaiters, cpu_load;
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -312,7 +309,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	/* determine the expected residency time, round up */
 	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
 
-	nr_iowaiters = nr_iowait_cpu(smp_processor_id());
+	get_iowait_load(&nr_iowaiters, &cpu_load);
 	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
 
 	/*
@@ -331,7 +328,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * duration / latency ratio. Adjust the latency limit if
 	 * necessary.
 	 */
-	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters);
+	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
 	if (latency_req > interactivity_req)
 		latency_req = interactivity_req;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 306f4f0c987a..641bd954bb5d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -168,8 +168,7 @@ extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
-extern unsigned long this_cpu_load(void);
-
+extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3bdf01b494fe..863ef1d19563 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2385,6 +2385,13 @@ unsigned long nr_iowait_cpu(int cpu)
 	return atomic_read(&this->nr_iowait);
 }
 
+void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
+{
+	struct rq *this = this_rq();
+	*nr_waiters = atomic_read(&this->nr_iowait);
+	*load = this->cpu_load[0];
+}
+
 #ifdef CONFIG_SMP
 
 /*
diff --git a/kernel/sched/proc.c b/kernel/sched/proc.c
index 16f5a30f9c88..8ecd552fe4f2 100644
--- a/kernel/sched/proc.c
+++ b/kernel/sched/proc.c
@@ -8,13 +8,6 @@
 
 #include "sched.h"
 
-unsigned long this_cpu_load(void)
-{
-	struct rq *this = this_rq();
-	return this->cpu_load[0];
-}
-
-
 /*
  * Global load-average calculations
  *
-- 
cgit v1.2.3


From 8ba8fa917093510cdcb4ec8ff8b9603e1b525658 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 6 Aug 2014 16:03:26 -0700
Subject: fsnotify: rename event handling functions

Rename fsnotify_add_notify_event() to fsnotify_add_event() since the
"notify" part is duplicit.  Rename fsnotify_remove_notify_event() and
fsnotify_peek_notify_event() to fsnotify_remove_first_event() and
fsnotify_peek_first_event() respectively since "notify" part is duplicit
and they really look at the first event in the queue.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Eric Paris <eparis@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/notify/fanotify/fanotify.c        |  2 +-
 fs/notify/fanotify/fanotify_user.c   |  2 +-
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/inotify/inotify_user.c     |  4 ++--
 fs/notify/notification.c             | 19 ++++++++++---------
 include/linux/fsnotify_backend.h     | 12 ++++++------
 6 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index ee9cb3795c2b..fdeb36b70c65 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -210,7 +210,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 		return -ENOMEM;
 
 	fsn_event = &event->fse;
-	ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge);
+	ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
 	if (ret) {
 		/* Permission events shouldn't be merged */
 		BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3fdc8a3e1134..fbf2210823ab 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
 	/* held the notification_mutex the whole time, so this is the
 	 * same event we peeked above */
-	return fsnotify_remove_notify_event(group);
+	return fsnotify_remove_first_event(group);
 }
 
 static int create_fd(struct fsnotify_group *group,
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 43ab1e1a07a2..0f88bc0b4e6c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group,
 	if (len)
 		strcpy(event->name, file_name);
 
-	ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
+	ret = fsnotify_add_event(group, fsn_event, inotify_merge);
 	if (ret) {
 		/* Our event wasn't used in the end. Free it. */
 		fsnotify_destroy_event(group, fsn_event);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index cc423a30a0c8..daf76652fe58 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 	if (fsnotify_notify_queue_is_empty(group))
 		return NULL;
 
-	event = fsnotify_peek_notify_event(group);
+	event = fsnotify_peek_first_event(group);
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
@@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
 	/* held the notification_mutex the whole time, so this is the
 	 * same event we peeked above */
-	fsnotify_remove_notify_event(group);
+	fsnotify_remove_first_event(group);
 
 	return event;
 }
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 1e58402171a5..1d394220acbe 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -83,10 +83,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
  * added to the queue, 1 if the event was merged with some other queued event,
  * 2 if the queue of events has overflown.
  */
-int fsnotify_add_notify_event(struct fsnotify_group *group,
-			      struct fsnotify_event *event,
-			      int (*merge)(struct list_head *,
-					   struct fsnotify_event *))
+int fsnotify_add_event(struct fsnotify_group *group,
+		       struct fsnotify_event *event,
+		       int (*merge)(struct list_head *,
+				    struct fsnotify_event *))
 {
 	int ret = 0;
 	struct list_head *list = &group->notification_list;
@@ -128,7 +128,7 @@ queue:
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
  */
-struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
+struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
 {
 	struct fsnotify_event *event;
 
@@ -140,7 +140,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
 				 struct fsnotify_event, list);
 	/*
 	 * We need to init list head for the case of overflow event so that
-	 * check in fsnotify_add_notify_events() works
+	 * check in fsnotify_add_event() works
 	 */
 	list_del_init(&event->list);
 	group->q_len--;
@@ -149,9 +149,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
 }
 
 /*
- * This will not remove the event, that must be done with fsnotify_remove_notify_event()
+ * This will not remove the event, that must be done with
+ * fsnotify_remove_first_event()
  */
-struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
+struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
 {
 	BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
@@ -169,7 +170,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
 
 	mutex_lock(&group->notification_mutex);
 	while (!fsnotify_notify_queue_is_empty(group)) {
-		event = fsnotify_remove_notify_event(group);
+		event = fsnotify_remove_first_event(group);
 		fsnotify_destroy_event(group, event);
 	}
 	mutex_unlock(&group->notification_mutex);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index fc7718c6bd3e..a6e899943363 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -322,16 +322,16 @@ extern int fsnotify_fasync(int fd, struct file *file, int on);
 extern void fsnotify_destroy_event(struct fsnotify_group *group,
 				   struct fsnotify_event *event);
 /* attach the event to the group notification queue */
-extern int fsnotify_add_notify_event(struct fsnotify_group *group,
-				     struct fsnotify_event *event,
-				     int (*merge)(struct list_head *,
-						  struct fsnotify_event *));
+extern int fsnotify_add_event(struct fsnotify_group *group,
+			      struct fsnotify_event *event,
+			      int (*merge)(struct list_head *,
+					   struct fsnotify_event *));
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
-extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
+extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
 /* return AND dequeue the first event on the notification queue */
-extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
+extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group);
 
 /* functions used to manipulate the marks attached to inodes */
 
-- 
cgit v1.2.3


From 5838d4442bd5971687b72221736222637e03140d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 6 Aug 2014 16:03:28 -0700
Subject: fanotify: fix double free of pending permission events

Commit 85816794240b ("fanotify: Fix use after free for permission
events") introduced a double free issue for permission events which are
pending in group's notification queue while group is being destroyed.
These events are freed from fanotify_handle_event() but they are not
removed from groups notification queue and thus they get freed again
from fsnotify_flush_notify().

Fix the problem by removing permission events from notification queue
before freeing them if we skip processing access response.  Also expand
comments in fanotify_release() to explain group shutdown in detail.

Fixes: 85816794240b9659e66e4d9b0df7c6e814e5f603
Signed-off-by: Jan Kara <jack@suse.cz>
Reported-by: Douglas Leeder <douglas.leeder@sophos.com>
Tested-by: Douglas Leeder <douglas.leeder@sophos.com>
Reported-by: Heinrich Schuchard <xypron.glpk@gmx.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/notify/fanotify/fanotify.c      |  9 ++++++++-
 fs/notify/fanotify/fanotify_user.c | 12 ++++++++++++
 fs/notify/notification.c           | 18 +++++++++++++++++-
 include/linux/fsnotify_backend.h   |  2 ++
 4 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index fdeb36b70c65..30d3addfad75 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group,
 	wait_event(group->fanotify_data.access_waitq, event->response ||
 				atomic_read(&group->fanotify_data.bypass_perm));
 
-	if (!event->response) /* bypass_perm set */
+	if (!event->response) {	/* bypass_perm set */
+		/*
+		 * Event was canceled because group is being destroyed. Remove
+		 * it from group's event list because we are responsible for
+		 * freeing the permission event.
+		 */
+		fsnotify_remove_event(group, &event->fae.fse);
 		return 0;
+	}
 
 	/* userspace responded, convert to something usable */
 	switch (event->response) {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index fbf2210823ab..b13992a41bd9 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	struct fanotify_perm_event_info *event, *next;
 
+	/*
+	 * There may be still new events arriving in the notification queue
+	 * but since userspace cannot use fanotify fd anymore, no event can
+	 * enter or leave access_list by now.
+	 */
 	spin_lock(&group->fanotify_data.access_lock);
 
 	atomic_inc(&group->fanotify_data.bypass_perm);
@@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	}
 	spin_unlock(&group->fanotify_data.access_lock);
 
+	/*
+	 * Since bypass_perm is set, newly queued events will not wait for
+	 * access response. Wake up the already sleeping ones now.
+	 * synchronize_srcu() in fsnotify_destroy_group() will wait for all
+	 * processes sleeping in fanotify_handle_event() waiting for access
+	 * response and thus also for all permission events to be freed.
+	 */
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 1d394220acbe..a95d8e037aeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
 	/* Overflow events are per-group and we don't want to free them */
 	if (!event || event->mask == FS_Q_OVERFLOW)
 		return;
-
+	/* If the event is still queued, we have a problem... */
+	WARN_ON(!list_empty(&event->list));
 	group->ops->free_event(event);
 }
 
@@ -124,6 +125,21 @@ queue:
 	return ret;
 }
 
+/*
+ * Remove @event from group's notification queue. It is the responsibility of
+ * the caller to destroy the event.
+ */
+void fsnotify_remove_event(struct fsnotify_group *group,
+			   struct fsnotify_event *event)
+{
+	mutex_lock(&group->notification_mutex);
+	if (!list_empty(&event->list)) {
+		list_del_init(&event->list);
+		group->q_len--;
+	}
+	mutex_unlock(&group->notification_mutex);
+}
+
 /*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index a6e899943363..ca060d7c4fa6 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -326,6 +326,8 @@ extern int fsnotify_add_event(struct fsnotify_group *group,
 			      struct fsnotify_event *event,
 			      int (*merge)(struct list_head *,
 					   struct fsnotify_event *));
+/* Remove passed event from groups notification queue */
+extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
-- 
cgit v1.2.3


From e19318116048d5fbdb8d230d6d37625834b503cd Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Wed, 6 Aug 2014 16:04:59 -0700
Subject: mm/page_alloc.c: add __meminit to alloc_pages_exact_nid()

alloc_pages_exact_nid() is only called by __meminit alloc_page_cgroup()

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 mm/page_alloc.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6eb1fb37de9a..5e7219dc0fae 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
 void free_pages_exact(void *virt, size_t size);
 /* This is different from alloc_pages_exact_node !!! */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
 
 #define __get_free_page(gfp_mask) \
 		__get_free_pages((gfp_mask), 0)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef44ad736ca1..fd4322cc096d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2962,7 +2962,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
  * Note this is not alloc_pages_exact_node() which allocates on a specific node,
  * but is not exact.
  */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 {
 	unsigned order = get_order(size);
 	struct page *p = alloc_pages_node(nid, gfp_mask, order);
-- 
cgit v1.2.3


From 2cfb3665e864755400dc57b6ceee2ebb6b382910 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Wed, 6 Aug 2014 16:05:03 -0700
Subject: include/linux/memblock.h: add __init to memblock_set_bottom_up()

memblock_set_bottom_up() is only called by __init
cmdline_parse_movable_node() and __init numa_init().

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Reviewed-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b660e05b63d4..e8cc45307f8f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
 /*
  * Set the allocation direction to bottom-up or top-down.
  */
-static inline void memblock_set_bottom_up(bool enable)
+static inline void __init memblock_set_bottom_up(bool enable)
 {
 	memblock.bottom_up = enable;
 }
@@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void)
 	return memblock.bottom_up;
 }
 #else
-static inline void memblock_set_bottom_up(bool enable) {}
+static inline void __init memblock_set_bottom_up(bool enable) {}
 static inline bool memblock_bottom_up(void) { return false; }
 #endif
 
-- 
cgit v1.2.3


From 4f7c6b49c45a398d72763d1f0e64ddff8b3653c7 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Wed, 6 Aug 2014 16:05:13 -0700
Subject: mem-hotplug: introduce MMOP_OFFLINE to replace the hard coding -1

In store_mem_state(), we have:

  ...
  334         else if (!strncmp(buf, "offline", min_t(int, count, 7)))
  335                 online_type = -1;
  ...
  355         case -1:
  356                 ret = device_offline(&mem->dev);
  357                 break;
  ...

Here, "offline" is hard coded as -1.

This patch does the following renaming:

 ONLINE_KEEP     ->  MMOP_ONLINE_KEEP
 ONLINE_KERNEL   ->  MMOP_ONLINE_KERNEL
 ONLINE_MOVABLE  ->  MMOP_ONLINE_MOVABLE

and introduces MMOP_OFFLINE = -1 to avoid hard coding.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Hu Tao <hutao@cn.fujitsu.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c          | 18 +++++++++---------
 include/linux/memory_hotplug.h |  9 +++++----
 mm/memory_hotplug.c            |  9 ++++++---
 3 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c6707dfb5284..7c60ed27e711 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev)
 	 * attribute and need to set the online_type.
 	 */
 	if (mem->online_type < 0)
-		mem->online_type = ONLINE_KEEP;
+		mem->online_type = MMOP_ONLINE_KEEP;
 
 	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
 
@@ -316,22 +316,22 @@ store_mem_state(struct device *dev,
 		return ret;
 
 	if (sysfs_streq(buf, "online_kernel"))
-		online_type = ONLINE_KERNEL;
+		online_type = MMOP_ONLINE_KERNEL;
 	else if (sysfs_streq(buf, "online_movable"))
-		online_type = ONLINE_MOVABLE;
+		online_type = MMOP_ONLINE_MOVABLE;
 	else if (sysfs_streq(buf, "online"))
-		online_type = ONLINE_KEEP;
+		online_type = MMOP_ONLINE_KEEP;
 	else if (sysfs_streq(buf, "offline"))
-		online_type = -1;
+		online_type = MMOP_OFFLINE;
 	else {
 		ret = -EINVAL;
 		goto err;
 	}
 
 	switch (online_type) {
-	case ONLINE_KERNEL:
-	case ONLINE_MOVABLE:
-	case ONLINE_KEEP:
+	case MMOP_ONLINE_KERNEL:
+	case MMOP_ONLINE_MOVABLE:
+	case MMOP_ONLINE_KEEP:
 		/*
 		 * mem->online_type is not protected so there can be a
 		 * race here.  However, when racing online, the first
@@ -342,7 +342,7 @@ store_mem_state(struct device *dev,
 		mem->online_type = online_type;
 		ret = device_online(&mem->dev);
 		break;
-	case -1:
+	case MMOP_OFFLINE:
 		ret = device_offline(&mem->dev);
 		break;
 	default:
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 010d125bffbf..79dd9eca054f 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -26,11 +26,12 @@ enum {
 	MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
 };
 
-/* Types for control the zone type of onlined memory */
+/* Types for control the zone type of onlined and offlined memory */
 enum {
-	ONLINE_KEEP,
-	ONLINE_KERNEL,
-	ONLINE_MOVABLE,
+	MMOP_OFFLINE = -1,
+	MMOP_ONLINE_KEEP,
+	MMOP_ONLINE_KERNEL,
+	MMOP_ONLINE_MOVABLE,
 };
 
 /*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3557e8c9e8de..a3797d3fd8a4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	zone = page_zone(pfn_to_page(pfn));
 
 	ret = -EINVAL;
-	if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
+	if ((zone_idx(zone) > ZONE_NORMAL ||
+	    online_type == MMOP_ONLINE_MOVABLE) &&
 	    !can_online_high_movable(zone))
 		goto out;
 
-	if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
+	if (online_type == MMOP_ONLINE_KERNEL &&
+	    zone_idx(zone) == ZONE_MOVABLE) {
 		if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
 			goto out;
 	}
-	if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
+	if (online_type == MMOP_ONLINE_MOVABLE &&
+	    zone_idx(zone) == ZONE_MOVABLE - 1) {
 		if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
 			goto out;
 	}
-- 
cgit v1.2.3


From a254129e8686bff7a340b58f35241b04927e81c0 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Wed, 6 Aug 2014 16:05:25 -0700
Subject: CMA: generalize CMA reserved area management functionality

Currently, there are two users on CMA functionality, one is the DMA
subsystem and the other is the KVM on powerpc.  They have their own code
to manage CMA reserved area even if they looks really similar.  From my
guess, it is caused by some needs on bitmap management.  KVM side wants
to maintain bitmap not for 1 page, but for more size.  Eventually it use
bitmap where one bit represents 64 pages.

When I implement CMA related patches, I should change those two places
to apply my change and it seem to be painful to me.  I want to change
this situation and reduce future code management overhead through this
patch.

This change could also help developer who want to use CMA in their new
feature development, since they can use CMA easily without copying &
pasting this reserved area management code.

In previous patches, we have prepared some features to generalize CMA
reserved area management and now it's time to do it.  This patch moves
core functions to mm/cma.c and change DMA APIs to use these functions.

There is no functional change in DMA APIs.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Gleb Natapov <gleb@kernel.org>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/dma-mapping.c      |   1 +
 drivers/base/Kconfig           |  10 --
 drivers/base/dma-contiguous.c  | 280 +---------------------------------
 include/linux/cma.h            |  27 ++++
 include/linux/dma-contiguous.h |  11 +-
 mm/Kconfig                     |  11 ++
 mm/Makefile                    |   1 +
 mm/cma.c                       | 333 +++++++++++++++++++++++++++++++++++++++++
 8 files changed, 383 insertions(+), 291 deletions(-)
 create mode 100644 include/linux/cma.h
 create mode 100644 mm/cma.c

(limited to 'include/linux')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1f88db06b133..7a996aaa061e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -26,6 +26,7 @@
 #include <linux/io.h>
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>
+#include <linux/cma.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 88500fed3c7a..4e7f0ff83ae7 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -289,16 +289,6 @@ config CMA_ALIGNMENT
 
 	  If unsure, leave the default value "8".
 
-config CMA_AREAS
-	int "Maximum count of the CMA device-private areas"
-	default 7
-	help
-	  CMA allows to create CMA areas for particular devices. This parameter
-	  sets the maximum number of such device private CMA areas in the
-	  system.
-
-	  If unsure, leave the default value "7".
-
 endif
 
 endmenu
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index ad8a85bf852f..0411c1c57005 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -24,25 +24,9 @@
 
 #include <linux/memblock.h>
 #include <linux/err.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-isolation.h>
 #include <linux/sizes.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/mm_types.h>
 #include <linux/dma-contiguous.h>
-#include <linux/log2.h>
-
-struct cma {
-	unsigned long	base_pfn;
-	unsigned long	count;
-	unsigned long	*bitmap;
-	unsigned int order_per_bit; /* Order of pages represented by one bit */
-	struct mutex	lock;
-};
-
-struct cma *dma_contiguous_default_area;
+#include <linux/cma.h>
 
 #ifdef CONFIG_CMA_SIZE_MBYTES
 #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -50,6 +34,8 @@ struct cma *dma_contiguous_default_area;
 #define CMA_SIZE_MBYTES 0
 #endif
 
+struct cma *dma_contiguous_default_area;
+
 /*
  * Default global CMA area size can be defined in kernel's .config.
  * This is useful mainly for distro maintainers to create a kernel
@@ -156,169 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
 	}
 }
 
-static DEFINE_MUTEX(cma_mutex);
-
-static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
-{
-	return (1UL << (align_order >> cma->order_per_bit)) - 1;
-}
-
-static unsigned long cma_bitmap_maxno(struct cma *cma)
-{
-	return cma->count >> cma->order_per_bit;
-}
-
-static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
-						unsigned long pages)
-{
-	return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
-}
-
-static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
-{
-	unsigned long bitmap_no, bitmap_count;
-
-	bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
-	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
-
-	mutex_lock(&cma->lock);
-	bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
-	mutex_unlock(&cma->lock);
-}
-
-static int __init cma_activate_area(struct cma *cma)
-{
-	int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
-	unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
-	unsigned i = cma->count >> pageblock_order;
-	struct zone *zone;
-
-	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-
-	if (!cma->bitmap)
-		return -ENOMEM;
-
-	WARN_ON_ONCE(!pfn_valid(pfn));
-	zone = page_zone(pfn_to_page(pfn));
-
-	do {
-		unsigned j;
-		base_pfn = pfn;
-		for (j = pageblock_nr_pages; j; --j, pfn++) {
-			WARN_ON_ONCE(!pfn_valid(pfn));
-			/*
-			 * alloc_contig_range requires the pfn range
-			 * specified to be in the same zone. Make this
-			 * simple by forcing the entire CMA resv range
-			 * to be in the same zone.
-			 */
-			if (page_zone(pfn_to_page(pfn)) != zone)
-				goto err;
-		}
-		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
-	} while (--i);
-
-	mutex_init(&cma->lock);
-	return 0;
-
-err:
-	kfree(cma->bitmap);
-	return -EINVAL;
-}
-
-static struct cma cma_areas[MAX_CMA_AREAS];
-static unsigned cma_area_count;
-
-static int __init cma_init_reserved_areas(void)
-{
-	int i;
-
-	for (i = 0; i < cma_area_count; i++) {
-		int ret = cma_activate_area(&cma_areas[i]);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-core_initcall(cma_init_reserved_areas);
-
-static int __init __dma_contiguous_reserve_area(phys_addr_t size,
-			phys_addr_t base, phys_addr_t limit,
-			phys_addr_t alignment, unsigned int order_per_bit,
-			struct cma **res_cma, bool fixed)
-{
-	struct cma *cma = &cma_areas[cma_area_count];
-	int ret = 0;
-
-	pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
-		__func__, (unsigned long)size, (unsigned long)base,
-		(unsigned long)limit, (unsigned long)alignment);
-
-	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
-		pr_err("Not enough slots for CMA reserved regions!\n");
-		return -ENOSPC;
-	}
-
-	if (!size)
-		return -EINVAL;
-
-	if (alignment && !is_power_of_2(alignment))
-		return -EINVAL;
-
-	/*
-	 * Sanitise input arguments.
-	 * Pages both ends in CMA area could be merged into adjacent unmovable
-	 * migratetype page by page allocator's buddy algorithm. In the case,
-	 * you couldn't get a contiguous memory, which is not what we want.
-	 */
-	alignment = max(alignment,
-		(phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
-	base = ALIGN(base, alignment);
-	size = ALIGN(size, alignment);
-	limit &= ~(alignment - 1);
-
-	/* size should be aligned with order_per_bit */
-	if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
-		return -EINVAL;
-
-	/* Reserve memory */
-	if (base && fixed) {
-		if (memblock_is_region_reserved(base, size) ||
-		    memblock_reserve(base, size) < 0) {
-			ret = -EBUSY;
-			goto err;
-		}
-	} else {
-		phys_addr_t addr = memblock_alloc_range(size, alignment, base,
-							limit);
-		if (!addr) {
-			ret = -ENOMEM;
-			goto err;
-		} else {
-			base = addr;
-		}
-	}
-
-	/*
-	 * Each reserved area must be initialised later, when more kernel
-	 * subsystems (like slab allocator) are available.
-	 */
-	cma->base_pfn = PFN_DOWN(base);
-	cma->count = size >> PAGE_SHIFT;
-	cma->order_per_bit = order_per_bit;
-	*res_cma = cma;
-	cma_area_count++;
-
-	pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
-		(unsigned long)base);
-	return 0;
-
-err:
-	pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
-	return ret;
-}
-
 /**
  * dma_contiguous_reserve_area() - reserve custom contiguous area
  * @size: Size of the reserved area (in bytes),
@@ -342,77 +165,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
 {
 	int ret;
 
-	ret = __dma_contiguous_reserve_area(size, base, limit, 0, 0,
-						res_cma, fixed);
+	ret = cma_declare_contiguous(size, base, limit, 0, 0, res_cma, fixed);
 	if (ret)
 		return ret;
 
 	/* Architecture specific contiguous memory fixup. */
-	dma_contiguous_early_fixup(PFN_PHYS((*res_cma)->base_pfn),
-				(*res_cma)->count << PAGE_SHIFT);
+	dma_contiguous_early_fixup(cma_get_base(*res_cma),
+				cma_get_size(*res_cma));
 
 	return 0;
 }
 
-static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count,
-				       unsigned int align)
-{
-	unsigned long mask, pfn, start = 0;
-	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
-	struct page *page = NULL;
-	int ret;
-
-	if (!cma || !cma->count)
-		return NULL;
-
-	pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
-		 count, align);
-
-	if (!count)
-		return NULL;
-
-	mask = cma_bitmap_aligned_mask(cma, align);
-	bitmap_maxno = cma_bitmap_maxno(cma);
-	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
-
-	for (;;) {
-		mutex_lock(&cma->lock);
-		bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
-				bitmap_maxno, start, bitmap_count, mask);
-		if (bitmap_no >= bitmap_maxno) {
-			mutex_unlock(&cma->lock);
-			break;
-		}
-		bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
-		/*
-		 * It's safe to drop the lock here. We've marked this region for
-		 * our exclusive use. If the migration fails we will take the
-		 * lock again and unmark it.
-		 */
-		mutex_unlock(&cma->lock);
-
-		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
-		mutex_lock(&cma_mutex);
-		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
-		mutex_unlock(&cma_mutex);
-		if (ret == 0) {
-			page = pfn_to_page(pfn);
-			break;
-		} else if (ret != -EBUSY) {
-			cma_clear_bitmap(cma, pfn, count);
-			break;
-		}
-		cma_clear_bitmap(cma, pfn, count);
-		pr_debug("%s(): memory range at %p is busy, retrying\n",
-			 __func__, pfn_to_page(pfn));
-		/* try again with a bit different memory target */
-		start = bitmap_no + mask + 1;
-	}
-
-	pr_debug("%s(): returned %p\n", __func__, page);
-	return page;
-}
-
 /**
  * dma_alloc_from_contiguous() - allocate pages from contiguous area
  * @dev:   Pointer to device for which the allocation is performed.
@@ -427,35 +190,10 @@ static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count,
 struct page *dma_alloc_from_contiguous(struct device *dev, int count,
 				       unsigned int align)
 {
-	struct cma *cma = dev_get_cma_area(dev);
-
 	if (align > CONFIG_CMA_ALIGNMENT)
 		align = CONFIG_CMA_ALIGNMENT;
 
-	return __dma_alloc_from_contiguous(cma, count, align);
-}
-
-static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages,
-				 int count)
-{
-	unsigned long pfn;
-
-	if (!cma || !pages)
-		return false;
-
-	pr_debug("%s(page %p)\n", __func__, (void *)pages);
-
-	pfn = page_to_pfn(pages);
-
-	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
-		return false;
-
-	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
-
-	free_contig_range(pfn, count);
-	cma_clear_bitmap(cma, pfn, count);
-
-	return true;
+	return cma_alloc(dev_get_cma_area(dev), count, align);
 }
 
 /**
@@ -471,7 +209,5 @@ static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages,
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 				 int count)
 {
-	struct cma *cma = dev_get_cma_area(dev);
-
-	return __dma_release_from_contiguous(cma, pages, count);
+	return cma_release(dev_get_cma_area(dev), pages, count);
 }
diff --git a/include/linux/cma.h b/include/linux/cma.h
new file mode 100644
index 000000000000..f6f7809acb98
--- /dev/null
+++ b/include/linux/cma.h
@@ -0,0 +1,27 @@
+#ifndef __CMA_H__
+#define __CMA_H__
+
+/*
+ * There is always at least global CMA area and a few optional
+ * areas configured in kernel .config.
+ */
+#ifdef CONFIG_CMA_AREAS
+#define MAX_CMA_AREAS	(1 + CONFIG_CMA_AREAS)
+
+#else
+#define MAX_CMA_AREAS	(0)
+
+#endif
+
+struct cma;
+
+extern phys_addr_t cma_get_base(struct cma *cma);
+extern unsigned long cma_get_size(struct cma *cma);
+
+extern int __init cma_declare_contiguous(phys_addr_t size,
+			phys_addr_t base, phys_addr_t limit,
+			phys_addr_t alignment, unsigned int order_per_bit,
+			struct cma **res_cma, bool fixed);
+extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
+extern bool cma_release(struct cma *cma, struct page *pages, int count);
+#endif
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 772eab5d524a..569bbd039896 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -53,18 +53,13 @@
 
 #ifdef __KERNEL__
 
+#include <linux/device.h>
+
 struct cma;
 struct page;
-struct device;
 
 #ifdef CONFIG_DMA_CMA
 
-/*
- * There is always at least global CMA area and a few optional device
- * private areas configured in kernel .config.
- */
-#define MAX_CMA_AREAS	(1 + CONFIG_CMA_AREAS)
-
 extern struct cma *dma_contiguous_default_area;
 
 static inline struct cma *dev_get_cma_area(struct device *dev)
@@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 
 #else
 
-#define MAX_CMA_AREAS	(0)
-
 static inline struct cma *dev_get_cma_area(struct device *dev)
 {
 	return NULL;
diff --git a/mm/Kconfig b/mm/Kconfig
index 3e9977a9d657..f4899ec39cf4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -508,6 +508,17 @@ config CMA_DEBUG
 	  processing calls such as dma_alloc_from_contiguous().
 	  This option does not affect warning and error messages.
 
+config CMA_AREAS
+	int "Maximum count of the CMA areas"
+	depends on CMA
+	default 7
+	help
+	  CMA allows to create CMA areas for particular purpose, mainly,
+	  used as device private area. This parameter sets the maximum
+	  number of CMA area in the system.
+
+	  If unsure, leave the default value "7".
+
 config ZBUD
 	tristate
 	default n
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..8338473c329a 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -62,3 +62,4 @@ obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
 obj-$(CONFIG_ZBUD)	+= zbud.o
 obj-$(CONFIG_ZSMALLOC)	+= zsmalloc.o
 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
+obj-$(CONFIG_CMA)	+= cma.o
diff --git a/mm/cma.c b/mm/cma.c
new file mode 100644
index 000000000000..656004216953
--- /dev/null
+++ b/mm/cma.c
@@ -0,0 +1,333 @@
+/*
+ * Contiguous Memory Allocator
+ *
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Copyright IBM Corporation, 2013
+ * Copyright LG Electronics Inc., 2014
+ * Written by:
+ *	Marek Szyprowski <m.szyprowski@samsung.com>
+ *	Michal Nazarewicz <mina86@mina86.com>
+ *	Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *	Joonsoo Kim <iamjoonsoo.kim@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/log2.h>
+#include <linux/cma.h>
+
+struct cma {
+	unsigned long	base_pfn;
+	unsigned long	count;
+	unsigned long	*bitmap;
+	unsigned int order_per_bit; /* Order of pages represented by one bit */
+	struct mutex	lock;
+};
+
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
+static DEFINE_MUTEX(cma_mutex);
+
+phys_addr_t cma_get_base(struct cma *cma)
+{
+	return PFN_PHYS(cma->base_pfn);
+}
+
+unsigned long cma_get_size(struct cma *cma)
+{
+	return cma->count << PAGE_SHIFT;
+}
+
+static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
+{
+	return (1UL << (align_order >> cma->order_per_bit)) - 1;
+}
+
+static unsigned long cma_bitmap_maxno(struct cma *cma)
+{
+	return cma->count >> cma->order_per_bit;
+}
+
+static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
+						unsigned long pages)
+{
+	return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
+}
+
+static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
+{
+	unsigned long bitmap_no, bitmap_count;
+
+	bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
+	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+	mutex_lock(&cma->lock);
+	bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
+	mutex_unlock(&cma->lock);
+}
+
+static int __init cma_activate_area(struct cma *cma)
+{
+	int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
+	unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+	unsigned i = cma->count >> pageblock_order;
+	struct zone *zone;
+
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+	if (!cma->bitmap)
+		return -ENOMEM;
+
+	WARN_ON_ONCE(!pfn_valid(pfn));
+	zone = page_zone(pfn_to_page(pfn));
+
+	do {
+		unsigned j;
+
+		base_pfn = pfn;
+		for (j = pageblock_nr_pages; j; --j, pfn++) {
+			WARN_ON_ONCE(!pfn_valid(pfn));
+			/*
+			 * alloc_contig_range requires the pfn range
+			 * specified to be in the same zone. Make this
+			 * simple by forcing the entire CMA resv range
+			 * to be in the same zone.
+			 */
+			if (page_zone(pfn_to_page(pfn)) != zone)
+				goto err;
+		}
+		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+	} while (--i);
+
+	mutex_init(&cma->lock);
+	return 0;
+
+err:
+	kfree(cma->bitmap);
+	return -EINVAL;
+}
+
+static int __init cma_init_reserved_areas(void)
+{
+	int i;
+
+	for (i = 0; i < cma_area_count; i++) {
+		int ret = cma_activate_area(&cma_areas[i]);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+core_initcall(cma_init_reserved_areas);
+
+/**
+ * cma_declare_contiguous() - reserve custom contiguous area
+ * @size: Size of the reserved area (in bytes),
+ * @base: Base address of the reserved area optional, use 0 for any
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ * @alignment: Alignment for the CMA area, should be power of 2 or zero
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @res_cma: Pointer to store the created cma region.
+ * @fixed: hint about where to place the reserved area
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows to create custom reserved areas.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base.  If false,
+ * reserve in range from @base to @limit.
+ */
+int __init cma_declare_contiguous(phys_addr_t size,
+			phys_addr_t base, phys_addr_t limit,
+			phys_addr_t alignment, unsigned int order_per_bit,
+			struct cma **res_cma, bool fixed)
+{
+	struct cma *cma = &cma_areas[cma_area_count];
+	int ret = 0;
+
+	pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
+		__func__, (unsigned long)size, (unsigned long)base,
+		(unsigned long)limit, (unsigned long)alignment);
+
+	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+		pr_err("Not enough slots for CMA reserved regions!\n");
+		return -ENOSPC;
+	}
+
+	if (!size)
+		return -EINVAL;
+
+	if (alignment && !is_power_of_2(alignment))
+		return -EINVAL;
+
+	/*
+	 * Sanitise input arguments.
+	 * Pages both ends in CMA area could be merged into adjacent unmovable
+	 * migratetype page by page allocator's buddy algorithm. In the case,
+	 * you couldn't get a contiguous memory, which is not what we want.
+	 */
+	alignment = max(alignment,
+		(phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+	base = ALIGN(base, alignment);
+	size = ALIGN(size, alignment);
+	limit &= ~(alignment - 1);
+
+	/* size should be aligned with order_per_bit */
+	if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
+		return -EINVAL;
+
+	/* Reserve memory */
+	if (base && fixed) {
+		if (memblock_is_region_reserved(base, size) ||
+		    memblock_reserve(base, size) < 0) {
+			ret = -EBUSY;
+			goto err;
+		}
+	} else {
+		phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+							limit);
+		if (!addr) {
+			ret = -ENOMEM;
+			goto err;
+		} else {
+			base = addr;
+		}
+	}
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	cma->base_pfn = PFN_DOWN(base);
+	cma->count = size >> PAGE_SHIFT;
+	cma->order_per_bit = order_per_bit;
+	*res_cma = cma;
+	cma_area_count++;
+
+	pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
+		(unsigned long)base);
+	return 0;
+
+err:
+	pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+	return ret;
+}
+
+/**
+ * cma_alloc() - allocate pages from contiguous area
+ * @cma:   Contiguous memory region for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ *
+ * This function allocates part of contiguous memory on specific
+ * contiguous memory area.
+ */
+struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
+{
+	unsigned long mask, pfn, start = 0;
+	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+	struct page *page = NULL;
+	int ret;
+
+	if (!cma || !cma->count)
+		return NULL;
+
+	pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+		 count, align);
+
+	if (!count)
+		return NULL;
+
+	mask = cma_bitmap_aligned_mask(cma, align);
+	bitmap_maxno = cma_bitmap_maxno(cma);
+	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+	for (;;) {
+		mutex_lock(&cma->lock);
+		bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
+				bitmap_maxno, start, bitmap_count, mask);
+		if (bitmap_no >= bitmap_maxno) {
+			mutex_unlock(&cma->lock);
+			break;
+		}
+		bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
+		/*
+		 * It's safe to drop the lock here. We've marked this region for
+		 * our exclusive use. If the migration fails we will take the
+		 * lock again and unmark it.
+		 */
+		mutex_unlock(&cma->lock);
+
+		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
+		mutex_lock(&cma_mutex);
+		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+		mutex_unlock(&cma_mutex);
+		if (ret == 0) {
+			page = pfn_to_page(pfn);
+			break;
+		} else if (ret != -EBUSY) {
+			cma_clear_bitmap(cma, pfn, count);
+			break;
+		}
+		cma_clear_bitmap(cma, pfn, count);
+		pr_debug("%s(): memory range at %p is busy, retrying\n",
+			 __func__, pfn_to_page(pfn));
+		/* try again with a bit different memory target */
+		start = bitmap_no + mask + 1;
+	}
+
+	pr_debug("%s(): returned %p\n", __func__, page);
+	return page;
+}
+
+/**
+ * cma_release() - release allocated pages
+ * @cma:   Contiguous memory region for which the allocation is performed.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool cma_release(struct cma *cma, struct page *pages, int count)
+{
+	unsigned long pfn;
+
+	if (!cma || !pages)
+		return false;
+
+	pr_debug("%s(page %p)\n", __func__, (void *)pages);
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
+
+	free_contig_range(pfn, count);
+	cma_clear_bitmap(cma, pfn, count);
+
+	return true;
+}
-- 
cgit v1.2.3


From c1f733aaaf30a0068a3126d5aa9d5b4c25ba4c0c Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Wed, 6 Aug 2014 16:05:32 -0700
Subject: mm, CMA: change cma_declare_contiguous() to obey coding convention

Conventionally, we put output param to the end of param list and put the
'base' ahead of 'size', but cma_declare_contiguous() doesn't look like
that, so change it.

Additionally, move down cma_areas reference code to the position where
it is really needed.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Gleb Natapov <gleb@kernel.org>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kvm/book3s_hv_builtin.c |  4 ++--
 drivers/base/dma-contiguous.c        |  2 +-
 include/linux/cma.h                  |  2 +-
 mm/cma.c                             | 13 +++++++------
 4 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 3960e0bceaf2..6cf498a9bc98 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -185,8 +185,8 @@ void __init kvm_cma_reserve(void)
 			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 
 		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
-		cma_declare_contiguous(selected_size, 0, 0, align_size,
-			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, &kvm_cma, false);
+		cma_declare_contiguous(0, selected_size, 0, align_size,
+			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
 	}
 }
 
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 0411c1c57005..6606abdf880c 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -165,7 +165,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
 {
 	int ret;
 
-	ret = cma_declare_contiguous(size, base, limit, 0, 0, res_cma, fixed);
+	ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/cma.h b/include/linux/cma.h
index f6f7809acb98..371b93042520 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -21,7 +21,7 @@ extern unsigned long cma_get_size(struct cma *cma);
 extern int __init cma_declare_contiguous(phys_addr_t size,
 			phys_addr_t base, phys_addr_t limit,
 			phys_addr_t alignment, unsigned int order_per_bit,
-			struct cma **res_cma, bool fixed);
+			bool fixed, struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
 extern bool cma_release(struct cma *cma, struct page *pages, int count);
 #endif
diff --git a/mm/cma.c b/mm/cma.c
index 103a6663b7c7..488e50810ed1 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -141,13 +141,13 @@ core_initcall(cma_init_reserved_areas);
 
 /**
  * cma_declare_contiguous() - reserve custom contiguous area
- * @size: Size of the reserved area (in bytes),
  * @base: Base address of the reserved area optional, use 0 for any
+ * @size: Size of the reserved area (in bytes),
  * @limit: End address of the reserved memory (optional, 0 for any).
  * @alignment: Alignment for the CMA area, should be power of 2 or zero
  * @order_per_bit: Order of pages represented by one bit on bitmap.
- * @res_cma: Pointer to store the created cma region.
  * @fixed: hint about where to place the reserved area
+ * @res_cma: Pointer to store the created cma region.
  *
  * This function reserves memory from early allocator. It should be
  * called by arch specific code once the early allocator (memblock or bootmem)
@@ -157,12 +157,12 @@ core_initcall(cma_init_reserved_areas);
  * If @fixed is true, reserve contiguous area at exactly @base.  If false,
  * reserve in range from @base to @limit.
  */
-int __init cma_declare_contiguous(phys_addr_t size,
-			phys_addr_t base, phys_addr_t limit,
+int __init cma_declare_contiguous(phys_addr_t base,
+			phys_addr_t size, phys_addr_t limit,
 			phys_addr_t alignment, unsigned int order_per_bit,
-			struct cma **res_cma, bool fixed)
+			bool fixed, struct cma **res_cma)
 {
-	struct cma *cma = &cma_areas[cma_area_count];
+	struct cma *cma;
 	int ret = 0;
 
 	pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
@@ -218,6 +218,7 @@ int __init cma_declare_contiguous(phys_addr_t size,
 	 * Each reserved area must be initialised later, when more kernel
 	 * subsystems (like slab allocator) are available.
 	 */
+	cma = &cma_areas[cma_area_count];
 	cma->base_pfn = PFN_DOWN(base);
 	cma->count = size >> PAGE_SHIFT;
 	cma->order_per_bit = order_per_bit;
-- 
cgit v1.2.3


From 2f3e442ccceb85c51c7dffd3799bfd84de213874 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 6 Aug 2014 16:05:40 -0700
Subject: mm: page-flags: clean up the page flag test, set, clear macros

- PAGEFLAG_FALSE only defines TEST, make it define SET and CLEAR as
  well, analogous to PAGEFLAG.

- Define TESTSETFLAG_FALSE, analogous to TESTSETFLAG.

- Define TESTSCFLAG_FALSE, analogous to TESTSCFLAG

- Make PG_mlocked accessors the same on both MMU and !MMU setups

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8304959ad336..e1f5fcd79792 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page)		\
 #define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname)		\
 	__SETPAGEFLAG(uname, lname)  __CLEARPAGEFLAG(uname, lname)
 
-#define PAGEFLAG_FALSE(uname) 						\
-static inline int Page##uname(const struct page *page)			\
-			{ return 0; }
-
 #define TESTSCFLAG(uname, lname)					\
 	TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
 
+#define TESTPAGEFLAG_FALSE(uname)					\
+static inline int Page##uname(const struct page *page) { return 0; }
+
 #define SETPAGEFLAG_NOOP(uname)						\
 static inline void SetPage##uname(struct page *page) {  }
 
@@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) {  }
 #define __CLEARPAGEFLAG_NOOP(uname)					\
 static inline void __ClearPage##uname(struct page *page) {  }
 
+#define TESTSETFLAG_FALSE(uname)					\
+static inline int TestSetPage##uname(struct page *page) { return 0; }
+
 #define TESTCLEARFLAG_FALSE(uname)					\
 static inline int TestClearPage##uname(struct page *page) { return 0; }
 
 #define __TESTCLEARFLAG_FALSE(uname)					\
 static inline int __TestClearPage##uname(struct page *page) { return 0; }
 
+#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname)			\
+	SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+
+#define TESTSCFLAG_FALSE(uname)						\
+	TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+
 struct page;	/* forward declaration */
 
 TESTPAGEFLAG(Locked, locked)
@@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem)
 PAGEFLAG(SwapCache, swapcache)
 #else
 PAGEFLAG_FALSE(SwapCache)
-	SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
 #endif
 
 PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
@@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
 	TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
 #else
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
-	TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
+	TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
 #endif
 
 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
-- 
cgit v1.2.3


From 1a4dc5bc7cb5659a8004d105afeb0571126f8f56 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 6 Aug 2014 16:06:08 -0700
Subject: mem-hotplug: improve zone_movable_is_highmem logic

In original code, zone_movable_is_highmem() assumes ZONE_MOVABLE not
highmem if CONFIG_HAVE_MEMBLOCK_NODE_MAP is not set.  In online_pages,
it extracts pages from the previous zone before ZONE_MOVABLE.  Which is
logically inconsistent:

If HAVE_MEMBLOCK_NODE_MAP is turned off but HIGHMEM is on,
zone_movable_is_highmem() makes movable zone not highmem, but
online_pages() extracts pages from ZONE_HIGHMEM.

This inconsistency doesn't cause real problem currently, because all
architectures support online_pages also have HAVE_MEMBLOCK_NODE_MAP.
However, fixing it makes code clear, and also helps futher coding.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Zhang Zhen <zhangzhen@huawei.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6cbd1b6c3d20..559e659288fc 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -872,6 +872,8 @@ static inline int zone_movable_is_highmem(void)
 {
 #if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
 	return movable_zone == ZONE_HIGHMEM;
+#elif defined(CONFIG_HIGHMEM)
+	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
 #else
 	return 0;
 #endif
-- 
cgit v1.2.3


From ef6b571fb8920d5006349a6e29ac47c4817e9691 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 6 Aug 2014 16:06:30 -0700
Subject: include/linux/mmdebug.h: add VM_WARN_ONCE()

It was missing...

Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index edd82a105220..2f348d02f640 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
 	} while (0)
 #define VM_WARN_ON(cond) WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
+#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
 #else
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #endif
 
 #ifdef CONFIG_DEBUG_VIRTUAL
-- 
cgit v1.2.3


From eb39d618f9e80f81cfc5788cf1b252d141c2f0c3 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 6 Aug 2014 16:06:43 -0700
Subject: mm: replace init_page_accessed by __SetPageReferenced

Do we really need an exported alias for __SetPageReferenced()? Its
callers better know what they're doing, in which case the page would not
be already marked referenced.  Kill init_page_accessed(), just
__SetPageReferenced() inline.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Prabhakar Lad <prabhakar.csengg@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  1 -
 mm/filemap.c         |  4 ++--
 mm/shmem.c           |  2 +-
 mm/swap.c            | 14 +++-----------
 4 files changed, 6 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4bdbee80eede..1eb64043c076 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail,
 			 struct lruvec *lruvec, struct list_head *head);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
-extern void init_page_accessed(struct page *page);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
diff --git a/mm/filemap.c b/mm/filemap.c
index 65d44fd88c78..7e85c8147e1b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1091,9 +1091,9 @@ no_page:
 		if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
 			fgp_flags |= FGP_LOCK;
 
-		/* Init accessed so avoit atomic mark_page_accessed later */
+		/* Init accessed so avoid atomic mark_page_accessed later */
 		if (fgp_flags & FGP_ACCESSED)
-			init_page_accessed(page);
+			__SetPageReferenced(page);
 
 		err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
 		if (unlikely(err)) {
diff --git a/mm/shmem.c b/mm/shmem.c
index 57fd82a5af7a..fe15d96c3166 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1166,7 +1166,7 @@ repeat:
 		__SetPageSwapBacked(page);
 		__set_page_locked(page);
 		if (sgp == SGP_WRITE)
-			init_page_accessed(page);
+			__SetPageReferenced(page);
 
 		error = mem_cgroup_charge_file(page, current->mm,
 						gfp & GFP_RECLAIM_MASK);
diff --git a/mm/swap.c b/mm/swap.c
index 9e8e3472248b..d8eb4d09ffa2 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -589,6 +589,9 @@ static void __lru_cache_activate_page(struct page *page)
  * inactive,unreferenced	->	inactive,referenced
  * inactive,referenced		->	active,unreferenced
  * active,unreferenced		->	active,referenced
+ *
+ * When a newly allocated page is not yet visible, so safe for non-atomic ops,
+ * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
  */
 void mark_page_accessed(struct page *page)
 {
@@ -614,17 +617,6 @@ void mark_page_accessed(struct page *page)
 }
 EXPORT_SYMBOL(mark_page_accessed);
 
-/*
- * Used to mark_page_accessed(page) that is not visible yet and when it is
- * still safe to use non-atomic ops
- */
-void init_page_accessed(struct page *page)
-{
-	if (!PageReferenced(page))
-		__SetPageReferenced(page);
-}
-EXPORT_SYMBOL(init_page_accessed);
-
 static void __lru_cache_add(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
-- 
cgit v1.2.3


From ed4d4902ebdd7ca8b5a51daaf6bebf4b172895cc Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 6 Aug 2014 16:06:54 -0700
Subject: mm, hugetlb: remove hugetlb_zero and hugetlb_infinity

They are unnecessary: "zero" can be used in place of "hugetlb_zero" and
passing extra2 == NULL is equivalent to infinity.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 1 -
 kernel/sysctl.c         | 9 +++------
 mm/hugetlb.c            | 1 -
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a23c096b3080..6e6d338641fe 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
 #endif
 
 extern unsigned long hugepages_treat_as_movable;
-extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 extern struct list_head huge_boot_pages;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75b22e22a72c..75875a741b5e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = {
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= hugetlb_sysctl_handler,
-		.extra1		= (void *)&hugetlb_zero,
-		.extra2		= (void *)&hugetlb_infinity,
+		.extra1		= &zero,
 	},
 #ifdef CONFIG_NUMA
 	{
@@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = {
 		.maxlen         = sizeof(unsigned long),
 		.mode           = 0644,
 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
-		.extra1		= (void *)&hugetlb_zero,
-		.extra2		= (void *)&hugetlb_infinity,
+		.extra1		= &zero,
 	},
 #endif
 	 {
@@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = {
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= hugetlb_overcommit_handler,
-		.extra1		= (void *)&hugetlb_zero,
-		.extra2		= (void *)&hugetlb_infinity,
+		.extra1		= &zero,
 	},
 #endif
 	{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7a0fcb33973e..d9ad93b55585 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,6 @@
 #include <linux/node.h>
 #include "internal.h"
 
-const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 unsigned long hugepages_treat_as_movable;
 
 int hugetlb_max_hstate __read_mostly;
-- 
cgit v1.2.3


From 21bda264f4243f61dfcc485174055f12ad0530b4 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Wed, 6 Aug 2014 16:06:56 -0700
Subject: mm: make copy_pte_range static again

Commit 71e3aac0724f ("thp: transparent hugepage core") adds
copy_pte_range prototype to huge_mm.h.  I'm not sure why (or if) this
function have been used outside of memory.c, but it currently isn't.
This patch makes copy_pte_range() static again.

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 4 ----
 mm/memory.c             | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b826239bdce0..63579cb8d3dc 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 #endif /* CONFIG_DEBUG_VM */
 
 extern unsigned long transparent_hugepage_flags;
-extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-			  pmd_t *dst_pmd, pmd_t *src_pmd,
-			  struct vm_area_struct *vma,
-			  unsigned long addr, unsigned long end);
 extern int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 06ff0720d75a..01d0289f30a7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -884,7 +884,7 @@ out_set_pte:
 	return 0;
 }
 
-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		   pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
 		   unsigned long addr, unsigned long end)
 {
-- 
cgit v1.2.3


From f6f8ed47353597dcb895eb4a15a28af657392e72 Mon Sep 17 00:00:00 2001
From: WANG Chao <chaowang@redhat.com>
Date: Wed, 6 Aug 2014 16:06:58 -0700
Subject: mm/vmalloc.c: clean up map_vm_area third argument

Currently map_vm_area() takes (struct page *** pages) as third argument,
and after mapping, it moves (*pages) to point to (*pages +
nr_mappped_pages).

It looks like this kind of increment is useless to its caller these
days.  The callers don't care about the increments and actually they're
trying to avoid this by passing another copy to map_vm_area().

The caller can always guarantee all the pages can be mapped into vm_area
as specified in first argument and the caller only cares about whether
map_vm_area() fails or not.

This patch cleans up the pointer movement in map_vm_area() and updates
its callers accordingly.

Signed-off-by: WANG Chao <chaowang@redhat.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/tile/kernel/module.c        |  2 +-
 drivers/lguest/core.c            |  7 ++-----
 drivers/staging/android/binder.c |  4 +---
 include/linux/vmalloc.h          |  2 +-
 mm/vmalloc.c                     | 14 +++++---------
 mm/zsmalloc.c                    |  2 +-
 6 files changed, 11 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index 4918d91bc3a6..d19b13e3a59f 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
 	area->nr_pages = npages;
 	area->pages = pages;
 
-	if (map_vm_area(area, prot_rwx, &pages)) {
+	if (map_vm_area(area, prot_rwx, pages)) {
 		vunmap(area->addr);
 		goto error;
 	}
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 0bf1e4edf04d..6590558d1d31 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock);
 static __init int map_switcher(void)
 {
 	int i, err;
-	struct page **pagep;
 
 	/*
 	 * Map the Switcher in to high memory.
@@ -110,11 +109,9 @@ static __init int map_switcher(void)
 	 * This code actually sets up the pages we've allocated to appear at
 	 * switcher_addr.  map_vm_area() takes the vma we allocated above, the
 	 * kind of pages we're mapping (kernel pages), and a pointer to our
-	 * array of struct pages.  It increments that pointer, but we don't
-	 * care.
+	 * array of struct pages.
 	 */
-	pagep = lg_switcher_pages;
-	err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
+	err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages);
 	if (err) {
 		printk("lguest: map_vm_area failed: %i\n", err);
 		goto free_vma;
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 02b0379ae550..4f34dc0095b5 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 
 	for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
 		int ret;
-		struct page **page_array_ptr;
 
 		page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
 
@@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 		}
 		tmp_area.addr = page_addr;
 		tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
-		page_array_ptr = page;
-		ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
+		ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
 		if (ret) {
 			pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
 			       proc->pid, page_addr);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 4b8a89189a29..b87696fdf06a 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
 
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
-			struct page ***pages);
+			struct page **pages);
 #ifdef CONFIG_MMU
 extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
 				    pgprot_t prot, struct page **pages);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9ec4173f48a8..2b0aa5486092 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
 }
 EXPORT_SYMBOL_GPL(unmap_kernel_range);
 
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
 {
 	unsigned long addr = (unsigned long)area->addr;
 	unsigned long end = addr + get_vm_area_size(area);
 	int err;
 
-	err = vmap_page_range(addr, end, prot, *pages);
-	if (err > 0) {
-		*pages += err;
-		err = 0;
-	}
+	err = vmap_page_range(addr, end, prot, pages);
 
-	return err;
+	return err > 0 ? 0 : err;
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
@@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count,
 	if (!area)
 		return NULL;
 
-	if (map_vm_area(area, prot, &pages)) {
+	if (map_vm_area(area, prot, pages)) {
 		vunmap(area->addr);
 		return NULL;
 	}
@@ -1606,7 +1602,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			cond_resched();
 	}
 
-	if (map_vm_area(area, prot, &pages))
+	if (map_vm_area(area, prot, pages))
 		goto fail;
 	return area->addr;
 
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index fe78189624cf..bb62a4adc328 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -690,7 +690,7 @@ static inline void __zs_cpu_down(struct mapping_area *area)
 static inline void *__zs_map_object(struct mapping_area *area,
 				struct page *pages[2], int off, int size)
 {
-	BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
+	BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
 	area->vm_addr = area->vm->addr;
 	return area->vm_addr + off;
 }
-- 
cgit v1.2.3


From 3484b2de9499df23c4604a513b36f96326ae81ad Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 6 Aug 2014 16:07:14 -0700
Subject: mm: rearrange zone fields into read-only, page alloc, statistics and
 page reclaim lines

The arrangement of struct zone has changed over time and now it has
reached the point where there is some inappropriate sharing going on.
On x86-64 for example

o The zone->node field is shared with the zone lock and zone->node is
  accessed frequently from the page allocator due to the fair zone
  allocation policy.

o span_seqlock is almost never used by shares a line with free_area

o Some zone statistics share a cache line with the LRU lock so
  reclaim-intensive and allocator-intensive workloads can bounce the cache
  line on a stat update

This patch rearranges struct zone to put read-only and read-mostly
fields together and then splits the page allocator intensive fields, the
zone statistics and the page reclaim intensive fields into their own
cache lines.  Note that the type of lowmem_reserve changes due to the
watermark calculations being signed and avoiding a signed/unsigned
conversion there.

On the test configuration I used the overall size of struct zone shrunk
by one cache line.  On smaller machines, this is not likely to be
noticable.  However, on a 4-node NUMA machine running tiobench the
system CPU overhead is reduced by this patch.

          3.16.0-rc3  3.16.0-rc3
             vanillarearrange-v5r9
User          746.94      759.78
System      65336.22    58350.98
Elapsed     27553.52    27282.02

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 211 +++++++++++++++++++++++++------------------------
 mm/page_alloc.c        |   7 +-
 mm/vmstat.c            |   4 +-
 3 files changed, 113 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 559e659288fc..ed0876bb902c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -324,18 +324,11 @@ enum zone_type {
 #ifndef __GENERATING_BOUNDS_H
 
 struct zone {
-	/* Fields commonly accessed by the page allocator */
+	/* Read-mostly fields */
 
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
 	unsigned long watermark[NR_WMARK];
 
-	/*
-	 * When free pages are below this point, additional steps are taken
-	 * when reading the number of free pages to avoid per-cpu counter
-	 * drift allowing watermarks to be breached
-	 */
-	unsigned long percpu_drift_mark;
-
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
@@ -344,41 +337,26 @@ struct zone {
 	 * on the higher zones). This array is recalculated at runtime if the
 	 * sysctl_lowmem_reserve_ratio sysctl changes.
 	 */
-	unsigned long		lowmem_reserve[MAX_NR_ZONES];
-
-	/*
-	 * This is a per-zone reserve of pages that should not be
-	 * considered dirtyable memory.
-	 */
-	unsigned long		dirty_balance_reserve;
+	long lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
 	int node;
+#endif
+
 	/*
-	 * zone reclaim becomes active if more unmapped pages exist.
+	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+	 * this zone's LRU.  Maintained by the pageout code.
 	 */
-	unsigned long		min_unmapped_pages;
-	unsigned long		min_slab_pages;
-#endif
+	unsigned int inactive_ratio;
+
+	struct pglist_data	*zone_pgdat;
 	struct per_cpu_pageset __percpu *pageset;
+
 	/*
-	 * free areas of different sizes
+	 * This is a per-zone reserve of pages that should not be
+	 * considered dirtyable memory.
 	 */
-	spinlock_t		lock;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-	/* Set to true when the PG_migrate_skip bits should be cleared */
-	bool			compact_blockskip_flush;
-
-	/* pfn where compaction free scanner should start */
-	unsigned long		compact_cached_free_pfn;
-	/* pfn where async and sync compaction migration scanner should start */
-	unsigned long		compact_cached_migrate_pfn[2];
-#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
-	/* see spanned/present_pages for more description */
-	seqlock_t		span_seqlock;
-#endif
-	struct free_area	free_area[MAX_ORDER];
+	unsigned long		dirty_balance_reserve;
 
 #ifndef CONFIG_SPARSEMEM
 	/*
@@ -388,74 +366,14 @@ struct zone {
 	unsigned long		*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_COMPACTION
-	/*
-	 * On compaction failure, 1<<compact_defer_shift compactions
-	 * are skipped before trying again. The number attempted since
-	 * last failure is tracked with compact_considered.
-	 */
-	unsigned int		compact_considered;
-	unsigned int		compact_defer_shift;
-	int			compact_order_failed;
-#endif
-
-	ZONE_PADDING(_pad1_)
-
-	/* Fields commonly accessed by the page reclaim scanner */
-	spinlock_t		lru_lock;
-	struct lruvec		lruvec;
-
-	/* Evictions & activations on the inactive file list */
-	atomic_long_t		inactive_age;
-
-	unsigned long		pages_scanned;	   /* since last reclaim */
-	unsigned long		flags;		   /* zone flags, see below */
-
-	/* Zone statistics */
-	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
-
-	/*
-	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
-	 * this zone's LRU.  Maintained by the pageout code.
-	 */
-	unsigned int inactive_ratio;
-
-
-	ZONE_PADDING(_pad2_)
-	/* Rarely used or read-mostly fields */
-
+#ifdef CONFIG_NUMA
 	/*
-	 * wait_table		-- the array holding the hash table
-	 * wait_table_hash_nr_entries	-- the size of the hash table array
-	 * wait_table_bits	-- wait_table_size == (1 << wait_table_bits)
-	 *
-	 * The purpose of all these is to keep track of the people
-	 * waiting for a page to become available and make them
-	 * runnable again when possible. The trouble is that this
-	 * consumes a lot of space, especially when so few things
-	 * wait on pages at a given time. So instead of using
-	 * per-page waitqueues, we use a waitqueue hash table.
-	 *
-	 * The bucket discipline is to sleep on the same queue when
-	 * colliding and wake all in that wait queue when removing.
-	 * When something wakes, it must check to be sure its page is
-	 * truly available, a la thundering herd. The cost of a
-	 * collision is great, but given the expected load of the
-	 * table, they should be so rare as to be outweighed by the
-	 * benefits from the saved space.
-	 *
-	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
-	 * primary users of these fields, and in mm/page_alloc.c
-	 * free_area_init_core() performs the initialization of them.
+	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
-	wait_queue_head_t	* wait_table;
-	unsigned long		wait_table_hash_nr_entries;
-	unsigned long		wait_table_bits;
+	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
+#endif /* CONFIG_NUMA */
 
-	/*
-	 * Discontig memory support fields.
-	 */
-	struct pglist_data	*zone_pgdat;
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
 	unsigned long		zone_start_pfn;
 
@@ -500,9 +418,11 @@ struct zone {
 	 * adjust_managed_page_count() should be used instead of directly
 	 * touching zone->managed_pages and totalram_pages.
 	 */
+	unsigned long		managed_pages;
 	unsigned long		spanned_pages;
 	unsigned long		present_pages;
-	unsigned long		managed_pages;
+
+	const char		*name;
 
 	/*
 	 * Number of MIGRATE_RESEVE page block. To maintain for just
@@ -510,10 +430,95 @@ struct zone {
 	 */
 	int			nr_migrate_reserve_block;
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+	/* see spanned/present_pages for more description */
+	seqlock_t		span_seqlock;
+#endif
+
 	/*
-	 * rarely used fields:
+	 * wait_table		-- the array holding the hash table
+	 * wait_table_hash_nr_entries	-- the size of the hash table array
+	 * wait_table_bits	-- wait_table_size == (1 << wait_table_bits)
+	 *
+	 * The purpose of all these is to keep track of the people
+	 * waiting for a page to become available and make them
+	 * runnable again when possible. The trouble is that this
+	 * consumes a lot of space, especially when so few things
+	 * wait on pages at a given time. So instead of using
+	 * per-page waitqueues, we use a waitqueue hash table.
+	 *
+	 * The bucket discipline is to sleep on the same queue when
+	 * colliding and wake all in that wait queue when removing.
+	 * When something wakes, it must check to be sure its page is
+	 * truly available, a la thundering herd. The cost of a
+	 * collision is great, but given the expected load of the
+	 * table, they should be so rare as to be outweighed by the
+	 * benefits from the saved space.
+	 *
+	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
+	 * primary users of these fields, and in mm/page_alloc.c
+	 * free_area_init_core() performs the initialization of them.
 	 */
-	const char		*name;
+	wait_queue_head_t	*wait_table;
+	unsigned long		wait_table_hash_nr_entries;
+	unsigned long		wait_table_bits;
+
+	ZONE_PADDING(_pad1_)
+
+	/* Write-intensive fields used from the page allocator */
+	spinlock_t		lock;
+
+	/* free areas of different sizes */
+	struct free_area	free_area[MAX_ORDER];
+
+	/* zone flags, see below */
+	unsigned long		flags;
+
+	ZONE_PADDING(_pad2_)
+
+	/* Write-intensive fields used by page reclaim */
+
+	/* Fields commonly accessed by the page reclaim scanner */
+	spinlock_t		lru_lock;
+	unsigned long		pages_scanned;	   /* since last reclaim */
+	struct lruvec		lruvec;
+
+	/* Evictions & activations on the inactive file list */
+	atomic_long_t		inactive_age;
+
+	/*
+	 * When free pages are below this point, additional steps are taken
+	 * when reading the number of free pages to avoid per-cpu counter
+	 * drift allowing watermarks to be breached
+	 */
+	unsigned long percpu_drift_mark;
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	/* pfn where compaction free scanner should start */
+	unsigned long		compact_cached_free_pfn;
+	/* pfn where async and sync compaction migration scanner should start */
+	unsigned long		compact_cached_migrate_pfn[2];
+#endif
+
+#ifdef CONFIG_COMPACTION
+	/*
+	 * On compaction failure, 1<<compact_defer_shift compactions
+	 * are skipped before trying again. The number attempted since
+	 * last failure is tracked with compact_considered.
+	 */
+	unsigned int		compact_considered;
+	unsigned int		compact_defer_shift;
+	int			compact_order_failed;
+#endif
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	/* Set to true when the PG_migrate_skip bits should be cleared */
+	bool			compact_blockskip_flush;
+#endif
+
+	ZONE_PADDING(_pad3_)
+	/* Zone statistics */
+	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0987ac9f0a4e..b7381d11f021 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1708,7 +1708,6 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
-	long lowmem_reserve = z->lowmem_reserve[classzone_idx];
 	int o;
 	long free_cma = 0;
 
@@ -1723,7 +1722,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 		free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
 
-	if (free_pages - free_cma <= min + lowmem_reserve)
+	if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
 		return false;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
@@ -3254,7 +3253,7 @@ void show_free_areas(unsigned int filter)
 			);
 		printk("lowmem_reserve[]:");
 		for (i = 0; i < MAX_NR_ZONES; i++)
-			printk(" %lu", zone->lowmem_reserve[i]);
+			printk(" %ld", zone->lowmem_reserve[i]);
 		printk("\n");
 	}
 
@@ -5575,7 +5574,7 @@ static void calculate_totalreserve_pages(void)
 	for_each_online_pgdat(pgdat) {
 		for (i = 0; i < MAX_NR_ZONES; i++) {
 			struct zone *zone = pgdat->node_zones + i;
-			unsigned long max = 0;
+			long max = 0;
 
 			/* Find valid and maximum lowmem_reserve in the zone */
 			for (j = i; j < MAX_NR_ZONES; j++) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b37bd49bfd55..8267f77d1875 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1077,10 +1077,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 				zone_page_state(zone, i));
 
 	seq_printf(m,
-		   "\n        protection: (%lu",
+		   "\n        protection: (%ld",
 		   zone->lowmem_reserve[0]);
 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
-		seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
+		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
 	seq_printf(m,
 		   ")"
 		   "\n  pagesets");
-- 
cgit v1.2.3


From 0d5d823ab4e608ec7b52ac4410de4cb74bbe0edd Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 6 Aug 2014 16:07:16 -0700
Subject: mm: move zone->pages_scanned into a vmstat counter

zone->pages_scanned is a write-intensive cache line during page reclaim
and it's also updated during page free.  Move the counter into vmstat to
take advantage of the per-cpu updates and do not update it in the free
paths unless necessary.

On a small UMA machine running tiobench the difference is marginal.  On
a 4-node machine the overhead is more noticable.  Note that automatic
NUMA balancing was disabled for this test as otherwise the system CPU
overhead is unpredictable.

          3.16.0-rc3  3.16.0-rc3  3.16.0-rc3
             vanillarearrange-v5   vmstat-v5
User          746.94      759.78      774.56
System      65336.22    58350.98    32847.27
Elapsed     27553.52    27282.02    27415.04

Note that the overhead reduction will vary depending on where exactly
pages are allocated and freed.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  2 +-
 mm/page_alloc.c        | 12 +++++++++---
 mm/vmscan.c            |  7 ++++---
 mm/vmstat.c            |  3 ++-
 4 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ed0876bb902c..0bd77f730b38 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -143,6 +143,7 @@ enum zone_stat_item {
 	NR_SHMEM,		/* shmem pages (included tmpfs/GEM pages) */
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
+	NR_PAGES_SCANNED,	/* pages scanned since last reclaim */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
 	NUMA_MISS,		/* allocated in non intended node */
@@ -480,7 +481,6 @@ struct zone {
 
 	/* Fields commonly accessed by the page reclaim scanner */
 	spinlock_t		lru_lock;
-	unsigned long		pages_scanned;	   /* since last reclaim */
 	struct lruvec		lruvec;
 
 	/* Evictions & activations on the inactive file list */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b7381d11f021..daa016063793 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	int migratetype = 0;
 	int batch_free = 0;
 	int to_free = count;
+	unsigned long nr_scanned;
 
 	spin_lock(&zone->lock);
-	zone->pages_scanned = 0;
+	nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+	if (nr_scanned)
+		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
 	while (to_free) {
 		struct page *page;
@@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone,
 				unsigned int order,
 				int migratetype)
 {
+	unsigned long nr_scanned;
 	spin_lock(&zone->lock);
-	zone->pages_scanned = 0;
+	nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+	if (nr_scanned)
+		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
 	__free_one_page(page, pfn, zone, order, migratetype);
 	if (unlikely(!is_migrate_isolate(migratetype)))
@@ -3248,7 +3254,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_BOUNCE)),
 			K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
-			zone->pages_scanned,
+			K(zone_page_state(zone, NR_PAGES_SCANNED)),
 			(!zone_reclaimable(zone) ? "yes" : "no")
 			);
 		printk("lowmem_reserve[]:");
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5fec1ba9951f..9c8222b499b4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -174,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
 
 bool zone_reclaimable(struct zone *zone)
 {
-	return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+	return zone_page_state(zone, NR_PAGES_SCANNED) <
+		zone_reclaimable_pages(zone) * 6;
 }
 
 static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -1508,7 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
 
 	if (global_reclaim(sc)) {
-		zone->pages_scanned += nr_scanned;
+		__mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
 		if (current_is_kswapd())
 			__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
 		else
@@ -1698,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
 				     &nr_scanned, sc, isolate_mode, lru);
 	if (global_reclaim(sc))
-		zone->pages_scanned += nr_scanned;
+		__mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
 
 	reclaim_stat->recent_scanned[file] += nr_taken;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8267f77d1875..e574e883fa70 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -763,6 +763,7 @@ const char * const vmstat_text[] = {
 	"nr_shmem",
 	"nr_dirtied",
 	"nr_written",
+	"nr_pages_scanned",
 
 #ifdef CONFIG_NUMA
 	"numa_hit",
@@ -1067,7 +1068,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   min_wmark_pages(zone),
 		   low_wmark_pages(zone),
 		   high_wmark_pages(zone),
-		   zone->pages_scanned,
+		   zone_page_state(zone, NR_PAGES_SCANNED),
 		   zone->spanned_pages,
 		   zone->present_pages,
 		   zone->managed_pages);
-- 
cgit v1.2.3


From 4ffeaf3560a52b4a69cc7909873d08c0ef5909d4 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 6 Aug 2014 16:07:22 -0700
Subject: mm: page_alloc: reduce cost of the fair zone allocation policy

The fair zone allocation policy round-robins allocations between zones
within a node to avoid age inversion problems during reclaim.  If the
first allocation fails, the batch counts are reset and a second attempt
made before entering the slow path.

One assumption made with this scheme is that batches expire at roughly
the same time and the resets each time are justified.  This assumption
does not hold when zones reach their low watermark as the batches will
be consumed at uneven rates.  Allocation failure due to watermark
depletion result in additional zonelist scans for the reset and another
watermark check before hitting the slowpath.

On UMA, the benefit is negligible -- around 0.25%.  On 4-socket NUMA
machine it's variable due to the variability of measuring overhead with
the vmstat changes.  The system CPU overhead comparison looks like

          3.16.0-rc3  3.16.0-rc3  3.16.0-rc3
             vanilla   vmstat-v5 lowercost-v5
User          746.94      774.56      802.00
System      65336.22    32847.27    40852.33
Elapsed     27553.52    27415.04    27368.46

However it is worth noting that the overall benchmark still completed
faster and intuitively it makes sense to take as few passes as possible
through the zonelists.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |   6 +++
 mm/page_alloc.c        | 101 ++++++++++++++++++++++++++-----------------------
 2 files changed, 59 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0bd77f730b38..318df7051850 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -534,6 +534,7 @@ typedef enum {
 	ZONE_WRITEBACK,			/* reclaim scanning has recently found
 					 * many pages under writeback
 					 */
+	ZONE_FAIR_DEPLETED,		/* fair zone policy batch depleted */
 } zone_flags_t;
 
 static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -571,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
 	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
 }
 
+static inline int zone_is_fair_depleted(const struct zone *zone)
+{
+	return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
+}
+
 static inline int zone_is_oom_locked(const struct zone *zone)
 {
 	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e5e8f762532..fb9908148474 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1612,6 +1612,9 @@ again:
 	}
 
 	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+	if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
+	    !zone_is_fair_depleted(zone))
+		zone_set_flag(zone, ZONE_FAIR_DEPLETED);
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1923,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 
 #endif	/* CONFIG_NUMA */
 
+static void reset_alloc_batches(struct zone *preferred_zone)
+{
+	struct zone *zone = preferred_zone->zone_pgdat->node_zones;
+
+	do {
+		mod_zone_page_state(zone, NR_ALLOC_BATCH,
+			high_wmark_pages(zone) - low_wmark_pages(zone) -
+			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+		zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+	} while (zone++ != preferred_zone);
+}
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -1940,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 	bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
 				(gfp_mask & __GFP_WRITE);
+	int nr_fair_skipped = 0;
+	bool zonelist_rescan;
 
 zonelist_scan:
+	zonelist_rescan = false;
+
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
 	 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
@@ -1966,8 +1985,10 @@ zonelist_scan:
 		if (alloc_flags & ALLOC_FAIR) {
 			if (!zone_local(preferred_zone, zone))
 				break;
-			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+			if (zone_is_fair_depleted(zone)) {
+				nr_fair_skipped++;
 				continue;
+			}
 		}
 		/*
 		 * When allocating a page cache page for writing, we
@@ -2073,13 +2094,7 @@ this_zone_full:
 			zlc_mark_zone_full(zonelist, z);
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) {
-		/* Disable zlc cache for second zonelist scan */
-		zlc_active = 0;
-		goto zonelist_scan;
-	}
-
-	if (page)
+	if (page) {
 		/*
 		 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
 		 * necessary to allocate the page. The expectation is
@@ -2088,8 +2103,37 @@ this_zone_full:
 		 * for !PFMEMALLOC purposes.
 		 */
 		page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+		return page;
+	}
 
-	return page;
+	/*
+	 * The first pass makes sure allocations are spread fairly within the
+	 * local node.  However, the local node might have free pages left
+	 * after the fairness batches are exhausted, and remote zones haven't
+	 * even been considered yet.  Try once more without fairness, and
+	 * include remote zones now, before entering the slowpath and waking
+	 * kswapd: prefer spilling to a remote zone over swapping locally.
+	 */
+	if (alloc_flags & ALLOC_FAIR) {
+		alloc_flags &= ~ALLOC_FAIR;
+		if (nr_fair_skipped) {
+			zonelist_rescan = true;
+			reset_alloc_batches(preferred_zone);
+		}
+		if (nr_online_nodes > 1)
+			zonelist_rescan = true;
+	}
+
+	if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
+		/* Disable zlc cache for second zonelist scan */
+		zlc_active = 0;
+		zonelist_rescan = true;
+	}
+
+	if (zonelist_rescan)
+		goto zonelist_scan;
+
+	return NULL;
 }
 
 /*
@@ -2410,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 	return page;
 }
 
-static void reset_alloc_batches(struct zonelist *zonelist,
-				enum zone_type high_zoneidx,
-				struct zone *preferred_zone)
-{
-	struct zoneref *z;
-	struct zone *zone;
-
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-		/*
-		 * Only reset the batches of zones that were actually
-		 * considered in the fairness pass, we don't want to
-		 * trash fairness information for zones that are not
-		 * actually part of this zonelist's round-robin cycle.
-		 */
-		if (!zone_local(preferred_zone, zone))
-			continue;
-		mod_zone_page_state(zone, NR_ALLOC_BATCH,
-			high_wmark_pages(zone) - low_wmark_pages(zone) -
-			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-	}
-}
-
 static void wake_all_kswapds(unsigned int order,
 			     struct zonelist *zonelist,
 			     enum zone_type high_zoneidx,
@@ -2767,28 +2789,11 @@ retry_cpuset:
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 #endif
-retry:
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, classzone_idx, migratetype);
 	if (unlikely(!page)) {
-		/*
-		 * The first pass makes sure allocations are spread
-		 * fairly within the local node.  However, the local
-		 * node might have free pages left after the fairness
-		 * batches are exhausted, and remote zones haven't
-		 * even been considered yet.  Try once more without
-		 * fairness, and include remote zones now, before
-		 * entering the slowpath and waking kswapd: prefer
-		 * spilling to a remote zone over swapping locally.
-		 */
-		if (alloc_flags & ALLOC_FAIR) {
-			reset_alloc_batches(zonelist, high_zoneidx,
-					    preferred_zone);
-			alloc_flags &= ~ALLOC_FAIR;
-			goto retry;
-		}
 		/*
 		 * Runtime PM, block IO and its error handling path
 		 * can deadlock because I/O on the device might not
-- 
cgit v1.2.3


From 9a95f3cf7b33d66fa64727cff8cd2f2a9d09f335 Mon Sep 17 00:00:00 2001
From: Paul Cassella <cassella@cray.com>
Date: Wed, 6 Aug 2014 16:07:24 -0700
Subject: mm: describe mmap_sem rules for __lock_page_or_retry() and callers

Add a comment describing the circumstances in which
__lock_page_or_retry() will or will not release the mmap_sem when
returning 0.

Add comments to lock_page_or_retry()'s callers (filemap_fault(),
do_swap_page()) noting the impact on VM_FAULT_RETRY returns.

Add comments on up the call tree, particularly replacing the false "We
return with mmap_sem still held" comments.

Signed-off-by: Paul Cassella <cassella@cray.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/fault.c     |  3 ++-
 include/linux/pagemap.h |  3 +++
 mm/filemap.c            | 23 +++++++++++++++++++++++
 mm/gup.c                | 18 +++++++++++++++---
 mm/memory.c             | 34 +++++++++++++++++++++++++++++++---
 mm/mlock.c              |  9 ++++++++-
 6 files changed, 82 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1dbade870f90..a24194681513 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1218,7 +1218,8 @@ good_area:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
-	 * the fault:
+	 * the fault.  Since we never set FAULT_FLAG_RETRY_NOWAIT, if
+	 * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
 	 */
 	fault = handle_mm_fault(mm, vma, address, flags);
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e1474ae18c88..3df8c7db7a4e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
 /*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
+ *
+ * Return value and mmap_sem implications depend on flags; see
+ * __lock_page_or_retry().
  */
 static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				     unsigned int flags)
diff --git a/mm/filemap.c b/mm/filemap.c
index 7e85c8147e1b..af19a6b079f5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+/*
+ * Return values:
+ * 1 - page is locked; mmap_sem is still held.
+ * 0 - page is not locked.
+ *     mmap_sem has been released (up_read()), unless flags had both
+ *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
+ *     which case mmap_sem is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
+ * with the page locked and the mmap_sem unperturbed.
+ */
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 			 unsigned int flags)
 {
@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
+ *
+ * vma->vm_mm->mmap_sem must be held on entry.
+ *
+ * If our return value has VM_FAULT_RETRY set, it's because
+ * lock_page_or_retry() returned 0.
+ * The mmap_sem has usually been released in this case.
+ * See __lock_page_or_retry() for the exception.
+ *
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * has not been released.
+ *
+ * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
  */
 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
diff --git a/mm/gup.c b/mm/gup.c
index cc5a9e7adea7..91d044b1600d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
 	return ret;
 }
 
+/*
+ * mmap_sem must be held on entry.  If @nonblocking != NULL and
+ * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
+ * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
+ */
 static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 		unsigned long address, unsigned int *flags, int *nonblocking)
 {
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  * with a put_page() call when it is finished with. vmas will only
  * remain valid while mmap_sem is held.
  *
- * Must be called with mmap_sem held for read or write.
+ * Must be called with mmap_sem held.  It may be released.  See below.
  *
  * __get_user_pages walks a process's page tables and takes a reference to
  * each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  *
  * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
  * or mmap_sem contention, and if waiting is needed to pin all pages,
- * *@nonblocking will be set to 0.
+ * *@nonblocking will be set to 0.  Further, if @gup_flags does not
+ * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
+ * this case.
+ *
+ * A caller using such a combination of @nonblocking and @gup_flags
+ * must therefore hold the mmap_sem for reading only, and recognize
+ * when it's been released.  Otherwise, it must be held for either
+ * reading or writing and will not be released.
  *
  * In most cases, get_user_pages or get_user_pages_fast should be used
  * instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
  * such architectures, gup() will not be enough to make a subsequent access
  * succeed.
  *
- * This should be called with the mm_sem held for read.
+ * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
  */
 int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long address, unsigned int fault_flags)
diff --git a/mm/memory.c b/mm/memory.c
index 7e131325bdf8..4d0a543f3bb3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
 /*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
  */
 static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2688,6 +2691,11 @@ oom:
 	return VM_FAULT_OOM;
 }
 
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_retry().
+ */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 		pgoff_t pgoff, unsigned int flags, struct page **page)
 {
@@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return ret;
 }
 
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3188,10 @@ out:
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int handle_pte_fault(struct mm_struct *mm,
 		     struct vm_area_struct *vma, unsigned long address,
@@ -3232,6 +3251,9 @@ unlock:
 
 /*
  * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			     unsigned long address, unsigned int flags)
@@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		    unsigned long address, unsigned int flags)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index b1eb53634005..ce84cb0b83ef 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
  * @vma:   target vma
  * @start: start address
  * @end:   end address
+ * @nonblocking:
  *
  * This takes care of making the pages present too.
  *
  * return 0 on success, negative error code on error.
  *
- * vma->vm_mm->mmap_sem must be held for at least read.
+ * vma->vm_mm->mmap_sem must be held.
+ *
+ * If @nonblocking is NULL, it may be held for read or write and will
+ * be unperturbed.
+ *
+ * If @nonblocking is non-NULL, it must held for read only and may be
+ * released.  If it's released, *@nonblocking will be set to 0.
  */
 long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, int *nonblocking)
-- 
cgit v1.2.3


From 6326440077a48d2c3b2993f3b3f2d969f09b6917 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 6 Aug 2014 16:07:36 -0700
Subject: memory-hotplug: add zone_for_memory() for selecting zone for new
 memory

This series of patches fixes a problem when adding memory in bad manner.
For example: for a x86_64 machine booted with "mem=400M" and with 2GiB
memory installed, following commands cause problem:

  # echo 0x40000000 > /sys/devices/system/memory/probe
 [   28.613895] init_memory_mapping: [mem 0x40000000-0x47ffffff]
  # echo 0x48000000 > /sys/devices/system/memory/probe
 [   28.693675] init_memory_mapping: [mem 0x48000000-0x4fffffff]
  # echo online_movable > /sys/devices/system/memory/memory9/state
  # echo 0x50000000 > /sys/devices/system/memory/probe
 [   29.084090] init_memory_mapping: [mem 0x50000000-0x57ffffff]
  # echo 0x58000000 > /sys/devices/system/memory/probe
 [   29.151880] init_memory_mapping: [mem 0x58000000-0x5fffffff]
  # echo online_movable > /sys/devices/system/memory/memory11/state
  # echo online> /sys/devices/system/memory/memory8/state
  # echo online> /sys/devices/system/memory/memory10/state
  # echo offline> /sys/devices/system/memory/memory9/state
 [   30.558819] Offlined Pages 32768
  # free
              total       used       free     shared    buffers     cached
 Mem:        780588 18014398509432020     830552          0          0      51180
 -/+ buffers/cache: 18014398509380840     881732
 Swap:            0          0          0

This is because the above commands probe higher memory after online a
section with online_movable, which causes ZONE_HIGHMEM (or ZONE_NORMAL
for systems without ZONE_HIGHMEM) overlaps ZONE_MOVABLE.

After the second online_movable, the problem can be observed from
zoneinfo:

  # cat /proc/zoneinfo
  ...
  Node 0, zone  Movable
    pages free     65491
          min      250
          low      312
          high     375
          scanned  0
          spanned  18446744073709518848
          present  65536
          managed  65536
  ...

This series of patches solve the problem by checking ZONE_MOVABLE when
choosing zone for new memory.  If new memory is inside or higher than
ZONE_MOVABLE, makes it go there instead.

After applying this series of patches, following are free and zoneinfo
result (after offlining memory9):

  bash-4.2# free
                total       used       free     shared    buffers     cached
   Mem:        780956      80112     700844          0          0      51180
   -/+ buffers/cache:      28932     752024
   Swap:            0          0          0

  bash-4.2# cat /proc/zoneinfo

  Node 0, zone      DMA
    pages free     3389
          min      14
          low      17
          high     21
          scanned  0
          spanned  4095
          present  3998
          managed  3977
      nr_free_pages 3389
  ...
    start_pfn:         1
    inactive_ratio:    1
  Node 0, zone    DMA32
    pages free     73724
          min      341
          low      426
          high     511
          scanned  0
          spanned  98304
          present  98304
          managed  92958
      nr_free_pages 73724
    ...
    start_pfn:         4096
    inactive_ratio:    1
  Node 0, zone   Normal
    pages free     32630
          min      120
          low      150
          high     180
          scanned  0
          spanned  32768
          present  32768
          managed  32768
      nr_free_pages 32630
  ...
    start_pfn:         262144
    inactive_ratio:    1
  Node 0, zone  Movable
    pages free     65476
          min      241
          low      301
          high     361
          scanned  0
          spanned  98304
          present  65536
          managed  65536
      nr_free_pages 65476
  ...
    start_pfn:         294912
    inactive_ratio:    1

This patch (of 7):

Introduce zone_for_memory() in arch independent code for
arch_add_memory() use.

Many arch_add_memory() function simply selects ZONE_HIGHMEM or
ZONE_NORMAL and add new memory into it.  However, with the existance of
ZONE_MOVABLE, the selection method should be carefully considered: if
new, higher memory is added after ZONE_MOVABLE is setup, the default
zone and ZONE_MOVABLE may overlap each other.

should_add_memory_movable() checks the status of ZONE_MOVABLE.  If it
has already contain memory, compare the address of new memory and
movable memory.  If new memory is higher than movable, it should be
added into ZONE_MOVABLE instead of default zone.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Mel Gorman" <mgorman@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  1 +
 mm/memory_hotplug.c            | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 79dd9eca054f..d9524c49d767 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -259,6 +259,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
+extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a3797d3fd8a4..2ff8c2325e96 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1159,6 +1159,34 @@ static int check_hotplug_memory_range(u64 start, u64 size)
 	return 0;
 }
 
+/*
+ * If movable zone has already been setup, newly added memory should be check.
+ * If its address is higher than movable zone, it should be added as movable.
+ * Without this check, movable zone may overlap with other zone.
+ */
+static int should_add_memory_movable(int nid, u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	pg_data_t *pgdat = NODE_DATA(nid);
+	struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
+
+	if (zone_is_empty(movable_zone))
+		return 0;
+
+	if (movable_zone->zone_start_pfn <= start_pfn)
+		return 1;
+
+	return 0;
+}
+
+int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
+{
+	if (should_add_memory_movable(nid, start, size))
+		return ZONE_MOVABLE;
+
+	return zone_default;
+}
+
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
 int __ref add_memory(int nid, u64 start, u64 size)
 {
-- 
cgit v1.2.3


From 8d060bf490930f305c4efc45724e861a268f4d2f Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 6 Aug 2014 16:07:50 -0700
Subject: mm, oom: ensure memoryless node zonelist always includes zones

With memoryless node support being worked on, it's possible that for
optimizations that a node may not have a non-NULL zonelist.  When
CONFIG_NUMA is enabled and node 0 is memoryless, this means the zonelist
for first_online_node may become NULL.

The oom killer requires a zonelist that includes all memory zones for
the sysrq trigger and pagefault out of memory handler.

Ensure that a non-NULL zonelist is always passed to the oom killer.

[akpm@linux-foundation.org: fix non-numa build]
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/sysrq.c      |  2 +-
 include/linux/nodemask.h | 11 ++++++++++-
 mm/oom_kill.c            |  2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 454b65898e2c..42bad18c66c9 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
-	out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL,
+	out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL,
 		      0, NULL, true);
 }
 
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 58b9a02c38d2..83a6aeda899d 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -430,7 +430,15 @@ static inline int num_node_state(enum node_states state)
 	for_each_node_mask((__node), node_states[__state])
 
 #define first_online_node	first_node(node_states[N_ONLINE])
-#define next_online_node(nid)	next_node((nid), node_states[N_ONLINE])
+#define first_memory_node	first_node(node_states[N_MEMORY])
+static inline int next_online_node(int nid)
+{
+	return next_node(nid, node_states[N_ONLINE]);
+}
+static inline int next_memory_node(int nid)
+{
+	return next_node(nid, node_states[N_MEMORY]);
+}
 
 extern int nr_node_ids;
 extern int nr_online_nodes;
@@ -471,6 +479,7 @@ static inline int num_node_state(enum node_states state)
 	for ( (node) = 0; (node) == 0; (node) = 1)
 
 #define first_online_node	0
+#define first_memory_node	0
 #define next_online_node(nid)	(MAX_NUMNODES)
 #define nr_node_ids		1
 #define nr_online_nodes		1
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3291e82d4352..b0a1e1ff0353 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -694,7 +694,7 @@ void pagefault_out_of_memory(void)
 	if (mem_cgroup_oom_synchronize(true))
 		return;
 
-	zonelist = node_zonelist(first_online_node, GFP_KERNEL);
+	zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
 	if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
 		out_of_memory(NULL, 0, 0, NULL, false);
 		clear_zonelist_oom(zonelist, GFP_KERNEL);
-- 
cgit v1.2.3


From e972a070e2d3296cd2e2cc2fd0561ce89a1d5ebf Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 6 Aug 2014 16:07:52 -0700
Subject: mm, oom: rename zonelist locking functions

try_set_zonelist_oom() and clear_zonelist_oom() are not named properly
to imply that they require locking semantics to avoid out_of_memory()
being reordered.

zone_scan_lock is required for both functions to ensure that there is
proper locking synchronization.

Rename try_set_zonelist_oom() to oom_zonelist_trylock() and rename
clear_zonelist_oom() to oom_zonelist_unlock() to imply there is proper
locking semantics.

At the same time, convert oom_zonelist_trylock() to return bool instead
of int since only success and failure are tested.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h |  4 ++--
 mm/oom_kill.c       | 30 +++++++++++++-----------------
 mm/page_alloc.c     |  6 +++---
 3 files changed, 18 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 4cd62677feb9..647395a1a550 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -55,8 +55,8 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			     struct mem_cgroup *memcg, nodemask_t *nodemask,
 			     const char *message);
 
-extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
-extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
+extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags);
+extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags);
 
 extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 			       int order, const nodemask_t *nodemask);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b0a1e1ff0353..d33aca1552ad 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -559,28 +559,25 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
  * if a parallel OOM killing is already taking place that includes a zone in
  * the zonelist.  Otherwise, locks all zones in the zonelist and returns 1.
  */
-int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
+bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask)
 {
 	struct zoneref *z;
 	struct zone *zone;
-	int ret = 1;
+	bool ret = true;
 
 	spin_lock(&zone_scan_lock);
-	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
 		if (zone_is_oom_locked(zone)) {
-			ret = 0;
+			ret = false;
 			goto out;
 		}
-	}
 
-	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
-		/*
-		 * Lock each zone in the zonelist under zone_scan_lock so a
-		 * parallel invocation of try_set_zonelist_oom() doesn't succeed
-		 * when it shouldn't.
-		 */
+	/*
+	 * Lock each zone in the zonelist under zone_scan_lock so a parallel
+	 * call to oom_zonelist_trylock() doesn't succeed when it shouldn't.
+	 */
+	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
 		zone_set_flag(zone, ZONE_OOM_LOCKED);
-	}
 
 out:
 	spin_unlock(&zone_scan_lock);
@@ -592,15 +589,14 @@ out:
  * allocation attempts with zonelists containing them may now recall the OOM
  * killer, if necessary.
  */
-void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
+void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	spin_lock(&zone_scan_lock);
-	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
 		zone_clear_flag(zone, ZONE_OOM_LOCKED);
-	}
 	spin_unlock(&zone_scan_lock);
 }
 
@@ -695,8 +691,8 @@ void pagefault_out_of_memory(void)
 		return;
 
 	zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
-	if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
+	if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
 		out_of_memory(NULL, 0, 0, NULL, false);
-		clear_zonelist_oom(zonelist, GFP_KERNEL);
+		oom_zonelist_unlock(zonelist, GFP_KERNEL);
 	}
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fb9908148474..578236089ec1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2246,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 {
 	struct page *page;
 
-	/* Acquire the OOM killer lock for the zones in zonelist */
-	if (!try_set_zonelist_oom(zonelist, gfp_mask)) {
+	/* Acquire the per-zone oom lock for each zone */
+	if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
 		schedule_timeout_uninterruptible(1);
 		return NULL;
 	}
@@ -2285,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	out_of_memory(zonelist, gfp_mask, order, nodemask, false);
 
 out:
-	clear_zonelist_oom(zonelist, gfp_mask);
+	oom_zonelist_unlock(zonelist, gfp_mask);
 	return page;
 }
 
-- 
cgit v1.2.3


From 1d352bfd41e8219cdf9bebe79677700bdc38b540 Mon Sep 17 00:00:00 2001
From: Chintan Pandya <cpandya@codeaurora.org>
Date: Wed, 6 Aug 2014 16:08:18 -0700
Subject: mm: BUG when __kmap_atomic_idx equals KM_TYPE_NR

__kmap_atomic_idx is per_cpu variable.  Each CPU can use KM_TYPE_NR
entries from FIXMAP i.e.  from 0 to KM_TYPE_NR - 1.  Allowing
__kmap_atomic_idx to over- shoot to KM_TYPE_NR can mess up with next
CPU's 0th entry which is a bug.  Hence BUG_ON if __kmap_atomic_idx >=
KM_TYPE_NR.

Fix the off-by-on in this test.

Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/highmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 7fb31da45d03..9286a46b7d69 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -93,7 +93,7 @@ static inline int kmap_atomic_idx_push(void)
 
 #ifdef CONFIG_DEBUG_HIGHMEM
 	WARN_ON_ONCE(in_irq() && !irqs_disabled());
-	BUG_ON(idx > KM_TYPE_NR);
+	BUG_ON(idx >= KM_TYPE_NR);
 #endif
 	return idx;
 }
-- 
cgit v1.2.3


From b972216e27d1c853eced33f8638926636c606341 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 6 Aug 2014 16:08:20 -0700
Subject: mmu_notifier: add call_srcu and sync function for listener to delay
 call and sync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When kernel device drivers or subsystems want to bind their lifespan to
t= he lifespan of the mm_struct, they usually use one of the following
methods:

1. Manually calling a function in the interested kernel module.  The
   funct= ion call needs to be placed in mmput.  This method was rejected
   by several ker= nel maintainers.

2. Registering to the mmu notifier release mechanism.

The problem with the latter approach is that the mmu_notifier_release
cal= lback is called from__mmu_notifier_release (called from exit_mmap).
That functi= on iterates over the list of mmu notifiers and don't expect
the release call= back function to remove itself from the list.
Therefore, the callback function= in the kernel module can't release the
mmu_notifier_object, which is actuall= y the kernel module's object
itself.  As a result, the destruction of the kernel module's object must
to be done in a delayed fashion.

This patch adds support for this delayed callback, by adding a new
mmu_notifier_call_srcu function that receives a function ptr and calls
th= at function with call_srcu.  In that function, the kernel module
releases its object.  To use mmu_notifier_call_srcu, the calling module
needs to call b= efore that a new function called
mmu_notifier_unregister_no_release that as its= name implies,
unregisters a notifier without calling its notifier release call= back.

This patch also adds a function that will call barrier_srcu so those
kern= el modules can sync with mmu_notifier.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h |  6 ++++++
 mm/mmu_notifier.c            | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index deca87452528..27288692241e 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -170,6 +170,8 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn,
 				   struct mm_struct *mm);
 extern void mmu_notifier_unregister(struct mmu_notifier *mn,
 				    struct mm_struct *mm);
+extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
+					       struct mm_struct *mm);
 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
@@ -288,6 +290,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	set_pte_at(___mm, ___address, __ptep, ___pte);			\
 })
 
+extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
+				   void (*func)(struct rcu_head *rcu));
+extern void mmu_notifier_synchronize(void);
+
 #else /* CONFIG_MMU_NOTIFIER */
 
 static inline void mmu_notifier_release(struct mm_struct *mm)
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 41cefdf0aadd..950813b1eb36 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -22,6 +22,25 @@
 /* global SRCU for all MMs */
 static struct srcu_struct srcu;
 
+/*
+ * This function allows mmu_notifier::release callback to delay a call to
+ * a function that will free appropriate resources. The function must be
+ * quick and must not block.
+ */
+void mmu_notifier_call_srcu(struct rcu_head *rcu,
+			    void (*func)(struct rcu_head *rcu))
+{
+	call_srcu(&srcu, rcu, func);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
+
+void mmu_notifier_synchronize(void)
+{
+	/* Wait for any running method to finish. */
+	srcu_barrier(&srcu);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
+
 /*
  * This function can't run concurrently against mmu_notifier_register
  * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -53,7 +72,6 @@ void __mmu_notifier_release(struct mm_struct *mm)
 		 */
 		if (mn->ops->release)
 			mn->ops->release(mn, mm);
-	srcu_read_unlock(&srcu, id);
 
 	spin_lock(&mm->mmu_notifier_mm->lock);
 	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -69,6 +87,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
 		hlist_del_init_rcu(&mn->hlist);
 	}
 	spin_unlock(&mm->mmu_notifier_mm->lock);
+	srcu_read_unlock(&srcu, id);
 
 	/*
 	 * synchronize_srcu here prevents mmu_notifier_release from returning to
@@ -325,6 +344,25 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
 
+/*
+ * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
+ */
+void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	/*
+	 * Can not use list_del_rcu() since __mmu_notifier_release
+	 * can delete it before we hold the lock.
+	 */
+	hlist_del_init_rcu(&mn->hlist);
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+
+	BUG_ON(atomic_read(&mm->mm_count) <= 0);
+	mmdrop(mm);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
+
 static int __init mmu_notifier_init(void)
 {
 	return init_srcu_struct(&srcu);
-- 
cgit v1.2.3


From 99eef8e9369abe009006b4fa7f6ca5086c09cf46 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Wed, 6 Aug 2014 16:08:33 -0700
Subject: mm/zbud: change zbud_alloc size type to size_t

Change the type of the zbud_alloc() size param from unsigned int to
size_t.

Technically, this should not make any difference, as the zbud
implementation already restricts the size to well within either type's
limits; but as zsmalloc (and kmalloc) use size_t, and zpool will use
size_t, this brings the size parameter type in line with zsmalloc/zpool.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Acked-by: Seth Jennings <sjennings@variantweb.net>
Tested-by: Seth Jennings <sjennings@variantweb.net>
Cc: Weijie Yang <weijie.yang@samsung.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zbud.h | 2 +-
 mm/zbud.c            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 13af0d450bf6..f9d41a6e361f 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -11,7 +11,7 @@ struct zbud_ops {
 
 struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
 void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	unsigned long *handle);
 void zbud_free(struct zbud_pool *pool, unsigned long handle);
 int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
diff --git a/mm/zbud.c b/mm/zbud.c
index 01df13a7e2e1..d01226117b8d 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -122,7 +122,7 @@ enum buddy {
 };
 
 /* Converts an allocation size in bytes to size in zbud chunks */
-static int size_to_chunks(int size)
+static int size_to_chunks(size_t size)
 {
 	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
 }
@@ -247,7 +247,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
  * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
  * a new page.
  */
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 			unsigned long *handle)
 {
 	int chunks, i, freechunks;
-- 
cgit v1.2.3


From af8d417a04564bca0348e7e3c749ab12a3e837ad Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Wed, 6 Aug 2014 16:08:36 -0700
Subject: mm/zpool: implement common zpool api to zbud/zsmalloc

Add zpool api.

zpool provides an interface for memory storage, typically of compressed
memory.  Users can select what backend to use; currently the only
implementations are zbud, a low density implementation with up to two
compressed pages per storage page, and zsmalloc, a higher density
implementation with multiple compressed pages per storage page.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Tested-by: Seth Jennings <sjennings@variantweb.net>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Weijie Yang <weijie.yang@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zpool.h | 106 +++++++++++++++
 mm/Kconfig            |  41 +++---
 mm/Makefile           |   1 +
 mm/zpool.c            | 364 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/zsmalloc.c         |   1 -
 5 files changed, 495 insertions(+), 18 deletions(-)
 create mode 100644 include/linux/zpool.h
 create mode 100644 mm/zpool.c

(limited to 'include/linux')

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644
index 000000000000..f14bd75f08b3
--- /dev/null
+++ b/include/linux/zpool.h
@@ -0,0 +1,106 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for the zbud and zsmalloc memory
+ * storage pool implementations.  Typically, this is used to
+ * store compressed memory.
+ */
+
+#ifndef _ZPOOL_H_
+#define _ZPOOL_H_
+
+struct zpool;
+
+struct zpool_ops {
+	int (*evict)(struct zpool *pool, unsigned long handle);
+};
+
+/*
+ * Control how a handle is mapped.  It will be ignored if the
+ * implementation does not support it.  Its use is optional.
+ * Note that this does not refer to memory protection, it
+ * refers to how the memory will be copied in/out if copying
+ * is necessary during mapping; read-write is the safest as
+ * it copies the existing memory in on map, and copies the
+ * changed memory back out on unmap.  Write-only does not copy
+ * in the memory and should only be used for initialization.
+ * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
+ */
+enum zpool_mapmode {
+	ZPOOL_MM_RW, /* normal read-write mapping */
+	ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
+	ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
+
+	ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
+};
+
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops);
+
+char *zpool_get_type(struct zpool *pool);
+
+void zpool_destroy_pool(struct zpool *pool);
+
+int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
+			unsigned long *handle);
+
+void zpool_free(struct zpool *pool, unsigned long handle);
+
+int zpool_shrink(struct zpool *pool, unsigned int pages,
+			unsigned int *reclaimed);
+
+void *zpool_map_handle(struct zpool *pool, unsigned long handle,
+			enum zpool_mapmode mm);
+
+void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
+
+u64 zpool_get_total_size(struct zpool *pool);
+
+
+/**
+ * struct zpool_driver - driver implementation for zpool
+ * @type:	name of the driver.
+ * @list:	entry in the list of zpool drivers.
+ * @create:	create a new pool.
+ * @destroy:	destroy a pool.
+ * @malloc:	allocate mem from a pool.
+ * @free:	free mem from a pool.
+ * @shrink:	shrink the pool.
+ * @map:	map a handle.
+ * @unmap:	unmap a handle.
+ * @total_size:	get total size of a pool.
+ *
+ * This is created by a zpool implementation and registered
+ * with zpool.
+ */
+struct zpool_driver {
+	char *type;
+	struct module *owner;
+	atomic_t refcount;
+	struct list_head list;
+
+	void *(*create)(gfp_t gfp, struct zpool_ops *ops);
+	void (*destroy)(void *pool);
+
+	int (*malloc)(void *pool, size_t size, gfp_t gfp,
+				unsigned long *handle);
+	void (*free)(void *pool, unsigned long handle);
+
+	int (*shrink)(void *pool, unsigned int pages,
+				unsigned int *reclaimed);
+
+	void *(*map)(void *pool, unsigned long handle,
+				enum zpool_mapmode mm);
+	void (*unmap)(void *pool, unsigned long handle);
+
+	u64 (*total_size)(void *pool);
+};
+
+void zpool_register_driver(struct zpool_driver *driver);
+
+int zpool_unregister_driver(struct zpool_driver *driver);
+
+int zpool_evict(void *pool, unsigned long handle);
+
+#endif
diff --git a/mm/Kconfig b/mm/Kconfig
index f4899ec39cf4..12179b8c3b89 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -519,15 +519,17 @@ config CMA_AREAS
 
 	  If unsure, leave the default value "7".
 
-config ZBUD
-	tristate
-	default n
+config MEM_SOFT_DIRTY
+	bool "Track memory changes"
+	depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
+	select PROC_PAGE_MONITOR
 	help
-	  A special purpose allocator for storing compressed pages.
-	  It is designed to store up to two compressed pages per physical
-	  page.  While this design limits storage density, it has simple and
-	  deterministic reclaim properties that make it preferable to a higher
-	  density approach when reclaim will be used.
+	  This option enables memory changes tracking by introducing a
+	  soft-dirty bit on pte-s. This bit it set when someone writes
+	  into a page just as regular dirty bit, but unlike the latter
+	  it can be cleared by hands.
+
+	  See Documentation/vm/soft-dirty.txt for more details.
 
 config ZSWAP
 	bool "Compressed cache for swap pages (EXPERIMENTAL)"
@@ -549,17 +551,22 @@ config ZSWAP
 	  they have not be fully explored on the large set of potential
 	  configurations and workloads that exist.
 
-config MEM_SOFT_DIRTY
-	bool "Track memory changes"
-	depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
-	select PROC_PAGE_MONITOR
+config ZPOOL
+	tristate "Common API for compressed memory storage"
+	default n
 	help
-	  This option enables memory changes tracking by introducing a
-	  soft-dirty bit on pte-s. This bit it set when someone writes
-	  into a page just as regular dirty bit, but unlike the latter
-	  it can be cleared by hands.
+	  Compressed memory storage API.  This allows using either zbud or
+	  zsmalloc.
 
-	  See Documentation/vm/soft-dirty.txt for more details.
+config ZBUD
+	tristate "Low density storage for compressed pages"
+	default n
+	help
+	  A special purpose allocator for storing compressed pages.
+	  It is designed to store up to two compressed pages per physical
+	  page.  While this design limits storage density, it has simple and
+	  deterministic reclaim properties that make it preferable to a higher
+	  density approach when reclaim will be used.
 
 config ZSMALLOC
 	tristate "Memory allocator for compressed pages"
diff --git a/mm/Makefile b/mm/Makefile
index 8338473c329a..632ae77e6070 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
 obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
+obj-$(CONFIG_ZPOOL)	+= zpool.o
 obj-$(CONFIG_ZBUD)	+= zbud.o
 obj-$(CONFIG_ZSMALLOC)	+= zsmalloc.o
 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
diff --git a/mm/zpool.c b/mm/zpool.c
new file mode 100644
index 000000000000..e40612a1df00
--- /dev/null
+++ b/mm/zpool.c
@@ -0,0 +1,364 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for memory storage pool implementations.
+ * Typically, this is used to store compressed memory.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/zpool.h>
+
+struct zpool {
+	char *type;
+
+	struct zpool_driver *driver;
+	void *pool;
+	struct zpool_ops *ops;
+
+	struct list_head list;
+};
+
+static LIST_HEAD(drivers_head);
+static DEFINE_SPINLOCK(drivers_lock);
+
+static LIST_HEAD(pools_head);
+static DEFINE_SPINLOCK(pools_lock);
+
+/**
+ * zpool_register_driver() - register a zpool implementation.
+ * @driver:	driver to register
+ */
+void zpool_register_driver(struct zpool_driver *driver)
+{
+	spin_lock(&drivers_lock);
+	atomic_set(&driver->refcount, 0);
+	list_add(&driver->list, &drivers_head);
+	spin_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(zpool_register_driver);
+
+/**
+ * zpool_unregister_driver() - unregister a zpool implementation.
+ * @driver:	driver to unregister.
+ *
+ * Module usage counting is used to prevent using a driver
+ * while/after unloading, so if this is called from module
+ * exit function, this should never fail; if called from
+ * other than the module exit function, and this returns
+ * failure, the driver is in use and must remain available.
+ */
+int zpool_unregister_driver(struct zpool_driver *driver)
+{
+	int ret = 0, refcount;
+
+	spin_lock(&drivers_lock);
+	refcount = atomic_read(&driver->refcount);
+	WARN_ON(refcount < 0);
+	if (refcount > 0)
+		ret = -EBUSY;
+	else
+		list_del(&driver->list);
+	spin_unlock(&drivers_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(zpool_unregister_driver);
+
+/**
+ * zpool_evict() - evict callback from a zpool implementation.
+ * @pool:	pool to evict from.
+ * @handle:	handle to evict.
+ *
+ * This can be used by zpool implementations to call the
+ * user's evict zpool_ops struct evict callback.
+ */
+int zpool_evict(void *pool, unsigned long handle)
+{
+	struct zpool *zpool;
+
+	spin_lock(&pools_lock);
+	list_for_each_entry(zpool, &pools_head, list) {
+		if (zpool->pool == pool) {
+			spin_unlock(&pools_lock);
+			if (!zpool->ops || !zpool->ops->evict)
+				return -EINVAL;
+			return zpool->ops->evict(zpool, handle);
+		}
+	}
+	spin_unlock(&pools_lock);
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(zpool_evict);
+
+static struct zpool_driver *zpool_get_driver(char *type)
+{
+	struct zpool_driver *driver;
+
+	spin_lock(&drivers_lock);
+	list_for_each_entry(driver, &drivers_head, list) {
+		if (!strcmp(driver->type, type)) {
+			bool got = try_module_get(driver->owner);
+
+			if (got)
+				atomic_inc(&driver->refcount);
+			spin_unlock(&drivers_lock);
+			return got ? driver : NULL;
+		}
+	}
+
+	spin_unlock(&drivers_lock);
+	return NULL;
+}
+
+static void zpool_put_driver(struct zpool_driver *driver)
+{
+	atomic_dec(&driver->refcount);
+	module_put(driver->owner);
+}
+
+/**
+ * zpool_create_pool() - Create a new zpool
+ * @type	The type of the zpool to create (e.g. zbud, zsmalloc)
+ * @gfp		The GFP flags to use when allocating the pool.
+ * @ops		The optional ops callback.
+ *
+ * This creates a new zpool of the specified type.  The gfp flags will be
+ * used when allocating memory, if the implementation supports it.  If the
+ * ops param is NULL, then the created zpool will not be shrinkable.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: New zpool on success, NULL on failure.
+ */
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
+{
+	struct zpool_driver *driver;
+	struct zpool *zpool;
+
+	pr_info("creating pool type %s\n", type);
+
+	driver = zpool_get_driver(type);
+
+	if (!driver) {
+		request_module(type);
+		driver = zpool_get_driver(type);
+	}
+
+	if (!driver) {
+		pr_err("no driver for type %s\n", type);
+		return NULL;
+	}
+
+	zpool = kmalloc(sizeof(*zpool), gfp);
+	if (!zpool) {
+		pr_err("couldn't create zpool - out of memory\n");
+		zpool_put_driver(driver);
+		return NULL;
+	}
+
+	zpool->type = driver->type;
+	zpool->driver = driver;
+	zpool->pool = driver->create(gfp, ops);
+	zpool->ops = ops;
+
+	if (!zpool->pool) {
+		pr_err("couldn't create %s pool\n", type);
+		zpool_put_driver(driver);
+		kfree(zpool);
+		return NULL;
+	}
+
+	pr_info("created %s pool\n", type);
+
+	spin_lock(&pools_lock);
+	list_add(&zpool->list, &pools_head);
+	spin_unlock(&pools_lock);
+
+	return zpool;
+}
+
+/**
+ * zpool_destroy_pool() - Destroy a zpool
+ * @pool	The zpool to destroy.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when destroying different pools.  The same
+ * pool should only be destroyed once, and should not be used
+ * after it is destroyed.
+ *
+ * This destroys an existing zpool.  The zpool should not be in use.
+ */
+void zpool_destroy_pool(struct zpool *zpool)
+{
+	pr_info("destroying pool type %s\n", zpool->type);
+
+	spin_lock(&pools_lock);
+	list_del(&zpool->list);
+	spin_unlock(&pools_lock);
+	zpool->driver->destroy(zpool->pool);
+	zpool_put_driver(zpool->driver);
+	kfree(zpool);
+}
+
+/**
+ * zpool_get_type() - Get the type of the zpool
+ * @pool	The zpool to check
+ *
+ * This returns the type of the pool.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: The type of zpool.
+ */
+char *zpool_get_type(struct zpool *zpool)
+{
+	return zpool->type;
+}
+
+/**
+ * zpool_malloc() - Allocate memory
+ * @pool	The zpool to allocate from.
+ * @size	The amount of memory to allocate.
+ * @gfp		The GFP flags to use when allocating memory.
+ * @handle	Pointer to the handle to set
+ *
+ * This allocates the requested amount of memory from the pool.
+ * The gfp flags will be used when allocating memory, if the
+ * implementation supports it.  The provided @handle will be
+ * set to the allocated object handle.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error.
+ */
+int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp,
+			unsigned long *handle)
+{
+	return zpool->driver->malloc(zpool->pool, size, gfp, handle);
+}
+
+/**
+ * zpool_free() - Free previously allocated memory
+ * @pool	The zpool that allocated the memory.
+ * @handle	The handle to the memory to free.
+ *
+ * This frees previously allocated memory.  This does not guarantee
+ * that the pool will actually free memory, only that the memory
+ * in the pool will become available for use by the pool.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when freeing different handles.  The same
+ * handle should only be freed once, and should not be used
+ * after freeing.
+ */
+void zpool_free(struct zpool *zpool, unsigned long handle)
+{
+	zpool->driver->free(zpool->pool, handle);
+}
+
+/**
+ * zpool_shrink() - Shrink the pool size
+ * @pool	The zpool to shrink.
+ * @pages	The number of pages to shrink the pool.
+ * @reclaimed	The number of pages successfully evicted.
+ *
+ * This attempts to shrink the actual memory size of the pool
+ * by evicting currently used handle(s).  If the pool was
+ * created with no zpool_ops, or the evict call fails for any
+ * of the handles, this will fail.  If non-NULL, the @reclaimed
+ * parameter will be set to the number of pages reclaimed,
+ * which may be more than the number of pages requested.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error/failure.
+ */
+int zpool_shrink(struct zpool *zpool, unsigned int pages,
+			unsigned int *reclaimed)
+{
+	return zpool->driver->shrink(zpool->pool, pages, reclaimed);
+}
+
+/**
+ * zpool_map_handle() - Map a previously allocated handle into memory
+ * @pool	The zpool that the handle was allocated from
+ * @handle	The handle to map
+ * @mm		How the memory should be mapped
+ *
+ * This maps a previously allocated handle into memory.  The @mm
+ * param indicates to the implementation how the memory will be
+ * used, i.e. read-only, write-only, read-write.  If the
+ * implementation does not support it, the memory will be treated
+ * as read-write.
+ *
+ * This may hold locks, disable interrupts, and/or preemption,
+ * and the zpool_unmap_handle() must be called to undo those
+ * actions.  The code that uses the mapped handle should complete
+ * its operatons on the mapped handle memory quickly and unmap
+ * as soon as possible.  As the implementation may use per-cpu
+ * data, multiple handles should not be mapped concurrently on
+ * any cpu.
+ *
+ * Returns: A pointer to the handle's mapped memory area.
+ */
+void *zpool_map_handle(struct zpool *zpool, unsigned long handle,
+			enum zpool_mapmode mapmode)
+{
+	return zpool->driver->map(zpool->pool, handle, mapmode);
+}
+
+/**
+ * zpool_unmap_handle() - Unmap a previously mapped handle
+ * @pool	The zpool that the handle was allocated from
+ * @handle	The handle to unmap
+ *
+ * This unmaps a previously mapped handle.  Any locks or other
+ * actions that the implementation took in zpool_map_handle()
+ * will be undone here.  The memory area returned from
+ * zpool_map_handle() should no longer be used after this.
+ */
+void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
+{
+	zpool->driver->unmap(zpool->pool, handle);
+}
+
+/**
+ * zpool_get_total_size() - The total size of the pool
+ * @pool	The zpool to check
+ *
+ * This returns the total size in bytes of the pool.
+ *
+ * Returns: Total size of the zpool in bytes.
+ */
+u64 zpool_get_total_size(struct zpool *zpool)
+{
+	return zpool->driver->total_size(zpool->pool);
+}
+
+static int __init init_zpool(void)
+{
+	pr_info("loaded\n");
+	return 0;
+}
+
+static void __exit exit_zpool(void)
+{
+	pr_info("unloaded\n");
+}
+
+module_init(init_zpool);
+module_exit(exit_zpool);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("Common API for compressed memory storage");
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index bb62a4adc328..6a1827d3d231 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -240,7 +240,6 @@ struct mapping_area {
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
 
-
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
 
-- 
cgit v1.2.3


From 68be302963230fa76600cd598935a830ac95dca2 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 6 Aug 2014 16:08:45 -0700
Subject: fs.h, drivers/hwmon/asus_atk0110.c: fix DEFINE_SIMPLE_ATTRIBUTE
 semicolon definition and use

The DEFINE_SIMPLE_ATTRIBUTE macro should not end in a ; Fix the one use
in the kernel tree that did not have a semicolon.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Luca Tettamanti <kronos.it@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/asus_atk0110.c | 2 +-
 include/linux/fs.h           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index ae208f612198..cccef87963e0 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val)
 DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm,
 			atk_debugfs_gitm_get,
 			NULL,
-			"0x%08llx\n")
+			"0x%08llx\n");
 
 static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2daccaf4b547..1ab6c6913040 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2688,7 +2688,7 @@ static const struct file_operations __fops = {				\
 	.read	 = simple_attr_read,					\
 	.write	 = simple_attr_write,					\
 	.llseek	 = generic_file_llseek,					\
-};
+}
 
 static inline __printf(1, 2)
 void __simple_attr_check_format(const char *fmt, ...)
-- 
cgit v1.2.3


From 90a856436ddafbe0c6f8c18d7fc21aed3784e227 Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoff@infradead.org>
Date: Wed, 6 Aug 2014 16:08:47 -0700
Subject: include/linux/byteorder/generic.h: minor comment fix

Signed-off-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/byteorder/generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 0846e6b931ce..89f67c1c3160 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -2,7 +2,7 @@
 #define _LINUX_BYTEORDER_GENERIC_H
 
 /*
- * linux/byteorder_generic.h
+ * linux/byteorder/generic.h
  * Generic Byte-reordering support
  *
  * The "... p" macros, like le64_to_cpup, can be used with pointers
-- 
cgit v1.2.3


From 42a9dc0b3d0f749375c767c7d5cab56e89160576 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 6 Aug 2014 16:09:01 -0700
Subject: printk: rename DEFAULT_MESSAGE_LOGLEVEL

Commit a8fe19ebfbfd ("kernel/printk: use symbolic defines for console
loglevels") makes consistent use of symbolic values for printk() log
levels.

The naming scheme used is different from the one used for
DEFAULT_MESSAGE_LOGLEVEL though.  Change that symbol name to be
MESSAGE_LOGLEVEL_DEFAULT for consistency.  And because the value of that
symbol comes from a similarly-named config option, rename
CONFIG_DEFAULT_MESSAGE_LOGLEVEL as well.

Signed-off-by: Alex Elder <elder@linaro.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Jan Kara <jack@suse.cz>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Petr Mladek <pmladek@suse.cz>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h | 2 +-
 kernel/printk/printk.c | 2 +-
 lib/Kconfig.debug      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 319ff7e53efb..0990997a5304 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer)
 }
 
 /* printk's without a loglevel use this.. */
-#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
+#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
 
 /* We show everything that is MORE important than this.. */
 #define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ec3bfb0b1f62..770ed4821ba9 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -56,7 +56,7 @@
 
 int console_printk[4] = {
 	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
-	DEFAULT_MESSAGE_LOGLEVEL,	/* default_message_loglevel */
+	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
 	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
 	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
 };
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cfe7df8f62cc..cb45f59685e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -15,7 +15,7 @@ config PRINTK_TIME
 	  The behavior is also controlled by the kernel command line
 	  parameter printk.time=1. See Documentation/kernel-parameters.txt
 
-config DEFAULT_MESSAGE_LOGLEVEL
+config MESSAGE_LOGLEVEL_DEFAULT
 	int "Default message log level (1-7)"
 	range 1 7
 	default "4"
-- 
cgit v1.2.3


From bc18dd335a161f9229ed3aaab88ce0706cbd9867 Mon Sep 17 00:00:00 2001
From: Ken Helias <kenhelias@firemail.de>
Date: Wed, 6 Aug 2014 16:09:14 -0700
Subject: list: make hlist_add_after() argument names match
 hlist_add_after_rcu()

The argument names for hlist_add_after() are poorly chosen because they
look the same as the ones for hlist_add_before() but have to be used
differently.

hlist_add_after_rcu() has made a better choice.

Signed-off-by: Ken Helias <kenhelias@firemail.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index ef9594171062..624ec7f48293 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n,
 	*(n->pprev) = n;
 }
 
-static inline void hlist_add_after(struct hlist_node *n,
-					struct hlist_node *next)
+static inline void hlist_add_after(struct hlist_node *prev,
+				   struct hlist_node *n)
 {
-	next->next = n->next;
-	n->next = next;
-	next->pprev = &n->next;
+	n->next = prev->next;
+	prev->next = n;
+	n->pprev = &prev->next;
 
-	if(next->next)
-		next->next->pprev  = &next->next;
+	if (n->next)
+		n->next->pprev  = &n->next;
 }
 
 /* after that we'll appear to be on some hlist and hlist_del will work */
-- 
cgit v1.2.3


From 1d023284c31a4e40a94d5bbcb7dbb7a35ee0bcbc Mon Sep 17 00:00:00 2001
From: Ken Helias <kenhelias@firemail.de>
Date: Wed, 6 Aug 2014 16:09:16 -0700
Subject: list: fix order of arguments for hlist_add_after(_rcu)

All other add functions for lists have the new item as first argument
and the position where it is added as second argument.  This was changed
for no good reason in this function and makes using it unnecessary
confusing.

The name was changed to hlist_add_behind() to cause unconverted code to
generate a compile error instead of using the wrong parameter order.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Ken Helias <kenhelias@firemail.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>	[intel driver bits]
Cc: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/RCU/whatisRCU.txt                  | 2 +-
 drivers/gpu/drm/drm_hashtab.c                    | 2 +-
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c   | 2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +-
 drivers/staging/lustre/lustre/libcfs/hash.c      | 4 ++--
 fs/namespace.c                                   | 2 +-
 fs/notify/inode_mark.c                           | 2 +-
 fs/notify/vfsmount_mark.c                        | 2 +-
 include/linux/list.h                             | 4 ++--
 include/linux/rculist.h                          | 8 ++++----
 net/batman-adv/fragmentation.c                   | 2 +-
 net/bridge/br_multicast.c                        | 2 +-
 net/ipv4/fib_trie.c                              | 2 +-
 net/ipv6/addrlabel.c                             | 2 +-
 net/xfrm/xfrm_policy.c                           | 4 ++--
 15 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 49b8551a3b68..e48c57f1943b 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -818,7 +818,7 @@ RCU pointer/list update:
 	list_add_tail_rcu
 	list_del_rcu
 	list_replace_rcu
-	hlist_add_after_rcu
+	hlist_add_behind_rcu
 	hlist_add_before_rcu
 	hlist_add_head_rcu
 	hlist_del_rcu
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 7e4bae760e27..c3b80fd65d62 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 		parent = &entry->head;
 	}
 	if (parent) {
-		hlist_add_after_rcu(parent, &item->head);
+		hlist_add_behind_rcu(&item->head, parent);
 	} else {
 		hlist_add_head_rcu(&item->head, h_list);
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 681a9e81ff51..e8ba7470700a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1948,7 +1948,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
 
 	/* add filter to the list */
 	if (parent)
-		hlist_add_after(&parent->fdir_node, &input->fdir_node);
+		hlist_add_behind(&input->fdir_node, &parent->fdir_node);
 	else
 		hlist_add_head(&input->fdir_node,
 			       &pf->fdir_filter_list);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 94a1c07efeb0..e4100b5737b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 
 	/* add filter to the list */
 	if (parent)
-		hlist_add_after(&parent->fdir_node, &input->fdir_node);
+		hlist_add_behind(&input->fdir_node, &parent->fdir_node);
 	else
 		hlist_add_head(&input->fdir_node,
 			       &adapter->fdir_filter_list);
diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c
index 5dde79418297..8ef1deb59d4a 100644
--- a/drivers/staging/lustre/lustre/libcfs/hash.c
+++ b/drivers/staging/lustre/lustre/libcfs/hash.c
@@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 					    cfs_hash_dhead_t, dh_head);
 
 	if (dh->dh_tail != NULL) /* not empty */
-		hlist_add_after(dh->dh_tail, hnode);
+		hlist_add_behind(hnode, dh->dh_tail);
 	else /* empty list */
 		hlist_add_head(hnode, &dh->dh_head);
 	dh->dh_tail = hnode;
@@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 						cfs_hash_dhead_dep_t, dd_head);
 
 	if (dh->dd_tail != NULL) /* not empty */
-		hlist_add_after(dh->dd_tail, hnode);
+		hlist_add_behind(hnode, dh->dd_tail);
 	else /* empty list */
 		hlist_add_head(hnode, &dh->dd_head);
 	dh->dd_tail = hnode;
diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41cd887..2a1447c946e7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
 	list_splice(&head, n->list.prev);
 
 	if (shadows)
-		hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+		hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
 	else
 		hlist_add_head_rcu(&mnt->mnt_hash,
 				m_hash(&parent->mnt, mnt->mnt_mountpoint));
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..9ce062218de9 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
 	BUG_ON(last == NULL);
 	/* mark should be the last entry.  last is the current last entry */
-	hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
+	hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
 out:
 	fsnotify_recalc_inode_mask_locked(inode);
 	spin_unlock(&inode->i_lock);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 68ca5a8704b5..ac851e8376b1 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 
 	BUG_ON(last == NULL);
 	/* mark should be the last entry.  last is the current last entry */
-	hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
+	hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
 out:
 	fsnotify_recalc_vfsmount_mask_locked(mnt);
 	spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/include/linux/list.h b/include/linux/list.h
index 624ec7f48293..cbbb96fcead9 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -654,8 +654,8 @@ static inline void hlist_add_before(struct hlist_node *n,
 	*(n->pprev) = n;
 }
 
-static inline void hlist_add_after(struct hlist_node *prev,
-				   struct hlist_node *n)
+static inline void hlist_add_behind(struct hlist_node *n,
+				    struct hlist_node *prev)
 {
 	n->next = prev->next;
 	prev->next = n;
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8183b46fbaa2..372ad5e0dcb8 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
 }
 
 /**
- * hlist_add_after_rcu
- * @prev: the existing element to add the new element after.
+ * hlist_add_behind_rcu
  * @n: the new element to add to the hash list.
+ * @prev: the existing element to add the new element after.
  *
  * Description:
  * Adds the specified element to the specified hlist
@@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
  * hlist_for_each_entry_rcu(), used to prevent memory-consistency
  * problems on Alpha CPUs.
  */
-static inline void hlist_add_after_rcu(struct hlist_node *prev,
-				       struct hlist_node *n)
+static inline void hlist_add_behind_rcu(struct hlist_node *n,
+					struct hlist_node *prev)
 {
 	n->next = prev->next;
 	n->pprev = &prev->next;
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 022d18ab27a6..52c43f904220 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
 
 	/* Reached the end of the list, so insert after 'frag_entry_last'. */
 	if (likely(frag_entry_last)) {
-		hlist_add_after(&frag_entry_last->list, &frag_entry_new->list);
+		hlist_add_behind(&frag_entry_last->list, &frag_entry_new->list);
 		chain->size += skb->len - hdr_size;
 		chain->timestamp = jiffies;
 		ret = true;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b4845f4b2bb4..7751c92c8c57 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
 	}
 
 	if (slot)
-		hlist_add_after_rcu(slot, &port->rlist);
+		hlist_add_behind_rcu(&port->rlist, slot);
 	else
 		hlist_add_head_rcu(&port->rlist, &br->router_list);
 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5afeb5aa4c7c..e9cb2588e416 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
 			last = li;
 		}
 		if (last)
-			hlist_add_after_rcu(&last->hlist, &new->hlist);
+			hlist_add_behind_rcu(&new->hlist, &last->hlist);
 		else
 			hlist_add_before_rcu(&new->hlist, &li->hlist);
 	}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 731e1e1722d9..fd0dc47f471d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
 		last = p;
 	}
 	if (last)
-		hlist_add_after_rcu(&last->list, &newp->list);
+		hlist_add_behind_rcu(&newp->list, &last->list);
 	else
 		hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
 out:
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0525d78ba328..beeed602aeb3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -389,7 +389,7 @@ redo:
 			if (h != h0)
 				continue;
 			hlist_del(&pol->bydst);
-			hlist_add_after(entry0, &pol->bydst);
+			hlist_add_behind(&pol->bydst, entry0);
 		}
 		entry0 = &pol->bydst;
 	}
@@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 			break;
 	}
 	if (newpos)
-		hlist_add_after(newpos, &policy->bydst);
+		hlist_add_behind(&policy->bydst, newpos);
 	else
 		hlist_add_head(&policy->bydst, chain);
 	xfrm_pol_hold(policy);
-- 
cgit v1.2.3


From 0f9859ca92c9182bcb8f18c55cae1a04627cbb59 Mon Sep 17 00:00:00 2001
From: Ken Helias <kenhelias@firemail.de>
Date: Wed, 6 Aug 2014 16:09:18 -0700
Subject: klist: use same naming scheme as hlist for klist_add_after()

The name was modified from hlist_add_after() to hlist_add_behind() when
adjusting the order of arguments to match the one with
klist_add_after().  This is necessary to break old code when it would
use it the wrong way.

Make klist follow this naming scheme for consistency.

Signed-off-by: Ken Helias <kenhelias@firemail.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/klist.h | 2 +-
 lib/klist.c           | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/klist.h b/include/linux/klist.h
index a370ce57cf1d..61e5b723ae73 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -44,7 +44,7 @@ struct klist_node {
 
 extern void klist_add_tail(struct klist_node *n, struct klist *k);
 extern void klist_add_head(struct klist_node *n, struct klist *k);
-extern void klist_add_after(struct klist_node *n, struct klist_node *pos);
+extern void klist_add_behind(struct klist_node *n, struct klist_node *pos);
 extern void klist_add_before(struct klist_node *n, struct klist_node *pos);
 
 extern void klist_del(struct klist_node *n);
diff --git a/lib/klist.c b/lib/klist.c
index 358a368a2947..89b485a2a58d 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
 EXPORT_SYMBOL_GPL(klist_add_tail);
 
 /**
- * klist_add_after - Init a klist_node and add it after an existing node
+ * klist_add_behind - Init a klist_node and add it after an existing node
  * @n: node we're adding.
  * @pos: node to put @n after
  */
-void klist_add_after(struct klist_node *n, struct klist_node *pos)
+void klist_add_behind(struct klist_node *n, struct klist_node *pos)
 {
 	struct klist *k = knode_klist(pos);
 
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
 	list_add(&n->n_node, &pos->n_node);
 	spin_unlock(&k->k_lock);
 }
-EXPORT_SYMBOL_GPL(klist_add_after);
+EXPORT_SYMBOL_GPL(klist_add_behind);
 
 /**
  * klist_add_before - Init a klist_node and add it before an existing node
-- 
cgit v1.2.3


From 62e7ca5280fd8cbf523970757e13f0324ce0daa0 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Wed, 6 Aug 2014 16:09:21 -0700
Subject: zlib: clean up some dead code

Cleanup unused `if 0'-ed functions, which have been dead since 2006
(commits 87c2ce3b9305 ("lib/zlib*: cleanups") by Adrian Bunk and
4f3865fb57a0 ("zlib_inflate: Upgrade library code to a recent version")
by Richard Purdie):

 - zlib_deflateSetDictionary
 - zlib_deflateParams
 - zlib_deflateCopy
 - zlib_inflateSync
 - zlib_syncsearch
 - zlib_inflateSetDictionary
 - zlib_inflatePrime

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zlib.h       | 118 -------------------------------------
 lib/zlib_deflate/deflate.c | 143 ---------------------------------------------
 lib/zlib_inflate/inflate.c | 132 -----------------------------------------
 3 files changed, 393 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 9c5a6b4de0a3..197abb2a54c5 100644
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm,
    method). msg is set to null if there is no error message.  deflateInit2 does
    not perform any compression: this will be done by deflate().
 */
-                            
-#if 0
-extern int zlib_deflateSetDictionary (z_streamp strm,
-						     const Byte *dictionary,
-						     uInt  dictLength);
-#endif
-/*
-     Initializes the compression dictionary from the given byte sequence
-   without producing any compressed output. This function must be called
-   immediately after deflateInit, deflateInit2 or deflateReset, before any
-   call of deflate. The compressor and decompressor must use exactly the same
-   dictionary (see inflateSetDictionary).
-
-     The dictionary should consist of strings (byte sequences) that are likely
-   to be encountered later in the data to be compressed, with the most commonly
-   used strings preferably put towards the end of the dictionary. Using a
-   dictionary is most useful when the data to be compressed is short and can be
-   predicted with good accuracy; the data can then be compressed better than
-   with the default empty dictionary.
-
-     Depending on the size of the compression data structures selected by
-   deflateInit or deflateInit2, a part of the dictionary may in effect be
-   discarded, for example if the dictionary is larger than the window size in
-   deflate or deflate2. Thus the strings most likely to be useful should be
-   put at the end of the dictionary, not at the front.
-
-     Upon return of this function, strm->adler is set to the Adler32 value
-   of the dictionary; the decompressor may later use this value to determine
-   which dictionary has been used by the compressor. (The Adler32 value
-   applies to the whole dictionary even if only a subset of the dictionary is
-   actually used by the compressor.)
-
-     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
-   parameter is invalid (such as NULL dictionary) or the stream state is
-   inconsistent (for example if deflate has already been called for this stream
-   or if the compression method is bsort). deflateSetDictionary does not
-   perform any compression: this will be done by deflate().
-*/
-
-#if 0
-extern int zlib_deflateCopy (z_streamp dest, z_streamp source);
-#endif
-
-/*
-     Sets the destination stream as a complete copy of the source stream.
-
-     This function can be useful when several compression strategies will be
-   tried, for example when there are several ways of pre-processing the input
-   data with a filter. The streams that will be discarded should then be freed
-   by calling deflateEnd.  Note that deflateCopy duplicates the internal
-   compression state which can be quite large, so this strategy is slow and
-   can consume lots of memory.
-
-     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
-   (such as zalloc being NULL). msg is left unchanged in both source and
-   destination.
-*/
 
 extern int zlib_deflateReset (z_streamp strm);
 /*
@@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s)
 	return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
 }
 
-#if 0
-extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
-#endif
-/*
-     Dynamically update the compression level and compression strategy.  The
-   interpretation of level and strategy is as in deflateInit2.  This can be
-   used to switch between compression and straight copy of the input data, or
-   to switch to a different kind of input data requiring a different
-   strategy. If the compression level is changed, the input available so far
-   is compressed with the old level (and may be flushed); the new level will
-   take effect only at the next call of deflate().
-
-     Before the call of deflateParams, the stream state must be set as for
-   a call of deflate(), since the currently available input may have to
-   be compressed and flushed. In particular, strm->avail_out must be non-zero.
-
-     deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
-   stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
-   if strm->avail_out was zero.
-*/
-
 /*   
 extern int inflateInit2 (z_streamp strm, int  windowBits);
 
@@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int  windowBits);
    and avail_out are unchanged.)
 */
 
-extern int zlib_inflateSetDictionary (z_streamp strm,
-						     const Byte *dictionary,
-						     uInt  dictLength);
-/*
-     Initializes the decompression dictionary from the given uncompressed byte
-   sequence. This function must be called immediately after a call of inflate,
-   if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
-   can be determined from the adler32 value returned by that call of inflate.
-   The compressor and decompressor must use exactly the same dictionary (see
-   deflateSetDictionary).  For raw inflate, this function can be called
-   immediately after inflateInit2() or inflateReset() and before any call of
-   inflate() to set the dictionary.  The application must insure that the
-   dictionary that was used for compression is provided.
-
-     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
-   parameter is invalid (such as NULL dictionary) or the stream state is
-   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
-   expected one (incorrect adler32 value). inflateSetDictionary does not
-   perform any decompression: this will be done by subsequent calls of
-   inflate().
-*/
-
-#if 0
-extern int zlib_inflateSync (z_streamp strm);
-#endif
-/* 
-    Skips invalid compressed data until a full flush point (see above the
-  description of deflate with Z_FULL_FLUSH) can be found, or until all
-  available input is skipped. No output is provided.
-
-    inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
-  if no more input was provided, Z_DATA_ERROR if no flush point has been found,
-  or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
-  case, the application may save the current current value of total_in which
-  indicates where valid compressed data was found. In the error case, the
-  application may repeatedly call inflateSync, providing more input each time,
-  until success or end of the input data.
-*/
-
 extern int zlib_inflateReset (z_streamp strm);
 /*
      This function is equivalent to inflateEnd followed by inflateInit,
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index d63381e8e333..d20ef458f137 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -249,52 +249,6 @@ int zlib_deflateInit2(
     return zlib_deflateReset(strm);
 }
 
-/* ========================================================================= */
-#if 0
-int zlib_deflateSetDictionary(
-	z_streamp strm,
-	const Byte *dictionary,
-	uInt  dictLength
-)
-{
-    deflate_state *s;
-    uInt length = dictLength;
-    uInt n;
-    IPos hash_head = 0;
-
-    if (strm == NULL || strm->state == NULL || dictionary == NULL)
-	return Z_STREAM_ERROR;
-
-    s = (deflate_state *) strm->state;
-    if (s->status != INIT_STATE) return Z_STREAM_ERROR;
-
-    strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
-
-    if (length < MIN_MATCH) return Z_OK;
-    if (length > MAX_DIST(s)) {
-	length = MAX_DIST(s);
-#ifndef USE_DICT_HEAD
-	dictionary += dictLength - length; /* use the tail of the dictionary */
-#endif
-    }
-    memcpy((char *)s->window, dictionary, length);
-    s->strstart = length;
-    s->block_start = (long)length;
-
-    /* Insert all strings in the hash table (except for the last two bytes).
-     * s->lookahead stays null, so s->ins_h will be recomputed at the next
-     * call of fill_window.
-     */
-    s->ins_h = s->window[0];
-    UPDATE_HASH(s, s->ins_h, s->window[1]);
-    for (n = 0; n <= length - MIN_MATCH; n++) {
-	INSERT_STRING(s, n, hash_head);
-    }
-    if (hash_head) hash_head = 0;  /* to make compiler happy */
-    return Z_OK;
-}
-#endif  /*  0  */
-
 /* ========================================================================= */
 int zlib_deflateReset(
 	z_streamp strm
@@ -326,45 +280,6 @@ int zlib_deflateReset(
     return Z_OK;
 }
 
-/* ========================================================================= */
-#if 0
-int zlib_deflateParams(
-	z_streamp strm,
-	int level,
-	int strategy
-)
-{
-    deflate_state *s;
-    compress_func func;
-    int err = Z_OK;
-
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    s = (deflate_state *) strm->state;
-
-    if (level == Z_DEFAULT_COMPRESSION) {
-	level = 6;
-    }
-    if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
-	return Z_STREAM_ERROR;
-    }
-    func = configuration_table[s->level].func;
-
-    if (func != configuration_table[level].func && strm->total_in != 0) {
-	/* Flush the last buffer: */
-	err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
-    }
-    if (s->level != level) {
-	s->level = level;
-	s->max_lazy_match   = configuration_table[level].max_lazy;
-	s->good_match       = configuration_table[level].good_length;
-	s->nice_match       = configuration_table[level].nice_length;
-	s->max_chain_length = configuration_table[level].max_chain;
-    }
-    s->strategy = strategy;
-    return err;
-}
-#endif  /*  0  */
-
 /* =========================================================================
  * Put a short in the pending buffer. The 16-bit value is put in MSB order.
  * IN assertion: the stream state is correct and there is enough room in
@@ -568,64 +483,6 @@ int zlib_deflateEnd(
     return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
 }
 
-/* =========================================================================
- * Copy the source state to the destination state.
- */
-#if 0
-int zlib_deflateCopy (
-	z_streamp dest,
-	z_streamp source
-)
-{
-#ifdef MAXSEG_64K
-    return Z_STREAM_ERROR;
-#else
-    deflate_state *ds;
-    deflate_state *ss;
-    ush *overlay;
-    deflate_workspace *mem;
-
-
-    if (source == NULL || dest == NULL || source->state == NULL) {
-        return Z_STREAM_ERROR;
-    }
-
-    ss = (deflate_state *) source->state;
-
-    *dest = *source;
-
-    mem = (deflate_workspace *) dest->workspace;
-
-    ds = &(mem->deflate_memory);
-
-    dest->state = (struct internal_state *) ds;
-    *ds = *ss;
-    ds->strm = dest;
-
-    ds->window = (Byte *) mem->window_memory;
-    ds->prev   = (Pos *)  mem->prev_memory;
-    ds->head   = (Pos *)  mem->head_memory;
-    overlay = (ush *) mem->overlay_memory;
-    ds->pending_buf = (uch *) overlay;
-
-    memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
-    memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
-    memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
-    memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-
-    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-
-    ds->l_desc.dyn_tree = ds->dyn_ltree;
-    ds->d_desc.dyn_tree = ds->dyn_dtree;
-    ds->bl_desc.dyn_tree = ds->bl_tree;
-
-    return Z_OK;
-#endif
-}
-#endif  /*  0  */
-
 /* ===========================================================================
  * Read a new buffer from the current input stream, update the adler32
  * and total number of bytes read.  All deflate() input goes through
diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c
index f5ce87b0800e..58a733b10387 100644
--- a/lib/zlib_inflate/inflate.c
+++ b/lib/zlib_inflate/inflate.c
@@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm)
     return Z_OK;
 }
 
-#if 0
-int zlib_inflatePrime(z_streamp strm, int bits, int value)
-{
-    struct inflate_state *state;
-
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
-    value &= (1L << bits) - 1;
-    state->hold += value << state->bits;
-    state->bits += bits;
-    return Z_OK;
-}
-#endif
-
 int zlib_inflateInit2(z_streamp strm, int windowBits)
 {
     struct inflate_state *state;
@@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm)
     return Z_OK;
 }
 
-#if 0
-int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
-        uInt dictLength)
-{
-    struct inflate_state *state;
-    unsigned long id;
-
-    /* check state */
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (state->wrap != 0 && state->mode != DICT)
-        return Z_STREAM_ERROR;
-
-    /* check for correct dictionary id */
-    if (state->mode == DICT) {
-        id = zlib_adler32(0L, NULL, 0);
-        id = zlib_adler32(id, dictionary, dictLength);
-        if (id != state->check)
-            return Z_DATA_ERROR;
-    }
-
-    /* copy dictionary to window */
-    zlib_updatewindow(strm, strm->avail_out);
-
-    if (dictLength > state->wsize) {
-        memcpy(state->window, dictionary + dictLength - state->wsize,
-                state->wsize);
-        state->whave = state->wsize;
-    }
-    else {
-        memcpy(state->window + state->wsize - dictLength, dictionary,
-                dictLength);
-        state->whave = dictLength;
-    }
-    state->havedict = 1;
-    return Z_OK;
-}
-#endif
-
-#if 0
-/*
-   Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff.  Return when found
-   or when out of input.  When called, *have is the number of pattern bytes
-   found in order so far, in 0..3.  On return *have is updated to the new
-   state.  If on return *have equals four, then the pattern was found and the
-   return value is how many bytes were read including the last byte of the
-   pattern.  If *have is less than four, then the pattern has not been found
-   yet and the return value is len.  In the latter case, zlib_syncsearch() can be
-   called again with more data and the *have state.  *have is initialized to
-   zero for the first call.
- */
-static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
-        unsigned len)
-{
-    unsigned got;
-    unsigned next;
-
-    got = *have;
-    next = 0;
-    while (next < len && got < 4) {
-        if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
-            got++;
-        else if (buf[next])
-            got = 0;
-        else
-            got = 4 - got;
-        next++;
-    }
-    *have = got;
-    return next;
-}
-#endif
-
-#if 0
-int zlib_inflateSync(z_streamp strm)
-{
-    unsigned len;               /* number of bytes to look at or looked at */
-    unsigned long in, out;      /* temporary to save total_in and total_out */
-    unsigned char buf[4];       /* to restore bit buffer to byte string */
-    struct inflate_state *state;
-
-    /* check parameters */
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
-
-    /* if first time, start search in bit buffer */
-    if (state->mode != SYNC) {
-        state->mode = SYNC;
-        state->hold <<= state->bits & 7;
-        state->bits -= state->bits & 7;
-        len = 0;
-        while (state->bits >= 8) {
-            buf[len++] = (unsigned char)(state->hold);
-            state->hold >>= 8;
-            state->bits -= 8;
-        }
-        state->have = 0;
-        zlib_syncsearch(&(state->have), buf, len);
-    }
-
-    /* search available input */
-    len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
-    strm->avail_in -= len;
-    strm->next_in += len;
-    strm->total_in += len;
-
-    /* return no joy or set up to restart inflate() on a new block */
-    if (state->have != 4) return Z_DATA_ERROR;
-    in = strm->total_in;  out = strm->total_out;
-    zlib_inflateReset(strm);
-    strm->total_in = in;  strm->total_out = out;
-    state->mode = TYPE;
-    return Z_OK;
-}
-#endif
-
 /*
  * This subroutine adds the data at next_in/avail_in to the output history
  * without performing any output.  The output buffer must be "caught up";
-- 
cgit v1.2.3


From b01250856b25f4417c51aa33afc451fbf7da1484 Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@horizon.com>
Date: Wed, 6 Aug 2014 16:09:23 -0700
Subject: lib: add lib/glob.c

This is a helper function from drivers/ata/libata_core.c, where it is
used to blacklist particular device models.  It's being moved to lib/ so
other drivers may use it for the same purpose.

This implementation in non-recursive, so is safe for the kernel stack.

[akpm@linux-foundation.org: fix sparse warning]
Signed-off-by: George Spelvin <linux@horizon.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/glob.h |   9 ++++
 lib/Kconfig          |  19 ++++++++
 lib/Makefile         |   2 +
 lib/glob.c           | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 153 insertions(+)
 create mode 100644 include/linux/glob.h
 create mode 100644 lib/glob.c

(limited to 'include/linux')

diff --git a/include/linux/glob.h b/include/linux/glob.h
new file mode 100644
index 000000000000..861d8347d08e
--- /dev/null
+++ b/include/linux/glob.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_GLOB_H
+#define _LINUX_GLOB_H
+
+#include <linux/types.h>	/* For bool */
+#include <linux/compiler.h>	/* For __pure */
+
+bool __pure glob_match(char const *pat, char const *str);
+
+#endif	/* _LINUX_GLOB_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index a8a775730c09..41bfeec72e40 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -396,6 +396,25 @@ config CPU_RMAP
 config DQL
 	bool
 
+config GLOB
+	bool
+#	This actually supports modular compilation, but the module overhead
+#	is ridiculous for the amount of code involved.	Until an out-of-tree
+#	driver asks for it, we'll just link it directly it into the kernel
+#	when required.  Since we're ignoring out-of-tree users,	there's also
+#	no need bother prompting for a manual decision:
+#	prompt "glob_match() function"
+	help
+	  This option provides a glob_match function for performing
+	  simple text pattern matching.  It originated in the ATA code
+	  to blacklist particular drive models, but other device drivers
+	  may need similar functionality.
+
+	  All drivers in the Linux kernel tree that require this function
+	  should automatically select this option.  Say N unless you
+	  are compiling an out-of tree driver which tells you that it
+	  depends on this.
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
diff --git a/lib/Makefile b/lib/Makefile
index 8427df95dade..d6b4bc496408 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -137,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
 
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
+obj-$(CONFIG_GLOB) += glob.o
+
 obj-$(CONFIG_MPILIB) += mpi/
 obj-$(CONFIG_SIGNATURE) += digsig.o
 
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644
index 000000000000..0ba3ea86b546
--- /dev/null
+++ b/lib/glob.c
@@ -0,0 +1,123 @@
+#include <linux/module.h>
+#include <linux/glob.h>
+
+/*
+ * The only reason this code can be compiled as a module is because the
+ * ATA code that depends on it can be as well.  In practice, they're
+ * both usually compiled in and the module overhead goes away.
+ */
+MODULE_DESCRIPTION("glob(7) matching");
+MODULE_LICENSE("Dual MIT/GPL");
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match.  The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns.  Thus, it
+ * does not preprocess the patterns.  It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+bool __pure glob_match(char const *pat, char const *str)
+{
+	/*
+	 * Backtrack to previous * on mismatch and retry starting one
+	 * character later in the string.  Because * matches all characters
+	 * (no exception for /), it can be easily proved that there's
+	 * never a need to backtrack multiple levels.
+	 */
+	char const *back_pat = NULL, *back_str = back_str;
+
+	/*
+	 * Loop over each token (character or class) in pat, matching
+	 * it against the remaining unmatched tail of str.  Return false
+	 * on mismatch, or true after matching the trailing nul bytes.
+	 */
+	for (;;) {
+		unsigned char c = *str++;
+		unsigned char d = *pat++;
+
+		switch (d) {
+		case '?':	/* Wildcard: anything but nul */
+			if (c == '\0')
+				return false;
+			break;
+		case '*':	/* Any-length wildcard */
+			if (*pat == '\0')	/* Optimize trailing * case */
+				return true;
+			back_pat = pat;
+			back_str = --str;	/* Allow zero-length match */
+			break;
+		case '[': {	/* Character class */
+			bool match = false, inverted = (*pat == '!');
+			char const *class = pat + inverted;
+			unsigned char a = *class++;
+
+			/*
+			 * Iterate over each span in the character class.
+			 * A span is either a single character a, or a
+			 * range a-b.  The first span may begin with ']'.
+			 */
+			do {
+				unsigned char b = a;
+
+				if (a == '\0')	/* Malformed */
+					goto literal;
+
+				if (class[0] == '-' && class[1] != ']') {
+					b = class[1];
+
+					if (b == '\0')
+						goto literal;
+
+					class += 2;
+					/* Any special action if a > b? */
+				}
+				match |= (a <= c && c <= b);
+			} while ((a = *class++) != ']');
+
+			if (match == inverted)
+				goto backtrack;
+			pat = class;
+			}
+			break;
+		case '\\':
+			d = *pat++;
+			/*FALLTHROUGH*/
+		default:	/* Literal character */
+literal:
+			if (c == d) {
+				if (d == '\0')
+					return true;
+				break;
+			}
+backtrack:
+			if (c == '\0' || !back_pat)
+				return false;	/* No point continuing */
+			/* Try again from last *, one character later in str. */
+			pat = back_pat;
+			str = ++back_str;
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL(glob_match);
-- 
cgit v1.2.3


From 087face5265026d4fe664bdb580f4904bd10cfbf Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 6 Aug 2014 16:09:36 -0700
Subject: kernel.h: remove deprecated pack_hex_byte

It's been nearly 3 years now since commit 55036ba76b2d ("lib: rename
pack_hex_byte() to hex_byte_pack()") so it's time to remove this
deprecated and unused static inline.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a9e2268ecccb..3dc22abbc68a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -493,11 +493,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
 	return buf;
 }
 
-static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
-{
-	return hex_byte_pack(buf, byte);
-}
-
 extern int hex_to_bin(char ch);
 extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
 
-- 
cgit v1.2.3


From 0679cc483669d08153d158273455398a389ee9ca Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:09:49 -0700
Subject: lib: bitmap: make nbits parameter of bitmap_empty unsigned

Many functions in lib/bitmap.c start with an expression such as lim =
bits/BITS_PER_LONG.  Since bits has type (signed) int, and since gcc
cannot know that it is in fact non-negative, it generates worse code
than it could.  These patches, mostly consisting of changing various
parameters to unsigned, gives a slight overall code reduction:

  add/remove: 1/1 grow/shrink: 8/16 up/down: 251/-414 (-163)
  function                                     old     new   delta
  tick_device_uses_broadcast                   335     425     +90
  __irq_alloc_descs                            498     554     +56
  __bitmap_andnot                               73     115     +42
  __bitmap_and                                  70     101     +31
  bitmap_weight                                  -      11     +11
  copy_hugetlb_page_range                      752     762     +10
  follow_hugetlb_page                          846     854      +8
  hugetlb_init                                1415    1417      +2
  hugetlb_nrpages_setup                        130     131      +1
  hugetlb_add_hstate                           377     376      -1
  bitmap_allocate_region                        82      80      -2
  select_task_rq_fair                         2202    2191     -11
  hweight_long                                  66      55     -11
  __reg_op                                     230     219     -11
  dm_stats_message                            2849    2833     -16
  bitmap_parselist                              92      74     -18
  __bitmap_weight                              115      97     -18
  __bitmap_subset                              153     129     -24
  __bitmap_full                                128     104     -24
  __bitmap_empty                               120      96     -24
  bitmap_set                                   179     149     -30
  bitmap_clear                                 185     155     -30
  __bitmap_equal                               136     105     -31
  __bitmap_intersects                          148     108     -40
  __bitmap_complement                          109      67     -42
  tick_device_setup_broadcast_func.isra         81       -     -81

[The increases in __bitmap_and{,not} are due to bug fixes 17/18,18/18.
No idea why bitmap_weight suddenly appears.] While 163 bytes treewide is
insignificant, I believe the bitmap functions are often called with
locks held, so saving even a few cycles might be worth it.

While making these changes, I found a few other things that might be
worth including.  16,17,18 are actual bug fixes.  The rest shouldn't
change the behaviour of any of the functions, provided no-one passed
negative nbits values.  If something should come up, it should be fairly
bisectable.

A few issues I thought about, but didn't know what to do with:

* Many of the functions misbehave if nbits is compile-time 0; the
  out-of-line functions generally handle 0 correctly.  bitmap_fill() is
  particularly bad, whether the 0 is known at compile time or not.  It
  would probably be nice to add detection of at least compile-time 0 and
  handle that appropriately.

* I didn't change __bitmap_shift_{left,right} to use unsigned because I
  want to fully understand why the algorithm works before making that
  change.  However, AFAICT, they behave correctly for all (positive) shift
  amounts.  This is not the case for the small_const_nbits versions.  If
  for example nbits = n = BITS_PER_LONG, the shift operators turn into
  no-ops (at least on x86), so one get *dst = *src, whereas one would
  expect to get *dst=0.  That difference in behaviour is somewhat
  annoying.

This patch (of 18):

The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative.  Since no-one passes a negative
bit-count, this shouldn't affect the semantics.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 4 ++--
 lib/bitmap.c           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 7ad634501e48..3d3fd6b2f157 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -88,7 +88,7 @@
  * lib/bitmap.c provides these functions:
  */
 
-extern int __bitmap_empty(const unsigned long *bitmap, int bits);
+extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_full(const unsigned long *bitmap, int bits);
 extern int __bitmap_equal(const unsigned long *bitmap1,
                 	const unsigned long *bitmap2, int bits);
@@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1,
 		return __bitmap_subset(src1, src2, nbits);
 }
 
-static inline int bitmap_empty(const unsigned long *src, int nbits)
+static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
 {
 	if (small_const_nbits(nbits))
 		return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06f7e4fe8d2d..378911001442 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -40,9 +40,9 @@
  * for the best explanations of this ordering.
  */
 
-int __bitmap_empty(const unsigned long *bitmap, int bits)
+int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap[k])
 			return 0;
-- 
cgit v1.2.3


From 8397927c8045c58afc68ef839855eb5505259df3 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:09:51 -0700
Subject: lib: bitmap: make nbits parameter of bitmap_full unsigned

The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative.  Since no-one passes a negative
bit-count, this shouldn't affect the semantics.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 4 ++--
 lib/bitmap.c           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3d3fd6b2f157..bc7e520d3f78 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -89,7 +89,7 @@
  */
 
 extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
-extern int __bitmap_full(const unsigned long *bitmap, int bits);
+extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_equal(const unsigned long *bitmap1,
                 	const unsigned long *bitmap2, int bits);
 extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
@@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
 		return __bitmap_empty(src, nbits);
 }
 
-static inline int bitmap_full(const unsigned long *src, int nbits)
+static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 378911001442..9859f38660f9 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
 }
 EXPORT_SYMBOL(__bitmap_empty);
 
-int __bitmap_full(const unsigned long *bitmap, int bits)
+int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (~bitmap[k])
 			return 0;
-- 
cgit v1.2.3


From 5e068069319a9fb02fb14337c2cedeae5f16d812 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:09:53 -0700
Subject: lib: bitmap: make nbits parameter of bitmap_equal unsigned

The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative.  Since no-one passes a negative
bit-count, this shouldn't affect the semantics.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 2 +-
 lib/bitmap.c           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index bc7e520d3f78..1e0f46c91125 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -91,7 +91,7 @@
 extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_equal(const unsigned long *bitmap1,
-                	const unsigned long *bitmap2, int bits);
+			  const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
 			int bits);
 extern void __bitmap_shift_right(unsigned long *dst,
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 9859f38660f9..d6bb955e71cb 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
 EXPORT_SYMBOL(__bitmap_full);
 
 int __bitmap_equal(const unsigned long *bitmap1,
-		const unsigned long *bitmap2, int bits)
+		const unsigned long *bitmap2, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap1[k] != bitmap2[k])
 			return 0;
-- 
cgit v1.2.3


From 3d6684f4e6a46f3a8263f5681e093bccbb767a1c Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:09:55 -0700
Subject: lib: bitmap: make nbits parameter of bitmap_complement unsigned

The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative.  Since no-one passes a negative
bit-count, this shouldn't affect the semantics.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 6 +++---
 lib/bitmap.c           | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 1e0f46c91125..21fb52ffe444 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -93,7 +93,7 @@ extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_equal(const unsigned long *bitmap1,
 			  const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
-			int bits);
+			unsigned int nbits);
 extern void __bitmap_shift_right(unsigned long *dst,
                         const unsigned long *src, int shift, int bits);
 extern void __bitmap_shift_left(unsigned long *dst,
@@ -222,7 +222,7 @@ static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
-			int nbits)
+			unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		*dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
@@ -231,7 +231,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
 }
 
 static inline int bitmap_equal(const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
diff --git a/lib/bitmap.c b/lib/bitmap.c
index d6bb955e71cb..0f2f845702eb 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -86,9 +86,9 @@ int __bitmap_equal(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_equal);
 
-void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		dst[k] = ~src[k];
 
-- 
cgit v1.2.3


From 65b4ee62c9cd10640f0054f47fd84c7920e8c118 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:09:57 -0700
Subject: lib: bitmap: remove unnecessary mask from bitmap_complement

Since the extra bits are "don't care", there is no reason to mask the
last word to the used bits when complementing.  This shaves off yet a
few bytes.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 2 +-
 lib/bitmap.c           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 21fb52ffe444..f42d72d5fe82 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -225,7 +225,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
 			unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		*dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+		*dst = ~(*src);
 	else
 		__bitmap_complement(dst, src, nbits);
 }
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 0f2f845702eb..4387e3c092fd 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -93,7 +93,7 @@ void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned
 		dst[k] = ~src[k];
 
 	if (bits % BITS_PER_LONG)
-		dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+		dst[k] = ~src[k];
 }
 EXPORT_SYMBOL(__bitmap_complement);
 
-- 
cgit v1.2.3


From 2f9305eb31097fdd3dc86daca65d8097d1fcf2ff Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:09:59 -0700
Subject: lib: bitmap: make nbits parameter of bitmap_{and,or,xor,andnot}
 unsigned

This change is only for consistency with the changes to the other
bitmap_* functions; it doesn't change the size of the generated code:
inside BITS_TO_LONGS there is a sizeof(long), which causes bits to be
interpreted as unsigned anyway.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 16 ++++++++--------
 lib/bitmap.c           | 24 ++++++++++++------------
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index f42d72d5fe82..7048782fe5b9 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -99,13 +99,13 @@ extern void __bitmap_shift_right(unsigned long *dst,
 extern void __bitmap_shift_left(unsigned long *dst,
                         const unsigned long *src, int shift, int bits);
 extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-			const unsigned long *bitmap2, int bits);
+			const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-			const unsigned long *bitmap2, int bits);
+			const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-			const unsigned long *bitmap2, int bits);
+			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-			const unsigned long *bitmap2, int bits);
+			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_intersects(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, int bits);
 extern int __bitmap_subset(const unsigned long *bitmap1,
@@ -188,7 +188,7 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
 }
 
 static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		return (*dst = *src1 & *src2) != 0;
@@ -196,7 +196,7 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		*dst = *src1 | *src2;
@@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		*dst = *src1 ^ *src2;
@@ -214,7 +214,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		return (*dst = *src1 & ~(*src2)) != 0;
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 4387e3c092fd..03207373cc5a 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -182,10 +182,10 @@ void __bitmap_shift_left(unsigned long *dst,
 EXPORT_SYMBOL(__bitmap_shift_left);
 
 int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+				const unsigned long *bitmap2, unsigned int bits)
 {
-	int k;
-	int nr = BITS_TO_LONGS(bits);
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(bits);
 	unsigned long result = 0;
 
 	for (k = 0; k < nr; k++)
@@ -195,10 +195,10 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_and);
 
 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+				const unsigned long *bitmap2, unsigned int bits)
 {
-	int k;
-	int nr = BITS_TO_LONGS(bits);
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(bits);
 
 	for (k = 0; k < nr; k++)
 		dst[k] = bitmap1[k] | bitmap2[k];
@@ -206,10 +206,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_or);
 
 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+				const unsigned long *bitmap2, unsigned int bits)
 {
-	int k;
-	int nr = BITS_TO_LONGS(bits);
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(bits);
 
 	for (k = 0; k < nr; k++)
 		dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -217,10 +217,10 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_xor);
 
 int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+				const unsigned long *bitmap2, unsigned int bits)
 {
-	int k;
-	int nr = BITS_TO_LONGS(bits);
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(bits);
 	unsigned long result = 0;
 
 	for (k = 0; k < nr; k++)
-- 
cgit v1.2.3


From 6dfe9799c2a03d225316a3e959b0447f3f50303e Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:01 -0700
Subject: lib: bitmap: make nbits parameter of bitmap_intersects unsigned

The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative.  Since no-one passes a negative
bit-count, this shouldn't affect the semantics.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 4 ++--
 lib/bitmap.c           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 7048782fe5b9..2f3f3a4d5996 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -107,7 +107,7 @@ extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_intersects(const unsigned long *bitmap1,
-			const unsigned long *bitmap2, int bits);
+			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_subset(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, int bits);
 extern int __bitmap_weight(const unsigned long *bitmap, int bits);
@@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1,
 }
 
 static inline int bitmap_intersects(const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 03207373cc5a..e85daa90b237 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -230,9 +230,9 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_andnot);
 
 int __bitmap_intersects(const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+			const unsigned long *bitmap2, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap1[k] & bitmap2[k])
 			return 1;
-- 
cgit v1.2.3


From 5be20213e855550de2b32fde6fc116f74bab86a6 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:03 -0700
Subject: lib: bitmap: make nbits parameter of bitmap_subset unsigned

The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative.  Since no-one passes a negative
bit-count, this shouldn't affect the semantics.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 4 ++--
 lib/bitmap.c           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 2f3f3a4d5996..87e88f79def1 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -109,7 +109,7 @@ extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 extern int __bitmap_intersects(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_subset(const unsigned long *bitmap1,
-			const unsigned long *bitmap2, int bits);
+			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
 extern void bitmap_set(unsigned long *map, int i, int len);
@@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
 }
 
 static inline int bitmap_subset(const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
diff --git a/lib/bitmap.c b/lib/bitmap.c
index e85daa90b237..c9bff5379795 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -245,9 +245,9 @@ int __bitmap_intersects(const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_intersects);
 
 int __bitmap_subset(const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+		    const unsigned long *bitmap2, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap1[k] & ~bitmap2[k])
 			return 0;
-- 
cgit v1.2.3


From 877d9f3b63ac2e5dbc51cbcdff156433f03b3a32 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:05 -0700
Subject: lib: bitmap: make nbits parameter of bitmap_weight unsigned

The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative.  Since no-one passes a negative
bit-count, this shouldn't affect the semantics.

I didn't change the return type, since that might change the semantics
of some expression containing a call to bitmap_weight(). Certainly an
int is capable of holding the result.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 4 ++--
 lib/bitmap.c           | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 87e88f79def1..64b0ebe9f9a8 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -110,7 +110,7 @@ extern int __bitmap_intersects(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_subset(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
-extern int __bitmap_weight(const unsigned long *bitmap, int bits);
+extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
 
 extern void bitmap_set(unsigned long *map, int i, int len);
 extern void bitmap_clear(unsigned long *map, int start, int nr);
@@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
 		return __bitmap_full(src, nbits);
 }
 
-static inline int bitmap_weight(const unsigned long *src, int nbits)
+static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
diff --git a/lib/bitmap.c b/lib/bitmap.c
index c9bff5379795..f69435c23f9c 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -259,9 +259,10 @@ int __bitmap_subset(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_subset);
 
-int __bitmap_weight(const unsigned long *bitmap, int bits)
+int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 {
-	int k, w = 0, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
+	int w = 0;
 
 	for (k = 0; k < lim; k++)
 		w += hweight_long(bitmap[k]);
-- 
cgit v1.2.3


From fb5ac54263ef3fcb5c469a61e0ab6b06e45e2307 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:07 -0700
Subject: lib: bitmap: make the start index of bitmap_set unsigned

The compiler can generate slightly smaller and simpler code when it
knows that "start" is non-negative.

Also, use the names "start" and "len" for the two parameters in both
header file and implementation, instead of the previous mix.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h |  2 +-
 lib/bitmap.c           | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 64b0ebe9f9a8..ad2c67d3583e 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -112,7 +112,7 @@ extern int __bitmap_subset(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
 
-extern void bitmap_set(unsigned long *map, int i, int len);
+extern void bitmap_set(unsigned long *map, unsigned int start, int len);
 extern void bitmap_clear(unsigned long *map, int start, int nr);
 extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
 					 unsigned long size,
diff --git a/lib/bitmap.c b/lib/bitmap.c
index f69435c23f9c..2a3a92fc3355 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -274,21 +274,21 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-void bitmap_set(unsigned long *map, int start, int nr)
+void bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
-	const int size = start + nr;
+	const unsigned int size = start + len;
 	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
 	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
 
-	while (nr - bits_to_set >= 0) {
+	while (len - bits_to_set >= 0) {
 		*p |= mask_to_set;
-		nr -= bits_to_set;
+		len -= bits_to_set;
 		bits_to_set = BITS_PER_LONG;
 		mask_to_set = ~0UL;
 		p++;
 	}
-	if (nr) {
+	if (len) {
 		mask_to_set &= BITMAP_LAST_WORD_MASK(size);
 		*p |= mask_to_set;
 	}
-- 
cgit v1.2.3


From 154f5e38f30f262025c8c2e825376f6eb51e8bcb Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:10 -0700
Subject: lib: bitmap: make the start index of bitmap_clear unsigned

The compiler can generate slightly smaller and simpler code when it
knows that "start" is non-negative.

Also, use the names "start" and "len" for the two parameters for
consistency with bitmap_set.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h |  2 +-
 lib/bitmap.c           | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index ad2c67d3583e..83c1c7d25073 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -113,7 +113,7 @@ extern int __bitmap_subset(const unsigned long *bitmap1,
 extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
 
 extern void bitmap_set(unsigned long *map, unsigned int start, int len);
-extern void bitmap_clear(unsigned long *map, int start, int nr);
+extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
 extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
 					 unsigned long size,
 					 unsigned long start,
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 2a3a92fc3355..5d2540396300 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -295,21 +295,21 @@ void bitmap_set(unsigned long *map, unsigned int start, int len)
 }
 EXPORT_SYMBOL(bitmap_set);
 
-void bitmap_clear(unsigned long *map, int start, int nr)
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
-	const int size = start + nr;
+	const unsigned int size = start + len;
 	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
 	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
 
-	while (nr - bits_to_clear >= 0) {
+	while (len - bits_to_clear >= 0) {
 		*p &= ~mask_to_clear;
-		nr -= bits_to_clear;
+		len -= bits_to_clear;
 		bits_to_clear = BITS_PER_LONG;
 		mask_to_clear = ~0UL;
 		p++;
 	}
-	if (nr) {
+	if (len) {
 		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
 		*p &= ~mask_to_clear;
 	}
-- 
cgit v1.2.3


From 9279d3286e10736766edcaf815ae10e00856e448 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:16 -0700
Subject: lib: bitmap: change parameter of bitmap_*_region to unsigned

Changing the pos parameter of __reg_op to unsigned allows the compiler
to generate slightly smaller and simpler code.  Also update its callers
bitmap_*_region to receive and pass unsigned int.  The return types of
bitmap_find_free_region and bitmap_allocate_region are still int to
allow a negative error code to be returned.  An int is certainly capable
of representing any realistic return value.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h |  6 +++---
 lib/bitmap.c           | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 83c1c7d25073..210037833356 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
 		const unsigned long *relmap, int bits);
 extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
 		int sz, int bits);
-extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
-extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
-extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
+extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
+extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
+extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
 extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
 extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
 
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 2714df9f5cdb..c2f3807b3601 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -1042,7 +1042,7 @@ enum {
 	REG_OP_RELEASE,		/* clear all bits in region */
 };
 
-static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op)
 {
 	int nbits_reg;		/* number of bits in region */
 	int index;		/* index first long of region in bitmap */
@@ -1108,11 +1108,11 @@ done:
  * Return the bit offset in bitmap of the allocated region,
  * or -errno on failure.
  */
-int bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
 {
-	int pos, end;		/* scans bitmap by regions of size order */
+	unsigned int pos, end;		/* scans bitmap by regions of size order */
 
-	for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+	for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
 		if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
 			continue;
 		__reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1133,7 +1133,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
  *
  * No return value.
  */
-void bitmap_release_region(unsigned long *bitmap, int pos, int order)
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
 {
 	__reg_op(bitmap, pos, order, REG_OP_RELEASE);
 }
@@ -1150,7 +1150,7 @@ EXPORT_SYMBOL(bitmap_release_region);
  * Return 0 on success, or %-EBUSY if specified region wasn't
  * free (not all bits were zero).
  */
-int bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
 {
 	if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
 		return -EBUSY;
-- 
cgit v1.2.3


From c5341ec8904ebff50f365a2626da6ab525d63b9e Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:20 -0700
Subject: lib: bitmap: add missing mask in bitmap_shift_right

There is no guarantee that *src does not contain garbage bits outside
the lower nbits, so we need to mask it before the shift-and-assign.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 210037833356..75df61d9ecfb 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
 			const unsigned long *src, int n, int nbits)
 {
 	if (small_const_nbits(nbits))
-		*dst = *src >> n;
+		*dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n;
 	else
 		__bitmap_shift_right(dst, src, n, nbits);
 }
-- 
cgit v1.2.3


From 7e5f97d1927f41affa21aa5b321865ceab1994ce Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:22 -0700
Subject: lib: bitmap: add missing mask in bitmap_and

Apparently, bitmap_and is supposed to return whether the new bitmap is
empty.  But it didn't take potential garbage bits in the last word into
account.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 2 +-
 lib/bitmap.c           | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 75df61d9ecfb..3399a9ecd991 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -191,7 +191,7 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return (*dst = *src1 & *src2) != 0;
+		return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
 	return __bitmap_and(dst, src1, src2, nbits);
 }
 
diff --git a/lib/bitmap.c b/lib/bitmap.c
index faaf7206d4cf..ce2ec80bf431 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -185,11 +185,14 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 				const unsigned long *bitmap2, unsigned int bits)
 {
 	unsigned int k;
-	unsigned int nr = BITS_TO_LONGS(bits);
+	unsigned int lim = bits/BITS_PER_LONG;
 	unsigned long result = 0;
 
-	for (k = 0; k < nr; k++)
+	for (k = 0; k < lim; k++)
 		result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+	if (bits % BITS_PER_LONG)
+		result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+			   BITMAP_LAST_WORD_MASK(bits));
 	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_and);
-- 
cgit v1.2.3


From 74e765319084bd2940a9612ada961f0f7385936c Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 6 Aug 2014 16:10:24 -0700
Subject: lib: bitmap: add missing mask in bitmap_andnot

Apparently, bitmap_andnot is supposed to return whether the new bitmap
is empty.  But it didn't take potential garbage bits in the last word
into account.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 2 +-
 lib/bitmap.c           | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3399a9ecd991..e1c8d080c427 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -217,7 +217,7 @@ static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return (*dst = *src1 & ~(*src2)) != 0;
+		return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
 	return __bitmap_andnot(dst, src1, src2, nbits);
 }
 
diff --git a/lib/bitmap.c b/lib/bitmap.c
index ce2ec80bf431..1e031f2c9aba 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -223,11 +223,14 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 				const unsigned long *bitmap2, unsigned int bits)
 {
 	unsigned int k;
-	unsigned int nr = BITS_TO_LONGS(bits);
+	unsigned int lim = bits/BITS_PER_LONG;
 	unsigned long result = 0;
 
-	for (k = 0; k < nr; k++)
+	for (k = 0; k < lim; k++)
 		result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+	if (bits % BITS_PER_LONG)
+		result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
+			   BITMAP_LAST_WORD_MASK(bits));
 	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_andnot);
-- 
cgit v1.2.3


From b3ea074fd3c798bee861aa076dc2f873461ae26f Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Mon, 4 Aug 2014 13:05:56 +0900
Subject: gpio: add missing includes in machine.h

linux/types.h and linux/list.h should be included so the typed used in
the header file are always properly declared.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/machine.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index b8ad87fab4ce..e2706140eaff 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -1,6 +1,9 @@
 #ifndef __LINUX_GPIO_MACHINE_H
 #define __LINUX_GPIO_MACHINE_H
 
+#include <linux/types.h>
+#include <linux/list.h>
+
 enum gpio_lookup_flags {
 	GPIO_ACTIVE_HIGH = (0 << 0),
 	GPIO_ACTIVE_LOW = (1 << 0),
-- 
cgit v1.2.3


From ed44724b79d8e03a40665436019cf22baba80d30 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Apr 2014 14:37:20 -0400
Subject: acct: switch to __kernel_write()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h      |  1 -
 include/linux/fs.h |  1 +
 kernel/acct.c      | 31 ++++++++++++-------------------
 3 files changed, 13 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index 465742407466..9a2edba87c2b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -131,7 +131,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
 /*
  * read_write.c
  */
-extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
 extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e11d60cc867b..4b7d57cf7863 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2335,6 +2335,7 @@ extern int do_pipe_flags(int *, int);
 
 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
 extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t);
+extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
 extern struct file * open_exec(const char *);
  
 /* fs/dcache.c -- generic fs support functions */
diff --git a/kernel/acct.c b/kernel/acct.c
index 807ebc5d8333..8082d9875d6b 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -456,12 +456,16 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
 	acct_t ac;
-	mm_segment_t fs;
 	unsigned long flim;
 	u64 elapsed, run_time;
 	struct tty_struct *tty;
 	const struct cred *orig_cred;
 
+	/*
+	 * Accounting records are not subject to resource limits.
+	 */
+	flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
 	/* Perform file operations on behalf of whoever enabled accounting */
 	orig_cred = override_creds(file->f_cred);
 
@@ -536,25 +540,14 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 	 * Get freeze protection. If the fs is frozen, just skip the write
 	 * as we could deadlock the system otherwise.
 	 */
-	if (!file_start_write_trylock(file))
-		goto out;
-	/*
-	 * Kernel segment override to datasegment and write it
-	 * to the accounting file.
-	 */
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-	/*
-	 * Accounting records are not subject to resource limits.
-	 */
-	flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
-	file->f_op->write(file, (char *)&ac,
-			       sizeof(acct_t), &file->f_pos);
-	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
-	set_fs(fs);
-	file_end_write(file);
+	if (file_start_write_trylock(file)) {
+		/* it's been opened O_APPEND, so position is irrelevant */
+		loff_t pos = 0;
+		__kernel_write(file, (char *)&ac, sizeof(acct_t), &pos);
+		file_end_write(file);
+	}
 out:
+	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
 	revert_creds(orig_cred);
 }
 
-- 
cgit v1.2.3


From 215752fce31c80f3b3a1530bc7cddb3ba6a69b3a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Aug 2014 06:23:41 -0400
Subject: acct: get rid of acct_list

Put these suckers on per-vfsmount and per-superblock lists instead.
Note: right now it's still acct_lock for everything, but that's
going to change.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h           |   1 +
 fs/namespace.c       |   2 +-
 fs/super.c           |   2 +-
 include/linux/acct.h |   6 +--
 include/linux/fs.h   |   1 +
 kernel/acct.c        | 135 +++++++++++++++++++++------------------------------
 6 files changed, 62 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index d55297f2fa05..0a2d1458681f 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -56,6 +56,7 @@ struct mount {
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
+	struct hlist_head mnt_pins;
 	struct path mnt_ex_mountpoint;
 };
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41cd887..22e530addfaf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -956,7 +956,7 @@ put_again:
 		mnt->mnt_pinned = 0;
 		rcu_read_unlock();
 		unlock_mount_hash();
-		acct_auto_close_mnt(&mnt->mnt);
+		acct_auto_close_mnt(&mnt->mnt_pins);
 		goto put_again;
 	}
 	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
diff --git a/fs/super.c b/fs/super.c
index d20d5b11dedf..52ed93eb63df 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -703,7 +703,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 #endif
 
 	if (flags & MS_RDONLY)
-		acct_auto_close(sb);
+		acct_auto_close(&sb->s_pins);
 	shrink_dcache_sb(sb);
 
 	remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
diff --git a/include/linux/acct.h b/include/linux/acct.h
index 4a5b7cb56079..65a4f889182e 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -24,14 +24,14 @@ struct super_block;
 struct pacct_struct;
 struct pid_namespace;
 extern int acct_parm[]; /* for sysctl */
-extern void acct_auto_close_mnt(struct vfsmount *m);
-extern void acct_auto_close(struct super_block *sb);
+extern void acct_auto_close(struct hlist_head *);
+extern void acct_auto_close_mnt(struct hlist_head *);
 extern void acct_collect(long exitcode, int group_dead);
 extern void acct_process(void);
 extern void acct_exit_ns(struct pid_namespace *);
 #else
-#define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_auto_close(x)	do { } while (0)
+#define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_collect(x,y)	do { } while (0)
 #define acct_process()		do { } while (0)
 #define acct_exit_ns(ns)	do { } while (0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b7d57cf7863..17f70872a4a5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1250,6 +1250,7 @@ struct super_block {
 
 	/* AIO completions deferred from interrupt context */
 	struct workqueue_struct *s_dio_done_wq;
+	struct hlist_head s_pins;
 
 	/*
 	 * Keep the lru lists last in the structure so they always sit on their
diff --git a/kernel/acct.c b/kernel/acct.c
index 019f012a3c6f..21fbb3c27c2a 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -59,6 +59,7 @@
 #include <asm/div64.h>
 #include <linux/blkdev.h> /* sector_div */
 #include <linux/pid_namespace.h>
+#include <../fs/mount.h>	/* will go away when we refactor */
 
 /*
  * These constants control the amount of freespace that suspend and
@@ -79,16 +80,16 @@ static void do_acct_process(struct bsd_acct_struct *acct);
 
 struct bsd_acct_struct {
 	long			count;
+	struct hlist_node	s_list;
+	struct hlist_node	m_list;
 	struct mutex		lock;
 	int			active;
 	unsigned long		needcheck;
 	struct file		*file;
 	struct pid_namespace	*ns;
-	struct list_head	list;
 };
 
 static DEFINE_SPINLOCK(acct_lock);
-static LIST_HEAD(acct_list);
 
 /*
  * Check the amount of free space and suspend/resume accordingly.
@@ -133,25 +134,33 @@ static void acct_put(struct bsd_acct_struct *p)
 	spin_unlock(&acct_lock);
 }
 
-static struct bsd_acct_struct *acct_get(struct bsd_acct_struct **p)
+static struct bsd_acct_struct *__acct_get(struct bsd_acct_struct *res)
+{
+	res->count++;
+	spin_unlock(&acct_lock);
+	mutex_lock(&res->lock);
+	if (!res->ns) {
+		mutex_unlock(&res->lock);
+		spin_lock(&acct_lock);
+		if (!--res->count)
+			kfree(res);
+		return NULL;
+	}
+	return res;
+}
+
+static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
 {
 	struct bsd_acct_struct *res;
 	spin_lock(&acct_lock);
 again:
-	res = *p;
-	if (res)
-		res->count++;
-	spin_unlock(&acct_lock);
-	if (res) {
-		mutex_lock(&res->lock);
-		if (!res->ns) {
-			mutex_unlock(&res->lock);
-			spin_lock(&acct_lock);
-			if (!--res->count)
-				kfree(res);
-			goto again;
-		}
+	if (!ns->bacct) {
+		spin_unlock(&acct_lock);
+		return NULL;
 	}
+	res = __acct_get(ns->bacct);
+	if (!res)
+		goto again;
 	return res;
 }
 
@@ -162,7 +171,8 @@ static void acct_kill(struct bsd_acct_struct *acct,
 		struct file *file = acct->file;
 		struct pid_namespace *ns = acct->ns;
 		spin_lock(&acct_lock);
-		list_del(&acct->list);
+		hlist_del(&acct->m_list);
+		hlist_del(&acct->s_list);
 		mnt_unpin(file->f_path.mnt);
 		spin_unlock(&acct_lock);
 		do_acct_process(acct);
@@ -170,8 +180,10 @@ static void acct_kill(struct bsd_acct_struct *acct,
 		spin_lock(&acct_lock);
 		ns->bacct = new;
 		if (new) {
-			mnt_pin(new->file->f_path.mnt);
-			list_add(&new->list, &acct_list);
+			struct vfsmount *m = new->file->f_path.mnt;
+			mnt_pin(m);
+			hlist_add_head(&new->s_list, &m->mnt_sb->s_pins);
+			hlist_add_head(&new->m_list, &real_mount(m)->mnt_pins);
 		}
 		acct->ns = NULL;
 		mutex_unlock(&acct->lock);
@@ -218,14 +230,15 @@ static int acct_on(struct filename *pathname)
 	mutex_init(&acct->lock);
 	mnt = file->f_path.mnt;
 
-	old = acct_get(&ns->bacct);
+	old = acct_get(ns);
 	if (old) {
 		acct_kill(old, acct);
 	} else {
 		spin_lock(&acct_lock);
 		ns->bacct = acct;
 		mnt_pin(mnt);
-		list_add(&acct->list, &acct_list);
+		hlist_add_head(&acct->s_list, &mnt->mnt_sb->s_pins);
+		hlist_add_head(&acct->m_list, &real_mount(mnt)->mnt_pins);
 		spin_unlock(&acct_lock);
 	}
 	mntput(mnt); /* it's pinned, now give up active reference */
@@ -261,79 +274,41 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
 		mutex_unlock(&acct_on_mutex);
 		putname(tmp);
 	} else {
-		acct_kill(acct_get(&task_active_pid_ns(current)->bacct), NULL);
+		acct_kill(acct_get(task_active_pid_ns(current)), NULL);
 	}
 
 	return error;
 }
 
-/**
- * acct_auto_close - turn off a filesystem's accounting if it is on
- * @m: vfsmount being shut down
- *
- * If the accounting is turned on for a file in the subtree pointed to
- * to by m, turn accounting off.  Done when m is about to die.
- */
-void acct_auto_close_mnt(struct vfsmount *m)
+void acct_auto_close_mnt(struct hlist_head *list)
 {
-	struct bsd_acct_struct *acct;
-
-	spin_lock(&acct_lock);
-restart:
-	list_for_each_entry(acct, &acct_list, list)
-		if (acct->file->f_path.mnt == m) {
-			acct->count++;
-			spin_unlock(&acct_lock);
-			mutex_lock(&acct->lock);
-			if (!acct->ns) {
-				mutex_unlock(&acct->lock);
-				spin_lock(&acct_lock);
-				if (!--acct->count)
-					kfree(acct);
-				goto restart;
-			}
-			acct_kill(acct, NULL);
-			spin_lock(&acct_lock);
-			goto restart;
-		}
+	while (1) {
+		spin_lock(&acct_lock);
+		if (!list->first)
+			break;
+		acct_kill(__acct_get(hlist_entry(list->first,
+						 struct bsd_acct_struct,
+						 m_list)), NULL);
+	}
 	spin_unlock(&acct_lock);
 }
 
-/**
- * acct_auto_close - turn off a filesystem's accounting if it is on
- * @sb: super block for the filesystem
- *
- * If the accounting is turned on for a file in the filesystem pointed
- * to by sb, turn accounting off.
- */
-void acct_auto_close(struct super_block *sb)
+void acct_auto_close(struct hlist_head *list)
 {
-	struct bsd_acct_struct *acct;
-
-	spin_lock(&acct_lock);
-restart:
-	list_for_each_entry(acct, &acct_list, list)
-		if (acct->file->f_path.dentry->d_sb == sb) {
-			acct->count++;
-			spin_unlock(&acct_lock);
-			mutex_lock(&acct->lock);
-			if (!acct->ns) {
-				mutex_unlock(&acct->lock);
-				spin_lock(&acct_lock);
-				if (!--acct->count)
-					kfree(acct);
-				goto restart;
-			}
-			acct_kill(acct, NULL);
-			spin_lock(&acct_lock);
-			goto restart;
-		}
+	while (1) {
+		spin_lock(&acct_lock);
+		if (!list->first)
+			break;
+		acct_kill(__acct_get(hlist_entry(list->first,
+						 struct bsd_acct_struct,
+						 s_list)), NULL);
+	}
 	spin_unlock(&acct_lock);
 }
 
 void acct_exit_ns(struct pid_namespace *ns)
 {
-	acct_kill(acct_get(&ns->bacct), NULL);
+	acct_kill(acct_get(ns), NULL);
 }
 
 /*
@@ -602,7 +577,7 @@ void acct_collect(long exitcode, int group_dead)
 static void slow_acct_process(struct pid_namespace *ns)
 {
 	for ( ; ns; ns = ns->parent) {
-		struct bsd_acct_struct *acct = acct_get(&ns->bacct);
+		struct bsd_acct_struct *acct = acct_get(ns);
 		if (acct) {
 			do_acct_process(acct);
 			mutex_unlock(&acct->lock);
-- 
cgit v1.2.3


From efb170c22867cdc6f770de441bdefecec6712199 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Aug 2014 08:39:04 -0400
Subject: take fs_pin stuff to fs/*

Add a new field to fs_pin - kill(pin).  That's what umount and r/o remount
will be calling for all pins attached to vfsmount and superblock resp.
Called after bumping the refcount, so it won't go away under us.  Dropping
the refcount is responsibility of the instance.  All generic stuff moved to
fs/fs_pin.c; the next step will rip all the knowledge of kernel/acct.c from
fs/super.c and fs/namespace.c.  After that - death to mnt_pin(); it was
intended to be usable as generic mechanism for code that wants to attach
objects to vfsmount, so that they would not make the sucker busy and
would get killed on umount.  Never got it right; it remained acct.c-specific
all along.  Now it's very close to being killable.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/Makefile            |   2 +-
 fs/fs_pin.c            |  77 ++++++++++++++++++++++++++++++
 include/linux/acct.h   |   6 +--
 include/linux/fs_pin.h |  17 +++++++
 kernel/acct.c          | 127 +++++++++++++------------------------------------
 5 files changed, 129 insertions(+), 100 deletions(-)
 create mode 100644 fs/fs_pin.c
 create mode 100644 include/linux/fs_pin.h

(limited to 'include/linux')

diff --git a/fs/Makefile b/fs/Makefile
index 4030cbfbc9af..90c88529892b 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o splice.o sync.o utimes.o \
-		stack.o fs_struct.o statfs.o
+		stack.o fs_struct.o statfs.o fs_pin.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
new file mode 100644
index 000000000000..f3ce0b874a44
--- /dev/null
+++ b/fs/fs_pin.c
@@ -0,0 +1,77 @@
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/fs_pin.h>
+#include "mount.h"
+
+static void pin_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct fs_pin, rcu));
+}
+
+static DEFINE_SPINLOCK(pin_lock);
+
+void pin_put(struct fs_pin *p)
+{
+	if (atomic_long_dec_and_test(&p->count))
+		call_rcu(&p->rcu, pin_free_rcu);
+}
+
+void pin_remove(struct fs_pin *pin)
+{
+	spin_lock(&pin_lock);
+	hlist_del(&pin->m_list);
+	hlist_del(&pin->s_list);
+	spin_unlock(&pin_lock);
+}
+
+void pin_insert(struct fs_pin *pin, struct vfsmount *m)
+{
+	spin_lock(&pin_lock);
+	hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins);
+	hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins);
+	spin_unlock(&pin_lock);
+}
+
+void acct_auto_close_mnt(struct hlist_head *list)
+{
+	while (1) {
+		struct hlist_node *p;
+		struct fs_pin *pin;
+		rcu_read_lock();
+		p = ACCESS_ONCE(list->first);
+		if (!p) {
+			rcu_read_unlock();
+			break;
+		}
+		pin = hlist_entry(p, struct fs_pin, m_list);
+		if (!atomic_long_inc_not_zero(&pin->count)) {
+			rcu_read_unlock();
+			cpu_relax();
+			continue;
+		}
+		rcu_read_unlock();
+		pin->kill(pin);
+	}
+}
+
+void acct_auto_close(struct hlist_head *list)
+{
+	while (1) {
+		struct hlist_node *p;
+		struct fs_pin *pin;
+		rcu_read_lock();
+		p = ACCESS_ONCE(list->first);
+		if (!p) {
+			rcu_read_unlock();
+			break;
+		}
+		pin = hlist_entry(p, struct fs_pin, s_list);
+		if (!atomic_long_inc_not_zero(&pin->count)) {
+			rcu_read_unlock();
+			cpu_relax();
+			continue;
+		}
+		rcu_read_unlock();
+		pin->kill(pin);
+	}
+}
diff --git a/include/linux/acct.h b/include/linux/acct.h
index 65a4f889182e..137837929dbe 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -24,18 +24,16 @@ struct super_block;
 struct pacct_struct;
 struct pid_namespace;
 extern int acct_parm[]; /* for sysctl */
-extern void acct_auto_close(struct hlist_head *);
-extern void acct_auto_close_mnt(struct hlist_head *);
 extern void acct_collect(long exitcode, int group_dead);
 extern void acct_process(void);
 extern void acct_exit_ns(struct pid_namespace *);
 #else
-#define acct_auto_close(x)	do { } while (0)
-#define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_collect(x,y)	do { } while (0)
 #define acct_process()		do { } while (0)
 #define acct_exit_ns(ns)	do { } while (0)
 #endif
+extern void acct_auto_close(struct hlist_head *);
+extern void acct_auto_close_mnt(struct hlist_head *);
 
 /*
  * ACCT_VERSION numbers as yet defined:
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
new file mode 100644
index 000000000000..f66525e72ccf
--- /dev/null
+++ b/include/linux/fs_pin.h
@@ -0,0 +1,17 @@
+#include <linux/fs.h>
+
+struct fs_pin {
+	atomic_long_t		count;
+	union {
+		struct {
+			struct hlist_node	s_list;
+			struct hlist_node	m_list;
+		};
+		struct rcu_head rcu;
+	};
+	void (*kill)(struct fs_pin *);
+};
+
+void pin_put(struct fs_pin *);
+void pin_remove(struct fs_pin *);
+void pin_insert(struct fs_pin *, struct vfsmount *);
diff --git a/kernel/acct.c b/kernel/acct.c
index afeaaa6f49bf..a7993a6cb604 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -59,7 +59,7 @@
 #include <asm/div64.h>
 #include <linux/blkdev.h> /* sector_div */
 #include <linux/pid_namespace.h>
-#include <../fs/mount.h>	/* will go away when we refactor */
+#include <linux/fs_pin.h>
 
 /*
  * These constants control the amount of freespace that suspend and
@@ -78,17 +78,6 @@ int acct_parm[3] = {4, 2, 30};
  */
 static void do_acct_process(struct bsd_acct_struct *acct);
 
-struct fs_pin {
-	atomic_long_t		count;
-	union {
-		struct {
-			struct hlist_node	s_list;
-			struct hlist_node	m_list;
-		};
-		struct rcu_head rcu;
-	};
-};
-
 struct bsd_acct_struct {
 	struct fs_pin		pin;
 	struct mutex		lock;
@@ -100,13 +89,6 @@ struct bsd_acct_struct {
 	struct completion	done;
 };
 
-static void pin_free_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct fs_pin, rcu));
-}
-
-static DEFINE_SPINLOCK(acct_lock);
-
 /*
  * Check the amount of free space and suspend/resume accordingly.
  */
@@ -142,29 +124,6 @@ out:
 	return acct->active;
 }
 
-static void pin_put(struct fs_pin *p)
-{
-	if (atomic_long_dec_and_test(&p->count))
-		call_rcu(&p->rcu, pin_free_rcu);
-}
-
-static struct bsd_acct_struct *__acct_get(struct bsd_acct_struct *res)
-{
-	if (!atomic_long_inc_not_zero(&res->pin.count)) {
-		rcu_read_unlock();
-		cpu_relax();
-		return NULL;
-	}
-	rcu_read_unlock();
-	mutex_lock(&res->lock);
-	if (!res->ns) {
-		mutex_unlock(&res->lock);
-		pin_put(&res->pin);
-		return NULL;
-	}
-	return res;
-}
-
 static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
 {
 	struct bsd_acct_struct *res;
@@ -176,9 +135,18 @@ again:
 		rcu_read_unlock();
 		return NULL;
 	}
-	res = __acct_get(res);
-	if (!res)
+	if (!atomic_long_inc_not_zero(&res->pin.count)) {
+		rcu_read_unlock();
+		cpu_relax();
 		goto again;
+	}
+	rcu_read_unlock();
+	mutex_lock(&res->lock);
+	if (!res->ns) {
+		mutex_unlock(&res->lock);
+		pin_put(&res->pin);
+		goto again;
+	}
 	return res;
 }
 
@@ -203,19 +171,8 @@ static void acct_kill(struct bsd_acct_struct *acct,
 		init_completion(&acct->done);
 		schedule_work(&acct->work);
 		wait_for_completion(&acct->done);
-		spin_lock(&acct_lock);
-		hlist_del(&acct->pin.m_list);
-		hlist_del(&acct->pin.s_list);
-		spin_unlock(&acct_lock);
+		pin_remove(&acct->pin);
 		ns->bacct = new;
-		if (new) {
-			struct vfsmount *m = new->file->f_path.mnt;
-			spin_lock(&acct_lock);
-			hlist_add_head(&new->pin.s_list, &m->mnt_sb->s_pins);
-			hlist_add_head(&new->pin.m_list, &real_mount(m)->mnt_pins);
-			spin_unlock(&acct_lock);
-			mutex_unlock(&new->lock);
-		}
 		acct->ns = NULL;
 		atomic_long_dec(&acct->pin.count);
 		mutex_unlock(&acct->lock);
@@ -223,6 +180,19 @@ static void acct_kill(struct bsd_acct_struct *acct,
 	}
 }
 
+static void acct_pin_kill(struct fs_pin *pin)
+{
+	struct bsd_acct_struct *acct;
+	acct = container_of(pin, struct bsd_acct_struct, pin);
+	mutex_lock(&acct->lock);
+	if (!acct->ns) {
+		mutex_unlock(&acct->lock);
+		pin_put(pin);
+		acct = NULL;
+	}
+	acct_kill(acct, NULL);
+}
+
 static int acct_on(struct filename *pathname)
 {
 	struct file *file;
@@ -254,25 +224,22 @@ static int acct_on(struct filename *pathname)
 	}
 
 	atomic_long_set(&acct->pin.count, 1);
+	acct->pin.kill = acct_pin_kill;
 	acct->file = file;
 	acct->needcheck = jiffies;
 	acct->ns = ns;
 	mutex_init(&acct->lock);
 	mnt = file->f_path.mnt;
 	mnt_pin(mnt);
+	mutex_lock_nested(&acct->lock, 1);	/* nobody has seen it yet */
+	pin_insert(&acct->pin, mnt);
 
 	old = acct_get(ns);
-	mutex_lock_nested(&acct->lock, 1);	/* nobody has seen it yet */
-	if (old) {
+	if (old)
 		acct_kill(old, acct);
-	} else {
+	else
 		ns->bacct = acct;
-		spin_lock(&acct_lock);
-		hlist_add_head(&acct->pin.s_list, &mnt->mnt_sb->s_pins);
-		hlist_add_head(&acct->pin.m_list, &real_mount(mnt)->mnt_pins);
-		spin_unlock(&acct_lock);
-		mutex_unlock(&acct->lock);
-	}
+	mutex_unlock(&acct->lock);
 	mntput(mnt); /* it's pinned, now give up active reference */
 	return 0;
 }
@@ -312,36 +279,6 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
 	return error;
 }
 
-void acct_auto_close_mnt(struct hlist_head *list)
-{
-	rcu_read_lock();
-	while (1) {
-		struct hlist_node *p = ACCESS_ONCE(list->first);
-		if (!p)
-			break;
-		acct_kill(__acct_get(hlist_entry(p,
-						 struct bsd_acct_struct,
-						 pin.m_list)), NULL);
-		rcu_read_lock();
-	}
-	rcu_read_unlock();
-}
-
-void acct_auto_close(struct hlist_head *list)
-{
-	rcu_read_lock();
-	while (1) {
-		struct hlist_node *p = ACCESS_ONCE(list->first);
-		if (!p)
-			break;
-		acct_kill(__acct_get(hlist_entry(p,
-						 struct bsd_acct_struct,
-						 pin.s_list)), NULL);
-		rcu_read_lock();
-	}
-	rcu_read_unlock();
-}
-
 void acct_exit_ns(struct pid_namespace *ns)
 {
 	acct_kill(acct_get(ns), NULL);
-- 
cgit v1.2.3


From 8fa1f1c2bd86007beb4a4845e6087ac4a704dc80 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 21 May 2014 18:22:52 -0400
Subject: make fs/{namespace,super}.c forget about acct.h

These externs belong in fs/internal.h.  Rename (they are not acct-specific
anymore) and move them over there.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs_pin.c          | 9 +++++----
 fs/internal.h        | 6 ++++++
 fs/namespace.c       | 3 +--
 fs/super.c           | 3 +--
 include/linux/acct.h | 2 --
 5 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index f3ce0b874a44..9368236ca100 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -1,6 +1,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/fs_pin.h>
+#include "internal.h"
 #include "mount.h"
 
 static void pin_free_rcu(struct rcu_head *head)
@@ -32,13 +33,13 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
 	spin_unlock(&pin_lock);
 }
 
-void acct_auto_close_mnt(struct hlist_head *list)
+void mnt_pin_kill(struct mount *m)
 {
 	while (1) {
 		struct hlist_node *p;
 		struct fs_pin *pin;
 		rcu_read_lock();
-		p = ACCESS_ONCE(list->first);
+		p = ACCESS_ONCE(m->mnt_pins.first);
 		if (!p) {
 			rcu_read_unlock();
 			break;
@@ -54,13 +55,13 @@ void acct_auto_close_mnt(struct hlist_head *list)
 	}
 }
 
-void acct_auto_close(struct hlist_head *list)
+void sb_pin_kill(struct super_block *sb)
 {
 	while (1) {
 		struct hlist_node *p;
 		struct fs_pin *pin;
 		rcu_read_lock();
-		p = ACCESS_ONCE(list->first);
+		p = ACCESS_ONCE(sb->s_pins.first);
 		if (!p) {
 			rcu_read_unlock();
 			break;
diff --git a/fs/internal.h b/fs/internal.h
index 9a2edba87c2b..e325b4f9c799 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -143,3 +143,9 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
  * pipe.c
  */
 extern const struct file_operations pipefifo_fops;
+
+/*
+ * fs_pin.c
+ */
+extern void sb_pin_kill(struct super_block *sb);
+extern void mnt_pin_kill(struct mount *m);
diff --git a/fs/namespace.c b/fs/namespace.c
index 22e530addfaf..0e4ce51c5277 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -16,7 +16,6 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/idr.h>
-#include <linux/acct.h>		/* acct_auto_close_mnt */
 #include <linux/init.h>		/* init_rootfs */
 #include <linux/fs_struct.h>	/* get_fs_root et.al. */
 #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
@@ -956,7 +955,7 @@ put_again:
 		mnt->mnt_pinned = 0;
 		rcu_read_unlock();
 		unlock_mount_hash();
-		acct_auto_close_mnt(&mnt->mnt_pins);
+		mnt_pin_kill(mnt);
 		goto put_again;
 	}
 	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
diff --git a/fs/super.c b/fs/super.c
index a369f8964dc1..a371ce6aa919 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -22,7 +22,6 @@
 
 #include <linux/export.h>
 #include <linux/slab.h>
-#include <linux/acct.h>
 #include <linux/blkdev.h>
 #include <linux/mount.h>
 #include <linux/security.h>
@@ -707,7 +706,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	if (remount_ro) {
 		if (sb->s_pins.first) {
 			up_write(&sb->s_umount);
-			acct_auto_close(&sb->s_pins);
+			sb_pin_kill(sb);
 			down_write(&sb->s_umount);
 			if (!sb->s_root)
 				return 0;
diff --git a/include/linux/acct.h b/include/linux/acct.h
index 137837929dbe..dccc2d4fe7de 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -32,8 +32,6 @@ extern void acct_exit_ns(struct pid_namespace *);
 #define acct_process()		do { } while (0)
 #define acct_exit_ns(ns)	do { } while (0)
 #endif
-extern void acct_auto_close(struct hlist_head *);
-extern void acct_auto_close_mnt(struct hlist_head *);
 
 /*
  * ACCT_VERSION numbers as yet defined:
-- 
cgit v1.2.3


From 3064c3563ba4c23e2c7a47254ec056ed9ba0098a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Aug 2014 09:12:31 -0400
Subject: death to mnt_pinned

Rather than playing silly buggers with vfsmount refcounts, just have
acct_on() ask fs/namespace.c for internal clone of file->f_path.mnt
and replace it with said clone.  Then attach the pin to original
vfsmount.  Voila - the clone will be alive until the file gets closed,
making sure that underlying superblock remains active, etc., and
we can drop the original vfsmount, so that it's not kept busy.
If the file lives until the final mntput of the original vfsmount,
we'll notice that there's an fs_pin (one in bsd_acct_struct that
holds that file) and mnt_pin_kill() will take it out.  Since
->kill() is synchronous, we won't proceed past that point until
these files are closed (and private clones of our vfsmount are
gone), so we get the same ordering warranties we used to get.

mnt_pin()/mnt_unpin()/->mnt_pinned is gone now, and good riddance -
it never became usable outside of kernel/acct.c (and racy wrt
umount even there).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  1 -
 fs/namespace.c        | 35 +++++++++--------------------------
 include/linux/mount.h |  4 ++--
 kernel/acct.c         | 24 +++++++++++++++++++-----
 4 files changed, 30 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 0a2d1458681f..6740a6215529 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -55,7 +55,6 @@ struct mount {
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
-	int mnt_pinned;
 	struct hlist_head mnt_pins;
 	struct path mnt_ex_mountpoint;
 };
diff --git a/fs/namespace.c b/fs/namespace.c
index 0e4ce51c5277..65af9d0e0d67 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -937,7 +937,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 
 static void mntput_no_expire(struct mount *mnt)
 {
-put_again:
 	rcu_read_lock();
 	mnt_add_count(mnt, -1);
 	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
@@ -950,14 +949,6 @@ put_again:
 		unlock_mount_hash();
 		return;
 	}
-	if (unlikely(mnt->mnt_pinned)) {
-		mnt_add_count(mnt, mnt->mnt_pinned + 1);
-		mnt->mnt_pinned = 0;
-		rcu_read_unlock();
-		unlock_mount_hash();
-		mnt_pin_kill(mnt);
-		goto put_again;
-	}
 	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
 		rcu_read_unlock();
 		unlock_mount_hash();
@@ -980,6 +971,8 @@ put_again:
 	 * so mnt_get_writers() below is safe.
 	 */
 	WARN_ON(mnt_get_writers(mnt));
+	if (unlikely(mnt->mnt_pins.first))
+		mnt_pin_kill(mnt);
 	fsnotify_vfsmount_delete(&mnt->mnt);
 	dput(mnt->mnt.mnt_root);
 	deactivate_super(mnt->mnt.mnt_sb);
@@ -1007,25 +1000,15 @@ struct vfsmount *mntget(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL(mntget);
 
-void mnt_pin(struct vfsmount *mnt)
+struct vfsmount *mnt_clone_internal(struct path *path)
 {
-	lock_mount_hash();
-	real_mount(mnt)->mnt_pinned++;
-	unlock_mount_hash();
-}
-EXPORT_SYMBOL(mnt_pin);
-
-void mnt_unpin(struct vfsmount *m)
-{
-	struct mount *mnt = real_mount(m);
-	lock_mount_hash();
-	if (mnt->mnt_pinned) {
-		mnt_add_count(mnt, 1);
-		mnt->mnt_pinned--;
-	}
-	unlock_mount_hash();
+	struct mount *p;
+	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
+	if (IS_ERR(p))
+		return ERR_CAST(p);
+	p->mnt.mnt_flags |= MNT_INTERNAL;
+	return &p->mnt;
 }
-EXPORT_SYMBOL(mnt_unpin);
 
 static inline void mangle(struct seq_file *m, const char *s)
 {
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 839bac270904..864b120c1345 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -62,6 +62,7 @@ struct vfsmount {
 };
 
 struct file; /* forward dec */
+struct path;
 
 extern int mnt_want_write(struct vfsmount *mnt);
 extern int mnt_want_write_file(struct file *file);
@@ -70,8 +71,7 @@ extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mnt_drop_write_file(struct file *file);
 extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
-extern void mnt_pin(struct vfsmount *mnt);
-extern void mnt_unpin(struct vfsmount *mnt);
+extern struct vfsmount *mnt_clone_internal(struct path *path);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
 
 struct file_system_type;
diff --git a/kernel/acct.c b/kernel/acct.c
index a7993a6cb604..2e6cf818021d 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -154,7 +154,6 @@ static void close_work(struct work_struct *work)
 {
 	struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
 	struct file *file = acct->file;
-	mnt_unpin(file->f_path.mnt);
 	if (file->f_op->flush)
 		file->f_op->flush(file, NULL);
 	__fput_sync(file);
@@ -196,9 +195,10 @@ static void acct_pin_kill(struct fs_pin *pin)
 static int acct_on(struct filename *pathname)
 {
 	struct file *file;
-	struct vfsmount *mnt;
+	struct vfsmount *mnt, *internal;
 	struct pid_namespace *ns = task_active_pid_ns(current);
 	struct bsd_acct_struct *acct, *old;
+	int err;
 
 	acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
 	if (!acct)
@@ -222,6 +222,21 @@ static int acct_on(struct filename *pathname)
 		filp_close(file, NULL);
 		return -EIO;
 	}
+	internal = mnt_clone_internal(&file->f_path);
+	if (IS_ERR(internal)) {
+		kfree(acct);
+		filp_close(file, NULL);
+		return PTR_ERR(internal);
+	}
+	err = mnt_want_write(internal);
+	if (err) {
+		mntput(internal);
+		kfree(acct);
+		filp_close(file, NULL);
+		return err;
+	}
+	mnt = file->f_path.mnt;
+	file->f_path.mnt = internal;
 
 	atomic_long_set(&acct->pin.count, 1);
 	acct->pin.kill = acct_pin_kill;
@@ -229,8 +244,6 @@ static int acct_on(struct filename *pathname)
 	acct->needcheck = jiffies;
 	acct->ns = ns;
 	mutex_init(&acct->lock);
-	mnt = file->f_path.mnt;
-	mnt_pin(mnt);
 	mutex_lock_nested(&acct->lock, 1);	/* nobody has seen it yet */
 	pin_insert(&acct->pin, mnt);
 
@@ -240,7 +253,8 @@ static int acct_on(struct filename *pathname)
 	else
 		ns->bacct = acct;
 	mutex_unlock(&acct->lock);
-	mntput(mnt); /* it's pinned, now give up active reference */
+	mnt_drop_write(mnt);
+	mntput(mnt);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 1a0a397e41cb1bf70cfe45fd0eeff08c7c501ec0 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 14 Feb 2014 17:35:37 -0500
Subject: dcache: d_obtain_alias callers don't all want DISCONNECTED

There are a few d_obtain_alias callers that are using it to get the
root of a filesystem which may already have an alias somewhere else.

This is not the same as the filehandle-lookup case, and none of them
actually need DCACHE_DISCONNECTED set.

It isn't really a serious problem, but it would really be clearer if we
reserved DCACHE_DISCONNECTED for those cases where it's actually needed.

In the btrfs case this was causing a spurious printk from
nfsd/nfsfh.c:fh_verify when it found an unexpected DCACHE_DISCONNECTED
dentry.  Josef worked around this by unsetting DCACHE_DISCONNECTED
manually in 3a0dfa6a12e "Btrfs: unset DCACHE_DISCONNECTED when mounting
default subvol", and this replaces that workaround.

Cc: Josef Bacik <jbacik@fb.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/super.c       |  9 +------
 fs/ceph/super.c        |  2 +-
 fs/dcache.c            | 69 +++++++++++++++++++++++++++++++++++---------------
 fs/nfs/getroot.c       |  2 +-
 fs/nilfs2/super.c      |  2 +-
 include/linux/dcache.h |  1 +
 6 files changed, 54 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8e16bca69c56..67b48b9a03e0 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -851,7 +851,6 @@ static struct dentry *get_default_root(struct super_block *sb,
 	struct btrfs_path *path;
 	struct btrfs_key location;
 	struct inode *inode;
-	struct dentry *dentry;
 	u64 dir_id;
 	int new = 0;
 
@@ -922,13 +921,7 @@ setup_root:
 		return dget(sb->s_root);
 	}
 
-	dentry = d_obtain_alias(inode);
-	if (!IS_ERR(dentry)) {
-		spin_lock(&dentry->d_lock);
-		dentry->d_flags &= ~DCACHE_DISCONNECTED;
-		spin_unlock(&dentry->d_lock);
-	}
-	return dentry;
+	return d_obtain_root(inode);
 }
 
 static int btrfs_fill_super(struct super_block *sb,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 06150fd745ac..f6e12377335c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -755,7 +755,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 				goto out;
 			}
 		} else {
-			root = d_obtain_alias(inode);
+			root = d_obtain_root(inode);
 		}
 		ceph_init_dentry(root);
 		dout("open_root_inode success, root dentry is %p\n", root);
diff --git a/fs/dcache.c b/fs/dcache.c
index 3ed095363997..63d556c0e698 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1781,25 +1781,7 @@ struct dentry *d_find_any_alias(struct inode *inode)
 }
 EXPORT_SYMBOL(d_find_any_alias);
 
-/**
- * d_obtain_alias - find or allocate a dentry for a given inode
- * @inode: inode to allocate the dentry for
- *
- * Obtain a dentry for an inode resulting from NFS filehandle conversion or
- * similar open by handle operations.  The returned dentry may be anonymous,
- * or may have a full name (if the inode was already in the cache).
- *
- * When called on a directory inode, we must ensure that the inode only ever
- * has one dentry.  If a dentry is found, that is returned instead of
- * allocating a new one.
- *
- * On successful return, the reference to the inode has been transferred
- * to the dentry.  In case of an error the reference on the inode is released.
- * To make it easier to use in export operations a %NULL or IS_ERR inode may
- * be passed in and will be the error will be propagate to the return value,
- * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
- */
-struct dentry *d_obtain_alias(struct inode *inode)
+struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
 {
 	static const struct qstr anonstring = QSTR_INIT("/", 1);
 	struct dentry *tmp;
@@ -1830,7 +1812,10 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	}
 
 	/* attach a disconnected dentry */
-	add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
+	add_flags = d_flags_for_inode(inode);
+
+	if (disconnected)
+		add_flags |= DCACHE_DISCONNECTED;
 
 	spin_lock(&tmp->d_lock);
 	tmp->d_inode = inode;
@@ -1851,8 +1836,52 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	iput(inode);
 	return res;
 }
+
+/**
+ * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
+ * @inode: inode to allocate the dentry for
+ *
+ * Obtain a dentry for an inode resulting from NFS filehandle conversion or
+ * similar open by handle operations.  The returned dentry may be anonymous,
+ * or may have a full name (if the inode was already in the cache).
+ *
+ * When called on a directory inode, we must ensure that the inode only ever
+ * has one dentry.  If a dentry is found, that is returned instead of
+ * allocating a new one.
+ *
+ * On successful return, the reference to the inode has been transferred
+ * to the dentry.  In case of an error the reference on the inode is released.
+ * To make it easier to use in export operations a %NULL or IS_ERR inode may
+ * be passed in and the error will be propagated to the return value,
+ * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
+ */
+struct dentry *d_obtain_alias(struct inode *inode)
+{
+	return __d_obtain_alias(inode, 1);
+}
 EXPORT_SYMBOL(d_obtain_alias);
 
+/**
+ * d_obtain_root - find or allocate a dentry for a given inode
+ * @inode: inode to allocate the dentry for
+ *
+ * Obtain an IS_ROOT dentry for the root of a filesystem.
+ *
+ * We must ensure that directory inodes only ever have one dentry.  If a
+ * dentry is found, that is returned instead of allocating a new one.
+ *
+ * On successful return, the reference to the inode has been transferred
+ * to the dentry.  In case of an error the reference on the inode is
+ * released.  A %NULL or IS_ERR inode may be passed in and will be the
+ * error will be propagate to the return value, with a %NULL @inode
+ * replaced by ERR_PTR(-ESTALE).
+ */
+struct dentry *d_obtain_root(struct inode *inode)
+{
+	return __d_obtain_alias(inode, 0);
+}
+EXPORT_SYMBOL(d_obtain_root);
+
 /**
  * d_add_ci - lookup or allocate new dentry with case-exact name
  * @inode:  the inode case-insensitive lookup has found
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b94f80420a58..880618a8b048 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
 	 * if the dentry tree reaches them; however if the dentry already
 	 * exists, we'll pick it up at this point and use it as the root
 	 */
-	ret = d_obtain_alias(inode);
+	ret = d_obtain_root(inode);
 	if (IS_ERR(ret)) {
 		dprintk("nfs_get_root: get root dentry failed\n");
 		goto out;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8c532b2ca3ab..ac914994dfed 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -942,7 +942,7 @@ static int nilfs_get_root_dentry(struct super_block *sb,
 			iput(inode);
 		}
 	} else {
-		dentry = d_obtain_alias(inode);
+		dentry = d_obtain_root(inode);
 		if (IS_ERR(dentry)) {
 			ret = PTR_ERR(dentry);
 			goto failed_dentry;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3c7ec327ebd2..e4ae2ad48d07 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -249,6 +249,7 @@ extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
 extern struct dentry *d_find_any_alias(struct inode *inode);
 extern struct dentry * d_obtain_alias(struct inode *);
+extern struct dentry * d_obtain_root(struct inode *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
 extern void shrink_dcache_for_umount(struct super_block *);
-- 
cgit v1.2.3


From c7f3888ad7f0932a87fb76e6e4edff2a90cc7920 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 18 Jun 2014 20:34:33 -0400
Subject: switch iov_iter_get_pages() to passing maximal number of pages

... instead of maximal size.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c      |  2 +-
 fs/fuse/file.c      |  4 ++--
 include/linux/uio.h |  2 +-
 mm/iov_iter.c       | 17 ++++++++---------
 4 files changed, 12 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 17e39b047de5..c3116404ab49 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
 	ssize_t ret;
 
-	ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
+	ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES,
 				&sdio->from);
 
 	if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 40ac2628ddcf..912061ac4baf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1303,10 +1303,10 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 	while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
 		unsigned npages;
 		size_t start;
-		unsigned n = req->max_pages - req->num_pages;
 		ssize_t ret = iov_iter_get_pages(ii,
 					&req->pages[req->num_pages],
-					n * PAGE_SIZE, &start);
+					req->max_pages - req->num_pages,
+					&start);
 		if (ret < 0)
 			return ret;
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 09a7cffc224e..48d64e6ab292 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -84,7 +84,7 @@ unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
-			size_t maxsize, size_t *start);
+			unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 7b5dbd1517b5..ab88dc0ea1d3 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -310,7 +310,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 EXPORT_SYMBOL(iov_iter_init);
 
 static ssize_t get_pages_iovec(struct iov_iter *i,
-		   struct page **pages, size_t maxsize,
+		   struct page **pages, unsigned maxpages,
 		   size_t *start)
 {
 	size_t offset = i->iov_offset;
@@ -323,10 +323,10 @@ static ssize_t get_pages_iovec(struct iov_iter *i,
 	len = iov->iov_len - offset;
 	if (len > i->count)
 		len = i->count;
-	if (len > maxsize)
-		len = maxsize;
 	addr = (unsigned long)iov->iov_base + offset;
 	len += *start = addr & (PAGE_SIZE - 1);
+	if (len > maxpages * PAGE_SIZE)
+		len = maxpages * PAGE_SIZE;
 	addr &= ~(PAGE_SIZE - 1);
 	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
 	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
@@ -588,15 +588,14 @@ static unsigned long alignment_bvec(const struct iov_iter *i)
 }
 
 static ssize_t get_pages_bvec(struct iov_iter *i,
-		   struct page **pages, size_t maxsize,
+		   struct page **pages, unsigned maxpages,
 		   size_t *start)
 {
 	const struct bio_vec *bvec = i->bvec;
 	size_t len = bvec->bv_len - i->iov_offset;
 	if (len > i->count)
 		len = i->count;
-	if (len > maxsize)
-		len = maxsize;
+	/* can't be more than PAGE_SIZE */
 	*start = bvec->bv_offset + i->iov_offset;
 
 	get_page(*pages = bvec->bv_page);
@@ -712,13 +711,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 EXPORT_SYMBOL(iov_iter_alignment);
 
 ssize_t iov_iter_get_pages(struct iov_iter *i,
-		   struct page **pages, size_t maxsize,
+		   struct page **pages, unsigned maxpages,
 		   size_t *start)
 {
 	if (i->type & ITER_BVEC)
-		return get_pages_bvec(i, pages, maxsize, start);
+		return get_pages_bvec(i, pages, maxpages, start);
 	else
-		return get_pages_iovec(i, pages, maxsize, start);
+		return get_pages_iovec(i, pages, maxpages, start);
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
 
-- 
cgit v1.2.3


From 3c49b52b155d0f723792377e1a4480a0e7ca0ba2 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 25 Jul 2014 16:05:29 -0400
Subject: tracing: Do not do anything special with tracepoint_string when
 tracing is disabled

When CONFIG_TRACING is not enabled, there's no reason to save the trace
strings either by the linker or as a static variable that can be
referenced later. Simply pass back the string that is given to
tracepoint_string().

Had to move the define to include/linux/tracepoint.h so that it is still
visible when CONFIG_TRACING is not set.

Link: http://lkml.kernel.org/p/1406318733-26754-2-git-send-email-nicolas.pitre@linaro.org

Suggested-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 34 ----------------------------------
 include/linux/tracepoint.h   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index cff3106ffe2c..c9f619a2070f 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -574,40 +574,6 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
-/**
- * tracepoint_string - register constant persistent string to trace system
- * @str - a constant persistent string that will be referenced in tracepoints
- *
- * If constant strings are being used in tracepoints, it is faster and
- * more efficient to just save the pointer to the string and reference
- * that with a printf "%s" instead of saving the string in the ring buffer
- * and wasting space and time.
- *
- * The problem with the above approach is that userspace tools that read
- * the binary output of the trace buffers do not have access to the string.
- * Instead they just show the address of the string which is not very
- * useful to users.
- *
- * With tracepoint_string(), the string will be registered to the tracing
- * system and exported to userspace via the debugfs/tracing/printk_formats
- * file that maps the string address to the string text. This way userspace
- * tools that read the binary buffers have a way to map the pointers to
- * the ASCII strings they represent.
- *
- * The @str used must be a constant string and persistent as it would not
- * make sense to show a string that no longer exists. But it is still fine
- * to be used with modules, because when modules are unloaded, if they
- * had tracepoints, the ring buffers are cleared too. As long as the string
- * does not change during the life of the module, it is fine to use
- * tracepoint_string() within a module.
- */
-#define tracepoint_string(str)						\
-	({								\
-		static const char *___tp_str __tracepoint_string = str; \
-		___tp_str;						\
-	})
-#define __tracepoint_string	__attribute__((section("__tracepoint_str")))
-
 #ifdef CONFIG_PERF_EVENTS
 struct perf_event;
 
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 2e2a5f7717e5..b1293f15f592 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -249,6 +249,50 @@ extern void syscall_unregfunc(void);
 
 #endif /* CONFIG_TRACEPOINTS */
 
+#ifdef CONFIG_TRACING
+/**
+ * tracepoint_string - register constant persistent string to trace system
+ * @str - a constant persistent string that will be referenced in tracepoints
+ *
+ * If constant strings are being used in tracepoints, it is faster and
+ * more efficient to just save the pointer to the string and reference
+ * that with a printf "%s" instead of saving the string in the ring buffer
+ * and wasting space and time.
+ *
+ * The problem with the above approach is that userspace tools that read
+ * the binary output of the trace buffers do not have access to the string.
+ * Instead they just show the address of the string which is not very
+ * useful to users.
+ *
+ * With tracepoint_string(), the string will be registered to the tracing
+ * system and exported to userspace via the debugfs/tracing/printk_formats
+ * file that maps the string address to the string text. This way userspace
+ * tools that read the binary buffers have a way to map the pointers to
+ * the ASCII strings they represent.
+ *
+ * The @str used must be a constant string and persistent as it would not
+ * make sense to show a string that no longer exists. But it is still fine
+ * to be used with modules, because when modules are unloaded, if they
+ * had tracepoints, the ring buffers are cleared too. As long as the string
+ * does not change during the life of the module, it is fine to use
+ * tracepoint_string() within a module.
+ */
+#define tracepoint_string(str)						\
+	({								\
+		static const char *___tp_str __tracepoint_string = str; \
+		___tp_str;						\
+	})
+#define __tracepoint_string	__attribute__((section("__tracepoint_str")))
+#else
+/*
+ * tracepoint_string() is used to save the string address for userspace
+ * tracing tools. When tracing isn't configured, there's no need to save
+ * anything.
+ */
+# define tracepoint_string(str) str
+# define __tracepoint_string
+#endif
+
 /*
  * The need for the DECLARE_TRACE_NOARGS() is to handle the prototype
  * (void). "void" is a special value in a function prototype and can
-- 
cgit v1.2.3


From f8ec894945e7d205ce62be59e55e72c4304e4739 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Fri, 25 Jul 2014 17:16:42 -0700
Subject: Input: MT - make slot cleanup callable outside mt_sync_frame()

Some semi-mt drivers use the slots in a manual way, but may still
want to call parts of the frame synchronization logic. This patch
makes input_mt_drop_unused callable from those drivers.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Reviewed-by: Benson Leung <bleung@chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/input-mt.c | 38 +++++++++++++++++++++++++++-----------
 include/linux/input/mt.h |  1 +
 2 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c
index d398f1321f14..c30204f2fa30 100644
--- a/drivers/input/input-mt.c
+++ b/drivers/input/input-mt.c
@@ -236,6 +236,31 @@ void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count)
 }
 EXPORT_SYMBOL(input_mt_report_pointer_emulation);
 
+/**
+ * input_mt_drop_unused() - Inactivate slots not seen in this frame
+ * @dev: input device with allocated MT slots
+ *
+ * Lift all slots not seen since the last call to this function.
+ */
+void input_mt_drop_unused(struct input_dev *dev)
+{
+	struct input_mt *mt = dev->mt;
+	int i;
+
+	if (!mt)
+		return;
+
+	for (i = 0; i < mt->num_slots; i++) {
+		if (!input_mt_is_used(mt, &mt->slots[i])) {
+			input_mt_slot(dev, i);
+			input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1);
+		}
+	}
+
+	mt->frame++;
+}
+EXPORT_SYMBOL(input_mt_drop_unused);
+
 /**
  * input_mt_sync_frame() - synchronize mt frame
  * @dev: input device with allocated MT slots
@@ -247,27 +272,18 @@ EXPORT_SYMBOL(input_mt_report_pointer_emulation);
 void input_mt_sync_frame(struct input_dev *dev)
 {
 	struct input_mt *mt = dev->mt;
-	struct input_mt_slot *s;
 	bool use_count = false;
 
 	if (!mt)
 		return;
 
-	if (mt->flags & INPUT_MT_DROP_UNUSED) {
-		for (s = mt->slots; s != mt->slots + mt->num_slots; s++) {
-			if (input_mt_is_used(mt, s))
-				continue;
-			input_mt_slot(dev, s - mt->slots);
-			input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1);
-		}
-	}
+	if (mt->flags & INPUT_MT_DROP_UNUSED)
+		input_mt_drop_unused(dev);
 
 	if ((mt->flags & INPUT_MT_POINTER) && !(mt->flags & INPUT_MT_SEMI_MT))
 		use_count = true;
 
 	input_mt_report_pointer_emulation(dev, use_count);
-
-	mt->frame++;
 }
 EXPORT_SYMBOL(input_mt_sync_frame);
 
diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h
index 1b1dfa80d9ff..f583ff639776 100644
--- a/include/linux/input/mt.h
+++ b/include/linux/input/mt.h
@@ -105,6 +105,7 @@ void input_mt_report_slot_state(struct input_dev *dev,
 
 void input_mt_report_finger_count(struct input_dev *dev, int count);
 void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count);
+void input_mt_drop_unused(struct input_dev *dev);
 
 void input_mt_sync_frame(struct input_dev *dev);
 
-- 
cgit v1.2.3


From 92d18a6851fb6295466657ad1cf7fe88c2054ffa Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Fri, 8 Aug 2014 10:36:20 -0600
Subject: drivers/vfio: Fix EEH build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The VFIO related components could be built as dynamic modules.
Unfortunately, CONFIG_EEH can't be configured to "m". The patch
fixes the build errors when configuring VFIO related components
as dynamic modules as follows:

  CC [M]  drivers/vfio/vfio_iommu_spapr_tce.o
In file included from drivers/vfio/vfio.c:33:0:
include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \
inside parameter list [enabled by default]
   :
  WRAP    arch/powerpc/boot/zImage.pseries
  WRAP    arch/powerpc/boot/zImage.maple
  WRAP    arch/powerpc/boot/zImage.pmac
  WRAP    arch/powerpc/boot/zImage.epapr
  MODPOST 1818 modules
ERROR: ".vfio_spapr_iommu_eeh_ioctl" [drivers/vfio/vfio_iommu_spapr_tce.ko]\
undefined!
ERROR: ".vfio_spapr_pci_eeh_open" [drivers/vfio/pci/vfio-pci.ko] undefined!
ERROR: ".vfio_spapr_pci_eeh_release" [drivers/vfio/pci/vfio-pci.ko] undefined!

Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/Kconfig          | 6 ++++++
 drivers/vfio/Makefile         | 2 +-
 drivers/vfio/vfio_spapr_eeh.c | 3 +++
 include/linux/vfio.h          | 1 +
 4 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index af7b204b9215..d8c57636b9ce 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE
 	depends on VFIO && SPAPR_TCE_IOMMU
 	default n
 
+config VFIO_SPAPR_EEH
+	tristate
+	depends on EEH && VFIO_IOMMU_SPAPR_TCE
+	default n
+
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	depends on IOMMU_API
 	select VFIO_IOMMU_TYPE1 if X86
 	select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
+	select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
 	select ANON_INODES
 	help
 	  VFIO provides a framework for secure userspace device drivers.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50e30bc75e85..0b035b12600a 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
+obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index f834b4ce1431..949f98e997af 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -18,11 +18,13 @@ int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
 	return eeh_dev_open(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
 
 void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
 {
 	eeh_dev_release(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
 
 long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 				unsigned int cmd, unsigned long arg)
@@ -85,3 +87,4 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 
 	return ret;
 }
+EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 25a0fbd4b998..224128a96b7f 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,6 +98,7 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
 
+struct pci_dev;
 #ifdef CONFIG_EEH
 extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
 extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
-- 
cgit v1.2.3


From 9b936c960f22954bfb89f2fefd8f96916bb42908 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Fri, 8 Aug 2014 10:39:16 -0600
Subject: drivers/vfio: Enable VFIO if EEH is not supported

The existing vfio_pci_open() fails upon error returned from
vfio_spapr_pci_eeh_open(), which breaks POWER7's P5IOC2 PHB
support which this patch brings back.

The patch fixes the issue by dropping the return value of
vfio_spapr_pci_eeh_open().

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci.c   | 6 +-----
 drivers/vfio/vfio_spapr_eeh.c | 4 ++--
 include/linux/vfio.h          | 5 ++---
 3 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1651c0769b72..f7825332a325 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -200,11 +200,7 @@ static int vfio_pci_open(void *device_data)
 		if (ret)
 			goto error;
 
-		ret = vfio_spapr_pci_eeh_open(vdev->pdev);
-		if (ret) {
-			vfio_pci_disable(vdev);
-			goto error;
-		}
+		vfio_spapr_pci_eeh_open(vdev->pdev);
 	}
 	vdev->refcnt++;
 error:
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 4779cace8036..86dfceb9201f 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -19,9 +19,9 @@
 #define DRIVER_DESC	"VFIO IOMMU SPAPR EEH"
 
 /* We might build address mapping here for "fast" path later */
-int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
-	return eeh_dev_open(pdev);
+	eeh_dev_open(pdev);
 }
 EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 224128a96b7f..d3204115f15d 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -100,15 +100,14 @@ extern long vfio_external_check_extension(struct vfio_group *group,
 
 struct pci_dev;
 #ifdef CONFIG_EEH
-extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
 extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
 extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 				       unsigned int cmd,
 				       unsigned long arg);
 #else
-static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
-	return 0;
 }
 
 static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
-- 
cgit v1.2.3


From 00501b531c4723972aa11d6d4ebcf8d6552007c8 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 8 Aug 2014 14:19:20 -0700
Subject: mm: memcontrol: rewrite charge API

These patches rework memcg charge lifetime to integrate more naturally
with the lifetime of user pages.  This drastically simplifies the code and
reduces charging and uncharging overhead.  The most expensive part of
charging and uncharging is the page_cgroup bit spinlock, which is removed
entirely after this series.

Here are the top-10 profile entries of a stress test that reads a 128G
sparse file on a freshly booted box, without even a dedicated cgroup (i.e.
 executing in the root memcg).  Before:

    15.36%              cat  [kernel.kallsyms]   [k] copy_user_generic_string
    13.31%              cat  [kernel.kallsyms]   [k] memset
    11.48%              cat  [kernel.kallsyms]   [k] do_mpage_readpage
     4.23%              cat  [kernel.kallsyms]   [k] get_page_from_freelist
     2.38%              cat  [kernel.kallsyms]   [k] put_page
     2.32%              cat  [kernel.kallsyms]   [k] __mem_cgroup_commit_charge
     2.18%          kswapd0  [kernel.kallsyms]   [k] __mem_cgroup_uncharge_common
     1.92%          kswapd0  [kernel.kallsyms]   [k] shrink_page_list
     1.86%              cat  [kernel.kallsyms]   [k] __radix_tree_lookup
     1.62%              cat  [kernel.kallsyms]   [k] __pagevec_lru_add_fn

After:

    15.67%           cat  [kernel.kallsyms]   [k] copy_user_generic_string
    13.48%           cat  [kernel.kallsyms]   [k] memset
    11.42%           cat  [kernel.kallsyms]   [k] do_mpage_readpage
     3.98%           cat  [kernel.kallsyms]   [k] get_page_from_freelist
     2.46%           cat  [kernel.kallsyms]   [k] put_page
     2.13%       kswapd0  [kernel.kallsyms]   [k] shrink_page_list
     1.88%           cat  [kernel.kallsyms]   [k] __radix_tree_lookup
     1.67%           cat  [kernel.kallsyms]   [k] __pagevec_lru_add_fn
     1.39%       kswapd0  [kernel.kallsyms]   [k] free_pcppages_bulk
     1.30%           cat  [kernel.kallsyms]   [k] kfree

As you can see, the memcg footprint has shrunk quite a bit.

   text    data     bss     dec     hex filename
  37970    9892     400   48262    bc86 mm/memcontrol.o.old
  35239    9892     400   45531    b1db mm/memcontrol.o

This patch (of 4):

The memcg charge API charges pages before they are rmapped - i.e.  have an
actual "type" - and so every callsite needs its own set of charge and
uncharge functions to know what type is being operated on.  Worse,
uncharge has to happen from a context that is still type-specific, rather
than at the end of the page's lifetime with exclusive access, and so
requires a lot of synchronization.

Rewrite the charge API to provide a generic set of try_charge(),
commit_charge() and cancel_charge() transaction operations, much like
what's currently done for swap-in:

  mem_cgroup_try_charge() attempts to reserve a charge, reclaiming
  pages from the memcg if necessary.

  mem_cgroup_commit_charge() commits the page to the charge once it
  has a valid page->mapping and PageAnon() reliably tells the type.

  mem_cgroup_cancel_charge() aborts the transaction.

This reduces the charge API and enables subsequent patches to
drastically simplify uncharging.

As pages need to be committed after rmap is established but before they
are added to the LRU, page_add_new_anon_rmap() must stop doing LRU
additions again.  Revive lru_cache_add_active_or_unevictable().

[hughd@google.com: fix shmem_unuse]
[hughd@google.com: Add comments on the private use of -EAGAIN]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/memcg_test.txt |  32 +--
 include/linux/memcontrol.h           |  53 ++---
 include/linux/swap.h                 |   3 +
 kernel/events/uprobes.c              |  15 +-
 mm/filemap.c                         |  21 +-
 mm/huge_memory.c                     |  57 +++--
 mm/memcontrol.c                      | 407 ++++++++++++++---------------------
 mm/memory.c                          |  41 ++--
 mm/rmap.c                            |  19 --
 mm/shmem.c                           |  37 ++--
 mm/swap.c                            |  34 +++
 mm/swapfile.c                        |  14 +-
 12 files changed, 338 insertions(+), 395 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 80ac454704b8..bcf750d3cecd 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -24,24 +24,7 @@ Please note that implementation details can be changed.
 
    a page/swp_entry may be charged (usage += PAGE_SIZE) at
 
-	mem_cgroup_charge_anon()
-	  Called at new page fault and Copy-On-Write.
-
-	mem_cgroup_try_charge_swapin()
-	  Called at do_swap_page() (page fault on swap entry) and swapoff.
-	  Followed by charge-commit-cancel protocol. (With swap accounting)
-	  At commit, a charge recorded in swap_cgroup is removed.
-
-	mem_cgroup_charge_file()
-	  Called at add_to_page_cache()
-
-	mem_cgroup_cache_charge_swapin()
-	  Called at shmem's swapin.
-
-	mem_cgroup_prepare_migration()
-	  Called before migration. "extra" charge is done and followed by
-	  charge-commit-cancel protocol.
-	  At commit, charge against oldpage or newpage will be committed.
+	mem_cgroup_try_charge()
 
 2. Uncharge
   a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
@@ -69,19 +52,14 @@ Please note that implementation details can be changed.
 	to new page is committed. At failure, charge to old page is committed.
 
 3. charge-commit-cancel
-	In some case, we can't know this "charge" is valid or not at charging
-	(because of races).
-	To handle such case, there are charge-commit-cancel functions.
-		mem_cgroup_try_charge_XXX
-		mem_cgroup_commit_charge_XXX
-		mem_cgroup_cancel_charge_XXX
-	these are used in swap-in and migration.
+	Memcg pages are charged in two steps:
+		mem_cgroup_try_charge()
+		mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
 
 	At try_charge(), there are no flags to say "this page is charged".
 	at this point, usage += PAGE_SIZE.
 
-	At commit(), the function checks the page should be charged or not
-	and set flags or avoid charging.(usage -= PAGE_SIZE)
+	At commit(), the page is associated with the memcg.
 
 	At cancel(), simply usage -= PAGE_SIZE.
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca..1a9a096858e0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,28 +54,11 @@ struct mem_cgroup_reclaim_cookie {
 };
 
 #ifdef CONFIG_MEMCG
-/*
- * All "charge" functions with gfp_mask should use GFP_KERNEL or
- * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
- * alloc memory but reclaims memory from all available zones. So, "where I want
- * memory from" bits of gfp_mask has no meaning. So any bits of that field is
- * available but adding a rule is better. charge functions' gfp_mask should
- * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
- * codes.
- * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
- */
-
-extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask);
-/* for swap handling */
-extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
-extern void mem_cgroup_commit_charge_swapin(struct page *page,
-					struct mem_cgroup *memcg);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
-
-extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
-					gfp_t gfp_mask);
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp);
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      bool lrucare);
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
 
 struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
@@ -233,30 +216,22 @@ void mem_cgroup_print_bad_page(struct page *page);
 #else /* CONFIG_MEMCG */
 struct mem_cgroup;
 
-static inline int mem_cgroup_charge_anon(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
-static inline int mem_cgroup_charge_file(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
-static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
+static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+					gfp_t gfp_mask,
+					struct mem_cgroup **memcgp)
 {
+	*memcgp = NULL;
 	return 0;
 }
 
-static inline void mem_cgroup_commit_charge_swapin(struct page *page,
-					  struct mem_cgroup *memcg)
+static inline void mem_cgroup_commit_charge(struct page *page,
+					    struct mem_cgroup *memcg,
+					    bool lrucare)
 {
 }
 
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+static inline void mem_cgroup_cancel_charge(struct page *page,
+					    struct mem_cgroup *memcg)
 {
 }
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1eb64043c076..46a649e4e8cd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -320,6 +320,9 @@ extern void swap_setup(void);
 
 extern void add_page_to_unevictable_list(struct page *page);
 
+extern void lru_cache_add_active_or_unevictable(struct page *page,
+						struct vm_area_struct *vma);
+
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e8c137..1d0af8a2c646 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	/* For mmu_notifiers */
 	const unsigned long mmun_start = addr;
 	const unsigned long mmun_end   = addr + PAGE_SIZE;
+	struct mem_cgroup *memcg;
+
+	err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
+	if (err)
+		return err;
 
 	/* For try_to_free_swap() and munlock_vma_page() below */
 	lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr);
+	mem_cgroup_commit_charge(kpage, memcg, false);
+	lru_cache_add_active_or_unevictable(kpage, vma);
 
 	if (!PageAnon(page)) {
 		dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	err = 0;
  unlock:
+	mem_cgroup_cancel_charge(kpage, memcg);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;
@@ -315,18 +323,11 @@ retry:
 	if (!new_page)
 		goto put_old;
 
-	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
-		goto put_new;
-
 	__SetPageUptodate(new_page);
 	copy_highpage(new_page, old_page);
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	ret = __replace_page(vma, vaddr, old_page, new_page);
-	if (ret)
-		mem_cgroup_uncharge_page(new_page);
-
-put_new:
 	page_cache_release(new_page);
 put_old:
 	put_page(old_page);
diff --git a/mm/filemap.c b/mm/filemap.c
index af19a6b079f5..349a40e35545 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
 #include <linux/rmap.h>
@@ -548,19 +549,24 @@ static int __add_to_page_cache_locked(struct page *page,
 				      pgoff_t offset, gfp_t gfp_mask,
 				      void **shadowp)
 {
+	int huge = PageHuge(page);
+	struct mem_cgroup *memcg;
 	int error;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 
-	error = mem_cgroup_charge_file(page, current->mm,
-					gfp_mask & GFP_RECLAIM_MASK);
-	if (error)
-		return error;
+	if (!huge) {
+		error = mem_cgroup_try_charge(page, current->mm,
+					      gfp_mask, &memcg);
+		if (error)
+			return error;
+	}
 
 	error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error) {
-		mem_cgroup_uncharge_cache_page(page);
+		if (!huge)
+			mem_cgroup_cancel_charge(page, memcg);
 		return error;
 	}
 
@@ -575,13 +581,16 @@ static int __add_to_page_cache_locked(struct page *page,
 		goto err_insert;
 	__inc_zone_page_state(page, NR_FILE_PAGES);
 	spin_unlock_irq(&mapping->tree_lock);
+	if (!huge)
+		mem_cgroup_commit_charge(page, memcg, false);
 	trace_mm_filemap_add_to_page_cache(page);
 	return 0;
 err_insert:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
 	spin_unlock_irq(&mapping->tree_lock);
-	mem_cgroup_uncharge_cache_page(page);
+	if (!huge)
+		mem_cgroup_cancel_charge(page, memcg);
 	page_cache_release(page);
 	return error;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3630d577e987..d9a21d06b862 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 					unsigned long haddr, pmd_t *pmd,
 					struct page *page)
 {
+	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	spinlock_t *ptl;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
+
+	if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+		return VM_FAULT_OOM;
+
 	pgtable = pte_alloc_one(mm, haddr);
-	if (unlikely(!pgtable))
+	if (unlikely(!pgtable)) {
+		mem_cgroup_cancel_charge(page, memcg);
 		return VM_FAULT_OOM;
+	}
 
 	clear_huge_page(page, haddr, HPAGE_PMD_NR);
 	/*
@@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_none(*pmd))) {
 		spin_unlock(ptl);
-		mem_cgroup_uncharge_page(page);
+		mem_cgroup_cancel_charge(page, memcg);
 		put_page(page);
 		pte_free(mm, pgtable);
 	} else {
@@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		page_add_new_anon_rmap(page, vma, haddr);
+		mem_cgroup_commit_charge(page, memcg, false);
+		lru_cache_add_active_or_unevictable(page, vma);
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 		set_pmd_at(mm, haddr, pmd, entry);
 		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
@@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
-	if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) {
-		put_page(page);
-		count_vm_event(THP_FAULT_FALLBACK);
-		return VM_FAULT_FALLBACK;
-	}
 	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
-		mem_cgroup_uncharge_page(page);
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					struct page *page,
 					unsigned long haddr)
 {
+	struct mem_cgroup *memcg;
 	spinlock_t *ptl;
 	pgtable_t pgtable;
 	pmd_t _pmd;
@@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					       __GFP_OTHER_NODE,
 					       vma, address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
-			     mem_cgroup_charge_anon(pages[i], mm,
-						       GFP_KERNEL))) {
+			     mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
+						   &memcg))) {
 			if (pages[i])
 				put_page(pages[i]);
-			mem_cgroup_uncharge_start();
 			while (--i >= 0) {
-				mem_cgroup_uncharge_page(pages[i]);
+				memcg = (void *)page_private(pages[i]);
+				set_page_private(pages[i], 0);
+				mem_cgroup_cancel_charge(pages[i], memcg);
 				put_page(pages[i]);
 			}
-			mem_cgroup_uncharge_end();
 			kfree(pages);
 			ret |= VM_FAULT_OOM;
 			goto out;
 		}
+		set_page_private(pages[i], (unsigned long)memcg);
 	}
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 		pte_t *pte, entry;
 		entry = mk_pte(pages[i], vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		memcg = (void *)page_private(pages[i]);
+		set_page_private(pages[i], 0);
 		page_add_new_anon_rmap(pages[i], vma, haddr);
+		mem_cgroup_commit_charge(pages[i], memcg, false);
+		lru_cache_add_active_or_unevictable(pages[i], vma);
 		pte = pte_offset_map(&_pmd, haddr);
 		VM_BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, haddr, pte, entry);
@@ -1065,12 +1074,12 @@ out:
 out_free_pages:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	mem_cgroup_uncharge_start();
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
-		mem_cgroup_uncharge_page(pages[i]);
+		memcg = (void *)page_private(pages[i]);
+		set_page_private(pages[i], 0);
+		mem_cgroup_cancel_charge(pages[i], memcg);
 		put_page(pages[i]);
 	}
-	mem_cgroup_uncharge_end();
 	kfree(pages);
 	goto out;
 }
@@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int ret = 0;
 	struct page *page = NULL, *new_page;
+	struct mem_cgroup *memcg;
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
@@ -1132,7 +1142,8 @@ alloc:
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm,
+					   GFP_TRANSHUGE, &memcg))) {
 		put_page(new_page);
 		if (page) {
 			split_huge_page(page);
@@ -1161,7 +1172,7 @@ alloc:
 		put_user_huge_page(page);
 	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
 		spin_unlock(ptl);
-		mem_cgroup_uncharge_page(new_page);
+		mem_cgroup_cancel_charge(new_page, memcg);
 		put_page(new_page);
 		goto out_mn;
 	} else {
@@ -1170,6 +1181,8 @@ alloc:
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
+		mem_cgroup_commit_charge(new_page, memcg, false);
+		lru_cache_add_active_or_unevictable(new_page, vma);
 		set_pmd_at(mm, haddr, pmd, entry);
 		update_mmu_cache_pmd(vma, address, pmd);
 		if (!page) {
@@ -2413,6 +2426,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spinlock_t *pmd_ptl, *pte_ptl;
 	int isolated;
 	unsigned long hstart, hend;
+	struct mem_cgroup *memcg;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
@@ -2423,7 +2437,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!new_page)
 		return;
 
-	if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE)))
+	if (unlikely(mem_cgroup_try_charge(new_page, mm,
+					   GFP_TRANSHUGE, &memcg)))
 		return;
 
 	/*
@@ -2510,6 +2525,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
 	page_add_new_anon_rmap(new_page, vma, address);
+	mem_cgroup_commit_charge(new_page, memcg, false);
+	lru_cache_add_active_or_unevictable(new_page, vma);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache_pmd(vma, address, pmd);
@@ -2523,7 +2540,7 @@ out_up_write:
 	return;
 
 out:
-	mem_cgroup_uncharge_page(new_page);
+	mem_cgroup_cancel_charge(new_page, memcg);
 	goto out_up_write;
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 90dc501eaf3f..1cbe1e54ff5f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2551,17 +2551,8 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
-				 gfp_t gfp_mask,
-				 unsigned int nr_pages)
+static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+		      unsigned int nr_pages)
 {
 	unsigned int batch = max(CHARGE_BATCH, nr_pages);
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2660,41 +2651,7 @@ done:
 	return ret;
 }
 
-/**
- * mem_cgroup_try_charge_mm - try charging a mm
- * @mm: mm_struct to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns the charged mem_cgroup associated with the given mm_struct or
- * NULL the charge failed.
- */
-static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
-				 gfp_t gfp_mask,
-				 unsigned int nr_pages)
-
-{
-	struct mem_cgroup *memcg;
-	int ret;
-
-	memcg = get_mem_cgroup_from_mm(mm);
-	ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
-	css_put(&memcg->css);
-	if (ret == -EINTR)
-		memcg = root_mem_cgroup;
-	else if (ret)
-		memcg = NULL;
-
-	return memcg;
-}
-
-/*
- * Somemtimes we have to undo a charge we got by try_charge().
- * This function is for that and do uncharge, put css's refcnt.
- * gotten by try_charge().
- */
-static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
-				       unsigned int nr_pages)
+static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	unsigned long bytes = nr_pages * PAGE_SIZE;
 
@@ -2760,17 +2717,13 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	return memcg;
 }
 
-static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
-				       struct page *page,
-				       unsigned int nr_pages,
-				       enum charge_type ctype,
-				       bool lrucare)
+static void commit_charge(struct page *page, struct mem_cgroup *memcg,
+			  unsigned int nr_pages, bool anon, bool lrucare)
 {
 	struct page_cgroup *pc = lookup_page_cgroup(page);
 	struct zone *uninitialized_var(zone);
 	struct lruvec *lruvec;
 	bool was_on_lru = false;
-	bool anon;
 
 	lock_page_cgroup(pc);
 	VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
@@ -2807,11 +2760,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 		spin_unlock_irq(&zone->lru_lock);
 	}
 
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
-		anon = true;
-	else
-		anon = false;
-
 	mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
 	unlock_page_cgroup(pc);
 
@@ -2882,21 +2830,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	if (ret)
 		return ret;
 
-	ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
+	ret = try_charge(memcg, gfp, size >> PAGE_SHIFT);
 	if (ret == -EINTR)  {
 		/*
-		 * mem_cgroup_try_charge() chosed to bypass to root due to
-		 * OOM kill or fatal signal.  Since our only options are to
-		 * either fail the allocation or charge it to this cgroup, do
-		 * it as a temporary condition. But we can't fail. From a
-		 * kmem/slab perspective, the cache has already been selected,
-		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * try_charge() chose to bypass to root due to OOM kill or
+		 * fatal signal.  Since our only options are to either fail
+		 * the allocation or charge it to this cgroup, do it as a
+		 * temporary condition. But we can't fail. From a kmem/slab
+		 * perspective, the cache has already been selected, by
+		 * mem_cgroup_kmem_get_cache(), so it is too late to change
 		 * our minds.
 		 *
 		 * This condition will only trigger if the task entered
-		 * memcg_charge_kmem in a sane state, but was OOM-killed during
-		 * mem_cgroup_try_charge() above. Tasks that were already
-		 * dying when the allocation triggers should have been already
+		 * memcg_charge_kmem in a sane state, but was OOM-killed
+		 * during try_charge() above. Tasks that were already dying
+		 * when the allocation triggers should have been already
 		 * directed to the root cgroup in memcontrol.h
 		 */
 		res_counter_charge_nofail(&memcg->res, size, &fail_res);
@@ -3618,164 +3566,6 @@ out:
 	return ret;
 }
 
-int mem_cgroup_charge_anon(struct page *page,
-			      struct mm_struct *mm, gfp_t gfp_mask)
-{
-	unsigned int nr_pages = 1;
-	struct mem_cgroup *memcg;
-
-	if (mem_cgroup_disabled())
-		return 0;
-
-	VM_BUG_ON_PAGE(page_mapped(page), page);
-	VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
-	VM_BUG_ON(!mm);
-
-	if (PageTransHuge(page)) {
-		nr_pages <<= compound_order(page);
-		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-	}
-
-	memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
-	if (!memcg)
-		return -ENOMEM;
-	__mem_cgroup_commit_charge(memcg, page, nr_pages,
-				   MEM_CGROUP_CHARGE_TYPE_ANON, false);
-	return 0;
-}
-
-/*
- * While swap-in, try_charge -> commit or cancel, the page is locked.
- * And when try_charge() successfully returns, one refcnt to memcg without
- * struct page_cgroup is acquired. This refcnt will be consumed by
- * "commit()" or removed by "cancel()"
- */
-static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-					  struct page *page,
-					  gfp_t mask,
-					  struct mem_cgroup **memcgp)
-{
-	struct mem_cgroup *memcg = NULL;
-	struct page_cgroup *pc;
-	int ret;
-
-	pc = lookup_page_cgroup(page);
-	/*
-	 * Every swap fault against a single page tries to charge the
-	 * page, bail as early as possible.  shmem_unuse() encounters
-	 * already charged pages, too.  The USED bit is protected by
-	 * the page lock, which serializes swap cache removal, which
-	 * in turn serializes uncharging.
-	 */
-	if (PageCgroupUsed(pc))
-		goto out;
-	if (do_swap_account)
-		memcg = try_get_mem_cgroup_from_page(page);
-	if (!memcg)
-		memcg = get_mem_cgroup_from_mm(mm);
-	ret = mem_cgroup_try_charge(memcg, mask, 1);
-	css_put(&memcg->css);
-	if (ret == -EINTR)
-		memcg = root_mem_cgroup;
-	else if (ret)
-		return ret;
-out:
-	*memcgp = memcg;
-	return 0;
-}
-
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
-				 gfp_t gfp_mask, struct mem_cgroup **memcgp)
-{
-	if (mem_cgroup_disabled()) {
-		*memcgp = NULL;
-		return 0;
-	}
-	/*
-	 * A racing thread's fault, or swapoff, may have already
-	 * updated the pte, and even removed page from swap cache: in
-	 * those cases unuse_pte()'s pte_same() test will fail; but
-	 * there's also a KSM case which does need to charge the page.
-	 */
-	if (!PageSwapCache(page)) {
-		struct mem_cgroup *memcg;
-
-		memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
-		if (!memcg)
-			return -ENOMEM;
-		*memcgp = memcg;
-		return 0;
-	}
-	return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
-}
-
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
-{
-	if (mem_cgroup_disabled())
-		return;
-	if (!memcg)
-		return;
-	__mem_cgroup_cancel_charge(memcg, 1);
-}
-
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
-					enum charge_type ctype)
-{
-	if (mem_cgroup_disabled())
-		return;
-	if (!memcg)
-		return;
-
-	__mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
-	/*
-	 * Now swap is on-memory. This means this page may be
-	 * counted both as mem and swap....double count.
-	 * Fix it by uncharging from memsw. Basically, this SwapCache is stable
-	 * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
-	 * may call delete_from_swap_cache() before reach here.
-	 */
-	if (do_swap_account && PageSwapCache(page)) {
-		swp_entry_t ent = {.val = page_private(page)};
-		mem_cgroup_uncharge_swap(ent);
-	}
-}
-
-void mem_cgroup_commit_charge_swapin(struct page *page,
-				     struct mem_cgroup *memcg)
-{
-	__mem_cgroup_commit_charge_swapin(page, memcg,
-					  MEM_CGROUP_CHARGE_TYPE_ANON);
-}
-
-int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask)
-{
-	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-	struct mem_cgroup *memcg;
-	int ret;
-
-	if (mem_cgroup_disabled())
-		return 0;
-	if (PageCompound(page))
-		return 0;
-
-	if (PageSwapCache(page)) { /* shmem */
-		ret = __mem_cgroup_try_charge_swapin(mm, page,
-						     gfp_mask, &memcg);
-		if (ret)
-			return ret;
-		__mem_cgroup_commit_charge_swapin(page, memcg, type);
-		return 0;
-	}
-
-	memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
-	if (!memcg)
-		return -ENOMEM;
-	__mem_cgroup_commit_charge(memcg, page, 1, type, false);
-	return 0;
-}
-
 static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
 				   unsigned int nr_pages,
 				   const enum charge_type ctype)
@@ -4122,7 +3912,6 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 	struct mem_cgroup *memcg = NULL;
 	unsigned int nr_pages = 1;
 	struct page_cgroup *pc;
-	enum charge_type ctype;
 
 	*memcgp = NULL;
 
@@ -4184,16 +3973,12 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 	 * page. In the case new page is migrated but not remapped, new page's
 	 * mapcount will be finally 0 and we call uncharge in end_migration().
 	 */
-	if (PageAnon(page))
-		ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
-	else
-		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	/*
 	 * The page is committed to the memcg, but it's not actually
 	 * charged to the res_counter since we plan on replacing the
 	 * old one and only one page is going to be left afterwards.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
+	commit_charge(newpage, memcg, nr_pages, PageAnon(page), false);
 }
 
 /* remove redundant charge if migration failed*/
@@ -4252,7 +4037,6 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 {
 	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
-	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
 
 	if (mem_cgroup_disabled())
 		return;
@@ -4278,7 +4062,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
 	 * LRU while we overwrite pc->mem_cgroup.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
+	commit_charge(newpage, memcg, 1, false, true);
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -6319,20 +6103,19 @@ static int mem_cgroup_do_precharge(unsigned long count)
 	int ret;
 
 	/* Try a single bulk charge without reclaim first */
-	ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
 	if (!ret) {
 		mc.precharge += count;
 		return ret;
 	}
 	if (ret == -EINTR) {
-		__mem_cgroup_cancel_charge(root_mem_cgroup, count);
+		cancel_charge(root_mem_cgroup, count);
 		return ret;
 	}
 
 	/* Try charges one by one with reclaim */
 	while (count--) {
-		ret = mem_cgroup_try_charge(mc.to,
-					    GFP_KERNEL & ~__GFP_NORETRY, 1);
+		ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
 		/*
 		 * In case of failure, any residual charges against
 		 * mc.to will be dropped by mem_cgroup_clear_mc()
@@ -6340,7 +6123,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 		 * bypassed to root right away or they'll be lost.
 		 */
 		if (ret == -EINTR)
-			__mem_cgroup_cancel_charge(root_mem_cgroup, 1);
+			cancel_charge(root_mem_cgroup, 1);
 		if (ret)
 			return ret;
 		mc.precharge++;
@@ -6609,7 +6392,7 @@ static void __mem_cgroup_clear_mc(void)
 
 	/* we must uncharge all the leftover precharges from mc.to */
 	if (mc.precharge) {
-		__mem_cgroup_cancel_charge(mc.to, mc.precharge);
+		cancel_charge(mc.to, mc.precharge);
 		mc.precharge = 0;
 	}
 	/*
@@ -6617,7 +6400,7 @@ static void __mem_cgroup_clear_mc(void)
 	 * we must uncharge here.
 	 */
 	if (mc.moved_charge) {
-		__mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
+		cancel_charge(mc.from, mc.moved_charge);
 		mc.moved_charge = 0;
 	}
 	/* we must fixup refcnts and charges */
@@ -6946,6 +6729,150 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+/**
+ * mem_cgroup_try_charge - try charging a page
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp_mask: reclaim mode
+ * @memcgp: charged memcg return
+ *
+ * Try to charge @page to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp_mask if necessary.
+ *
+ * Returns 0 on success, with *@memcgp pointing to the charged memcg.
+ * Otherwise, an error code is returned.
+ *
+ * After page->mapping has been set up, the caller must finalize the
+ * charge with mem_cgroup_commit_charge().  Or abort the transaction
+ * with mem_cgroup_cancel_charge() in case page instantiation fails.
+ */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+	struct mem_cgroup *memcg = NULL;
+	unsigned int nr_pages = 1;
+	int ret = 0;
+
+	if (mem_cgroup_disabled())
+		goto out;
+
+	if (PageSwapCache(page)) {
+		struct page_cgroup *pc = lookup_page_cgroup(page);
+		/*
+		 * Every swap fault against a single page tries to charge the
+		 * page, bail as early as possible.  shmem_unuse() encounters
+		 * already charged pages, too.  The USED bit is protected by
+		 * the page lock, which serializes swap cache removal, which
+		 * in turn serializes uncharging.
+		 */
+		if (PageCgroupUsed(pc))
+			goto out;
+	}
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	if (do_swap_account && PageSwapCache(page))
+		memcg = try_get_mem_cgroup_from_page(page);
+	if (!memcg)
+		memcg = get_mem_cgroup_from_mm(mm);
+
+	ret = try_charge(memcg, gfp_mask, nr_pages);
+
+	css_put(&memcg->css);
+
+	if (ret == -EINTR) {
+		memcg = root_mem_cgroup;
+		ret = 0;
+	}
+out:
+	*memcgp = memcg;
+	return ret;
+}
+
+/**
+ * mem_cgroup_commit_charge - commit a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ * @lrucare: page might be on LRU already
+ *
+ * Finalize a charge transaction started by mem_cgroup_try_charge(),
+ * after page->mapping has been set up.  This must happen atomically
+ * as part of the page instantiation, i.e. under the page table lock
+ * for anonymous pages, under the page lock for page and swap cache.
+ *
+ * In addition, the page must not be on the LRU during the commit, to
+ * prevent racing with task migration.  If it might be, use @lrucare.
+ *
+ * Use mem_cgroup_cancel_charge() to cancel the transaction instead.
+ */
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      bool lrucare)
+{
+	unsigned int nr_pages = 1;
+
+	VM_BUG_ON_PAGE(!page->mapping, page);
+	VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);
+
+	if (mem_cgroup_disabled())
+		return;
+	/*
+	 * Swap faults will attempt to charge the same page multiple
+	 * times.  But reuse_swap_page() might have removed the page
+	 * from swapcache already, so we can't check PageSwapCache().
+	 */
+	if (!memcg)
+		return;
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare);
+
+	if (do_swap_account && PageSwapCache(page)) {
+		swp_entry_t entry = { .val = page_private(page) };
+		/*
+		 * The swap entry might not get freed for a long time,
+		 * let's not wait for it.  The page already received a
+		 * memory+swap charge, drop the swap entry duplicate.
+		 */
+		mem_cgroup_uncharge_swap(entry);
+	}
+}
+
+/**
+ * mem_cgroup_cancel_charge - cancel a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ *
+ * Cancel a charge transaction started by mem_cgroup_try_charge().
+ */
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
+{
+	unsigned int nr_pages = 1;
+
+	if (mem_cgroup_disabled())
+		return;
+	/*
+	 * Swap faults will attempt to charge the same page multiple
+	 * times.  But reuse_swap_page() might have removed the page
+	 * from swapcache already, so we can't check PageSwapCache().
+	 */
+	if (!memcg)
+		return;
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	cancel_charge(memcg, nr_pages);
+}
+
 /*
  * subsys_initcall() for memory controller.
  *
diff --git a/mm/memory.c b/mm/memory.c
index 5c55270729f7..6d7648773dc4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2049,6 +2049,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *dirty_page = NULL;
 	unsigned long mmun_start = 0;	/* For mmu_notifiers */
 	unsigned long mmun_end = 0;	/* For mmu_notifiers */
+	struct mem_cgroup *memcg;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
@@ -2204,7 +2205,7 @@ gotten:
 	}
 	__SetPageUptodate(new_page);
 
-	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
 		goto oom_free_new;
 
 	mmun_start  = address & PAGE_MASK;
@@ -2234,6 +2235,8 @@ gotten:
 		 */
 		ptep_clear_flush(vma, address, page_table);
 		page_add_new_anon_rmap(new_page, vma, address);
+		mem_cgroup_commit_charge(new_page, memcg, false);
+		lru_cache_add_active_or_unevictable(new_page, vma);
 		/*
 		 * We call the notify macro here because, when using secondary
 		 * mmu page tables (such as kvm shadow page tables), we want the
@@ -2271,7 +2274,7 @@ gotten:
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
 	} else
-		mem_cgroup_uncharge_page(new_page);
+		mem_cgroup_cancel_charge(new_page, memcg);
 
 	if (new_page)
 		page_cache_release(new_page);
@@ -2410,10 +2413,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	spinlock_t *ptl;
 	struct page *page, *swapcache;
+	struct mem_cgroup *memcg;
 	swp_entry_t entry;
 	pte_t pte;
 	int locked;
-	struct mem_cgroup *ptr;
 	int exclusive = 0;
 	int ret = 0;
 
@@ -2489,7 +2492,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_page;
 	}
 
-	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
+	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
 	}
@@ -2514,10 +2517,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * while the page is counted on swap but not yet in mapcount i.e.
 	 * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
 	 * must be called after the swap_free(), or it will never succeed.
-	 * Because delete_from_swap_page() may be called by reuse_swap_page(),
-	 * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
-	 * in page->private. In this case, a record in swap_cgroup  is silently
-	 * discarded at swap_free().
 	 */
 
 	inc_mm_counter_fast(mm, MM_ANONPAGES);
@@ -2533,12 +2532,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (pte_swp_soft_dirty(orig_pte))
 		pte = pte_mksoft_dirty(pte);
 	set_pte_at(mm, address, page_table, pte);
-	if (page == swapcache)
+	if (page == swapcache) {
 		do_page_add_anon_rmap(page, vma, address, exclusive);
-	else /* ksm created a completely new copy */
+		mem_cgroup_commit_charge(page, memcg, true);
+	} else { /* ksm created a completely new copy */
 		page_add_new_anon_rmap(page, vma, address);
-	/* It's better to call commit-charge after rmap is established */
-	mem_cgroup_commit_charge_swapin(page, ptr);
+		mem_cgroup_commit_charge(page, memcg, false);
+		lru_cache_add_active_or_unevictable(page, vma);
+	}
 
 	swap_free(entry);
 	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2571,7 +2572,7 @@ unlock:
 out:
 	return ret;
 out_nomap:
-	mem_cgroup_cancel_charge_swapin(ptr);
+	mem_cgroup_cancel_charge(page, memcg);
 	pte_unmap_unlock(page_table, ptl);
 out_page:
 	unlock_page(page);
@@ -2627,6 +2628,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		unsigned int flags)
 {
+	struct mem_cgroup *memcg;
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t entry;
@@ -2660,7 +2662,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	__SetPageUptodate(page);
 
-	if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
+	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
 		goto oom_free_page;
 
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -2673,6 +2675,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, address);
+	mem_cgroup_commit_charge(page, memcg, false);
+	lru_cache_add_active_or_unevictable(page, vma);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
 
@@ -2682,7 +2686,7 @@ unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return 0;
 release:
-	mem_cgroup_uncharge_page(page);
+	mem_cgroup_cancel_charge(page, memcg);
 	page_cache_release(page);
 	goto unlock;
 oom_free_page:
@@ -2919,6 +2923,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
 {
 	struct page *fault_page, *new_page;
+	struct mem_cgroup *memcg;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int ret;
@@ -2930,7 +2935,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!new_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
+	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
 		page_cache_release(new_page);
 		return VM_FAULT_OOM;
 	}
@@ -2950,12 +2955,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto uncharge_out;
 	}
 	do_set_pte(vma, address, new_page, pte, true, true);
+	mem_cgroup_commit_charge(new_page, memcg, false);
+	lru_cache_add_active_or_unevictable(new_page, vma);
 	pte_unmap_unlock(pte, ptl);
 	unlock_page(fault_page);
 	page_cache_release(fault_page);
 	return ret;
 uncharge_out:
-	mem_cgroup_uncharge_page(new_page);
+	mem_cgroup_cancel_charge(new_page, memcg);
 	page_cache_release(new_page);
 	return ret;
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 22a4a7699cdb..f56b5ed78128 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1032,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page,
 	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
 			hpage_nr_pages(page));
 	__page_set_anon_rmap(page, vma, address, 1);
-
-	VM_BUG_ON_PAGE(PageLRU(page), page);
-	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
-		SetPageActive(page);
-		lru_cache_add(page);
-		return;
-	}
-
-	if (!TestSetPageMlocked(page)) {
-		/*
-		 * We use the irq-unsafe __mod_zone_page_stat because this
-		 * counter is not modified from interrupt context, and the pte
-		 * lock is held(spinlock), which implies preemption disabled.
-		 */
-		__mod_zone_page_state(page_zone(page), NR_MLOCK,
-				    hpage_nr_pages(page));
-		count_vm_event(UNEVICTABLE_PGMLOCKED);
-	}
-	add_page_to_unevictable_list(page);
 }
 
 /**
diff --git a/mm/shmem.c b/mm/shmem.c
index 302d1cf7ad07..1f1a8085538b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -621,7 +621,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	radswap = swp_to_radix_entry(swap);
 	index = radix_tree_locate_item(&mapping->page_tree, radswap);
 	if (index == -1)
-		return 0;
+		return -EAGAIN;	/* tell shmem_unuse we found nothing */
 
 	/*
 	 * Move _head_ to start search for next from here.
@@ -680,7 +680,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 			spin_unlock(&info->lock);
 			swap_free(swap);
 		}
-		error = 1;	/* not an error, but entry was found */
 	}
 	return error;
 }
@@ -692,7 +691,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 {
 	struct list_head *this, *next;
 	struct shmem_inode_info *info;
-	int found = 0;
+	struct mem_cgroup *memcg;
 	int error = 0;
 
 	/*
@@ -707,26 +706,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
 	 * Charged back to the user (not to caller) when swap account is used.
 	 */
-	error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL);
+	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
 	if (error)
 		goto out;
 	/* No radix_tree_preload: swap entry keeps a place for page in tree */
+	error = -EAGAIN;
 
 	mutex_lock(&shmem_swaplist_mutex);
 	list_for_each_safe(this, next, &shmem_swaplist) {
 		info = list_entry(this, struct shmem_inode_info, swaplist);
 		if (info->swapped)
-			found = shmem_unuse_inode(info, swap, &page);
+			error = shmem_unuse_inode(info, swap, &page);
 		else
 			list_del_init(&info->swaplist);
 		cond_resched();
-		if (found)
+		if (error != -EAGAIN)
 			break;
+		/* found nothing in this: move on to search the next */
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
 
-	if (found < 0)
-		error = found;
+	if (error) {
+		if (error != -ENOMEM)
+			error = 0;
+		mem_cgroup_cancel_charge(page, memcg);
+	} else
+		mem_cgroup_commit_charge(page, memcg, true);
 out:
 	unlock_page(page);
 	page_cache_release(page);
@@ -1030,6 +1035,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info;
 	struct shmem_sb_info *sbinfo;
+	struct mem_cgroup *memcg;
 	struct page *page;
 	swp_entry_t swap;
 	int error;
@@ -1108,8 +1114,7 @@ repeat:
 				goto failed;
 		}
 
-		error = mem_cgroup_charge_file(page, current->mm,
-						gfp & GFP_RECLAIM_MASK);
+		error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,
 						swp_to_radix_entry(swap));
@@ -1125,12 +1130,16 @@ repeat:
 			 * Reset swap.val? No, leave it so "failed" goes back to
 			 * "repeat": reading a hole and writing should succeed.
 			 */
-			if (error)
+			if (error) {
+				mem_cgroup_cancel_charge(page, memcg);
 				delete_from_swap_cache(page);
+			}
 		}
 		if (error)
 			goto failed;
 
+		mem_cgroup_commit_charge(page, memcg, true);
+
 		spin_lock(&info->lock);
 		info->swapped--;
 		shmem_recalc_inode(inode);
@@ -1168,8 +1177,7 @@ repeat:
 		if (sgp == SGP_WRITE)
 			__SetPageReferenced(page);
 
-		error = mem_cgroup_charge_file(page, current->mm,
-						gfp & GFP_RECLAIM_MASK);
+		error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
 		if (error)
 			goto decused;
 		error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
@@ -1179,9 +1187,10 @@ repeat:
 			radix_tree_preload_end();
 		}
 		if (error) {
-			mem_cgroup_uncharge_cache_page(page);
+			mem_cgroup_cancel_charge(page, memcg);
 			goto decused;
 		}
+		mem_cgroup_commit_charge(page, memcg, false);
 		lru_cache_add_anon(page);
 
 		spin_lock(&info->lock);
diff --git a/mm/swap.c b/mm/swap.c
index c789d01c9ec3..3baca701bb78 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -687,6 +687,40 @@ void add_page_to_unevictable_list(struct page *page)
 	spin_unlock_irq(&zone->lru_lock);
 }
 
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page:  the page to be added to LRU
+ * @vma:   vma in which page is mapped for determining reclaimability
+ *
+ * Place @page on the active or unevictable LRU list, depending on its
+ * evictability.  Note that if the page is not evictable, it goes
+ * directly back onto it's zone's unevictable list, it does NOT use a
+ * per cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+					 struct vm_area_struct *vma)
+{
+	VM_BUG_ON_PAGE(PageLRU(page), page);
+
+	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+		SetPageActive(page);
+		lru_cache_add(page);
+		return;
+	}
+
+	if (!TestSetPageMlocked(page)) {
+		/*
+		 * We use the irq-unsafe __mod_zone_page_stat because this
+		 * counter is not modified from interrupt context, and the pte
+		 * lock is held(spinlock), which implies preemption disabled.
+		 */
+		__mod_zone_page_state(page_zone(page), NR_MLOCK,
+				    hpage_nr_pages(page));
+		count_vm_event(UNEVICTABLE_PGMLOCKED);
+	}
+	add_page_to_unevictable_list(page);
+}
+
 /*
  * If the page can not be invalidated, it is moved to the
  * inactive list to speed up its reclaim.  It is moved to the
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4c524f7bd0bf..0883b4912ff7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1106,15 +1106,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	if (unlikely(!page))
 		return -ENOMEM;
 
-	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
-					 GFP_KERNEL, &memcg)) {
+	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) {
 		ret = -ENOMEM;
 		goto out_nolock;
 	}
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
-		mem_cgroup_cancel_charge_swapin(memcg);
+		mem_cgroup_cancel_charge(page, memcg);
 		ret = 0;
 		goto out;
 	}
@@ -1124,11 +1123,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
-	if (page == swapcache)
+	if (page == swapcache) {
 		page_add_anon_rmap(page, vma, addr);
-	else /* ksm created a completely new copy */
+		mem_cgroup_commit_charge(page, memcg, true);
+	} else { /* ksm created a completely new copy */
 		page_add_new_anon_rmap(page, vma, addr);
-	mem_cgroup_commit_charge_swapin(page, memcg);
+		mem_cgroup_commit_charge(page, memcg, false);
+		lru_cache_add_active_or_unevictable(page, vma);
+	}
 	swap_free(entry);
 	/*
 	 * Move the page to the active list so it is not
-- 
cgit v1.2.3


From 0a31bc97c80c3fa87b32c091d9a930ac19cd0c40 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 8 Aug 2014 14:19:22 -0700
Subject: mm: memcontrol: rewrite uncharge API

The memcg uncharging code that is involved towards the end of a page's
lifetime - truncation, reclaim, swapout, migration - is impressively
complicated and fragile.

Because anonymous and file pages were always charged before they had their
page->mapping established, uncharges had to happen when the page type
could still be known from the context; as in unmap for anonymous, page
cache removal for file and shmem pages, and swap cache truncation for swap
pages.  However, these operations happen well before the page is actually
freed, and so a lot of synchronization is necessary:

- Charging, uncharging, page migration, and charge migration all need
  to take a per-page bit spinlock as they could race with uncharging.

- Swap cache truncation happens during both swap-in and swap-out, and
  possibly repeatedly before the page is actually freed.  This means
  that the memcg swapout code is called from many contexts that make
  no sense and it has to figure out the direction from page state to
  make sure memory and memory+swap are always correctly charged.

- On page migration, the old page might be unmapped but then reused,
  so memcg code has to prevent untimely uncharging in that case.
  Because this code - which should be a simple charge transfer - is so
  special-cased, it is not reusable for replace_page_cache().

But now that charged pages always have a page->mapping, introduce
mem_cgroup_uncharge(), which is called after the final put_page(), when we
know for sure that nobody is looking at the page anymore.

For page migration, introduce mem_cgroup_migrate(), which is called after
the migration is successful and the new page is fully rmapped.  Because
the old page is no longer uncharged after migration, prevent double
charges by decoupling the page's memcg association (PCG_USED and
pc->mem_cgroup) from the page holding an actual charge.  The new bits
PCG_MEM and PCG_MEMSW represent the respective charges and are transferred
to the new page during migration.

mem_cgroup_migrate() is suitable for replace_page_cache() as well,
which gets rid of mem_cgroup_replace_page_cache().  However, care
needs to be taken because both the source and the target page can
already be charged and on the LRU when fuse is splicing: grab the page
lock on the charge moving side to prevent changing pc->mem_cgroup of a
page under migration.  Also, the lruvecs of both pages change as we
uncharge the old and charge the new during migration, and putback may
race with us, so grab the lru lock and isolate the pages iff on LRU to
prevent races and ensure the pages are on the right lruvec afterward.

Swap accounting is massively simplified: because the page is no longer
uncharged as early as swap cache deletion, a new mem_cgroup_swapout() can
transfer the page's memory+swap charge (PCG_MEMSW) to the swap entry
before the final put_page() in page reclaim.

Finally, page_cgroup changes are now protected by whatever protection the
page itself offers: anonymous pages are charged under the page table lock,
whereas page cache insertions, swapin, and migration hold the page lock.
Uncharging happens under full exclusion with no outstanding references.
Charging and uncharging also ensure that the page is off-LRU, which
serializes against charge migration.  Remove the very costly page_cgroup
lock and set pc->flags non-atomically.

[mhocko@suse.cz: mem_cgroup_charge_statistics needs preempt_disable]
[vdavydov@parallels.com: fix flags definition]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Tested-by: Jet Chen <jet.chen@intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Tested-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/memcg_test.txt | 128 +-----
 include/linux/memcontrol.h           |  49 +--
 include/linux/page_cgroup.h          |  43 +-
 include/linux/swap.h                 |  12 +-
 mm/filemap.c                         |   4 +-
 mm/memcontrol.c                      | 828 ++++++++++++++---------------------
 mm/memory.c                          |   2 -
 mm/migrate.c                         |  38 +-
 mm/rmap.c                            |   1 -
 mm/shmem.c                           |   8 +-
 mm/swap.c                            |   6 +
 mm/swap_state.c                      |   8 +-
 mm/swapfile.c                        |   7 +-
 mm/truncate.c                        |   9 -
 mm/vmscan.c                          |  12 +-
 mm/zswap.c                           |   2 +-
 16 files changed, 389 insertions(+), 768 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index bcf750d3cecd..8870b0212150 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -29,28 +29,13 @@ Please note that implementation details can be changed.
 2. Uncharge
   a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
 
-	mem_cgroup_uncharge_page()
-	  Called when an anonymous page is fully unmapped. I.e., mapcount goes
-	  to 0. If the page is SwapCache, uncharge is delayed until
-	  mem_cgroup_uncharge_swapcache().
-
-	mem_cgroup_uncharge_cache_page()
-	  Called when a page-cache is deleted from radix-tree. If the page is
-	  SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
-
-	mem_cgroup_uncharge_swapcache()
-	  Called when SwapCache is removed from radix-tree. The charge itself
-	  is moved to swap_cgroup. (If mem+swap controller is disabled, no
-	  charge to swap occurs.)
+	mem_cgroup_uncharge()
+	  Called when a page's refcount goes down to 0.
 
 	mem_cgroup_uncharge_swap()
 	  Called when swp_entry's refcnt goes down to 0. A charge against swap
 	  disappears.
 
-	mem_cgroup_end_migration(old, new)
-	At success of migration old is uncharged (if necessary), a charge
-	to new page is committed. At failure, charge to old page is committed.
-
 3. charge-commit-cancel
 	Memcg pages are charged in two steps:
 		mem_cgroup_try_charge()
@@ -69,18 +54,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	Anonymous page is newly allocated at
 		  - page fault into MAP_ANONYMOUS mapping.
 		  - Copy-On-Write.
- 	It is charged right after it's allocated before doing any page table
-	related operations. Of course, it's uncharged when another page is used
-	for the fault address.
-
-	At freeing anonymous page (by exit() or munmap()), zap_pte() is called
-	and pages for ptes are freed one by one.(see mm/memory.c). Uncharges
-	are done at page_remove_rmap() when page_mapcount() goes down to 0.
-
-	Another page freeing is by page-reclaim (vmscan.c) and anonymous
-	pages are swapped out. In this case, the page is marked as
-	PageSwapCache(). uncharge() routine doesn't uncharge the page marked
-	as SwapCache(). It's delayed until __delete_from_swap_cache().
 
 	4.1 Swap-in.
 	At swap-in, the page is taken from swap-cache. There are 2 cases.
@@ -89,41 +62,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	(b) If the SwapCache has been mapped by processes, it has been
 	    charged already.
 
-	This swap-in is one of the most complicated work. In do_swap_page(),
-	following events occur when pte is unchanged.
-
-	(1) the page (SwapCache) is looked up.
-	(2) lock_page()
-	(3) try_charge_swapin()
-	(4) reuse_swap_page() (may call delete_swap_cache())
-	(5) commit_charge_swapin()
-	(6) swap_free().
-
-	Considering following situation for example.
-
-	(A) The page has not been charged before (2) and reuse_swap_page()
-	    doesn't call delete_from_swap_cache().
-	(B) The page has not been charged before (2) and reuse_swap_page()
-	    calls delete_from_swap_cache().
-	(C) The page has been charged before (2) and reuse_swap_page() doesn't
-	    call delete_from_swap_cache().
-	(D) The page has been charged before (2) and reuse_swap_page() calls
-	    delete_from_swap_cache().
-
-	    memory.usage/memsw.usage changes to this page/swp_entry will be
-	 Case          (A)      (B)       (C)     (D)
-         Event
-       Before (2)     0/ 1     0/ 1      1/ 1    1/ 1
-          ===========================================
-          (3)        +1/+1    +1/+1     +1/+1   +1/+1
-          (4)          -       0/ 0       -     -1/ 0
-          (5)         0/-1     0/ 0     -1/-1    0/ 0
-          (6)          -       0/-1       -      0/-1
-          ===========================================
-       Result         1/ 1     1/ 1      1/ 1    1/ 1
-
-       In any cases, charges to this page should be 1/ 1.
-
 	4.2 Swap-out.
 	At swap-out, typical state transition is below.
 
@@ -136,28 +74,20 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	    swp_entry's refcnt -= 1.
 
 
-	At (b), the page is marked as SwapCache and not uncharged.
-	At (d), the page is removed from SwapCache and a charge in page_cgroup
-	is moved to swap_cgroup.
-
 	Finally, at task exit,
 	(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
-	Here, a charge in swap_cgroup disappears.
 
 5. Page Cache
    	Page Cache is charged at
 	- add_to_page_cache_locked().
 
-	uncharged at
-	- __remove_from_page_cache().
-
 	The logic is very clear. (About migration, see below)
 	Note: __remove_from_page_cache() is called by remove_from_page_cache()
 	and __remove_mapping().
 
 6. Shmem(tmpfs) Page Cache
-	Memcg's charge/uncharge have special handlers of shmem. The best way
-	to understand shmem's page state transition is to read mm/shmem.c.
+	The best way to understand shmem's page state transition is to read
+	mm/shmem.c.
 	But brief explanation of the behavior of memcg around shmem will be
 	helpful to understand the logic.
 
@@ -170,56 +100,10 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	It's charged when...
 	- A new page is added to shmem's radix-tree.
 	- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
-	It's uncharged when
-	- A page is removed from radix-tree and not SwapCache.
-	- When SwapCache is removed, a charge is moved to swap_cgroup.
-	- When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
-	  disappears.
 
 7. Page Migration
-   	One of the most complicated functions is page-migration-handler.
-	Memcg has 2 routines. Assume that we are migrating a page's contents
-	from OLDPAGE to NEWPAGE.
-
-	Usual migration logic is..
-	(a) remove the page from LRU.
-	(b) allocate NEWPAGE (migration target)
-	(c) lock by lock_page().
-	(d) unmap all mappings.
-	(e-1) If necessary, replace entry in radix-tree.
-	(e-2) move contents of a page.
-	(f) map all mappings again.
-	(g) pushback the page to LRU.
-	(-) OLDPAGE will be freed.
-
-	Before (g), memcg should complete all necessary charge/uncharge to
-	NEWPAGE/OLDPAGE.
-
-	The point is....
-	- If OLDPAGE is anonymous, all charges will be dropped at (d) because
-          try_to_unmap() drops all mapcount and the page will not be
-	  SwapCache.
-
-	- If OLDPAGE is SwapCache, charges will be kept at (g) because
-	  __delete_from_swap_cache() isn't called at (e-1)
-
-	- If OLDPAGE is page-cache, charges will be kept at (g) because
-	  remove_from_swap_cache() isn't called at (e-1)
-
-	memcg provides following hooks.
-
-	- mem_cgroup_prepare_migration(OLDPAGE)
-	  Called after (b) to account a charge (usage += PAGE_SIZE) against
-	  memcg which OLDPAGE belongs to.
-
-        - mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
-	  Called after (f) before (g).
-	  If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
-	  charged, a charge by prepare_migration() is automatically canceled.
-	  If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
-
-	  But zap_pte() (by exit or munmap) can be called while migration,
-	  we have to check if OLDPAGE/NEWPAGE is a valid page after commit().
+
+	mem_cgroup_migrate()
 
 8. LRU
         Each memcg has its own private LRU. Now, its handling is under global
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1a9a096858e0..806b8fa15c5f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -60,15 +60,17 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 			      bool lrucare);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
 
-struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
-struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
+void mem_cgroup_uncharge(struct page *page);
+
+/* Batched uncharging */
+void mem_cgroup_uncharge_start(void);
+void mem_cgroup_uncharge_end(void);
 
-/* For coalescing uncharge for reducing memcg' overhead*/
-extern void mem_cgroup_uncharge_start(void);
-extern void mem_cgroup_uncharge_end(void);
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+			bool lrucare);
 
-extern void mem_cgroup_uncharge_page(struct page *page);
-extern void mem_cgroup_uncharge_cache_page(struct page *page);
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
+struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
 
 bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
 				  struct mem_cgroup *memcg);
@@ -96,12 +98,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
 
 extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
 
-extern void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-			     struct mem_cgroup **memcgp);
-extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
-	struct page *oldpage, struct page *newpage, bool migration_ok);
-
 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
 				   struct mem_cgroup *,
 				   struct mem_cgroup_reclaim_cookie *);
@@ -116,8 +112,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
 void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 					struct task_struct *p);
-extern void mem_cgroup_replace_page_cache(struct page *oldpage,
-					struct page *newpage);
 
 static inline void mem_cgroup_oom_enable(void)
 {
@@ -235,19 +229,21 @@ static inline void mem_cgroup_cancel_charge(struct page *page,
 {
 }
 
-static inline void mem_cgroup_uncharge_start(void)
+static inline void mem_cgroup_uncharge(struct page *page)
 {
 }
 
-static inline void mem_cgroup_uncharge_end(void)
+static inline void mem_cgroup_uncharge_start(void)
 {
 }
 
-static inline void mem_cgroup_uncharge_page(struct page *page)
+static inline void mem_cgroup_uncharge_end(void)
 {
 }
 
-static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+static inline void mem_cgroup_migrate(struct page *oldpage,
+				      struct page *newpage,
+				      bool lrucare)
 {
 }
 
@@ -286,17 +282,6 @@ static inline struct cgroup_subsys_state
 	return NULL;
 }
 
-static inline void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-			     struct mem_cgroup **memcgp)
-{
-}
-
-static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
-		struct page *oldpage, struct page *newpage, bool migration_ok)
-{
-}
-
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
 		struct mem_cgroup *prev,
@@ -392,10 +377,6 @@ static inline
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
-static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
-				struct page *newpage)
-{
-}
 #endif /* CONFIG_MEMCG */
 
 #if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524716db..9bfb8e68a595 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -3,9 +3,9 @@
 
 enum {
 	/* flags for mem_cgroup */
-	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
-	PCG_USED, /* this object is in use. */
-	PCG_MIGRATION, /* under page migration */
+	PCG_USED = 0x01,	/* This page is charged to a memcg */
+	PCG_MEM = 0x02,		/* This page holds a memory charge */
+	PCG_MEMSW = 0x04,	/* This page holds a memory+swap charge */
 	__NR_PCG_FLAGS,
 };
 
@@ -44,42 +44,9 @@ static inline void __init page_cgroup_init(void)
 struct page_cgroup *lookup_page_cgroup(struct page *page);
 struct page *lookup_cgroup_page(struct page_cgroup *pc);
 
-#define TESTPCGFLAG(uname, lname)			\
-static inline int PageCgroup##uname(struct page_cgroup *pc)	\
-	{ return test_bit(PCG_##lname, &pc->flags); }
-
-#define SETPCGFLAG(uname, lname)			\
-static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
-	{ set_bit(PCG_##lname, &pc->flags);  }
-
-#define CLEARPCGFLAG(uname, lname)			\
-static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
-	{ clear_bit(PCG_##lname, &pc->flags);  }
-
-#define TESTCLEARPCGFLAG(uname, lname)			\
-static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)	\
-	{ return test_and_clear_bit(PCG_##lname, &pc->flags);  }
-
-TESTPCGFLAG(Used, USED)
-CLEARPCGFLAG(Used, USED)
-SETPCGFLAG(Used, USED)
-
-SETPCGFLAG(Migration, MIGRATION)
-CLEARPCGFLAG(Migration, MIGRATION)
-TESTPCGFLAG(Migration, MIGRATION)
-
-static inline void lock_page_cgroup(struct page_cgroup *pc)
-{
-	/*
-	 * Don't take this lock in IRQ context.
-	 * This lock is for pc->mem_cgroup, USED, MIGRATION
-	 */
-	bit_spin_lock(PCG_LOCK, &pc->flags);
-}
-
-static inline void unlock_page_cgroup(struct page_cgroup *pc)
+static inline int PageCgroupUsed(struct page_cgroup *pc)
 {
-	bit_spin_unlock(PCG_LOCK, &pc->flags);
+	return !!(pc->flags & PCG_USED);
 }
 
 #else /* CONFIG_MEMCG */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 46a649e4e8cd..1b72060f093a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -381,9 +381,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 }
 #endif
 #ifdef CONFIG_MEMCG_SWAP
-extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
+extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
+extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
 #else
-static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+}
+static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
 {
 }
 #endif
@@ -443,7 +447,7 @@ extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
-extern void swapcache_free(swp_entry_t, struct page *page);
+extern void swapcache_free(swp_entry_t);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
@@ -507,7 +511,7 @@ static inline void swap_free(swp_entry_t swp)
 {
 }
 
-static inline void swapcache_free(swp_entry_t swp, struct page *page)
+static inline void swapcache_free(swp_entry_t swp)
 {
 }
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 349a40e35545..f501b56ec2c6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -234,7 +234,6 @@ void delete_from_page_cache(struct page *page)
 	spin_lock_irq(&mapping->tree_lock);
 	__delete_from_page_cache(page, NULL);
 	spin_unlock_irq(&mapping->tree_lock);
-	mem_cgroup_uncharge_cache_page(page);
 
 	if (freepage)
 		freepage(page);
@@ -490,8 +489,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		if (PageSwapBacked(new))
 			__inc_zone_page_state(new, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
-		/* mem_cgroup codes must not be called under tree_lock */
-		mem_cgroup_replace_page_cache(old, new);
+		mem_cgroup_migrate(old, new, true);
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1cbe1e54ff5f..9106f1b12f56 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
 static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
 				       struct mem_cgroup_tree_per_zone *mctz)
 {
-	spin_lock(&mctz->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mctz->lock, flags);
 	__mem_cgroup_remove_exceeded(mz, mctz);
-	spin_unlock(&mctz->lock);
+	spin_unlock_irqrestore(&mctz->lock, flags);
 }
 
 
@@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 		 * mem is over its softlimit.
 		 */
 		if (excess || mz->on_tree) {
-			spin_lock(&mctz->lock);
+			unsigned long flags;
+
+			spin_lock_irqsave(&mctz->lock, flags);
 			/* if on-tree, remove it */
 			if (mz->on_tree)
 				__mem_cgroup_remove_exceeded(mz, mctz);
@@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 			 * If excess is 0, no tree ops.
 			 */
 			__mem_cgroup_insert_exceeded(mz, mctz, excess);
-			spin_unlock(&mctz->lock);
+			spin_unlock_irqrestore(&mctz->lock, flags);
 		}
 	}
 }
@@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 {
 	struct mem_cgroup_per_zone *mz;
 
-	spin_lock(&mctz->lock);
+	spin_lock_irq(&mctz->lock);
 	mz = __mem_cgroup_largest_soft_limit_node(mctz);
-	spin_unlock(&mctz->lock);
+	spin_unlock_irq(&mctz->lock);
 	return mz;
 }
 
@@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
 	return val;
 }
 
-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
-					 bool charge)
-{
-	int val = (charge) ? 1 : -1;
-	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
-}
-
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 					    enum mem_cgroup_events_index idx)
 {
@@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 					 struct page *page,
-					 bool anon, int nr_pages)
+					 int nr_pages)
 {
 	/*
 	 * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
 	 * counted as CACHE even if it's on ANON LRU.
 	 */
-	if (anon)
+	if (PageAnon(page))
 		__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
 				nr_pages);
 	else
@@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
  */
 static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 {
-	preempt_disable();
 	/* threshold event is triggered in finer grain than soft limit */
 	if (unlikely(mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_THRESH))) {
@@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 		do_numainfo = mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_NUMAINFO);
 #endif
-		preempt_enable();
-
 		mem_cgroup_threshold(memcg);
 		if (unlikely(do_softlimit))
 			mem_cgroup_update_tree(memcg, page);
@@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 		if (unlikely(do_numainfo))
 			atomic_inc(&memcg->numainfo_events);
 #endif
-	} else
-		preempt_enable();
+	}
 }
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -1347,20 +1340,6 @@ out:
 	return lruvec;
 }
 
-/*
- * Following LRU functions are allowed to be used without PCG_LOCK.
- * Operations are called by routine of global LRU independently from memcg.
- * What we have to take care of here is validness of pc->mem_cgroup.
- *
- * Changes to pc->mem_cgroup happens when
- * 1. charge
- * 2. moving account
- * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
- * It is added to LRU before charge.
- * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
- * When moving account, the page is not on LRU. It's isolated.
- */
-
 /**
  * mem_cgroup_page_lruvec - return lruvec for adding an lru page
  * @page: the page
@@ -2261,22 +2240,14 @@ cleanup:
  *
  * Notes: Race condition
  *
- * We usually use lock_page_cgroup() for accessing page_cgroup member but
- * it tends to be costly. But considering some conditions, we doesn't need
- * to do so _always_.
- *
- * Considering "charge", lock_page_cgroup() is not required because all
- * file-stat operations happen after a page is attached to radix-tree. There
- * are no race with "charge".
+ * Charging occurs during page instantiation, while the page is
+ * unmapped and locked in page migration, or while the page table is
+ * locked in THP migration.  No race is possible.
  *
- * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup
- * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even
- * if there are race with "uncharge". Statistics itself is properly handled
- * by flags.
+ * Uncharge happens to pages with zero references, no race possible.
  *
- * Considering "move", this is an only case we see a race. To make the race
- * small, we check memcg->moving_account and detect there are possibility
- * of race or not. If there is, we take a lock.
+ * Charge moving between groups is protected by checking mm->moving
+ * account and taking the move_lock in the slowpath.
  */
 
 void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2689,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 	return mem_cgroup_from_id(id);
 }
 
+/*
+ * try_get_mem_cgroup_from_page - look up page's memcg association
+ * @page: the page
+ *
+ * Look up, get a css reference, and return the memcg that owns @page.
+ *
+ * The page must be locked to prevent racing with swap-in and page
+ * cache charges.  If coming from an unlocked page table, the caller
+ * must ensure the page is on the LRU or this can race with charging.
+ */
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 {
 	struct mem_cgroup *memcg = NULL;
@@ -2699,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 
 	pc = lookup_page_cgroup(page);
-	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
 		memcg = pc->mem_cgroup;
 		if (memcg && !css_tryget_online(&memcg->css))
@@ -2713,19 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 			memcg = NULL;
 		rcu_read_unlock();
 	}
-	unlock_page_cgroup(pc);
 	return memcg;
 }
 
+static void lock_page_lru(struct page *page, int *isolated)
+{
+	struct zone *zone = page_zone(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	if (PageLRU(page)) {
+		struct lruvec *lruvec;
+
+		lruvec = mem_cgroup_page_lruvec(page, zone);
+		ClearPageLRU(page);
+		del_page_from_lru_list(page, lruvec, page_lru(page));
+		*isolated = 1;
+	} else
+		*isolated = 0;
+}
+
+static void unlock_page_lru(struct page *page, int isolated)
+{
+	struct zone *zone = page_zone(page);
+
+	if (isolated) {
+		struct lruvec *lruvec;
+
+		lruvec = mem_cgroup_page_lruvec(page, zone);
+		VM_BUG_ON_PAGE(PageLRU(page), page);
+		SetPageLRU(page);
+		add_page_to_lru_list(page, lruvec, page_lru(page));
+	}
+	spin_unlock_irq(&zone->lru_lock);
+}
+
 static void commit_charge(struct page *page, struct mem_cgroup *memcg,
-			  unsigned int nr_pages, bool anon, bool lrucare)
+			  unsigned int nr_pages, bool lrucare)
 {
 	struct page_cgroup *pc = lookup_page_cgroup(page);
-	struct zone *uninitialized_var(zone);
-	struct lruvec *lruvec;
-	bool was_on_lru = false;
+	int isolated;
 
-	lock_page_cgroup(pc);
 	VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
 	/*
 	 * we don't need page_cgroup_lock about tail pages, becase they are not
@@ -2736,39 +2743,38 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
 	 * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
 	 * may already be on some other mem_cgroup's LRU.  Take care of it.
 	 */
-	if (lrucare) {
-		zone = page_zone(page);
-		spin_lock_irq(&zone->lru_lock);
-		if (PageLRU(page)) {
-			lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
-			ClearPageLRU(page);
-			del_page_from_lru_list(page, lruvec, page_lru(page));
-			was_on_lru = true;
-		}
-	}
+	if (lrucare)
+		lock_page_lru(page, &isolated);
 
+	/*
+	 * Nobody should be changing or seriously looking at
+	 * pc->mem_cgroup and pc->flags at this point:
+	 *
+	 * - the page is uncharged
+	 *
+	 * - the page is off-LRU
+	 *
+	 * - an anonymous fault has exclusive page access, except for
+	 *   a locked page table
+	 *
+	 * - a page cache insertion, a swapin fault, or a migration
+	 *   have the page locked
+	 */
 	pc->mem_cgroup = memcg;
-	SetPageCgroupUsed(pc);
-
-	if (lrucare) {
-		if (was_on_lru) {
-			lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
-			VM_BUG_ON_PAGE(PageLRU(page), page);
-			SetPageLRU(page);
-			add_page_to_lru_list(page, lruvec, page_lru(page));
-		}
-		spin_unlock_irq(&zone->lru_lock);
-	}
+	pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
 
-	mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
-	unlock_page_cgroup(pc);
+	if (lrucare)
+		unlock_page_lru(page, isolated);
 
+	local_irq_disable();
+	mem_cgroup_charge_statistics(memcg, page, nr_pages);
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
 	 * if they exceeds softlimit.
 	 */
 	memcg_check_events(memcg, page);
+	local_irq_enable();
 }
 
 static DEFINE_MUTEX(set_limit_mutex);
@@ -3395,7 +3401,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
 /*
  * Because tail pages are not marked as "used", set it. We're under
  * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -3416,7 +3421,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
 	for (i = 1; i < HPAGE_PMD_NR; i++) {
 		pc = head_pc + i;
 		pc->mem_cgroup = memcg;
-		pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+		pc->flags = head_pc->flags;
 	}
 	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
 		       HPAGE_PMD_NR);
@@ -3446,7 +3451,6 @@ static int mem_cgroup_move_account(struct page *page,
 {
 	unsigned long flags;
 	int ret;
-	bool anon = PageAnon(page);
 
 	VM_BUG_ON(from == to);
 	VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -3460,15 +3464,21 @@ static int mem_cgroup_move_account(struct page *page,
 	if (nr_pages > 1 && !PageTransHuge(page))
 		goto out;
 
-	lock_page_cgroup(pc);
+	/*
+	 * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
+	 * of its source page while we change it: page migration takes
+	 * both pages off the LRU, but page cache replacement doesn't.
+	 */
+	if (!trylock_page(page))
+		goto out;
 
 	ret = -EINVAL;
 	if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
-		goto unlock;
+		goto out_unlock;
 
 	move_lock_mem_cgroup(from, &flags);
 
-	if (!anon && page_mapped(page)) {
+	if (!PageAnon(page) && page_mapped(page)) {
 		__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
 			       nr_pages);
 		__this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
@@ -3482,20 +3492,25 @@ static int mem_cgroup_move_account(struct page *page,
 			       nr_pages);
 	}
 
-	mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
+	/*
+	 * It is safe to change pc->mem_cgroup here because the page
+	 * is referenced, charged, and isolated - we can't race with
+	 * uncharging, charging, migration, or LRU putback.
+	 */
 
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
-	mem_cgroup_charge_statistics(to, page, anon, nr_pages);
 	move_unlock_mem_cgroup(from, &flags);
 	ret = 0;
-unlock:
-	unlock_page_cgroup(pc);
-	/*
-	 * check events
-	 */
+
+	local_irq_disable();
+	mem_cgroup_charge_statistics(to, page, nr_pages);
 	memcg_check_events(to, page);
+	mem_cgroup_charge_statistics(from, page, -nr_pages);
 	memcg_check_events(from, page);
+	local_irq_enable();
+out_unlock:
+	unlock_page(page);
 out:
 	return ret;
 }
@@ -3566,193 +3581,6 @@ out:
 	return ret;
 }
 
-static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
-				   unsigned int nr_pages,
-				   const enum charge_type ctype)
-{
-	struct memcg_batch_info *batch = NULL;
-	bool uncharge_memsw = true;
-
-	/* If swapout, usage of swap doesn't decrease */
-	if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
-		uncharge_memsw = false;
-
-	batch = &current->memcg_batch;
-	/*
-	 * In usual, we do css_get() when we remember memcg pointer.
-	 * But in this case, we keep res->usage until end of a series of
-	 * uncharges. Then, it's ok to ignore memcg's refcnt.
-	 */
-	if (!batch->memcg)
-		batch->memcg = memcg;
-	/*
-	 * do_batch > 0 when unmapping pages or inode invalidate/truncate.
-	 * In those cases, all pages freed continuously can be expected to be in
-	 * the same cgroup and we have chance to coalesce uncharges.
-	 * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
-	 * because we want to do uncharge as soon as possible.
-	 */
-
-	if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
-		goto direct_uncharge;
-
-	if (nr_pages > 1)
-		goto direct_uncharge;
-
-	/*
-	 * In typical case, batch->memcg == mem. This means we can
-	 * merge a series of uncharges to an uncharge of res_counter.
-	 * If not, we uncharge res_counter ony by one.
-	 */
-	if (batch->memcg != memcg)
-		goto direct_uncharge;
-	/* remember freed charge and uncharge it later */
-	batch->nr_pages++;
-	if (uncharge_memsw)
-		batch->memsw_nr_pages++;
-	return;
-direct_uncharge:
-	res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
-	if (uncharge_memsw)
-		res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
-	if (unlikely(batch->memcg != memcg))
-		memcg_oom_recover(memcg);
-}
-
-/*
- * uncharge if !page_mapped(page)
- */
-static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
-			     bool end_migration)
-{
-	struct mem_cgroup *memcg = NULL;
-	unsigned int nr_pages = 1;
-	struct page_cgroup *pc;
-	bool anon;
-
-	if (mem_cgroup_disabled())
-		return NULL;
-
-	if (PageTransHuge(page)) {
-		nr_pages <<= compound_order(page);
-		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-	}
-	/*
-	 * Check if our page_cgroup is valid
-	 */
-	pc = lookup_page_cgroup(page);
-	if (unlikely(!PageCgroupUsed(pc)))
-		return NULL;
-
-	lock_page_cgroup(pc);
-
-	memcg = pc->mem_cgroup;
-
-	if (!PageCgroupUsed(pc))
-		goto unlock_out;
-
-	anon = PageAnon(page);
-
-	switch (ctype) {
-	case MEM_CGROUP_CHARGE_TYPE_ANON:
-		/*
-		 * Generally PageAnon tells if it's the anon statistics to be
-		 * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
-		 * used before page reached the stage of being marked PageAnon.
-		 */
-		anon = true;
-		/* fallthrough */
-	case MEM_CGROUP_CHARGE_TYPE_DROP:
-		/* See mem_cgroup_prepare_migration() */
-		if (page_mapped(page))
-			goto unlock_out;
-		/*
-		 * Pages under migration may not be uncharged.  But
-		 * end_migration() /must/ be the one uncharging the
-		 * unused post-migration page and so it has to call
-		 * here with the migration bit still set.  See the
-		 * res_counter handling below.
-		 */
-		if (!end_migration && PageCgroupMigration(pc))
-			goto unlock_out;
-		break;
-	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
-		if (!PageAnon(page)) {	/* Shared memory */
-			if (page->mapping && !page_is_file_cache(page))
-				goto unlock_out;
-		} else if (page_mapped(page)) /* Anon */
-				goto unlock_out;
-		break;
-	default:
-		break;
-	}
-
-	mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
-
-	ClearPageCgroupUsed(pc);
-	/*
-	 * pc->mem_cgroup is not cleared here. It will be accessed when it's
-	 * freed from LRU. This is safe because uncharged page is expected not
-	 * to be reused (freed soon). Exception is SwapCache, it's handled by
-	 * special functions.
-	 */
-
-	unlock_page_cgroup(pc);
-	/*
-	 * even after unlock, we have memcg->res.usage here and this memcg
-	 * will never be freed, so it's safe to call css_get().
-	 */
-	memcg_check_events(memcg, page);
-	if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
-		mem_cgroup_swap_statistics(memcg, true);
-		css_get(&memcg->css);
-	}
-	/*
-	 * Migration does not charge the res_counter for the
-	 * replacement page, so leave it alone when phasing out the
-	 * page that is unused after the migration.
-	 */
-	if (!end_migration)
-		mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
-
-	return memcg;
-
-unlock_out:
-	unlock_page_cgroup(pc);
-	return NULL;
-}
-
-void mem_cgroup_uncharge_page(struct page *page)
-{
-	/* early check. */
-	if (page_mapped(page))
-		return;
-	VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
-	/*
-	 * If the page is in swap cache, uncharge should be deferred
-	 * to the swap path, which also properly accounts swap usage
-	 * and handles memcg lifetime.
-	 *
-	 * Note that this check is not stable and reclaim may add the
-	 * page to swap cache at any time after this.  However, if the
-	 * page is not in swap cache by the time page->mapcount hits
-	 * 0, there won't be any page table references to the swap
-	 * slot, and reclaim will free it and not actually write the
-	 * page to disk.
-	 */
-	if (PageSwapCache(page))
-		return;
-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
-}
-
-void mem_cgroup_uncharge_cache_page(struct page *page)
-{
-	VM_BUG_ON_PAGE(page_mapped(page), page);
-	VM_BUG_ON_PAGE(page->mapping, page);
-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
-}
-
 /*
  * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
  * In that cases, pages are freed continuously and we can expect pages
@@ -3763,6 +3591,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 
 void mem_cgroup_uncharge_start(void)
 {
+	unsigned long flags;
+
+	local_irq_save(flags);
 	current->memcg_batch.do_batch++;
 	/* We can do nest. */
 	if (current->memcg_batch.do_batch == 1) {
@@ -3770,21 +3601,18 @@ void mem_cgroup_uncharge_start(void)
 		current->memcg_batch.nr_pages = 0;
 		current->memcg_batch.memsw_nr_pages = 0;
 	}
+	local_irq_restore(flags);
 }
 
 void mem_cgroup_uncharge_end(void)
 {
 	struct memcg_batch_info *batch = &current->memcg_batch;
+	unsigned long flags;
 
-	if (!batch->do_batch)
-		return;
-
-	batch->do_batch--;
-	if (batch->do_batch) /* If stacked, do nothing. */
-		return;
-
-	if (!batch->memcg)
-		return;
+	local_irq_save(flags);
+	VM_BUG_ON(!batch->do_batch);
+	if (--batch->do_batch) /* If stacked, do nothing */
+		goto out;
 	/*
 	 * This "batch->memcg" is valid without any css_get/put etc...
 	 * bacause we hide charges behind us.
@@ -3796,61 +3624,16 @@ void mem_cgroup_uncharge_end(void)
 		res_counter_uncharge(&batch->memcg->memsw,
 				     batch->memsw_nr_pages * PAGE_SIZE);
 	memcg_oom_recover(batch->memcg);
-	/* forget this pointer (for sanity check) */
-	batch->memcg = NULL;
-}
-
-#ifdef CONFIG_SWAP
-/*
- * called after __delete_from_swap_cache() and drop "page" account.
- * memcg information is recorded to swap_cgroup of "ent"
- */
-void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
-	struct mem_cgroup *memcg;
-	int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
-
-	if (!swapout) /* this was a swap cache but the swap is unused ! */
-		ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
-
-	memcg = __mem_cgroup_uncharge_common(page, ctype, false);
-
-	/*
-	 * record memcg information,  if swapout && memcg != NULL,
-	 * css_get() was called in uncharge().
-	 */
-	if (do_swap_account && swapout && memcg)
-		swap_cgroup_record(ent, mem_cgroup_id(memcg));
+out:
+	local_irq_restore(flags);
 }
-#endif
 
 #ifdef CONFIG_MEMCG_SWAP
-/*
- * called from swap_entry_free(). remove record in swap_cgroup and
- * uncharge "memsw" account.
- */
-void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
+					 bool charge)
 {
-	struct mem_cgroup *memcg;
-	unsigned short id;
-
-	if (!do_swap_account)
-		return;
-
-	id = swap_cgroup_record(ent, 0);
-	rcu_read_lock();
-	memcg = mem_cgroup_lookup(id);
-	if (memcg) {
-		/*
-		 * We uncharge this because swap is freed.  This memcg can
-		 * be obsolete one. We avoid calling css_tryget_online().
-		 */
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
-		mem_cgroup_swap_statistics(memcg, false);
-		css_put(&memcg->css);
-	}
-	rcu_read_unlock();
+	int val = (charge) ? 1 : -1;
+	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
 }
 
 /**
@@ -3902,169 +3685,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
 }
 #endif
 
-/*
- * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
- * page belongs to.
- */
-void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-				  struct mem_cgroup **memcgp)
-{
-	struct mem_cgroup *memcg = NULL;
-	unsigned int nr_pages = 1;
-	struct page_cgroup *pc;
-
-	*memcgp = NULL;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	if (PageTransHuge(page))
-		nr_pages <<= compound_order(page);
-
-	pc = lookup_page_cgroup(page);
-	lock_page_cgroup(pc);
-	if (PageCgroupUsed(pc)) {
-		memcg = pc->mem_cgroup;
-		css_get(&memcg->css);
-		/*
-		 * At migrating an anonymous page, its mapcount goes down
-		 * to 0 and uncharge() will be called. But, even if it's fully
-		 * unmapped, migration may fail and this page has to be
-		 * charged again. We set MIGRATION flag here and delay uncharge
-		 * until end_migration() is called
-		 *
-		 * Corner Case Thinking
-		 * A)
-		 * When the old page was mapped as Anon and it's unmap-and-freed
-		 * while migration was ongoing.
-		 * If unmap finds the old page, uncharge() of it will be delayed
-		 * until end_migration(). If unmap finds a new page, it's
-		 * uncharged when it make mapcount to be 1->0. If unmap code
-		 * finds swap_migration_entry, the new page will not be mapped
-		 * and end_migration() will find it(mapcount==0).
-		 *
-		 * B)
-		 * When the old page was mapped but migraion fails, the kernel
-		 * remaps it. A charge for it is kept by MIGRATION flag even
-		 * if mapcount goes down to 0. We can do remap successfully
-		 * without charging it again.
-		 *
-		 * C)
-		 * The "old" page is under lock_page() until the end of
-		 * migration, so, the old page itself will not be swapped-out.
-		 * If the new page is swapped out before end_migraton, our
-		 * hook to usual swap-out path will catch the event.
-		 */
-		if (PageAnon(page))
-			SetPageCgroupMigration(pc);
-	}
-	unlock_page_cgroup(pc);
-	/*
-	 * If the page is not charged at this point,
-	 * we return here.
-	 */
-	if (!memcg)
-		return;
-
-	*memcgp = memcg;
-	/*
-	 * We charge new page before it's used/mapped. So, even if unlock_page()
-	 * is called before end_migration, we can catch all events on this new
-	 * page. In the case new page is migrated but not remapped, new page's
-	 * mapcount will be finally 0 and we call uncharge in end_migration().
-	 */
-	/*
-	 * The page is committed to the memcg, but it's not actually
-	 * charged to the res_counter since we plan on replacing the
-	 * old one and only one page is going to be left afterwards.
-	 */
-	commit_charge(newpage, memcg, nr_pages, PageAnon(page), false);
-}
-
-/* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *memcg,
-	struct page *oldpage, struct page *newpage, bool migration_ok)
-{
-	struct page *used, *unused;
-	struct page_cgroup *pc;
-	bool anon;
-
-	if (!memcg)
-		return;
-
-	if (!migration_ok) {
-		used = oldpage;
-		unused = newpage;
-	} else {
-		used = newpage;
-		unused = oldpage;
-	}
-	anon = PageAnon(used);
-	__mem_cgroup_uncharge_common(unused,
-				     anon ? MEM_CGROUP_CHARGE_TYPE_ANON
-				     : MEM_CGROUP_CHARGE_TYPE_CACHE,
-				     true);
-	css_put(&memcg->css);
-	/*
-	 * We disallowed uncharge of pages under migration because mapcount
-	 * of the page goes down to zero, temporarly.
-	 * Clear the flag and check the page should be charged.
-	 */
-	pc = lookup_page_cgroup(oldpage);
-	lock_page_cgroup(pc);
-	ClearPageCgroupMigration(pc);
-	unlock_page_cgroup(pc);
-
-	/*
-	 * If a page is a file cache, radix-tree replacement is very atomic
-	 * and we can skip this check. When it was an Anon page, its mapcount
-	 * goes down to 0. But because we added MIGRATION flage, it's not
-	 * uncharged yet. There are several case but page->mapcount check
-	 * and USED bit check in mem_cgroup_uncharge_page() will do enough
-	 * check. (see prepare_charge() also)
-	 */
-	if (anon)
-		mem_cgroup_uncharge_page(used);
-}
-
-/*
- * At replace page cache, newpage is not under any memcg but it's on
- * LRU. So, this function doesn't touch res_counter but handles LRU
- * in correct way. Both pages are locked so we cannot race with uncharge.
- */
-void mem_cgroup_replace_page_cache(struct page *oldpage,
-				  struct page *newpage)
-{
-	struct mem_cgroup *memcg = NULL;
-	struct page_cgroup *pc;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	pc = lookup_page_cgroup(oldpage);
-	/* fix accounting on old pages */
-	lock_page_cgroup(pc);
-	if (PageCgroupUsed(pc)) {
-		memcg = pc->mem_cgroup;
-		mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
-		ClearPageCgroupUsed(pc);
-	}
-	unlock_page_cgroup(pc);
-
-	/*
-	 * When called from shmem_replace_page(), in some cases the
-	 * oldpage has already been charged, and in some cases not.
-	 */
-	if (!memcg)
-		return;
-	/*
-	 * Even if newpage->mapping was NULL before starting replacement,
-	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
-	 * LRU while we overwrite pc->mem_cgroup.
-	 */
-	commit_charge(newpage, memcg, 1, false, true);
-}
-
 #ifdef CONFIG_DEBUG_VM
 static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
 {
@@ -4263,7 +3883,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
-		spin_lock(&mctz->lock);
+		spin_lock_irq(&mctz->lock);
 
 		/*
 		 * If we failed to reclaim anything from this memory cgroup
@@ -4303,7 +3923,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 		 */
 		/* If excess == 0, no tree ops */
 		__mem_cgroup_insert_exceeded(mz, mctz, excess);
-		spin_unlock(&mctz->lock);
+		spin_unlock_irq(&mctz->lock);
 		css_put(&mz->memcg->css);
 		loop++;
 		/*
@@ -6265,9 +5885,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 	if (page) {
 		pc = lookup_page_cgroup(page);
 		/*
-		 * Do only loose check w/o page_cgroup lock.
-		 * mem_cgroup_move_account() checks the pc is valid or not under
-		 * the lock.
+		 * Do only loose check w/o serialization.
+		 * mem_cgroup_move_account() checks the pc is valid or
+		 * not under LRU exclusion.
 		 */
 		if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
 			ret = MC_TARGET_PAGE;
@@ -6729,6 +6349,67 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+#ifdef CONFIG_MEMCG_SWAP
+/**
+ * mem_cgroup_swapout - transfer a memsw charge to swap
+ * @page: page whose memsw charge to transfer
+ * @entry: swap entry to move the charge to
+ *
+ * Transfer the memsw charge of @page to @entry.
+ */
+void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+	struct page_cgroup *pc;
+	unsigned short oldid;
+
+	VM_BUG_ON_PAGE(PageLRU(page), page);
+	VM_BUG_ON_PAGE(page_count(page), page);
+
+	if (!do_swap_account)
+		return;
+
+	pc = lookup_page_cgroup(page);
+
+	/* Readahead page, never charged */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+
+	oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+	VM_BUG_ON_PAGE(oldid, page);
+
+	pc->flags &= ~PCG_MEMSW;
+	css_get(&pc->mem_cgroup->css);
+	mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+}
+
+/**
+ * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * @entry: swap entry to uncharge
+ *
+ * Drop the memsw charge associated with @entry.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t entry)
+{
+	struct mem_cgroup *memcg;
+	unsigned short id;
+
+	if (!do_swap_account)
+		return;
+
+	id = swap_cgroup_record(entry, 0);
+	rcu_read_lock();
+	memcg = mem_cgroup_lookup(id);
+	if (memcg) {
+		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+		mem_cgroup_swap_statistics(memcg, false);
+		css_put(&memcg->css);
+	}
+	rcu_read_unlock();
+}
+#endif
+
 /**
  * mem_cgroup_try_charge - try charging a page
  * @page: page to charge
@@ -6831,7 +6512,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 	}
 
-	commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare);
+	commit_charge(page, memcg, nr_pages, lrucare);
 
 	if (do_swap_account && PageSwapCache(page)) {
 		swp_entry_t entry = { .val = page_private(page) };
@@ -6873,6 +6554,139 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
 	cancel_charge(memcg, nr_pages);
 }
 
+/**
+ * mem_cgroup_uncharge - uncharge a page
+ * @page: page to uncharge
+ *
+ * Uncharge a page previously charged with mem_cgroup_try_charge() and
+ * mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge(struct page *page)
+{
+	struct memcg_batch_info *batch;
+	unsigned int nr_pages = 1;
+	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
+	unsigned long pc_flags;
+	unsigned long flags;
+
+	VM_BUG_ON_PAGE(PageLRU(page), page);
+	VM_BUG_ON_PAGE(page_count(page), page);
+
+	if (mem_cgroup_disabled())
+		return;
+
+	pc = lookup_page_cgroup(page);
+
+	/* Every final put_page() ends up here */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+	/*
+	 * Nobody should be changing or seriously looking at
+	 * pc->mem_cgroup and pc->flags at this point, we have fully
+	 * exclusive access to the page.
+	 */
+	memcg = pc->mem_cgroup;
+	pc_flags = pc->flags;
+	pc->flags = 0;
+
+	local_irq_save(flags);
+
+	if (nr_pages > 1)
+		goto direct;
+	if (unlikely(test_thread_flag(TIF_MEMDIE)))
+		goto direct;
+	batch = &current->memcg_batch;
+	if (!batch->do_batch)
+		goto direct;
+	if (batch->memcg && batch->memcg != memcg)
+		goto direct;
+	if (!batch->memcg)
+		batch->memcg = memcg;
+	if (pc_flags & PCG_MEM)
+		batch->nr_pages++;
+	if (pc_flags & PCG_MEMSW)
+		batch->memsw_nr_pages++;
+	goto out;
+direct:
+	if (pc_flags & PCG_MEM)
+		res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
+	if (pc_flags & PCG_MEMSW)
+		res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
+	memcg_oom_recover(memcg);
+out:
+	mem_cgroup_charge_statistics(memcg, page, -nr_pages);
+	memcg_check_events(memcg, page);
+
+	local_irq_restore(flags);
+}
+
+/**
+ * mem_cgroup_migrate - migrate a charge to another page
+ * @oldpage: currently charged page
+ * @newpage: page to transfer the charge to
+ * @lrucare: both pages might be on the LRU already
+ *
+ * Migrate the charge from @oldpage to @newpage.
+ *
+ * Both pages must be locked, @newpage->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+			bool lrucare)
+{
+	unsigned int nr_pages = 1;
+	struct page_cgroup *pc;
+	int isolated;
+
+	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+	VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage);
+	VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage);
+	VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
+
+	if (mem_cgroup_disabled())
+		return;
+
+	/* Page cache replacement: new page already charged? */
+	pc = lookup_page_cgroup(newpage);
+	if (PageCgroupUsed(pc))
+		return;
+
+	/* Re-entrant migration: old page already uncharged? */
+	pc = lookup_page_cgroup(oldpage);
+	if (!PageCgroupUsed(pc))
+		return;
+
+	VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
+	VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
+
+	if (PageTransHuge(oldpage)) {
+		nr_pages <<= compound_order(oldpage);
+		VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage);
+		VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage);
+	}
+
+	if (lrucare)
+		lock_page_lru(oldpage, &isolated);
+
+	pc->flags = 0;
+
+	if (lrucare)
+		unlock_page_lru(oldpage, isolated);
+
+	local_irq_disable();
+	mem_cgroup_charge_statistics(pc->mem_cgroup, oldpage, -nr_pages);
+	memcg_check_events(pc->mem_cgroup, oldpage);
+	local_irq_enable();
+
+	commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare);
+}
+
 /*
  * subsys_initcall() for memory controller.
  *
diff --git a/mm/memory.c b/mm/memory.c
index 6d7648773dc4..2a899e4e82ba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
 		details = NULL;
 
 	BUG_ON(addr >= end);
-	mem_cgroup_uncharge_start();
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
@@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
 		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
-	mem_cgroup_uncharge_end();
 }
 
 
diff --git a/mm/migrate.c b/mm/migrate.c
index be6dbf995c0c..f78ec9bd454d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -780,6 +780,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 	if (rc != MIGRATEPAGE_SUCCESS) {
 		newpage->mapping = NULL;
 	} else {
+		mem_cgroup_migrate(page, newpage, false);
 		if (remap_swapcache)
 			remove_migration_ptes(page, newpage);
 		page->mapping = NULL;
@@ -795,7 +796,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
-	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;
 
 	if (!trylock_page(page)) {
@@ -821,9 +821,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		lock_page(page);
 	}
 
-	/* charge against new page */
-	mem_cgroup_prepare_migration(page, newpage, &mem);
-
 	if (PageWriteback(page)) {
 		/*
 		 * Only in the case of a full synchronous migration is it
@@ -833,10 +830,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		 */
 		if (mode != MIGRATE_SYNC) {
 			rc = -EBUSY;
-			goto uncharge;
+			goto out_unlock;
 		}
 		if (!force)
-			goto uncharge;
+			goto out_unlock;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -872,7 +869,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 			 */
 			remap_swapcache = 0;
 		} else {
-			goto uncharge;
+			goto out_unlock;
 		}
 	}
 
@@ -885,7 +882,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		 * the page migration right away (proteced by page lock).
 		 */
 		rc = balloon_page_migrate(newpage, page, mode);
-		goto uncharge;
+		goto out_unlock;
 	}
 
 	/*
@@ -904,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		VM_BUG_ON_PAGE(PageAnon(page), page);
 		if (page_has_private(page)) {
 			try_to_free_buffers(page);
-			goto uncharge;
+			goto out_unlock;
 		}
 		goto skip_unmap;
 	}
@@ -923,10 +920,7 @@ skip_unmap:
 	if (anon_vma)
 		put_anon_vma(anon_vma);
 
-uncharge:
-	mem_cgroup_end_migration(mem, page, newpage,
-				 (rc == MIGRATEPAGE_SUCCESS ||
-				  rc == MIGRATEPAGE_BALLOON_SUCCESS));
+out_unlock:
 	unlock_page(page);
 out:
 	return rc;
@@ -1786,7 +1780,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated = 0;
 	struct page *new_page = NULL;
-	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
 	unsigned long mmun_start = address & HPAGE_PMD_MASK;
 	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
@@ -1852,15 +1845,6 @@ fail_putback:
 		goto out_unlock;
 	}
 
-	/*
-	 * Traditional migration needs to prepare the memcg charge
-	 * transaction early to prevent the old page from being
-	 * uncharged when installing migration entries.  Here we can
-	 * save the potential rollback and start the charge transfer
-	 * only when migration is already known to end successfully.
-	 */
-	mem_cgroup_prepare_migration(page, new_page, &memcg);
-
 	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
 	entry = pmd_mkhuge(entry);
@@ -1888,14 +1872,10 @@ fail_putback:
 		goto fail_putback;
 	}
 
+	mem_cgroup_migrate(page, new_page, false);
+
 	page_remove_rmap(page);
 
-	/*
-	 * Finish the charge transaction under the page table lock to
-	 * prevent split_huge_page() from dividing up the charge
-	 * before it's fully transferred to the new page.
-	 */
-	mem_cgroup_end_migration(memcg, page, new_page, true);
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index f56b5ed78128..3e8491c504f8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1089,7 +1089,6 @@ void page_remove_rmap(struct page *page)
 	if (unlikely(PageHuge(page)))
 		goto out;
 	if (anon) {
-		mem_cgroup_uncharge_page(page);
 		if (PageTransHuge(page))
 			__dec_zone_page_state(page,
 					      NR_ANON_TRANSPARENT_HUGEPAGES);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1f1a8085538b..6dc80d298f9d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -419,7 +419,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			pvec.pages, indices);
 		if (!pvec.nr)
 			break;
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -447,7 +446,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
 	}
@@ -495,7 +493,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			index = start;
 			continue;
 		}
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -531,7 +528,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		index++;
 	}
 
@@ -835,7 +831,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	mutex_unlock(&shmem_swaplist_mutex);
-	swapcache_free(swap, NULL);
+	swapcache_free(swap);
 redirty:
 	set_page_dirty(page);
 	if (wbc->for_reclaim)
@@ -1008,7 +1004,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 		 */
 		oldpage = newpage;
 	} else {
-		mem_cgroup_replace_page_cache(oldpage, newpage);
+		mem_cgroup_migrate(oldpage, newpage, false);
 		lru_cache_add_anon(newpage);
 		*pagep = newpage;
 	}
diff --git a/mm/swap.c b/mm/swap.c
index 3baca701bb78..00523fffa5ed 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page)
 		del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+	mem_cgroup_uncharge(page);
 }
 
 static void __put_single_page(struct page *page)
@@ -907,6 +908,8 @@ void release_pages(struct page **pages, int nr, bool cold)
 	struct lruvec *lruvec;
 	unsigned long uninitialized_var(flags);
 
+	mem_cgroup_uncharge_start();
+
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 
@@ -938,6 +941,7 @@ void release_pages(struct page **pages, int nr, bool cold)
 			__ClearPageLRU(page);
 			del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		}
+		mem_cgroup_uncharge(page);
 
 		/* Clear Active bit in case of parallel mark_page_accessed */
 		__ClearPageActive(page);
@@ -947,6 +951,8 @@ void release_pages(struct page **pages, int nr, bool cold)
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
+	mem_cgroup_uncharge_end();
+
 	free_hot_cold_page_list(&pages_to_free, cold);
 }
 EXPORT_SYMBOL(release_pages);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2972eee184a4..e160151da6b8 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -176,7 +176,7 @@ int add_to_swap(struct page *page, struct list_head *list)
 
 	if (unlikely(PageTransHuge(page)))
 		if (unlikely(split_huge_page_to_list(page, list))) {
-			swapcache_free(entry, NULL);
+			swapcache_free(entry);
 			return 0;
 		}
 
@@ -202,7 +202,7 @@ int add_to_swap(struct page *page, struct list_head *list)
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry, NULL);
+		swapcache_free(entry);
 		return 0;
 	}
 }
@@ -225,7 +225,7 @@ void delete_from_swap_cache(struct page *page)
 	__delete_from_swap_cache(page);
 	spin_unlock_irq(&address_space->tree_lock);
 
-	swapcache_free(entry, page);
+	swapcache_free(entry);
 	page_cache_release(page);
 }
 
@@ -386,7 +386,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry, NULL);
+		swapcache_free(entry);
 	} while (err != -ENOMEM);
 
 	if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0883b4912ff7..8798b2e0ac59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry)
 /*
  * Called after dropping swapcache to decrease refcnt to swap entries.
  */
-void swapcache_free(swp_entry_t entry, struct page *page)
+void swapcache_free(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
-	unsigned char count;
 
 	p = swap_info_get(entry);
 	if (p) {
-		count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
-		if (page)
-			mem_cgroup_uncharge_swapcache(page, entry, count != 0);
+		swap_entry_free(p, entry, SWAP_HAS_CACHE);
 		spin_unlock(&p->lock);
 	}
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index eda247307164..96d167372d89 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE),
 			indices)) {
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
 	}
@@ -369,7 +367,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			pagevec_release(&pvec);
 			break;
 		}
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -394,7 +391,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		index++;
 	}
 	cleancache_invalidate_inode(mapping);
@@ -493,7 +489,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
 			indices)) {
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -522,7 +517,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
 	}
@@ -553,7 +547,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	BUG_ON(page_has_private(page));
 	__delete_from_page_cache(page, NULL);
 	spin_unlock_irq(&mapping->tree_lock);
-	mem_cgroup_uncharge_cache_page(page);
 
 	if (mapping->a_ops->freepage)
 		mapping->a_ops->freepage(page);
@@ -602,7 +595,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
 			indices)) {
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -655,7 +647,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d2f65c856350..7068e838d22b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -577,9 +577,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
+		mem_cgroup_swapout(page, swap);
 		__delete_from_swap_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
-		swapcache_free(swap, page);
+		swapcache_free(swap);
 	} else {
 		void (*freepage)(struct page *);
 		void *shadow = NULL;
@@ -600,7 +601,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 			shadow = workingset_eviction(mapping, page);
 		__delete_from_page_cache(page, shadow);
 		spin_unlock_irq(&mapping->tree_lock);
-		mem_cgroup_uncharge_cache_page(page);
 
 		if (freepage != NULL)
 			freepage(page);
@@ -1103,6 +1103,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 */
 		__clear_page_locked(page);
 free_it:
+		mem_cgroup_uncharge(page);
 		nr_reclaimed++;
 
 		/*
@@ -1132,12 +1133,13 @@ keep:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
 	}
+	mem_cgroup_uncharge_end();
 
 	free_hot_cold_page_list(&free_pages, true);
 
 	list_splice(&ret_pages, page_list);
 	count_vm_events(PGACTIVATE, pgactivate);
-	mem_cgroup_uncharge_end();
+
 	*ret_nr_dirty += nr_dirty;
 	*ret_nr_congested += nr_congested;
 	*ret_nr_unqueued_dirty += nr_unqueued_dirty;
@@ -1435,6 +1437,8 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 			__ClearPageActive(page);
 			del_page_from_lru_list(page, lruvec, lru);
 
+			mem_cgroup_uncharge(page);
+
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
 				(*get_compound_page_dtor(page))(page);
@@ -1656,6 +1660,8 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
 			__ClearPageActive(page);
 			del_page_from_lru_list(page, lruvec, lru);
 
+			mem_cgroup_uncharge(page);
+
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
 				(*get_compound_page_dtor(page))(page);
diff --git a/mm/zswap.c b/mm/zswap.c
index 032c21eeab2b..9da56af24df5 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -507,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry, NULL);
+		swapcache_free(entry);
 	} while (err != -ENOMEM);
 
 	if (new_page)
-- 
cgit v1.2.3


From 747db954cab64c6b7a95b121b517165f34751898 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 8 Aug 2014 14:19:24 -0700
Subject: mm: memcontrol: use page lists for uncharge batching

Pages are now uncharged at release time, and all sources of batched
uncharges operate on lists of pages.  Directly use those lists, and
get rid of the per-task batching state.

This also batches statistics accounting, in addition to the res
counter charges, to reduce IRQ-disabling and re-enabling.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  12 +--
 include/linux/sched.h      |   6 --
 kernel/fork.c              |   4 -
 mm/memcontrol.c            | 206 ++++++++++++++++++++++++---------------------
 mm/swap.c                  |   6 +-
 mm/vmscan.c                |  12 ++-
 6 files changed, 117 insertions(+), 129 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 806b8fa15c5f..e0752d204d9e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -59,12 +59,8 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 			      bool lrucare);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
-
 void mem_cgroup_uncharge(struct page *page);
-
-/* Batched uncharging */
-void mem_cgroup_uncharge_start(void);
-void mem_cgroup_uncharge_end(void);
+void mem_cgroup_uncharge_list(struct list_head *page_list);
 
 void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
 			bool lrucare);
@@ -233,11 +229,7 @@ static inline void mem_cgroup_uncharge(struct page *page)
 {
 }
 
-static inline void mem_cgroup_uncharge_start(void)
-{
-}
-
-static inline void mem_cgroup_uncharge_end(void)
+static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
 {
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7c19d552dc3f..4fcf82a4d243 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1628,12 +1628,6 @@ struct task_struct {
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
-	struct memcg_batch_info {
-		int do_batch;	/* incremented when batch uncharge started */
-		struct mem_cgroup *memcg; /* target memcg of uncharge */
-		unsigned long nr_pages;	/* uncharged usage */
-		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
-	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
 		struct mem_cgroup *memcg;
diff --git a/kernel/fork.c b/kernel/fork.c
index fbd3497b221f..f6f5086c9e7d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1346,10 +1346,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_MEMCG
-	p->memcg_batch.do_batch = 0;
-	p->memcg_batch.memcg = NULL;
-#endif
 #ifdef CONFIG_BCACHE
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9106f1b12f56..a6e2be0241af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3581,53 +3581,6 @@ out:
 	return ret;
 }
 
-/*
- * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
- * In that cases, pages are freed continuously and we can expect pages
- * are in the same memcg. All these calls itself limits the number of
- * pages freed at once, then uncharge_start/end() is called properly.
- * This may be called prural(2) times in a context,
- */
-
-void mem_cgroup_uncharge_start(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	current->memcg_batch.do_batch++;
-	/* We can do nest. */
-	if (current->memcg_batch.do_batch == 1) {
-		current->memcg_batch.memcg = NULL;
-		current->memcg_batch.nr_pages = 0;
-		current->memcg_batch.memsw_nr_pages = 0;
-	}
-	local_irq_restore(flags);
-}
-
-void mem_cgroup_uncharge_end(void)
-{
-	struct memcg_batch_info *batch = &current->memcg_batch;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	VM_BUG_ON(!batch->do_batch);
-	if (--batch->do_batch) /* If stacked, do nothing */
-		goto out;
-	/*
-	 * This "batch->memcg" is valid without any css_get/put etc...
-	 * bacause we hide charges behind us.
-	 */
-	if (batch->nr_pages)
-		res_counter_uncharge(&batch->memcg->res,
-				     batch->nr_pages * PAGE_SIZE);
-	if (batch->memsw_nr_pages)
-		res_counter_uncharge(&batch->memcg->memsw,
-				     batch->memsw_nr_pages * PAGE_SIZE);
-	memcg_oom_recover(batch->memcg);
-out:
-	local_irq_restore(flags);
-}
-
 #ifdef CONFIG_MEMCG_SWAP
 static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
 					 bool charge)
@@ -6554,6 +6507,98 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
 	cancel_charge(memcg, nr_pages);
 }
 
+static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+			   unsigned long nr_mem, unsigned long nr_memsw,
+			   unsigned long nr_anon, unsigned long nr_file,
+			   unsigned long nr_huge, struct page *dummy_page)
+{
+	unsigned long flags;
+
+	if (nr_mem)
+		res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
+	if (nr_memsw)
+		res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
+
+	memcg_oom_recover(memcg);
+
+	local_irq_save(flags);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
+	__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
+	__this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+	memcg_check_events(memcg, dummy_page);
+	local_irq_restore(flags);
+}
+
+static void uncharge_list(struct list_head *page_list)
+{
+	struct mem_cgroup *memcg = NULL;
+	unsigned long nr_memsw = 0;
+	unsigned long nr_anon = 0;
+	unsigned long nr_file = 0;
+	unsigned long nr_huge = 0;
+	unsigned long pgpgout = 0;
+	unsigned long nr_mem = 0;
+	struct list_head *next;
+	struct page *page;
+
+	next = page_list->next;
+	do {
+		unsigned int nr_pages = 1;
+		struct page_cgroup *pc;
+
+		page = list_entry(next, struct page, lru);
+		next = page->lru.next;
+
+		VM_BUG_ON_PAGE(PageLRU(page), page);
+		VM_BUG_ON_PAGE(page_count(page), page);
+
+		pc = lookup_page_cgroup(page);
+		if (!PageCgroupUsed(pc))
+			continue;
+
+		/*
+		 * Nobody should be changing or seriously looking at
+		 * pc->mem_cgroup and pc->flags at this point, we have
+		 * fully exclusive access to the page.
+		 */
+
+		if (memcg != pc->mem_cgroup) {
+			if (memcg) {
+				uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+					       nr_anon, nr_file, nr_huge, page);
+				pgpgout = nr_mem = nr_memsw = 0;
+				nr_anon = nr_file = nr_huge = 0;
+			}
+			memcg = pc->mem_cgroup;
+		}
+
+		if (PageTransHuge(page)) {
+			nr_pages <<= compound_order(page);
+			VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+			nr_huge += nr_pages;
+		}
+
+		if (PageAnon(page))
+			nr_anon += nr_pages;
+		else
+			nr_file += nr_pages;
+
+		if (pc->flags & PCG_MEM)
+			nr_mem += nr_pages;
+		if (pc->flags & PCG_MEMSW)
+			nr_memsw += nr_pages;
+		pc->flags = 0;
+
+		pgpgout++;
+	} while (next != page_list);
+
+	if (memcg)
+		uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+			       nr_anon, nr_file, nr_huge, page);
+}
+
 /**
  * mem_cgroup_uncharge - uncharge a page
  * @page: page to uncharge
@@ -6563,67 +6608,34 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
  */
 void mem_cgroup_uncharge(struct page *page)
 {
-	struct memcg_batch_info *batch;
-	unsigned int nr_pages = 1;
-	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
-	unsigned long pc_flags;
-	unsigned long flags;
-
-	VM_BUG_ON_PAGE(PageLRU(page), page);
-	VM_BUG_ON_PAGE(page_count(page), page);
 
 	if (mem_cgroup_disabled())
 		return;
 
+	/* Don't touch page->lru of any random page, pre-check: */
 	pc = lookup_page_cgroup(page);
-
-	/* Every final put_page() ends up here */
 	if (!PageCgroupUsed(pc))
 		return;
 
-	if (PageTransHuge(page)) {
-		nr_pages <<= compound_order(page);
-		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-	}
-	/*
-	 * Nobody should be changing or seriously looking at
-	 * pc->mem_cgroup and pc->flags at this point, we have fully
-	 * exclusive access to the page.
-	 */
-	memcg = pc->mem_cgroup;
-	pc_flags = pc->flags;
-	pc->flags = 0;
-
-	local_irq_save(flags);
+	INIT_LIST_HEAD(&page->lru);
+	uncharge_list(&page->lru);
+}
 
-	if (nr_pages > 1)
-		goto direct;
-	if (unlikely(test_thread_flag(TIF_MEMDIE)))
-		goto direct;
-	batch = &current->memcg_batch;
-	if (!batch->do_batch)
-		goto direct;
-	if (batch->memcg && batch->memcg != memcg)
-		goto direct;
-	if (!batch->memcg)
-		batch->memcg = memcg;
-	if (pc_flags & PCG_MEM)
-		batch->nr_pages++;
-	if (pc_flags & PCG_MEMSW)
-		batch->memsw_nr_pages++;
-	goto out;
-direct:
-	if (pc_flags & PCG_MEM)
-		res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
-	if (pc_flags & PCG_MEMSW)
-		res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
-	memcg_oom_recover(memcg);
-out:
-	mem_cgroup_charge_statistics(memcg, page, -nr_pages);
-	memcg_check_events(memcg, page);
+/**
+ * mem_cgroup_uncharge_list - uncharge a list of page
+ * @page_list: list of pages to uncharge
+ *
+ * Uncharge a list of pages previously charged with
+ * mem_cgroup_try_charge() and mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge_list(struct list_head *page_list)
+{
+	if (mem_cgroup_disabled())
+		return;
 
-	local_irq_restore(flags);
+	if (!list_empty(page_list))
+		uncharge_list(page_list);
 }
 
 /**
diff --git a/mm/swap.c b/mm/swap.c
index 00523fffa5ed..6b2dc3897cd5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -908,8 +908,6 @@ void release_pages(struct page **pages, int nr, bool cold)
 	struct lruvec *lruvec;
 	unsigned long uninitialized_var(flags);
 
-	mem_cgroup_uncharge_start();
-
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 
@@ -941,7 +939,6 @@ void release_pages(struct page **pages, int nr, bool cold)
 			__ClearPageLRU(page);
 			del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		}
-		mem_cgroup_uncharge(page);
 
 		/* Clear Active bit in case of parallel mark_page_accessed */
 		__ClearPageActive(page);
@@ -951,8 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold)
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-	mem_cgroup_uncharge_end();
-
+	mem_cgroup_uncharge_list(&pages_to_free);
 	free_hot_cold_page_list(&pages_to_free, cold);
 }
 EXPORT_SYMBOL(release_pages);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7068e838d22b..2836b5373b2e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -822,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 	cond_resched();
 
-	mem_cgroup_uncharge_start();
 	while (!list_empty(page_list)) {
 		struct address_space *mapping;
 		struct page *page;
@@ -1103,7 +1102,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 */
 		__clear_page_locked(page);
 free_it:
-		mem_cgroup_uncharge(page);
 		nr_reclaimed++;
 
 		/*
@@ -1133,8 +1131,8 @@ keep:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
 	}
-	mem_cgroup_uncharge_end();
 
+	mem_cgroup_uncharge_list(&free_pages);
 	free_hot_cold_page_list(&free_pages, true);
 
 	list_splice(&ret_pages, page_list);
@@ -1437,10 +1435,9 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 			__ClearPageActive(page);
 			del_page_from_lru_list(page, lruvec, lru);
 
-			mem_cgroup_uncharge(page);
-
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
+				mem_cgroup_uncharge(page);
 				(*get_compound_page_dtor(page))(page);
 				spin_lock_irq(&zone->lru_lock);
 			} else
@@ -1548,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 	spin_unlock_irq(&zone->lru_lock);
 
+	mem_cgroup_uncharge_list(&page_list);
 	free_hot_cold_page_list(&page_list, true);
 
 	/*
@@ -1660,10 +1658,9 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
 			__ClearPageActive(page);
 			del_page_from_lru_list(page, lruvec, lru);
 
-			mem_cgroup_uncharge(page);
-
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
+				mem_cgroup_uncharge(page);
 				(*get_compound_page_dtor(page))(page);
 				spin_lock_irq(&zone->lru_lock);
 			} else
@@ -1771,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 
+	mem_cgroup_uncharge_list(&l_hold);
 	free_hot_cold_page_list(&l_hold, true);
 }
 
-- 
cgit v1.2.3


From 434584fe68155f884b19f32b3befec8972c5d563 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Fri, 8 Aug 2014 14:19:26 -0700
Subject: page-cgroup: trivial cleanup

Add forward declarations for struct pglist_data, mem_cgroup.

Remove __init, __meminit from function prototypes and inline functions.

Remove redundant inclusion of bit_spinlock.h.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 9bfb8e68a595..b8f8c9e36a3e 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -12,8 +12,10 @@ enum {
 #ifndef __GENERATING_BOUNDS_H
 #include <generated/bounds.h>
 
+struct pglist_data;
+
 #ifdef CONFIG_MEMCG
-#include <linux/bit_spinlock.h>
+struct mem_cgroup;
 
 /*
  * Page Cgroup can be considered as an extended mem_map.
@@ -27,16 +29,16 @@ struct page_cgroup {
 	struct mem_cgroup *mem_cgroup;
 };
 
-void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
+extern void pgdat_page_cgroup_init(struct pglist_data *pgdat);
 
 #ifdef CONFIG_SPARSEMEM
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
 {
 }
-extern void __init page_cgroup_init(void);
+extern void page_cgroup_init(void);
 #else
-void __init page_cgroup_init_flatmem(void);
-static inline void __init page_cgroup_init(void)
+extern void page_cgroup_init_flatmem(void);
+static inline void page_cgroup_init(void)
 {
 }
 #endif
@@ -48,11 +50,10 @@ static inline int PageCgroupUsed(struct page_cgroup *pc)
 {
 	return !!(pc->flags & PCG_USED);
 }
-
-#else /* CONFIG_MEMCG */
+#else /* !CONFIG_MEMCG */
 struct page_cgroup;
 
-static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
 {
 }
 
@@ -65,10 +66,9 @@ static inline void page_cgroup_init(void)
 {
 }
 
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
 {
 }
-
 #endif /* CONFIG_MEMCG */
 
 #include <linux/swap.h>
-- 
cgit v1.2.3


From 9a3f4d85d58cb4e02e226f9be946d54c33eb715b Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Fri, 8 Aug 2014 14:19:28 -0700
Subject: page-cgroup: get rid of NR_PCG_FLAGS

It's not used anywhere today, so let's remove it.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 6 ------
 kernel/bounds.c             | 2 --
 2 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index b8f8c9e36a3e..9d9f540658f5 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -6,12 +6,8 @@ enum {
 	PCG_USED = 0x01,	/* This page is charged to a memcg */
 	PCG_MEM = 0x02,		/* This page holds a memory charge */
 	PCG_MEMSW = 0x04,	/* This page holds a memory+swap charge */
-	__NR_PCG_FLAGS,
 };
 
-#ifndef __GENERATING_BOUNDS_H
-#include <generated/bounds.h>
-
 struct pglist_data;
 
 #ifdef CONFIG_MEMCG
@@ -107,6 +103,4 @@ static inline void swap_cgroup_swapoff(int type)
 
 #endif /* CONFIG_MEMCG_SWAP */
 
-#endif /* !__GENERATING_BOUNDS_H */
-
 #endif /* __LINUX_PAGE_CGROUP_H */
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 9fd4246b04b8..e1d1d1952bfa 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,7 +9,6 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
-#include <linux/page_cgroup.h>
 #include <linux/log2.h>
 #include <linux/spinlock_types.h>
 
@@ -18,7 +17,6 @@ void foo(void)
 	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
-	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 #ifdef CONFIG_SMP
 	DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
 #endif
-- 
cgit v1.2.3


From 3cbb01871e22709fdd39478eca831de317df332f Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Fri, 8 Aug 2014 14:19:31 -0700
Subject: memcg: remove lookup_cgroup_page() prototype

Commit 6b208e3f6e35 ("mm: memcg: remove unused node/section info from
pc->flags") deleted lookup_cgroup_page() but left a prototype for it.

Kill the vestigial prototype.

Signed-off-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 9d9f540658f5..5c831f1eca79 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -40,7 +40,6 @@ static inline void page_cgroup_init(void)
 #endif
 
 struct page_cgroup *lookup_page_cgroup(struct page *page);
-struct page *lookup_cgroup_page(struct page_cgroup *pc);
 
 static inline int PageCgroupUsed(struct page_cgroup *pc)
 {
-- 
cgit v1.2.3


From ccf94f1b4a8560ffdc221840535bae5e5a91a53c Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Fri, 8 Aug 2014 14:21:22 -0700
Subject: proc: constify seq_operations

proc_uid_seq_operations, proc_gid_seq_operations and
proc_projid_seq_operations are only called in proc_id_map_open with
seq_open as const struct seq_operations so we can constify the 3
structures and update proc_id_map_open prototype.

   text    data     bss     dec     hex filename
   6817     404    1984    9205    23f5 kernel/user_namespace.o-before
   6913     308    1984    9205    23f5 kernel/user_namespace.o-after

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c                 | 2 +-
 include/linux/user_namespace.h | 6 +++---
 kernel/user_namespace.c        | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2d696b0c93bf..79df9ff71afd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2449,7 +2449,7 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
 
 #ifdef CONFIG_USER_NS
 static int proc_id_map_open(struct inode *inode, struct file *file,
-	struct seq_operations *seq_ops)
+	const struct seq_operations *seq_ops)
 {
 	struct user_namespace *ns = NULL;
 	struct task_struct *task;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4836ba3c1cd8..e95372654f09 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -57,9 +57,9 @@ static inline void put_user_ns(struct user_namespace *ns)
 }
 
 struct seq_operations;
-extern struct seq_operations proc_uid_seq_operations;
-extern struct seq_operations proc_gid_seq_operations;
-extern struct seq_operations proc_projid_seq_operations;
+extern const struct seq_operations proc_uid_seq_operations;
+extern const struct seq_operations proc_gid_seq_operations;
+extern const struct seq_operations proc_projid_seq_operations;
 extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index fcc02560fd6b..aa312b0dc3ec 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -526,21 +526,21 @@ static void m_stop(struct seq_file *seq, void *v)
 	return;
 }
 
-struct seq_operations proc_uid_seq_operations = {
+const struct seq_operations proc_uid_seq_operations = {
 	.start = uid_m_start,
 	.stop = m_stop,
 	.next = m_next,
 	.show = uid_m_show,
 };
 
-struct seq_operations proc_gid_seq_operations = {
+const struct seq_operations proc_gid_seq_operations = {
 	.start = gid_m_start,
 	.stop = m_stop,
 	.next = m_next,
 	.show = gid_m_show,
 };
 
-struct seq_operations proc_projid_seq_operations = {
+const struct seq_operations proc_projid_seq_operations = {
 	.start = projid_m_start,
 	.stop = m_stop,
 	.next = m_next,
-- 
cgit v1.2.3


From 41f727fde1fe40efeb4fef6fdce74ff794be5aeb Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Fri, 8 Aug 2014 14:21:56 -0700
Subject: fork/exec: cleanup mm initialization

mm initialization on fork/exec is spread all over the place, which makes
the code look inconsistent.

We have mm_init(), which is supposed to init/nullify mm's internals, but
it doesn't init all the fields it should:

 - on fork ->mmap,mm_rb,vmacache_seqnum,map_count,mm_cpumask,locked_vm
   are zeroed in dup_mmap();

 - on fork ->pmd_huge_pte is zeroed in dup_mm(), immediately before
   calling mm_init();

 - ->cpu_vm_mask_var ptr is initialized by mm_init_cpumask(), which is
   called before mm_init() on both fork and exec;

 - ->context is initialized by init_new_context(), which is called after
   mm_init() on both fork and exec;

Let's consolidate all the initializations in mm_init() to make the code
look cleaner.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                |  4 ----
 include/linux/mm_types.h |  1 +
 kernel/fork.c            | 47 ++++++++++++++++++++---------------------------
 3 files changed, 21 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index ab1f1200ce5d..a2b42a98c743 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm)
 	if (!mm)
 		goto err;
 
-	err = init_new_context(current, mm);
-	if (err)
-		goto err;
-
 	err = __bprm_mm_init(bprm);
 	if (err)
 		goto err;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 796deac19fcf..6e0b286649f1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -461,6 +461,7 @@ static inline void mm_init_cpumask(struct mm_struct *mm)
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	mm->cpu_vm_mask_var = &mm->cpumask_allocation;
 #endif
+	cpumask_clear(mm->cpu_vm_mask_var);
 }
 
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
diff --git a/kernel/fork.c b/kernel/fork.c
index f6f5086c9e7d..418b52a9ec6a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -374,12 +374,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	 */
 	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
-	mm->locked_vm = 0;
-	mm->mmap = NULL;
-	mm->vmacache_seqnum = 0;
-	mm->map_count = 0;
-	cpumask_clear(mm_cpumask(mm));
-	mm->mm_rb = RB_ROOT;
 	rb_link = &mm->mm_rb.rb_node;
 	rb_parent = NULL;
 	pprev = &mm->mmap;
@@ -538,17 +532,27 @@ static void mm_init_aio(struct mm_struct *mm)
 
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 {
+	mm->mmap = NULL;
+	mm->mm_rb = RB_ROOT;
+	mm->vmacache_seqnum = 0;
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_state = NULL;
 	atomic_long_set(&mm->nr_ptes, 0);
+	mm->map_count = 0;
+	mm->locked_vm = 0;
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
+	mm_init_cpumask(mm);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
+	mmu_notifier_mm_init(mm);
 	clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+	mm->pmd_huge_pte = NULL;
+#endif
 
 	if (current->mm) {
 		mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -558,11 +562,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 		mm->def_flags = 0;
 	}
 
-	if (likely(!mm_alloc_pgd(mm))) {
-		mmu_notifier_mm_init(mm);
-		return mm;
-	}
+	if (mm_alloc_pgd(mm))
+		goto fail_nopgd;
+
+	if (init_new_context(p, mm))
+		goto fail_nocontext;
 
+	return mm;
+
+fail_nocontext:
+	mm_free_pgd(mm);
+fail_nopgd:
 	free_mm(mm);
 	return NULL;
 }
@@ -596,7 +606,6 @@ struct mm_struct *mm_alloc(void)
 		return NULL;
 
 	memset(mm, 0, sizeof(*mm));
-	mm_init_cpumask(mm);
 	return mm_init(mm, current);
 }
 
@@ -828,17 +837,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 		goto fail_nomem;
 
 	memcpy(mm, oldmm, sizeof(*mm));
-	mm_init_cpumask(mm);
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-	mm->pmd_huge_pte = NULL;
-#endif
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
-	if (init_new_context(tsk, mm))
-		goto fail_nocontext;
-
 	dup_mm_exe_file(oldmm, mm);
 
 	err = dup_mmap(mm, oldmm);
@@ -860,15 +862,6 @@ free_pt:
 
 fail_nomem:
 	return NULL;
-
-fail_nocontext:
-	/*
-	 * If init_new_context() failed, we cannot use mmput() to free the mm
-	 * because it calls destroy_context()
-	 */
-	mm_free_pgd(mm);
-	free_mm(mm);
-	return NULL;
 }
 
 static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
-- 
cgit v1.2.3


From 33144e8429bd7fceacbb869a7f5061db42e13fe6 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Fri, 8 Aug 2014 14:22:03 -0700
Subject: kernel/fork.c: make mm_init_owner static

It's only used in fork.c:mm_init().

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  5 -----
 kernel/fork.c         | 14 +++++++-------
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4fcf82a4d243..b21e9218c0fd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2961,15 +2961,10 @@ static inline void inc_syscw(struct task_struct *tsk)
 
 #ifdef CONFIG_MEMCG
 extern void mm_update_next_owner(struct mm_struct *mm);
-extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
 #else
 static inline void mm_update_next_owner(struct mm_struct *mm)
 {
 }
-
-static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-}
 #endif /* CONFIG_MEMCG */
 
 static inline unsigned long task_rlimit(const struct task_struct *tsk,
diff --git a/kernel/fork.c b/kernel/fork.c
index aff84f84b0d3..86da59e165ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -535,6 +535,13 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
+static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+	mm->owner = p;
+#endif
+}
+
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 {
 	mm->mmap = NULL;
@@ -1139,13 +1146,6 @@ static void rt_mutex_init_task(struct task_struct *p)
 #endif
 }
 
-#ifdef CONFIG_MEMCG
-void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-	mm->owner = p;
-}
-#endif /* CONFIG_MEMCG */
-
 /*
  * Initialize POSIX timer handling for a single task.
  */
-- 
cgit v1.2.3


From 4aff1ce7add1c432fe5ea3ae0231155f33e5ef38 Mon Sep 17 00:00:00 2001
From: Alexandre Bounine <alexandre.bounine@idt.com>
Date: Fri, 8 Aug 2014 14:22:09 -0700
Subject: rapidio: add new RapidIO DMA interface routines

Add RapidIO DMA interface routines that directly use reference to the mport
device object and/or target device destination ID as parameters.
This allows to perform RapidIO DMA transfer requests by modules that do not
have an access to the RapidIO device list.

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Andre van Herk <andre.van.herk@prodrive-technologies.com>
Cc: Stef van Os <stef.van.os@prodrive-technologies.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rapidio/rio.c   | 66 +++++++++++++++++++++++++++++++++++--------------
 include/linux/rio_drv.h |  5 ++++
 2 files changed, 52 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index a54ba0494dd3..d7b87c64b7cd 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1509,30 +1509,39 @@ EXPORT_SYMBOL_GPL(rio_route_clr_table);
 
 static bool rio_chan_filter(struct dma_chan *chan, void *arg)
 {
-	struct rio_dev *rdev = arg;
+	struct rio_mport *mport = arg;
 
 	/* Check that DMA device belongs to the right MPORT */
-	return (rdev->net->hport ==
-		container_of(chan->device, struct rio_mport, dma));
+	return mport == container_of(chan->device, struct rio_mport, dma);
 }
 
 /**
- * rio_request_dma - request RapidIO capable DMA channel that supports
- *   specified target RapidIO device.
- * @rdev: RIO device control structure
+ * rio_request_mport_dma - request RapidIO capable DMA channel associated
+ *   with specified local RapidIO mport device.
+ * @mport: RIO mport to perform DMA data transfers
  *
  * Returns pointer to allocated DMA channel or NULL if failed.
  */
-struct dma_chan *rio_request_dma(struct rio_dev *rdev)
+struct dma_chan *rio_request_mport_dma(struct rio_mport *mport)
 {
 	dma_cap_mask_t mask;
-	struct dma_chan *dchan;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
-	dchan = dma_request_channel(mask, rio_chan_filter, rdev);
+	return dma_request_channel(mask, rio_chan_filter, mport);
+}
+EXPORT_SYMBOL_GPL(rio_request_mport_dma);
 
-	return dchan;
+/**
+ * rio_request_dma - request RapidIO capable DMA channel that supports
+ *   specified target RapidIO device.
+ * @rdev: RIO device associated with DMA transfer
+ *
+ * Returns pointer to allocated DMA channel or NULL if failed.
+ */
+struct dma_chan *rio_request_dma(struct rio_dev *rdev)
+{
+	return rio_request_mport_dma(rdev->net->hport);
 }
 EXPORT_SYMBOL_GPL(rio_request_dma);
 
@@ -1547,10 +1556,10 @@ void rio_release_dma(struct dma_chan *dchan)
 EXPORT_SYMBOL_GPL(rio_release_dma);
 
 /**
- * rio_dma_prep_slave_sg - RapidIO specific wrapper
+ * rio_dma_prep_xfer - RapidIO specific wrapper
  *   for device_prep_slave_sg callback defined by DMAENGINE.
- * @rdev: RIO device control structure
  * @dchan: DMA channel to configure
+ * @destid: target RapidIO device destination ID
  * @data: RIO specific data descriptor
  * @direction: DMA data transfer direction (TO or FROM the device)
  * @flags: dmaengine defined flags
@@ -1560,11 +1569,10 @@ EXPORT_SYMBOL_GPL(rio_release_dma);
  * target RIO device.
  * Returns pointer to DMA transaction descriptor or NULL if failed.
  */
-struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
-	struct dma_chan *dchan, struct rio_dma_data *data,
+struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan,
+	u16 destid, struct rio_dma_data *data,
 	enum dma_transfer_direction direction, unsigned long flags)
 {
-	struct dma_async_tx_descriptor *txd = NULL;
 	struct rio_dma_ext rio_ext;
 
 	if (dchan->device->device_prep_slave_sg == NULL) {
@@ -1572,15 +1580,35 @@ struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
 		return NULL;
 	}
 
-	rio_ext.destid = rdev->destid;
+	rio_ext.destid = destid;
 	rio_ext.rio_addr_u = data->rio_addr_u;
 	rio_ext.rio_addr = data->rio_addr;
 	rio_ext.wr_type = data->wr_type;
 
-	txd = dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
-					direction, flags, &rio_ext);
+	return dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
+				     direction, flags, &rio_ext);
+}
+EXPORT_SYMBOL_GPL(rio_dma_prep_xfer);
 
-	return txd;
+/**
+ * rio_dma_prep_slave_sg - RapidIO specific wrapper
+ *   for device_prep_slave_sg callback defined by DMAENGINE.
+ * @rdev: RIO device control structure
+ * @dchan: DMA channel to configure
+ * @data: RIO specific data descriptor
+ * @direction: DMA data transfer direction (TO or FROM the device)
+ * @flags: dmaengine defined flags
+ *
+ * Initializes RapidIO capable DMA channel for the specified data transfer.
+ * Uses DMA channel private extension to pass information related to remote
+ * target RIO device.
+ * Returns pointer to DMA transaction descriptor or NULL if failed.
+ */
+struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
+	struct dma_chan *dchan, struct rio_dma_data *data,
+	enum dma_transfer_direction direction, unsigned long flags)
+{
+	return rio_dma_prep_xfer(dchan,	rdev->destid, data, direction, flags);
 }
 EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
 
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 5059994fe297..9fc2f213e74f 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -384,11 +384,16 @@ void rio_dev_put(struct rio_dev *);
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
+extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport);
 extern void rio_release_dma(struct dma_chan *dchan);
 extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
 		struct rio_dev *rdev, struct dma_chan *dchan,
 		struct rio_dma_data *data,
 		enum dma_transfer_direction direction, unsigned long flags);
+extern struct dma_async_tx_descriptor *rio_dma_prep_xfer(
+		struct dma_chan *dchan,	u16 destid,
+		struct rio_dma_data *data,
+		enum dma_transfer_direction direction, unsigned long flags);
 #endif
 
 /**
-- 
cgit v1.2.3


From e5eea0981a3840f3f39f43d2d00461c4c24018e7 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 8 Aug 2014 14:22:16 -0700
Subject: sysctl: remove typedef ctl_table

Remove the final user, and the typedef itself.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_sysctl.c  | 2 +-
 include/linux/sysctl.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 71290463a1d3..f92d5dd578a4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -632,7 +632,7 @@ out:
 	return ret;
 }
 
-static int scan(struct ctl_table_header *head, ctl_table *table,
+static int scan(struct ctl_table_header *head, struct ctl_table *table,
 		unsigned long *pos, struct file *file,
 		struct dir_context *ctx)
 {
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 14a8ff2de11e..b7361f831226 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -34,8 +34,6 @@ struct ctl_table_root;
 struct ctl_table_header;
 struct ctl_dir;
 
-typedef struct ctl_table ctl_table;
-
 typedef int proc_handler (struct ctl_table *ctl, int write,
 			  void __user *buffer, size_t *lenp, loff_t *ppos);
 
-- 
cgit v1.2.3


From 69361eef9056b0babb507798c2135ad1572f0ef7 Mon Sep 17 00:00:00 2001
From: Josh Hunt <johunt@akamai.com>
Date: Fri, 8 Aug 2014 14:22:31 -0700
Subject: panic: add TAINT_SOFTLOCKUP

This taint flag will be set if the system has ever entered a softlockup
state.  Similar to TAINT_WARN it is useful to know whether or not the
system has been in a softlockup state when debugging.

[akpm@linux-foundation.org: apply the taint before calling panic()]
Signed-off-by: Josh Hunt <johunt@akamai.com>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/oops-tracing.txt  | 2 ++
 Documentation/sysctl/kernel.txt | 1 +
 include/linux/kernel.h          | 1 +
 kernel/panic.c                  | 1 +
 kernel/watchdog.c               | 1 +
 5 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index e3155995ddd8..beefb9f82902 100644
--- a/Documentation/oops-tracing.txt
+++ b/Documentation/oops-tracing.txt
@@ -268,6 +268,8 @@ characters, each representing a particular tainted value.
  14: 'E' if an unsigned module has been loaded in a kernel supporting
      module signature.
 
+ 15: 'L' if a soft lockup has previously occurred on the system.
+
 The primary reason for the 'Tainted: ' string is to tell kernel
 debuggers if this is a clean kernel or if anything unusual has
 occurred.  Tainting is permanent: even if an offending module is
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c14374e71775..f79eb9666379 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -826,6 +826,7 @@ can be ORed together:
 4096 - An out-of-tree module has been loaded.
 8192 - An unsigned module has been loaded in a kernel supporting module
        signature.
+16384 - A soft lockup has previously occurred on the system.
 
 ==============================================================
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3dc22abbc68a..31ae66f34235 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -470,6 +470,7 @@ extern enum system_states {
 #define TAINT_FIRMWARE_WORKAROUND	11
 #define TAINT_OOT_MODULE		12
 #define TAINT_UNSIGNED_MODULE		13
+#define TAINT_SOFTLOCKUP		14
 
 extern const char hex_asc[];
 #define hex_asc_lo(x)	hex_asc[((x) & 0x0f)]
diff --git a/kernel/panic.c b/kernel/panic.c
index 62e16cef9cc2..d09dc5c32c67 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -224,6 +224,7 @@ static const struct tnt tnts[] = {
 	{ TAINT_FIRMWARE_WORKAROUND,	'I', ' ' },
 	{ TAINT_OOT_MODULE,		'O', ' ' },
 	{ TAINT_UNSIGNED_MODULE,	'E', ' ' },
+	{ TAINT_SOFTLOCKUP,		'L', ' ' },
 };
 
 /**
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 51b29e9d2ba6..a8d6914030fe 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -368,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 			smp_mb__after_atomic();
 		}
 
+		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
 		__this_cpu_write(soft_watchdog_warn, true);
-- 
cgit v1.2.3


From d97b07c54f34e88352ebe676beb798c8f59ac588 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 8 Aug 2014 14:23:14 -0700
Subject: initramfs: support initramfs that is bigger than 2GiB

Now with 64bit bzImage and kexec tools, we support ramdisk that size is
bigger than 2g, as we could put it above 4G.

Found compressed initramfs image could not be decompressed properly.  It
turns out that image length is int during decompress detection, and it
will become < 0 when length is more than 2G.  Furthermore, during
decompressing len as int is used for inbuf count, that has problem too.

Change len to long, that should be ok as on 32 bit platform long is
32bits.

Tested with following compressed initramfs image as root with kexec.
	gzip, bzip2, xz, lzma, lzop, lz4.
run time for populate_rootfs():
   size        name       Nehalem-EX  Westmere-EX  Ivybridge-EX
 9034400256 root_img     :   26s           24s          30s
 3561095057 root_img.lz4 :   28s           27s          27s
 3459554629 root_img.lzo :   29s           29s          28s
 3219399480 root_img.gz  :   64s           62s          49s
 2251594592 root_img.xz  :  262s          260s         183s
 2226366598 root_img.lzma:  386s          376s         277s
 2901482513 root_img.bz2 :  635s          599s

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Rashika Kheria <rashika.kheria@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kyungsik Lee <kyungsik.lee@lge.com>
Cc: P J P <ppandit@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Cc: "Daniel M. Weeks" <dan@danweeks.net>
Cc: Alexandre Courbot <acourbot@nvidia.com>
Cc: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 crypto/zlib.c                      |  8 ++++----
 fs/isofs/compress.c                |  4 ++--
 fs/jffs2/compr_zlib.c              |  7 ++++---
 include/linux/decompress/bunzip2.h |  8 ++++----
 include/linux/decompress/generic.h | 10 +++++-----
 include/linux/decompress/inflate.h |  8 ++++----
 include/linux/decompress/unlz4.h   |  8 ++++----
 include/linux/decompress/unlzma.h  |  8 ++++----
 include/linux/decompress/unlzo.h   |  8 ++++----
 include/linux/decompress/unxz.h    |  8 ++++----
 include/linux/zlib.h               |  4 ++--
 init/do_mounts_rd.c                | 10 +++++-----
 init/initramfs.c                   | 22 +++++++++++-----------
 lib/decompress.c                   |  2 +-
 lib/decompress_bunzip2.c           | 26 +++++++++++++-------------
 lib/decompress_inflate.c           | 12 ++++++------
 lib/decompress_unlz4.c             | 18 +++++++++---------
 lib/decompress_unlzma.c            | 28 ++++++++++++++--------------
 lib/decompress_unlzo.c             | 12 ++++++------
 lib/decompress_unxz.c              | 10 +++++-----
 20 files changed, 111 insertions(+), 110 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/zlib.c b/crypto/zlib.c
index 06b62e5cdcc7..c9ee681d57fd 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -168,7 +168,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
 	}
 
 	ret = req->avail_out - stream->avail_out;
-	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+	pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
 		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
@@ -198,7 +198,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
 	}
 
 	ret = req->avail_out - stream->avail_out;
-	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+	pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
 		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
@@ -283,7 +283,7 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm,
 	}
 
 	ret = req->avail_out - stream->avail_out;
-	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+	pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
 		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
@@ -331,7 +331,7 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm,
 	}
 
 	ret = req->avail_out - stream->avail_out;
-	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+	pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
 		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 592e5115a561..f311bf084015 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -158,8 +158,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
 					       "zisofs: zisofs_inflate returned"
 					       " %d, inode = %lu,"
 					       " page idx = %d, bh idx = %d,"
-					       " avail_in = %d,"
-					       " avail_out = %d\n",
+					       " avail_in = %ld,"
+					       " avail_out = %ld\n",
 					       zerr, inode->i_ino, curpage,
 					       curbh, stream.avail_in,
 					       stream.avail_out);
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 0b9a1e44e833..5698dae5d92d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 
 	while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
 		def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
-		def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out);
-		jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n",
+		def_strm.avail_in = min_t(unsigned long,
+			(*sourcelen-def_strm.total_in), def_strm.avail_out);
+		jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n",
 			  def_strm.avail_in, def_strm.avail_out);
 		ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
-		jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n",
+		jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n",
 			  def_strm.avail_in, def_strm.avail_out,
 			  def_strm.total_in, def_strm.total_out);
 		if (ret != Z_OK) {
diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h
index 115272137a9c..4d683df898e6 100644
--- a/include/linux/decompress/bunzip2.h
+++ b/include/linux/decompress/bunzip2.h
@@ -1,10 +1,10 @@
 #ifndef DECOMPRESS_BUNZIP2_H
 #define DECOMPRESS_BUNZIP2_H
 
-int bunzip2(unsigned char *inbuf, int len,
-	    int(*fill)(void*, unsigned int),
-	    int(*flush)(void*, unsigned int),
+int bunzip2(unsigned char *inbuf, long len,
+	    long (*fill)(void*, unsigned long),
+	    long (*flush)(void*, unsigned long),
 	    unsigned char *output,
-	    int *pos,
+	    long *pos,
 	    void(*error)(char *x));
 #endif
diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h
index 0c7111a55a1a..1fcfd64b5076 100644
--- a/include/linux/decompress/generic.h
+++ b/include/linux/decompress/generic.h
@@ -1,11 +1,11 @@
 #ifndef DECOMPRESS_GENERIC_H
 #define DECOMPRESS_GENERIC_H
 
-typedef int (*decompress_fn) (unsigned char *inbuf, int len,
-			      int(*fill)(void*, unsigned int),
-			      int(*flush)(void*, unsigned int),
+typedef int (*decompress_fn) (unsigned char *inbuf, long len,
+			      long (*fill)(void*, unsigned long),
+			      long (*flush)(void*, unsigned long),
 			      unsigned char *outbuf,
-			      int *posp,
+			      long *posp,
 			      void(*error)(char *x));
 
 /* inbuf   - input buffer
@@ -33,7 +33,7 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len,
 
 
 /* Utility routine to detect the decompression method */
-decompress_fn decompress_method(const unsigned char *inbuf, int len,
+decompress_fn decompress_method(const unsigned char *inbuf, long len,
 				const char **name);
 
 #endif
diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h
index 1d0aedef9822..e4f411fdbd24 100644
--- a/include/linux/decompress/inflate.h
+++ b/include/linux/decompress/inflate.h
@@ -1,10 +1,10 @@
 #ifndef LINUX_DECOMPRESS_INFLATE_H
 #define LINUX_DECOMPRESS_INFLATE_H
 
-int gunzip(unsigned char *inbuf, int len,
-	   int(*fill)(void*, unsigned int),
-	   int(*flush)(void*, unsigned int),
+int gunzip(unsigned char *inbuf, long len,
+	   long (*fill)(void*, unsigned long),
+	   long (*flush)(void*, unsigned long),
 	   unsigned char *output,
-	   int *pos,
+	   long *pos,
 	   void(*error_fn)(char *x));
 #endif
diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h
index d5b68bf3ec92..3273c2f36496 100644
--- a/include/linux/decompress/unlz4.h
+++ b/include/linux/decompress/unlz4.h
@@ -1,10 +1,10 @@
 #ifndef DECOMPRESS_UNLZ4_H
 #define DECOMPRESS_UNLZ4_H
 
-int unlz4(unsigned char *inbuf, int len,
-	int(*fill)(void*, unsigned int),
-	int(*flush)(void*, unsigned int),
+int unlz4(unsigned char *inbuf, long len,
+	long (*fill)(void*, unsigned long),
+	long (*flush)(void*, unsigned long),
 	unsigned char *output,
-	int *pos,
+	long *pos,
 	void(*error)(char *x));
 #endif
diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h
index 7796538f1bf4..8a891a193840 100644
--- a/include/linux/decompress/unlzma.h
+++ b/include/linux/decompress/unlzma.h
@@ -1,11 +1,11 @@
 #ifndef DECOMPRESS_UNLZMA_H
 #define DECOMPRESS_UNLZMA_H
 
-int unlzma(unsigned char *, int,
-	   int(*fill)(void*, unsigned int),
-	   int(*flush)(void*, unsigned int),
+int unlzma(unsigned char *, long,
+	   long (*fill)(void*, unsigned long),
+	   long (*flush)(void*, unsigned long),
 	   unsigned char *output,
-	   int *posp,
+	   long *posp,
 	   void(*error)(char *x)
 	);
 
diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h
index 987229752519..af18f95d6570 100644
--- a/include/linux/decompress/unlzo.h
+++ b/include/linux/decompress/unlzo.h
@@ -1,10 +1,10 @@
 #ifndef DECOMPRESS_UNLZO_H
 #define DECOMPRESS_UNLZO_H
 
-int unlzo(unsigned char *inbuf, int len,
-	int(*fill)(void*, unsigned int),
-	int(*flush)(void*, unsigned int),
+int unlzo(unsigned char *inbuf, long len,
+	long (*fill)(void*, unsigned long),
+	long (*flush)(void*, unsigned long),
 	unsigned char *output,
-	int *pos,
+	long *pos,
 	void(*error)(char *x));
 #endif
diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h
index 41728fc6c8a1..f764e2a7201e 100644
--- a/include/linux/decompress/unxz.h
+++ b/include/linux/decompress/unxz.h
@@ -10,10 +10,10 @@
 #ifndef DECOMPRESS_UNXZ_H
 #define DECOMPRESS_UNXZ_H
 
-int unxz(unsigned char *in, int in_size,
-	 int (*fill)(void *dest, unsigned int size),
-	 int (*flush)(void *src, unsigned int size),
-	 unsigned char *out, int *in_used,
+int unxz(unsigned char *in, long in_size,
+	 long (*fill)(void *dest, unsigned long size),
+	 long (*flush)(void *src, unsigned long size),
+	 unsigned char *out, long *in_used,
 	 void (*error)(char *x));
 
 #endif
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 197abb2a54c5..92dbbd3f6c75 100644
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -83,11 +83,11 @@ struct internal_state;
 
 typedef struct z_stream_s {
     const Byte *next_in;   /* next input byte */
-    uInt     avail_in;  /* number of bytes available at next_in */
+	uLong avail_in;  /* number of bytes available at next_in */
     uLong    total_in;  /* total nb of input bytes read so far */
 
     Byte    *next_out;  /* next output byte should be put there */
-    uInt     avail_out; /* remaining free space at next_out */
+	uLong avail_out; /* remaining free space at next_out */
     uLong    total_out; /* total nb of bytes output so far */
 
     char     *msg;      /* last error message, NULL if no error */
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index a8227022e3a0..e5d059e8aa11 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -311,9 +311,9 @@ static int exit_code;
 static int decompress_error;
 static int crd_infd, crd_outfd;
 
-static int __init compr_fill(void *buf, unsigned int len)
+static long __init compr_fill(void *buf, unsigned long len)
 {
-	int r = sys_read(crd_infd, buf, len);
+	long r = sys_read(crd_infd, buf, len);
 	if (r < 0)
 		printk(KERN_ERR "RAMDISK: error while reading compressed data");
 	else if (r == 0)
@@ -321,13 +321,13 @@ static int __init compr_fill(void *buf, unsigned int len)
 	return r;
 }
 
-static int __init compr_flush(void *window, unsigned int outcnt)
+static long __init compr_flush(void *window, unsigned long outcnt)
 {
-	int written = sys_write(crd_outfd, window, outcnt);
+	long written = sys_write(crd_outfd, window, outcnt);
 	if (written != outcnt) {
 		if (decompress_error == 0)
 			printk(KERN_ERR
-			       "RAMDISK: incomplete write (%d != %d)\n",
+			       "RAMDISK: incomplete write (%ld != %ld)\n",
 			       written, outcnt);
 		decompress_error = 1;
 		return -1;
diff --git a/init/initramfs.c b/init/initramfs.c
index 4f276b6a167b..a7566031242e 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -197,7 +197,7 @@ static __initdata enum state {
 } state, next_state;
 
 static __initdata char *victim;
-static __initdata unsigned count;
+static unsigned long count __initdata;
 static __initdata loff_t this_header, next_header;
 
 static inline void __init eat(unsigned n)
@@ -209,7 +209,7 @@ static inline void __init eat(unsigned n)
 
 static __initdata char *vcollected;
 static __initdata char *collected;
-static __initdata int remains;
+static long remains __initdata;
 static __initdata char *collect;
 
 static void __init read_into(char *buf, unsigned size, enum state next)
@@ -236,7 +236,7 @@ static int __init do_start(void)
 
 static int __init do_collect(void)
 {
-	unsigned n = remains;
+	unsigned long n = remains;
 	if (count < n)
 		n = count;
 	memcpy(collect, victim, n);
@@ -407,7 +407,7 @@ static __initdata int (*actions[])(void) = {
 	[Reset]		= do_reset,
 };
 
-static int __init write_buffer(char *buf, unsigned len)
+static long __init write_buffer(char *buf, unsigned long len)
 {
 	count = len;
 	victim = buf;
@@ -417,11 +417,11 @@ static int __init write_buffer(char *buf, unsigned len)
 	return len - count;
 }
 
-static int __init flush_buffer(void *bufv, unsigned len)
+static long __init flush_buffer(void *bufv, unsigned long len)
 {
 	char *buf = (char *) bufv;
-	int written;
-	int origLen = len;
+	long written;
+	long origLen = len;
 	if (message)
 		return -1;
 	while ((written = write_buffer(buf, len)) < len && !message) {
@@ -440,13 +440,13 @@ static int __init flush_buffer(void *bufv, unsigned len)
 	return origLen;
 }
 
-static unsigned my_inptr;   /* index of next byte to be processed in inbuf */
+static unsigned long my_inptr; /* index of next byte to be processed in inbuf */
 
 #include <linux/decompress/generic.h>
 
-static char * __init unpack_to_rootfs(char *buf, unsigned len)
+static char * __init unpack_to_rootfs(char *buf, unsigned long len)
 {
-	int written, res;
+	long written;
 	decompress_fn decompress;
 	const char *compress_name;
 	static __initdata char msg_buf[64];
@@ -480,7 +480,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
 		decompress = decompress_method(buf, len, &compress_name);
 		pr_debug("Detected %s compressed data\n", compress_name);
 		if (decompress) {
-			res = decompress(buf, len, NULL, flush_buffer, NULL,
+			int res = decompress(buf, len, NULL, flush_buffer, NULL,
 				   &my_inptr, error);
 			if (res)
 				error("decompressor failed");
diff --git a/lib/decompress.c b/lib/decompress.c
index 86069d74c062..37f3c786348f 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -54,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = {
 	{ {0, 0}, NULL, NULL }
 };
 
-decompress_fn __init decompress_method(const unsigned char *inbuf, int len,
+decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
 				const char **name)
 {
 	const struct compress_format *cf;
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 31c5f7675fbf..8290e0bef7ea 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -92,8 +92,8 @@ struct bunzip_data {
 	/* State for interrupting output loop */
 	int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
 	/* I/O tracking data (file handles, buffers, positions, etc.) */
-	int (*fill)(void*, unsigned int);
-	int inbufCount, inbufPos /*, outbufPos*/;
+	long (*fill)(void*, unsigned long);
+	long inbufCount, inbufPos /*, outbufPos*/;
 	unsigned char *inbuf /*,*outbuf*/;
 	unsigned int inbufBitCount, inbufBits;
 	/* The CRC values stored in the block header and calculated from the
@@ -617,7 +617,7 @@ decode_next_byte:
 	goto decode_next_byte;
 }
 
-static int INIT nofill(void *buf, unsigned int len)
+static long INIT nofill(void *buf, unsigned long len)
 {
 	return -1;
 }
@@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len)
 /* Allocate the structure, read file header.  If in_fd ==-1, inbuf must contain
    a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
    ignored, and data is read from file handle into temporary buffer. */
-static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
-			     int (*fill)(void*, unsigned int))
+static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
+			     long (*fill)(void*, unsigned long))
 {
 	struct bunzip_data *bd;
 	unsigned int i, j, c;
@@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
 
 /* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,
    not end of file.) */
-STATIC int INIT bunzip2(unsigned char *buf, int len,
-			int(*fill)(void*, unsigned int),
-			int(*flush)(void*, unsigned int),
+STATIC int INIT bunzip2(unsigned char *buf, long len,
+			long (*fill)(void*, unsigned long),
+			long (*flush)(void*, unsigned long),
 			unsigned char *outbuf,
-			int *pos,
+			long *pos,
 			void(*error)(char *x))
 {
 	struct bunzip_data *bd;
@@ -743,11 +743,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int len,
-			int(*fill)(void*, unsigned int),
-			int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long len,
+			long (*fill)(void*, unsigned long),
+			long (*flush)(void*, unsigned long),
 			unsigned char *outbuf,
-			int *pos,
+			long *pos,
 			void(*error)(char *x))
 {
 	return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index 0edfd742a154..d4c7891635ec 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -27,17 +27,17 @@
 
 #define GZIP_IOBUF_SIZE (16*1024)
 
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
 {
 	return -1;
 }
 
 /* Included from initramfs et al code */
-STATIC int INIT gunzip(unsigned char *buf, int len,
-		       int(*fill)(void*, unsigned int),
-		       int(*flush)(void*, unsigned int),
+STATIC int INIT gunzip(unsigned char *buf, long len,
+		       long (*fill)(void*, unsigned long),
+		       long (*flush)(void*, unsigned long),
 		       unsigned char *out_buf,
-		       int *pos,
+		       long *pos,
 		       void(*error)(char *x)) {
 	u8 *zbuf;
 	struct z_stream_s *strm;
@@ -142,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 
 		/* Write any data generated */
 		if (flush && strm->next_out > out_buf) {
-			int l = strm->next_out - out_buf;
+			long l = strm->next_out - out_buf;
 			if (l != flush(out_buf, l)) {
 				rc = -1;
 				error("write error");
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 3ad7f3954dfd..40f66ebe57b7 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -31,10 +31,10 @@
 #define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
 #define ARCHIVE_MAGICNUMBER 0x184C2102
 
-STATIC inline int INIT unlz4(u8 *input, int in_len,
-				int (*fill) (void *, unsigned int),
-				int (*flush) (void *, unsigned int),
-				u8 *output, int *posp,
+STATIC inline int INIT unlz4(u8 *input, long in_len,
+				long (*fill)(void *, unsigned long),
+				long (*flush)(void *, unsigned long),
+				u8 *output, long *posp,
 				void (*error) (char *x))
 {
 	int ret = -1;
@@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
 	u8 *inp;
 	u8 *inp_start;
 	u8 *outp;
-	int size = in_len;
+	long size = in_len;
 #ifdef PREBOOT
 	size_t out_len = get_unaligned_le32(input + in_len);
 #endif
@@ -196,11 +196,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
-			      int(*fill)(void*, unsigned int),
-			      int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+			      long (*fill)(void*, unsigned long),
+			      long (*flush)(void*, unsigned long),
 			      unsigned char *output,
-			      int *posp,
+			      long *posp,
 			      void(*error)(char *x)
 	)
 {
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32adb73a9038..0be83af62b88 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size)
 #define LZMA_IOBUF_SIZE	0x10000
 
 struct rc {
-	int (*fill)(void*, unsigned int);
+	long (*fill)(void*, unsigned long);
 	uint8_t *ptr;
 	uint8_t *buffer;
 	uint8_t *buffer_end;
-	int buffer_size;
+	long buffer_size;
 	uint32_t code;
 	uint32_t range;
 	uint32_t bound;
@@ -82,7 +82,7 @@ struct rc {
 #define RC_MODEL_TOTAL_BITS 11
 
 
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
 {
 	return -1;
 }
@@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc)
 
 /* Called once */
 static inline void INIT rc_init(struct rc *rc,
-				       int (*fill)(void*, unsigned int),
-				       char *buffer, int buffer_size)
+				       long (*fill)(void*, unsigned long),
+				       char *buffer, long buffer_size)
 {
 	if (fill)
 		rc->fill = fill;
@@ -280,7 +280,7 @@ struct writer {
 	size_t buffer_pos;
 	int bufsize;
 	size_t global_pos;
-	int(*flush)(void*, unsigned int);
+	long (*flush)(void*, unsigned long);
 	struct lzma_header *header;
 };
 
@@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
 
 
-STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
-			      int(*fill)(void*, unsigned int),
-			      int(*flush)(void*, unsigned int),
+STATIC inline int INIT unlzma(unsigned char *buf, long in_len,
+			      long (*fill)(void*, unsigned long),
+			      long (*flush)(void*, unsigned long),
 			      unsigned char *output,
-			      int *posp,
+			      long *posp,
 			      void(*error)(char *x)
 	)
 {
@@ -667,11 +667,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
-			      int(*fill)(void*, unsigned int),
-			      int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+			      long (*fill)(void*, unsigned long),
+			      long (*flush)(void*, unsigned long),
 			      unsigned char *output,
-			      int *posp,
+			      long *posp,
 			      void(*error)(char *x)
 	)
 {
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 960183d4258f..b94a31bdd87d 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = {
 #define HEADER_SIZE_MIN       (9 + 7     + 4 + 8     + 1       + 4)
 #define HEADER_SIZE_MAX       (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
 
-STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
+STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len)
 {
 	int l;
 	u8 *parse = input;
@@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
 	return 1;
 }
 
-STATIC inline int INIT unlzo(u8 *input, int in_len,
-				int (*fill) (void *, unsigned int),
-				int (*flush) (void *, unsigned int),
-				u8 *output, int *posp,
+STATIC int INIT unlzo(u8 *input, long in_len,
+				long (*fill)(void *, unsigned long),
+				long (*flush)(void *, unsigned long),
+				u8 *output, long *posp,
 				void (*error) (char *x))
 {
 	u8 r = 0;
-	int skip = 0;
+	long skip = 0;
 	u32 src_len, dst_len;
 	size_t tmp;
 	u8 *in_buf, *in_buf_save, *out_buf;
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
index 9f34eb56854d..b07a78340e9d 100644
--- a/lib/decompress_unxz.c
+++ b/lib/decompress_unxz.c
@@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size)
  * both input and output buffers are available as a single chunk, i.e. when
  * fill() and flush() won't be used.
  */
-STATIC int INIT unxz(unsigned char *in, int in_size,
-		     int (*fill)(void *dest, unsigned int size),
-		     int (*flush)(void *src, unsigned int size),
-		     unsigned char *out, int *in_used,
+STATIC int INIT unxz(unsigned char *in, long in_size,
+		     long (*fill)(void *dest, unsigned long size),
+		     long (*flush)(void *src, unsigned long size),
+		     unsigned char *out, long *in_used,
 		     void (*error)(char *x))
 {
 	struct xz_buf b;
@@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size,
 				 * returned by xz_dec_run(), but probably
 				 * it's not too bad.
 				 */
-				if (flush(b.out, b.out_pos) != (int)b.out_pos)
+				if (flush(b.out, b.out_pos) != (long)b.out_pos)
 					ret = XZ_BUF_ERROR;
 
 				b.out_pos = 0;
-- 
cgit v1.2.3


From ab602f799159393143d567e5c04b936fec79d6bd Mon Sep 17 00:00:00 2001
From: Jack Miller <millerjo@us.ibm.com>
Date: Fri, 8 Aug 2014 14:23:19 -0700
Subject: shm: make exit_shm work proportional to task activity

This is small set of patches our team has had kicking around for a few
versions internally that fixes tasks getting hung on shm_exit when there
are many threads hammering it at once.

Anton wrote a simple test to cause the issue:

  http://ozlabs.org/~anton/junkcode/bust_shm_exit.c

Before applying this patchset, this test code will cause either hanging
tracebacks or pthread out of memory errors.

After this patchset, it will still produce output like:

  root@somehost:~# ./bust_shm_exit 1024 160
  ...
  INFO: rcu_sched detected stalls on CPUs/tasks: {} (detected by 116, t=2111 jiffies, g=241, c=240, q=7113)
  INFO: Stall ended before state dump start
  ...

But the task will continue to run along happily, so we consider this an
improvement over hanging, even if it's a bit noisy.

This patch (of 3):

exit_shm obtains the ipc_ns shm rwsem for write and holds it while it
walks every shared memory segment in the namespace.  Thus the amount of
work is related to the number of shm segments in the namespace not the
number of segments that might need to be cleaned.

In addition, this occurs after the task has been notified the thread has
exited, so the number of tasks waiting for the ns shm rwsem can grow
without bound until memory is exausted.

Add a list to the task struct of all shmids allocated by this task.  Init
the list head in copy_process.  Use the ns->rwsem for locking.  Add
segments after id is added, remove before removing from id.

On unshare of NEW_IPCNS orphan any ids as if the task had exited, similar
to handling of semaphore undo.

I chose a define for the init sequence since its a simple list init,
otherwise it would require a function call to avoid include loops between
the semaphore code and the task struct.  Converting the list_del to
list_del_init for the unshare cases would remove the exit followed by
init, but I left it blow up if not inited.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Jack Miller <millerjo@us.ibm.com>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Anton Blanchard <anton@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  2 ++
 include/linux/shm.h   | 16 +++++++++++++++-
 ipc/shm.c             | 22 +++++++++++-----------
 kernel/fork.c         |  6 ++++++
 4 files changed, 34 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b21e9218c0fd..db2f6474e95e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,6 +33,7 @@ struct sched_param {
 
 #include <linux/smp.h>
 #include <linux/sem.h>
+#include <linux/shm.h>
 #include <linux/signal.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
@@ -1385,6 +1386,7 @@ struct task_struct {
 #ifdef CONFIG_SYSVIPC
 /* ipc stuff */
 	struct sysv_sem sysvsem;
+	struct sysv_shm sysvshm;
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 /* hung task detection */
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 57d77709fbe2..fd206387048a 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_SHM_H_
 #define _LINUX_SHM_H_
 
+#include <linux/list.h>
 #include <asm/page.h>
 #include <uapi/linux/shm.h>
 #include <asm/shmparam.h>
@@ -20,6 +21,7 @@ struct shmid_kernel /* private to the kernel */
 
 	/* The task created the shm object.  NULL if the task is dead. */
 	struct task_struct	*shm_creator;
+	struct list_head	shm_clist;	/* list by creator */
 };
 
 /* shm_mode upper byte flags */
@@ -44,11 +46,20 @@ struct shmid_kernel /* private to the kernel */
 #define SHM_HUGE_1GB    (30 << SHM_HUGE_SHIFT)
 
 #ifdef CONFIG_SYSVIPC
+struct sysv_shm {
+	struct list_head shm_clist;
+};
+
 long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
 	      unsigned long shmlba);
 extern int is_file_shm_hugepages(struct file *file);
-extern void exit_shm(struct task_struct *task);
+void exit_shm(struct task_struct *task);
+#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
 #else
+struct sysv_shm {
+	/* empty */
+};
+
 static inline long do_shmat(int shmid, char __user *shmaddr,
 			    int shmflg, unsigned long *addr,
 			    unsigned long shmlba)
@@ -62,6 +73,9 @@ static inline int is_file_shm_hugepages(struct file *file)
 static inline void exit_shm(struct task_struct *task)
 {
 }
+static inline void shm_init_task(struct task_struct *task)
+{
+}
 #endif
 
 #endif /* _LINUX_SHM_H_ */
diff --git a/ipc/shm.c b/ipc/shm.c
index 89fc354156cb..1fc3a61b443b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -178,6 +178,7 @@ static void shm_rcu_free(struct rcu_head *head)
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
+	list_del(&s->shm_clist);
 	ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
@@ -268,14 +269,10 @@ static void shm_close(struct vm_area_struct *vma)
 }
 
 /* Called with ns->shm_ids(ns).rwsem locked */
-static int shm_try_destroy_current(int id, void *p, void *data)
+static void shm_mark_orphan(struct shmid_kernel *shp, struct ipc_namespace *ns)
 {
-	struct ipc_namespace *ns = data;
-	struct kern_ipc_perm *ipcp = p;
-	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
-
-	if (shp->shm_creator != current)
-		return 0;
+	if (WARN_ON(shp->shm_creator != current)) /* Remove me when it works */
+		return;
 
 	/*
 	 * Mark it as orphaned to destroy the segment when
@@ -289,13 +286,12 @@ static int shm_try_destroy_current(int id, void *p, void *data)
 	 * is not set, it shouldn't be deleted here.
 	 */
 	if (!ns->shm_rmid_forced)
-		return 0;
+		return;
 
 	if (shm_may_destroy(ns, shp)) {
 		shm_lock_by_ptr(shp);
 		shm_destroy(ns, shp);
 	}
-	return 0;
 }
 
 /* Called with ns->shm_ids(ns).rwsem locked */
@@ -333,14 +329,17 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
 void exit_shm(struct task_struct *task)
 {
 	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+	struct shmid_kernel *shp, *n;
 
 	if (shm_ids(ns).in_use == 0)
 		return;
 
 	/* Destroy all already created segments, but not mapped yet */
 	down_write(&shm_ids(ns).rwsem);
-	if (shm_ids(ns).in_use)
-		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
+	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist)
+		shm_mark_orphan(shp, ns);
+	/* remove the list head from any segments still attached */
+	list_del(&task->sysvshm.shm_clist);
 	up_write(&shm_ids(ns).rwsem);
 }
 
@@ -561,6 +560,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_nattch = 0;
 	shp->shm_file = file;
 	shp->shm_creator = current;
+	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
 
 	/*
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
diff --git a/kernel/fork.c b/kernel/fork.c
index 86da59e165ad..fa9124322cd4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1362,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (retval)
 		goto bad_fork_cleanup_policy;
 	/* copy all the process information */
+	shm_init_task(p);
 	retval = copy_semundo(clone_flags, p);
 	if (retval)
 		goto bad_fork_cleanup_audit;
@@ -1913,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 			 */
 			exit_sem(current);
 		}
+		if (unshare_flags & CLONE_NEWIPC) {
+			/* Orphan segments in old ns (see sem above). */
+			exit_shm(current);
+			shm_init_task(current);
+		}
 
 		if (new_nsproxy)
 			switch_task_namespaces(current, new_nsproxy);
-- 
cgit v1.2.3


From 2f137d66fb65ef41df6e558f23d481f07394a424 Mon Sep 17 00:00:00 2001
From: Jack Miller <millerjo@us.ibm.com>
Date: Fri, 8 Aug 2014 14:23:23 -0700
Subject: shm: remove unneeded extern for function

A small cleanup while changing adjacent code.  Extern is not needed for
functions and only one declaration had it so remove it from the odd line.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Jack Miller <millerjo@us.ibm.com>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Anton Blanchard <anton@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/shm.h b/include/linux/shm.h
index fd206387048a..6fb801686ad6 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -52,7 +52,7 @@ struct sysv_shm {
 
 long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
 	      unsigned long shmlba);
-extern int is_file_shm_hugepages(struct file *file);
+int is_file_shm_hugepages(struct file *file);
 void exit_shm(struct task_struct *task);
 #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
 #else
-- 
cgit v1.2.3


From 308c09f17da4adc53935115dbeb5bce4f067d8f9 Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Fri, 8 Aug 2014 14:23:25 -0700
Subject: lib/scatterlist: make ARCH_HAS_SG_CHAIN an actual Kconfig

Rather than have architectures #define ARCH_HAS_SG_CHAIN in an
architecture specific scatterlist.h, make it a proper Kconfig option and
use that instead.  At same time, remove the header files are are now
mostly useless and just include asm-generic/scatterlist.h.

[sfr@canb.auug.org.au: powerpc files now need asm/dma.h]
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>			[x86]
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>	[powerpc]
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "James E.J. Bottomley" <JBottomley@parallels.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/Kconfig                        |  1 +
 arch/arm/include/asm/Kbuild             |  1 +
 arch/arm/include/asm/scatterlist.h      | 12 ------------
 arch/arm64/Kconfig                      |  1 +
 arch/ia64/Kconfig                       |  1 +
 arch/ia64/include/asm/Kbuild            |  1 +
 arch/ia64/include/asm/scatterlist.h     |  7 -------
 arch/powerpc/Kconfig                    |  1 +
 arch/powerpc/include/asm/Kbuild         |  1 +
 arch/powerpc/include/asm/scatterlist.h  | 17 -----------------
 arch/powerpc/mm/dma-noncoherent.c       |  1 +
 arch/powerpc/platforms/44x/warp.c       |  1 +
 arch/powerpc/platforms/52xx/efika.c     |  1 +
 arch/powerpc/platforms/amigaone/setup.c |  1 +
 arch/s390/Kconfig                       |  1 +
 arch/s390/include/asm/Kbuild            |  1 +
 arch/s390/include/asm/scatterlist.h     |  3 ---
 arch/sparc/Kconfig                      |  1 +
 arch/sparc/include/asm/Kbuild           |  1 +
 arch/sparc/include/asm/scatterlist.h    |  8 --------
 arch/um/include/asm/Kbuild              |  1 +
 arch/x86/Kconfig                        |  1 +
 arch/x86/include/asm/Kbuild             |  3 ++-
 arch/x86/include/asm/scatterlist.h      |  8 --------
 include/linux/scatterlist.h             |  2 +-
 include/scsi/scsi.h                     |  2 +-
 lib/Kconfig                             |  7 +++++++
 lib/scatterlist.c                       |  4 ++--
 28 files changed, 30 insertions(+), 60 deletions(-)
 delete mode 100644 arch/arm/include/asm/scatterlist.h
 delete mode 100644 arch/ia64/include/asm/scatterlist.h
 delete mode 100644 arch/powerpc/include/asm/scatterlist.h
 delete mode 100644 arch/s390/include/asm/scatterlist.h
 delete mode 100644 arch/sparc/include/asm/scatterlist.h
 delete mode 100644 arch/x86/include/asm/scatterlist.h

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d31c500653a2..8e9dbcbcf5af 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -83,6 +83,7 @@ config ARM
 	  <http://www.arm.linux.org.uk/>.
 
 config ARM_HAS_SG_CHAIN
+	select ARCH_HAS_SG_CHAIN
 	bool
 
 config NEED_SG_DMA_LENGTH
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f5a357601983..70cd84eb7fda 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += poll.h
 generic-y += preempt.h
 generic-y += resource.h
 generic-y += rwsem.h
+generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += sembuf.h
diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h
deleted file mode 100644
index cefdb8f898a1..000000000000
--- a/arch/arm/include/asm/scatterlist.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASMARM_SCATTERLIST_H
-#define _ASMARM_SCATTERLIST_H
-
-#ifdef CONFIG_ARM_HAS_SG_CHAIN
-#define ARCH_HAS_SG_CHAIN
-#endif
-
-#include <asm/memory.h>
-#include <asm/types.h>
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASMARM_SCATTERLIST_H */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b0f9c9db9590..fd4e81a4e1ce 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,6 +1,7 @@
 config ARM64
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 44a6915ab13d..c84c88bbbbd7 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,6 +28,7 @@ config IA64
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_VIRT_CPU_ACCOUNTING
+	select ARCH_HAS_SG_CHAIN
 	select VIRT_TO_BUS
 	select ARCH_DISCARD_MEMBLOCK
 	select GENERIC_IRQ_PROBE
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 0da4aa2602ae..e8317d2d6c8d 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += hash.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
 generic-y += vtime.h
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
deleted file mode 100644
index 08fd93bff1db..000000000000
--- a/arch/ia64/include/asm/scatterlist.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_IA64_SCATTERLIST_H
-#define _ASM_IA64_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 80b94b0add1f..4bc7b62fb4b6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -111,6 +111,7 @@ config PPC
 	select HAVE_DMA_API_DEBUG
 	select HAVE_OPROFILE
 	select HAVE_DEBUG_KMEMLEAK
+	select ARCH_HAS_SG_CHAIN
 	select GENERIC_ATOMIC64 if PPC32
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select HAVE_PERF_EVENTS
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3fb1bc432f4f..7f23f162ce9c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,5 +4,6 @@ generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += rwsem.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
 generic-y += vtime.h
diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
deleted file mode 100644
index de1f620bd5c9..000000000000
--- a/arch/powerpc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_POWERPC_SCATTERLIST_H
-#define _ASM_POWERPC_SCATTERLIST_H
-/*
- * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/dma.h>
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_POWERPC_SCATTERLIST_H */
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 7b6c10750179..d85e86aac7fb 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@
 #include <linux/export.h>
 
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 
 #include "mmu_decl.h"
 
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 534574a97ec9..3a104284b338 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -25,6 +25,7 @@
 #include <asm/time.h>
 #include <asm/uic.h>
 #include <asm/ppc4xx.h>
+#include <asm/dma.h>
 
 
 static __initdata struct of_device_id warp_of_bus[] = {
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 6e19b0ad5d26..3feffde9128d 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -13,6 +13,7 @@
 #include <generated/utsrelease.h>
 #include <linux/pci.h>
 #include <linux/of.h>
+#include <asm/dma.h>
 #include <asm/prom.h>
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 03aabc0e16ac..2fe12046279e 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -24,6 +24,7 @@
 #include <asm/i8259.h>
 #include <asm/time.h>
 #include <asm/udbg.h>
+#include <asm/dma.h>
 
 extern void __flush_disable_L1(void);
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8ca60f8d5683..05c78bb5f570 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -145,6 +145,7 @@ config S390
 	select TTY
 	select VIRT_CPU_ACCOUNTING
 	select VIRT_TO_BUS
+	select ARCH_HAS_SG_CHAIN
 
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 57892a8a9055..b3fea0722ff1 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -4,4 +4,5 @@ generic-y += clkdev.h
 generic-y += hash.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
deleted file mode 100644
index 6d45ef6c12a7..000000000000
--- a/arch/s390/include/asm/scatterlist.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 4692c90936f1..a537816613f9 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC
 	select MODULES_USE_ELF_RELA
 	select ODD_RT_SIGACTION
 	select OLD_SIGSUSPEND
+	select ARCH_HAS_SG_CHAIN
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a45821818003..cdd1b447bb6c 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h
 generic-y += module.h
 generic-y += mutex.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += types.h
diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h
deleted file mode 100644
index 92bb638313f8..000000000000
--- a/arch/sparc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SPARC_SCATTERLIST_H
-#define _SPARC_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* !(_SPARC_SCATTERLIST_H) */
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index a5e4b6068213..7bd64aa2e94a 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += switch_to.h
 generic-y += topology.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bf2405053af5..c915cc6e40be 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -96,6 +96,7 @@ config X86
 	select IRQ_FORCED_THREADING
 	select HAVE_BPF_JIT if X86_64
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select ARCH_HAS_SG_CHAIN
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 3ca9762e1649..3bf000fab0ae 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,6 +5,7 @@ genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
 
 generic-y += clkdev.h
-generic-y += early_ioremap.h
 generic-y += cputime.h
+generic-y += early_ioremap.h
 generic-y += mcs_spinlock.h
+generic-y += scatterlist.h
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
deleted file mode 100644
index 4240878b9d76..000000000000
--- a/arch/x86/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index f4ec8bbcb372..ed8f9e70df9b 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
 			    struct scatterlist *sgl)
 {
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
 	BUG();
 #endif
 
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index e6df23cae7be..261e708010da 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -31,7 +31,7 @@ enum scsi_timeouts {
  * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
  * is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
  */
-#ifdef ARCH_HAS_SG_CHAIN
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
 #define SCSI_MAX_SG_CHAIN_SEGMENTS	2048
 #else
 #define SCSI_MAX_SG_CHAIN_SEGMENTS	SCSI_MAX_SG_SEGMENTS
diff --git a/lib/Kconfig b/lib/Kconfig
index df872659ddd3..a5ce0c7f6c30 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -508,4 +508,11 @@ config UCS2_STRING
 
 source "lib/fonts/Kconfig"
 
+#
+# sg chaining option
+#
+
+config ARCH_HAS_SG_CHAIN
+	def_bool n
+
 endmenu
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index b4415fceb7e7..9cdf62f8accd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents);
  **/
 struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
 {
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
 	struct scatterlist *ret = &sgl[nents - 1];
 #else
 	struct scatterlist *sg, *ret = NULL;
@@ -255,7 +255,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
 
 	if (nents == 0)
 		return -EINVAL;
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
 	if (WARN_ON_ONCE(nents > max_ents))
 		return -EINVAL;
 #endif
-- 
cgit v1.2.3


From a6c19dfe39941a5d3f4d072121c0a4841e7e26fd Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Fri, 8 Aug 2014 14:23:40 -0700
Subject: arm64,ia64,ppc,s390,sh,tile,um,x86,mm: remove default gate area

The core mm code will provide a default gate area based on
FIXADDR_USER_START and FIXADDR_USER_END if
!defined(__HAVE_ARCH_GATE_AREA) && defined(AT_SYSINFO_EHDR).

This default is only useful for ia64.  arm64, ppc, s390, sh, tile, 64-bit
UML, and x86_32 have their own code just to disable it.  arm, 32-bit UML,
and x86_64 have gate areas, but they have their own implementations.

This gets rid of the default and moves the code into ia64.

This should save some code on architectures without a gate area: it's now
possible to inline the gate_area functions in the default case.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Nathan Lynch <nathan_lynch@mentor.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [in principle]
Acked-by: Richard Weinberger <richard@nod.at> [for um]
Acked-by: Will Deacon <will.deacon@arm.com> [for arm64]
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Nathan Lynch <Nathan_Lynch@mentor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/page.h      |  3 ---
 arch/arm64/kernel/vdso.c           | 19 -------------------
 arch/ia64/include/asm/page.h       |  2 ++
 arch/ia64/mm/init.c                | 31 +++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/page.h    |  3 ---
 arch/powerpc/kernel/vdso.c         | 16 ----------------
 arch/s390/include/asm/page.h       |  2 --
 arch/s390/kernel/vdso.c            | 15 ---------------
 arch/sh/include/asm/page.h         |  5 -----
 arch/sh/kernel/vsyscall/vsyscall.c | 15 ---------------
 arch/tile/include/asm/page.h       |  6 ------
 arch/tile/kernel/vdso.c            | 15 ---------------
 arch/um/include/asm/page.h         |  5 +++++
 arch/x86/include/asm/page.h        |  1 -
 arch/x86/include/asm/page_64.h     |  2 ++
 arch/x86/um/asm/elf.h              |  1 -
 arch/x86/um/mem_64.c               | 15 ---------------
 arch/x86/vdso/vdso32-setup.c       | 19 +------------------
 include/linux/mm.h                 | 17 ++++++++++++-----
 mm/memory.c                        | 38 --------------------------------------
 mm/nommu.c                         |  5 -----
 21 files changed, 53 insertions(+), 182 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7a3f462133b0..22b16232bd60 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -28,9 +28,6 @@
 #define PAGE_SIZE		(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
-/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
-#define __HAVE_ARCH_GATE_AREA		1
-
 /*
  * The idmap and swapper page tables need some space reserved in the kernel
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a81a446a5786..32aeea083d93 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -194,25 +194,6 @@ up_fail:
 	return PTR_ERR(ret);
 }
 
-/*
- * We define AT_SYSINFO_EHDR, so we need these function stubs to keep
- * Linux happy.
- */
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
 /*
  * Update the vDSO data page to keep in sync with kernel timekeeping.
  */
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index f1e1b2e3cdb3..1f1bf144fe62 100644
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -231,4 +231,6 @@ get_order (unsigned long size)
 #define PERCPU_ADDR		(-PERCPU_PAGE_SIZE)
 #define LOAD_OFFSET		(KERNEL_START - KERNEL_TR_PAGE_SIZE)
 
+#define __HAVE_ARCH_GATE_AREA	1
+
 #endif /* _ASM_IA64_PAGE_H */
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 892d43e32f3b..6b3345758d3e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -278,6 +278,37 @@ setup_gate (void)
 	ia64_patch_gate();
 }
 
+static struct vm_area_struct gate_vma;
+
+static int __init gate_vma_init(void)
+{
+	gate_vma.vm_mm = NULL;
+	gate_vma.vm_start = FIXADDR_USER_START;
+	gate_vma.vm_end = FIXADDR_USER_END;
+	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+	gate_vma.vm_page_prot = __P101;
+
+	return 0;
+}
+__initcall(gate_vma_init);
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+	return &gate_vma;
+}
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+	if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
+		return 1;
+	return 0;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+	return in_gate_area_no_mm(addr);
+}
+
 void ia64_mmu_init(void *my_cpu_data)
 {
 	unsigned long pta, impl_va_bits;
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 32e4e212b9c1..26fe1ae15212 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -48,9 +48,6 @@ extern unsigned int HPAGE_SHIFT;
 #define HUGE_MAX_HSTATE		(MMU_PAGE_COUNT-1)
 #endif
 
-/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
-#define __HAVE_ARCH_GATE_AREA		1
-
 /*
  * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
  * assign PAGE_MASK to a larger type it gets extended the way we want
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index ce74c335a6a4..f174351842cf 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -840,19 +840,3 @@ static int __init vdso_init(void)
 	return 0;
 }
 arch_initcall(vdso_init);
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 114258eeaacd..7b2ac6e44166 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -162,6 +162,4 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
-#define __HAVE_ARCH_GATE_AREA 1
-
 #endif /* _S390_PAGE_H */
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 613649096783..0bbb7e027c5a 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -316,18 +316,3 @@ static int __init vdso_init(void)
 	return 0;
 }
 early_initcall(vdso_init);
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 15d970328f71..fe20d14ae051 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -186,11 +186,6 @@ typedef struct page *pgtable_t;
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
-/* vDSO support */
-#ifdef CONFIG_VSYSCALL
-#define __HAVE_ARCH_GATE_AREA
-#endif
-
 /*
  * Some drivers need to perform DMA into kmalloc'ed buffers
  * and so we have to increase the kmalloc minalign for this.
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 5ca579720a09..ea2aa1393b87 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -92,18 +92,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
 	return NULL;
 }
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long address)
-{
-	return 0;
-}
-
-int in_gate_area_no_mm(unsigned long address)
-{
-	return 0;
-}
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 672768008618..a213a8d84a95 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -38,12 +38,6 @@
 #define PAGE_MASK	(~(PAGE_SIZE - 1))
 #define HPAGE_MASK	(~(HPAGE_SIZE - 1))
 
-/*
- * We do define AT_SYSINFO_EHDR to support vDSO,
- * but don't use the gate mechanism.
- */
-#define __HAVE_ARCH_GATE_AREA		1
-
 /*
  * If the Kconfig doesn't specify, set a maximum zone order that
  * is enough so that we can create huge pages from small pages given
diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c
index 1533af24106e..5bc51d7dfdcb 100644
--- a/arch/tile/kernel/vdso.c
+++ b/arch/tile/kernel/vdso.c
@@ -121,21 +121,6 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 	return NULL;
 }
 
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long address)
-{
-	return 0;
-}
-
-int in_gate_area_no_mm(unsigned long address)
-{
-	return 0;
-}
-
 int setup_vdso_pages(void)
 {
 	struct page **pagelist;
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 5ff53d9185f7..71c5d132062a 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -119,4 +119,9 @@ extern unsigned long uml_physmem;
 #include <asm-generic/getorder.h>
 
 #endif	/* __ASSEMBLY__ */
+
+#ifdef CONFIG_X86_32
+#define __HAVE_ARCH_GATE_AREA 1
+#endif
+
 #endif	/* __UM_PAGE_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 775873d3be55..802dde30c928 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -70,7 +70,6 @@ extern bool __virt_addr_valid(unsigned long kaddr);
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
-#define __HAVE_ARCH_GATE_AREA 1
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 
 #endif	/* __KERNEL__ */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 0f1ddee6a0ce..f408caf73430 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,4 +39,6 @@ void copy_page(void *to, void *from);
 
 #endif	/* !__ASSEMBLY__ */
 
+#define __HAVE_ARCH_GATE_AREA 1
+
 #endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 0feee2fd5077..25a1022dd793 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -216,6 +216,5 @@ extern long elf_aux_hwcap;
 #define ELF_HWCAP (elf_aux_hwcap)
 
 #define SET_PERSONALITY(ex) do ; while(0)
-#define __HAVE_ARCH_GATE_AREA 1
 
 #endif
diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c
index c6492e75797b..f8fecaddcc0d 100644
--- a/arch/x86/um/mem_64.c
+++ b/arch/x86/um/mem_64.c
@@ -9,18 +9,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
 	return NULL;
 }
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index e4f7781ee162..e904c270573b 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -115,23 +115,6 @@ static __init int ia32_binfmt_init(void)
 	return 0;
 }
 __initcall(ia32_binfmt_init);
-#endif
-
-#else  /* CONFIG_X86_32 */
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
+#endif /* CONFIG_SYSCTL */
 
 #endif	/* CONFIG_X86_64 */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e03dd29145a0..8981cc882ed2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2014,13 +2014,20 @@ static inline bool kernel_page_present(struct page *page) { return true; }
 #endif /* CONFIG_HIBERNATION */
 #endif
 
+#ifdef __HAVE_ARCH_GATE_AREA
 extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
-#ifdef	__HAVE_ARCH_GATE_AREA
-int in_gate_area_no_mm(unsigned long addr);
-int in_gate_area(struct mm_struct *mm, unsigned long addr);
+extern int in_gate_area_no_mm(unsigned long addr);
+extern int in_gate_area(struct mm_struct *mm, unsigned long addr);
 #else
-int in_gate_area_no_mm(unsigned long addr);
-#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
+static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+	return NULL;
+}
+static inline int in_gate_area_no_mm(unsigned long addr) { return 0; }
+static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+	return 0;
+}
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
 #ifdef CONFIG_SYSCTL
diff --git a/mm/memory.c b/mm/memory.c
index 2a899e4e82ba..ab3537bcfed2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3430,44 +3430,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-#if !defined(__HAVE_ARCH_GATE_AREA)
-
-#if defined(AT_SYSINFO_EHDR)
-static struct vm_area_struct gate_vma;
-
-static int __init gate_vma_init(void)
-{
-	gate_vma.vm_mm = NULL;
-	gate_vma.vm_start = FIXADDR_USER_START;
-	gate_vma.vm_end = FIXADDR_USER_END;
-	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
-	gate_vma.vm_page_prot = __P101;
-
-	return 0;
-}
-__initcall(gate_vma_init);
-#endif
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef AT_SYSINFO_EHDR
-	return &gate_vma;
-#else
-	return NULL;
-#endif
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-#ifdef AT_SYSINFO_EHDR
-	if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
-		return 1;
-#endif
-	return 0;
-}
-
-#endif	/* __HAVE_ARCH_GATE_AREA */
-
 static int __follow_pte(struct mm_struct *mm, unsigned long address,
 		pte_t **ptepp, spinlock_t **ptlp)
 {
diff --git a/mm/nommu.c b/mm/nommu.c
index 4a852f6c5709..a881d9673c6b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1981,11 +1981,6 @@ error:
 	return -ENOMEM;
 }
 
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
-
 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	BUG();
-- 
cgit v1.2.3


From 4bb5f5d9395bc112d93a134d8f5b05611eddc9c0 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Fri, 8 Aug 2014 14:25:25 -0700
Subject: mm: allow drivers to prevent new writable mappings

This patch (of 6):

The i_mmap_writable field counts existing writable mappings of an
address_space.  To allow drivers to prevent new writable mappings, make
this counter signed and prevent new writable mappings if it is negative.
This is modelled after i_writecount and DENYWRITE.

This will be required by the shmem-sealing infrastructure to prevent any
new writable mappings after the WRITE seal has been set.  In case there
exists a writable mapping, this operation will fail with EBUSY.

Note that we rely on the fact that iff you already own a writable mapping,
you can increase the counter without using the helpers.  This is the same
that we do for i_writecount.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c         |  1 +
 include/linux/fs.h | 29 +++++++++++++++++++++++++++--
 kernel/fork.c      |  2 +-
 mm/mmap.c          | 30 ++++++++++++++++++++++++------
 mm/swap_state.c    |  1 +
 5 files changed, 54 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 5938f3928944..26753ba7b6d6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
 	mapping->flags = 0;
+	atomic_set(&mapping->i_mmap_writable, 0);
 	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
 	mapping->private_data = NULL;
 	mapping->backing_dev_info = &default_backing_dev_info;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ab6c6913040..f0890e4a7c25 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -387,7 +387,7 @@ struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	spinlock_t		tree_lock;	/* and lock protecting it */
-	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
+	atomic_t		i_mmap_writable;/* count VM_SHARED mappings */
 	struct rb_root		i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
 	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
@@ -470,10 +470,35 @@ static inline int mapping_mapped(struct address_space *mapping)
  * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
  * marks vma as VM_SHARED if it is shared, and the file was opened for
  * writing i.e. vma may be mprotected writable even if now readonly.
+ *
+ * If i_mmap_writable is negative, no new writable mappings are allowed. You
+ * can only deny writable mappings, if none exists right now.
  */
 static inline int mapping_writably_mapped(struct address_space *mapping)
 {
-	return mapping->i_mmap_writable != 0;
+	return atomic_read(&mapping->i_mmap_writable) > 0;
+}
+
+static inline int mapping_map_writable(struct address_space *mapping)
+{
+	return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
+		0 : -EPERM;
+}
+
+static inline void mapping_unmap_writable(struct address_space *mapping)
+{
+	atomic_dec(&mapping->i_mmap_writable);
+}
+
+static inline int mapping_deny_writable(struct address_space *mapping)
+{
+	return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
+		0 : -EBUSY;
+}
+
+static inline void mapping_allow_writable(struct address_space *mapping)
+{
+	atomic_inc(&mapping->i_mmap_writable);
 }
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index fa9124322cd4..1380d8ace334 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -429,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 				atomic_dec(&inode->i_writecount);
 			mutex_lock(&mapping->i_mmap_mutex);
 			if (tmp->vm_flags & VM_SHARED)
-				mapping->i_mmap_writable++;
+				atomic_inc(&mapping->i_mmap_writable);
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			if (unlikely(tmp->vm_flags & VM_NONLINEAR))
diff --git a/mm/mmap.c b/mm/mmap.c
index 64c9d736155c..c1f2ea4a0b99 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -221,7 +221,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 	if (vma->vm_flags & VM_DENYWRITE)
 		atomic_inc(&file_inode(file)->i_writecount);
 	if (vma->vm_flags & VM_SHARED)
-		mapping->i_mmap_writable--;
+		mapping_unmap_writable(mapping);
 
 	flush_dcache_mmap_lock(mapping);
 	if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -622,7 +622,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
 		if (vma->vm_flags & VM_DENYWRITE)
 			atomic_dec(&file_inode(file)->i_writecount);
 		if (vma->vm_flags & VM_SHARED)
-			mapping->i_mmap_writable++;
+			atomic_inc(&mapping->i_mmap_writable);
 
 		flush_dcache_mmap_lock(mapping);
 		if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -1577,6 +1577,17 @@ munmap_back:
 			if (error)
 				goto free_vma;
 		}
+		if (vm_flags & VM_SHARED) {
+			error = mapping_map_writable(file->f_mapping);
+			if (error)
+				goto allow_write_and_free_vma;
+		}
+
+		/* ->mmap() can change vma->vm_file, but must guarantee that
+		 * vma_link() below can deny write-access if VM_DENYWRITE is set
+		 * and map writably if VM_SHARED is set. This usually means the
+		 * new file must not have been exposed to user-space, yet.
+		 */
 		vma->vm_file = get_file(file);
 		error = file->f_op->mmap(file, vma);
 		if (error)
@@ -1616,8 +1627,12 @@ munmap_back:
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	/* Once vma denies write, undo our temporary denial count */
-	if (vm_flags & VM_DENYWRITE)
-		allow_write_access(file);
+	if (file) {
+		if (vm_flags & VM_SHARED)
+			mapping_unmap_writable(file->f_mapping);
+		if (vm_flags & VM_DENYWRITE)
+			allow_write_access(file);
+	}
 	file = vma->vm_file;
 out:
 	perf_event_mmap(vma);
@@ -1646,14 +1661,17 @@ out:
 	return addr;
 
 unmap_and_free_vma:
-	if (vm_flags & VM_DENYWRITE)
-		allow_write_access(file);
 	vma->vm_file = NULL;
 	fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
 	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
 	charged = 0;
+	if (vm_flags & VM_SHARED)
+		mapping_unmap_writable(file->f_mapping);
+allow_write_and_free_vma:
+	if (vm_flags & VM_DENYWRITE)
+		allow_write_access(file);
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e160151da6b8..3e0ec83d000c 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,6 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = {
 struct address_space swapper_spaces[MAX_SWAPFILES] = {
 	[0 ... MAX_SWAPFILES - 1] = {
 		.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
+		.i_mmap_writable = ATOMIC_INIT(0),
 		.a_ops		= &swap_aops,
 		.backing_dev_info = &swap_backing_dev_info,
 	}
-- 
cgit v1.2.3


From 40e041a2c858b3caefc757e26cb85bfceae5062b Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Fri, 8 Aug 2014 14:25:27 -0700
Subject: shm: add sealing API

If two processes share a common memory region, they usually want some
guarantees to allow safe access. This often includes:
  - one side cannot overwrite data while the other reads it
  - one side cannot shrink the buffer while the other accesses it
  - one side cannot grow the buffer beyond previously set boundaries

If there is a trust-relationship between both parties, there is no need
for policy enforcement.  However, if there's no trust relationship (eg.,
for general-purpose IPC) sharing memory-regions is highly fragile and
often not possible without local copies.  Look at the following two
use-cases:

  1) A graphics client wants to share its rendering-buffer with a
     graphics-server. The memory-region is allocated by the client for
     read/write access and a second FD is passed to the server. While
     scanning out from the memory region, the server has no guarantee that
     the client doesn't shrink the buffer at any time, requiring rather
     cumbersome SIGBUS handling.
  2) A process wants to perform an RPC on another process. To avoid huge
     bandwidth consumption, zero-copy is preferred. After a message is
     assembled in-memory and a FD is passed to the remote side, both sides
     want to be sure that neither modifies this shared copy, anymore. The
     source may have put sensible data into the message without a separate
     copy and the target may want to parse the message inline, to avoid a
     local copy.

While SIGBUS handling, POSIX mandatory locking and MAP_DENYWRITE provide
ways to achieve most of this, the first one is unproportionally ugly to
use in libraries and the latter two are broken/racy or even disabled due
to denial of service attacks.

This patch introduces the concept of SEALING.  If you seal a file, a
specific set of operations is blocked on that file forever.  Unlike locks,
seals can only be set, never removed.  Hence, once you verified a specific
set of seals is set, you're guaranteed that no-one can perform the blocked
operations on this file, anymore.

An initial set of SEALS is introduced by this patch:
  - SHRINK: If SEAL_SHRINK is set, the file in question cannot be reduced
            in size. This affects ftruncate() and open(O_TRUNC).
  - GROW: If SEAL_GROW is set, the file in question cannot be increased
          in size. This affects ftruncate(), fallocate() and write().
  - WRITE: If SEAL_WRITE is set, no write operations (besides resizing)
           are possible. This affects fallocate(PUNCH_HOLE), mmap() and
           write().
  - SEAL: If SEAL_SEAL is set, no further seals can be added to a file.
          This basically prevents the F_ADD_SEAL operation on a file and
          can be set to prevent others from adding further seals that you
          don't want.

The described use-cases can easily use these seals to provide safe use
without any trust-relationship:

  1) The graphics server can verify that a passed file-descriptor has
     SEAL_SHRINK set. This allows safe scanout, while the client is
     allowed to increase buffer size for window-resizing on-the-fly.
     Concurrent writes are explicitly allowed.
  2) For general-purpose IPC, both processes can verify that SEAL_SHRINK,
     SEAL_GROW and SEAL_WRITE are set. This guarantees that neither
     process can modify the data while the other side parses it.
     Furthermore, it guarantees that even with writable FDs passed to the
     peer, it cannot increase the size to hit memory-limits of the source
     process (in case the file-storage is accounted to the source).

The new API is an extension to fcntl(), adding two new commands:
  F_GET_SEALS: Return a bitset describing the seals on the file. This
               can be called on any FD if the underlying file supports
               sealing.
  F_ADD_SEALS: Change the seals of a given file. This requires WRITE
               access to the file and F_SEAL_SEAL may not already be set.
               Furthermore, the underlying file must support sealing and
               there may not be any existing shared mapping of that file.
               Otherwise, EBADF/EPERM is returned.
               The given seals are _added_ to the existing set of seals
               on the file. You cannot remove seals again.

The fcntl() handler is currently specific to shmem and disabled on all
files. A file needs to explicitly support sealing for this interface to
work. A separate syscall is added in a follow-up, which creates files that
support sealing. There is no intention to support this on other
file-systems. Semantics are unclear for non-volatile files and we lack any
use-case right now. Therefore, the implementation is specific to shmem.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fcntl.c                 |   5 ++
 include/linux/shmem_fs.h   |  17 ++++++
 include/uapi/linux/fcntl.h |  15 +++++
 mm/shmem.c                 | 143 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 180 insertions(+)

(limited to 'include/linux')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 72c82f69b01b..22d1c3df61ac 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
+#include <linux/shmem_fs.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_GETPIPE_SZ:
 		err = pipe_fcntl(filp, cmd, arg);
 		break;
+	case F_ADD_SEALS:
+	case F_GET_SEALS:
+		err = shmem_fcntl(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c2d29f..50777b5b1e4c 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -1,6 +1,7 @@
 #ifndef __SHMEM_FS_H
 #define __SHMEM_FS_H
 
+#include <linux/file.h>
 #include <linux/swap.h>
 #include <linux/mempolicy.h>
 #include <linux/pagemap.h>
@@ -11,6 +12,7 @@
 
 struct shmem_inode_info {
 	spinlock_t		lock;
+	unsigned int		seals;		/* shmem seals */
 	unsigned long		flags;
 	unsigned long		alloced;	/* data pages alloced to file */
 	union {
@@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page(
 					mapping_gfp_mask(mapping));
 }
 
+#ifdef CONFIG_TMPFS
+
+extern int shmem_add_seals(struct file *file, unsigned int seals);
+extern int shmem_get_seals(struct file *file);
+extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
+
+#else
+
+static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
+{
+	return -EINVAL;
+}
+
+#endif
+
 #endif
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 074b886c6be0..beed138bd359 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -27,6 +27,21 @@
 #define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
 #define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
 
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS	(F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS	(F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
 /*
  * Types of directory notifications that may be requested.
  */
diff --git a/mm/shmem.c b/mm/shmem.c
index 6dc80d298f9d..8b43bb7a4efe 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,6 +66,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/highmem.h>
 #include <linux/seq_file.h>
 #include <linux/magic.h>
+#include <linux/fcntl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -547,6 +548,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	int error;
 
 	error = inode_change_ok(inode, attr);
@@ -557,6 +559,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 		loff_t oldsize = inode->i_size;
 		loff_t newsize = attr->ia_size;
 
+		/* protected by i_mutex */
+		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
+		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
+			return -EPERM;
+
 		if (newsize != oldsize) {
 			error = shmem_reacct_size(SHMEM_I(inode)->flags,
 					oldsize, newsize);
@@ -1412,6 +1419,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
 		spin_lock_init(&info->lock);
+		info->seals = F_SEAL_SEAL;
 		info->flags = flags & VM_NORESERVE;
 		INIT_LIST_HEAD(&info->swaplist);
 		simple_xattrs_init(&info->xattrs);
@@ -1470,7 +1478,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+
+	/* i_mutex is held by caller */
+	if (unlikely(info->seals)) {
+		if (info->seals & F_SEAL_WRITE)
+			return -EPERM;
+		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
+			return -EPERM;
+	}
+
 	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
 }
 
@@ -1808,11 +1826,125 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
+static int shmem_wait_for_pins(struct address_space *mapping)
+{
+	return 0;
+}
+
+#define F_ALL_SEALS (F_SEAL_SEAL | \
+		     F_SEAL_SHRINK | \
+		     F_SEAL_GROW | \
+		     F_SEAL_WRITE)
+
+int shmem_add_seals(struct file *file, unsigned int seals)
+{
+	struct inode *inode = file_inode(file);
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	int error;
+
+	/*
+	 * SEALING
+	 * Sealing allows multiple parties to share a shmem-file but restrict
+	 * access to a specific subset of file operations. Seals can only be
+	 * added, but never removed. This way, mutually untrusted parties can
+	 * share common memory regions with a well-defined policy. A malicious
+	 * peer can thus never perform unwanted operations on a shared object.
+	 *
+	 * Seals are only supported on special shmem-files and always affect
+	 * the whole underlying inode. Once a seal is set, it may prevent some
+	 * kinds of access to the file. Currently, the following seals are
+	 * defined:
+	 *   SEAL_SEAL: Prevent further seals from being set on this file
+	 *   SEAL_SHRINK: Prevent the file from shrinking
+	 *   SEAL_GROW: Prevent the file from growing
+	 *   SEAL_WRITE: Prevent write access to the file
+	 *
+	 * As we don't require any trust relationship between two parties, we
+	 * must prevent seals from being removed. Therefore, sealing a file
+	 * only adds a given set of seals to the file, it never touches
+	 * existing seals. Furthermore, the "setting seals"-operation can be
+	 * sealed itself, which basically prevents any further seal from being
+	 * added.
+	 *
+	 * Semantics of sealing are only defined on volatile files. Only
+	 * anonymous shmem files support sealing. More importantly, seals are
+	 * never written to disk. Therefore, there's no plan to support it on
+	 * other file types.
+	 */
+
+	if (file->f_op != &shmem_file_operations)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_WRITE))
+		return -EPERM;
+	if (seals & ~(unsigned int)F_ALL_SEALS)
+		return -EINVAL;
+
+	mutex_lock(&inode->i_mutex);
+
+	if (info->seals & F_SEAL_SEAL) {
+		error = -EPERM;
+		goto unlock;
+	}
+
+	if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
+		error = mapping_deny_writable(file->f_mapping);
+		if (error)
+			goto unlock;
+
+		error = shmem_wait_for_pins(file->f_mapping);
+		if (error) {
+			mapping_allow_writable(file->f_mapping);
+			goto unlock;
+		}
+	}
+
+	info->seals |= seals;
+	error = 0;
+
+unlock:
+	mutex_unlock(&inode->i_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(shmem_add_seals);
+
+int shmem_get_seals(struct file *file)
+{
+	if (file->f_op != &shmem_file_operations)
+		return -EINVAL;
+
+	return SHMEM_I(file_inode(file))->seals;
+}
+EXPORT_SYMBOL_GPL(shmem_get_seals);
+
+long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long error;
+
+	switch (cmd) {
+	case F_ADD_SEALS:
+		/* disallow upper 32bit */
+		if (arg > UINT_MAX)
+			return -EINVAL;
+
+		error = shmem_add_seals(file, arg);
+		break;
+	case F_GET_SEALS:
+		error = shmem_get_seals(file);
+		break;
+	default:
+		error = -EINVAL;
+		break;
+	}
+
+	return error;
+}
+
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 							 loff_t len)
 {
 	struct inode *inode = file_inode(file);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_falloc shmem_falloc;
 	pgoff_t start, index, end;
 	int error;
@@ -1828,6 +1960,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
 		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
+		/* protected by i_mutex */
+		if (info->seals & F_SEAL_WRITE) {
+			error = -EPERM;
+			goto out;
+		}
+
 		shmem_falloc.waitq = &shmem_falloc_waitq;
 		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
 		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -1854,6 +1992,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	if (error)
 		goto out;
 
+	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
+		error = -EPERM;
+		goto out;
+	}
+
 	start = offset >> PAGE_CACHE_SHIFT;
 	end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	/* Try to avoid a swapstorm if len is impossible to satisfy */
-- 
cgit v1.2.3


From 9183df25fe7b194563db3fec6dc3202a5855839c Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Fri, 8 Aug 2014 14:25:29 -0700
Subject: shm: add memfd_create() syscall

memfd_create() is similar to mmap(MAP_ANON), but returns a file-descriptor
that you can pass to mmap().  It can support sealing and avoids any
connection to user-visible mount-points.  Thus, it's not subject to quotas
on mounted file-systems, but can be used like malloc()'ed memory, but with
a file-descriptor to it.

memfd_create() returns the raw shmem file, so calls like ftruncate() can
be used to modify the underlying inode.  Also calls like fstat() will
return proper information and mark the file as regular file.  If you want
sealing, you can specify MFD_ALLOW_SEALING.  Otherwise, sealing is not
supported (like on all other regular files).

Compared to O_TMPFILE, it does not require a tmpfs mount-point and is not
subject to a filesystem size limit.  It is still properly accounted to
memcg limits, though, and to the same overcommit or no-overcommit
accounting as all user memory.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/syscalls/syscall_32.tbl |  1 +
 arch/x86/syscalls/syscall_64.tbl |  1 +
 include/linux/syscalls.h         |  1 +
 include/uapi/linux/memfd.h       |  8 +++++
 kernel/sys_ni.c                  |  1 +
 mm/shmem.c                       | 73 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 85 insertions(+)
 create mode 100644 include/uapi/linux/memfd.h

(limited to 'include/linux')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d1b4a119d4a5..028b78168d85 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -362,3 +362,4 @@
 353	i386	renameat2		sys_renameat2
 354	i386	seccomp			sys_seccomp
 355	i386	getrandom		sys_getrandom
+356	i386	memfd_create		sys_memfd_create
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 252c804bb1aa..ca2b9aa78c81 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,6 +325,7 @@
 316	common	renameat2		sys_renameat2
 317	common	seccomp			sys_seccomp
 318	common	getrandom		sys_getrandom
+319	common	memfd_create		sys_memfd_create
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 701daff5d899..15a069425cbf 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -802,6 +802,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
 asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
 asmlinkage long sys_eventfd(unsigned int count);
 asmlinkage long sys_eventfd2(unsigned int count, int flags);
+asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int);
 asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
new file mode 100644
index 000000000000..534e364bda92
--- /dev/null
+++ b/include/uapi/linux/memfd.h
@@ -0,0 +1,8 @@
+#ifndef _UAPI_LINUX_MEMFD_H
+#define _UAPI_LINUX_MEMFD_H
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC		0x0001U
+#define MFD_ALLOW_SEALING	0x0002U
+
+#endif /* _UAPI_LINUX_MEMFD_H */
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2904a2105914..1f79e3714533 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -197,6 +197,7 @@ cond_syscall(compat_sys_timerfd_settime);
 cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
+cond_syscall(sys_memfd_create);
 
 /* performance counters: */
 cond_syscall(sys_perf_event_open);
diff --git a/mm/shmem.c b/mm/shmem.c
index 8b43bb7a4efe..4a5498795a2b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,7 +66,9 @@ static struct vfsmount *shm_mnt;
 #include <linux/highmem.h>
 #include <linux/seq_file.h>
 #include <linux/magic.h>
+#include <linux/syscalls.h>
 #include <linux/fcntl.h>
+#include <uapi/linux/memfd.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -2732,6 +2734,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 	shmem_show_mpol(seq, sbinfo->mpol);
 	return 0;
 }
+
+#define MFD_NAME_PREFIX "memfd:"
+#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
+#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
+
+#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
+
+SYSCALL_DEFINE2(memfd_create,
+		const char __user *, uname,
+		unsigned int, flags)
+{
+	struct shmem_inode_info *info;
+	struct file *file;
+	int fd, error;
+	char *name;
+	long len;
+
+	if (flags & ~(unsigned int)MFD_ALL_FLAGS)
+		return -EINVAL;
+
+	/* length includes terminating zero */
+	len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
+	if (len <= 0)
+		return -EFAULT;
+	if (len > MFD_NAME_MAX_LEN + 1)
+		return -EINVAL;
+
+	name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
+	if (!name)
+		return -ENOMEM;
+
+	strcpy(name, MFD_NAME_PREFIX);
+	if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
+		error = -EFAULT;
+		goto err_name;
+	}
+
+	/* terminating-zero may have changed after strnlen_user() returned */
+	if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
+		error = -EFAULT;
+		goto err_name;
+	}
+
+	fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
+	if (fd < 0) {
+		error = fd;
+		goto err_name;
+	}
+
+	file = shmem_file_setup(name, 0, VM_NORESERVE);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto err_fd;
+	}
+	info = SHMEM_I(file_inode(file));
+	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
+	file->f_flags |= O_RDWR | O_LARGEFILE;
+	if (flags & MFD_ALLOW_SEALING)
+		info->seals &= ~F_SEAL_SEAL;
+
+	fd_install(fd, file);
+	kfree(name);
+	return fd;
+
+err_fd:
+	put_unused_fd(fd);
+err_name:
+	kfree(name);
+	return error;
+}
+
 #endif /* CONFIG_TMPFS */
 
 static void shmem_put_super(struct super_block *sb)
-- 
cgit v1.2.3


From 7d3e2bca22feb1f4a624009ff6c15e6f724cb4e7 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:25:43 -0700
Subject: kexec: rename unusebale_pages to unusable_pages

Let's use the more common "unusable".

This patch was originally written and posted by Boris. I am including it
in this patch series.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kexec.h | 2 +-
 kernel/kexec.c        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index a75641930049..d9bb0a57d208 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -100,7 +100,7 @@ struct kimage {
 
 	struct list_head control_pages;
 	struct list_head dest_pages;
-	struct list_head unuseable_pages;
+	struct list_head unusable_pages;
 
 	/* Address of next control page to allocate for crash kernels. */
 	unsigned long control_page;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4b8f0c925884..c7cc2a00181c 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -154,7 +154,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
 	INIT_LIST_HEAD(&image->dest_pages);
 
 	/* Initialize the list of unusable pages */
-	INIT_LIST_HEAD(&image->unuseable_pages);
+	INIT_LIST_HEAD(&image->unusable_pages);
 
 	/* Read in the segments */
 	image->nr_segments = nr_segments;
@@ -609,7 +609,7 @@ static void kimage_free_extra_pages(struct kimage *image)
 	kimage_free_page_list(&image->dest_pages);
 
 	/* Walk through and free any unusable pages I have cached */
-	kimage_free_page_list(&image->unuseable_pages);
+	kimage_free_page_list(&image->unusable_pages);
 
 }
 static void kimage_terminate(struct kimage *image)
@@ -732,7 +732,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
 		/* If the page cannot be used file it away */
 		if (page_to_pfn(page) >
 				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
-			list_add(&page->lru, &image->unuseable_pages);
+			list_add(&page->lru, &image->unusable_pages);
 			continue;
 		}
 		addr = page_to_pfn(page) << PAGE_SHIFT;
-- 
cgit v1.2.3


From 8c86e70acead629aacb4afcd818add66bf6844d9 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:25:50 -0700
Subject: resource: provide new functions to walk through resources

I have added two more functions to walk through resources.

Currently walk_system_ram_range() deals with pfn and /proc/iomem can
contain partial pages.  By dealing in pfn, callback function loses the
info that last page of a memory range is a partial page and not the full
page.  So I implemented walk_system_ram_res() which returns u64 values to
callback functions and now it properly return start and end address.

walk_system_ram_range() uses find_next_system_ram() to find the next ram
resource.  This in turn only travels through siblings of top level child
and does not travers through all the nodes of the resoruce tree.  I also
need another function where I can walk through all the resources, for
example figure out where "GART" aperture is.  Figure out where ACPI memory
is.

So I wrote another function walk_iomem_res() which walks through all
/proc/iomem resources and returns matches as asked by caller.  Caller can
specify "name" of resource, start and end and flags.

Got rid of find_next_system_ram_res() and instead implemented more generic
find_next_iomem_res() which can be used to traverse top level children
only based on an argument.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ioport.h |   6 +++
 kernel/resource.c      | 101 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 98 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5e3a906cc089..142ec544167c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -237,6 +237,12 @@ extern int iomem_is_exclusive(u64 addr);
 extern int
 walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 		void *arg, int (*func)(unsigned long, unsigned long, void *));
+extern int
+walk_system_ram_res(u64 start, u64 end, void *arg,
+		    int (*func)(u64, u64, void *));
+extern int
+walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
+	       int (*func)(u64, u64, void *));
 
 /* True if any part of r1 overlaps r2 */
 static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
diff --git a/kernel/resource.c b/kernel/resource.c
index 3c2237ac32db..da14b8d09296 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock);
 static struct resource *bootmem_resource_free;
 static DEFINE_SPINLOCK(bootmem_resource_lock);
 
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+static struct resource *next_resource(struct resource *p, bool sibling_only)
 {
-	struct resource *p = v;
-	(*pos)++;
+	/* Caller wants to traverse through siblings only */
+	if (sibling_only)
+		return p->sibling;
+
 	if (p->child)
 		return p->child;
 	while (!p->sibling && p->parent)
@@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
 	return p->sibling;
 }
 
+static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct resource *p = v;
+	(*pos)++;
+	return (void *)next_resource(p, false);
+}
+
 #ifdef CONFIG_PROC_FS
 
 enum { MAX_IORES_LEVEL = 5 };
@@ -322,16 +331,19 @@ int release_resource(struct resource *old)
 
 EXPORT_SYMBOL(release_resource);
 
-#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 /*
- * Finds the lowest memory reosurce exists within [res->start.res->end)
+ * Finds the lowest iomem reosurce exists with-in [res->start.res->end)
  * the caller must specify res->start, res->end, res->flags and "name".
  * If found, returns 0, res is overwritten, if not found, returns -1.
+ * This walks through whole tree and not just first level children
+ * until and unless first_level_children_only is true.
  */
-static int find_next_system_ram(struct resource *res, char *name)
+static int find_next_iomem_res(struct resource *res, char *name,
+			       bool first_level_children_only)
 {
 	resource_size_t start, end;
 	struct resource *p;
+	bool sibling_only = false;
 
 	BUG_ON(!res);
 
@@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name)
 	BUG_ON(start >= end);
 
 	read_lock(&resource_lock);
-	for (p = iomem_resource.child; p ; p = p->sibling) {
-		/* system ram is just marked as IORESOURCE_MEM */
+
+	if (first_level_children_only) {
+		p = iomem_resource.child;
+		sibling_only = true;
+	} else
+		p = &iomem_resource;
+
+	while ((p = next_resource(p, sibling_only))) {
 		if (p->flags != res->flags)
 			continue;
 		if (name && strcmp(p->name, name))
@@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name)
 		if ((p->end >= start) && (p->start < end))
 			break;
 	}
+
 	read_unlock(&resource_lock);
 	if (!p)
 		return -1;
@@ -364,6 +383,70 @@ static int find_next_system_ram(struct resource *res, char *name)
 	return 0;
 }
 
+/*
+ * Walks through iomem resources and calls func() with matching resource
+ * ranges. This walks through whole tree and not just first level children.
+ * All the memory ranges which overlap start,end and also match flags and
+ * name are valid candidates.
+ *
+ * @name: name of resource
+ * @flags: resource flags
+ * @start: start addr
+ * @end: end addr
+ */
+int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
+		void *arg, int (*func)(u64, u64, void *))
+{
+	struct resource res;
+	u64 orig_end;
+	int ret = -1;
+
+	res.start = start;
+	res.end = end;
+	res.flags = flags;
+	orig_end = res.end;
+	while ((res.start < res.end) &&
+		(!find_next_iomem_res(&res, name, false))) {
+		ret = (*func)(res.start, res.end, arg);
+		if (ret)
+			break;
+		res.start = res.end + 1;
+		res.end = orig_end;
+	}
+	return ret;
+}
+
+/*
+ * This function calls callback against all memory range of "System RAM"
+ * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
+ * Now, this function is only for "System RAM". This function deals with
+ * full ranges and not pfn. If resources are not pfn aligned, dealing
+ * with pfn can truncate ranges.
+ */
+int walk_system_ram_res(u64 start, u64 end, void *arg,
+				int (*func)(u64, u64, void *))
+{
+	struct resource res;
+	u64 orig_end;
+	int ret = -1;
+
+	res.start = start;
+	res.end = end;
+	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	orig_end = res.end;
+	while ((res.start < res.end) &&
+		(!find_next_iomem_res(&res, "System RAM", true))) {
+		ret = (*func)(res.start, res.end, arg);
+		if (ret)
+			break;
+		res.start = res.end + 1;
+		res.end = orig_end;
+	}
+	return ret;
+}
+
+#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
+
 /*
  * This function calls callback against all memory range of "System RAM"
  * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
@@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	orig_end = res.end;
 	while ((res.start < res.end) &&
-		(find_next_system_ram(&res, "System RAM") >= 0)) {
+		(find_next_iomem_res(&res, "System RAM", true) >= 0)) {
 		pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		end_pfn = (res.end + 1) >> PAGE_SHIFT;
 		if (end_pfn > pfn)
-- 
cgit v1.2.3


From 815d5704a337a662bf960757edbff7a0680d40fd Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:25:52 -0700
Subject: kexec: make kexec_segment user buffer pointer a union

So far kexec_segment->buf was always a user space pointer as user space
passed the array of kexec_segment structures and kernel copied it.

But with new system call, list of kexec segments will be prepared by
kernel and kexec_segment->buf will point to a kernel memory.

So while I was adding code where I made assumption that ->buf is pointing
to kernel memory, sparse started giving warning.

Make ->buf a union.  And where a user space pointer is expected, access it
using ->buf and where a kernel space pointer is expected, access it using
->kbuf.  That takes care of sparse warnings.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kexec.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d9bb0a57d208..66d56ac0f64c 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -69,7 +69,18 @@ typedef unsigned long kimage_entry_t;
 #define IND_SOURCE       0x8
 
 struct kexec_segment {
-	void __user *buf;
+	/*
+	 * This pointer can point to user memory if kexec_load() system
+	 * call is used or will point to kernel memory if
+	 * kexec_file_load() system call is used.
+	 *
+	 * Use ->buf when expecting to deal with user memory and use ->kbuf
+	 * when expecting to deal with kernel memory.
+	 */
+	union {
+		void __user *buf;
+		void *kbuf;
+	};
 	size_t bufsz;
 	unsigned long mem;
 	size_t memsz;
-- 
cgit v1.2.3


From f0895685c7fd8c938c91a9d8a6f7c11f22df58d2 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:25:55 -0700
Subject: kexec: new syscall kexec_file_load() declaration

This is the new syscall kexec_file_load() declaration/interface.  I have
reserved the syscall number only for x86_64 so far.  Other architectures
(including i386) can reserve syscall number when they enable the support
for this new syscall.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/syscalls/syscall_64.tbl | 1 +
 include/linux/syscalls.h         | 4 ++++
 kernel/kexec.c                   | 7 +++++++
 kernel/sys_ni.c                  | 1 +
 4 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index ca2b9aa78c81..35dd922727b9 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -326,6 +326,7 @@
 317	common	seccomp			sys_seccomp
 318	common	getrandom		sys_getrandom
 319	common	memfd_create		sys_memfd_create
+320	common	kexec_file_load		sys_kexec_file_load
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 15a069425cbf..0f86d85a9ce4 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -317,6 +317,10 @@ asmlinkage long sys_restart_syscall(void);
 asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
 				struct kexec_segment __user *segments,
 				unsigned long flags);
+asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd,
+				    unsigned long cmdline_len,
+				    const char __user *cmdline_ptr,
+				    unsigned long flags);
 
 asmlinkage long sys_exit(int error_code);
 asmlinkage long sys_exit_group(int error_code);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bfdda316697d..ec4386c1b94f 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1058,6 +1058,13 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
 }
 #endif
 
+SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
+		unsigned long, cmdline_len, const char __user *, cmdline_ptr,
+		unsigned long, flags)
+{
+	return -ENOSYS;
+}
+
 void crash_kexec(struct pt_regs *regs)
 {
 	/* Take the kexec_mutex here to prevent sys_kexec_load
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 1f79e3714533..391d4ddb6f4b 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapon);
 cond_syscall(sys_swapoff);
 cond_syscall(sys_kexec_load);
 cond_syscall(compat_sys_kexec_load);
+cond_syscall(sys_kexec_file_load);
 cond_syscall(sys_init_module);
 cond_syscall(sys_finit_module);
 cond_syscall(sys_delete_module);
-- 
cgit v1.2.3


From cb1052581e2bddd6096544f3f944f4e7fdad4c7f Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:25:57 -0700
Subject: kexec: implementation of new syscall kexec_file_load

Previous patch provided the interface definition and this patch prvides
implementation of new syscall.

Previously segment list was prepared in user space.  Now user space just
passes kernel fd, initrd fd and command line and kernel will create a
segment list internally.

This patch contains generic part of the code.  Actual segment preparation
and loading is done by arch and image specific loader.  Which comes in
next patch.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/machine_kexec_64.c |  45 ++++
 include/linux/kexec.h              |  53 ++++
 include/uapi/linux/kexec.h         |  11 +
 kernel/kexec.c                     | 483 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 587 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 679cef0791cd..c8875b5545e1 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -22,6 +22,10 @@
 #include <asm/mmu_context.h>
 #include <asm/debugreg.h>
 
+static struct kexec_file_ops *kexec_file_loaders[] = {
+		NULL,
+};
+
 static void free_transition_pgtable(struct kimage *image)
 {
 	free_page((unsigned long)image->arch.pud);
@@ -283,3 +287,44 @@ void arch_crash_save_vmcoreinfo(void)
 			      (unsigned long)&_text - __START_KERNEL);
 }
 
+/* arch-dependent functionality related to kexec file-based syscall */
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+				  unsigned long buf_len)
+{
+	int i, ret = -ENOEXEC;
+	struct kexec_file_ops *fops;
+
+	for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+		fops = kexec_file_loaders[i];
+		if (!fops || !fops->probe)
+			continue;
+
+		ret = fops->probe(buf, buf_len);
+		if (!ret) {
+			image->fops = fops;
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+	if (!image->fops || !image->fops->load)
+		return ERR_PTR(-ENOEXEC);
+
+	return image->fops->load(image, image->kernel_buf,
+				 image->kernel_buf_len, image->initrd_buf,
+				 image->initrd_buf_len, image->cmdline_buf,
+				 image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	if (!image->fops || !image->fops->cleanup)
+		return 0;
+
+	return image->fops->cleanup(image);
+}
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 66d56ac0f64c..8e80901e466f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -121,13 +121,57 @@ struct kimage {
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
 	unsigned int preserve_context : 1;
+	/* If set, we are using file mode kexec syscall */
+	unsigned int file_mode:1;
 
 #ifdef ARCH_HAS_KIMAGE_ARCH
 	struct kimage_arch arch;
 #endif
+
+	/* Additional fields for file based kexec syscall */
+	void *kernel_buf;
+	unsigned long kernel_buf_len;
+
+	void *initrd_buf;
+	unsigned long initrd_buf_len;
+
+	char *cmdline_buf;
+	unsigned long cmdline_buf_len;
+
+	/* File operations provided by image loader */
+	struct kexec_file_ops *fops;
+
+	/* Image loader handling the kernel can store a pointer here */
+	void *image_loader_data;
 };
 
+/*
+ * Keeps track of buffer parameters as provided by caller for requesting
+ * memory placement of buffer.
+ */
+struct kexec_buf {
+	struct kimage *image;
+	char *buffer;
+	unsigned long bufsz;
+	unsigned long memsz;
+	unsigned long buf_align;
+	unsigned long buf_min;
+	unsigned long buf_max;
+	bool top_down;		/* allocate from top of memory hole */
+};
 
+typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
+typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
+			     unsigned long kernel_len, char *initrd,
+			     unsigned long initrd_len, char *cmdline,
+			     unsigned long cmdline_len);
+typedef int (kexec_cleanup_t)(struct kimage *image);
+
+struct kexec_file_ops {
+	kexec_probe_t *probe;
+	kexec_load_t *load;
+	kexec_cleanup_t *cleanup;
+};
 
 /* kexec interface functions */
 extern void machine_kexec(struct kimage *image);
@@ -138,6 +182,11 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
 					struct kexec_segment __user *segments,
 					unsigned long flags);
 extern int kernel_kexec(void);
+extern int kexec_add_buffer(struct kimage *image, char *buffer,
+			    unsigned long bufsz, unsigned long memsz,
+			    unsigned long buf_align, unsigned long buf_min,
+			    unsigned long buf_max, bool top_down,
+			    unsigned long *load_addr);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
 extern void crash_kexec(struct pt_regs *);
@@ -188,6 +237,10 @@ extern int kexec_load_disabled;
 #define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
 #endif
 
+/* List of defined/legal kexec file flags */
+#define KEXEC_FILE_FLAGS	(KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
+				 KEXEC_FILE_NO_INITRAMFS)
+
 #define VMCOREINFO_BYTES           (4096)
 #define VMCOREINFO_NOTE_NAME       "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index d6629d49a243..6925f5b42f89 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -13,6 +13,17 @@
 #define KEXEC_PRESERVE_CONTEXT	0x00000002
 #define KEXEC_ARCH_MASK		0xffff0000
 
+/*
+ * Kexec file load interface flags.
+ * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image.
+ * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
+ * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
+ *                           fd field.
+ */
+#define KEXEC_FILE_UNLOAD	0x00000001
+#define KEXEC_FILE_ON_CRASH	0x00000002
+#define KEXEC_FILE_NO_INITRAMFS	0x00000004
+
 /* These values match the ELF architecture values.
  * Unless there is a good reason that should continue to be the case.
  */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ec4386c1b94f..9b46219254dd 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)	"kexec: " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
@@ -327,6 +329,221 @@ out_free_image:
 	return ret;
 }
 
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
+{
+	struct fd f = fdget(fd);
+	int ret;
+	struct kstat stat;
+	loff_t pos;
+	ssize_t bytes = 0;
+
+	if (!f.file)
+		return -EBADF;
+
+	ret = vfs_getattr(&f.file->f_path, &stat);
+	if (ret)
+		goto out;
+
+	if (stat.size > INT_MAX) {
+		ret = -EFBIG;
+		goto out;
+	}
+
+	/* Don't hand 0 to vmalloc, it whines. */
+	if (stat.size == 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	*buf = vmalloc(stat.size);
+	if (!*buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	pos = 0;
+	while (pos < stat.size) {
+		bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+				    stat.size - pos);
+		if (bytes < 0) {
+			vfree(*buf);
+			ret = bytes;
+			goto out;
+		}
+
+		if (bytes == 0)
+			break;
+		pos += bytes;
+	}
+
+	if (pos != stat.size) {
+		ret = -EBADF;
+		vfree(*buf);
+		goto out;
+	}
+
+	*buf_len = pos;
+out:
+	fdput(f);
+	return ret;
+}
+
+/* Architectures can provide this probe function */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+					 unsigned long buf_len)
+{
+	return -ENOEXEC;
+}
+
+void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+{
+	return ERR_PTR(-ENOEXEC);
+}
+
+void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Free up memory used by kernel, initrd, and comand line. This is temporary
+ * memory allocation which is not needed any more after these buffers have
+ * been loaded into separate segments and have been copied elsewhere.
+ */
+static void kimage_file_post_load_cleanup(struct kimage *image)
+{
+	vfree(image->kernel_buf);
+	image->kernel_buf = NULL;
+
+	vfree(image->initrd_buf);
+	image->initrd_buf = NULL;
+
+	kfree(image->cmdline_buf);
+	image->cmdline_buf = NULL;
+
+	/* See if architecture has anything to cleanup post load */
+	arch_kimage_file_post_load_cleanup(image);
+}
+
+/*
+ * In file mode list of segments is prepared by kernel. Copy relevant
+ * data from user space, do error checking, prepare segment list
+ */
+static int
+kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
+			     const char __user *cmdline_ptr,
+			     unsigned long cmdline_len, unsigned flags)
+{
+	int ret = 0;
+	void *ldata;
+
+	ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+				&image->kernel_buf_len);
+	if (ret)
+		return ret;
+
+	/* Call arch image probe handlers */
+	ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+					    image->kernel_buf_len);
+
+	if (ret)
+		goto out;
+
+	/* It is possible that there no initramfs is being loaded */
+	if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
+		ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+					&image->initrd_buf_len);
+		if (ret)
+			goto out;
+	}
+
+	if (cmdline_len) {
+		image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
+		if (!image->cmdline_buf) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
+				     cmdline_len);
+		if (ret) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		image->cmdline_buf_len = cmdline_len;
+
+		/* command line should be a string with last byte null */
+		if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* Call arch image load handlers */
+	ldata = arch_kexec_kernel_image_load(image);
+
+	if (IS_ERR(ldata)) {
+		ret = PTR_ERR(ldata);
+		goto out;
+	}
+
+	image->image_loader_data = ldata;
+out:
+	/* In case of error, free up all allocated memory in this function */
+	if (ret)
+		kimage_file_post_load_cleanup(image);
+	return ret;
+}
+
+static int
+kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
+		       int initrd_fd, const char __user *cmdline_ptr,
+		       unsigned long cmdline_len, unsigned long flags)
+{
+	int ret;
+	struct kimage *image;
+
+	image = do_kimage_alloc_init();
+	if (!image)
+		return -ENOMEM;
+
+	image->file_mode = 1;
+
+	ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+					   cmdline_ptr, cmdline_len, flags);
+	if (ret)
+		goto out_free_image;
+
+	ret = sanity_check_segment_list(image);
+	if (ret)
+		goto out_free_post_load_bufs;
+
+	ret = -ENOMEM;
+	image->control_code_page = kimage_alloc_control_pages(image,
+					   get_order(KEXEC_CONTROL_PAGE_SIZE));
+	if (!image->control_code_page) {
+		pr_err("Could not allocate control_code_buffer\n");
+		goto out_free_post_load_bufs;
+	}
+
+	image->swap_page = kimage_alloc_control_pages(image, 0);
+	if (!image->swap_page) {
+		pr_err(KERN_ERR "Could not allocate swap buffer\n");
+		goto out_free_control_pages;
+	}
+
+	*rimage = image;
+	return 0;
+out_free_control_pages:
+	kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+	kimage_file_post_load_cleanup(image);
+	kfree(image->image_loader_data);
+out_free_image:
+	kfree(image);
+	return ret;
+}
+
 static int kimage_is_destination_range(struct kimage *image,
 					unsigned long start,
 					unsigned long end)
@@ -644,6 +861,16 @@ static void kimage_free(struct kimage *image)
 
 	/* Free the kexec control pages... */
 	kimage_free_page_list(&image->control_pages);
+
+	kfree(image->image_loader_data);
+
+	/*
+	 * Free up any temporary buffers allocated. This might hit if
+	 * error occurred much later after buffer allocation.
+	 */
+	if (image->file_mode)
+		kimage_file_post_load_cleanup(image);
+
 	kfree(image);
 }
 
@@ -772,10 +999,14 @@ static int kimage_load_normal_segment(struct kimage *image,
 	unsigned long maddr;
 	size_t ubytes, mbytes;
 	int result;
-	unsigned char __user *buf;
+	unsigned char __user *buf = NULL;
+	unsigned char *kbuf = NULL;
 
 	result = 0;
-	buf = segment->buf;
+	if (image->file_mode)
+		kbuf = segment->kbuf;
+	else
+		buf = segment->buf;
 	ubytes = segment->bufsz;
 	mbytes = segment->memsz;
 	maddr = segment->mem;
@@ -807,7 +1038,11 @@ static int kimage_load_normal_segment(struct kimage *image,
 				PAGE_SIZE - (maddr & ~PAGE_MASK));
 		uchunk = min(ubytes, mchunk);
 
-		result = copy_from_user(ptr, buf, uchunk);
+		/* For file based kexec, source pages are in kernel memory */
+		if (image->file_mode)
+			memcpy(ptr, kbuf, uchunk);
+		else
+			result = copy_from_user(ptr, buf, uchunk);
 		kunmap(page);
 		if (result) {
 			result = -EFAULT;
@@ -815,7 +1050,10 @@ static int kimage_load_normal_segment(struct kimage *image,
 		}
 		ubytes -= uchunk;
 		maddr  += mchunk;
-		buf    += mchunk;
+		if (image->file_mode)
+			kbuf += mchunk;
+		else
+			buf += mchunk;
 		mbytes -= mchunk;
 	}
 out:
@@ -1062,7 +1300,72 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 		unsigned long, cmdline_len, const char __user *, cmdline_ptr,
 		unsigned long, flags)
 {
-	return -ENOSYS;
+	int ret = 0, i;
+	struct kimage **dest_image, *image;
+
+	/* We only trust the superuser with rebooting the system. */
+	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+		return -EPERM;
+
+	/* Make sure we have a legal set of flags */
+	if (flags != (flags & KEXEC_FILE_FLAGS))
+		return -EINVAL;
+
+	image = NULL;
+
+	if (!mutex_trylock(&kexec_mutex))
+		return -EBUSY;
+
+	dest_image = &kexec_image;
+	if (flags & KEXEC_FILE_ON_CRASH)
+		dest_image = &kexec_crash_image;
+
+	if (flags & KEXEC_FILE_UNLOAD)
+		goto exchange;
+
+	/*
+	 * In case of crash, new kernel gets loaded in reserved region. It is
+	 * same memory where old crash kernel might be loaded. Free any
+	 * current crash dump kernel before we corrupt it.
+	 */
+	if (flags & KEXEC_FILE_ON_CRASH)
+		kimage_free(xchg(&kexec_crash_image, NULL));
+
+	ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+				     cmdline_len, flags);
+	if (ret)
+		goto out;
+
+	ret = machine_kexec_prepare(image);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < image->nr_segments; i++) {
+		struct kexec_segment *ksegment;
+
+		ksegment = &image->segment[i];
+		pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+			 i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+			 ksegment->memsz);
+
+		ret = kimage_load_segment(image, &image->segment[i]);
+		if (ret)
+			goto out;
+	}
+
+	kimage_terminate(image);
+
+	/*
+	 * Free up any temporary buffers allocated which are not needed
+	 * after image has been loaded
+	 */
+	kimage_file_post_load_cleanup(image);
+exchange:
+	image = xchg(dest_image, image);
+out:
+	mutex_unlock(&kexec_mutex);
+	kimage_free(image);
+	return ret;
 }
 
 void crash_kexec(struct pt_regs *regs)
@@ -1620,6 +1923,176 @@ static int __init crash_save_vmcoreinfo_init(void)
 
 subsys_initcall(crash_save_vmcoreinfo_init);
 
+static int __kexec_add_segment(struct kimage *image, char *buf,
+			       unsigned long bufsz, unsigned long mem,
+			       unsigned long memsz)
+{
+	struct kexec_segment *ksegment;
+
+	ksegment = &image->segment[image->nr_segments];
+	ksegment->kbuf = buf;
+	ksegment->bufsz = bufsz;
+	ksegment->mem = mem;
+	ksegment->memsz = memsz;
+	image->nr_segments++;
+
+	return 0;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+				    struct kexec_buf *kbuf)
+{
+	struct kimage *image = kbuf->image;
+	unsigned long temp_start, temp_end;
+
+	temp_end = min(end, kbuf->buf_max);
+	temp_start = temp_end - kbuf->memsz;
+
+	do {
+		/* align down start */
+		temp_start = temp_start & (~(kbuf->buf_align - 1));
+
+		if (temp_start < start || temp_start < kbuf->buf_min)
+			return 0;
+
+		temp_end = temp_start + kbuf->memsz - 1;
+
+		/*
+		 * Make sure this does not conflict with any of existing
+		 * segments
+		 */
+		if (kimage_is_destination_range(image, temp_start, temp_end)) {
+			temp_start = temp_start - PAGE_SIZE;
+			continue;
+		}
+
+		/* We found a suitable memory range */
+		break;
+	} while (1);
+
+	/* If we are here, we found a suitable memory range */
+	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+			    kbuf->memsz);
+
+	/* Success, stop navigating through remaining System RAM ranges */
+	return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+				     struct kexec_buf *kbuf)
+{
+	struct kimage *image = kbuf->image;
+	unsigned long temp_start, temp_end;
+
+	temp_start = max(start, kbuf->buf_min);
+
+	do {
+		temp_start = ALIGN(temp_start, kbuf->buf_align);
+		temp_end = temp_start + kbuf->memsz - 1;
+
+		if (temp_end > end || temp_end > kbuf->buf_max)
+			return 0;
+		/*
+		 * Make sure this does not conflict with any of existing
+		 * segments
+		 */
+		if (kimage_is_destination_range(image, temp_start, temp_end)) {
+			temp_start = temp_start + PAGE_SIZE;
+			continue;
+		}
+
+		/* We found a suitable memory range */
+		break;
+	} while (1);
+
+	/* If we are here, we found a suitable memory range */
+	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+			    kbuf->memsz);
+
+	/* Success, stop navigating through remaining System RAM ranges */
+	return 1;
+}
+
+static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+{
+	struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+	unsigned long sz = end - start + 1;
+
+	/* Returning 0 will take to next memory range */
+	if (sz < kbuf->memsz)
+		return 0;
+
+	if (end < kbuf->buf_min || start > kbuf->buf_max)
+		return 0;
+
+	/*
+	 * Allocate memory top down with-in ram range. Otherwise bottom up
+	 * allocation.
+	 */
+	if (kbuf->top_down)
+		return locate_mem_hole_top_down(start, end, kbuf);
+	return locate_mem_hole_bottom_up(start, end, kbuf);
+}
+
+/*
+ * Helper function for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
+		     unsigned long memsz, unsigned long buf_align,
+		     unsigned long buf_min, unsigned long buf_max,
+		     bool top_down, unsigned long *load_addr)
+{
+
+	struct kexec_segment *ksegment;
+	struct kexec_buf buf, *kbuf;
+	int ret;
+
+	/* Currently adding segment this way is allowed only in file mode */
+	if (!image->file_mode)
+		return -EINVAL;
+
+	if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+		return -EINVAL;
+
+	/*
+	 * Make sure we are not trying to add buffer after allocating
+	 * control pages. All segments need to be placed first before
+	 * any control pages are allocated. As control page allocation
+	 * logic goes through list of segments to make sure there are
+	 * no destination overlaps.
+	 */
+	if (!list_empty(&image->control_pages)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	memset(&buf, 0, sizeof(struct kexec_buf));
+	kbuf = &buf;
+	kbuf->image = image;
+	kbuf->buffer = buffer;
+	kbuf->bufsz = bufsz;
+
+	kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+	kbuf->buf_align = max(buf_align, PAGE_SIZE);
+	kbuf->buf_min = buf_min;
+	kbuf->buf_max = buf_max;
+	kbuf->top_down = top_down;
+
+	/* Walk the RAM ranges and allocate a suitable range for the buffer */
+	ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback);
+	if (ret != 1) {
+		/* A suitable memory range could not be found for buffer */
+		return -EADDRNOTAVAIL;
+	}
+
+	/* Found a suitable memory range */
+	ksegment = &image->segment[image->nr_segments - 1];
+	*load_addr = ksegment->mem;
+	return 0;
+}
+
+
 /*
  * Move into place and start executing a preloaded standalone
  * executable.  If nothing was preloaded return an error.
-- 
cgit v1.2.3


From 12db5562e0352986a265841638482b84f3a6899b Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:26:04 -0700
Subject: kexec: load and relocate purgatory at kernel load time

Load purgatory code in RAM and relocate it based on the location.
Relocation code has been inspired by module relocation code and purgatory
relocation code in kexec-tools.

Also compute the checksums of loaded kexec segments and store them in
purgatory.

Arch independent code provides this functionality so that arch dependent
bootloaders can make use of it.

Helper functions are provided to get/set symbol values in purgatory which
are used by bootloaders later to set things like stack and entry point of
second kernel etc.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/Kconfig                   |   2 +
 arch/ia64/Kconfig                  |   2 +
 arch/m68k/Kconfig                  |   2 +
 arch/mips/Kconfig                  |   2 +
 arch/powerpc/Kconfig               |   2 +
 arch/s390/Kconfig                  |   2 +
 arch/sh/Kconfig                    |   2 +
 arch/tile/Kconfig                  |   2 +
 arch/x86/Kconfig                   |   2 +
 arch/x86/kernel/machine_kexec_64.c | 142 ++++++++++
 include/linux/kexec.h              |  33 +++
 kernel/kexec.c                     | 544 ++++++++++++++++++++++++++++++++++++-
 12 files changed, 736 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8e9dbcbcf5af..cacc8d5355b3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2065,6 +2065,8 @@ config XIP_PHYS_ADDR
 config KEXEC
 	bool "Kexec system call (EXPERIMENTAL)"
 	depends on (!SMP || PM_SLEEP_SMP)
+	select CRYPTO
+	select CRYPTO_SHA256
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c84c88bbbbd7..64aefb76bd69 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -549,6 +549,8 @@ source "drivers/sn/Kconfig"
 config KEXEC
 	bool "kexec system call"
 	depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	select CRYPTO
+	select CRYPTO_SHA256
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 87b7c7581b1d..3ff8c9a25335 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -91,6 +91,8 @@ config MMU_SUN3
 config KEXEC
 	bool "kexec system call"
 	depends on M68KCLASSIC
+	select CRYPTO
+	select CRYPTO_SHA256
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 900c7e5333b6..df51e78a72cc 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2396,6 +2396,8 @@ source "kernel/Kconfig.preempt"
 
 config KEXEC
 	bool "Kexec system call"
+	select CRYPTO
+	select CRYPTO_SHA256
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4bc7b62fb4b6..a577609f8ed6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -399,6 +399,8 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
 config KEXEC
 	bool "kexec system call"
 	depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
+	select CRYPTO
+	select CRYPTO_SHA256
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 05c78bb5f570..ab39ceb89ecf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -48,6 +48,8 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 
 config KEXEC
 	def_bool y
+	select CRYPTO
+	select CRYPTO_SHA256
 
 config AUDIT_ARCH
 	def_bool y
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index aa2df3eaeb29..453fa5c09550 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -595,6 +595,8 @@ source kernel/Kconfig.hz
 config KEXEC
 	bool "kexec system call (EXPERIMENTAL)"
 	depends on SUPERH32 && MMU
+	select CRYPTO
+	select CRYPTO_SHA256
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 7fcd492adbfc..a3ffe2dd4832 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -191,6 +191,8 @@ source "kernel/Kconfig.hz"
 
 config KEXEC
 	bool "kexec system call"
+	select CRYPTO
+	select CRYPTO_SHA256
 	---help---
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 98fe3df6df82..9558b9fcafbf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1583,6 +1583,8 @@ source kernel/Kconfig.hz
 config KEXEC
 	bool "kexec system call"
 	select BUILD_BIN2C
+	select CRYPTO
+	select CRYPTO_SHA256
 	---help---
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c8875b5545e1..88404c440727 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)	"kexec: " fmt
+
 #include <linux/mm.h>
 #include <linux/kexec.h>
 #include <linux/string.h>
@@ -328,3 +330,143 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 
 	return image->fops->cleanup(image);
 }
+
+/*
+ * Apply purgatory relocations.
+ *
+ * ehdr: Pointer to elf headers
+ * sechdrs: Pointer to section headers.
+ * relsec: section index of SHT_RELA section.
+ *
+ * TODO: Some of the code belongs to generic code. Move that in kexec.c.
+ */
+int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
+				     Elf64_Shdr *sechdrs, unsigned int relsec)
+{
+	unsigned int i;
+	Elf64_Rela *rel;
+	Elf64_Sym *sym;
+	void *location;
+	Elf64_Shdr *section, *symtabsec;
+	unsigned long address, sec_base, value;
+	const char *strtab, *name, *shstrtab;
+
+	/*
+	 * ->sh_offset has been modified to keep the pointer to section
+	 * contents in memory
+	 */
+	rel = (void *)sechdrs[relsec].sh_offset;
+
+	/* Section to which relocations apply */
+	section = &sechdrs[sechdrs[relsec].sh_info];
+
+	pr_debug("Applying relocate section %u to %u\n", relsec,
+		 sechdrs[relsec].sh_info);
+
+	/* Associated symbol table */
+	symtabsec = &sechdrs[sechdrs[relsec].sh_link];
+
+	/* String table */
+	if (symtabsec->sh_link >= ehdr->e_shnum) {
+		/* Invalid strtab section number */
+		pr_err("Invalid string table section index %d\n",
+		       symtabsec->sh_link);
+		return -ENOEXEC;
+	}
+
+	strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
+
+	/* section header string table */
+	shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
+
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+
+		/*
+		 * rel[i].r_offset contains byte offset from beginning
+		 * of section to the storage unit affected.
+		 *
+		 * This is location to update (->sh_offset). This is temporary
+		 * buffer where section is currently loaded. This will finally
+		 * be loaded to a different address later, pointed to by
+		 * ->sh_addr. kexec takes care of moving it
+		 *  (kexec_load_segment()).
+		 */
+		location = (void *)(section->sh_offset + rel[i].r_offset);
+
+		/* Final address of the location */
+		address = section->sh_addr + rel[i].r_offset;
+
+		/*
+		 * rel[i].r_info contains information about symbol table index
+		 * w.r.t which relocation must be made and type of relocation
+		 * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get
+		 * these respectively.
+		 */
+		sym = (Elf64_Sym *)symtabsec->sh_offset +
+				ELF64_R_SYM(rel[i].r_info);
+
+		if (sym->st_name)
+			name = strtab + sym->st_name;
+		else
+			name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+		pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n",
+			 name, sym->st_info, sym->st_shndx, sym->st_value,
+			 sym->st_size);
+
+		if (sym->st_shndx == SHN_UNDEF) {
+			pr_err("Undefined symbol: %s\n", name);
+			return -ENOEXEC;
+		}
+
+		if (sym->st_shndx == SHN_COMMON) {
+			pr_err("symbol '%s' in common section\n", name);
+			return -ENOEXEC;
+		}
+
+		if (sym->st_shndx == SHN_ABS)
+			sec_base = 0;
+		else if (sym->st_shndx >= ehdr->e_shnum) {
+			pr_err("Invalid section %d for symbol %s\n",
+			       sym->st_shndx, name);
+			return -ENOEXEC;
+		} else
+			sec_base = sechdrs[sym->st_shndx].sh_addr;
+
+		value = sym->st_value;
+		value += sec_base;
+		value += rel[i].r_addend;
+
+		switch (ELF64_R_TYPE(rel[i].r_info)) {
+		case R_X86_64_NONE:
+			break;
+		case R_X86_64_64:
+			*(u64 *)location = value;
+			break;
+		case R_X86_64_32:
+			*(u32 *)location = value;
+			if (value != *(u32 *)location)
+				goto overflow;
+			break;
+		case R_X86_64_32S:
+			*(s32 *)location = value;
+			if ((s64)value != *(s32 *)location)
+				goto overflow;
+			break;
+		case R_X86_64_PC32:
+			value -= (u64)address;
+			*(u32 *)location = value;
+			break;
+		default:
+			pr_err("Unknown rela relocation: %llu\n",
+			       ELF64_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+
+overflow:
+	pr_err("Overflow in relocation type %d value 0x%lx\n",
+	       (int)ELF64_R_TYPE(rel[i].r_info), value);
+	return -ENOEXEC;
+}
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 8e80901e466f..84f09e9eca26 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -10,6 +10,7 @@
 #include <linux/ioport.h>
 #include <linux/elfcore.h>
 #include <linux/elf.h>
+#include <linux/module.h>
 #include <asm/kexec.h>
 
 /* Verify architecture specific macros are defined */
@@ -95,6 +96,27 @@ struct compat_kexec_segment {
 };
 #endif
 
+struct kexec_sha_region {
+	unsigned long start;
+	unsigned long len;
+};
+
+struct purgatory_info {
+	/* Pointer to elf header of read only purgatory */
+	Elf_Ehdr *ehdr;
+
+	/* Pointer to purgatory sechdrs which are modifiable */
+	Elf_Shdr *sechdrs;
+	/*
+	 * Temporary buffer location where purgatory is loaded and relocated
+	 * This memory can be freed post image load
+	 */
+	void *purgatory_buf;
+
+	/* Address where purgatory is finally loaded and is executed from */
+	unsigned long purgatory_load_addr;
+};
+
 struct kimage {
 	kimage_entry_t head;
 	kimage_entry_t *entry;
@@ -143,6 +165,9 @@ struct kimage {
 
 	/* Image loader handling the kernel can store a pointer here */
 	void *image_loader_data;
+
+	/* Information for loading purgatory */
+	struct purgatory_info purgatory_info;
 };
 
 /*
@@ -189,6 +214,14 @@ extern int kexec_add_buffer(struct kimage *image, char *buffer,
 			    unsigned long *load_addr);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
+extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
+				unsigned long max, int top_down,
+				unsigned long *load_addr);
+extern int kexec_purgatory_get_set_symbol(struct kimage *image,
+					  const char *name, void *buf,
+					  unsigned int size, bool get_value);
+extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
+					     const char *name);
 extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 9b46219254dd..669e331aa9ec 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -42,6 +42,9 @@
 #include <asm/io.h>
 #include <asm/sections.h>
 
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+
 /* Per cpu memory for storing cpu states in case of system crash. */
 note_buf_t __percpu *crash_notes;
 
@@ -54,6 +57,15 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 /* Flag to indicate we are going to kexec a new kernel */
 bool kexec_in_progress = false;
 
+/*
+ * Declare these symbols weak so that if architecture provides a purgatory,
+ * these will be overridden.
+ */
+char __weak kexec_purgatory[0];
+size_t __weak kexec_purgatory_size = 0;
+
+static int kexec_calculate_store_digests(struct kimage *image);
+
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
 	.name  = "Crash kernel",
@@ -404,6 +416,24 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
 {
 }
 
+/* Apply relocations of type RELA */
+int __weak
+arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+				 unsigned int relsec)
+{
+	pr_err("RELA relocation unsupported.\n");
+	return -ENOEXEC;
+}
+
+/* Apply relocations of type REL */
+int __weak
+arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+			     unsigned int relsec)
+{
+	pr_err("REL relocation unsupported.\n");
+	return -ENOEXEC;
+}
+
 /*
  * Free up memory used by kernel, initrd, and comand line. This is temporary
  * memory allocation which is not needed any more after these buffers have
@@ -411,6 +441,8 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
  */
 static void kimage_file_post_load_cleanup(struct kimage *image)
 {
+	struct purgatory_info *pi = &image->purgatory_info;
+
 	vfree(image->kernel_buf);
 	image->kernel_buf = NULL;
 
@@ -420,6 +452,12 @@ static void kimage_file_post_load_cleanup(struct kimage *image)
 	kfree(image->cmdline_buf);
 	image->cmdline_buf = NULL;
 
+	vfree(pi->purgatory_buf);
+	pi->purgatory_buf = NULL;
+
+	vfree(pi->sechdrs);
+	pi->sechdrs = NULL;
+
 	/* See if architecture has anything to cleanup post load */
 	arch_kimage_file_post_load_cleanup(image);
 }
@@ -1105,7 +1143,7 @@ static int kimage_load_crash_segment(struct kimage *image,
 		}
 		ubytes -= uchunk;
 		maddr  += mchunk;
-		buf    += mchunk;
+		buf += mchunk;
 		mbytes -= mchunk;
 	}
 out:
@@ -1340,6 +1378,10 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 	if (ret)
 		goto out;
 
+	ret = kexec_calculate_store_digests(image);
+	if (ret)
+		goto out;
+
 	for (i = 0; i < image->nr_segments; i++) {
 		struct kexec_segment *ksegment;
 
@@ -2092,6 +2134,506 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
 	return 0;
 }
 
+/* Calculate and store the digest of segments */
+static int kexec_calculate_store_digests(struct kimage *image)
+{
+	struct crypto_shash *tfm;
+	struct shash_desc *desc;
+	int ret = 0, i, j, zero_buf_sz, sha_region_sz;
+	size_t desc_size, nullsz;
+	char *digest;
+	void *zero_buf;
+	struct kexec_sha_region *sha_regions;
+	struct purgatory_info *pi = &image->purgatory_info;
+
+	zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
+	zero_buf_sz = PAGE_SIZE;
+
+	tfm = crypto_alloc_shash("sha256", 0, 0);
+	if (IS_ERR(tfm)) {
+		ret = PTR_ERR(tfm);
+		goto out;
+	}
+
+	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
+	desc = kzalloc(desc_size, GFP_KERNEL);
+	if (!desc) {
+		ret = -ENOMEM;
+		goto out_free_tfm;
+	}
+
+	sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
+	sha_regions = vzalloc(sha_region_sz);
+	if (!sha_regions)
+		goto out_free_desc;
+
+	desc->tfm   = tfm;
+	desc->flags = 0;
+
+	ret = crypto_shash_init(desc);
+	if (ret < 0)
+		goto out_free_sha_regions;
+
+	digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
+	if (!digest) {
+		ret = -ENOMEM;
+		goto out_free_sha_regions;
+	}
+
+	for (j = i = 0; i < image->nr_segments; i++) {
+		struct kexec_segment *ksegment;
+
+		ksegment = &image->segment[i];
+		/*
+		 * Skip purgatory as it will be modified once we put digest
+		 * info in purgatory.
+		 */
+		if (ksegment->kbuf == pi->purgatory_buf)
+			continue;
+
+		ret = crypto_shash_update(desc, ksegment->kbuf,
+					  ksegment->bufsz);
+		if (ret)
+			break;
+
+		/*
+		 * Assume rest of the buffer is filled with zero and
+		 * update digest accordingly.
+		 */
+		nullsz = ksegment->memsz - ksegment->bufsz;
+		while (nullsz) {
+			unsigned long bytes = nullsz;
+
+			if (bytes > zero_buf_sz)
+				bytes = zero_buf_sz;
+			ret = crypto_shash_update(desc, zero_buf, bytes);
+			if (ret)
+				break;
+			nullsz -= bytes;
+		}
+
+		if (ret)
+			break;
+
+		sha_regions[j].start = ksegment->mem;
+		sha_regions[j].len = ksegment->memsz;
+		j++;
+	}
+
+	if (!ret) {
+		ret = crypto_shash_final(desc, digest);
+		if (ret)
+			goto out_free_digest;
+		ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
+						sha_regions, sha_region_sz, 0);
+		if (ret)
+			goto out_free_digest;
+
+		ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
+						digest, SHA256_DIGEST_SIZE, 0);
+		if (ret)
+			goto out_free_digest;
+	}
+
+out_free_digest:
+	kfree(digest);
+out_free_sha_regions:
+	vfree(sha_regions);
+out_free_desc:
+	kfree(desc);
+out_free_tfm:
+	kfree(tfm);
+out:
+	return ret;
+}
+
+/* Actually load purgatory. Lot of code taken from kexec-tools */
+static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
+				  unsigned long max, int top_down)
+{
+	struct purgatory_info *pi = &image->purgatory_info;
+	unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
+	unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
+	unsigned char *buf_addr, *src;
+	int i, ret = 0, entry_sidx = -1;
+	const Elf_Shdr *sechdrs_c;
+	Elf_Shdr *sechdrs = NULL;
+	void *purgatory_buf = NULL;
+
+	/*
+	 * sechdrs_c points to section headers in purgatory and are read
+	 * only. No modifications allowed.
+	 */
+	sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
+
+	/*
+	 * We can not modify sechdrs_c[] and its fields. It is read only.
+	 * Copy it over to a local copy where one can store some temporary
+	 * data and free it at the end. We need to modify ->sh_addr and
+	 * ->sh_offset fields to keep track of permanent and temporary
+	 * locations of sections.
+	 */
+	sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+	if (!sechdrs)
+		return -ENOMEM;
+
+	memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+
+	/*
+	 * We seem to have multiple copies of sections. First copy is which
+	 * is embedded in kernel in read only section. Some of these sections
+	 * will be copied to a temporary buffer and relocated. And these
+	 * sections will finally be copied to their final destination at
+	 * segment load time.
+	 *
+	 * Use ->sh_offset to reflect section address in memory. It will
+	 * point to original read only copy if section is not allocatable.
+	 * Otherwise it will point to temporary copy which will be relocated.
+	 *
+	 * Use ->sh_addr to contain final address of the section where it
+	 * will go during execution time.
+	 */
+	for (i = 0; i < pi->ehdr->e_shnum; i++) {
+		if (sechdrs[i].sh_type == SHT_NOBITS)
+			continue;
+
+		sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
+						sechdrs[i].sh_offset;
+	}
+
+	/*
+	 * Identify entry point section and make entry relative to section
+	 * start.
+	 */
+	entry = pi->ehdr->e_entry;
+	for (i = 0; i < pi->ehdr->e_shnum; i++) {
+		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+			continue;
+
+		if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
+			continue;
+
+		/* Make entry section relative */
+		if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
+		    ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
+		     pi->ehdr->e_entry)) {
+			entry_sidx = i;
+			entry -= sechdrs[i].sh_addr;
+			break;
+		}
+	}
+
+	/* Determine how much memory is needed to load relocatable object. */
+	buf_align = 1;
+	bss_align = 1;
+	buf_sz = 0;
+	bss_sz = 0;
+
+	for (i = 0; i < pi->ehdr->e_shnum; i++) {
+		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+			continue;
+
+		align = sechdrs[i].sh_addralign;
+		if (sechdrs[i].sh_type != SHT_NOBITS) {
+			if (buf_align < align)
+				buf_align = align;
+			buf_sz = ALIGN(buf_sz, align);
+			buf_sz += sechdrs[i].sh_size;
+		} else {
+			/* bss section */
+			if (bss_align < align)
+				bss_align = align;
+			bss_sz = ALIGN(bss_sz, align);
+			bss_sz += sechdrs[i].sh_size;
+		}
+	}
+
+	/* Determine the bss padding required to align bss properly */
+	bss_pad = 0;
+	if (buf_sz & (bss_align - 1))
+		bss_pad = bss_align - (buf_sz & (bss_align - 1));
+
+	memsz = buf_sz + bss_pad + bss_sz;
+
+	/* Allocate buffer for purgatory */
+	purgatory_buf = vzalloc(buf_sz);
+	if (!purgatory_buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (buf_align < bss_align)
+		buf_align = bss_align;
+
+	/* Add buffer to segment list */
+	ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
+				buf_align, min, max, top_down,
+				&pi->purgatory_load_addr);
+	if (ret)
+		goto out;
+
+	/* Load SHF_ALLOC sections */
+	buf_addr = purgatory_buf;
+	load_addr = curr_load_addr = pi->purgatory_load_addr;
+	bss_addr = load_addr + buf_sz + bss_pad;
+
+	for (i = 0; i < pi->ehdr->e_shnum; i++) {
+		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+			continue;
+
+		align = sechdrs[i].sh_addralign;
+		if (sechdrs[i].sh_type != SHT_NOBITS) {
+			curr_load_addr = ALIGN(curr_load_addr, align);
+			offset = curr_load_addr - load_addr;
+			/* We already modifed ->sh_offset to keep src addr */
+			src = (char *) sechdrs[i].sh_offset;
+			memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
+
+			/* Store load address and source address of section */
+			sechdrs[i].sh_addr = curr_load_addr;
+
+			/*
+			 * This section got copied to temporary buffer. Update
+			 * ->sh_offset accordingly.
+			 */
+			sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
+
+			/* Advance to the next address */
+			curr_load_addr += sechdrs[i].sh_size;
+		} else {
+			bss_addr = ALIGN(bss_addr, align);
+			sechdrs[i].sh_addr = bss_addr;
+			bss_addr += sechdrs[i].sh_size;
+		}
+	}
+
+	/* Update entry point based on load address of text section */
+	if (entry_sidx >= 0)
+		entry += sechdrs[entry_sidx].sh_addr;
+
+	/* Make kernel jump to purgatory after shutdown */
+	image->start = entry;
+
+	/* Used later to get/set symbol values */
+	pi->sechdrs = sechdrs;
+
+	/*
+	 * Used later to identify which section is purgatory and skip it
+	 * from checksumming.
+	 */
+	pi->purgatory_buf = purgatory_buf;
+	return ret;
+out:
+	vfree(sechdrs);
+	vfree(purgatory_buf);
+	return ret;
+}
+
+static int kexec_apply_relocations(struct kimage *image)
+{
+	int i, ret;
+	struct purgatory_info *pi = &image->purgatory_info;
+	Elf_Shdr *sechdrs = pi->sechdrs;
+
+	/* Apply relocations */
+	for (i = 0; i < pi->ehdr->e_shnum; i++) {
+		Elf_Shdr *section, *symtab;
+
+		if (sechdrs[i].sh_type != SHT_RELA &&
+		    sechdrs[i].sh_type != SHT_REL)
+			continue;
+
+		/*
+		 * For section of type SHT_RELA/SHT_REL,
+		 * ->sh_link contains section header index of associated
+		 * symbol table. And ->sh_info contains section header
+		 * index of section to which relocations apply.
+		 */
+		if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
+		    sechdrs[i].sh_link >= pi->ehdr->e_shnum)
+			return -ENOEXEC;
+
+		section = &sechdrs[sechdrs[i].sh_info];
+		symtab = &sechdrs[sechdrs[i].sh_link];
+
+		if (!(section->sh_flags & SHF_ALLOC))
+			continue;
+
+		/*
+		 * symtab->sh_link contain section header index of associated
+		 * string table.
+		 */
+		if (symtab->sh_link >= pi->ehdr->e_shnum)
+			/* Invalid section number? */
+			continue;
+
+		/*
+		 * Respective archicture needs to provide support for applying
+		 * relocations of type SHT_RELA/SHT_REL.
+		 */
+		if (sechdrs[i].sh_type == SHT_RELA)
+			ret = arch_kexec_apply_relocations_add(pi->ehdr,
+							       sechdrs, i);
+		else if (sechdrs[i].sh_type == SHT_REL)
+			ret = arch_kexec_apply_relocations(pi->ehdr,
+							   sechdrs, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Load relocatable purgatory object and relocate it appropriately */
+int kexec_load_purgatory(struct kimage *image, unsigned long min,
+			 unsigned long max, int top_down,
+			 unsigned long *load_addr)
+{
+	struct purgatory_info *pi = &image->purgatory_info;
+	int ret;
+
+	if (kexec_purgatory_size <= 0)
+		return -EINVAL;
+
+	if (kexec_purgatory_size < sizeof(Elf_Ehdr))
+		return -ENOEXEC;
+
+	pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
+
+	if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
+	    || pi->ehdr->e_type != ET_REL
+	    || !elf_check_arch(pi->ehdr)
+	    || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
+		return -ENOEXEC;
+
+	if (pi->ehdr->e_shoff >= kexec_purgatory_size
+	    || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
+	    kexec_purgatory_size - pi->ehdr->e_shoff))
+		return -ENOEXEC;
+
+	ret = __kexec_load_purgatory(image, min, max, top_down);
+	if (ret)
+		return ret;
+
+	ret = kexec_apply_relocations(image);
+	if (ret)
+		goto out;
+
+	*load_addr = pi->purgatory_load_addr;
+	return 0;
+out:
+	vfree(pi->sechdrs);
+	vfree(pi->purgatory_buf);
+	return ret;
+}
+
+static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
+					    const char *name)
+{
+	Elf_Sym *syms;
+	Elf_Shdr *sechdrs;
+	Elf_Ehdr *ehdr;
+	int i, k;
+	const char *strtab;
+
+	if (!pi->sechdrs || !pi->ehdr)
+		return NULL;
+
+	sechdrs = pi->sechdrs;
+	ehdr = pi->ehdr;
+
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		if (sechdrs[i].sh_type != SHT_SYMTAB)
+			continue;
+
+		if (sechdrs[i].sh_link >= ehdr->e_shnum)
+			/* Invalid strtab section number */
+			continue;
+		strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
+		syms = (Elf_Sym *)sechdrs[i].sh_offset;
+
+		/* Go through symbols for a match */
+		for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
+			if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
+				continue;
+
+			if (strcmp(strtab + syms[k].st_name, name) != 0)
+				continue;
+
+			if (syms[k].st_shndx == SHN_UNDEF ||
+			    syms[k].st_shndx >= ehdr->e_shnum) {
+				pr_debug("Symbol: %s has bad section index %d.\n",
+						name, syms[k].st_shndx);
+				return NULL;
+			}
+
+			/* Found the symbol we are looking for */
+			return &syms[k];
+		}
+	}
+
+	return NULL;
+}
+
+void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
+{
+	struct purgatory_info *pi = &image->purgatory_info;
+	Elf_Sym *sym;
+	Elf_Shdr *sechdr;
+
+	sym = kexec_purgatory_find_symbol(pi, name);
+	if (!sym)
+		return ERR_PTR(-EINVAL);
+
+	sechdr = &pi->sechdrs[sym->st_shndx];
+
+	/*
+	 * Returns the address where symbol will finally be loaded after
+	 * kexec_load_segment()
+	 */
+	return (void *)(sechdr->sh_addr + sym->st_value);
+}
+
+/*
+ * Get or set value of a symbol. If "get_value" is true, symbol value is
+ * returned in buf otherwise symbol value is set based on value in buf.
+ */
+int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
+				   void *buf, unsigned int size, bool get_value)
+{
+	Elf_Sym *sym;
+	Elf_Shdr *sechdrs;
+	struct purgatory_info *pi = &image->purgatory_info;
+	char *sym_buf;
+
+	sym = kexec_purgatory_find_symbol(pi, name);
+	if (!sym)
+		return -EINVAL;
+
+	if (sym->st_size != size) {
+		pr_err("symbol %s size mismatch: expected %lu actual %u\n",
+		       name, (unsigned long)sym->st_size, size);
+		return -EINVAL;
+	}
+
+	sechdrs = pi->sechdrs;
+
+	if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
+		pr_err("symbol %s is in a bss section. Cannot %s\n", name,
+		       get_value ? "get" : "set");
+		return -EINVAL;
+	}
+
+	sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
+					sym->st_value;
+
+	if (get_value)
+		memcpy((void *)buf, sym_buf, size);
+	else
+		memcpy((void *)sym_buf, buf, size);
+
+	return 0;
+}
 
 /*
  * Move into place and start executing a preloaded standalone
-- 
cgit v1.2.3


From 27f48d3e633be23656a097baa3be336e04a82d84 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:26:06 -0700
Subject: kexec-bzImage64: support for loading bzImage using 64bit entry

This is loader specific code which can load bzImage and set it up for
64bit entry.  This does not take care of 32bit entry or real mode entry.

32bit mode entry can be implemented if somebody needs it.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/kexec-bzimage64.h |   6 +
 arch/x86/include/asm/kexec.h           |  21 ++
 arch/x86/kernel/Makefile               |   1 +
 arch/x86/kernel/kexec-bzimage64.c      | 375 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/machine_kexec_64.c     |   5 +-
 include/linux/kexec.h                  |   2 +-
 kernel/kexec.c                         |  11 +-
 7 files changed, 415 insertions(+), 6 deletions(-)
 create mode 100644 arch/x86/include/asm/kexec-bzimage64.h
 create mode 100644 arch/x86/kernel/kexec-bzimage64.c

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h
new file mode 100644
index 000000000000..d1b5d194e31d
--- /dev/null
+++ b/arch/x86/include/asm/kexec-bzimage64.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_KEXEC_BZIMAGE64_H
+#define _ASM_KEXEC_BZIMAGE64_H
+
+extern struct kexec_file_ops kexec_bzImage64_ops;
+
+#endif  /* _ASM_KEXE_BZIMAGE64_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 17483a492f18..0dfccced4edf 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -23,6 +23,7 @@
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
+#include <asm/bootparam.h>
 
 /*
  * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
@@ -161,6 +162,26 @@ struct kimage_arch {
 	pmd_t *pmd;
 	pte_t *pte;
 };
+
+struct kexec_entry64_regs {
+	uint64_t rax;
+	uint64_t rbx;
+	uint64_t rcx;
+	uint64_t rdx;
+	uint64_t rsi;
+	uint64_t rdi;
+	uint64_t rsp;
+	uint64_t rbp;
+	uint64_t r8;
+	uint64_t r9;
+	uint64_t r10;
+	uint64_t r11;
+	uint64_t r12;
+	uint64_t r13;
+	uint64_t r14;
+	uint64_t r15;
+	uint64_t rip;
+};
 #endif
 
 typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bde3993624f1..b5ea75c4a4b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -118,4 +118,5 @@ ifeq ($(CONFIG_X86_64),y)
 
 	obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 	obj-y				+= vsmp_64.o
+	obj-$(CONFIG_KEXEC)		+= kexec-bzimage64.o
 endif
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
new file mode 100644
index 000000000000..bcedd100192f
--- /dev/null
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -0,0 +1,375 @@
+/*
+ * Kexec bzImage loader
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ * Authors:
+ *      Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#define pr_fmt(fmt)	"kexec-bzImage64: " fmt
+
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/bootparam.h>
+#include <asm/setup.h>
+
+/*
+ * Defines lowest physical address for various segments. Not sure where
+ * exactly these limits came from. Current bzimage64 loader in kexec-tools
+ * uses these so I am retaining it. It can be changed over time as we gain
+ * more insight.
+ */
+#define MIN_PURGATORY_ADDR	0x3000
+#define MIN_BOOTPARAM_ADDR	0x3000
+#define MIN_KERNEL_LOAD_ADDR	0x100000
+#define MIN_INITRD_LOAD_ADDR	0x1000000
+
+/*
+ * This is a place holder for all boot loader specific data structure which
+ * gets allocated in one call but gets freed much later during cleanup
+ * time. Right now there is only one field but it can grow as need be.
+ */
+struct bzimage64_data {
+	/*
+	 * Temporary buffer to hold bootparams buffer. This should be
+	 * freed once the bootparam segment has been loaded.
+	 */
+	void *bootparams_buf;
+};
+
+static int setup_initrd(struct boot_params *params,
+		unsigned long initrd_load_addr, unsigned long initrd_len)
+{
+	params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL;
+	params->hdr.ramdisk_size = initrd_len & 0xffffffffUL;
+
+	params->ext_ramdisk_image = initrd_load_addr >> 32;
+	params->ext_ramdisk_size = initrd_len >> 32;
+
+	return 0;
+}
+
+static int setup_cmdline(struct boot_params *params,
+			 unsigned long bootparams_load_addr,
+			 unsigned long cmdline_offset, char *cmdline,
+			 unsigned long cmdline_len)
+{
+	char *cmdline_ptr = ((char *)params) + cmdline_offset;
+	unsigned long cmdline_ptr_phys;
+	uint32_t cmdline_low_32, cmdline_ext_32;
+
+	memcpy(cmdline_ptr, cmdline, cmdline_len);
+	cmdline_ptr[cmdline_len - 1] = '\0';
+
+	cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
+	cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
+	cmdline_ext_32 = cmdline_ptr_phys >> 32;
+
+	params->hdr.cmd_line_ptr = cmdline_low_32;
+	if (cmdline_ext_32)
+		params->ext_cmd_line_ptr = cmdline_ext_32;
+
+	return 0;
+}
+
+static int setup_memory_map_entries(struct boot_params *params)
+{
+	unsigned int nr_e820_entries;
+
+	nr_e820_entries = e820_saved.nr_map;
+
+	/* TODO: Pass entries more than E820MAX in bootparams setup data */
+	if (nr_e820_entries > E820MAX)
+		nr_e820_entries = E820MAX;
+
+	params->e820_entries = nr_e820_entries;
+	memcpy(&params->e820_map, &e820_saved.map,
+	       nr_e820_entries * sizeof(struct e820entry));
+
+	return 0;
+}
+
+static int setup_boot_parameters(struct boot_params *params)
+{
+	unsigned int nr_e820_entries;
+	unsigned long long mem_k, start, end;
+	int i;
+
+	/* Get subarch from existing bootparams */
+	params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
+
+	/* Copying screen_info will do? */
+	memcpy(&params->screen_info, &boot_params.screen_info,
+				sizeof(struct screen_info));
+
+	/* Fill in memsize later */
+	params->screen_info.ext_mem_k = 0;
+	params->alt_mem_k = 0;
+
+	/* Default APM info */
+	memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
+
+	/* Default drive info */
+	memset(&params->hd0_info, 0, sizeof(params->hd0_info));
+	memset(&params->hd1_info, 0, sizeof(params->hd1_info));
+
+	/* Default sysdesc table */
+	params->sys_desc_table.length = 0;
+
+	setup_memory_map_entries(params);
+	nr_e820_entries = params->e820_entries;
+
+	for (i = 0; i < nr_e820_entries; i++) {
+		if (params->e820_map[i].type != E820_RAM)
+			continue;
+		start = params->e820_map[i].addr;
+		end = params->e820_map[i].addr + params->e820_map[i].size - 1;
+
+		if ((start <= 0x100000) && end > 0x100000) {
+			mem_k = (end >> 10) - (0x100000 >> 10);
+			params->screen_info.ext_mem_k = mem_k;
+			params->alt_mem_k = mem_k;
+			if (mem_k > 0xfc00)
+				params->screen_info.ext_mem_k = 0xfc00; /* 64M*/
+			if (mem_k > 0xffffffff)
+				params->alt_mem_k = 0xffffffff;
+		}
+	}
+
+	/* Setup EDD info */
+	memcpy(params->eddbuf, boot_params.eddbuf,
+				EDDMAXNR * sizeof(struct edd_info));
+	params->eddbuf_entries = boot_params.eddbuf_entries;
+
+	memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
+	       EDD_MBR_SIG_MAX * sizeof(unsigned int));
+
+	return 0;
+}
+
+int bzImage64_probe(const char *buf, unsigned long len)
+{
+	int ret = -ENOEXEC;
+	struct setup_header *header;
+
+	/* kernel should be atleast two sectors long */
+	if (len < 2 * 512) {
+		pr_err("File is too short to be a bzImage\n");
+		return ret;
+	}
+
+	header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr));
+	if (memcmp((char *)&header->header, "HdrS", 4) != 0) {
+		pr_err("Not a bzImage\n");
+		return ret;
+	}
+
+	if (header->boot_flag != 0xAA55) {
+		pr_err("No x86 boot sector present\n");
+		return ret;
+	}
+
+	if (header->version < 0x020C) {
+		pr_err("Must be at least protocol version 2.12\n");
+		return ret;
+	}
+
+	if (!(header->loadflags & LOADED_HIGH)) {
+		pr_err("zImage not a bzImage\n");
+		return ret;
+	}
+
+	if (!(header->xloadflags & XLF_KERNEL_64)) {
+		pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n");
+		return ret;
+	}
+
+	if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) {
+		pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n");
+		return ret;
+	}
+
+	/* I've got a bzImage */
+	pr_debug("It's a relocatable bzImage64\n");
+	ret = 0;
+
+	return ret;
+}
+
+void *bzImage64_load(struct kimage *image, char *kernel,
+		     unsigned long kernel_len, char *initrd,
+		     unsigned long initrd_len, char *cmdline,
+		     unsigned long cmdline_len)
+{
+
+	struct setup_header *header;
+	int setup_sects, kern16_size, ret = 0;
+	unsigned long setup_header_size, params_cmdline_sz;
+	struct boot_params *params;
+	unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
+	unsigned long purgatory_load_addr;
+	unsigned long kernel_bufsz, kernel_memsz, kernel_align;
+	char *kernel_buf;
+	struct bzimage64_data *ldata;
+	struct kexec_entry64_regs regs64;
+	void *stack;
+	unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
+
+	header = (struct setup_header *)(kernel + setup_hdr_offset);
+	setup_sects = header->setup_sects;
+	if (setup_sects == 0)
+		setup_sects = 4;
+
+	kern16_size = (setup_sects + 1) * 512;
+	if (kernel_len < kern16_size) {
+		pr_err("bzImage truncated\n");
+		return ERR_PTR(-ENOEXEC);
+	}
+
+	if (cmdline_len > header->cmdline_size) {
+		pr_err("Kernel command line too long\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * Load purgatory. For 64bit entry point, purgatory  code can be
+	 * anywhere.
+	 */
+	ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
+				   &purgatory_load_addr);
+	if (ret) {
+		pr_err("Loading purgatory failed\n");
+		return ERR_PTR(ret);
+	}
+
+	pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+
+	/* Load Bootparams and cmdline */
+	params_cmdline_sz = sizeof(struct boot_params) + cmdline_len;
+	params = kzalloc(params_cmdline_sz, GFP_KERNEL);
+	if (!params)
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy setup header onto bootparams. Documentation/x86/boot.txt */
+	setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
+
+	/* Is there a limit on setup header size? */
+	memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
+
+	ret = kexec_add_buffer(image, (char *)params, params_cmdline_sz,
+			       params_cmdline_sz, 16, MIN_BOOTPARAM_ADDR,
+			       ULONG_MAX, 1, &bootparam_load_addr);
+	if (ret)
+		goto out_free_params;
+	pr_debug("Loaded boot_param and command line at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+		 bootparam_load_addr, params_cmdline_sz, params_cmdline_sz);
+
+	/* Load kernel */
+	kernel_buf = kernel + kern16_size;
+	kernel_bufsz =  kernel_len - kern16_size;
+	kernel_memsz = PAGE_ALIGN(header->init_size);
+	kernel_align = header->kernel_alignment;
+
+	ret = kexec_add_buffer(image, kernel_buf,
+			       kernel_bufsz, kernel_memsz, kernel_align,
+			       MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
+			       &kernel_load_addr);
+	if (ret)
+		goto out_free_params;
+
+	pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+		 kernel_load_addr, kernel_memsz, kernel_memsz);
+
+	/* Load initrd high */
+	if (initrd) {
+		ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
+				       PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
+				       ULONG_MAX, 1, &initrd_load_addr);
+		if (ret)
+			goto out_free_params;
+
+		pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+				initrd_load_addr, initrd_len, initrd_len);
+
+		setup_initrd(params, initrd_load_addr, initrd_len);
+	}
+
+	setup_cmdline(params, bootparam_load_addr, sizeof(struct boot_params),
+		      cmdline, cmdline_len);
+
+	/* bootloader info. Do we need a separate ID for kexec kernel loader? */
+	params->hdr.type_of_loader = 0x0D << 4;
+	params->hdr.loadflags = 0;
+
+	/* Setup purgatory regs for entry */
+	ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+					     sizeof(regs64), 1);
+	if (ret)
+		goto out_free_params;
+
+	regs64.rbx = 0; /* Bootstrap Processor */
+	regs64.rsi = bootparam_load_addr;
+	regs64.rip = kernel_load_addr + 0x200;
+	stack = kexec_purgatory_get_symbol_addr(image, "stack_end");
+	if (IS_ERR(stack)) {
+		pr_err("Could not find address of symbol stack_end\n");
+		ret = -EINVAL;
+		goto out_free_params;
+	}
+
+	regs64.rsp = (unsigned long)stack;
+	ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+					     sizeof(regs64), 0);
+	if (ret)
+		goto out_free_params;
+
+	setup_boot_parameters(params);
+
+	/* Allocate loader specific data */
+	ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
+	if (!ldata) {
+		ret = -ENOMEM;
+		goto out_free_params;
+	}
+
+	/*
+	 * Store pointer to params so that it could be freed after loading
+	 * params segment has been loaded and contents have been copied
+	 * somewhere else.
+	 */
+	ldata->bootparams_buf = params;
+	return ldata;
+
+out_free_params:
+	kfree(params);
+	return ERR_PTR(ret);
+}
+
+/* This cleanup function is called after various segments have been loaded */
+int bzImage64_cleanup(void *loader_data)
+{
+	struct bzimage64_data *ldata = loader_data;
+
+	if (!ldata)
+		return 0;
+
+	kfree(ldata->bootparams_buf);
+	ldata->bootparams_buf = NULL;
+
+	return 0;
+}
+
+struct kexec_file_ops kexec_bzImage64_ops = {
+	.probe = bzImage64_probe,
+	.load = bzImage64_load,
+	.cleanup = bzImage64_cleanup,
+};
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 88404c440727..18d0f9e0b6da 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -23,9 +23,10 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/debugreg.h>
+#include <asm/kexec-bzimage64.h>
 
 static struct kexec_file_ops *kexec_file_loaders[] = {
-		NULL,
+		&kexec_bzImage64_ops,
 };
 
 static void free_transition_pgtable(struct kimage *image)
@@ -328,7 +329,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 	if (!image->fops || !image->fops->cleanup)
 		return 0;
 
-	return image->fops->cleanup(image);
+	return image->fops->cleanup(image->image_loader_data);
 }
 
 /*
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 84f09e9eca26..9481703b0e7a 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -190,7 +190,7 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
 			     unsigned long kernel_len, char *initrd,
 			     unsigned long initrd_len, char *cmdline,
 			     unsigned long cmdline_len);
-typedef int (kexec_cleanup_t)(struct kimage *image);
+typedef int (kexec_cleanup_t)(void *loader_data);
 
 struct kexec_file_ops {
 	kexec_probe_t *probe;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 669e331aa9ec..0926f2a3ed03 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -460,6 +460,14 @@ static void kimage_file_post_load_cleanup(struct kimage *image)
 
 	/* See if architecture has anything to cleanup post load */
 	arch_kimage_file_post_load_cleanup(image);
+
+	/*
+	 * Above call should have called into bootloader to free up
+	 * any data stored in kimage->image_loader_data. It should
+	 * be ok now to free it up.
+	 */
+	kfree(image->image_loader_data);
+	image->image_loader_data = NULL;
 }
 
 /*
@@ -576,7 +584,6 @@ out_free_control_pages:
 	kimage_free_page_list(&image->control_pages);
 out_free_post_load_bufs:
 	kimage_file_post_load_cleanup(image);
-	kfree(image->image_loader_data);
 out_free_image:
 	kfree(image);
 	return ret;
@@ -900,8 +907,6 @@ static void kimage_free(struct kimage *image)
 	/* Free the kexec control pages... */
 	kimage_free_page_list(&image->control_pages);
 
-	kfree(image->image_loader_data);
-
 	/*
 	 * Free up any temporary buffers allocated. This might hit if
 	 * error occurred much later after buffer allocation.
-- 
cgit v1.2.3


From 6a2c20e7d8900ed273dc34a9af9bf02fc478e427 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:26:11 -0700
Subject: kexec: support kexec/kdump on EFI systems

This patch does two things.  It passes EFI run time mappings to second
kernel in bootparams efi_info.  Second kernel parse this info and create
new mappings in second kernel.  That means mappings in first and second
kernel will be same.  This paves the way to enable EFI in kexec kernel.

This patch also prepares and passes EFI setup data through bootparams.
This contains bunch of information about various tables and their
addresses.

These information gathering and passing has been written along the lines
of what current kexec-tools is doing to make kexec work with UEFI.

[akpm@linux-foundation.org: s/get_efi/efi_get/g, per Matt]
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/kexec-bzimage64.c  | 146 ++++++++++++++++++++++++++++++++++---
 drivers/firmware/efi/runtime-map.c |  21 ++++++
 include/linux/efi.h                |  19 +++++
 3 files changed, 174 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index a8e646458a10..623e6c58081f 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -18,10 +18,12 @@
 #include <linux/kexec.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/efi.h>
 
 #include <asm/bootparam.h>
 #include <asm/setup.h>
 #include <asm/crash.h>
+#include <asm/efi.h>
 
 #define MAX_ELFCOREHDR_STR_LEN	30	/* elfcorehdr=0x<64bit-value> */
 
@@ -90,7 +92,7 @@ static int setup_cmdline(struct kimage *image, struct boot_params *params,
 	return 0;
 }
 
-static int setup_memory_map_entries(struct boot_params *params)
+static int setup_e820_entries(struct boot_params *params)
 {
 	unsigned int nr_e820_entries;
 
@@ -107,8 +109,93 @@ static int setup_memory_map_entries(struct boot_params *params)
 	return 0;
 }
 
-static int setup_boot_parameters(struct kimage *image,
-				 struct boot_params *params)
+#ifdef CONFIG_EFI
+static int setup_efi_info_memmap(struct boot_params *params,
+				  unsigned long params_load_addr,
+				  unsigned int efi_map_offset,
+				  unsigned int efi_map_sz)
+{
+	void *efi_map = (void *)params + efi_map_offset;
+	unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset;
+	struct efi_info *ei = &params->efi_info;
+
+	if (!efi_map_sz)
+		return 0;
+
+	efi_runtime_map_copy(efi_map, efi_map_sz);
+
+	ei->efi_memmap = efi_map_phys_addr & 0xffffffff;
+	ei->efi_memmap_hi = efi_map_phys_addr >> 32;
+	ei->efi_memmap_size = efi_map_sz;
+
+	return 0;
+}
+
+static int
+prepare_add_efi_setup_data(struct boot_params *params,
+		       unsigned long params_load_addr,
+		       unsigned int efi_setup_data_offset)
+{
+	unsigned long setup_data_phys;
+	struct setup_data *sd = (void *)params + efi_setup_data_offset;
+	struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
+
+	esd->fw_vendor = efi.fw_vendor;
+	esd->runtime = efi.runtime;
+	esd->tables = efi.config_table;
+	esd->smbios = efi.smbios;
+
+	sd->type = SETUP_EFI;
+	sd->len = sizeof(struct efi_setup_data);
+
+	/* Add setup data */
+	setup_data_phys = params_load_addr + efi_setup_data_offset;
+	sd->next = params->hdr.setup_data;
+	params->hdr.setup_data = setup_data_phys;
+
+	return 0;
+}
+
+static int
+setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
+		unsigned int efi_map_offset, unsigned int efi_map_sz,
+		unsigned int efi_setup_data_offset)
+{
+	struct efi_info *current_ei = &boot_params.efi_info;
+	struct efi_info *ei = &params->efi_info;
+
+	if (!current_ei->efi_memmap_size)
+		return 0;
+
+	/*
+	 * If 1:1 mapping is not enabled, second kernel can not setup EFI
+	 * and use EFI run time services. User space will have to pass
+	 * acpi_rsdp=<addr> on kernel command line to make second kernel boot
+	 * without efi.
+	 */
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		return 0;
+
+	ei->efi_loader_signature = current_ei->efi_loader_signature;
+	ei->efi_systab = current_ei->efi_systab;
+	ei->efi_systab_hi = current_ei->efi_systab_hi;
+
+	ei->efi_memdesc_version = current_ei->efi_memdesc_version;
+	ei->efi_memdesc_size = efi_get_runtime_map_desc_size();
+
+	setup_efi_info_memmap(params, params_load_addr, efi_map_offset,
+			      efi_map_sz);
+	prepare_add_efi_setup_data(params, params_load_addr,
+				   efi_setup_data_offset);
+	return 0;
+}
+#endif /* CONFIG_EFI */
+
+static int
+setup_boot_parameters(struct kimage *image, struct boot_params *params,
+		      unsigned long params_load_addr,
+		      unsigned int efi_map_offset, unsigned int efi_map_sz,
+		      unsigned int efi_setup_data_offset)
 {
 	unsigned int nr_e820_entries;
 	unsigned long long mem_k, start, end;
@@ -140,7 +227,7 @@ static int setup_boot_parameters(struct kimage *image,
 		if (ret)
 			return ret;
 	} else
-		setup_memory_map_entries(params);
+		setup_e820_entries(params);
 
 	nr_e820_entries = params->e820_entries;
 
@@ -161,6 +248,12 @@ static int setup_boot_parameters(struct kimage *image,
 		}
 	}
 
+#ifdef CONFIG_EFI
+	/* Setup EFI state */
+	setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
+			efi_setup_data_offset);
+#endif
+
 	/* Setup EDD info */
 	memcpy(params->eddbuf, boot_params.eddbuf,
 				EDDMAXNR * sizeof(struct edd_info));
@@ -214,6 +307,15 @@ int bzImage64_probe(const char *buf, unsigned long len)
 		return ret;
 	}
 
+	/*
+	 * Can't handle 32bit EFI as it does not allow loading kernel
+	 * above 4G. This should be handled by 32bit bzImage loader
+	 */
+	if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) {
+		pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n");
+		return ret;
+	}
+
 	/* I've got a bzImage */
 	pr_debug("It's a relocatable bzImage64\n");
 	ret = 0;
@@ -229,7 +331,7 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 
 	struct setup_header *header;
 	int setup_sects, kern16_size, ret = 0;
-	unsigned long setup_header_size, params_cmdline_sz;
+	unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
 	struct boot_params *params;
 	unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
 	unsigned long purgatory_load_addr;
@@ -239,6 +341,7 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 	struct kexec_entry64_regs regs64;
 	void *stack;
 	unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
+	unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
 
 	header = (struct setup_header *)(kernel + setup_hdr_offset);
 	setup_sects = header->setup_sects;
@@ -285,12 +388,29 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 
 	pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
 
-	/* Load Bootparams and cmdline */
+
+	/*
+	 * Load Bootparams and cmdline and space for efi stuff.
+	 *
+	 * Allocate memory together for multiple data structures so
+	 * that they all can go in single area/segment and we don't
+	 * have to create separate segment for each. Keeps things
+	 * little bit simple
+	 */
+	efi_map_sz = efi_get_runtime_map_size();
+	efi_map_sz = ALIGN(efi_map_sz, 16);
 	params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
 				MAX_ELFCOREHDR_STR_LEN;
-	params = kzalloc(params_cmdline_sz, GFP_KERNEL);
+	params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
+	params_misc_sz = params_cmdline_sz + efi_map_sz +
+				sizeof(struct setup_data) +
+				sizeof(struct efi_setup_data);
+
+	params = kzalloc(params_misc_sz, GFP_KERNEL);
 	if (!params)
 		return ERR_PTR(-ENOMEM);
+	efi_map_offset = params_cmdline_sz;
+	efi_setup_data_offset = efi_map_offset + efi_map_sz;
 
 	/* Copy setup header onto bootparams. Documentation/x86/boot.txt */
 	setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
@@ -298,13 +418,13 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 	/* Is there a limit on setup header size? */
 	memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
 
-	ret = kexec_add_buffer(image, (char *)params, params_cmdline_sz,
-			       params_cmdline_sz, 16, MIN_BOOTPARAM_ADDR,
+	ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
+			       params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
 			       ULONG_MAX, 1, &bootparam_load_addr);
 	if (ret)
 		goto out_free_params;
-	pr_debug("Loaded boot_param and command line at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
-		 bootparam_load_addr, params_cmdline_sz, params_cmdline_sz);
+	pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+		 bootparam_load_addr, params_misc_sz, params_misc_sz);
 
 	/* Load kernel */
 	kernel_buf = kernel + kern16_size;
@@ -365,7 +485,9 @@ void *bzImage64_load(struct kimage *image, char *kernel,
 	if (ret)
 		goto out_free_params;
 
-	ret = setup_boot_parameters(image, params);
+	ret = setup_boot_parameters(image, params, bootparam_load_addr,
+				    efi_map_offset, efi_map_sz,
+				    efi_setup_data_offset);
 	if (ret)
 		goto out_free_params;
 
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 97cdd16a2169..018c29a26615 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -138,6 +138,27 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
 	return entry;
 }
 
+int efi_get_runtime_map_size(void)
+{
+	return nr_efi_runtime_map * efi_memdesc_size;
+}
+
+int efi_get_runtime_map_desc_size(void)
+{
+	return efi_memdesc_size;
+}
+
+int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+	size_t sz = efi_get_runtime_map_size();
+
+	if (sz > bufsz)
+		sz = bufsz;
+
+	memcpy(buf, efi_runtime_map, sz);
+	return 0;
+}
+
 void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
 {
 	efi_runtime_map = map;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index efc681fd5895..45cb4ffdea62 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1156,6 +1156,9 @@ int efivars_sysfs_init(void);
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_runtime_map_init(struct kobject *);
 void efi_runtime_map_setup(void *, int, u32);
+int efi_get_runtime_map_size(void);
+int efi_get_runtime_map_desc_size(void);
+int efi_runtime_map_copy(void *buf, size_t bufsz);
 #else
 static inline int efi_runtime_map_init(struct kobject *kobj)
 {
@@ -1164,6 +1167,22 @@ static inline int efi_runtime_map_init(struct kobject *kobj)
 
 static inline void
 efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
+
+static inline int efi_get_runtime_map_size(void)
+{
+	return 0;
+}
+
+static inline int efi_get_runtime_map_desc_size(void)
+{
+	return 0;
+}
+
+static inline int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+	return 0;
+}
+
 #endif
 
 /* prototypes shared between arch specific and generic stub code */
-- 
cgit v1.2.3


From 8e7d838103feac320baf9e68d73f954840ac1eea Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:26:13 -0700
Subject: kexec: verify the signature of signed PE bzImage

This is the final piece of the puzzle of verifying kernel image signature
during kexec_file_load() syscall.

This patch calls into PE file routines to verify signature of bzImage.  If
signature are valid, kexec_file_load() succeeds otherwise it fails.

Two new config options have been introduced.  First one is
CONFIG_KEXEC_VERIFY_SIG.  This option enforces that kernel has to be
validly signed otherwise kernel load will fail.  If this option is not
set, no signature verification will be done.  Only exception will be when
secureboot is enabled.  In that case signature verification should be
automatically enforced when secureboot is enabled.  But that will happen
when secureboot patches are merged.

Second config option is CONFIG_KEXEC_BZIMAGE_VERIFY_SIG.  This option
enables signature verification support on bzImage.  If this option is not
set and previous one is set, kernel image loading will fail because kernel
does not have support to verify signature of bzImage.

I tested these patches with both "pesign" and "sbsign" signed bzImages.

I used signing_key.priv key and signing_key.x509 cert for signing as
generated during kernel build process (if module signing is enabled).

Used following method to sign bzImage.

pesign
======
- Convert DER format cert to PEM format cert
openssl x509 -in signing_key.x509 -inform DER -out signing_key.x509.PEM -outform
PEM

- Generate a .p12 file from existing cert and private key file
openssl pkcs12 -export -out kernel-key.p12 -inkey signing_key.priv -in
signing_key.x509.PEM

- Import .p12 file into pesign db
pk12util -i /tmp/kernel-key.p12 -d /etc/pki/pesign

- Sign bzImage
pesign -i /boot/vmlinuz-3.16.0-rc3+ -o /boot/vmlinuz-3.16.0-rc3+.signed.pesign
-c "Glacier signing key - Magrathea" -s

sbsign
======
sbsign --key signing_key.priv --cert signing_key.x509.PEM --output
/boot/vmlinuz-3.16.0-rc3+.signed.sbsign /boot/vmlinuz-3.16.0-rc3+

Patch details:

Well all the hard work is done in previous patches.  Now bzImage loader
has just call into that code and verify whether bzImage signature are
valid or not.

Also create two config options.  First one is CONFIG_KEXEC_VERIFY_SIG.
This option enforces that kernel has to be validly signed otherwise kernel
load will fail.  If this option is not set, no signature verification will
be done.  Only exception will be when secureboot is enabled.  In that case
signature verification should be automatically enforced when secureboot is
enabled.  But that will happen when secureboot patches are merged.

Second config option is CONFIG_KEXEC_BZIMAGE_VERIFY_SIG.  This option
enables signature verification support on bzImage.  If this option is not
set and previous one is set, kernel image loading will fail because kernel
does not have support to verify signature of bzImage.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Matt Fleming <matt@console-pimps.org>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig                   | 22 ++++++++++++++++++++++
 arch/x86/kernel/kexec-bzimage64.c  | 21 +++++++++++++++++++++
 arch/x86/kernel/machine_kexec_64.c | 11 +++++++++++
 include/linux/kexec.h              |  3 +++
 kernel/kexec.c                     | 15 +++++++++++++++
 5 files changed, 72 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9558b9fcafbf..4aafd322e21e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1599,6 +1599,28 @@ config KEXEC
 	  interface is strongly in flux, so no good recommendation can be
 	  made.
 
+config KEXEC_VERIFY_SIG
+	bool "Verify kernel signature during kexec_file_load() syscall"
+	depends on KEXEC
+	---help---
+	  This option makes kernel signature verification mandatory for
+	  kexec_file_load() syscall. If kernel is signature can not be
+	  verified, kexec_file_load() will fail.
+
+	  This option enforces signature verification at generic level.
+	  One needs to enable signature verification for type of kernel
+	  image being loaded to make sure it works. For example, enable
+	  bzImage signature verification option to be able to load and
+	  verify signatures of bzImage. Otherwise kernel loading will fail.
+
+config KEXEC_BZIMAGE_VERIFY_SIG
+	bool "Enable bzImage signature verification support"
+	depends on KEXEC_VERIFY_SIG
+	depends on SIGNED_PE_FILE_VERIFICATION
+	select SYSTEM_TRUSTED_KEYRING
+	---help---
+	  Enable bzImage signature verification support.
+
 config CRASH_DUMP
 	bool "kernel crash dumps"
 	depends on X86_64 || (X86_32 && HIGHMEM)
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 623e6c58081f..9642b9b33655 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -19,6 +19,8 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/efi.h>
+#include <linux/verify_pefile.h>
+#include <keys/system_keyring.h>
 
 #include <asm/bootparam.h>
 #include <asm/setup.h>
@@ -525,8 +527,27 @@ int bzImage64_cleanup(void *loader_data)
 	return 0;
 }
 
+#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
+int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+	bool trusted;
+	int ret;
+
+	ret = verify_pefile_signature(kernel, kernel_len,
+				      system_trusted_keyring, &trusted);
+	if (ret < 0)
+		return ret;
+	if (!trusted)
+		return -EKEYREJECTED;
+	return 0;
+}
+#endif
+
 struct kexec_file_ops kexec_bzImage64_ops = {
 	.probe = bzImage64_probe,
 	.load = bzImage64_load,
 	.cleanup = bzImage64_cleanup,
+#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
+	.verify_sig = bzImage64_verify_sig,
+#endif
 };
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 9330434da777..8b04018e5d1f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -372,6 +372,17 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 	return image->fops->cleanup(image->image_loader_data);
 }
 
+int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
+				 unsigned long kernel_len)
+{
+	if (!image->fops || !image->fops->verify_sig) {
+		pr_debug("kernel loader does not support signature verification.");
+		return -EKEYREJECTED;
+	}
+
+	return image->fops->verify_sig(kernel, kernel_len);
+}
+
 /*
  * Apply purgatory relocations.
  *
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 9481703b0e7a..4b2a0e11cc5b 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -191,11 +191,14 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
 			     unsigned long initrd_len, char *cmdline,
 			     unsigned long cmdline_len);
 typedef int (kexec_cleanup_t)(void *loader_data);
+typedef int (kexec_verify_sig_t)(const char *kernel_buf,
+				 unsigned long kernel_len);
 
 struct kexec_file_ops {
 	kexec_probe_t *probe;
 	kexec_load_t *load;
 	kexec_cleanup_t *cleanup;
+	kexec_verify_sig_t *verify_sig;
 };
 
 /* kexec interface functions */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f18c780f9716..0b49a0a58102 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -416,6 +416,12 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
 {
 }
 
+int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+					unsigned long buf_len)
+{
+	return -EKEYREJECTED;
+}
+
 /* Apply relocations of type RELA */
 int __weak
 arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
@@ -494,6 +500,15 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
 	if (ret)
 		goto out;
 
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+	ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+					   image->kernel_buf_len);
+	if (ret) {
+		pr_debug("kernel signature verification failed.\n");
+		goto out;
+	}
+	pr_debug("kernel signature verification successful.\n");
+#endif
 	/* It is possible that there no initramfs is being loaded */
 	if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
 		ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
-- 
cgit v1.2.3


From fd3cbdc0d1b5254a2e8793df58c409b469899a3f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 10 Aug 2014 08:53:39 +0200
Subject: jump_label: Fix small typos in the documentation

Was reading through the documentation of this code and noticed
a few typos, missing commas, etc.

Cc: Jason Baron <jbaron@akamai.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Mel Gorman <mgorman@suse.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jump_label.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 784304b222b3..98f923b6a0ea 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -8,28 +8,28 @@
  * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
  *
  * Jump labels provide an interface to generate dynamic branches using
- * self-modifying code. Assuming toolchain and architecture support the result
- * of a "if (static_key_false(&key))" statement is a unconditional branch (which
+ * self-modifying code. Assuming toolchain and architecture support, the result
+ * of a "if (static_key_false(&key))" statement is an unconditional branch (which
  * defaults to false - and the true block is placed out of line).
  *
  * However at runtime we can change the branch target using
  * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key
- * object and for as long as there are references all branches referring to
+ * object, and for as long as there are references all branches referring to
  * that particular key will point to the (out of line) true block.
  *
- * Since this relies on modifying code the static_key_slow_{inc,dec}() functions
+ * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions
  * must be considered absolute slow paths (machine wide synchronization etc.).
- * OTOH, since the affected branches are unconditional their runtime overhead
+ * OTOH, since the affected branches are unconditional, their runtime overhead
  * will be absolutely minimal, esp. in the default (off) case where the total
  * effect is a single NOP of appropriate size. The on case will patch in a jump
  * to the out-of-line block.
  *
- * When the control is directly exposed to userspace it is prudent to delay the
+ * When the control is directly exposed to userspace, it is prudent to delay the
  * decrement to avoid high frequency code modifications which can (and do)
  * cause significant performance degradation. Struct static_key_deferred and
  * static_key_slow_dec_deferred() provide for this.
  *
- * Lacking toolchain and or architecture support, it falls back to a simple
+ * Lacking toolchain and or architecture support, jump labels fall back to a simple
  * conditional branch.
  *
  * struct static_key my_key = STATIC_KEY_INIT_TRUE;
@@ -43,8 +43,7 @@
  *
  * Not initializing the key (static data is initialized to 0s anyway) is the
  * same as using STATIC_KEY_INIT_FALSE.
- *
-*/
+ */
 
 #include <linux/types.h>
 #include <linux/compiler.h>
-- 
cgit v1.2.3


From 26375b5c8449927f740ce0e837e23f45c951fb80 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Thu, 7 Aug 2014 16:37:58 +0900
Subject: mmc: dw_mmc: Slot quirk "disable-wp" is deprecated.

Slot quirks "disable-wp" is deprecated.
Instead, use the host quirk "disable-wp".
(Because the slot-node is removed in dt-file.)

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Tested-by: Sachin Kamat <sachin.kamat@samsung.com>
Acked-by: Seungwon Jeon <tgih.jun@samsung.com>
Reviewed-by: Doug Anderson <dianders@chromium.org>
Tested-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc.c  | 11 +++++++++--
 include/linux/mmc/dw_mmc.h |  2 ++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 39cf54f479d9..8f216edbdf08 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -996,7 +996,8 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
 	int gpio_ro = mmc_gpio_get_ro(mmc);
 
 	/* Use platform get_ro function, else try on board write protect */
-	if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
+	if ((slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) ||
+			(slot->host->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT))
 		read_only = 0;
 	else if (!IS_ERR_VALUE(gpio_ro))
 		read_only = gpio_ro;
@@ -2014,8 +2015,11 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
 
 	/* get quirks */
 	for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++)
-		if (of_get_property(np, of_slot_quirks[idx].quirk, NULL))
+		if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) {
+			dev_warn(dev, "Slot quirk %s is deprecated\n",
+					of_slot_quirks[idx].quirk);
 			quirks |= of_slot_quirks[idx].id;
+		}
 
 	return quirks;
 }
@@ -2279,6 +2283,9 @@ static struct dw_mci_of_quirks {
 	{
 		.quirk	= "broken-cd",
 		.id	= DW_MCI_QUIRK_BROKEN_CARD_DETECTION,
+	}, {
+		.quirk	= "disable-wp",
+		.id	= DW_MCI_QUIRK_NO_WRITE_PROTECT,
 	},
 };
 
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index babaea93bca6..29ce014ab421 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -213,6 +213,8 @@ struct dw_mci_dma_ops {
 #define DW_MCI_QUIRK_HIGHSPEED			BIT(2)
 /* Unreliable card detection */
 #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION	BIT(3)
+/* No write protect */
+#define DW_MCI_QUIRK_NO_WRITE_PROTECT		BIT(4)
 
 /* Slot level quirks */
 /* This slot has no write protect */
-- 
cgit v1.2.3


From 0d5501c1c828fb97d02af50aa9d2b1a5498b94e4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Fri, 8 Aug 2014 14:42:13 -0400
Subject: net: Always untag vlan-tagged traffic on input.

Currently the functionality to untag traffic on input resides
as part of the vlan module and is build only when VLAN support
is enabled in the kernel.  When VLAN is disabled, the function
vlan_untag() turns into a stub and doesn't really untag the
packets.  This seems to create an interesting interaction
between VMs supporting checksum offloading and some network drivers.

There are some drivers that do not allow the user to change
tx-vlan-offload feature of the driver.  These drivers also seem
to assume that any VLAN-tagged traffic they transmit will
have the vlan information in the vlan_tci and not in the vlan
header already in the skb.  When transmitting skbs that already
have tagged data with partial checksum set, the checksum doesn't
appear to be updated correctly by the card thus resulting in a
failure to establish TCP connections.

The following is a packet trace taken on the receiver where a
sender is a VM with a VLAN configued.  The host VM is running on
doest not have VLAN support and the outging interface on the
host is tg3:
10:12:43.503055 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q
(0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27243,
offset 0, flags [DF], proto TCP (6), length 60)
    10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect
-> 0x48d9), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val
4294837885 ecr 0,nop,wscale 7], length 0
10:12:44.505556 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q
(0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27244,
offset 0, flags [DF], proto TCP (6), length 60)
    10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect
-> 0x44ee), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val
4294838888 ecr 0,nop,wscale 7], length 0

This connection finally times out.

I've only access to the TG3 hardware in this configuration thus have
only tested this with TG3 driver.  There are a lot of other drivers
that do not permit user changes to vlan acceleration features, and
I don't know if they all suffere from a similar issue.

The patch attempt to fix this another way.  It moves the vlan header
stipping code out of the vlan module and always builds it into the
kernel network core.  This way, even if vlan is not supported on
a virtualizatoin host, the virtual machines running on top of such
host will still work with VLANs enabled.

CC: Patrick McHardy <kaber@trash.net>
CC: Nithin Nayak Sujir <nsujir@broadcom.com>
CC: Michael Chan <mchan@broadcom.com>
CC: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h |  6 ------
 include/linux/skbuff.h  |  1 +
 net/8021q/vlan_core.c   | 53 -------------------------------------------------
 net/bridge/br_vlan.c    |  2 +-
 net/core/dev.c          |  2 +-
 net/core/skbuff.c       | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 56 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 4967916fe4ac..d69f0577a319 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -187,7 +187,6 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
 }
 
 extern bool vlan_do_receive(struct sk_buff **skb);
-extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 
 extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid);
 extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid);
@@ -241,11 +240,6 @@ static inline bool vlan_do_receive(struct sk_buff **skb)
 	return false;
 }
 
-static inline struct sk_buff *vlan_untag(struct sk_buff *skb)
-{
-	return skb;
-}
-
 static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
 {
 	return 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 11c270551d25..abde271c18ae 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2555,6 +2555,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
 unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
+struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
 
 struct skb_checksum_ops {
 	__wsum (*update)(const void *mem, int len, __wsum wsum);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 75d427763992..90cc2bdd4064 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -112,59 +112,6 @@ __be16 vlan_dev_vlan_proto(const struct net_device *dev)
 }
 EXPORT_SYMBOL(vlan_dev_vlan_proto);
 
-static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
-{
-	if (skb_cow(skb, skb_headroom(skb)) < 0) {
-		kfree_skb(skb);
-		return NULL;
-	}
-
-	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
-	skb->mac_header += VLAN_HLEN;
-	return skb;
-}
-
-struct sk_buff *vlan_untag(struct sk_buff *skb)
-{
-	struct vlan_hdr *vhdr;
-	u16 vlan_tci;
-
-	if (unlikely(vlan_tx_tag_present(skb))) {
-		/* vlan_tci is already set-up so leave this for another time */
-		return skb;
-	}
-
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (unlikely(!skb))
-		goto err_free;
-
-	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
-		goto err_free;
-
-	vhdr = (struct vlan_hdr *) skb->data;
-	vlan_tci = ntohs(vhdr->h_vlan_TCI);
-	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
-
-	skb_pull_rcsum(skb, VLAN_HLEN);
-	vlan_set_encap_proto(skb, vhdr);
-
-	skb = vlan_reorder_header(skb);
-	if (unlikely(!skb))
-		goto err_free;
-
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb_reset_mac_len(skb);
-
-	return skb;
-
-err_free:
-	kfree_skb(skb);
-	return NULL;
-}
-EXPORT_SYMBOL(vlan_untag);
-
-
 /*
  * vlan info and vid list
  */
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index febb0f87fa37..e1bcd653899b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -181,7 +181,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
 	 */
 	if (unlikely(!vlan_tx_tag_present(skb) &&
 		     skb->protocol == proto)) {
-		skb = vlan_untag(skb);
+		skb = skb_vlan_untag(skb);
 		if (unlikely(!skb))
 			return false;
 	}
diff --git a/net/core/dev.c b/net/core/dev.c
index 1c15b189c52b..b65a5051361f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3602,7 +3602,7 @@ another_round:
 
 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
 	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
-		skb = vlan_untag(skb);
+		skb = skb_vlan_untag(skb);
 		if (unlikely(!skb))
 			goto unlock;
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 224506a6fa80..163b673f9e62 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -62,6 +62,7 @@
 #include <linux/scatterlist.h>
 #include <linux/errqueue.h>
 #include <linux/prefetch.h>
+#include <linux/if_vlan.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -3973,3 +3974,55 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
 	return shinfo->gso_size;
 }
 EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+
+static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
+{
+	if (skb_cow(skb, skb_headroom(skb)) < 0) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+	skb->mac_header += VLAN_HLEN;
+	return skb;
+}
+
+struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
+{
+	struct vlan_hdr *vhdr;
+	u16 vlan_tci;
+
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		/* vlan_tci is already set-up so leave this for another time */
+		return skb;
+	}
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto err_free;
+
+	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+		goto err_free;
+
+	vhdr = (struct vlan_hdr *)skb->data;
+	vlan_tci = ntohs(vhdr->h_vlan_TCI);
+	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
+
+	skb_pull_rcsum(skb, VLAN_HLEN);
+	vlan_set_encap_proto(skb, vhdr);
+
+	skb = skb_reorder_vlan_header(skb);
+	if (unlikely(!skb))
+		goto err_free;
+
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb_reset_mac_len(skb);
+
+	return skb;
+
+err_free:
+	kfree_skb(skb);
+	return NULL;
+}
+EXPORT_SYMBOL(skb_vlan_untag);
-- 
cgit v1.2.3


From 14c4000a88afaaa2d0877cc86d42a74fde0f35e0 Mon Sep 17 00:00:00 2001
From: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Date: Sat, 9 Aug 2014 11:15:30 +0530
Subject: printk: Add function to return log buffer address and size

Platforms like IBM Power Systems supports service processor
assisted dump. It provides interface to add memory region to
be captured when system is crashed.

During initialization/running we can add kernel memory region
to be collected.

Presently we don't have a way to get the log buffer base address
and size. This patch adds support to return log buffer address
and size.

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/printk.h |  3 +++
 kernel/printk/printk.c | 12 ++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 0990997a5304..d78125f73ac4 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -10,6 +10,9 @@
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
 
+extern char *log_buf_addr_get(void);
+extern u32 log_buf_len_get(void);
+
 static inline int printk_get_level(const char *buffer)
 {
 	if (buffer[0] == KERN_SOH_ASCII && buffer[1]) {
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index de1a6bb6861d..e04c455a0e38 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -272,6 +272,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+/* Return log buffer address */
+char *log_buf_addr_get(void)
+{
+	return log_buf;
+}
+
+/* Return log buffer size */
+u32 log_buf_len_get(void)
+{
+	return log_buf_len;
+}
+
 /* human readable text of the record */
 static char *log_text(const struct printk_log *msg)
 {
-- 
cgit v1.2.3


From fadfe7be6e50de7f03913833b33c56cd8fb66bac Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 1 Aug 2014 14:33:02 +0200
Subject: perf: Add queued work to remove orphaned child events

In cases when the  owner task exits before the workload and the
workload made some forks, all the events stay in until the last
workload process exits. Thats' because each child event holds
parent reference.

We want to release all children events once the parent is gone,
because at that time there's no process to read them anyway, so
they're just eating resources.

This removal  races with process exit, which removes all events
and fork, which clone events.  To be clear of those two, adding
work queue to remove orphaned child for context in case such
event is detected.

Using delayed work queue (with delay == 1), because we queue this
work under perf scheduler callbacks. Normal work queue tries to wake
up the queue process, which deadlocks on rq->lock in this place.

Also preventing clones from abandoned parent event.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1406896382-18404-4-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |  4 +++
 kernel/events/core.c       | 87 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 90 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a8c0f6..ef5b62bdb103 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -52,6 +52,7 @@ struct perf_guest_info_callbacks {
 #include <linux/atomic.h>
 #include <linux/sysfs.h>
 #include <linux/perf_regs.h>
+#include <linux/workqueue.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -507,6 +508,9 @@ struct perf_event_context {
 	int				nr_cgroups;	 /* cgroup evts */
 	int				nr_branch_stack; /* branch_stack evt */
 	struct rcu_head			rcu_head;
+
+	struct delayed_work		orphans_remove;
+	bool				orphans_remove_sched;
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bbb3ca22f07c..a25460559b4f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -46,6 +46,8 @@
 
 #include <asm/irq_regs.h>
 
+static struct workqueue_struct *perf_wq;
+
 struct remote_function_call {
 	struct task_struct	*p;
 	int			(*func)(void *info);
@@ -1381,6 +1383,45 @@ out:
 		perf_event__header_size(tmp);
 }
 
+/*
+ * User event without the task.
+ */
+static bool is_orphaned_event(struct perf_event *event)
+{
+	return event && !is_kernel_event(event) && !event->owner;
+}
+
+/*
+ * Event has a parent but parent's task finished and it's
+ * alive only because of children holding refference.
+ */
+static bool is_orphaned_child(struct perf_event *event)
+{
+	return is_orphaned_event(event->parent);
+}
+
+static void orphans_remove_work(struct work_struct *work);
+
+static void schedule_orphans_remove(struct perf_event_context *ctx)
+{
+	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
+		return;
+
+	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
+		get_ctx(ctx);
+		ctx->orphans_remove_sched = true;
+	}
+}
+
+static int __init perf_workqueue_init(void)
+{
+	perf_wq = create_singlethread_workqueue("perf");
+	WARN(!perf_wq, "failed to create perf workqueue\n");
+	return perf_wq ? 0 : -1;
+}
+
+core_initcall(perf_workqueue_init);
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
@@ -1430,6 +1471,9 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1732,6 +1776,9 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
+	if (is_orphaned_child(event))
+		schedule_orphans_remove(ctx);
+
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3074,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
+	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3405,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * Remove all orphanes events from the context.
+ */
+static void orphans_remove_work(struct work_struct *work)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *event, *tmp;
+
+	ctx = container_of(work, struct perf_event_context,
+			   orphans_remove.work);
+
+	mutex_lock(&ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
+		struct perf_event *parent_event = event->parent;
+
+		if (!is_orphaned_child(event))
+			continue;
+
+		perf_remove_from_context(event, true);
+
+		mutex_lock(&parent_event->child_mutex);
+		list_del_init(&event->child_list);
+		mutex_unlock(&parent_event->child_mutex);
+
+		free_event(event);
+		put_event(parent_event);
+	}
+
+	raw_spin_lock_irq(&ctx->lock);
+	ctx->orphans_remove_sched = false;
+	raw_spin_unlock_irq(&ctx->lock);
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -7709,7 +7793,8 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
-	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+	if (is_orphaned_event(parent_event) ||
+	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
 		free_event(child_event);
 		return NULL;
 	}
-- 
cgit v1.2.3


From 770eee1fd38c70a009b321f5dbe64358f42511fd Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 11 Aug 2014 21:27:12 +0200
Subject: perf/x86: Fix data source encoding issues for load latency/precise
 store

This patch fixes issues introuduce by Andi's previous patch 'Revamp PEBS'
series.

This patch fixes the following:

 - precise_store_data_hsw() encode the mem op type whenever we can
 - precise_store_data_hsw set the default data source correctly

 - 0 is not a valid init value for data source. Define PERF_MEM_NA as the
   default value

This bug was actually introduced by

    commit 722e76e60f2775c21b087ff12c5e678cf0ebcaaf
    Author: Stephane Eranian <eranian@google.com>
    Date:   Thu May 15 17:56:44 2014 +0200

        fix Haswell precise store data source encoding

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1407785233-32193-4-git-send-email-eranian@google.com
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: ak@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 11 +++++++----
 include/linux/perf_event.h                |  9 ++++++++-
 2 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index a9b60f32064f..67919ce0f76a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -113,9 +113,12 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
 	union perf_mem_data_src dse;
 	u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
 
-	dse.val = 0;
-	dse.mem_op = PERF_MEM_OP_NA;
-	dse.mem_lvl = PERF_MEM_LVL_NA;
+	dse.val = PERF_MEM_NA;
+
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+		dse.mem_op = PERF_MEM_OP_STORE;
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
+		dse.mem_op = PERF_MEM_OP_LOAD;
 
 	/*
 	 * L1 info only valid for following events:
@@ -126,7 +129,7 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
 	 * MEM_UOPS_RETIRED.ALL_STORES
 	 */
 	if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
-		return dse.mem_lvl;
+		return dse.val;
 
 	if (status & 1)
 		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ef5b62bdb103..f0a1036b1911 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -608,6 +608,13 @@ struct perf_sample_data {
 	u64				txn;
 };
 
+/* default value for data source */
+#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
+		    PERF_MEM_S(LVL, NA)   |\
+		    PERF_MEM_S(SNOOP, NA) |\
+		    PERF_MEM_S(LOCK, NA)  |\
+		    PERF_MEM_S(TLB, NA))
+
 static inline void perf_sample_data_init(struct perf_sample_data *data,
 					 u64 addr, u64 period)
 {
@@ -620,7 +627,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->regs_user.regs = NULL;
 	data->stack_user_size = 0;
 	data->weight = 0;
-	data->data_src.val = 0;
+	data->data_src.val = PERF_MEM_NA;
 	data->txn = 0;
 }
 
-- 
cgit v1.2.3


From 2e39465abc4b7856a0ea6fcf4f6b4668bb5db877 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 4 Aug 2014 12:07:15 +0200
Subject: locking: Remove deprecated smp_mb__() barriers

Its been a while and there are no in-tree users left, so remove the
deprecated barriers.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Chen, Gong <gong.chen@linux.intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Joe Perches <joe@perches.com>
Cc: John Sullivan <jsrhbz@kanargh.force9.co.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/atomic.h | 36 ------------------------------------
 include/linux/bitops.h | 20 --------------------
 kernel/sched/core.c    | 16 ----------------
 3 files changed, 72 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index fef3a809e7cf..5b08a8540ecf 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -3,42 +3,6 @@
 #define _LINUX_ATOMIC_H
 #include <asm/atomic.h>
 
-/*
- * Provide __deprecated wrappers for the new interface, avoid flag day changes.
- * We need the ugly external functions to break header recursion hell.
- */
-#ifndef smp_mb__before_atomic_inc
-static inline void __deprecated smp_mb__before_atomic_inc(void)
-{
-	extern void __smp_mb__before_atomic(void);
-	__smp_mb__before_atomic();
-}
-#endif
-
-#ifndef smp_mb__after_atomic_inc
-static inline void __deprecated smp_mb__after_atomic_inc(void)
-{
-	extern void __smp_mb__after_atomic(void);
-	__smp_mb__after_atomic();
-}
-#endif
-
-#ifndef smp_mb__before_atomic_dec
-static inline void __deprecated smp_mb__before_atomic_dec(void)
-{
-	extern void __smp_mb__before_atomic(void);
-	__smp_mb__before_atomic();
-}
-#endif
-
-#ifndef smp_mb__after_atomic_dec
-static inline void __deprecated smp_mb__after_atomic_dec(void)
-{
-	extern void __smp_mb__after_atomic(void);
-	__smp_mb__after_atomic();
-}
-#endif
-
 /**
  * atomic_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cbc5833fb221..be5fd38bd5a0 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -32,26 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w);
  */
 #include <asm/bitops.h>
 
-/*
- * Provide __deprecated wrappers for the new interface, avoid flag day changes.
- * We need the ugly external functions to break header recursion hell.
- */
-#ifndef smp_mb__before_clear_bit
-static inline void __deprecated smp_mb__before_clear_bit(void)
-{
-	extern void __smp_mb__before_atomic(void);
-	__smp_mb__before_atomic();
-}
-#endif
-
-#ifndef smp_mb__after_clear_bit
-static inline void __deprecated smp_mb__after_clear_bit(void)
-{
-	extern void __smp_mb__after_atomic(void);
-	__smp_mb__after_atomic();
-}
-#endif
-
 #define for_each_set_bit(bit, addr, size) \
 	for ((bit) = find_first_bit((addr), (size));		\
 	     (bit) < (size);					\
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1211575a2208..76c518c9b3a7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,22 +90,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
-#ifdef smp_mb__before_atomic
-void __smp_mb__before_atomic(void)
-{
-	smp_mb__before_atomic();
-}
-EXPORT_SYMBOL(__smp_mb__before_atomic);
-#endif
-
-#ifdef smp_mb__after_atomic
-void __smp_mb__after_atomic(void)
-{
-	smp_mb__after_atomic();
-}
-EXPORT_SYMBOL(__smp_mb__after_atomic);
-#endif
-
 void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
 {
 	unsigned long delta;
-- 
cgit v1.2.3


From 7608a43d8f2e02f8b532f8e11481d7ecf8b5d3f9 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Wed, 30 Jul 2014 13:41:54 -0700
Subject: locking/mutexes: Use MUTEX_SPIN_ON_OWNER when appropriate

4badad35 ("locking/mutex: Disable optimistic spinning on some
architectures") added a ARCH_SUPPORTS_ATOMIC_RMW flag to
disable the mutex optimistic feature on specific archs.

Because CONFIG_MUTEX_SPIN_ON_OWNER only depended on DEBUG and
SMP, it was ok to have the ->owner field conditional a bit
flexible. However by adding a new variable to the matter,
we can waste space with the unused field, ie: CONFIG_SMP &&
(!CONFIG_MUTEX_SPIN_ON_OWNER && !CONFIG_DEBUG_MUTEX).

Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Acked-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: aswin@hp.com
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1406752916-3341-5-git-send-email-davidlohr@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mutex.h  | 2 +-
 kernel/locking/mutex.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 8d5535c58cc2..e4c29418f407 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -52,7 +52,7 @@ struct mutex {
 	atomic_t		count;
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
+#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
 	struct task_struct	*owner;
 #endif
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 4115fbf83b12..5cda397607f2 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -16,7 +16,7 @@
 #define mutex_remove_waiter(lock, waiter, ti) \
 		__list_del((waiter)->list.prev, (waiter)->list.next)
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 static inline void mutex_set_owner(struct mutex *lock)
 {
 	lock->owner = current;
-- 
cgit v1.2.3


From 214e0aed639ef40987bf6159fad303171a6de31e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Wed, 30 Jul 2014 13:41:55 -0700
Subject: locking/Documentation: Move locking related docs into
 Documentation/locking/

Specifically:
  Documentation/locking/lockdep-design.txt
  Documentation/locking/lockstat.txt
  Documentation/locking/mutex-design.txt
  Documentation/locking/rt-mutex-design.txt
  Documentation/locking/rt-mutex.txt
  Documentation/locking/spinlocks.txt
  Documentation/locking/ww-mutex-design.txt

Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: jason.low2@hp.com
Cc: aswin@hp.com
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Mason <clm@fb.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: David Airlie <airlied@linux.ie>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Josef Bacik <jbacik@fusionio.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lubomir Rintel <lkundrak@v3.sk>
Cc: Masanari Iida <standby24x7@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: fengguang.wu@intel.com
Link: http://lkml.kernel.org/r/1406752916-3341-6-git-send-email-davidlohr@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/00-INDEX                    |   2 +
 Documentation/DocBook/kernel-locking.tmpl |   2 +-
 Documentation/lockdep-design.txt          | 286 -----------
 Documentation/locking/lockdep-design.txt  | 286 +++++++++++
 Documentation/locking/lockstat.txt        | 178 +++++++
 Documentation/locking/mutex-design.txt    | 157 ++++++
 Documentation/locking/rt-mutex-design.txt | 781 ++++++++++++++++++++++++++++++
 Documentation/locking/rt-mutex.txt        |  79 +++
 Documentation/locking/spinlocks.txt       | 167 +++++++
 Documentation/locking/ww-mutex-design.txt | 344 +++++++++++++
 Documentation/lockstat.txt                | 178 -------
 Documentation/mutex-design.txt            | 157 ------
 Documentation/rt-mutex-design.txt         | 781 ------------------------------
 Documentation/rt-mutex.txt                |  79 ---
 Documentation/spinlocks.txt               | 167 -------
 Documentation/ww-mutex-design.txt         | 344 -------------
 MAINTAINERS                               |   4 +-
 drivers/gpu/drm/drm_modeset_lock.c        |   2 +-
 include/linux/lockdep.h                   |   2 +-
 include/linux/mutex.h                     |   2 +-
 include/linux/rwsem.h                     |   2 +-
 kernel/locking/mutex.c                    |   2 +-
 kernel/locking/rtmutex.c                  |   2 +-
 lib/Kconfig.debug                         |   4 +-
 24 files changed, 2005 insertions(+), 2003 deletions(-)
 delete mode 100644 Documentation/lockdep-design.txt
 create mode 100644 Documentation/locking/lockdep-design.txt
 create mode 100644 Documentation/locking/lockstat.txt
 create mode 100644 Documentation/locking/mutex-design.txt
 create mode 100644 Documentation/locking/rt-mutex-design.txt
 create mode 100644 Documentation/locking/rt-mutex.txt
 create mode 100644 Documentation/locking/spinlocks.txt
 create mode 100644 Documentation/locking/ww-mutex-design.txt
 delete mode 100644 Documentation/lockstat.txt
 delete mode 100644 Documentation/mutex-design.txt
 delete mode 100644 Documentation/rt-mutex-design.txt
 delete mode 100644 Documentation/rt-mutex.txt
 delete mode 100644 Documentation/spinlocks.txt
 delete mode 100644 Documentation/ww-mutex-design.txt

(limited to 'include/linux')

diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 27e67a98b7be..1750fcef1ab4 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -287,6 +287,8 @@ local_ops.txt
 	- semantics and behavior of local atomic operations.
 lockdep-design.txt
 	- documentation on the runtime locking correctness validator.
+locking/
+	- directory with info about kernel locking primitives
 lockstat.txt
 	- info on collecting statistics on locks (and contention).
 lockup-watchdogs.txt
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index e584ee12a1e7..7c9cc4846cb6 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -1972,7 +1972,7 @@ machines due to caching.
    <itemizedlist>
     <listitem>
      <para>
-       <filename>Documentation/spinlocks.txt</filename>: 
+       <filename>Documentation/locking/spinlocks.txt</filename>:
        Linus Torvalds' spinlocking tutorial in the kernel sources.
      </para>
     </listitem>
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
deleted file mode 100644
index 5dbc99c04f6e..000000000000
--- a/Documentation/lockdep-design.txt
+++ /dev/null
@@ -1,286 +0,0 @@
-Runtime locking correctness validator
-=====================================
-
-started by Ingo Molnar <mingo@redhat.com>
-additions by Arjan van de Ven <arjan@linux.intel.com>
-
-Lock-class
-----------
-
-The basic object the validator operates upon is a 'class' of locks.
-
-A class of locks is a group of locks that are logically the same with
-respect to locking rules, even if the locks may have multiple (possibly
-tens of thousands of) instantiations. For example a lock in the inode
-struct is one class, while each inode has its own instantiation of that
-lock class.
-
-The validator tracks the 'state' of lock-classes, and it tracks
-dependencies between different lock-classes. The validator maintains a
-rolling proof that the state and the dependencies are correct.
-
-Unlike an lock instantiation, the lock-class itself never goes away: when
-a lock-class is used for the first time after bootup it gets registered,
-and all subsequent uses of that lock-class will be attached to this
-lock-class.
-
-State
------
-
-The validator tracks lock-class usage history into 4n + 1 separate state bits:
-
-- 'ever held in STATE context'
-- 'ever held as readlock in STATE context'
-- 'ever held with STATE enabled'
-- 'ever held as readlock with STATE enabled'
-
-Where STATE can be either one of (kernel/lockdep_states.h)
- - hardirq
- - softirq
- - reclaim_fs
-
-- 'ever used'                                       [ == !unused        ]
-
-When locking rules are violated, these state bits are presented in the
-locking error messages, inside curlies. A contrived example:
-
-   modprobe/2287 is trying to acquire lock:
-    (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
-
-   but task is already holding lock:
-    (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
-
-
-The bit position indicates STATE, STATE-read, for each of the states listed
-above, and the character displayed in each indicates:
-
-   '.'  acquired while irqs disabled and not in irq context
-   '-'  acquired in irq context
-   '+'  acquired with irqs enabled
-   '?'  acquired in irq context with irqs enabled.
-
-Unused mutexes cannot be part of the cause of an error.
-
-
-Single-lock state rules:
-------------------------
-
-A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
-following states are exclusive, and only one of them is allowed to be
-set for any lock-class:
-
- <hardirq-safe> and <hardirq-unsafe>
- <softirq-safe> and <softirq-unsafe>
-
-The validator detects and reports lock usage that violate these
-single-lock state rules.
-
-Multi-lock dependency rules:
-----------------------------
-
-The same lock-class must not be acquired twice, because this could lead
-to lock recursion deadlocks.
-
-Furthermore, two locks may not be taken in different order:
-
- <L1> -> <L2>
- <L2> -> <L1>
-
-because this could lead to lock inversion deadlocks. (The validator
-finds such dependencies in arbitrary complexity, i.e. there can be any
-other locking sequence between the acquire-lock operations, the
-validator will still track all dependencies between locks.)
-
-Furthermore, the following usage based lock dependencies are not allowed
-between any two lock-classes:
-
-   <hardirq-safe>   ->  <hardirq-unsafe>
-   <softirq-safe>   ->  <softirq-unsafe>
-
-The first rule comes from the fact the a hardirq-safe lock could be
-taken by a hardirq context, interrupting a hardirq-unsafe lock - and
-thus could result in a lock inversion deadlock. Likewise, a softirq-safe
-lock could be taken by an softirq context, interrupting a softirq-unsafe
-lock.
-
-The above rules are enforced for any locking sequence that occurs in the
-kernel: when acquiring a new lock, the validator checks whether there is
-any rule violation between the new lock and any of the held locks.
-
-When a lock-class changes its state, the following aspects of the above
-dependency rules are enforced:
-
-- if a new hardirq-safe lock is discovered, we check whether it
-  took any hardirq-unsafe lock in the past.
-
-- if a new softirq-safe lock is discovered, we check whether it took
-  any softirq-unsafe lock in the past.
-
-- if a new hardirq-unsafe lock is discovered, we check whether any
-  hardirq-safe lock took it in the past.
-
-- if a new softirq-unsafe lock is discovered, we check whether any
-  softirq-safe lock took it in the past.
-
-(Again, we do these checks too on the basis that an interrupt context
-could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which
-could lead to a lock inversion deadlock - even if that lock scenario did
-not trigger in practice yet.)
-
-Exception: Nested data dependencies leading to nested locking
--------------------------------------------------------------
-
-There are a few cases where the Linux kernel acquires more than one
-instance of the same lock-class. Such cases typically happen when there
-is some sort of hierarchy within objects of the same type. In these
-cases there is an inherent "natural" ordering between the two objects
-(defined by the properties of the hierarchy), and the kernel grabs the
-locks in this fixed order on each of the objects.
-
-An example of such an object hierarchy that results in "nested locking"
-is that of a "whole disk" block-dev object and a "partition" block-dev
-object; the partition is "part of" the whole device and as long as one
-always takes the whole disk lock as a higher lock than the partition
-lock, the lock ordering is fully correct. The validator does not
-automatically detect this natural ordering, as the locking rule behind
-the ordering is not static.
-
-In order to teach the validator about this correct usage model, new
-versions of the various locking primitives were added that allow you to
-specify a "nesting level". An example call, for the block device mutex,
-looks like this:
-
-enum bdev_bd_mutex_lock_class
-{
-       BD_MUTEX_NORMAL,
-       BD_MUTEX_WHOLE,
-       BD_MUTEX_PARTITION
-};
-
- mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
-
-In this case the locking is done on a bdev object that is known to be a
-partition.
-
-The validator treats a lock that is taken in such a nested fashion as a
-separate (sub)class for the purposes of validation.
-
-Note: When changing code to use the _nested() primitives, be careful and
-check really thoroughly that the hierarchy is correctly mapped; otherwise
-you can get false positives or false negatives.
-
-Proof of 100% correctness:
---------------------------
-
-The validator achieves perfect, mathematical 'closure' (proof of locking
-correctness) in the sense that for every simple, standalone single-task
-locking sequence that occurred at least once during the lifetime of the
-kernel, the validator proves it with a 100% certainty that no
-combination and timing of these locking sequences can cause any class of
-lock related deadlock. [*]
-
-I.e. complex multi-CPU and multi-task locking scenarios do not have to
-occur in practice to prove a deadlock: only the simple 'component'
-locking chains have to occur at least once (anytime, in any
-task/context) for the validator to be able to prove correctness. (For
-example, complex deadlocks that would normally need more than 3 CPUs and
-a very unlikely constellation of tasks, irq-contexts and timings to
-occur, can be detected on a plain, lightly loaded single-CPU system as
-well!)
-
-This radically decreases the complexity of locking related QA of the
-kernel: what has to be done during QA is to trigger as many "simple"
-single-task locking dependencies in the kernel as possible, at least
-once, to prove locking correctness - instead of having to trigger every
-possible combination of locking interaction between CPUs, combined with
-every possible hardirq and softirq nesting scenario (which is impossible
-to do in practice).
-
-[*] assuming that the validator itself is 100% correct, and no other
-    part of the system corrupts the state of the validator in any way.
-    We also assume that all NMI/SMM paths [which could interrupt
-    even hardirq-disabled codepaths] are correct and do not interfere
-    with the validator. We also assume that the 64-bit 'chain hash'
-    value is unique for every lock-chain in the system. Also, lock
-    recursion must not be higher than 20.
-
-Performance:
-------------
-
-The above rules require _massive_ amounts of runtime checking. If we did
-that for every lock taken and for every irqs-enable event, it would
-render the system practically unusably slow. The complexity of checking
-is O(N^2), so even with just a few hundred lock-classes we'd have to do
-tens of thousands of checks for every event.
-
-This problem is solved by checking any given 'locking scenario' (unique
-sequence of locks taken after each other) only once. A simple stack of
-held locks is maintained, and a lightweight 64-bit hash value is
-calculated, which hash is unique for every lock chain. The hash value,
-when the chain is validated for the first time, is then put into a hash
-table, which hash-table can be checked in a lockfree manner. If the
-locking chain occurs again later on, the hash table tells us that we
-dont have to validate the chain again.
-
-Troubleshooting:
-----------------
-
-The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes.
-Exceeding this number will trigger the following lockdep warning:
-
-	(DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
-
-By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
-desktop systems have less than 1,000 lock classes, so this warning
-normally results from lock-class leakage or failure to properly
-initialize locks.  These two problems are illustrated below:
-
-1.	Repeated module loading and unloading while running the validator
-	will result in lock-class leakage.  The issue here is that each
-	load of the module will create a new set of lock classes for
-	that module's locks, but module unloading does not remove old
-	classes (see below discussion of reuse of lock classes for why).
-	Therefore, if that module is loaded and unloaded repeatedly,
-	the number of lock classes will eventually reach the maximum.
-
-2.	Using structures such as arrays that have large numbers of
-	locks that are not explicitly initialized.  For example,
-	a hash table with 8192 buckets where each bucket has its own
-	spinlock_t will consume 8192 lock classes -unless- each spinlock
-	is explicitly initialized at runtime, for example, using the
-	run-time spin_lock_init() as opposed to compile-time initializers
-	such as __SPIN_LOCK_UNLOCKED().  Failure to properly initialize
-	the per-bucket spinlocks would guarantee lock-class overflow.
-	In contrast, a loop that called spin_lock_init() on each lock
-	would place all 8192 locks into a single lock class.
-
-	The moral of this story is that you should always explicitly
-	initialize your locks.
-
-One might argue that the validator should be modified to allow
-lock classes to be reused.  However, if you are tempted to make this
-argument, first review the code and think through the changes that would
-be required, keeping in mind that the lock classes to be removed are
-likely to be linked into the lock-dependency graph.  This turns out to
-be harder to do than to say.
-
-Of course, if you do run out of lock classes, the next thing to do is
-to find the offending lock classes.  First, the following command gives
-you the number of lock classes currently in use along with the maximum:
-
-	grep "lock-classes" /proc/lockdep_stats
-
-This command produces the following output on a modest system:
-
-	 lock-classes:                          748 [max: 8191]
-
-If the number allocated (748 above) increases continually over time,
-then there is likely a leak.  The following command can be used to
-identify the leaking lock classes:
-
-	grep "BD" /proc/lockdep
-
-Run the command and save the output, then compare against the output from
-a later run of this command to identify the leakers.  This same output
-can also help you find situations where runtime lock initialization has
-been omitted.
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
new file mode 100644
index 000000000000..5dbc99c04f6e
--- /dev/null
+++ b/Documentation/locking/lockdep-design.txt
@@ -0,0 +1,286 @@
+Runtime locking correctness validator
+=====================================
+
+started by Ingo Molnar <mingo@redhat.com>
+additions by Arjan van de Ven <arjan@linux.intel.com>
+
+Lock-class
+----------
+
+The basic object the validator operates upon is a 'class' of locks.
+
+A class of locks is a group of locks that are logically the same with
+respect to locking rules, even if the locks may have multiple (possibly
+tens of thousands of) instantiations. For example a lock in the inode
+struct is one class, while each inode has its own instantiation of that
+lock class.
+
+The validator tracks the 'state' of lock-classes, and it tracks
+dependencies between different lock-classes. The validator maintains a
+rolling proof that the state and the dependencies are correct.
+
+Unlike an lock instantiation, the lock-class itself never goes away: when
+a lock-class is used for the first time after bootup it gets registered,
+and all subsequent uses of that lock-class will be attached to this
+lock-class.
+
+State
+-----
+
+The validator tracks lock-class usage history into 4n + 1 separate state bits:
+
+- 'ever held in STATE context'
+- 'ever held as readlock in STATE context'
+- 'ever held with STATE enabled'
+- 'ever held as readlock with STATE enabled'
+
+Where STATE can be either one of (kernel/lockdep_states.h)
+ - hardirq
+ - softirq
+ - reclaim_fs
+
+- 'ever used'                                       [ == !unused        ]
+
+When locking rules are violated, these state bits are presented in the
+locking error messages, inside curlies. A contrived example:
+
+   modprobe/2287 is trying to acquire lock:
+    (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
+
+   but task is already holding lock:
+    (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
+
+
+The bit position indicates STATE, STATE-read, for each of the states listed
+above, and the character displayed in each indicates:
+
+   '.'  acquired while irqs disabled and not in irq context
+   '-'  acquired in irq context
+   '+'  acquired with irqs enabled
+   '?'  acquired in irq context with irqs enabled.
+
+Unused mutexes cannot be part of the cause of an error.
+
+
+Single-lock state rules:
+------------------------
+
+A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
+following states are exclusive, and only one of them is allowed to be
+set for any lock-class:
+
+ <hardirq-safe> and <hardirq-unsafe>
+ <softirq-safe> and <softirq-unsafe>
+
+The validator detects and reports lock usage that violate these
+single-lock state rules.
+
+Multi-lock dependency rules:
+----------------------------
+
+The same lock-class must not be acquired twice, because this could lead
+to lock recursion deadlocks.
+
+Furthermore, two locks may not be taken in different order:
+
+ <L1> -> <L2>
+ <L2> -> <L1>
+
+because this could lead to lock inversion deadlocks. (The validator
+finds such dependencies in arbitrary complexity, i.e. there can be any
+other locking sequence between the acquire-lock operations, the
+validator will still track all dependencies between locks.)
+
+Furthermore, the following usage based lock dependencies are not allowed
+between any two lock-classes:
+
+   <hardirq-safe>   ->  <hardirq-unsafe>
+   <softirq-safe>   ->  <softirq-unsafe>
+
+The first rule comes from the fact the a hardirq-safe lock could be
+taken by a hardirq context, interrupting a hardirq-unsafe lock - and
+thus could result in a lock inversion deadlock. Likewise, a softirq-safe
+lock could be taken by an softirq context, interrupting a softirq-unsafe
+lock.
+
+The above rules are enforced for any locking sequence that occurs in the
+kernel: when acquiring a new lock, the validator checks whether there is
+any rule violation between the new lock and any of the held locks.
+
+When a lock-class changes its state, the following aspects of the above
+dependency rules are enforced:
+
+- if a new hardirq-safe lock is discovered, we check whether it
+  took any hardirq-unsafe lock in the past.
+
+- if a new softirq-safe lock is discovered, we check whether it took
+  any softirq-unsafe lock in the past.
+
+- if a new hardirq-unsafe lock is discovered, we check whether any
+  hardirq-safe lock took it in the past.
+
+- if a new softirq-unsafe lock is discovered, we check whether any
+  softirq-safe lock took it in the past.
+
+(Again, we do these checks too on the basis that an interrupt context
+could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which
+could lead to a lock inversion deadlock - even if that lock scenario did
+not trigger in practice yet.)
+
+Exception: Nested data dependencies leading to nested locking
+-------------------------------------------------------------
+
+There are a few cases where the Linux kernel acquires more than one
+instance of the same lock-class. Such cases typically happen when there
+is some sort of hierarchy within objects of the same type. In these
+cases there is an inherent "natural" ordering between the two objects
+(defined by the properties of the hierarchy), and the kernel grabs the
+locks in this fixed order on each of the objects.
+
+An example of such an object hierarchy that results in "nested locking"
+is that of a "whole disk" block-dev object and a "partition" block-dev
+object; the partition is "part of" the whole device and as long as one
+always takes the whole disk lock as a higher lock than the partition
+lock, the lock ordering is fully correct. The validator does not
+automatically detect this natural ordering, as the locking rule behind
+the ordering is not static.
+
+In order to teach the validator about this correct usage model, new
+versions of the various locking primitives were added that allow you to
+specify a "nesting level". An example call, for the block device mutex,
+looks like this:
+
+enum bdev_bd_mutex_lock_class
+{
+       BD_MUTEX_NORMAL,
+       BD_MUTEX_WHOLE,
+       BD_MUTEX_PARTITION
+};
+
+ mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
+
+In this case the locking is done on a bdev object that is known to be a
+partition.
+
+The validator treats a lock that is taken in such a nested fashion as a
+separate (sub)class for the purposes of validation.
+
+Note: When changing code to use the _nested() primitives, be careful and
+check really thoroughly that the hierarchy is correctly mapped; otherwise
+you can get false positives or false negatives.
+
+Proof of 100% correctness:
+--------------------------
+
+The validator achieves perfect, mathematical 'closure' (proof of locking
+correctness) in the sense that for every simple, standalone single-task
+locking sequence that occurred at least once during the lifetime of the
+kernel, the validator proves it with a 100% certainty that no
+combination and timing of these locking sequences can cause any class of
+lock related deadlock. [*]
+
+I.e. complex multi-CPU and multi-task locking scenarios do not have to
+occur in practice to prove a deadlock: only the simple 'component'
+locking chains have to occur at least once (anytime, in any
+task/context) for the validator to be able to prove correctness. (For
+example, complex deadlocks that would normally need more than 3 CPUs and
+a very unlikely constellation of tasks, irq-contexts and timings to
+occur, can be detected on a plain, lightly loaded single-CPU system as
+well!)
+
+This radically decreases the complexity of locking related QA of the
+kernel: what has to be done during QA is to trigger as many "simple"
+single-task locking dependencies in the kernel as possible, at least
+once, to prove locking correctness - instead of having to trigger every
+possible combination of locking interaction between CPUs, combined with
+every possible hardirq and softirq nesting scenario (which is impossible
+to do in practice).
+
+[*] assuming that the validator itself is 100% correct, and no other
+    part of the system corrupts the state of the validator in any way.
+    We also assume that all NMI/SMM paths [which could interrupt
+    even hardirq-disabled codepaths] are correct and do not interfere
+    with the validator. We also assume that the 64-bit 'chain hash'
+    value is unique for every lock-chain in the system. Also, lock
+    recursion must not be higher than 20.
+
+Performance:
+------------
+
+The above rules require _massive_ amounts of runtime checking. If we did
+that for every lock taken and for every irqs-enable event, it would
+render the system practically unusably slow. The complexity of checking
+is O(N^2), so even with just a few hundred lock-classes we'd have to do
+tens of thousands of checks for every event.
+
+This problem is solved by checking any given 'locking scenario' (unique
+sequence of locks taken after each other) only once. A simple stack of
+held locks is maintained, and a lightweight 64-bit hash value is
+calculated, which hash is unique for every lock chain. The hash value,
+when the chain is validated for the first time, is then put into a hash
+table, which hash-table can be checked in a lockfree manner. If the
+locking chain occurs again later on, the hash table tells us that we
+dont have to validate the chain again.
+
+Troubleshooting:
+----------------
+
+The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes.
+Exceeding this number will trigger the following lockdep warning:
+
+	(DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+
+By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
+desktop systems have less than 1,000 lock classes, so this warning
+normally results from lock-class leakage or failure to properly
+initialize locks.  These two problems are illustrated below:
+
+1.	Repeated module loading and unloading while running the validator
+	will result in lock-class leakage.  The issue here is that each
+	load of the module will create a new set of lock classes for
+	that module's locks, but module unloading does not remove old
+	classes (see below discussion of reuse of lock classes for why).
+	Therefore, if that module is loaded and unloaded repeatedly,
+	the number of lock classes will eventually reach the maximum.
+
+2.	Using structures such as arrays that have large numbers of
+	locks that are not explicitly initialized.  For example,
+	a hash table with 8192 buckets where each bucket has its own
+	spinlock_t will consume 8192 lock classes -unless- each spinlock
+	is explicitly initialized at runtime, for example, using the
+	run-time spin_lock_init() as opposed to compile-time initializers
+	such as __SPIN_LOCK_UNLOCKED().  Failure to properly initialize
+	the per-bucket spinlocks would guarantee lock-class overflow.
+	In contrast, a loop that called spin_lock_init() on each lock
+	would place all 8192 locks into a single lock class.
+
+	The moral of this story is that you should always explicitly
+	initialize your locks.
+
+One might argue that the validator should be modified to allow
+lock classes to be reused.  However, if you are tempted to make this
+argument, first review the code and think through the changes that would
+be required, keeping in mind that the lock classes to be removed are
+likely to be linked into the lock-dependency graph.  This turns out to
+be harder to do than to say.
+
+Of course, if you do run out of lock classes, the next thing to do is
+to find the offending lock classes.  First, the following command gives
+you the number of lock classes currently in use along with the maximum:
+
+	grep "lock-classes" /proc/lockdep_stats
+
+This command produces the following output on a modest system:
+
+	 lock-classes:                          748 [max: 8191]
+
+If the number allocated (748 above) increases continually over time,
+then there is likely a leak.  The following command can be used to
+identify the leaking lock classes:
+
+	grep "BD" /proc/lockdep
+
+Run the command and save the output, then compare against the output from
+a later run of this command to identify the leakers.  This same output
+can also help you find situations where runtime lock initialization has
+been omitted.
diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt
new file mode 100644
index 000000000000..7428773a1e69
--- /dev/null
+++ b/Documentation/locking/lockstat.txt
@@ -0,0 +1,178 @@
+
+LOCK STATISTICS
+
+- WHAT
+
+As the name suggests, it provides statistics on locks.
+
+- WHY
+
+Because things like lock contention can severely impact performance.
+
+- HOW
+
+Lockdep already has hooks in the lock functions and maps lock instances to
+lock classes. We build on that (see Documentation/lokcing/lockdep-design.txt).
+The graph below shows the relation between the lock functions and the various
+hooks therein.
+
+        __acquire
+            |
+           lock _____
+            |        \
+            |    __contended
+            |         |
+            |       <wait>
+            | _______/
+            |/
+            |
+       __acquired
+            |
+            .
+          <hold>
+            .
+            |
+       __release
+            |
+         unlock
+
+lock, unlock	- the regular lock functions
+__*		- the hooks
+<> 		- states
+
+With these hooks we provide the following statistics:
+
+ con-bounces       - number of lock contention that involved x-cpu data
+ contentions       - number of lock acquisitions that had to wait
+ wait time min     - shortest (non-0) time we ever had to wait for a lock
+           max     - longest time we ever had to wait for a lock
+	   total   - total time we spend waiting on this lock
+	   avg     - average time spent waiting on this lock
+ acq-bounces       - number of lock acquisitions that involved x-cpu data
+ acquisitions      - number of times we took the lock
+ hold time min     - shortest (non-0) time we ever held the lock
+	   max     - longest time we ever held the lock
+	   total   - total time this lock was held
+	   avg     - average time this lock was held
+
+These numbers are gathered per lock class, per read/write state (when
+applicable).
+
+It also tracks 4 contention points per class. A contention point is a call site
+that had to wait on lock acquisition.
+
+ - CONFIGURATION
+
+Lock statistics are enabled via CONFIG_LOCK_STAT.
+
+ - USAGE
+
+Enable collection of statistics:
+
+# echo 1 >/proc/sys/kernel/lock_stat
+
+Disable collection of statistics:
+
+# echo 0 >/proc/sys/kernel/lock_stat
+
+Look at the current lock statistics:
+
+( line numbers not part of actual output, done for clarity in the explanation
+  below )
+
+# less /proc/lock_stat
+
+01 lock_stat version 0.4
+02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+03                              class name    con-bounces    contentions   waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg
+04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+05
+06                         &mm->mmap_sem-W:            46             84           0.26         939.10       16371.53         194.90          47291        2922365           0.16     2220301.69 17464026916.32        5975.99
+07                         &mm->mmap_sem-R:            37            100           1.31      299502.61      325629.52        3256.30         212344       34316685           0.10        7744.91    95016910.20           2.77
+08                         ---------------
+09                           &mm->mmap_sem              1          [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
+19                           &mm->mmap_sem             96          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+11                           &mm->mmap_sem             34          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+12                           &mm->mmap_sem             17          [<ffffffff81127e71>] vm_munmap+0x41/0x80
+13                         ---------------
+14                           &mm->mmap_sem              1          [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
+15                           &mm->mmap_sem             60          [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
+16                           &mm->mmap_sem             41          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+17                           &mm->mmap_sem             68          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+18
+19.............................................................................................................................................................................................................................
+20
+21                         unix_table_lock:           110            112           0.21          49.24         163.91           1.46          21094          66312           0.12         624.42       31589.81           0.48
+22                         ---------------
+23                         unix_table_lock             45          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+24                         unix_table_lock             47          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+25                         unix_table_lock             15          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+26                         unix_table_lock              5          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+27                         ---------------
+28                         unix_table_lock             39          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+29                         unix_table_lock             49          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+30                         unix_table_lock             20          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+31                         unix_table_lock              4          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+
+
+This excerpt shows the first two lock class statistics. Line 01 shows the
+output version - each time the format changes this will be updated. Line 02-04
+show the header with column descriptions. Lines 05-18 and 20-31 show the actual
+statistics. These statistics come in two parts; the actual stats separated by a
+short separator (line 08, 13) from the contention points.
+
+The first lock (05-18) is a read/write lock, and shows two lines above the
+short separator. The contention points don't match the column descriptors,
+they have two: contentions and [<IP>] symbol. The second set of contention
+points are the points we're contending with.
+
+The integer part of the time values is in us.
+
+Dealing with nested locks, subclasses may appear:
+
+32...........................................................................................................................................................................................................................
+33
+34                               &rq->lock:       13128          13128           0.43         190.53      103881.26           7.91          97454        3453404           0.00         401.11    13224683.11           3.82
+35                               ---------
+36                               &rq->lock          645          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+37                               &rq->lock          297          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+38                               &rq->lock          360          [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
+39                               &rq->lock          428          [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
+40                               ---------
+41                               &rq->lock           77          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+42                               &rq->lock          174          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+43                               &rq->lock         4715          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+44                               &rq->lock          893          [<ffffffff81340524>] schedule+0x157/0x7b8
+45
+46...........................................................................................................................................................................................................................
+47
+48                             &rq->lock/1:        1526          11488           0.33         388.73      136294.31          11.86          21461          38404           0.00          37.93      109388.53           2.84
+49                             -----------
+50                             &rq->lock/1        11526          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+51                             -----------
+52                             &rq->lock/1         5645          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+53                             &rq->lock/1         1224          [<ffffffff81340524>] schedule+0x157/0x7b8
+54                             &rq->lock/1         4336          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+55                             &rq->lock/1          181          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+
+Line 48 shows statistics for the second subclass (/1) of &rq->lock class
+(subclass starts from 0), since in this case, as line 50 suggests,
+double_rq_lock actually acquires a nested lock of two spinlocks.
+
+View the top contending locks:
+
+# grep : /proc/lock_stat | head
+			clockevents_lock:       2926159        2947636           0.15       46882.81  1784540466.34         605.41        3381345        3879161           0.00        2260.97    53178395.68          13.71
+		     tick_broadcast_lock:        346460         346717           0.18        2257.43    39364622.71         113.54        3642919        4242696           0.00        2263.79    49173646.60          11.59
+		  &mapping->i_mmap_mutex:        203896         203899           3.36      645530.05 31767507988.39      155800.21        3361776        8893984           0.17        2254.15    14110121.02           1.59
+			       &rq->lock:        135014         136909           0.18         606.09      842160.68           6.15        1540728       10436146           0.00         728.72    17606683.41           1.69
+	       &(&zone->lru_lock)->rlock:         93000          94934           0.16          59.18      188253.78           1.98        1199912        3809894           0.15         391.40     3559518.81           0.93
+			 tasklist_lock-W:         40667          41130           0.23        1189.42      428980.51          10.43         270278         510106           0.16         653.51     3939674.91           7.72
+			 tasklist_lock-R:         21298          21305           0.20        1310.05      215511.12          10.12         186204         241258           0.14        1162.33     1179779.23           4.89
+			      rcu_node_1:         47656          49022           0.16         635.41      193616.41           3.95         844888        1865423           0.00         764.26     1656226.96           0.89
+       &(&dentry->d_lockref.lock)->rlock:         39791          40179           0.15        1302.08       88851.96           2.21        2790851       12527025           0.10        1910.75     3379714.27           0.27
+			      rcu_node_0:         29203          30064           0.16         786.55     1555573.00          51.74          88963         244254           0.00         398.87      428872.51           1.76
+
+Clear the statistics:
+
+# echo 0 > /proc/lock_stat
diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt
new file mode 100644
index 000000000000..ee231ed09ec6
--- /dev/null
+++ b/Documentation/locking/mutex-design.txt
@@ -0,0 +1,157 @@
+Generic Mutex Subsystem
+
+started by Ingo Molnar <mingo@redhat.com>
+updated by Davidlohr Bueso <davidlohr@hp.com>
+
+What are mutexes?
+-----------------
+
+In the Linux kernel, mutexes refer to a particular locking primitive
+that enforces serialization on shared memory systems, and not only to
+the generic term referring to 'mutual exclusion' found in academia
+or similar theoretical text books. Mutexes are sleeping locks which
+behave similarly to binary semaphores, and were introduced in 2006[1]
+as an alternative to these. This new data structure provided a number
+of advantages, including simpler interfaces, and at that time smaller
+code (see Disadvantages).
+
+[1] http://lwn.net/Articles/164802/
+
+Implementation
+--------------
+
+Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
+and implemented in kernel/locking/mutex.c. These locks use a three
+state atomic counter (->count) to represent the different possible
+transitions that can occur during the lifetime of a lock:
+
+	  1: unlocked
+	  0: locked, no waiters
+   negative: locked, with potential waiters
+
+In its most basic form it also includes a wait-queue and a spinlock
+that serializes access to it. CONFIG_SMP systems can also include
+a pointer to the lock task owner (->owner) as well as a spinner MCS
+lock (->osq), both described below in (ii).
+
+When acquiring a mutex, there are three possible paths that can be
+taken, depending on the state of the lock:
+
+(i) fastpath: tries to atomically acquire the lock by decrementing the
+    counter. If it was already taken by another task it goes to the next
+    possible path. This logic is architecture specific. On x86-64, the
+    locking fastpath is 2 instructions:
+
+    0000000000000e10 <mutex_lock>:
+    e21:   f0 ff 0b                lock decl (%rbx)
+    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
+
+   the unlocking fastpath is equally tight:
+
+    0000000000000bc0 <mutex_unlock>:
+    bc8:   f0 ff 07                lock incl (%rdi)
+    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
+
+
+(ii) midpath: aka optimistic spinning, tries to spin for acquisition
+     while the lock owner is running and there are no other tasks ready
+     to run that have higher priority (need_resched). The rationale is
+     that if the lock owner is running, it is likely to release the lock
+     soon. The mutex spinners are queued up using MCS lock so that only
+     one spinner can compete for the mutex.
+
+     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
+     with the desirable properties of being fair and with each cpu trying
+     to acquire the lock spinning on a local variable. It avoids expensive
+     cacheline bouncing that common test-and-set spinlock implementations
+     incur. An MCS-like lock is specially tailored for optimistic spinning
+     for sleeping lock implementation. An important feature of the customized
+     MCS lock is that it has the extra property that spinners are able to exit
+     the MCS spinlock queue when they need to reschedule. This further helps
+     avoid situations where MCS spinners that need to reschedule would continue
+     waiting to spin on mutex owner, only to go directly to slowpath upon
+     obtaining the MCS lock.
+
+
+(iii) slowpath: last resort, if the lock is still unable to be acquired,
+      the task is added to the wait-queue and sleeps until woken up by the
+      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
+
+While formally kernel mutexes are sleepable locks, it is path (ii) that
+makes them more practically a hybrid type. By simply not interrupting a
+task and busy-waiting for a few cycles instead of immediately sleeping,
+the performance of this lock has been seen to significantly improve a
+number of workloads. Note that this technique is also used for rw-semaphores.
+
+Semantics
+---------
+
+The mutex subsystem checks and enforces the following rules:
+
+    - Only one task can hold the mutex at a time.
+    - Only the owner can unlock the mutex.
+    - Multiple unlocks are not permitted.
+    - Recursive locking/unlocking is not permitted.
+    - A mutex must only be initialized via the API (see below).
+    - A task may not exit with a mutex held.
+    - Memory areas where held locks reside must not be freed.
+    - Held mutexes must not be reinitialized.
+    - Mutexes may not be used in hardware or software interrupt
+      contexts such as tasklets and timers.
+
+These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled.
+In addition, the mutex debugging code also implements a number of other
+features that make lock debugging easier and faster:
+
+    - Uses symbolic names of mutexes, whenever they are printed
+      in debug output.
+    - Point-of-acquire tracking, symbolic lookup of function names,
+      list of all locks held in the system, printout of them.
+    - Owner tracking.
+    - Detects self-recursing locks and prints out all relevant info.
+    - Detects multi-task circular deadlocks and prints out all affected
+      locks and tasks (and only those tasks).
+
+
+Interfaces
+----------
+Statically define the mutex:
+   DEFINE_MUTEX(name);
+
+Dynamically initialize the mutex:
+   mutex_init(mutex);
+
+Acquire the mutex, uninterruptible:
+   void mutex_lock(struct mutex *lock);
+   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+   int  mutex_trylock(struct mutex *lock);
+
+Acquire the mutex, interruptible:
+   int mutex_lock_interruptible_nested(struct mutex *lock,
+				       unsigned int subclass);
+   int mutex_lock_interruptible(struct mutex *lock);
+
+Acquire the mutex, interruptible, if dec to 0:
+   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+Unlock the mutex:
+   void mutex_unlock(struct mutex *lock);
+
+Test if the mutex is taken:
+   int mutex_is_locked(struct mutex *lock);
+
+Disadvantages
+-------------
+
+Unlike its original design and purpose, 'struct mutex' is larger than
+most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
+as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the
+'struct rw_semaphore' variant. Larger structure sizes mean more CPU
+cache and memory footprint.
+
+When to use mutexes
+-------------------
+
+Unless the strict semantics of mutexes are unsuitable and/or the critical
+region prevents the lock from being shared, always prefer them to any other
+locking primitive.
diff --git a/Documentation/locking/rt-mutex-design.txt b/Documentation/locking/rt-mutex-design.txt
new file mode 100644
index 000000000000..8666070d3189
--- /dev/null
+++ b/Documentation/locking/rt-mutex-design.txt
@@ -0,0 +1,781 @@
+#
+# Copyright (c) 2006 Steven Rostedt
+# Licensed under the GNU Free Documentation License, Version 1.2
+#
+
+RT-mutex implementation design
+------------------------------
+
+This document tries to describe the design of the rtmutex.c implementation.
+It doesn't describe the reasons why rtmutex.c exists. For that please see
+Documentation/rt-mutex.txt.  Although this document does explain problems
+that happen without this code, but that is in the concept to understand
+what the code actually is doing.
+
+The goal of this document is to help others understand the priority
+inheritance (PI) algorithm that is used, as well as reasons for the
+decisions that were made to implement PI in the manner that was done.
+
+
+Unbounded Priority Inversion
+----------------------------
+
+Priority inversion is when a lower priority process executes while a higher
+priority process wants to run.  This happens for several reasons, and
+most of the time it can't be helped.  Anytime a high priority process wants
+to use a resource that a lower priority process has (a mutex for example),
+the high priority process must wait until the lower priority process is done
+with the resource.  This is a priority inversion.  What we want to prevent
+is something called unbounded priority inversion.  That is when the high
+priority process is prevented from running by a lower priority process for
+an undetermined amount of time.
+
+The classic example of unbounded priority inversion is where you have three
+processes, let's call them processes A, B, and C, where A is the highest
+priority process, C is the lowest, and B is in between. A tries to grab a lock
+that C owns and must wait and lets C run to release the lock. But in the
+meantime, B executes, and since B is of a higher priority than C, it preempts C,
+but by doing so, it is in fact preempting A which is a higher priority process.
+Now there's no way of knowing how long A will be sleeping waiting for C
+to release the lock, because for all we know, B is a CPU hog and will
+never give C a chance to release the lock.  This is called unbounded priority
+inversion.
+
+Here's a little ASCII art to show the problem.
+
+   grab lock L1 (owned by C)
+     |
+A ---+
+        C preempted by B
+          |
+C    +----+
+
+B         +-------->
+                B now keeps A from running.
+
+
+Priority Inheritance (PI)
+-------------------------
+
+There are several ways to solve this issue, but other ways are out of scope
+for this document.  Here we only discuss PI.
+
+PI is where a process inherits the priority of another process if the other
+process blocks on a lock owned by the current process.  To make this easier
+to understand, let's use the previous example, with processes A, B, and C again.
+
+This time, when A blocks on the lock owned by C, C would inherit the priority
+of A.  So now if B becomes runnable, it would not preempt C, since C now has
+the high priority of A.  As soon as C releases the lock, it loses its
+inherited priority, and A then can continue with the resource that C had.
+
+Terminology
+-----------
+
+Here I explain some terminology that is used in this document to help describe
+the design that is used to implement PI.
+
+PI chain - The PI chain is an ordered series of locks and processes that cause
+           processes to inherit priorities from a previous process that is
+           blocked on one of its locks.  This is described in more detail
+           later in this document.
+
+mutex    - In this document, to differentiate from locks that implement
+           PI and spin locks that are used in the PI code, from now on
+           the PI locks will be called a mutex.
+
+lock     - In this document from now on, I will use the term lock when
+           referring to spin locks that are used to protect parts of the PI
+           algorithm.  These locks disable preemption for UP (when
+           CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from
+           entering critical sections simultaneously.
+
+spin lock - Same as lock above.
+
+waiter   - A waiter is a struct that is stored on the stack of a blocked
+           process.  Since the scope of the waiter is within the code for
+           a process being blocked on the mutex, it is fine to allocate
+           the waiter on the process's stack (local variable).  This
+           structure holds a pointer to the task, as well as the mutex that
+           the task is blocked on.  It also has the plist node structures to
+           place the task in the waiter_list of a mutex as well as the
+           pi_list of a mutex owner task (described below).
+
+           waiter is sometimes used in reference to the task that is waiting
+           on a mutex. This is the same as waiter->task.
+
+waiters  - A list of processes that are blocked on a mutex.
+
+top waiter - The highest priority process waiting on a specific mutex.
+
+top pi waiter - The highest priority process waiting on one of the mutexes
+                that a specific process owns.
+
+Note:  task and process are used interchangeably in this document, mostly to
+       differentiate between two processes that are being described together.
+
+
+PI chain
+--------
+
+The PI chain is a list of processes and mutexes that may cause priority
+inheritance to take place.  Multiple chains may converge, but a chain
+would never diverge, since a process can't be blocked on more than one
+mutex at a time.
+
+Example:
+
+   Process:  A, B, C, D, E
+   Mutexes:  L1, L2, L3, L4
+
+   A owns: L1
+           B blocked on L1
+           B owns L2
+                  C blocked on L2
+                  C owns L3
+                         D blocked on L3
+                         D owns L4
+                                E blocked on L4
+
+The chain would be:
+
+   E->L4->D->L3->C->L2->B->L1->A
+
+To show where two chains merge, we could add another process F and
+another mutex L5 where B owns L5 and F is blocked on mutex L5.
+
+The chain for F would be:
+
+   F->L5->B->L1->A
+
+Since a process may own more than one mutex, but never be blocked on more than
+one, the chains merge.
+
+Here we show both chains:
+
+   E->L4->D->L3->C->L2-+
+                       |
+                       +->B->L1->A
+                       |
+                 F->L5-+
+
+For PI to work, the processes at the right end of these chains (or we may
+also call it the Top of the chain) must be equal to or higher in priority
+than the processes to the left or below in the chain.
+
+Also since a mutex may have more than one process blocked on it, we can
+have multiple chains merge at mutexes.  If we add another process G that is
+blocked on mutex L2:
+
+  G->L2->B->L1->A
+
+And once again, to show how this can grow I will show the merging chains
+again.
+
+   E->L4->D->L3->C-+
+                   +->L2-+
+                   |     |
+                 G-+     +->B->L1->A
+                         |
+                   F->L5-+
+
+
+Plist
+-----
+
+Before I go further and talk about how the PI chain is stored through lists
+on both mutexes and processes, I'll explain the plist.  This is similar to
+the struct list_head functionality that is already in the kernel.
+The implementation of plist is out of scope for this document, but it is
+very important to understand what it does.
+
+There are a few differences between plist and list, the most important one
+being that plist is a priority sorted linked list.  This means that the
+priorities of the plist are sorted, such that it takes O(1) to retrieve the
+highest priority item in the list.  Obviously this is useful to store processes
+based on their priorities.
+
+Another difference, which is important for implementation, is that, unlike
+list, the head of the list is a different element than the nodes of a list.
+So the head of the list is declared as struct plist_head and nodes that will
+be added to the list are declared as struct plist_node.
+
+
+Mutex Waiter List
+-----------------
+
+Every mutex keeps track of all the waiters that are blocked on itself. The mutex
+has a plist to store these waiters by priority.  This list is protected by
+a spin lock that is located in the struct of the mutex. This lock is called
+wait_lock.  Since the modification of the waiter list is never done in
+interrupt context, the wait_lock can be taken without disabling interrupts.
+
+
+Task PI List
+------------
+
+To keep track of the PI chains, each process has its own PI list.  This is
+a list of all top waiters of the mutexes that are owned by the process.
+Note that this list only holds the top waiters and not all waiters that are
+blocked on mutexes owned by the process.
+
+The top of the task's PI list is always the highest priority task that
+is waiting on a mutex that is owned by the task.  So if the task has
+inherited a priority, it will always be the priority of the task that is
+at the top of this list.
+
+This list is stored in the task structure of a process as a plist called
+pi_list.  This list is protected by a spin lock also in the task structure,
+called pi_lock.  This lock may also be taken in interrupt context, so when
+locking the pi_lock, interrupts must be disabled.
+
+
+Depth of the PI Chain
+---------------------
+
+The maximum depth of the PI chain is not dynamic, and could actually be
+defined.  But is very complex to figure it out, since it depends on all
+the nesting of mutexes.  Let's look at the example where we have 3 mutexes,
+L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
+The following shows a locking order of L1->L2->L3, but may not actually
+be directly nested that way.
+
+void func1(void)
+{
+	mutex_lock(L1);
+
+	/* do anything */
+
+	mutex_unlock(L1);
+}
+
+void func2(void)
+{
+	mutex_lock(L1);
+	mutex_lock(L2);
+
+	/* do something */
+
+	mutex_unlock(L2);
+	mutex_unlock(L1);
+}
+
+void func3(void)
+{
+	mutex_lock(L2);
+	mutex_lock(L3);
+
+	/* do something else */
+
+	mutex_unlock(L3);
+	mutex_unlock(L2);
+}
+
+void func4(void)
+{
+	mutex_lock(L3);
+
+	/* do something again */
+
+	mutex_unlock(L3);
+}
+
+Now we add 4 processes that run each of these functions separately.
+Processes A, B, C, and D which run functions func1, func2, func3 and func4
+respectively, and such that D runs first and A last.  With D being preempted
+in func4 in the "do something again" area, we have a locking that follows:
+
+D owns L3
+       C blocked on L3
+       C owns L2
+              B blocked on L2
+              B owns L1
+                     A blocked on L1
+
+And thus we have the chain A->L1->B->L2->C->L3->D.
+
+This gives us a PI depth of 4 (four processes), but looking at any of the
+functions individually, it seems as though they only have at most a locking
+depth of two.  So, although the locking depth is defined at compile time,
+it still is very difficult to find the possibilities of that depth.
+
+Now since mutexes can be defined by user-land applications, we don't want a DOS
+type of application that nests large amounts of mutexes to create a large
+PI chain, and have the code holding spin locks while looking at a large
+amount of data.  So to prevent this, the implementation not only implements
+a maximum lock depth, but also only holds at most two different locks at a
+time, as it walks the PI chain.  More about this below.
+
+
+Mutex owner and flags
+---------------------
+
+The mutex structure contains a pointer to the owner of the mutex.  If the
+mutex is not owned, this owner is set to NULL.  Since all architectures
+have the task structure on at least a four byte alignment (and if this is
+not true, the rtmutex.c code will be broken!), this allows for the two
+least significant bits to be used as flags.  This part is also described
+in Documentation/rt-mutex.txt, but will also be briefly described here.
+
+Bit 0 is used as the "Pending Owner" flag.  This is described later.
+Bit 1 is used as the "Has Waiters" flags.  This is also described later
+  in more detail, but is set whenever there are waiters on a mutex.
+
+
+cmpxchg Tricks
+--------------
+
+Some architectures implement an atomic cmpxchg (Compare and Exchange).  This
+is used (when applicable) to keep the fast path of grabbing and releasing
+mutexes short.
+
+cmpxchg is basically the following function performed atomically:
+
+unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
+{
+	unsigned long T = *A;
+	if (*A == *B) {
+		*A = *C;
+	}
+	return T;
+}
+#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
+
+This is really nice to have, since it allows you to only update a variable
+if the variable is what you expect it to be.  You know if it succeeded if
+the return value (the old value of A) is equal to B.
+
+The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If
+the architecture does not support CMPXCHG, then this macro is simply set
+to fail every time.  But if CMPXCHG is supported, then this will
+help out extremely to keep the fast path short.
+
+The use of rt_mutex_cmpxchg with the flags in the owner field help optimize
+the system for architectures that support it.  This will also be explained
+later in this document.
+
+
+Priority adjustments
+--------------------
+
+The implementation of the PI code in rtmutex.c has several places that a
+process must adjust its priority.  With the help of the pi_list of a
+process this is rather easy to know what needs to be adjusted.
+
+The functions implementing the task adjustments are rt_mutex_adjust_prio,
+__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock
+to already be taken), rt_mutex_getprio, and rt_mutex_setprio.
+
+rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio.
+
+rt_mutex_getprio returns the priority that the task should have.  Either the
+task's own normal priority, or if a process of a higher priority is waiting on
+a mutex owned by the task, then that higher priority should be returned.
+Since the pi_list of a task holds an order by priority list of all the top
+waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs
+to compare the top pi waiter to its own normal priority, and return the higher
+priority back.
+
+(Note:  if looking at the code, you will notice that the lower number of
+        prio is returned.  This is because the prio field in the task structure
+        is an inverse order of the actual priority.  So a "prio" of 5 is
+        of higher priority than a "prio" of 10.)
+
+__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the
+result does not equal the task's current priority, then rt_mutex_setprio
+is called to adjust the priority of the task to the new priority.
+Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the
+actual change in priority.
+
+It is interesting to note that __rt_mutex_adjust_prio can either increase
+or decrease the priority of the task.  In the case that a higher priority
+process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio
+would increase/boost the task's priority.  But if a higher priority task
+were for some reason to leave the mutex (timeout or signal), this same function
+would decrease/unboost the priority of the task.  That is because the pi_list
+always contains the highest priority task that is waiting on a mutex owned
+by the task, so we only need to compare the priority of that top pi waiter
+to the normal priority of the given task.
+
+
+High level overview of the PI chain walk
+----------------------------------------
+
+The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain.
+
+The implementation has gone through several iterations, and has ended up
+with what we believe is the best.  It walks the PI chain by only grabbing
+at most two locks at a time, and is very efficient.
+
+The rt_mutex_adjust_prio_chain can be used either to boost or lower process
+priorities.
+
+rt_mutex_adjust_prio_chain is called with a task to be checked for PI
+(de)boosting (the owner of a mutex that a process is blocking on), a flag to
+check for deadlocking, the mutex that the task owns, and a pointer to a waiter
+that is the process's waiter struct that is blocked on the mutex (although this
+parameter may be NULL for deboosting).
+
+For this explanation, I will not mention deadlock detection. This explanation
+will try to stay at a high level.
+
+When this function is called, there are no locks held.  That also means
+that the state of the owner and lock can change when entered into this function.
+
+Before this function is called, the task has already had rt_mutex_adjust_prio
+performed on it.  This means that the task is set to the priority that it
+should be at, but the plist nodes of the task's waiter have not been updated
+with the new priorities, and that this task may not be in the proper locations
+in the pi_lists and wait_lists that the task is blocked on.  This function
+solves all that.
+
+A loop is entered, where task is the owner to be checked for PI changes that
+was passed by parameter (for the first iteration).  The pi_lock of this task is
+taken to prevent any more changes to the pi_list of the task.  This also
+prevents new tasks from completing the blocking on a mutex that is owned by this
+task.
+
+If the task is not blocked on a mutex then the loop is exited.  We are at
+the top of the PI chain.
+
+A check is now done to see if the original waiter (the process that is blocked
+on the current mutex) is the top pi waiter of the task.  That is, is this
+waiter on the top of the task's pi_list.  If it is not, it either means that
+there is another process higher in priority that is blocked on one of the
+mutexes that the task owns, or that the waiter has just woken up via a signal
+or timeout and has left the PI chain.  In either case, the loop is exited, since
+we don't need to do any more changes to the priority of the current task, or any
+task that owns a mutex that this current task is waiting on.  A priority chain
+walk is only needed when a new top pi waiter is made to a task.
+
+The next check sees if the task's waiter plist node has the priority equal to
+the priority the task is set at.  If they are equal, then we are done with
+the loop.  Remember that the function started with the priority of the
+task adjusted, but the plist nodes that hold the task in other processes
+pi_lists have not been adjusted.
+
+Next, we look at the mutex that the task is blocked on. The mutex's wait_lock
+is taken.  This is done by a spin_trylock, because the locking order of the
+pi_lock and wait_lock goes in the opposite direction. If we fail to grab the
+lock, the pi_lock is released, and we restart the loop.
+
+Now that we have both the pi_lock of the task as well as the wait_lock of
+the mutex the task is blocked on, we update the task's waiter's plist node
+that is located on the mutex's wait_list.
+
+Now we release the pi_lock of the task.
+
+Next the owner of the mutex has its pi_lock taken, so we can update the
+task's entry in the owner's pi_list.  If the task is the highest priority
+process on the mutex's wait_list, then we remove the previous top waiter
+from the owner's pi_list, and replace it with the task.
+
+Note: It is possible that the task was the current top waiter on the mutex,
+      in which case the task is not yet on the pi_list of the waiter.  This
+      is OK, since plist_del does nothing if the plist node is not on any
+      list.
+
+If the task was not the top waiter of the mutex, but it was before we
+did the priority updates, that means we are deboosting/lowering the
+task.  In this case, the task is removed from the pi_list of the owner,
+and the new top waiter is added.
+
+Lastly, we unlock both the pi_lock of the task, as well as the mutex's
+wait_lock, and continue the loop again.  On the next iteration of the
+loop, the previous owner of the mutex will be the task that will be
+processed.
+
+Note: One might think that the owner of this mutex might have changed
+      since we just grab the mutex's wait_lock. And one could be right.
+      The important thing to remember is that the owner could not have
+      become the task that is being processed in the PI chain, since
+      we have taken that task's pi_lock at the beginning of the loop.
+      So as long as there is an owner of this mutex that is not the same
+      process as the tasked being worked on, we are OK.
+
+      Looking closely at the code, one might be confused.  The check for the
+      end of the PI chain is when the task isn't blocked on anything or the
+      task's waiter structure "task" element is NULL.  This check is
+      protected only by the task's pi_lock.  But the code to unlock the mutex
+      sets the task's waiter structure "task" element to NULL with only
+      the protection of the mutex's wait_lock, which was not taken yet.
+      Isn't this a race condition if the task becomes the new owner?
+
+      The answer is No!  The trick is the spin_trylock of the mutex's
+      wait_lock.  If we fail that lock, we release the pi_lock of the
+      task and continue the loop, doing the end of PI chain check again.
+
+      In the code to release the lock, the wait_lock of the mutex is held
+      the entire time, and it is not let go when we grab the pi_lock of the
+      new owner of the mutex.  So if the switch of a new owner were to happen
+      after the check for end of the PI chain and the grabbing of the
+      wait_lock, the unlocking code would spin on the new owner's pi_lock
+      but never give up the wait_lock.  So the PI chain loop is guaranteed to
+      fail the spin_trylock on the wait_lock, release the pi_lock, and
+      try again.
+
+      If you don't quite understand the above, that's OK. You don't have to,
+      unless you really want to make a proof out of it ;)
+
+
+Pending Owners and Lock stealing
+--------------------------------
+
+One of the flags in the owner field of the mutex structure is "Pending Owner".
+What this means is that an owner was chosen by the process releasing the
+mutex, but that owner has yet to wake up and actually take the mutex.
+
+Why is this important?  Why can't we just give the mutex to another process
+and be done with it?
+
+The PI code is to help with real-time processes, and to let the highest
+priority process run as long as possible with little latencies and delays.
+If a high priority process owns a mutex that a lower priority process is
+blocked on, when the mutex is released it would be given to the lower priority
+process.  What if the higher priority process wants to take that mutex again.
+The high priority process would fail to take that mutex that it just gave up
+and it would need to boost the lower priority process to run with full
+latency of that critical section (since the low priority process just entered
+it).
+
+There's no reason a high priority process that gives up a mutex should be
+penalized if it tries to take that mutex again.  If the new owner of the
+mutex has not woken up yet, there's no reason that the higher priority process
+could not take that mutex away.
+
+To solve this, we introduced Pending Ownership and Lock Stealing.  When a
+new process is given a mutex that it was blocked on, it is only given
+pending ownership.  This means that it's the new owner, unless a higher
+priority process comes in and tries to grab that mutex.  If a higher priority
+process does come along and wants that mutex, we let the higher priority
+process "steal" the mutex from the pending owner (only if it is still pending)
+and continue with the mutex.
+
+
+Taking of a mutex (The walk through)
+------------------------------------
+
+OK, now let's take a look at the detailed walk through of what happens when
+taking a mutex.
+
+The first thing that is tried is the fast taking of the mutex.  This is
+done when we have CMPXCHG enabled (otherwise the fast taking automatically
+fails).  Only when the owner field of the mutex is NULL can the lock be
+taken with the CMPXCHG and nothing else needs to be done.
+
+If there is contention on the lock, whether it is owned or pending owner
+we go about the slow path (rt_mutex_slowlock).
+
+The slow path function is where the task's waiter structure is created on
+the stack.  This is because the waiter structure is only needed for the
+scope of this function.  The waiter structure holds the nodes to store
+the task on the wait_list of the mutex, and if need be, the pi_list of
+the owner.
+
+The wait_lock of the mutex is taken since the slow path of unlocking the
+mutex also takes this lock.
+
+We then call try_to_take_rt_mutex.  This is where the architecture that
+does not implement CMPXCHG would always grab the lock (if there's no
+contention).
+
+try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
+slow path.  The first thing that is done here is an atomic setting of
+the "Has Waiters" flag of the mutex's owner field.  Yes, this could really
+be false, because if the mutex has no owner, there are no waiters and
+the current task also won't have any waiters.  But we don't have the lock
+yet, so we assume we are going to be a waiter.  The reason for this is to
+play nice for those architectures that do have CMPXCHG.  By setting this flag
+now, the owner of the mutex can't release the mutex without going into the
+slow unlock path, and it would then need to grab the wait_lock, which this
+code currently holds.  So setting the "Has Waiters" flag forces the owner
+to synchronize with this code.
+
+Now that we know that we can't have any races with the owner releasing the
+mutex, we check to see if we can take the ownership.  This is done if the
+mutex doesn't have a owner, or if we can steal the mutex from a pending
+owner.  Let's look at the situations we have here.
+
+  1) Has owner that is pending
+  ----------------------------
+
+  The mutex has a owner, but it hasn't woken up and the mutex flag
+  "Pending Owner" is set.  The first check is to see if the owner isn't the
+  current task.  This is because this function is also used for the pending
+  owner to grab the mutex.  When a pending owner wakes up, it checks to see
+  if it can take the mutex, and this is done if the owner is already set to
+  itself.  If so, we succeed and leave the function, clearing the "Pending
+  Owner" bit.
+
+  If the pending owner is not current, we check to see if the current priority is
+  higher than the pending owner.  If not, we fail the function and return.
+
+  There's also something special about a pending owner.  That is a pending owner
+  is never blocked on a mutex.  So there is no PI chain to worry about.  It also
+  means that if the mutex doesn't have any waiters, there's no accounting needed
+  to update the pending owner's pi_list, since we only worry about processes
+  blocked on the current mutex.
+
+  If there are waiters on this mutex, and we just stole the ownership, we need
+  to take the top waiter, remove it from the pi_list of the pending owner, and
+  add it to the current pi_list.  Note that at this moment, the pending owner
+  is no longer on the list of waiters.  This is fine, since the pending owner
+  would add itself back when it realizes that it had the ownership stolen
+  from itself.  When the pending owner tries to grab the mutex, it will fail
+  in try_to_take_rt_mutex if the owner field points to another process.
+
+  2) No owner
+  -----------
+
+  If there is no owner (or we successfully stole the lock), we set the owner
+  of the mutex to current, and set the flag of "Has Waiters" if the current
+  mutex actually has waiters, or we clear the flag if it doesn't.  See, it was
+  OK that we set that flag early, since now it is cleared.
+
+  3) Failed to grab ownership
+  ---------------------------
+
+  The most interesting case is when we fail to take ownership. This means that
+  there exists an owner, or there's a pending owner with equal or higher
+  priority than the current task.
+
+We'll continue on the failed case.
+
+If the mutex has a timeout, we set up a timer to go off to break us out
+of this mutex if we failed to get it after a specified amount of time.
+
+Now we enter a loop that will continue to try to take ownership of the mutex, or
+fail from a timeout or signal.
+
+Once again we try to take the mutex.  This will usually fail the first time
+in the loop, since it had just failed to get the mutex.  But the second time
+in the loop, this would likely succeed, since the task would likely be
+the pending owner.
+
+If the mutex is TASK_INTERRUPTIBLE a check for signals and timeout is done
+here.
+
+The waiter structure has a "task" field that points to the task that is blocked
+on the mutex.  This field can be NULL the first time it goes through the loop
+or if the task is a pending owner and had its mutex stolen.  If the "task"
+field is NULL then we need to set up the accounting for it.
+
+Task blocks on mutex
+--------------------
+
+The accounting of a mutex and process is done with the waiter structure of
+the process.  The "task" field is set to the process, and the "lock" field
+to the mutex.  The plist nodes are initialized to the processes current
+priority.
+
+Since the wait_lock was taken at the entry of the slow lock, we can safely
+add the waiter to the wait_list.  If the current process is the highest
+priority process currently waiting on this mutex, then we remove the
+previous top waiter process (if it exists) from the pi_list of the owner,
+and add the current process to that list.  Since the pi_list of the owner
+has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
+should adjust its priority accordingly.
+
+If the owner is also blocked on a lock, and had its pi_list changed
+(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
+and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
+
+Now all locks are released, and if the current process is still blocked on a
+mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
+
+Waking up in the loop
+---------------------
+
+The schedule can then wake up for a few reasons.
+  1) we were given pending ownership of the mutex.
+  2) we received a signal and was TASK_INTERRUPTIBLE
+  3) we had a timeout and was TASK_INTERRUPTIBLE
+
+In any of these cases, we continue the loop and once again try to grab the
+ownership of the mutex.  If we succeed, we exit the loop, otherwise we continue
+and on signal and timeout, will exit the loop, or if we had the mutex stolen
+we just simply add ourselves back on the lists and go back to sleep.
+
+Note: For various reasons, because of timeout and signals, the steal mutex
+      algorithm needs to be careful. This is because the current process is
+      still on the wait_list. And because of dynamic changing of priorities,
+      especially on SCHED_OTHER tasks, the current process can be the
+      highest priority task on the wait_list.
+
+Failed to get mutex on Timeout or Signal
+----------------------------------------
+
+If a timeout or signal occurred, the waiter's "task" field would not be
+NULL and the task needs to be taken off the wait_list of the mutex and perhaps
+pi_list of the owner.  If this process was a high priority process, then
+the rt_mutex_adjust_prio_chain needs to be executed again on the owner,
+but this time it will be lowering the priorities.
+
+
+Unlocking the Mutex
+-------------------
+
+The unlocking of a mutex also has a fast path for those architectures with
+CMPXCHG.  Since the taking of a mutex on contention always sets the
+"Has Waiters" flag of the mutex's owner, we use this to know if we need to
+take the slow path when unlocking the mutex.  If the mutex doesn't have any
+waiters, the owner field of the mutex would equal the current process and
+the mutex can be unlocked by just replacing the owner field with NULL.
+
+If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available),
+the slow unlock path is taken.
+
+The first thing done in the slow unlock path is to take the wait_lock of the
+mutex.  This synchronizes the locking and unlocking of the mutex.
+
+A check is made to see if the mutex has waiters or not.  On architectures that
+do not have CMPXCHG, this is the location that the owner of the mutex will
+determine if a waiter needs to be awoken or not.  On architectures that
+do have CMPXCHG, that check is done in the fast path, but it is still needed
+in the slow path too.  If a waiter of a mutex woke up because of a signal
+or timeout between the time the owner failed the fast path CMPXCHG check and
+the grabbing of the wait_lock, the mutex may not have any waiters, thus the
+owner still needs to make this check. If there are no waiters then the mutex
+owner field is set to NULL, the wait_lock is released and nothing more is
+needed.
+
+If there are waiters, then we need to wake one up and give that waiter
+pending ownership.
+
+On the wake up code, the pi_lock of the current owner is taken.  The top
+waiter of the lock is found and removed from the wait_list of the mutex
+as well as the pi_list of the current owner.  The task field of the new
+pending owner's waiter structure is set to NULL, and the owner field of the
+mutex is set to the new owner with the "Pending Owner" bit set, as well
+as the "Has Waiters" bit if there still are other processes blocked on the
+mutex.
+
+The pi_lock of the previous owner is released, and the new pending owner's
+pi_lock is taken.  Remember that this is the trick to prevent the race
+condition in rt_mutex_adjust_prio_chain from adding itself as a waiter
+on the mutex.
+
+We now clear the "pi_blocked_on" field of the new pending owner, and if
+the mutex still has waiters pending, we add the new top waiter to the pi_list
+of the pending owner.
+
+Finally we unlock the pi_lock of the pending owner and wake it up.
+
+
+Contact
+-------
+
+For updates on this document, please email Steven Rostedt <rostedt@goodmis.org>
+
+
+Credits
+-------
+
+Author:  Steven Rostedt <rostedt@goodmis.org>
+
+Reviewers:  Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap
+
+Updates
+-------
+
+This document was originally written for 2.6.17-rc3-mm1
diff --git a/Documentation/locking/rt-mutex.txt b/Documentation/locking/rt-mutex.txt
new file mode 100644
index 000000000000..243393d882ee
--- /dev/null
+++ b/Documentation/locking/rt-mutex.txt
@@ -0,0 +1,79 @@
+RT-mutex subsystem with PI support
+----------------------------------
+
+RT-mutexes with priority inheritance are used to support PI-futexes,
+which enable pthread_mutex_t priority inheritance attributes
+(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
+about PI-futexes.]
+
+This technology was developed in the -rt tree and streamlined for
+pthread_mutex support.
+
+Basic principles:
+-----------------
+
+RT-mutexes extend the semantics of simple mutexes by the priority
+inheritance protocol.
+
+A low priority owner of a rt-mutex inherits the priority of a higher
+priority waiter until the rt-mutex is released. If the temporarily
+boosted owner blocks on a rt-mutex itself it propagates the priority
+boosting to the owner of the other rt_mutex it gets blocked on. The
+priority boosting is immediately removed once the rt_mutex has been
+unlocked.
+
+This approach allows us to shorten the block of high-prio tasks on
+mutexes which protect shared resources. Priority inheritance is not a
+magic bullet for poorly designed applications, but it allows
+well-designed applications to use userspace locks in critical parts of
+an high priority thread, without losing determinism.
+
+The enqueueing of the waiters into the rtmutex waiter list is done in
+priority order. For same priorities FIFO order is chosen. For each
+rtmutex, only the top priority waiter is enqueued into the owner's
+priority waiters list. This list too queues in priority order. Whenever
+the top priority waiter of a task changes (for example it timed out or
+got a signal), the priority of the owner task is readjusted. [The
+priority enqueueing is handled by "plists", see include/linux/plist.h
+for more details.]
+
+RT-mutexes are optimized for fastpath operations and have no internal
+locking overhead when locking an uncontended mutex or unlocking a mutex
+without waiters. The optimized fastpath operations require cmpxchg
+support. [If that is not available then the rt-mutex internal spinlock
+is used]
+
+The state of the rt-mutex is tracked via the owner field of the rt-mutex
+structure:
+
+rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1
+are used to keep track of the "owner is pending" and "rtmutex has
+waiters" state.
+
+ owner		bit1	bit0
+ NULL		0	0	mutex is free (fast acquire possible)
+ NULL		0	1	invalid state
+ NULL		1	0	Transitional state*
+ NULL		1	1	invalid state
+ taskpointer	0	0	mutex is held (fast release possible)
+ taskpointer	0	1	task is pending owner
+ taskpointer	1	0	mutex is held and has waiters
+ taskpointer	1	1	task is pending owner and mutex has waiters
+
+Pending-ownership handling is a performance optimization:
+pending-ownership is assigned to the first (highest priority) waiter of
+the mutex, when the mutex is released. The thread is woken up and once
+it starts executing it can acquire the mutex. Until the mutex is taken
+by it (bit 0 is cleared) a competing higher priority thread can "steal"
+the mutex which puts the woken up thread back on the waiters list.
+
+The pending-ownership optimization is especially important for the
+uninterrupted workflow of high-prio tasks which repeatedly
+takes/releases locks that have lower-prio waiters. Without this
+optimization the higher-prio thread would ping-pong to the lower-prio
+task [because at unlock time we always assign a new owner].
+
+(*) The "mutex has waiters" bit gets set to take the lock. If the lock
+doesn't already have an owner, this bit is quickly cleared if there are
+no waiters.  So this is a transitional state to synchronize with looking
+at the owner field of the mutex and the mutex owner releasing the lock.
diff --git a/Documentation/locking/spinlocks.txt b/Documentation/locking/spinlocks.txt
new file mode 100644
index 000000000000..ff35e40bdf5b
--- /dev/null
+++ b/Documentation/locking/spinlocks.txt
@@ -0,0 +1,167 @@
+Lesson 1: Spin locks
+
+The most basic primitive for locking is spinlock.
+
+static DEFINE_SPINLOCK(xxx_lock);
+
+	unsigned long flags;
+
+	spin_lock_irqsave(&xxx_lock, flags);
+	... critical section here ..
+	spin_unlock_irqrestore(&xxx_lock, flags);
+
+The above is always safe. It will disable interrupts _locally_, but the
+spinlock itself will guarantee the global lock, so it will guarantee that
+there is only one thread-of-control within the region(s) protected by that
+lock. This works well even under UP also, so the code does _not_ need to
+worry about UP vs SMP issues: the spinlocks work correctly under both.
+
+   NOTE! Implications of spin_locks for memory are further described in:
+
+     Documentation/memory-barriers.txt
+       (5) LOCK operations.
+       (6) UNLOCK operations.
+
+The above is usually pretty simple (you usually need and want only one
+spinlock for most things - using more than one spinlock can make things a
+lot more complex and even slower and is usually worth it only for
+sequences that you _know_ need to be split up: avoid it at all cost if you
+aren't sure).
+
+This is really the only really hard part about spinlocks: once you start
+using spinlocks they tend to expand to areas you might not have noticed
+before, because you have to make sure the spinlocks correctly protect the
+shared data structures _everywhere_ they are used. The spinlocks are most
+easily added to places that are completely independent of other code (for
+example, internal driver data structures that nobody else ever touches).
+
+   NOTE! The spin-lock is safe only when you _also_ use the lock itself
+   to do locking across CPU's, which implies that EVERYTHING that
+   touches a shared variable has to agree about the spinlock they want
+   to use.
+
+----
+
+Lesson 2: reader-writer spinlocks.
+
+If your data accesses have a very natural pattern where you usually tend
+to mostly read from the shared variables, the reader-writer locks
+(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
+readers to be in the same critical region at once, but if somebody wants
+to change the variables it has to get an exclusive write lock.
+
+   NOTE! reader-writer locks require more atomic memory operations than
+   simple spinlocks.  Unless the reader critical section is long, you
+   are better off just using spinlocks.
+
+The routines look the same as above:
+
+   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
+
+	unsigned long flags;
+
+	read_lock_irqsave(&xxx_lock, flags);
+	.. critical section that only reads the info ...
+	read_unlock_irqrestore(&xxx_lock, flags);
+
+	write_lock_irqsave(&xxx_lock, flags);
+	.. read and write exclusive access to the info ...
+	write_unlock_irqrestore(&xxx_lock, flags);
+
+The above kind of lock may be useful for complex data structures like
+linked lists, especially searching for entries without changing the list
+itself.  The read lock allows many concurrent readers.  Anything that
+_changes_ the list will have to get the write lock.
+
+   NOTE! RCU is better for list traversal, but requires careful
+   attention to design detail (see Documentation/RCU/listRCU.txt).
+
+Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
+time need to do any changes (even if you don't do it every time), you have
+to get the write-lock at the very beginning.
+
+   NOTE! We are working hard to remove reader-writer spinlocks in most
+   cases, so please don't add a new one without consensus.  (Instead, see
+   Documentation/RCU/rcu.txt for complete information.)
+
+----
+
+Lesson 3: spinlocks revisited.
+
+The single spin-lock primitives above are by no means the only ones. They
+are the most safe ones, and the ones that work under all circumstances,
+but partly _because_ they are safe they are also fairly slow. They are slower
+than they'd need to be, because they do have to disable interrupts
+(which is just a single instruction on a x86, but it's an expensive one -
+and on other architectures it can be worse).
+
+If you have a case where you have to protect a data structure across
+several CPU's and you want to use spinlocks you can potentially use
+cheaper versions of the spinlocks. IFF you know that the spinlocks are
+never used in interrupt handlers, you can use the non-irq versions:
+
+	spin_lock(&lock);
+	...
+	spin_unlock(&lock);
+
+(and the equivalent read-write versions too, of course). The spinlock will
+guarantee the same kind of exclusive access, and it will be much faster.
+This is useful if you know that the data in question is only ever
+manipulated from a "process context", ie no interrupts involved.
+
+The reasons you mustn't use these versions if you have interrupts that
+play with the spinlock is that you can get deadlocks:
+
+	spin_lock(&lock);
+	...
+		<- interrupt comes in:
+			spin_lock(&lock);
+
+where an interrupt tries to lock an already locked variable. This is ok if
+the other interrupt happens on another CPU, but it is _not_ ok if the
+interrupt happens on the same CPU that already holds the lock, because the
+lock will obviously never be released (because the interrupt is waiting
+for the lock, and the lock-holder is interrupted by the interrupt and will
+not continue until the interrupt has been processed).
+
+(This is also the reason why the irq-versions of the spinlocks only need
+to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
+on other CPU's, because an interrupt on another CPU doesn't interrupt the
+CPU that holds the lock, so the lock-holder can continue and eventually
+releases the lock).
+
+Note that you can be clever with read-write locks and interrupts. For
+example, if you know that the interrupt only ever gets a read-lock, then
+you can use a non-irq version of read locks everywhere - because they
+don't block on each other (and thus there is no dead-lock wrt interrupts.
+But when you do the write-lock, you have to use the irq-safe version.
+
+For an example of being clever with rw-locks, see the "waitqueue_lock"
+handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
+within an interrupt, they only read the queue in order to know whom to
+wake up. So read-locks are safe (which is good: they are very common
+indeed), while write-locks need to protect themselves against interrupts.
+
+		Linus
+
+----
+
+Reference information:
+
+For dynamic initialization, use spin_lock_init() or rwlock_init() as
+appropriate:
+
+   spinlock_t xxx_lock;
+   rwlock_t xxx_rw_lock;
+
+   static int __init xxx_init(void)
+   {
+	spin_lock_init(&xxx_lock);
+	rwlock_init(&xxx_rw_lock);
+	...
+   }
+
+   module_init(xxx_init);
+
+For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
+__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
new file mode 100644
index 000000000000..8a112dc304c3
--- /dev/null
+++ b/Documentation/locking/ww-mutex-design.txt
@@ -0,0 +1,344 @@
+Wait/Wound Deadlock-Proof Mutex Design
+======================================
+
+Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
+
+Motivation for WW-Mutexes
+-------------------------
+
+GPU's do operations that commonly involve many buffers.  Those buffers
+can be shared across contexts/processes, exist in different memory
+domains (for example VRAM vs system memory), and so on.  And with
+PRIME / dmabuf, they can even be shared across devices.  So there are
+a handful of situations where the driver needs to wait for buffers to
+become ready.  If you think about this in terms of waiting on a buffer
+mutex for it to become available, this presents a problem because
+there is no way to guarantee that buffers appear in a execbuf/batch in
+the same order in all contexts.  That is directly under control of
+userspace, and a result of the sequence of GL calls that an application
+makes.	Which results in the potential for deadlock.  The problem gets
+more complex when you consider that the kernel may need to migrate the
+buffer(s) into VRAM before the GPU operates on the buffer(s), which
+may in turn require evicting some other buffers (and you don't want to
+evict other buffers which are already queued up to the GPU), but for a
+simplified understanding of the problem you can ignore this.
+
+The algorithm that the TTM graphics subsystem came up with for dealing with
+this problem is quite simple.  For each group of buffers (execbuf) that need
+to be locked, the caller would be assigned a unique reservation id/ticket,
+from a global counter.  In case of deadlock while locking all the buffers
+associated with a execbuf, the one with the lowest reservation ticket (i.e.
+the oldest task) wins, and the one with the higher reservation id (i.e. the
+younger task) unlocks all of the buffers that it has already locked, and then
+tries again.
+
+In the RDBMS literature this deadlock handling approach is called wait/wound:
+The older tasks waits until it can acquire the contended lock. The younger tasks
+needs to back off and drop all the locks it is currently holding, i.e. the
+younger task is wounded.
+
+Concepts
+--------
+
+Compared to normal mutexes two additional concepts/objects show up in the lock
+interface for w/w mutexes:
+
+Acquire context: To ensure eventual forward progress it is important the a task
+trying to acquire locks doesn't grab a new reservation id, but keeps the one it
+acquired when starting the lock acquisition. This ticket is stored in the
+acquire context. Furthermore the acquire context keeps track of debugging state
+to catch w/w mutex interface abuse.
+
+W/w class: In contrast to normal mutexes the lock class needs to be explicit for
+w/w mutexes, since it is required to initialize the acquire context.
+
+Furthermore there are three different class of w/w lock acquire functions:
+
+* Normal lock acquisition with a context, using ww_mutex_lock.
+
+* Slowpath lock acquisition on the contending lock, used by the wounded task
+  after having dropped all already acquired locks. These functions have the
+  _slow postfix.
+
+  From a simple semantics point-of-view the _slow functions are not strictly
+  required, since simply calling the normal ww_mutex_lock functions on the
+  contending lock (after having dropped all other already acquired locks) will
+  work correctly. After all if no other ww mutex has been acquired yet there's
+  no deadlock potential and hence the ww_mutex_lock call will block and not
+  prematurely return -EDEADLK. The advantage of the _slow functions is in
+  interface safety:
+  - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
+    has a void return type. Note that since ww mutex code needs loops/retries
+    anyway the __must_check doesn't result in spurious warnings, even though the
+    very first lock operation can never fail.
+  - When full debugging is enabled ww_mutex_lock_slow checks that all acquired
+    ww mutex have been released (preventing deadlocks) and makes sure that we
+    block on the contending lock (preventing spinning through the -EDEADLK
+    slowpath until the contended lock can be acquired).
+
+* Functions to only acquire a single w/w mutex, which results in the exact same
+  semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
+  context.
+
+  Again this is not strictly required. But often you only want to acquire a
+  single lock in which case it's pointless to set up an acquire context (and so
+  better to avoid grabbing a deadlock avoidance ticket).
+
+Of course, all the usual variants for handling wake-ups due to signals are also
+provided.
+
+Usage
+-----
+
+Three different ways to acquire locks within the same w/w class. Common
+definitions for methods #1 and #2:
+
+static DEFINE_WW_CLASS(ww_class);
+
+struct obj {
+	struct ww_mutex lock;
+	/* obj data */
+};
+
+struct obj_entry {
+	struct list_head head;
+	struct obj *obj;
+};
+
+Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
+This is useful if a list of required objects is already tracked somewhere.
+Furthermore the lock helper can use propagate the -EALREADY return code back to
+the caller as a signal that an object is twice on the list. This is useful if
+the list is constructed from userspace input and the ABI requires userspace to
+not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
+
+int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj *res_obj = NULL;
+	struct obj_entry *contended_entry = NULL;
+	struct obj_entry *entry;
+
+	ww_acquire_init(ctx, &ww_class);
+
+retry:
+	list_for_each_entry (entry, list, head) {
+		if (entry->obj == res_obj) {
+			res_obj = NULL;
+			continue;
+		}
+		ret = ww_mutex_lock(&entry->obj->lock, ctx);
+		if (ret < 0) {
+			contended_entry = entry;
+			goto err;
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+
+err:
+	list_for_each_entry_continue_reverse (entry, list, head)
+		ww_mutex_unlock(&entry->obj->lock);
+
+	if (res_obj)
+		ww_mutex_unlock(&res_obj->lock);
+
+	if (ret == -EDEADLK) {
+		/* we lost out in a seqno race, lock and retry.. */
+		ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
+		res_obj = contended_entry->obj;
+		goto retry;
+	}
+	ww_acquire_fini(ctx);
+
+	return ret;
+}
+
+Method 2, using a list in execbuf->buffers that can be reordered. Same semantics
+of duplicate entry detection using -EALREADY as method 1 above. But the
+list-reordering allows for a bit more idiomatic code.
+
+int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj_entry *entry, *entry2;
+
+	ww_acquire_init(ctx, &ww_class);
+
+	list_for_each_entry (entry, list, head) {
+		ret = ww_mutex_lock(&entry->obj->lock, ctx);
+		if (ret < 0) {
+			entry2 = entry;
+
+			list_for_each_entry_continue_reverse (entry2, list, head)
+				ww_mutex_unlock(&entry2->obj->lock);
+
+			if (ret != -EDEADLK) {
+				ww_acquire_fini(ctx);
+				return ret;
+			}
+
+			/* we lost out in a seqno race, lock and retry.. */
+			ww_mutex_lock_slow(&entry->obj->lock, ctx);
+
+			/*
+			 * Move buf to head of the list, this will point
+			 * buf->next to the first unlocked entry,
+			 * restarting the for loop.
+			 */
+			list_del(&entry->head);
+			list_add(&entry->head, list);
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+}
+
+Unlocking works the same way for both methods #1 and #2:
+
+void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj_entry *entry;
+
+	list_for_each_entry (entry, list, head)
+		ww_mutex_unlock(&entry->obj->lock);
+
+	ww_acquire_fini(ctx);
+}
+
+Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
+e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
+and edges can only be changed when holding the locks of all involved nodes. w/w
+mutexes are a natural fit for such a case for two reasons:
+- They can handle lock-acquisition in any order which allows us to start walking
+  a graph from a starting point and then iteratively discovering new edges and
+  locking down the nodes those edges connect to.
+- Due to the -EALREADY return code signalling that a given objects is already
+  held there's no need for additional book-keeping to break cycles in the graph
+  or keep track off which looks are already held (when using more than one node
+  as a starting point).
+
+Note that this approach differs in two important ways from the above methods:
+- Since the list of objects is dynamically constructed (and might very well be
+  different when retrying due to hitting the -EDEADLK wound condition) there's
+  no need to keep any object on a persistent list when it's not locked. We can
+  therefore move the list_head into the object itself.
+- On the other hand the dynamic object list construction also means that the -EALREADY return
+  code can't be propagated.
+
+Note also that methods #1 and #2 and method #3 can be combined, e.g. to first lock a
+list of starting nodes (passed in from userspace) using one of the above
+methods. And then lock any additional objects affected by the operations using
+method #3 below. The backoff/retry procedure will be a bit more involved, since
+when the dynamic locking step hits -EDEADLK we also need to unlock all the
+objects acquired with the fixed list. But the w/w mutex debug checks will catch
+any interface misuse for these cases.
+
+Also, method 3 can't fail the lock acquisition step since it doesn't return
+-EALREADY. Of course this would be different when using the _interruptible
+variants, but that's outside of the scope of these examples here.
+
+struct obj {
+	struct ww_mutex ww_mutex;
+	struct list_head locked_list;
+};
+
+static DEFINE_WW_CLASS(ww_class);
+
+void __unlock_objs(struct list_head *list)
+{
+	struct obj *entry, *temp;
+
+	list_for_each_entry_safe (entry, temp, list, locked_list) {
+		/* need to do that before unlocking, since only the current lock holder is
+		allowed to use object */
+		list_del(&entry->locked_list);
+		ww_mutex_unlock(entry->ww_mutex)
+	}
+}
+
+void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj *obj;
+
+	ww_acquire_init(ctx, &ww_class);
+
+retry:
+	/* re-init loop start state */
+	loop {
+		/* magic code which walks over a graph and decides which objects
+		 * to lock */
+
+		ret = ww_mutex_lock(obj->ww_mutex, ctx);
+		if (ret == -EALREADY) {
+			/* we have that one already, get to the next object */
+			continue;
+		}
+		if (ret == -EDEADLK) {
+			__unlock_objs(list);
+
+			ww_mutex_lock_slow(obj, ctx);
+			list_add(&entry->locked_list, list);
+			goto retry;
+		}
+
+		/* locked a new object, add it to the list */
+		list_add_tail(&entry->locked_list, list);
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+}
+
+void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	__unlock_objs(list);
+	ww_acquire_fini(ctx);
+}
+
+Method 4: Only lock one single objects. In that case deadlock detection and
+prevention is obviously overkill, since with grabbing just one lock you can't
+produce a deadlock within just one class. To simplify this case the w/w mutex
+api can be used with a NULL context.
+
+Implementation Details
+----------------------
+
+Design:
+  ww_mutex currently encapsulates a struct mutex, this means no extra overhead for
+  normal mutex locks, which are far more common. As such there is only a small
+  increase in code size if wait/wound mutexes are not used.
+
+  In general, not much contention is expected. The locks are typically used to
+  serialize access to resources for devices. The only way to make wakeups
+  smarter would be at the cost of adding a field to struct mutex_waiter. This
+  would add overhead to all cases where normal mutexes are used, and
+  ww_mutexes are generally less performance sensitive.
+
+Lockdep:
+  Special care has been taken to warn for as many cases of api abuse
+  as possible. Some common api abuses will be caught with
+  CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
+
+  Some of the errors which will be warned about:
+   - Forgetting to call ww_acquire_fini or ww_acquire_init.
+   - Attempting to lock more mutexes after ww_acquire_done.
+   - Attempting to lock the wrong mutex after -EDEADLK and
+     unlocking all mutexes.
+   - Attempting to lock the right mutex after -EDEADLK,
+     before unlocking all mutexes.
+
+   - Calling ww_mutex_lock_slow before -EDEADLK was returned.
+
+   - Unlocking mutexes with the wrong unlock function.
+   - Calling one of the ww_acquire_* twice on the same context.
+   - Using a different ww_class for the mutex than for the ww_acquire_ctx.
+   - Normal lockdep errors that can result in deadlocks.
+
+  Some of the lockdep errors that can result in deadlocks:
+   - Calling ww_acquire_init to initialize a second ww_acquire_ctx before
+     having called ww_acquire_fini on the first.
+   - 'normal' deadlocks that can occur.
+
+FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic
+implemented.
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
deleted file mode 100644
index 72d010689751..000000000000
--- a/Documentation/lockstat.txt
+++ /dev/null
@@ -1,178 +0,0 @@
-
-LOCK STATISTICS
-
-- WHAT
-
-As the name suggests, it provides statistics on locks.
-
-- WHY
-
-Because things like lock contention can severely impact performance.
-
-- HOW
-
-Lockdep already has hooks in the lock functions and maps lock instances to
-lock classes. We build on that (see Documentation/lockdep-design.txt).
-The graph below shows the relation between the lock functions and the various
-hooks therein.
-
-        __acquire
-            |
-           lock _____
-            |        \
-            |    __contended
-            |         |
-            |       <wait>
-            | _______/
-            |/
-            |
-       __acquired
-            |
-            .
-          <hold>
-            .
-            |
-       __release
-            |
-         unlock
-
-lock, unlock	- the regular lock functions
-__*		- the hooks
-<> 		- states
-
-With these hooks we provide the following statistics:
-
- con-bounces       - number of lock contention that involved x-cpu data
- contentions       - number of lock acquisitions that had to wait
- wait time min     - shortest (non-0) time we ever had to wait for a lock
-           max     - longest time we ever had to wait for a lock
-	   total   - total time we spend waiting on this lock
-	   avg     - average time spent waiting on this lock
- acq-bounces       - number of lock acquisitions that involved x-cpu data
- acquisitions      - number of times we took the lock
- hold time min     - shortest (non-0) time we ever held the lock
-	   max     - longest time we ever held the lock
-	   total   - total time this lock was held
-	   avg     - average time this lock was held
-
-These numbers are gathered per lock class, per read/write state (when
-applicable).
-
-It also tracks 4 contention points per class. A contention point is a call site
-that had to wait on lock acquisition.
-
- - CONFIGURATION
-
-Lock statistics are enabled via CONFIG_LOCK_STAT.
-
- - USAGE
-
-Enable collection of statistics:
-
-# echo 1 >/proc/sys/kernel/lock_stat
-
-Disable collection of statistics:
-
-# echo 0 >/proc/sys/kernel/lock_stat
-
-Look at the current lock statistics:
-
-( line numbers not part of actual output, done for clarity in the explanation
-  below )
-
-# less /proc/lock_stat
-
-01 lock_stat version 0.4
-02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-03                              class name    con-bounces    contentions   waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg
-04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-05
-06                         &mm->mmap_sem-W:            46             84           0.26         939.10       16371.53         194.90          47291        2922365           0.16     2220301.69 17464026916.32        5975.99
-07                         &mm->mmap_sem-R:            37            100           1.31      299502.61      325629.52        3256.30         212344       34316685           0.10        7744.91    95016910.20           2.77
-08                         ---------------
-09                           &mm->mmap_sem              1          [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
-19                           &mm->mmap_sem             96          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
-11                           &mm->mmap_sem             34          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
-12                           &mm->mmap_sem             17          [<ffffffff81127e71>] vm_munmap+0x41/0x80
-13                         ---------------
-14                           &mm->mmap_sem              1          [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
-15                           &mm->mmap_sem             60          [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
-16                           &mm->mmap_sem             41          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
-17                           &mm->mmap_sem             68          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
-18
-19.............................................................................................................................................................................................................................
-20
-21                         unix_table_lock:           110            112           0.21          49.24         163.91           1.46          21094          66312           0.12         624.42       31589.81           0.48
-22                         ---------------
-23                         unix_table_lock             45          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
-24                         unix_table_lock             47          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
-25                         unix_table_lock             15          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
-26                         unix_table_lock              5          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
-27                         ---------------
-28                         unix_table_lock             39          [<ffffffff8150b111>] unix_release_sock+0x31/0x250
-29                         unix_table_lock             49          [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
-30                         unix_table_lock             20          [<ffffffff8150ca37>] unix_find_other+0x117/0x230
-31                         unix_table_lock              4          [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
-
-
-This excerpt shows the first two lock class statistics. Line 01 shows the
-output version - each time the format changes this will be updated. Line 02-04
-show the header with column descriptions. Lines 05-18 and 20-31 show the actual
-statistics. These statistics come in two parts; the actual stats separated by a
-short separator (line 08, 13) from the contention points.
-
-The first lock (05-18) is a read/write lock, and shows two lines above the
-short separator. The contention points don't match the column descriptors,
-they have two: contentions and [<IP>] symbol. The second set of contention
-points are the points we're contending with.
-
-The integer part of the time values is in us.
-
-Dealing with nested locks, subclasses may appear:
-
-32...........................................................................................................................................................................................................................
-33
-34                               &rq->lock:       13128          13128           0.43         190.53      103881.26           7.91          97454        3453404           0.00         401.11    13224683.11           3.82
-35                               ---------
-36                               &rq->lock          645          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
-37                               &rq->lock          297          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-38                               &rq->lock          360          [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
-39                               &rq->lock          428          [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
-40                               ---------
-41                               &rq->lock           77          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
-42                               &rq->lock          174          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-43                               &rq->lock         4715          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
-44                               &rq->lock          893          [<ffffffff81340524>] schedule+0x157/0x7b8
-45
-46...........................................................................................................................................................................................................................
-47
-48                             &rq->lock/1:        1526          11488           0.33         388.73      136294.31          11.86          21461          38404           0.00          37.93      109388.53           2.84
-49                             -----------
-50                             &rq->lock/1        11526          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
-51                             -----------
-52                             &rq->lock/1         5645          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
-53                             &rq->lock/1         1224          [<ffffffff81340524>] schedule+0x157/0x7b8
-54                             &rq->lock/1         4336          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
-55                             &rq->lock/1          181          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
-
-Line 48 shows statistics for the second subclass (/1) of &rq->lock class
-(subclass starts from 0), since in this case, as line 50 suggests,
-double_rq_lock actually acquires a nested lock of two spinlocks.
-
-View the top contending locks:
-
-# grep : /proc/lock_stat | head
-			clockevents_lock:       2926159        2947636           0.15       46882.81  1784540466.34         605.41        3381345        3879161           0.00        2260.97    53178395.68          13.71
-		     tick_broadcast_lock:        346460         346717           0.18        2257.43    39364622.71         113.54        3642919        4242696           0.00        2263.79    49173646.60          11.59
-		  &mapping->i_mmap_mutex:        203896         203899           3.36      645530.05 31767507988.39      155800.21        3361776        8893984           0.17        2254.15    14110121.02           1.59
-			       &rq->lock:        135014         136909           0.18         606.09      842160.68           6.15        1540728       10436146           0.00         728.72    17606683.41           1.69
-	       &(&zone->lru_lock)->rlock:         93000          94934           0.16          59.18      188253.78           1.98        1199912        3809894           0.15         391.40     3559518.81           0.93
-			 tasklist_lock-W:         40667          41130           0.23        1189.42      428980.51          10.43         270278         510106           0.16         653.51     3939674.91           7.72
-			 tasklist_lock-R:         21298          21305           0.20        1310.05      215511.12          10.12         186204         241258           0.14        1162.33     1179779.23           4.89
-			      rcu_node_1:         47656          49022           0.16         635.41      193616.41           3.95         844888        1865423           0.00         764.26     1656226.96           0.89
-       &(&dentry->d_lockref.lock)->rlock:         39791          40179           0.15        1302.08       88851.96           2.21        2790851       12527025           0.10        1910.75     3379714.27           0.27
-			      rcu_node_0:         29203          30064           0.16         786.55     1555573.00          51.74          88963         244254           0.00         398.87      428872.51           1.76
-
-Clear the statistics:
-
-# echo 0 > /proc/lock_stat
diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt
deleted file mode 100644
index ee231ed09ec6..000000000000
--- a/Documentation/mutex-design.txt
+++ /dev/null
@@ -1,157 +0,0 @@
-Generic Mutex Subsystem
-
-started by Ingo Molnar <mingo@redhat.com>
-updated by Davidlohr Bueso <davidlohr@hp.com>
-
-What are mutexes?
------------------
-
-In the Linux kernel, mutexes refer to a particular locking primitive
-that enforces serialization on shared memory systems, and not only to
-the generic term referring to 'mutual exclusion' found in academia
-or similar theoretical text books. Mutexes are sleeping locks which
-behave similarly to binary semaphores, and were introduced in 2006[1]
-as an alternative to these. This new data structure provided a number
-of advantages, including simpler interfaces, and at that time smaller
-code (see Disadvantages).
-
-[1] http://lwn.net/Articles/164802/
-
-Implementation
---------------
-
-Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
-	  1: unlocked
-	  0: locked, no waiters
-   negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
-
-When acquiring a mutex, there are three possible paths that can be
-taken, depending on the state of the lock:
-
-(i) fastpath: tries to atomically acquire the lock by decrementing the
-    counter. If it was already taken by another task it goes to the next
-    possible path. This logic is architecture specific. On x86-64, the
-    locking fastpath is 2 instructions:
-
-    0000000000000e10 <mutex_lock>:
-    e21:   f0 ff 0b                lock decl (%rbx)
-    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
-
-   the unlocking fastpath is equally tight:
-
-    0000000000000bc0 <mutex_unlock>:
-    bc8:   f0 ff 07                lock incl (%rdi)
-    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
-
-
-(ii) midpath: aka optimistic spinning, tries to spin for acquisition
-     while the lock owner is running and there are no other tasks ready
-     to run that have higher priority (need_resched). The rationale is
-     that if the lock owner is running, it is likely to release the lock
-     soon. The mutex spinners are queued up using MCS lock so that only
-     one spinner can compete for the mutex.
-
-     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
-     with the desirable properties of being fair and with each cpu trying
-     to acquire the lock spinning on a local variable. It avoids expensive
-     cacheline bouncing that common test-and-set spinlock implementations
-     incur. An MCS-like lock is specially tailored for optimistic spinning
-     for sleeping lock implementation. An important feature of the customized
-     MCS lock is that it has the extra property that spinners are able to exit
-     the MCS spinlock queue when they need to reschedule. This further helps
-     avoid situations where MCS spinners that need to reschedule would continue
-     waiting to spin on mutex owner, only to go directly to slowpath upon
-     obtaining the MCS lock.
-
-
-(iii) slowpath: last resort, if the lock is still unable to be acquired,
-      the task is added to the wait-queue and sleeps until woken up by the
-      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
-
-While formally kernel mutexes are sleepable locks, it is path (ii) that
-makes them more practically a hybrid type. By simply not interrupting a
-task and busy-waiting for a few cycles instead of immediately sleeping,
-the performance of this lock has been seen to significantly improve a
-number of workloads. Note that this technique is also used for rw-semaphores.
-
-Semantics
----------
-
-The mutex subsystem checks and enforces the following rules:
-
-    - Only one task can hold the mutex at a time.
-    - Only the owner can unlock the mutex.
-    - Multiple unlocks are not permitted.
-    - Recursive locking/unlocking is not permitted.
-    - A mutex must only be initialized via the API (see below).
-    - A task may not exit with a mutex held.
-    - Memory areas where held locks reside must not be freed.
-    - Held mutexes must not be reinitialized.
-    - Mutexes may not be used in hardware or software interrupt
-      contexts such as tasklets and timers.
-
-These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled.
-In addition, the mutex debugging code also implements a number of other
-features that make lock debugging easier and faster:
-
-    - Uses symbolic names of mutexes, whenever they are printed
-      in debug output.
-    - Point-of-acquire tracking, symbolic lookup of function names,
-      list of all locks held in the system, printout of them.
-    - Owner tracking.
-    - Detects self-recursing locks and prints out all relevant info.
-    - Detects multi-task circular deadlocks and prints out all affected
-      locks and tasks (and only those tasks).
-
-
-Interfaces
-----------
-Statically define the mutex:
-   DEFINE_MUTEX(name);
-
-Dynamically initialize the mutex:
-   mutex_init(mutex);
-
-Acquire the mutex, uninterruptible:
-   void mutex_lock(struct mutex *lock);
-   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
-   int  mutex_trylock(struct mutex *lock);
-
-Acquire the mutex, interruptible:
-   int mutex_lock_interruptible_nested(struct mutex *lock,
-				       unsigned int subclass);
-   int mutex_lock_interruptible(struct mutex *lock);
-
-Acquire the mutex, interruptible, if dec to 0:
-   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
-
-Unlock the mutex:
-   void mutex_unlock(struct mutex *lock);
-
-Test if the mutex is taken:
-   int mutex_is_locked(struct mutex *lock);
-
-Disadvantages
--------------
-
-Unlike its original design and purpose, 'struct mutex' is larger than
-most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
-as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the
-'struct rw_semaphore' variant. Larger structure sizes mean more CPU
-cache and memory footprint.
-
-When to use mutexes
--------------------
-
-Unless the strict semantics of mutexes are unsuitable and/or the critical
-region prevents the lock from being shared, always prefer them to any other
-locking primitive.
diff --git a/Documentation/rt-mutex-design.txt b/Documentation/rt-mutex-design.txt
deleted file mode 100644
index 8666070d3189..000000000000
--- a/Documentation/rt-mutex-design.txt
+++ /dev/null
@@ -1,781 +0,0 @@
-#
-# Copyright (c) 2006 Steven Rostedt
-# Licensed under the GNU Free Documentation License, Version 1.2
-#
-
-RT-mutex implementation design
-------------------------------
-
-This document tries to describe the design of the rtmutex.c implementation.
-It doesn't describe the reasons why rtmutex.c exists. For that please see
-Documentation/rt-mutex.txt.  Although this document does explain problems
-that happen without this code, but that is in the concept to understand
-what the code actually is doing.
-
-The goal of this document is to help others understand the priority
-inheritance (PI) algorithm that is used, as well as reasons for the
-decisions that were made to implement PI in the manner that was done.
-
-
-Unbounded Priority Inversion
-----------------------------
-
-Priority inversion is when a lower priority process executes while a higher
-priority process wants to run.  This happens for several reasons, and
-most of the time it can't be helped.  Anytime a high priority process wants
-to use a resource that a lower priority process has (a mutex for example),
-the high priority process must wait until the lower priority process is done
-with the resource.  This is a priority inversion.  What we want to prevent
-is something called unbounded priority inversion.  That is when the high
-priority process is prevented from running by a lower priority process for
-an undetermined amount of time.
-
-The classic example of unbounded priority inversion is where you have three
-processes, let's call them processes A, B, and C, where A is the highest
-priority process, C is the lowest, and B is in between. A tries to grab a lock
-that C owns and must wait and lets C run to release the lock. But in the
-meantime, B executes, and since B is of a higher priority than C, it preempts C,
-but by doing so, it is in fact preempting A which is a higher priority process.
-Now there's no way of knowing how long A will be sleeping waiting for C
-to release the lock, because for all we know, B is a CPU hog and will
-never give C a chance to release the lock.  This is called unbounded priority
-inversion.
-
-Here's a little ASCII art to show the problem.
-
-   grab lock L1 (owned by C)
-     |
-A ---+
-        C preempted by B
-          |
-C    +----+
-
-B         +-------->
-                B now keeps A from running.
-
-
-Priority Inheritance (PI)
--------------------------
-
-There are several ways to solve this issue, but other ways are out of scope
-for this document.  Here we only discuss PI.
-
-PI is where a process inherits the priority of another process if the other
-process blocks on a lock owned by the current process.  To make this easier
-to understand, let's use the previous example, with processes A, B, and C again.
-
-This time, when A blocks on the lock owned by C, C would inherit the priority
-of A.  So now if B becomes runnable, it would not preempt C, since C now has
-the high priority of A.  As soon as C releases the lock, it loses its
-inherited priority, and A then can continue with the resource that C had.
-
-Terminology
------------
-
-Here I explain some terminology that is used in this document to help describe
-the design that is used to implement PI.
-
-PI chain - The PI chain is an ordered series of locks and processes that cause
-           processes to inherit priorities from a previous process that is
-           blocked on one of its locks.  This is described in more detail
-           later in this document.
-
-mutex    - In this document, to differentiate from locks that implement
-           PI and spin locks that are used in the PI code, from now on
-           the PI locks will be called a mutex.
-
-lock     - In this document from now on, I will use the term lock when
-           referring to spin locks that are used to protect parts of the PI
-           algorithm.  These locks disable preemption for UP (when
-           CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from
-           entering critical sections simultaneously.
-
-spin lock - Same as lock above.
-
-waiter   - A waiter is a struct that is stored on the stack of a blocked
-           process.  Since the scope of the waiter is within the code for
-           a process being blocked on the mutex, it is fine to allocate
-           the waiter on the process's stack (local variable).  This
-           structure holds a pointer to the task, as well as the mutex that
-           the task is blocked on.  It also has the plist node structures to
-           place the task in the waiter_list of a mutex as well as the
-           pi_list of a mutex owner task (described below).
-
-           waiter is sometimes used in reference to the task that is waiting
-           on a mutex. This is the same as waiter->task.
-
-waiters  - A list of processes that are blocked on a mutex.
-
-top waiter - The highest priority process waiting on a specific mutex.
-
-top pi waiter - The highest priority process waiting on one of the mutexes
-                that a specific process owns.
-
-Note:  task and process are used interchangeably in this document, mostly to
-       differentiate between two processes that are being described together.
-
-
-PI chain
---------
-
-The PI chain is a list of processes and mutexes that may cause priority
-inheritance to take place.  Multiple chains may converge, but a chain
-would never diverge, since a process can't be blocked on more than one
-mutex at a time.
-
-Example:
-
-   Process:  A, B, C, D, E
-   Mutexes:  L1, L2, L3, L4
-
-   A owns: L1
-           B blocked on L1
-           B owns L2
-                  C blocked on L2
-                  C owns L3
-                         D blocked on L3
-                         D owns L4
-                                E blocked on L4
-
-The chain would be:
-
-   E->L4->D->L3->C->L2->B->L1->A
-
-To show where two chains merge, we could add another process F and
-another mutex L5 where B owns L5 and F is blocked on mutex L5.
-
-The chain for F would be:
-
-   F->L5->B->L1->A
-
-Since a process may own more than one mutex, but never be blocked on more than
-one, the chains merge.
-
-Here we show both chains:
-
-   E->L4->D->L3->C->L2-+
-                       |
-                       +->B->L1->A
-                       |
-                 F->L5-+
-
-For PI to work, the processes at the right end of these chains (or we may
-also call it the Top of the chain) must be equal to or higher in priority
-than the processes to the left or below in the chain.
-
-Also since a mutex may have more than one process blocked on it, we can
-have multiple chains merge at mutexes.  If we add another process G that is
-blocked on mutex L2:
-
-  G->L2->B->L1->A
-
-And once again, to show how this can grow I will show the merging chains
-again.
-
-   E->L4->D->L3->C-+
-                   +->L2-+
-                   |     |
-                 G-+     +->B->L1->A
-                         |
-                   F->L5-+
-
-
-Plist
------
-
-Before I go further and talk about how the PI chain is stored through lists
-on both mutexes and processes, I'll explain the plist.  This is similar to
-the struct list_head functionality that is already in the kernel.
-The implementation of plist is out of scope for this document, but it is
-very important to understand what it does.
-
-There are a few differences between plist and list, the most important one
-being that plist is a priority sorted linked list.  This means that the
-priorities of the plist are sorted, such that it takes O(1) to retrieve the
-highest priority item in the list.  Obviously this is useful to store processes
-based on their priorities.
-
-Another difference, which is important for implementation, is that, unlike
-list, the head of the list is a different element than the nodes of a list.
-So the head of the list is declared as struct plist_head and nodes that will
-be added to the list are declared as struct plist_node.
-
-
-Mutex Waiter List
------------------
-
-Every mutex keeps track of all the waiters that are blocked on itself. The mutex
-has a plist to store these waiters by priority.  This list is protected by
-a spin lock that is located in the struct of the mutex. This lock is called
-wait_lock.  Since the modification of the waiter list is never done in
-interrupt context, the wait_lock can be taken without disabling interrupts.
-
-
-Task PI List
-------------
-
-To keep track of the PI chains, each process has its own PI list.  This is
-a list of all top waiters of the mutexes that are owned by the process.
-Note that this list only holds the top waiters and not all waiters that are
-blocked on mutexes owned by the process.
-
-The top of the task's PI list is always the highest priority task that
-is waiting on a mutex that is owned by the task.  So if the task has
-inherited a priority, it will always be the priority of the task that is
-at the top of this list.
-
-This list is stored in the task structure of a process as a plist called
-pi_list.  This list is protected by a spin lock also in the task structure,
-called pi_lock.  This lock may also be taken in interrupt context, so when
-locking the pi_lock, interrupts must be disabled.
-
-
-Depth of the PI Chain
----------------------
-
-The maximum depth of the PI chain is not dynamic, and could actually be
-defined.  But is very complex to figure it out, since it depends on all
-the nesting of mutexes.  Let's look at the example where we have 3 mutexes,
-L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
-The following shows a locking order of L1->L2->L3, but may not actually
-be directly nested that way.
-
-void func1(void)
-{
-	mutex_lock(L1);
-
-	/* do anything */
-
-	mutex_unlock(L1);
-}
-
-void func2(void)
-{
-	mutex_lock(L1);
-	mutex_lock(L2);
-
-	/* do something */
-
-	mutex_unlock(L2);
-	mutex_unlock(L1);
-}
-
-void func3(void)
-{
-	mutex_lock(L2);
-	mutex_lock(L3);
-
-	/* do something else */
-
-	mutex_unlock(L3);
-	mutex_unlock(L2);
-}
-
-void func4(void)
-{
-	mutex_lock(L3);
-
-	/* do something again */
-
-	mutex_unlock(L3);
-}
-
-Now we add 4 processes that run each of these functions separately.
-Processes A, B, C, and D which run functions func1, func2, func3 and func4
-respectively, and such that D runs first and A last.  With D being preempted
-in func4 in the "do something again" area, we have a locking that follows:
-
-D owns L3
-       C blocked on L3
-       C owns L2
-              B blocked on L2
-              B owns L1
-                     A blocked on L1
-
-And thus we have the chain A->L1->B->L2->C->L3->D.
-
-This gives us a PI depth of 4 (four processes), but looking at any of the
-functions individually, it seems as though they only have at most a locking
-depth of two.  So, although the locking depth is defined at compile time,
-it still is very difficult to find the possibilities of that depth.
-
-Now since mutexes can be defined by user-land applications, we don't want a DOS
-type of application that nests large amounts of mutexes to create a large
-PI chain, and have the code holding spin locks while looking at a large
-amount of data.  So to prevent this, the implementation not only implements
-a maximum lock depth, but also only holds at most two different locks at a
-time, as it walks the PI chain.  More about this below.
-
-
-Mutex owner and flags
----------------------
-
-The mutex structure contains a pointer to the owner of the mutex.  If the
-mutex is not owned, this owner is set to NULL.  Since all architectures
-have the task structure on at least a four byte alignment (and if this is
-not true, the rtmutex.c code will be broken!), this allows for the two
-least significant bits to be used as flags.  This part is also described
-in Documentation/rt-mutex.txt, but will also be briefly described here.
-
-Bit 0 is used as the "Pending Owner" flag.  This is described later.
-Bit 1 is used as the "Has Waiters" flags.  This is also described later
-  in more detail, but is set whenever there are waiters on a mutex.
-
-
-cmpxchg Tricks
---------------
-
-Some architectures implement an atomic cmpxchg (Compare and Exchange).  This
-is used (when applicable) to keep the fast path of grabbing and releasing
-mutexes short.
-
-cmpxchg is basically the following function performed atomically:
-
-unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
-{
-	unsigned long T = *A;
-	if (*A == *B) {
-		*A = *C;
-	}
-	return T;
-}
-#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
-
-This is really nice to have, since it allows you to only update a variable
-if the variable is what you expect it to be.  You know if it succeeded if
-the return value (the old value of A) is equal to B.
-
-The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If
-the architecture does not support CMPXCHG, then this macro is simply set
-to fail every time.  But if CMPXCHG is supported, then this will
-help out extremely to keep the fast path short.
-
-The use of rt_mutex_cmpxchg with the flags in the owner field help optimize
-the system for architectures that support it.  This will also be explained
-later in this document.
-
-
-Priority adjustments
---------------------
-
-The implementation of the PI code in rtmutex.c has several places that a
-process must adjust its priority.  With the help of the pi_list of a
-process this is rather easy to know what needs to be adjusted.
-
-The functions implementing the task adjustments are rt_mutex_adjust_prio,
-__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock
-to already be taken), rt_mutex_getprio, and rt_mutex_setprio.
-
-rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio.
-
-rt_mutex_getprio returns the priority that the task should have.  Either the
-task's own normal priority, or if a process of a higher priority is waiting on
-a mutex owned by the task, then that higher priority should be returned.
-Since the pi_list of a task holds an order by priority list of all the top
-waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs
-to compare the top pi waiter to its own normal priority, and return the higher
-priority back.
-
-(Note:  if looking at the code, you will notice that the lower number of
-        prio is returned.  This is because the prio field in the task structure
-        is an inverse order of the actual priority.  So a "prio" of 5 is
-        of higher priority than a "prio" of 10.)
-
-__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the
-result does not equal the task's current priority, then rt_mutex_setprio
-is called to adjust the priority of the task to the new priority.
-Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the
-actual change in priority.
-
-It is interesting to note that __rt_mutex_adjust_prio can either increase
-or decrease the priority of the task.  In the case that a higher priority
-process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio
-would increase/boost the task's priority.  But if a higher priority task
-were for some reason to leave the mutex (timeout or signal), this same function
-would decrease/unboost the priority of the task.  That is because the pi_list
-always contains the highest priority task that is waiting on a mutex owned
-by the task, so we only need to compare the priority of that top pi waiter
-to the normal priority of the given task.
-
-
-High level overview of the PI chain walk
-----------------------------------------
-
-The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain.
-
-The implementation has gone through several iterations, and has ended up
-with what we believe is the best.  It walks the PI chain by only grabbing
-at most two locks at a time, and is very efficient.
-
-The rt_mutex_adjust_prio_chain can be used either to boost or lower process
-priorities.
-
-rt_mutex_adjust_prio_chain is called with a task to be checked for PI
-(de)boosting (the owner of a mutex that a process is blocking on), a flag to
-check for deadlocking, the mutex that the task owns, and a pointer to a waiter
-that is the process's waiter struct that is blocked on the mutex (although this
-parameter may be NULL for deboosting).
-
-For this explanation, I will not mention deadlock detection. This explanation
-will try to stay at a high level.
-
-When this function is called, there are no locks held.  That also means
-that the state of the owner and lock can change when entered into this function.
-
-Before this function is called, the task has already had rt_mutex_adjust_prio
-performed on it.  This means that the task is set to the priority that it
-should be at, but the plist nodes of the task's waiter have not been updated
-with the new priorities, and that this task may not be in the proper locations
-in the pi_lists and wait_lists that the task is blocked on.  This function
-solves all that.
-
-A loop is entered, where task is the owner to be checked for PI changes that
-was passed by parameter (for the first iteration).  The pi_lock of this task is
-taken to prevent any more changes to the pi_list of the task.  This also
-prevents new tasks from completing the blocking on a mutex that is owned by this
-task.
-
-If the task is not blocked on a mutex then the loop is exited.  We are at
-the top of the PI chain.
-
-A check is now done to see if the original waiter (the process that is blocked
-on the current mutex) is the top pi waiter of the task.  That is, is this
-waiter on the top of the task's pi_list.  If it is not, it either means that
-there is another process higher in priority that is blocked on one of the
-mutexes that the task owns, or that the waiter has just woken up via a signal
-or timeout and has left the PI chain.  In either case, the loop is exited, since
-we don't need to do any more changes to the priority of the current task, or any
-task that owns a mutex that this current task is waiting on.  A priority chain
-walk is only needed when a new top pi waiter is made to a task.
-
-The next check sees if the task's waiter plist node has the priority equal to
-the priority the task is set at.  If they are equal, then we are done with
-the loop.  Remember that the function started with the priority of the
-task adjusted, but the plist nodes that hold the task in other processes
-pi_lists have not been adjusted.
-
-Next, we look at the mutex that the task is blocked on. The mutex's wait_lock
-is taken.  This is done by a spin_trylock, because the locking order of the
-pi_lock and wait_lock goes in the opposite direction. If we fail to grab the
-lock, the pi_lock is released, and we restart the loop.
-
-Now that we have both the pi_lock of the task as well as the wait_lock of
-the mutex the task is blocked on, we update the task's waiter's plist node
-that is located on the mutex's wait_list.
-
-Now we release the pi_lock of the task.
-
-Next the owner of the mutex has its pi_lock taken, so we can update the
-task's entry in the owner's pi_list.  If the task is the highest priority
-process on the mutex's wait_list, then we remove the previous top waiter
-from the owner's pi_list, and replace it with the task.
-
-Note: It is possible that the task was the current top waiter on the mutex,
-      in which case the task is not yet on the pi_list of the waiter.  This
-      is OK, since plist_del does nothing if the plist node is not on any
-      list.
-
-If the task was not the top waiter of the mutex, but it was before we
-did the priority updates, that means we are deboosting/lowering the
-task.  In this case, the task is removed from the pi_list of the owner,
-and the new top waiter is added.
-
-Lastly, we unlock both the pi_lock of the task, as well as the mutex's
-wait_lock, and continue the loop again.  On the next iteration of the
-loop, the previous owner of the mutex will be the task that will be
-processed.
-
-Note: One might think that the owner of this mutex might have changed
-      since we just grab the mutex's wait_lock. And one could be right.
-      The important thing to remember is that the owner could not have
-      become the task that is being processed in the PI chain, since
-      we have taken that task's pi_lock at the beginning of the loop.
-      So as long as there is an owner of this mutex that is not the same
-      process as the tasked being worked on, we are OK.
-
-      Looking closely at the code, one might be confused.  The check for the
-      end of the PI chain is when the task isn't blocked on anything or the
-      task's waiter structure "task" element is NULL.  This check is
-      protected only by the task's pi_lock.  But the code to unlock the mutex
-      sets the task's waiter structure "task" element to NULL with only
-      the protection of the mutex's wait_lock, which was not taken yet.
-      Isn't this a race condition if the task becomes the new owner?
-
-      The answer is No!  The trick is the spin_trylock of the mutex's
-      wait_lock.  If we fail that lock, we release the pi_lock of the
-      task and continue the loop, doing the end of PI chain check again.
-
-      In the code to release the lock, the wait_lock of the mutex is held
-      the entire time, and it is not let go when we grab the pi_lock of the
-      new owner of the mutex.  So if the switch of a new owner were to happen
-      after the check for end of the PI chain and the grabbing of the
-      wait_lock, the unlocking code would spin on the new owner's pi_lock
-      but never give up the wait_lock.  So the PI chain loop is guaranteed to
-      fail the spin_trylock on the wait_lock, release the pi_lock, and
-      try again.
-
-      If you don't quite understand the above, that's OK. You don't have to,
-      unless you really want to make a proof out of it ;)
-
-
-Pending Owners and Lock stealing
---------------------------------
-
-One of the flags in the owner field of the mutex structure is "Pending Owner".
-What this means is that an owner was chosen by the process releasing the
-mutex, but that owner has yet to wake up and actually take the mutex.
-
-Why is this important?  Why can't we just give the mutex to another process
-and be done with it?
-
-The PI code is to help with real-time processes, and to let the highest
-priority process run as long as possible with little latencies and delays.
-If a high priority process owns a mutex that a lower priority process is
-blocked on, when the mutex is released it would be given to the lower priority
-process.  What if the higher priority process wants to take that mutex again.
-The high priority process would fail to take that mutex that it just gave up
-and it would need to boost the lower priority process to run with full
-latency of that critical section (since the low priority process just entered
-it).
-
-There's no reason a high priority process that gives up a mutex should be
-penalized if it tries to take that mutex again.  If the new owner of the
-mutex has not woken up yet, there's no reason that the higher priority process
-could not take that mutex away.
-
-To solve this, we introduced Pending Ownership and Lock Stealing.  When a
-new process is given a mutex that it was blocked on, it is only given
-pending ownership.  This means that it's the new owner, unless a higher
-priority process comes in and tries to grab that mutex.  If a higher priority
-process does come along and wants that mutex, we let the higher priority
-process "steal" the mutex from the pending owner (only if it is still pending)
-and continue with the mutex.
-
-
-Taking of a mutex (The walk through)
-------------------------------------
-
-OK, now let's take a look at the detailed walk through of what happens when
-taking a mutex.
-
-The first thing that is tried is the fast taking of the mutex.  This is
-done when we have CMPXCHG enabled (otherwise the fast taking automatically
-fails).  Only when the owner field of the mutex is NULL can the lock be
-taken with the CMPXCHG and nothing else needs to be done.
-
-If there is contention on the lock, whether it is owned or pending owner
-we go about the slow path (rt_mutex_slowlock).
-
-The slow path function is where the task's waiter structure is created on
-the stack.  This is because the waiter structure is only needed for the
-scope of this function.  The waiter structure holds the nodes to store
-the task on the wait_list of the mutex, and if need be, the pi_list of
-the owner.
-
-The wait_lock of the mutex is taken since the slow path of unlocking the
-mutex also takes this lock.
-
-We then call try_to_take_rt_mutex.  This is where the architecture that
-does not implement CMPXCHG would always grab the lock (if there's no
-contention).
-
-try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
-slow path.  The first thing that is done here is an atomic setting of
-the "Has Waiters" flag of the mutex's owner field.  Yes, this could really
-be false, because if the mutex has no owner, there are no waiters and
-the current task also won't have any waiters.  But we don't have the lock
-yet, so we assume we are going to be a waiter.  The reason for this is to
-play nice for those architectures that do have CMPXCHG.  By setting this flag
-now, the owner of the mutex can't release the mutex without going into the
-slow unlock path, and it would then need to grab the wait_lock, which this
-code currently holds.  So setting the "Has Waiters" flag forces the owner
-to synchronize with this code.
-
-Now that we know that we can't have any races with the owner releasing the
-mutex, we check to see if we can take the ownership.  This is done if the
-mutex doesn't have a owner, or if we can steal the mutex from a pending
-owner.  Let's look at the situations we have here.
-
-  1) Has owner that is pending
-  ----------------------------
-
-  The mutex has a owner, but it hasn't woken up and the mutex flag
-  "Pending Owner" is set.  The first check is to see if the owner isn't the
-  current task.  This is because this function is also used for the pending
-  owner to grab the mutex.  When a pending owner wakes up, it checks to see
-  if it can take the mutex, and this is done if the owner is already set to
-  itself.  If so, we succeed and leave the function, clearing the "Pending
-  Owner" bit.
-
-  If the pending owner is not current, we check to see if the current priority is
-  higher than the pending owner.  If not, we fail the function and return.
-
-  There's also something special about a pending owner.  That is a pending owner
-  is never blocked on a mutex.  So there is no PI chain to worry about.  It also
-  means that if the mutex doesn't have any waiters, there's no accounting needed
-  to update the pending owner's pi_list, since we only worry about processes
-  blocked on the current mutex.
-
-  If there are waiters on this mutex, and we just stole the ownership, we need
-  to take the top waiter, remove it from the pi_list of the pending owner, and
-  add it to the current pi_list.  Note that at this moment, the pending owner
-  is no longer on the list of waiters.  This is fine, since the pending owner
-  would add itself back when it realizes that it had the ownership stolen
-  from itself.  When the pending owner tries to grab the mutex, it will fail
-  in try_to_take_rt_mutex if the owner field points to another process.
-
-  2) No owner
-  -----------
-
-  If there is no owner (or we successfully stole the lock), we set the owner
-  of the mutex to current, and set the flag of "Has Waiters" if the current
-  mutex actually has waiters, or we clear the flag if it doesn't.  See, it was
-  OK that we set that flag early, since now it is cleared.
-
-  3) Failed to grab ownership
-  ---------------------------
-
-  The most interesting case is when we fail to take ownership. This means that
-  there exists an owner, or there's a pending owner with equal or higher
-  priority than the current task.
-
-We'll continue on the failed case.
-
-If the mutex has a timeout, we set up a timer to go off to break us out
-of this mutex if we failed to get it after a specified amount of time.
-
-Now we enter a loop that will continue to try to take ownership of the mutex, or
-fail from a timeout or signal.
-
-Once again we try to take the mutex.  This will usually fail the first time
-in the loop, since it had just failed to get the mutex.  But the second time
-in the loop, this would likely succeed, since the task would likely be
-the pending owner.
-
-If the mutex is TASK_INTERRUPTIBLE a check for signals and timeout is done
-here.
-
-The waiter structure has a "task" field that points to the task that is blocked
-on the mutex.  This field can be NULL the first time it goes through the loop
-or if the task is a pending owner and had its mutex stolen.  If the "task"
-field is NULL then we need to set up the accounting for it.
-
-Task blocks on mutex
---------------------
-
-The accounting of a mutex and process is done with the waiter structure of
-the process.  The "task" field is set to the process, and the "lock" field
-to the mutex.  The plist nodes are initialized to the processes current
-priority.
-
-Since the wait_lock was taken at the entry of the slow lock, we can safely
-add the waiter to the wait_list.  If the current process is the highest
-priority process currently waiting on this mutex, then we remove the
-previous top waiter process (if it exists) from the pi_list of the owner,
-and add the current process to that list.  Since the pi_list of the owner
-has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
-should adjust its priority accordingly.
-
-If the owner is also blocked on a lock, and had its pi_list changed
-(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
-and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
-
-Now all locks are released, and if the current process is still blocked on a
-mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
-
-Waking up in the loop
----------------------
-
-The schedule can then wake up for a few reasons.
-  1) we were given pending ownership of the mutex.
-  2) we received a signal and was TASK_INTERRUPTIBLE
-  3) we had a timeout and was TASK_INTERRUPTIBLE
-
-In any of these cases, we continue the loop and once again try to grab the
-ownership of the mutex.  If we succeed, we exit the loop, otherwise we continue
-and on signal and timeout, will exit the loop, or if we had the mutex stolen
-we just simply add ourselves back on the lists and go back to sleep.
-
-Note: For various reasons, because of timeout and signals, the steal mutex
-      algorithm needs to be careful. This is because the current process is
-      still on the wait_list. And because of dynamic changing of priorities,
-      especially on SCHED_OTHER tasks, the current process can be the
-      highest priority task on the wait_list.
-
-Failed to get mutex on Timeout or Signal
-----------------------------------------
-
-If a timeout or signal occurred, the waiter's "task" field would not be
-NULL and the task needs to be taken off the wait_list of the mutex and perhaps
-pi_list of the owner.  If this process was a high priority process, then
-the rt_mutex_adjust_prio_chain needs to be executed again on the owner,
-but this time it will be lowering the priorities.
-
-
-Unlocking the Mutex
--------------------
-
-The unlocking of a mutex also has a fast path for those architectures with
-CMPXCHG.  Since the taking of a mutex on contention always sets the
-"Has Waiters" flag of the mutex's owner, we use this to know if we need to
-take the slow path when unlocking the mutex.  If the mutex doesn't have any
-waiters, the owner field of the mutex would equal the current process and
-the mutex can be unlocked by just replacing the owner field with NULL.
-
-If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available),
-the slow unlock path is taken.
-
-The first thing done in the slow unlock path is to take the wait_lock of the
-mutex.  This synchronizes the locking and unlocking of the mutex.
-
-A check is made to see if the mutex has waiters or not.  On architectures that
-do not have CMPXCHG, this is the location that the owner of the mutex will
-determine if a waiter needs to be awoken or not.  On architectures that
-do have CMPXCHG, that check is done in the fast path, but it is still needed
-in the slow path too.  If a waiter of a mutex woke up because of a signal
-or timeout between the time the owner failed the fast path CMPXCHG check and
-the grabbing of the wait_lock, the mutex may not have any waiters, thus the
-owner still needs to make this check. If there are no waiters then the mutex
-owner field is set to NULL, the wait_lock is released and nothing more is
-needed.
-
-If there are waiters, then we need to wake one up and give that waiter
-pending ownership.
-
-On the wake up code, the pi_lock of the current owner is taken.  The top
-waiter of the lock is found and removed from the wait_list of the mutex
-as well as the pi_list of the current owner.  The task field of the new
-pending owner's waiter structure is set to NULL, and the owner field of the
-mutex is set to the new owner with the "Pending Owner" bit set, as well
-as the "Has Waiters" bit if there still are other processes blocked on the
-mutex.
-
-The pi_lock of the previous owner is released, and the new pending owner's
-pi_lock is taken.  Remember that this is the trick to prevent the race
-condition in rt_mutex_adjust_prio_chain from adding itself as a waiter
-on the mutex.
-
-We now clear the "pi_blocked_on" field of the new pending owner, and if
-the mutex still has waiters pending, we add the new top waiter to the pi_list
-of the pending owner.
-
-Finally we unlock the pi_lock of the pending owner and wake it up.
-
-
-Contact
--------
-
-For updates on this document, please email Steven Rostedt <rostedt@goodmis.org>
-
-
-Credits
--------
-
-Author:  Steven Rostedt <rostedt@goodmis.org>
-
-Reviewers:  Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap
-
-Updates
--------
-
-This document was originally written for 2.6.17-rc3-mm1
diff --git a/Documentation/rt-mutex.txt b/Documentation/rt-mutex.txt
deleted file mode 100644
index 243393d882ee..000000000000
--- a/Documentation/rt-mutex.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-RT-mutex subsystem with PI support
-----------------------------------
-
-RT-mutexes with priority inheritance are used to support PI-futexes,
-which enable pthread_mutex_t priority inheritance attributes
-(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
-about PI-futexes.]
-
-This technology was developed in the -rt tree and streamlined for
-pthread_mutex support.
-
-Basic principles:
------------------
-
-RT-mutexes extend the semantics of simple mutexes by the priority
-inheritance protocol.
-
-A low priority owner of a rt-mutex inherits the priority of a higher
-priority waiter until the rt-mutex is released. If the temporarily
-boosted owner blocks on a rt-mutex itself it propagates the priority
-boosting to the owner of the other rt_mutex it gets blocked on. The
-priority boosting is immediately removed once the rt_mutex has been
-unlocked.
-
-This approach allows us to shorten the block of high-prio tasks on
-mutexes which protect shared resources. Priority inheritance is not a
-magic bullet for poorly designed applications, but it allows
-well-designed applications to use userspace locks in critical parts of
-an high priority thread, without losing determinism.
-
-The enqueueing of the waiters into the rtmutex waiter list is done in
-priority order. For same priorities FIFO order is chosen. For each
-rtmutex, only the top priority waiter is enqueued into the owner's
-priority waiters list. This list too queues in priority order. Whenever
-the top priority waiter of a task changes (for example it timed out or
-got a signal), the priority of the owner task is readjusted. [The
-priority enqueueing is handled by "plists", see include/linux/plist.h
-for more details.]
-
-RT-mutexes are optimized for fastpath operations and have no internal
-locking overhead when locking an uncontended mutex or unlocking a mutex
-without waiters. The optimized fastpath operations require cmpxchg
-support. [If that is not available then the rt-mutex internal spinlock
-is used]
-
-The state of the rt-mutex is tracked via the owner field of the rt-mutex
-structure:
-
-rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1
-are used to keep track of the "owner is pending" and "rtmutex has
-waiters" state.
-
- owner		bit1	bit0
- NULL		0	0	mutex is free (fast acquire possible)
- NULL		0	1	invalid state
- NULL		1	0	Transitional state*
- NULL		1	1	invalid state
- taskpointer	0	0	mutex is held (fast release possible)
- taskpointer	0	1	task is pending owner
- taskpointer	1	0	mutex is held and has waiters
- taskpointer	1	1	task is pending owner and mutex has waiters
-
-Pending-ownership handling is a performance optimization:
-pending-ownership is assigned to the first (highest priority) waiter of
-the mutex, when the mutex is released. The thread is woken up and once
-it starts executing it can acquire the mutex. Until the mutex is taken
-by it (bit 0 is cleared) a competing higher priority thread can "steal"
-the mutex which puts the woken up thread back on the waiters list.
-
-The pending-ownership optimization is especially important for the
-uninterrupted workflow of high-prio tasks which repeatedly
-takes/releases locks that have lower-prio waiters. Without this
-optimization the higher-prio thread would ping-pong to the lower-prio
-task [because at unlock time we always assign a new owner].
-
-(*) The "mutex has waiters" bit gets set to take the lock. If the lock
-doesn't already have an owner, this bit is quickly cleared if there are
-no waiters.  So this is a transitional state to synchronize with looking
-at the owner field of the mutex and the mutex owner releasing the lock.
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
deleted file mode 100644
index 97eaf5727178..000000000000
--- a/Documentation/spinlocks.txt
+++ /dev/null
@@ -1,167 +0,0 @@
-Lesson 1: Spin locks
-
-The most basic primitive for locking is spinlock.
-
-static DEFINE_SPINLOCK(xxx_lock);
-
-	unsigned long flags;
-
-	spin_lock_irqsave(&xxx_lock, flags);
-	... critical section here ..
-	spin_unlock_irqrestore(&xxx_lock, flags);
-
-The above is always safe. It will disable interrupts _locally_, but the
-spinlock itself will guarantee the global lock, so it will guarantee that
-there is only one thread-of-control within the region(s) protected by that
-lock. This works well even under UP also, so the code does _not_ need to
-worry about UP vs SMP issues: the spinlocks work correctly under both.
-
-   NOTE! Implications of spin_locks for memory are further described in:
-
-     Documentation/memory-barriers.txt
-       (5) LOCK operations.
-       (6) UNLOCK operations.
-
-The above is usually pretty simple (you usually need and want only one
-spinlock for most things - using more than one spinlock can make things a
-lot more complex and even slower and is usually worth it only for
-sequences that you _know_ need to be split up: avoid it at all cost if you
-aren't sure).
-
-This is really the only really hard part about spinlocks: once you start
-using spinlocks they tend to expand to areas you might not have noticed
-before, because you have to make sure the spinlocks correctly protect the
-shared data structures _everywhere_ they are used. The spinlocks are most
-easily added to places that are completely independent of other code (for
-example, internal driver data structures that nobody else ever touches).
-
-   NOTE! The spin-lock is safe only when you _also_ use the lock itself
-   to do locking across CPU's, which implies that EVERYTHING that
-   touches a shared variable has to agree about the spinlock they want
-   to use.
-
-----
-
-Lesson 2: reader-writer spinlocks.
-
-If your data accesses have a very natural pattern where you usually tend
-to mostly read from the shared variables, the reader-writer locks
-(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
-readers to be in the same critical region at once, but if somebody wants
-to change the variables it has to get an exclusive write lock.
-
-   NOTE! reader-writer locks require more atomic memory operations than
-   simple spinlocks.  Unless the reader critical section is long, you
-   are better off just using spinlocks.
-
-The routines look the same as above:
-
-   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
-
-	unsigned long flags;
-
-	read_lock_irqsave(&xxx_lock, flags);
-	.. critical section that only reads the info ...
-	read_unlock_irqrestore(&xxx_lock, flags);
-
-	write_lock_irqsave(&xxx_lock, flags);
-	.. read and write exclusive access to the info ...
-	write_unlock_irqrestore(&xxx_lock, flags);
-
-The above kind of lock may be useful for complex data structures like
-linked lists, especially searching for entries without changing the list
-itself.  The read lock allows many concurrent readers.  Anything that
-_changes_ the list will have to get the write lock.
-
-   NOTE! RCU is better for list traversal, but requires careful
-   attention to design detail (see Documentation/RCU/listRCU.txt).
-
-Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
-time need to do any changes (even if you don't do it every time), you have
-to get the write-lock at the very beginning.
-
-   NOTE! We are working hard to remove reader-writer spinlocks in most
-   cases, so please don't add a new one without consensus.  (Instead, see
-   Documentation/RCU/rcu.txt for complete information.)
-
-----
-
-Lesson 3: spinlocks revisited.
-
-The single spin-lock primitives above are by no means the only ones. They
-are the most safe ones, and the ones that work under all circumstances,
-but partly _because_ they are safe they are also fairly slow. They are slower
-than they'd need to be, because they do have to disable interrupts
-(which is just a single instruction on a x86, but it's an expensive one -
-and on other architectures it can be worse).
-
-If you have a case where you have to protect a data structure across
-several CPU's and you want to use spinlocks you can potentially use
-cheaper versions of the spinlocks. IFF you know that the spinlocks are
-never used in interrupt handlers, you can use the non-irq versions:
-
-	spin_lock(&lock);
-	...
-	spin_unlock(&lock);
-
-(and the equivalent read-write versions too, of course). The spinlock will
-guarantee the same kind of exclusive access, and it will be much faster. 
-This is useful if you know that the data in question is only ever
-manipulated from a "process context", ie no interrupts involved. 
-
-The reasons you mustn't use these versions if you have interrupts that
-play with the spinlock is that you can get deadlocks:
-
-	spin_lock(&lock);
-	...
-		<- interrupt comes in:
-			spin_lock(&lock);
-
-where an interrupt tries to lock an already locked variable. This is ok if
-the other interrupt happens on another CPU, but it is _not_ ok if the
-interrupt happens on the same CPU that already holds the lock, because the
-lock will obviously never be released (because the interrupt is waiting
-for the lock, and the lock-holder is interrupted by the interrupt and will
-not continue until the interrupt has been processed). 
-
-(This is also the reason why the irq-versions of the spinlocks only need
-to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
-on other CPU's, because an interrupt on another CPU doesn't interrupt the
-CPU that holds the lock, so the lock-holder can continue and eventually
-releases the lock). 
-
-Note that you can be clever with read-write locks and interrupts. For
-example, if you know that the interrupt only ever gets a read-lock, then
-you can use a non-irq version of read locks everywhere - because they
-don't block on each other (and thus there is no dead-lock wrt interrupts. 
-But when you do the write-lock, you have to use the irq-safe version. 
-
-For an example of being clever with rw-locks, see the "waitqueue_lock" 
-handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
-within an interrupt, they only read the queue in order to know whom to
-wake up. So read-locks are safe (which is good: they are very common
-indeed), while write-locks need to protect themselves against interrupts.
-
-		Linus
-
-----
-
-Reference information:
-
-For dynamic initialization, use spin_lock_init() or rwlock_init() as
-appropriate:
-
-   spinlock_t xxx_lock;
-   rwlock_t xxx_rw_lock;
-
-   static int __init xxx_init(void)
-   {
-	spin_lock_init(&xxx_lock);
-	rwlock_init(&xxx_rw_lock);
-	...
-   }
-
-   module_init(xxx_init);
-
-For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
-__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
diff --git a/Documentation/ww-mutex-design.txt b/Documentation/ww-mutex-design.txt
deleted file mode 100644
index 8a112dc304c3..000000000000
--- a/Documentation/ww-mutex-design.txt
+++ /dev/null
@@ -1,344 +0,0 @@
-Wait/Wound Deadlock-Proof Mutex Design
-======================================
-
-Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
-
-Motivation for WW-Mutexes
--------------------------
-
-GPU's do operations that commonly involve many buffers.  Those buffers
-can be shared across contexts/processes, exist in different memory
-domains (for example VRAM vs system memory), and so on.  And with
-PRIME / dmabuf, they can even be shared across devices.  So there are
-a handful of situations where the driver needs to wait for buffers to
-become ready.  If you think about this in terms of waiting on a buffer
-mutex for it to become available, this presents a problem because
-there is no way to guarantee that buffers appear in a execbuf/batch in
-the same order in all contexts.  That is directly under control of
-userspace, and a result of the sequence of GL calls that an application
-makes.	Which results in the potential for deadlock.  The problem gets
-more complex when you consider that the kernel may need to migrate the
-buffer(s) into VRAM before the GPU operates on the buffer(s), which
-may in turn require evicting some other buffers (and you don't want to
-evict other buffers which are already queued up to the GPU), but for a
-simplified understanding of the problem you can ignore this.
-
-The algorithm that the TTM graphics subsystem came up with for dealing with
-this problem is quite simple.  For each group of buffers (execbuf) that need
-to be locked, the caller would be assigned a unique reservation id/ticket,
-from a global counter.  In case of deadlock while locking all the buffers
-associated with a execbuf, the one with the lowest reservation ticket (i.e.
-the oldest task) wins, and the one with the higher reservation id (i.e. the
-younger task) unlocks all of the buffers that it has already locked, and then
-tries again.
-
-In the RDBMS literature this deadlock handling approach is called wait/wound:
-The older tasks waits until it can acquire the contended lock. The younger tasks
-needs to back off and drop all the locks it is currently holding, i.e. the
-younger task is wounded.
-
-Concepts
---------
-
-Compared to normal mutexes two additional concepts/objects show up in the lock
-interface for w/w mutexes:
-
-Acquire context: To ensure eventual forward progress it is important the a task
-trying to acquire locks doesn't grab a new reservation id, but keeps the one it
-acquired when starting the lock acquisition. This ticket is stored in the
-acquire context. Furthermore the acquire context keeps track of debugging state
-to catch w/w mutex interface abuse.
-
-W/w class: In contrast to normal mutexes the lock class needs to be explicit for
-w/w mutexes, since it is required to initialize the acquire context.
-
-Furthermore there are three different class of w/w lock acquire functions:
-
-* Normal lock acquisition with a context, using ww_mutex_lock.
-
-* Slowpath lock acquisition on the contending lock, used by the wounded task
-  after having dropped all already acquired locks. These functions have the
-  _slow postfix.
-
-  From a simple semantics point-of-view the _slow functions are not strictly
-  required, since simply calling the normal ww_mutex_lock functions on the
-  contending lock (after having dropped all other already acquired locks) will
-  work correctly. After all if no other ww mutex has been acquired yet there's
-  no deadlock potential and hence the ww_mutex_lock call will block and not
-  prematurely return -EDEADLK. The advantage of the _slow functions is in
-  interface safety:
-  - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
-    has a void return type. Note that since ww mutex code needs loops/retries
-    anyway the __must_check doesn't result in spurious warnings, even though the
-    very first lock operation can never fail.
-  - When full debugging is enabled ww_mutex_lock_slow checks that all acquired
-    ww mutex have been released (preventing deadlocks) and makes sure that we
-    block on the contending lock (preventing spinning through the -EDEADLK
-    slowpath until the contended lock can be acquired).
-
-* Functions to only acquire a single w/w mutex, which results in the exact same
-  semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
-  context.
-
-  Again this is not strictly required. But often you only want to acquire a
-  single lock in which case it's pointless to set up an acquire context (and so
-  better to avoid grabbing a deadlock avoidance ticket).
-
-Of course, all the usual variants for handling wake-ups due to signals are also
-provided.
-
-Usage
------
-
-Three different ways to acquire locks within the same w/w class. Common
-definitions for methods #1 and #2:
-
-static DEFINE_WW_CLASS(ww_class);
-
-struct obj {
-	struct ww_mutex lock;
-	/* obj data */
-};
-
-struct obj_entry {
-	struct list_head head;
-	struct obj *obj;
-};
-
-Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
-This is useful if a list of required objects is already tracked somewhere.
-Furthermore the lock helper can use propagate the -EALREADY return code back to
-the caller as a signal that an object is twice on the list. This is useful if
-the list is constructed from userspace input and the ABI requires userspace to
-not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
-
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj *res_obj = NULL;
-	struct obj_entry *contended_entry = NULL;
-	struct obj_entry *entry;
-
-	ww_acquire_init(ctx, &ww_class);
-
-retry:
-	list_for_each_entry (entry, list, head) {
-		if (entry->obj == res_obj) {
-			res_obj = NULL;
-			continue;
-		}
-		ret = ww_mutex_lock(&entry->obj->lock, ctx);
-		if (ret < 0) {
-			contended_entry = entry;
-			goto err;
-		}
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-
-err:
-	list_for_each_entry_continue_reverse (entry, list, head)
-		ww_mutex_unlock(&entry->obj->lock);
-
-	if (res_obj)
-		ww_mutex_unlock(&res_obj->lock);
-
-	if (ret == -EDEADLK) {
-		/* we lost out in a seqno race, lock and retry.. */
-		ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
-		res_obj = contended_entry->obj;
-		goto retry;
-	}
-	ww_acquire_fini(ctx);
-
-	return ret;
-}
-
-Method 2, using a list in execbuf->buffers that can be reordered. Same semantics
-of duplicate entry detection using -EALREADY as method 1 above. But the
-list-reordering allows for a bit more idiomatic code.
-
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj_entry *entry, *entry2;
-
-	ww_acquire_init(ctx, &ww_class);
-
-	list_for_each_entry (entry, list, head) {
-		ret = ww_mutex_lock(&entry->obj->lock, ctx);
-		if (ret < 0) {
-			entry2 = entry;
-
-			list_for_each_entry_continue_reverse (entry2, list, head)
-				ww_mutex_unlock(&entry2->obj->lock);
-
-			if (ret != -EDEADLK) {
-				ww_acquire_fini(ctx);
-				return ret;
-			}
-
-			/* we lost out in a seqno race, lock and retry.. */
-			ww_mutex_lock_slow(&entry->obj->lock, ctx);
-
-			/*
-			 * Move buf to head of the list, this will point
-			 * buf->next to the first unlocked entry,
-			 * restarting the for loop.
-			 */
-			list_del(&entry->head);
-			list_add(&entry->head, list);
-		}
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-}
-
-Unlocking works the same way for both methods #1 and #2:
-
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj_entry *entry;
-
-	list_for_each_entry (entry, list, head)
-		ww_mutex_unlock(&entry->obj->lock);
-
-	ww_acquire_fini(ctx);
-}
-
-Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
-e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
-and edges can only be changed when holding the locks of all involved nodes. w/w
-mutexes are a natural fit for such a case for two reasons:
-- They can handle lock-acquisition in any order which allows us to start walking
-  a graph from a starting point and then iteratively discovering new edges and
-  locking down the nodes those edges connect to.
-- Due to the -EALREADY return code signalling that a given objects is already
-  held there's no need for additional book-keeping to break cycles in the graph
-  or keep track off which looks are already held (when using more than one node
-  as a starting point).
-
-Note that this approach differs in two important ways from the above methods:
-- Since the list of objects is dynamically constructed (and might very well be
-  different when retrying due to hitting the -EDEADLK wound condition) there's
-  no need to keep any object on a persistent list when it's not locked. We can
-  therefore move the list_head into the object itself.
-- On the other hand the dynamic object list construction also means that the -EALREADY return
-  code can't be propagated.
-
-Note also that methods #1 and #2 and method #3 can be combined, e.g. to first lock a
-list of starting nodes (passed in from userspace) using one of the above
-methods. And then lock any additional objects affected by the operations using
-method #3 below. The backoff/retry procedure will be a bit more involved, since
-when the dynamic locking step hits -EDEADLK we also need to unlock all the
-objects acquired with the fixed list. But the w/w mutex debug checks will catch
-any interface misuse for these cases.
-
-Also, method 3 can't fail the lock acquisition step since it doesn't return
--EALREADY. Of course this would be different when using the _interruptible
-variants, but that's outside of the scope of these examples here.
-
-struct obj {
-	struct ww_mutex ww_mutex;
-	struct list_head locked_list;
-};
-
-static DEFINE_WW_CLASS(ww_class);
-
-void __unlock_objs(struct list_head *list)
-{
-	struct obj *entry, *temp;
-
-	list_for_each_entry_safe (entry, temp, list, locked_list) {
-		/* need to do that before unlocking, since only the current lock holder is
-		allowed to use object */
-		list_del(&entry->locked_list);
-		ww_mutex_unlock(entry->ww_mutex)
-	}
-}
-
-void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj *obj;
-
-	ww_acquire_init(ctx, &ww_class);
-
-retry:
-	/* re-init loop start state */
-	loop {
-		/* magic code which walks over a graph and decides which objects
-		 * to lock */
-
-		ret = ww_mutex_lock(obj->ww_mutex, ctx);
-		if (ret == -EALREADY) {
-			/* we have that one already, get to the next object */
-			continue;
-		}
-		if (ret == -EDEADLK) {
-			__unlock_objs(list);
-
-			ww_mutex_lock_slow(obj, ctx);
-			list_add(&entry->locked_list, list);
-			goto retry;
-		}
-
-		/* locked a new object, add it to the list */
-		list_add_tail(&entry->locked_list, list);
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-}
-
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	__unlock_objs(list);
-	ww_acquire_fini(ctx);
-}
-
-Method 4: Only lock one single objects. In that case deadlock detection and
-prevention is obviously overkill, since with grabbing just one lock you can't
-produce a deadlock within just one class. To simplify this case the w/w mutex
-api can be used with a NULL context.
-
-Implementation Details
-----------------------
-
-Design:
-  ww_mutex currently encapsulates a struct mutex, this means no extra overhead for
-  normal mutex locks, which are far more common. As such there is only a small
-  increase in code size if wait/wound mutexes are not used.
-
-  In general, not much contention is expected. The locks are typically used to
-  serialize access to resources for devices. The only way to make wakeups
-  smarter would be at the cost of adding a field to struct mutex_waiter. This
-  would add overhead to all cases where normal mutexes are used, and
-  ww_mutexes are generally less performance sensitive.
-
-Lockdep:
-  Special care has been taken to warn for as many cases of api abuse
-  as possible. Some common api abuses will be caught with
-  CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
-
-  Some of the errors which will be warned about:
-   - Forgetting to call ww_acquire_fini or ww_acquire_init.
-   - Attempting to lock more mutexes after ww_acquire_done.
-   - Attempting to lock the wrong mutex after -EDEADLK and
-     unlocking all mutexes.
-   - Attempting to lock the right mutex after -EDEADLK,
-     before unlocking all mutexes.
-
-   - Calling ww_mutex_lock_slow before -EDEADLK was returned.
-
-   - Unlocking mutexes with the wrong unlock function.
-   - Calling one of the ww_acquire_* twice on the same context.
-   - Using a different ww_class for the mutex than for the ww_acquire_ctx.
-   - Normal lockdep errors that can result in deadlocks.
-
-  Some of the lockdep errors that can result in deadlocks:
-   - Calling ww_acquire_init to initialize a second ww_acquire_ctx before
-     having called ww_acquire_fini on the first.
-   - 'normal' deadlocks that can occur.
-
-FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic
-implemented.
diff --git a/MAINTAINERS b/MAINTAINERS
index 1acc624ecfd7..aac481fcbf5c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5523,8 +5523,8 @@ M:	Ingo Molnar <mingo@redhat.com>
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking
 S:	Maintained
-F:	Documentation/lockdep*.txt
-F:	Documentation/lockstat.txt
+F:	Documentation/locking/lockdep*.txt
+F:	Documentation/locking/lockstat.txt
 F:	include/linux/lockdep.h
 F:	kernel/locking/
 
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index 0dc57d5ecd10..3a02e5e3e9f3 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -35,7 +35,7 @@
  * of extra utility/tracking out of our acquire-ctx.  This is provided
  * by drm_modeset_lock / drm_modeset_acquire_ctx.
  *
- * For basic principles of ww_mutex, see: Documentation/ww-mutex-design.txt
+ * For basic principles of ww_mutex, see: Documentation/locking/ww-mutex-design.txt
  *
  * The basic usage pattern is to:
  *
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 008388f920d7..f388481201cd 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -4,7 +4,7 @@
  *  Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *
- * see Documentation/lockdep-design.txt for more details.
+ * see Documentation/locking/lockdep-design.txt for more details.
  */
 #ifndef __LINUX_LOCKDEP_H
 #define __LINUX_LOCKDEP_H
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index e4c29418f407..cc31498fc526 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -133,7 +133,7 @@ static inline int mutex_is_locked(struct mutex *lock)
 
 /*
  * See kernel/locking/mutex.c for detailed documentation of these APIs.
- * Also see Documentation/mutex-design.txt.
+ * Also see Documentation/locking/mutex-design.txt.
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 035d3c57fc8a..8f498cdde280 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -149,7 +149,7 @@ extern void downgrade_write(struct rw_semaphore *sem);
  * static then another method for expressing nested locking is
  * the explicit definition of lock class keys and the use of
  * lockdep_set_class() at lock initialization time.
- * See Documentation/lockdep-design.txt for more details.)
+ * See Documentation/locking/lockdep-design.txt for more details.)
  */
 extern void down_read_nested(struct rw_semaphore *sem, int subclass);
 extern void down_write_nested(struct rw_semaphore *sem, int subclass);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 0d8b6ed93874..dadbf88c22c4 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -15,7 +15,7 @@
  *    by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
  *    and Sven Dietrich.
  *
- * Also see Documentation/mutex-design.txt.
+ * Also see Documentation/locking/mutex-design.txt.
  */
 #include <linux/mutex.h>
 #include <linux/ww_mutex.h>
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index a0ea2a141b3b..7c98873a3077 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -8,7 +8,7 @@
  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  *  Copyright (C) 2006 Esben Nielsen
  *
- *  See Documentation/rt-mutex-design.txt for details.
+ *  See Documentation/locking/rt-mutex-design.txt for details.
  */
 #include <linux/spinlock.h>
 #include <linux/export.h>
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 901096d31c66..9b94a063e26c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -924,7 +924,7 @@ config PROVE_LOCKING
 	 the proof of observed correctness is also maintained for an
 	 arbitrary combination of these separate locking variants.
 
-	 For more details, see Documentation/lockdep-design.txt.
+	 For more details, see Documentation/locking/lockdep-design.txt.
 
 config LOCKDEP
 	bool
@@ -945,7 +945,7 @@ config LOCK_STAT
 	help
 	 This feature enables tracking lock contention points
 
-	 For more details, see Documentation/lockstat.txt
+	 For more details, see Documentation/locking/lockstat.txt
 
 	 This also enables lock events required by "perf lock",
 	 subcommand of perf.
-- 
cgit v1.2.3


From 4999201a59ef555f9105d2bb2459ed895627f7aa Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 8 Aug 2014 12:35:36 +0200
Subject: locking/spinlocks: Always evaluate the second argument of
 spin_lock_nested()

Evaluating a macro argument only if certain configuration options
have been selected is confusing and error-prone. Hence always
evaluate the second argument of spin_lock_nested().

An intentional side effect of this patch is that it avoids that
the following warning is reported for netif_addr_lock_nested()
when building with CONFIG_DEBUG_LOCK_ALLOC=n and with W=1:

  include/linux/netdevice.h: In function 'netif_addr_lock_nested':
  include/linux/netdevice.h:2865:6: warning: variable 'subclass' set but not used [-Wunused-but-set-variable]
    int subclass = SINGLE_DEPTH_NESTING;
        ^

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: David Rientjes <rientjes@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/53E4A7F8.1040700@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/spinlock.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 3f2867ff0ced..262ba4ef9a8e 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -197,7 +197,13 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 		 _raw_spin_lock_nest_lock(lock, &(nest_lock)->dep_map);	\
 	 } while (0)
 #else
-# define raw_spin_lock_nested(lock, subclass)		_raw_spin_lock(lock)
+/*
+ * Always evaluate the 'subclass' argument to avoid that the compiler
+ * warns about set-but-not-used variables when building with
+ * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1.
+ */
+# define raw_spin_lock_nested(lock, subclass)		\
+	_raw_spin_lock(((void)(subclass), (lock)))
 # define raw_spin_lock_nest_lock(lock, nest_lock)	_raw_spin_lock(lock)
 #endif
 
-- 
cgit v1.2.3


From f0bab73cb539fb803c4d419951e8d28aa4964f8f Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Wed, 6 Aug 2014 13:22:01 -0400
Subject: locking/lockdep: Restrict the use of recursive read_lock() with
 qrwlock

Unlike the original unfair rwlock implementation, queued rwlock
will grant lock according to the chronological sequence of the lock
requests except when the lock requester is in the interrupt context.
Consequently, recursive read_lock calls will now hang the process if
there is a write_lock call somewhere in between the read_lock calls.

This patch updates the lockdep implementation to look for recursive
read_lock calls. A new read state (3) is used to mark those read_lock
call that cannot be recursively called except in the interrupt
context. The new read state does exhaust the 2 bits available in
held_lock:read bit field. The addition of any new read state in the
future may require a redesign of how all those bits are squeezed
together in the held_lock structure.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1407345722-61615-2-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h  | 10 +++++++++-
 kernel/locking/lockdep.c |  6 ++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index f388481201cd..b5a84b62fb84 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -478,16 +478,24 @@ static inline void print_irqtrace_events(struct task_struct *curr)
  * on the per lock-class debug mode:
  */
 
+/*
+ * Read states in the 2-bit held_lock:read field:
+ *  0: Exclusive lock
+ *  1: Shareable lock, cannot be recursively called
+ *  2: Shareable lock, can be recursively called
+ *  3: Shareable lock, cannot be recursively called except in interrupt context
+ */
 #define lock_acquire_exclusive(l, s, t, n, i)		lock_acquire(l, s, t, 0, 1, n, i)
 #define lock_acquire_shared(l, s, t, n, i)		lock_acquire(l, s, t, 1, 1, n, i)
 #define lock_acquire_shared_recursive(l, s, t, n, i)	lock_acquire(l, s, t, 2, 1, n, i)
+#define lock_acquire_shared_irecursive(l, s, t, n, i)	lock_acquire(l, s, t, 3, 1, n, i)
 
 #define spin_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
 #define spin_acquire_nest(l, s, t, n, i)	lock_acquire_exclusive(l, s, t, n, i)
 #define spin_release(l, n, i)			lock_release(l, n, i)
 
 #define rwlock_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
-#define rwlock_acquire_read(l, s, t, i)		lock_acquire_shared_recursive(l, s, t, NULL, i)
+#define rwlock_acquire_read(l, s, t, i)		lock_acquire_shared_irecursive(l, s, t, NULL, i)
 #define rwlock_release(l, n, i)			lock_release(l, n, i)
 
 #define seqcount_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 88d0d4420ad2..420ba685c4e5 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3597,6 +3597,12 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	raw_local_irq_save(flags);
 	check_flags(flags);
 
+	/*
+	 * An interrupt recursive read in interrupt context can be considered
+	 * to be the same as a recursive read from checking perspective.
+	 */
+	if ((read == 3) && in_interrupt())
+		read = 2;
 	current->lockdep_recursion = 1;
 	trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
 	__lock_acquire(lock, subclass, trylock, read, check,
-- 
cgit v1.2.3


From 2c67568903d6ae1b8cfa343c649029180239418e Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Aug 2014 13:02:36 +0200
Subject: spi: Add missing kerneldoc bits

These are all arguments or fields that got added without updating the
kerneldoc comments.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/spi/spi.c       | 1 +
 include/linux/spi/spi.h | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index e6f076d5ffd5..f52f3e647ef8 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -843,6 +843,7 @@ out:
 
 /**
  * spi_finalize_current_transfer - report completion of a transfer
+ * @master: the master reporting completion
  *
  * Called by SPI drivers using the core transfer_one_message()
  * implementation to notify it that the current interrupt driven
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e713543336f1..46d188a9947c 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -253,6 +253,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  *	the device whose settings are being modified.
  * @transfer: adds a message to the controller's transfer queue.
  * @cleanup: frees controller-specific state
+ * @can_dma: determine whether this master supports DMA
  * @queued: whether this master is providing an internal message queue
  * @kworker: thread struct for message pump
  * @kworker_task: pointer to task for message pump kworker thread
@@ -262,6 +263,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @cur_msg: the currently in-flight message
  * @cur_msg_prepared: spi_prepare_message was called for the currently
  *                    in-flight message
+ * @cur_msg_mapped: message has been mapped for DMA
  * @xfer_completion: used by core transfer_one_message()
  * @busy: message pump is busy
  * @running: message pump is running
@@ -299,6 +301,10 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
+ * @dma_tx: DMA transmit channel
+ * @dma_rx: DMA receive channel
+ * @dummy_rx: dummy receive buffer for full-duplex devices
+ * @dummy_tx: dummy transmit buffer for full-duplex devices
  *
  * Each SPI master controller can communicate with one or more @spi_device
  * children.  These make a small bus, sharing MOSI, MISO and SCK signals
@@ -632,6 +638,7 @@ struct spi_transfer {
  *	addresses for each transfer buffer
  * @complete: called to report transaction completions
  * @context: the argument to complete() when it's called
+ * @frame_length: the total number of bytes in the message
  * @actual_length: the total number of bytes that were transferred in all
  *	successful segments
  * @status: zero for success, else negative errno
-- 
cgit v1.2.3


From 5300fdcb7b7e97d83033bc7196582705524d35ea Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 13 Aug 2014 16:38:29 +0200
Subject: rhashtable: RCU annotations for next pointers

Properly annotate next pointers as access is RCU protected in
the lookup path.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 4 ++--
 lib/rhashtable.c           | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 9cda293c867d..8c6048e77f29 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -21,7 +21,7 @@
 #include <linux/rculist.h>
 
 struct rhash_head {
-	struct rhash_head		*next;
+	struct rhash_head __rcu		*next;
 };
 
 #define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL)
@@ -97,7 +97,7 @@ u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr);
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t);
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t);
 void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
-			     struct rhash_head **pprev, gfp_t flags);
+			     struct rhash_head __rcu **pprev, gfp_t flags);
 
 bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
 bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e6940cf16628..338dd7aa5e13 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
  * deletion when combined with walking or lookup.
  */
 void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
-			     struct rhash_head **pprev, gfp_t flags)
+			     struct rhash_head __rcu **pprev, gfp_t flags)
 {
 	struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
 
-- 
cgit v1.2.3


From c91eee56dc4f8c3d9ae834bacb835596d47a709e Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 13 Aug 2014 16:38:30 +0200
Subject: rhashtable: unexport and make rht_obj() static

No need to export rht_obj(), all inner to outer object translations
occur internally. It was intended to be used with rht_for_each() which
now primarily serves as the iterator for rhashtable_remove_pprev() to
effectively flush and free the full table.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 1 -
 lib/rhashtable.c           | 8 +-------
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 8c6048e77f29..af967c4c7591 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -117,7 +117,6 @@ void rhashtable_destroy(const struct rhashtable *ht);
 #define rht_dereference_rcu(p, ht) \
 	rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))
 
-/* Internal, use rht_obj() instead */
 #define rht_entry(ptr, type, member) container_of(ptr, type, member)
 #define rht_entry_safe(ptr, type, member) \
 ({ \
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 338dd7aa5e13..a2c78810ebc1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -38,16 +38,10 @@ int lockdep_rht_mutex_is_held(const struct rhashtable *ht)
 EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
 #endif
 
-/**
- * rht_obj - cast hash head to outer object
- * @ht:		hash table
- * @he:		hashed node
- */
-void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
+static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
 {
 	return (void *) he - ht->p.head_offset;
 }
-EXPORT_SYMBOL_GPL(rht_obj);
 
 static u32 __hashfn(const struct rhashtable *ht, const void *key,
 		      u32 len, u32 hsize)
-- 
cgit v1.2.3


From 93f560811e80216e98f3fcec220aa0f8836b09af Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 13 Aug 2014 16:38:31 +0200
Subject: rhashtable: fix annotations for rht_for_each_entry_rcu()

Call rcu_deference_raw() directly from within rht_for_each_entry_rcu()
as list_for_each_entry_rcu() does.

Fixes the following sparse warnings:
net/netlink/af_netlink.c:2906:25:    expected struct rhash_head const *__mptr
net/netlink/af_netlink.c:2906:25:    got struct rhash_head [noderef] <asn:4>*<noident>

Fixes: e341694e3eb57fc ("netlink: Convert netlink_lookup() to use RCU protected hash table")
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index af967c4c7591..36826c0166c5 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -123,11 +123,6 @@ void rhashtable_destroy(const struct rhashtable *ht);
 	typeof(ptr) __ptr = (ptr); \
 	   __ptr ? rht_entry(__ptr, type, member) : NULL; \
 })
-#define rht_entry_safe_rcu(ptr, type, member) \
-({ \
-	typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \
-	__ptr ? container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member) : NULL; \
-})
 
 #define rht_next_entry_safe(pos, ht, member) \
 ({ \
@@ -204,9 +199,10 @@ void rhashtable_destroy(const struct rhashtable *ht);
  * traversal is guarded by rcu_read_lock().
  */
 #define rht_for_each_entry_rcu(pos, head, member) \
-	for (pos = rht_entry_safe_rcu(head, typeof(*(pos)), member); \
+	for (pos = rht_entry_safe(rcu_dereference_raw(head), \
+				  typeof(*(pos)), member); \
 	     pos; \
-	     pos = rht_entry_safe_rcu((pos)->member.next, \
-				      typeof(*(pos)), member))
+	     pos = rht_entry_safe(rcu_dereference_raw((pos)->member.next), \
+				  typeof(*(pos)), member))
 
 #endif /* _LINUX_RHASHTABLE_H */
-- 
cgit v1.2.3


From 8a58d1f1f373238cb0d6d7f8d3dd723aa164b8ac Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 15 Aug 2014 12:38:41 -0600
Subject: blk-mq: get rid of unused BLK_MQ_F_SHOULD_SORT flag

We used to use this for determining whether to sort the dispatch list,
but it's unused now.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-mq.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index eb726b9c5762..a1e31f274fcd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -127,10 +127,9 @@ enum {
 	BLK_MQ_RQ_QUEUE_ERROR	= 2,	/* end IO with error */
 
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
-	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
-	BLK_MQ_F_TAG_SHARED	= 1 << 2,
-	BLK_MQ_F_SG_MERGE	= 1 << 3,
-	BLK_MQ_F_SYSFS_UP	= 1 << 4,
+	BLK_MQ_F_TAG_SHARED	= 1 << 1,
+	BLK_MQ_F_SG_MERGE	= 1 << 2,
+	BLK_MQ_F_SYSFS_UP	= 1 << 3,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
-- 
cgit v1.2.3


From 515d9b2c03943ca904cd135e1b1d9ddd168c1b27 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Tue, 12 Aug 2014 18:22:27 +0200
Subject: ata: remove deprecated struct ahci_platform_data

The last user of the deprecated struct ahci_platform_data has been
cleaned up recently (SPEAr1340 got a proper PHY driver).

Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_platform.c    | 18 +-----------------
 drivers/ata/libahci_platform.c | 23 -----------------------
 include/linux/ahci_platform.h  | 13 -------------
 3 files changed, 1 insertion(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index f61ddb9146d6..06f1d59fa678 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -32,7 +32,6 @@ static const struct ata_port_info ahci_port_info = {
 static int ahci_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct ahci_platform_data *pdata = dev_get_platdata(dev);
 	struct ahci_host_priv *hpriv;
 	int rc;
 
@@ -44,29 +43,14 @@ static int ahci_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	/*
-	 * Some platforms might need to prepare for mmio region access,
-	 * which could be done in the following init call. So, the mmio
-	 * region shouldn't be accessed before init (if provided) has
-	 * returned successfully.
-	 */
-	if (pdata && pdata->init) {
-		rc = pdata->init(dev, hpriv->mmio);
-		if (rc)
-			goto disable_resources;
-	}
-
 	if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci"))
 		hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ;
 
 	rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info);
 	if (rc)
-		goto pdata_exit;
+		goto disable_resources;
 
 	return 0;
-pdata_exit:
-	if (pdata && pdata->exit)
-		pdata->exit(dev);
 disable_resources:
 	ahci_platform_disable_resources(hpriv);
 	return rc;
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 5b92c290e6c6..c0510de8a4c9 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -502,13 +502,8 @@ EXPORT_SYMBOL_GPL(ahci_platform_init_host);
 
 static void ahci_host_stop(struct ata_host *host)
 {
-	struct device *dev = host->dev;
-	struct ahci_platform_data *pdata = dev_get_platdata(dev);
 	struct ahci_host_priv *hpriv = host->private_data;
 
-	if (pdata && pdata->exit)
-		pdata->exit(dev);
-
 	ahci_platform_disable_resources(hpriv);
 }
 
@@ -592,7 +587,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_resume_host);
  */
 int ahci_platform_suspend(struct device *dev)
 {
-	struct ahci_platform_data *pdata = dev_get_platdata(dev);
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
@@ -601,19 +595,9 @@ int ahci_platform_suspend(struct device *dev)
 	if (rc)
 		return rc;
 
-	if (pdata && pdata->suspend) {
-		rc = pdata->suspend(dev);
-		if (rc)
-			goto resume_host;
-	}
-
 	ahci_platform_disable_resources(hpriv);
 
 	return 0;
-
-resume_host:
-	ahci_platform_resume_host(dev);
-	return rc;
 }
 EXPORT_SYMBOL_GPL(ahci_platform_suspend);
 
@@ -629,7 +613,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend);
  */
 int ahci_platform_resume(struct device *dev)
 {
-	struct ahci_platform_data *pdata = dev_get_platdata(dev);
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
@@ -638,12 +621,6 @@ int ahci_platform_resume(struct device *dev)
 	if (rc)
 		return rc;
 
-	if (pdata && pdata->resume) {
-		rc = pdata->resume(dev);
-		if (rc)
-			goto disable_resources;
-	}
-
 	rc = ahci_platform_resume_host(dev);
 	if (rc)
 		goto disable_resources;
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 09a947e8bc87..642d6ae4030c 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -22,19 +22,6 @@ struct ata_port_info;
 struct ahci_host_priv;
 struct platform_device;
 
-/*
- * Note ahci_platform_data is deprecated, it is only kept around for use
- * by the old da850 and spear13xx ahci code.
- * New drivers should instead declare their own platform_driver struct, and
- * use ahci_platform* functions in their own probe, suspend and resume methods.
- */
-struct ahci_platform_data {
-	int (*init)(struct device *dev, void __iomem *addr);
-	void (*exit)(struct device *dev);
-	int (*suspend)(struct device *dev);
-	int (*resume)(struct device *dev);
-};
-
 int ahci_platform_enable_clks(struct ahci_host_priv *hpriv);
 void ahci_platform_disable_clks(struct ahci_host_priv *hpriv);
 int ahci_platform_enable_resources(struct ahci_host_priv *hpriv);
-- 
cgit v1.2.3


From 005547e0828ce9064afebb1e6d56a18efd80e7a3 Mon Sep 17 00:00:00 2001
From: James Ban <james.ban.opensource@diasemi.com>
Date: Fri, 8 Aug 2014 14:27:04 +0900
Subject: regulator: da9211: support DA9213

This is a patch for supporting DA9213.

Signed-off-by: James Ban <james.ban.opensource@diasemi.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/Kconfig            |  9 ++--
 drivers/regulator/da9211-regulator.c | 95 ++++++++++++++++++++++++++++--------
 drivers/regulator/da9211-regulator.h |  7 ++-
 include/linux/regulator/da9211.h     |  7 ++-
 4 files changed, 91 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 2dc8289e5dba..1344aa83b438 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -199,13 +199,14 @@ config REGULATOR_DA9210
 	  interface.
 
 config REGULATOR_DA9211
-	tristate "Dialog Semiconductor DA9211/DA9212 regulator"
+	tristate "Dialog Semiconductor DA9211/DA9212/DA9213/DA9214 regulator"
 	depends on I2C
 	select REGMAP_I2C
 	help
-	  Say y here to support for the Dialog Semiconductor DA9211/DA9212.
-	  The DA9211/DA9212 is a multi-phase synchronous step down
-	  converter 12A DC-DC Buck controlled through an I2C
+	  Say y here to support for the Dialog Semiconductor DA9211/DA9212
+	  /DA9213/DA9214.
+	  The DA9211/DA9212/DA9213/DA9214 is a multi-phase synchronous
+	  step down converter 12A or 16A DC-DC Buck controlled through an I2C
 	  interface.
 
 config REGULATOR_DBX500_PRCMU
diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c
index 1482adafa1ad..ccc2e362d751 100644
--- a/drivers/regulator/da9211-regulator.c
+++ b/drivers/regulator/da9211-regulator.c
@@ -1,5 +1,5 @@
 /*
- * da9211-regulator.c - Regulator device driver for DA9211
+ * da9211-regulator.c - Regulator device driver for DA9211/DA9213
  * Copyright (C) 2014  Dialog Semiconductor Ltd.
  *
  * This library is free software; you can redistribute it and/or
@@ -27,6 +27,10 @@
 #include <linux/regulator/da9211.h>
 #include "da9211-regulator.h"
 
+/* DEVICE IDs */
+#define DA9211_DEVICE_ID	0x22
+#define DA9213_DEVICE_ID	0x23
+
 #define DA9211_BUCK_MODE_SLEEP	1
 #define DA9211_BUCK_MODE_SYNC	2
 #define DA9211_BUCK_MODE_AUTO	3
@@ -42,6 +46,7 @@ struct da9211 {
 	struct regulator_dev *rdev[DA9211_MAX_REGULATORS];
 	int num_regulator;
 	int chip_irq;
+	int chip_id;
 };
 
 static const struct regmap_range_cfg da9211_regmap_range[] = {
@@ -52,14 +57,14 @@ static const struct regmap_range_cfg da9211_regmap_range[] = {
 		.window_start = 0,
 		.window_len = 256,
 		.range_min = 0,
-		.range_max = 2*256,
+		.range_max = 5*128,
 	},
 };
 
 static const struct regmap_config da9211_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
-	.max_register = 2 * 256,
+	.max_register = 5 * 128,
 	.ranges = da9211_regmap_range,
 	.num_ranges = ARRAY_SIZE(da9211_regmap_range),
 };
@@ -69,11 +74,20 @@ static const struct regmap_config da9211_regmap_config = {
 #define DA9211_MAX_MV		1570
 #define DA9211_STEP_MV		10
 
-/* Current limits for buck (uA) indices corresponds with register values */
+/* Current limits for DA9211 buck (uA) indices
+ * corresponds with register values
+ */
 static const int da9211_current_limits[] = {
 	2000000, 2200000, 2400000, 2600000, 2800000, 3000000, 3200000, 3400000,
 	3600000, 3800000, 4000000, 4200000, 4400000, 4600000, 4800000, 5000000
 };
+/* Current limits for DA9213 buck (uA) indices
+ * corresponds with register values
+ */
+static const int da9213_current_limits[] = {
+	3000000, 3200000, 3400000, 3600000, 3800000, 4000000, 4200000, 4400000,
+	4600000, 4800000, 5000000, 5200000, 5400000, 5600000, 5800000, 6000000
+};
 
 static unsigned int da9211_buck_get_mode(struct regulator_dev *rdev)
 {
@@ -129,12 +143,26 @@ static int da9211_set_current_limit(struct regulator_dev *rdev, int min,
 {
 	int id = rdev_get_id(rdev);
 	struct da9211 *chip = rdev_get_drvdata(rdev);
-	int i;
+	int i, max_size;
+	const int *current_limits;
+
+	switch (chip->chip_id) {
+	case DA9211:
+		current_limits = da9211_current_limits;
+		max_size = ARRAY_SIZE(da9211_current_limits)-1;
+		break;
+	case DA9213:
+		current_limits = da9213_current_limits;
+		max_size = ARRAY_SIZE(da9213_current_limits)-1;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	/* search for closest to maximum */
-	for (i = ARRAY_SIZE(da9211_current_limits)-1; i >= 0; i--) {
-		if (min <= da9211_current_limits[i] &&
-		    max >= da9211_current_limits[i]) {
+	for (i = max_size; i >= 0; i--) {
+		if (min <= current_limits[i] &&
+		    max >= current_limits[i]) {
 				return regmap_update_bits(chip->regmap,
 					DA9211_REG_BUCK_ILIM,
 					(0x0F << id*4), (i << id*4));
@@ -150,14 +178,28 @@ static int da9211_get_current_limit(struct regulator_dev *rdev)
 	struct da9211 *chip = rdev_get_drvdata(rdev);
 	unsigned int data;
 	int ret;
+	const int *current_limits;
+
+	switch (chip->chip_id) {
+	case DA9211:
+		current_limits = da9211_current_limits;
+		break;
+	case DA9213:
+		current_limits = da9213_current_limits;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	ret = regmap_read(chip->regmap, DA9211_REG_BUCK_ILIM, &data);
 	if (ret < 0)
 		return ret;
 
-	/* select one of 16 values: 0000 (2000mA) to 1111 (5000mA) */
+	/* select one of 16 values: 0000 (2000mA or 3000mA)
+	 * to 1111 (5000mA or 6000mA).
+	 */
 	data = (data >> id*4) & 0x0F;
-	return da9211_current_limits[data];
+	return current_limits[data];
 }
 
 static struct regulator_ops da9211_buck_ops = {
@@ -264,10 +306,7 @@ static int da9211_regulator_init(struct da9211 *chip)
 	}
 
 	for (i = 0; i < chip->num_regulator; i++) {
-		if (chip->pdata)
-			config.init_data =
-				&(chip->pdata->init_data[i]);
-
+		config.init_data = &(chip->pdata->init_data[i]);
 		config.dev = chip->dev;
 		config.driver_data = chip;
 		config.regmap = chip->regmap;
@@ -301,6 +340,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c,
 {
 	struct da9211 *chip;
 	int error, ret;
+	unsigned int data;
 
 	chip = devm_kzalloc(&i2c->dev, sizeof(struct da9211), GFP_KERNEL);
 
@@ -308,7 +348,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c,
 	chip->regmap = devm_regmap_init_i2c(i2c, &da9211_regmap_config);
 	if (IS_ERR(chip->regmap)) {
 		error = PTR_ERR(chip->regmap);
-		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+		dev_err(chip->dev, "Failed to allocate register map: %d\n",
 			error);
 		return error;
 	}
@@ -316,8 +356,22 @@ static int da9211_i2c_probe(struct i2c_client *i2c,
 	i2c_set_clientdata(i2c, chip);
 
 	chip->pdata = i2c->dev.platform_data;
-	if (!chip->pdata) {
-		dev_err(&i2c->dev, "No platform init data supplied\n");
+
+	ret = regmap_read(chip->regmap, DA9211_REG_DEVICE_ID, &data);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to read DEVICE_ID reg: %d\n", ret);
+		return ret;
+	}
+
+	switch (data) {
+	case DA9211_DEVICE_ID:
+		chip->chip_id = DA9211;
+		break;
+	case DA9213_DEVICE_ID:
+		chip->chip_id = DA9213;
+		break;
+	default:
+		dev_err(chip->dev, "Unsupported device id = 0x%x.\n", data);
 		return -ENODEV;
 	}
 
@@ -340,13 +394,14 @@ static int da9211_i2c_probe(struct i2c_client *i2c,
 	ret = da9211_regulator_init(chip);
 
 	if (ret < 0)
-		dev_err(&i2c->dev, "Failed to initialize regulator: %d\n", ret);
+		dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret);
 
 	return ret;
 }
 
 static const struct i2c_device_id da9211_i2c_id[] = {
-	{"da9211", 0},
+	{"da9211", DA9211},
+	{"da9213", DA9213},
 	{},
 };
 
@@ -364,5 +419,5 @@ static struct i2c_driver da9211_regulator_driver = {
 module_i2c_driver(da9211_regulator_driver);
 
 MODULE_AUTHOR("James Ban <James.Ban.opensource@diasemi.com>");
-MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211");
+MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211/DA9213");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/da9211-regulator.h b/drivers/regulator/da9211-regulator.h
index 88b1769e8058..93fa9df2721c 100644
--- a/drivers/regulator/da9211-regulator.h
+++ b/drivers/regulator/da9211-regulator.h
@@ -1,5 +1,5 @@
 /*
- * da9211-regulator.h - Regulator definitions for DA9211
+ * da9211-regulator.h - Regulator definitions for DA9211/DA9213
  * Copyright (C) 2014  Dialog Semiconductor Ltd.
  *
  * This library is free software; you can redistribute it and/or
@@ -53,12 +53,15 @@
 /* BUCK Phase Selection*/
 #define DA9211_REG_CONFIG_E			0x147
 
+/* Device ID */
+#define	DA9211_REG_DEVICE_ID			0x201
+
 /*
  * Registers bits
  */
 /* DA9211_REG_PAGE_CON (addr=0x00) */
 #define	DA9211_REG_PAGE_SHIFT			1
-#define	DA9211_REG_PAGE_MASK			0x02
+#define	DA9211_REG_PAGE_MASK			0x06
 /* On I2C registers 0x00 - 0xFF */
 #define	DA9211_REG_PAGE0			0
 /* On I2C registers 0x100 - 0x1FF */
diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h
index 0981ce0e72cc..658c3c33f4c0 100644
--- a/include/linux/regulator/da9211.h
+++ b/include/linux/regulator/da9211.h
@@ -1,5 +1,5 @@
 /*
- * da9211.h - Regulator device driver for DA9211
+ * da9211.h - Regulator device driver for DA9211/DA9213
  * Copyright (C) 2014  Dialog Semiconductor Ltd.
  *
  * This library is free software; you can redistribute it and/or
@@ -20,6 +20,11 @@
 
 #define DA9211_MAX_REGULATORS	2
 
+enum da9211_chip_id {
+	DA9211,
+	DA9213,
+};
+
 struct da9211_pdata {
 	/*
 	 * Number of buck
-- 
cgit v1.2.3


From 0e4f417857083f399769491f6e7773d111debd0f Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.daniel@samsung.com>
Date: Tue, 15 Jul 2014 16:32:51 +0530
Subject: regulator: s2mpxxx: Move regulator min/step voltages in common place

This is a cleanup patch and moves min/step voltages in a common samsung
header file so that they can be used by other s2mpxxx PMIC drivers. Only
few required macros are added currently and others can be added if needed.

Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Amit Daniel Kachhap <amit.daniel@samsung.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/s2mpa01.c         | 32 ++++++++++++------------
 drivers/regulator/s2mps11.c         | 50 ++++++++++++++++++-------------------
 include/linux/mfd/samsung/core.h    | 21 ++++++++++++++++
 include/linux/mfd/samsung/s2mpa01.h | 12 ---------
 include/linux/mfd/samsung/s2mps11.h |  9 -------
 include/linux/mfd/samsung/s2mps14.h | 10 --------
 6 files changed, 62 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/s2mpa01.c b/drivers/regulator/s2mpa01.c
index ee83b4876420..962c5f192f7c 100644
--- a/drivers/regulator/s2mpa01.c
+++ b/drivers/regulator/s2mpa01.c
@@ -241,8 +241,8 @@ static struct regulator_ops s2mpa01_buck_ops = {
 	.ops		= &s2mpa01_ldo_ops,		\
 	.type		= REGULATOR_VOLTAGE,		\
 	.owner		= THIS_MODULE,			\
-	.min_uV		= S2MPA01_LDO_MIN,		\
-	.uV_step	= S2MPA01_LDO_STEP1,		\
+	.min_uV		= MIN_800_MV,			\
+	.uV_step	= STEP_50_MV,			\
 	.n_voltages	= S2MPA01_LDO_N_VOLTAGES,	\
 	.vsel_reg	= S2MPA01_REG_L1CTRL + num - 1,	\
 	.vsel_mask	= S2MPA01_LDO_VSEL_MASK,	\
@@ -255,8 +255,8 @@ static struct regulator_ops s2mpa01_buck_ops = {
 	.ops		= &s2mpa01_ldo_ops,		\
 	.type		= REGULATOR_VOLTAGE,		\
 	.owner		= THIS_MODULE,			\
-	.min_uV		= S2MPA01_LDO_MIN,		\
-	.uV_step	= S2MPA01_LDO_STEP2,		\
+	.min_uV		= MIN_800_MV,			\
+	.uV_step	= STEP_25_MV,			\
 	.n_voltages	= S2MPA01_LDO_N_VOLTAGES,	\
 	.vsel_reg	= S2MPA01_REG_L1CTRL + num - 1,	\
 	.vsel_mask	= S2MPA01_LDO_VSEL_MASK,	\
@@ -270,8 +270,8 @@ static struct regulator_ops s2mpa01_buck_ops = {
 	.ops		= &s2mpa01_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPA01_BUCK_MIN1,			\
-	.uV_step	= S2MPA01_BUCK_STEP1,			\
+	.min_uV		= MIN_600_MV,				\
+	.uV_step	= STEP_6_25_MV,				\
 	.n_voltages	= S2MPA01_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPA01_RAMP_DELAY,			\
 	.vsel_reg	= S2MPA01_REG_B1CTRL2 + (num - 1) * 2,	\
@@ -286,8 +286,8 @@ static struct regulator_ops s2mpa01_buck_ops = {
 	.ops		= &s2mpa01_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPA01_BUCK_MIN2,			\
-	.uV_step	= S2MPA01_BUCK_STEP1,			\
+	.min_uV		= MIN_800_MV,				\
+	.uV_step	= STEP_6_25_MV,				\
 	.n_voltages	= S2MPA01_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPA01_RAMP_DELAY,			\
 	.vsel_reg	= S2MPA01_REG_B5CTRL2,			\
@@ -302,8 +302,8 @@ static struct regulator_ops s2mpa01_buck_ops = {
 	.ops		= &s2mpa01_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPA01_BUCK_MIN1,			\
-	.uV_step	= S2MPA01_BUCK_STEP1,			\
+	.min_uV		= MIN_600_MV,				\
+	.uV_step	= STEP_6_25_MV,				\
 	.n_voltages	= S2MPA01_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPA01_RAMP_DELAY,			\
 	.vsel_reg	= S2MPA01_REG_B6CTRL2 + (num - 6) * 2,	\
@@ -318,8 +318,8 @@ static struct regulator_ops s2mpa01_buck_ops = {
 	.ops		= &s2mpa01_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPA01_BUCK_MIN2,			\
-	.uV_step	= S2MPA01_BUCK_STEP2,			\
+	.min_uV		= MIN_800_MV,				\
+	.uV_step	= STEP_12_5_MV,				\
 	.n_voltages	= S2MPA01_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPA01_RAMP_DELAY,			\
 	.vsel_reg	= S2MPA01_REG_B8CTRL2,			\
@@ -334,8 +334,8 @@ static struct regulator_ops s2mpa01_buck_ops = {
 	.ops		= &s2mpa01_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPA01_BUCK_MIN4,			\
-	.uV_step	= S2MPA01_BUCK_STEP2,			\
+	.min_uV		= MIN_1500_MV,				\
+	.uV_step	= STEP_12_5_MV,				\
 	.n_voltages	= S2MPA01_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPA01_RAMP_DELAY,			\
 	.vsel_reg	= S2MPA01_REG_B9CTRL2,			\
@@ -350,8 +350,8 @@ static struct regulator_ops s2mpa01_buck_ops = {
 	.ops		= &s2mpa01_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPA01_BUCK_MIN3,			\
-	.uV_step	= S2MPA01_BUCK_STEP2,			\
+	.min_uV		= MIN_1000_MV,				\
+	.uV_step	= STEP_12_5_MV,				\
 	.n_voltages	= S2MPA01_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPA01_RAMP_DELAY,			\
 	.vsel_reg	= S2MPA01_REG_B10CTRL2,			\
diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index b16c53a8272f..3dede776a837 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -255,14 +255,14 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.set_ramp_delay		= s2mps11_set_ramp_delay,
 };
 
-#define regulator_desc_s2mps11_ldo1(num)	{		\
+#define regulator_desc_s2mps11_ldo1(num)	{	\
 	.name		= "LDO"#num,			\
 	.id		= S2MPS11_LDO##num,		\
 	.ops		= &s2mps11_ldo_ops,		\
 	.type		= REGULATOR_VOLTAGE,		\
 	.owner		= THIS_MODULE,			\
-	.min_uV		= S2MPS11_LDO_MIN,		\
-	.uV_step	= S2MPS11_LDO_STEP1,		\
+	.min_uV		= MIN_800_MV,			\
+	.uV_step	= STEP_50_MV,			\
 	.n_voltages	= S2MPS11_LDO_N_VOLTAGES,	\
 	.vsel_reg	= S2MPS11_REG_L1CTRL + num - 1,	\
 	.vsel_mask	= S2MPS11_LDO_VSEL_MASK,	\
@@ -275,8 +275,8 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.ops		= &s2mps11_ldo_ops,		\
 	.type		= REGULATOR_VOLTAGE,		\
 	.owner		= THIS_MODULE,			\
-	.min_uV		= S2MPS11_LDO_MIN,		\
-	.uV_step	= S2MPS11_LDO_STEP2,		\
+	.min_uV		= MIN_800_MV,			\
+	.uV_step	= STEP_25_MV,			\
 	.n_voltages	= S2MPS11_LDO_N_VOLTAGES,	\
 	.vsel_reg	= S2MPS11_REG_L1CTRL + num - 1,	\
 	.vsel_mask	= S2MPS11_LDO_VSEL_MASK,	\
@@ -290,8 +290,8 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.ops		= &s2mps11_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPS11_BUCK_MIN1,			\
-	.uV_step	= S2MPS11_BUCK_STEP1,			\
+	.min_uV		= MIN_600_MV,				\
+	.uV_step	= STEP_6_25_MV,				\
 	.n_voltages	= S2MPS11_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPS11_RAMP_DELAY,			\
 	.vsel_reg	= S2MPS11_REG_B1CTRL2 + (num - 1) * 2,	\
@@ -306,8 +306,8 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.ops		= &s2mps11_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPS11_BUCK_MIN1,			\
-	.uV_step	= S2MPS11_BUCK_STEP1,			\
+	.min_uV		= MIN_600_MV,				\
+	.uV_step	= STEP_6_25_MV,				\
 	.n_voltages	= S2MPS11_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPS11_RAMP_DELAY,			\
 	.vsel_reg	= S2MPS11_REG_B5CTRL2,			\
@@ -322,8 +322,8 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.ops		= &s2mps11_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPS11_BUCK_MIN1,			\
-	.uV_step	= S2MPS11_BUCK_STEP1,			\
+	.min_uV		= MIN_600_MV,				\
+	.uV_step	= STEP_6_25_MV,				\
 	.n_voltages	= S2MPS11_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPS11_RAMP_DELAY,			\
 	.vsel_reg	= S2MPS11_REG_B6CTRL2 + (num - 6) * 2,	\
@@ -338,8 +338,8 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.ops		= &s2mps11_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPS11_BUCK_MIN3,			\
-	.uV_step	= S2MPS11_BUCK_STEP3,			\
+	.min_uV		= MIN_3000_MV,				\
+	.uV_step	= STEP_25_MV,				\
 	.n_voltages	= S2MPS11_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPS11_RAMP_DELAY,			\
 	.vsel_reg	= S2MPS11_REG_B9CTRL2,			\
@@ -354,8 +354,8 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.ops		= &s2mps11_buck_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPS11_BUCK_MIN2,			\
-	.uV_step	= S2MPS11_BUCK_STEP2,			\
+	.min_uV		= MIN_750_MV,				\
+	.uV_step	= STEP_12_5_MV,				\
 	.n_voltages	= S2MPS11_BUCK_N_VOLTAGES,		\
 	.ramp_delay	= S2MPS11_RAMP_DELAY,			\
 	.vsel_reg	= S2MPS11_REG_B10CTRL2,			\
@@ -516,8 +516,8 @@ static struct regulator_ops s2mps14_reg_ops = {
 	.ops		= &s2mps14_reg_ops,		\
 	.type		= REGULATOR_VOLTAGE,		\
 	.owner		= THIS_MODULE,			\
-	.min_uV		= S2MPS14_LDO_MIN_800MV,	\
-	.uV_step	= S2MPS14_LDO_STEP_25MV,	\
+	.min_uV		= MIN_800_MV,			\
+	.uV_step	= STEP_25_MV,			\
 	.n_voltages	= S2MPS14_LDO_N_VOLTAGES,	\
 	.vsel_reg	= S2MPS14_REG_L1CTRL + num - 1,	\
 	.vsel_mask	= S2MPS14_LDO_VSEL_MASK,	\
@@ -530,8 +530,8 @@ static struct regulator_ops s2mps14_reg_ops = {
 	.ops		= &s2mps14_reg_ops,		\
 	.type		= REGULATOR_VOLTAGE,		\
 	.owner		= THIS_MODULE,			\
-	.min_uV		= S2MPS14_LDO_MIN_1800MV,	\
-	.uV_step	= S2MPS14_LDO_STEP_25MV,	\
+	.min_uV		= MIN_1800_MV,			\
+	.uV_step	= STEP_25_MV,			\
 	.n_voltages	= S2MPS14_LDO_N_VOLTAGES,	\
 	.vsel_reg	= S2MPS14_REG_L1CTRL + num - 1,	\
 	.vsel_mask	= S2MPS14_LDO_VSEL_MASK,	\
@@ -544,8 +544,8 @@ static struct regulator_ops s2mps14_reg_ops = {
 	.ops		= &s2mps14_reg_ops,		\
 	.type		= REGULATOR_VOLTAGE,		\
 	.owner		= THIS_MODULE,			\
-	.min_uV		= S2MPS14_LDO_MIN_800MV,	\
-	.uV_step	= S2MPS14_LDO_STEP_12_5MV,	\
+	.min_uV		= MIN_800_MV,			\
+	.uV_step	= STEP_12_5_MV,			\
 	.n_voltages	= S2MPS14_LDO_N_VOLTAGES,	\
 	.vsel_reg	= S2MPS14_REG_L1CTRL + num - 1,	\
 	.vsel_mask	= S2MPS14_LDO_VSEL_MASK,	\
@@ -558,8 +558,8 @@ static struct regulator_ops s2mps14_reg_ops = {
 	.ops		= &s2mps14_reg_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPS14_BUCK1235_MIN_600MV,		\
-	.uV_step	= S2MPS14_BUCK1235_STEP_6_25MV,		\
+	.min_uV		= MIN_600_MV,				\
+	.uV_step	= STEP_6_25_MV,				\
 	.n_voltages	= S2MPS14_BUCK_N_VOLTAGES,		\
 	.linear_min_sel = S2MPS14_BUCK1235_START_SEL,		\
 	.ramp_delay	= S2MPS14_BUCK_RAMP_DELAY,		\
@@ -574,8 +574,8 @@ static struct regulator_ops s2mps14_reg_ops = {
 	.ops		= &s2mps14_reg_ops,			\
 	.type		= REGULATOR_VOLTAGE,			\
 	.owner		= THIS_MODULE,				\
-	.min_uV		= S2MPS14_BUCK4_MIN_1400MV,		\
-	.uV_step	= S2MPS14_BUCK4_STEP_12_5MV,		\
+	.min_uV		= MIN_1400_MV,				\
+	.uV_step	= STEP_12_5_MV,				\
 	.n_voltages	= S2MPS14_BUCK_N_VOLTAGES,		\
 	.linear_min_sel = S2MPS14_BUCK4_START_SEL,		\
 	.ramp_delay	= S2MPS14_BUCK_RAMP_DELAY,		\
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index b5f73de81aad..1825edacbda7 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -14,6 +14,27 @@
 #ifndef __LINUX_MFD_SEC_CORE_H
 #define __LINUX_MFD_SEC_CORE_H
 
+/* Macros to represent minimum voltages for LDO/BUCK */
+#define MIN_3000_MV		3000000
+#define MIN_2500_MV		2500000
+#define MIN_2000_MV		2000000
+#define MIN_1800_MV		1800000
+#define MIN_1500_MV		1500000
+#define MIN_1400_MV		1400000
+#define MIN_1000_MV		1000000
+
+#define MIN_900_MV		900000
+#define MIN_850_MV		850000
+#define MIN_800_MV		800000
+#define MIN_750_MV		750000
+#define MIN_600_MV		600000
+
+/* Macros to represent steps for LDO/BUCK */
+#define STEP_50_MV		50000
+#define STEP_25_MV		25000
+#define STEP_12_5_MV		12500
+#define STEP_6_25_MV		6250
+
 enum sec_device_type {
 	S5M8751X,
 	S5M8763X,
diff --git a/include/linux/mfd/samsung/s2mpa01.h b/include/linux/mfd/samsung/s2mpa01.h
index fbc63bc0d6a2..2766108bca2f 100644
--- a/include/linux/mfd/samsung/s2mpa01.h
+++ b/include/linux/mfd/samsung/s2mpa01.h
@@ -155,18 +155,6 @@ enum s2mpa01_regulators {
 	S2MPA01_REGULATOR_MAX,
 };
 
-#define S2MPA01_BUCK_MIN1	600000
-#define S2MPA01_BUCK_MIN2	800000
-#define S2MPA01_BUCK_MIN3	1000000
-#define S2MPA01_BUCK_MIN4	1500000
-#define S2MPA01_LDO_MIN		800000
-
-#define S2MPA01_BUCK_STEP1	6250
-#define S2MPA01_BUCK_STEP2	12500
-
-#define S2MPA01_LDO_STEP1	50000
-#define S2MPA01_LDO_STEP2	25000
-
 #define S2MPA01_LDO_VSEL_MASK	0x3F
 #define S2MPA01_BUCK_VSEL_MASK	0xFF
 #define S2MPA01_ENABLE_MASK	(0x03 << S2MPA01_ENABLE_SHIFT)
diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h
index b3ddf98dec37..7981a9d77d3f 100644
--- a/include/linux/mfd/samsung/s2mps11.h
+++ b/include/linux/mfd/samsung/s2mps11.h
@@ -171,15 +171,6 @@ enum s2mps11_regulators {
 	S2MPS11_REGULATOR_MAX,
 };
 
-#define S2MPS11_BUCK_MIN1	600000
-#define S2MPS11_BUCK_MIN2	750000
-#define S2MPS11_BUCK_MIN3	3000000
-#define S2MPS11_LDO_MIN	800000
-#define S2MPS11_BUCK_STEP1	6250
-#define S2MPS11_BUCK_STEP2	12500
-#define S2MPS11_BUCK_STEP3	25000
-#define S2MPS11_LDO_STEP1	50000
-#define S2MPS11_LDO_STEP2	25000
 #define S2MPS11_LDO_VSEL_MASK	0x3F
 #define S2MPS11_BUCK_VSEL_MASK	0xFF
 #define S2MPS11_ENABLE_MASK	(0x03 << S2MPS11_ENABLE_SHIFT)
diff --git a/include/linux/mfd/samsung/s2mps14.h b/include/linux/mfd/samsung/s2mps14.h
index 900cd7a04314..c92f4782afb5 100644
--- a/include/linux/mfd/samsung/s2mps14.h
+++ b/include/linux/mfd/samsung/s2mps14.h
@@ -123,10 +123,6 @@ enum s2mps14_regulators {
 };
 
 /* Regulator constraints for BUCKx */
-#define S2MPS14_BUCK1235_MIN_600MV	600000
-#define S2MPS14_BUCK4_MIN_1400MV	1400000
-#define S2MPS14_BUCK1235_STEP_6_25MV	6250
-#define S2MPS14_BUCK4_STEP_12_5MV	12500
 #define S2MPS14_BUCK1235_START_SEL	0x20
 #define S2MPS14_BUCK4_START_SEL		0x40
 /*
@@ -136,12 +132,6 @@ enum s2mps14_regulators {
  */
 #define S2MPS14_BUCK_RAMP_DELAY		12500
 
-/* Regulator constraints for different types of LDOx */
-#define S2MPS14_LDO_MIN_800MV		800000
-#define S2MPS14_LDO_MIN_1800MV		1800000
-#define S2MPS14_LDO_STEP_12_5MV		12500
-#define S2MPS14_LDO_STEP_25MV		25000
-
 #define S2MPS14_LDO_VSEL_MASK		0x3F
 #define S2MPS14_BUCK_VSEL_MASK		0xFF
 #define S2MPS14_ENABLE_MASK		(0x03 << S2MPS14_ENABLE_SHIFT)
-- 
cgit v1.2.3


From 272e2315fac3bfca0edfa3252b8a643c425602af Mon Sep 17 00:00:00 2001
From: Guodong Xu <guodong.xu@linaro.org>
Date: Wed, 13 Aug 2014 19:33:38 +0800
Subject: regulator: core: add const qualifier to ops in struct regulator_desc

struct regulator_ops *ops is a member in struct regulator_desc, which gets
its value from individual regulator driver upon regulator_register() and
is used by regulator core APIs. It's not allowed for regulator core to
modify any of these callbacks in *ops. Add 'const' qualifier to enforce that.

Signed-off-by: Guodong Xu <guodong.xu@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/core.c         | 24 ++++++++++++------------
 include/linux/regulator/driver.h |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a3c3785901f5..052e7f1f011d 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -839,7 +839,7 @@ static void print_constraints(struct regulator_dev *rdev)
 static int machine_constraints_voltage(struct regulator_dev *rdev,
 	struct regulation_constraints *constraints)
 {
-	struct regulator_ops *ops = rdev->desc->ops;
+	const struct regulator_ops *ops = rdev->desc->ops;
 	int ret;
 
 	/* do we need to apply the constraint voltage */
@@ -938,7 +938,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev,
 static int machine_constraints_current(struct regulator_dev *rdev,
 	struct regulation_constraints *constraints)
 {
-	struct regulator_ops *ops = rdev->desc->ops;
+	const struct regulator_ops *ops = rdev->desc->ops;
 	int ret;
 
 	if (!constraints->min_uA && !constraints->max_uA)
@@ -982,7 +982,7 @@ static int set_machine_constraints(struct regulator_dev *rdev,
 	const struct regulation_constraints *constraints)
 {
 	int ret = 0;
-	struct regulator_ops *ops = rdev->desc->ops;
+	const struct regulator_ops *ops = rdev->desc->ops;
 
 	if (constraints)
 		rdev->constraints = kmemdup(constraints, sizeof(*constraints),
@@ -2208,9 +2208,9 @@ EXPORT_SYMBOL_GPL(regulator_count_voltages);
  */
 int regulator_list_voltage(struct regulator *regulator, unsigned selector)
 {
-	struct regulator_dev	*rdev = regulator->rdev;
-	struct regulator_ops	*ops = rdev->desc->ops;
-	int			ret;
+	struct regulator_dev *rdev = regulator->rdev;
+	const struct regulator_ops *ops = rdev->desc->ops;
+	int ret;
 
 	if (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1 && !selector)
 		return rdev->desc->fixed_uV;
@@ -2572,8 +2572,8 @@ EXPORT_SYMBOL_GPL(regulator_set_voltage);
 int regulator_set_voltage_time(struct regulator *regulator,
 			       int old_uV, int new_uV)
 {
-	struct regulator_dev	*rdev = regulator->rdev;
-	struct regulator_ops	*ops = rdev->desc->ops;
+	struct regulator_dev *rdev = regulator->rdev;
+	const struct regulator_ops *ops = rdev->desc->ops;
 	int old_sel = -1;
 	int new_sel = -1;
 	int voltage;
@@ -3336,9 +3336,9 @@ EXPORT_SYMBOL_GPL(regulator_mode_to_status);
  */
 static int add_regulator_attributes(struct regulator_dev *rdev)
 {
-	struct device		*dev = &rdev->dev;
-	struct regulator_ops	*ops = rdev->desc->ops;
-	int			status = 0;
+	struct device *dev = &rdev->dev;
+	const struct regulator_ops *ops = rdev->desc->ops;
+	int status = 0;
 
 	/* some attributes need specific methods to be displayed */
 	if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) ||
@@ -3905,7 +3905,7 @@ core_initcall(regulator_init);
 static int __init regulator_init_complete(void)
 {
 	struct regulator_dev *rdev;
-	struct regulator_ops *ops;
+	const struct regulator_ops *ops;
 	struct regulation_constraints *c;
 	int enabled, ret;
 
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index bbe03a1924c0..4b628139a9cb 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -245,7 +245,7 @@ struct regulator_desc {
 	int id;
 	bool continuous_voltage_range;
 	unsigned n_voltages;
-	struct regulator_ops *ops;
+	const struct regulator_ops *ops;
 	int irq;
 	enum regulator_type type;
 	struct module *owner;
-- 
cgit v1.2.3


From 871f565055ed232e5751da18a331b73e8254adaf Mon Sep 17 00:00:00 2001
From: Guodong Xu <guodong.xu@linaro.org>
Date: Wed, 13 Aug 2014 19:33:40 +0800
Subject: regulator: core: add guard delay between calling regulator_disable
 and _enable

Some regulator require a minimum delay between its disable and next enable.
This is to avoid damages when out-of-range frequent disable/enable of a
single regulator can bring to the regulator chip.

Add @off_on_delay to struct regulator_desc. Device drivers' can use this field
to set this guard time.

Add @last_off_jiffy to struct regulator_dev. When @off_on_delay is set by
driver, regulator core can store its last off (disable) time into this field.

Signed-off-by: Guodong Xu <guodong.xu@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/core.c         | 31 +++++++++++++++++++++++++++++++
 include/linux/regulator/driver.h |  6 ++++++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index dc0e9813b62d..1e976b6320a2 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1813,6 +1813,31 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
 
 	trace_regulator_enable(rdev_get_name(rdev));
 
+	if (rdev->desc->off_on_delay) {
+		/* if needed, keep a distance of off_on_delay from last time
+		 * this regulator was disabled.
+		 */
+		unsigned long start_jiffy = jiffies;
+		unsigned long intended, max_delay, remaining;
+
+		max_delay = usecs_to_jiffies(rdev->desc->off_on_delay);
+		intended = rdev->last_off_jiffy + max_delay;
+
+		if (time_before(start_jiffy, intended)) {
+			/* calc remaining jiffies to deal with one-time
+			 * timer wrapping.
+			 * in case of multiple timer wrapping, either it can be
+			 * detected by out-of-range remaining, or it cannot be
+			 * detected and we gets a panelty of
+			 * _regulator_enable_delay().
+			 */
+			remaining = intended - start_jiffy;
+			if (remaining <= max_delay)
+				_regulator_enable_delay(
+						jiffies_to_usecs(remaining));
+		}
+	}
+
 	if (rdev->ena_pin) {
 		ret = regulator_ena_gpio_ctrl(rdev, true);
 		if (ret < 0)
@@ -1925,6 +1950,12 @@ static int _regulator_do_disable(struct regulator_dev *rdev)
 			return ret;
 	}
 
+	/* cares about last_off_jiffy only if off_on_delay is required by
+	 * device.
+	 */
+	if (rdev->desc->off_on_delay)
+		rdev->last_off_jiffy = jiffies;
+
 	trace_regulator_disable_complete(rdev_get_name(rdev));
 
 	return 0;
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 4b628139a9cb..efe058f8f746 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -238,6 +238,7 @@ enum regulator_type {
  * @bypass_val_off: Disabling value for control when using regmap set_bypass
  *
  * @enable_time: Time taken for initial enable of regulator (in uS).
+ * @off_on_delay: guard time (in uS), before re-enabling a regulator
  */
 struct regulator_desc {
 	const char *name;
@@ -276,6 +277,8 @@ struct regulator_desc {
 	unsigned int bypass_val_off;
 
 	unsigned int enable_time;
+
+	unsigned int off_on_delay;
 };
 
 /**
@@ -348,6 +351,9 @@ struct regulator_dev {
 
 	struct regulator_enable_gpio *ena_pin;
 	unsigned int ena_gpio_state:1;
+
+	/* time when this regulator was disabled last time */
+	unsigned long last_off_jiffy;
 };
 
 struct regulator_dev *
-- 
cgit v1.2.3


From 16466f4284154311f163a58b77379eb186274f87 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 14 Aug 2014 16:52:51 -0700
Subject: net: phy: bcm7xxx: remove 28nm wildcard entry

A wildcard entry with the 32-bits OUI 0x600d8400 was added as part of
the BCM7xxx internal PHY driver, but that entry might match other PHYs
that are not covered by this driver, so let's just remove it.

Fixes: b560a58c45c6 ("net: phy: add Broadcom BCM7xxx internal PHY driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@greenl8ke.davemloft.net>
---
 drivers/net/phy/bcm7xxx.c | 14 --------------
 include/linux/brcmphy.h   |  1 -
 2 files changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 526b94cea569..2b40548c85d5 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -286,19 +286,6 @@ static struct phy_driver bcm7xxx_driver[] = {
 	.suspend	= bcm7xxx_suspend,
 	.resume		= bcm7xxx_28nm_config_init,
 	.driver		= { .owner = THIS_MODULE },
-}, {
-	.name		= "Broadcom BCM7XXX 28nm",
-	.phy_id		= PHY_ID_BCM7XXX_28,
-	.phy_id_mask	= PHY_BCM_OUI_MASK,
-	.features	= PHY_GBIT_FEATURES |
-			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-	.flags		= PHY_IS_INTERNAL,
-	.config_init	= bcm7xxx_28nm_config_init,
-	.config_aneg	= genphy_config_aneg,
-	.read_status	= genphy_read_status,
-	.suspend	= bcm7xxx_suspend,
-	.resume		= bcm7xxx_28nm_config_init,
-	.driver		= { .owner = THIS_MODULE },
 }, {
 	.phy_id		= PHY_BCM_OUI_4,
 	.phy_id_mask	= 0xffff0000,
@@ -331,7 +318,6 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
 	{ PHY_ID_BCM7366, 0xfffffff0, },
 	{ PHY_ID_BCM7439, 0xfffffff0, },
 	{ PHY_ID_BCM7445, 0xfffffff0, },
-	{ PHY_ID_BCM7XXX_28, 0xfffffc00 },
 	{ PHY_BCM_OUI_4, 0xffff0000 },
 	{ PHY_BCM_OUI_5, 0xffffff00 },
 	{ }
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 6f76277baf39..61219b9b3445 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -16,7 +16,6 @@
 #define PHY_ID_BCM7366			0x600d8490
 #define PHY_ID_BCM7439			0x600d8480
 #define PHY_ID_BCM7445			0x600d8510
-#define PHY_ID_BCM7XXX_28		0x600d8400
 
 #define PHY_BCM_OUI_MASK		0xfffffc00
 #define PHY_BCM_OUI_1			0x00206000
-- 
cgit v1.2.3


From 983c684466e02b21f83c025ea539deee6c0aeac0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 3 Aug 2014 13:03:10 -0400
Subject: SUNRPC: get rid of the request wait queue

We're always _only_ waking up tasks from within the sp_threads list, so
we know that they are enqueued and alive. The rq_wait waitqueue is just
a distraction with extra atomic semantics.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h |  1 -
 net/sunrpc/svc.c           |  2 --
 net/sunrpc/svc_xprt.c      | 32 +++++++++++++++++---------------
 3 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index cf61ecd148e0..21678464883a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -280,7 +280,6 @@ struct svc_rqst {
 	bool			rq_splice_ok;   /* turned off in gss privacy
 						 * to prevent encrypting page
 						 * cache pages */
-	wait_queue_head_t	rq_wait;	/* synchronization */
 	struct task_struct	*rq_task;	/* service thread */
 };
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 1db5007ddbce..ca8a7958f4e6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	if (!rqstp)
 		goto out_enomem;
 
-	init_waitqueue_head(&rqstp->rq_wait);
-
 	serv->sv_nrthreads++;
 	spin_lock_bh(&pool->sp_lock);
 	pool->sp_nrthreads++;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 08e49d1e17b3..faaf2b46273b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -348,8 +348,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 
 	cpu = get_cpu();
 	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
-	put_cpu();
-
 	spin_lock_bh(&pool->sp_lock);
 
 	if (!list_empty(&pool->sp_threads) &&
@@ -382,10 +380,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 			printk(KERN_ERR
 				"svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
 				rqstp, rqstp->rq_xprt);
-		rqstp->rq_xprt = xprt;
+		/* Note the order of the following 3 lines:
+		 * We want to assign xprt to rqstp->rq_xprt only _after_
+		 * we've woken up the process, so that we don't race with
+		 * the lockless check in svc_get_next_xprt().
+		 */
 		svc_xprt_get(xprt);
+		wake_up_process(rqstp->rq_task);
+		rqstp->rq_xprt = xprt;
 		pool->sp_stats.threads_woken++;
-		wake_up(&rqstp->rq_wait);
 	} else {
 		dprintk("svc: transport %p put into queue\n", xprt);
 		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
@@ -394,6 +397,7 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 
 out_unlock:
 	spin_unlock_bh(&pool->sp_lock);
+	put_cpu();
 }
 
 /*
@@ -509,7 +513,7 @@ void svc_wake_up(struct svc_serv *serv)
 			svc_thread_dequeue(pool, rqstp);
 			rqstp->rq_xprt = NULL;
 			 */
-			wake_up(&rqstp->rq_wait);
+			wake_up_process(rqstp->rq_task);
 		} else
 			pool->sp_task_pending = 1;
 		spin_unlock_bh(&pool->sp_lock);
@@ -628,7 +632,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 {
 	struct svc_xprt *xprt;
 	struct svc_pool		*pool = rqstp->rq_pool;
-	DECLARE_WAITQUEUE(wait, current);
 	long			time_left;
 
 	/* Normally we will wait up to 5 seconds for any required
@@ -654,15 +657,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 			xprt = ERR_PTR(-EAGAIN);
 			goto out;
 		}
-		/* No data pending. Go to sleep */
-		svc_thread_enqueue(pool, rqstp);
-
 		/*
 		 * We have to be able to interrupt this wait
 		 * to bring down the daemons ...
 		 */
 		set_current_state(TASK_INTERRUPTIBLE);
 
+		/* No data pending. Go to sleep */
+		svc_thread_enqueue(pool, rqstp);
+
 		/*
 		 * checking kthread_should_stop() here allows us to avoid
 		 * locking and signalling when stopping kthreads that call
@@ -676,14 +679,13 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 			goto out;
 		}
 
-		add_wait_queue(&rqstp->rq_wait, &wait);
 		spin_unlock_bh(&pool->sp_lock);
 
 		time_left = schedule_timeout(timeout);
+		__set_current_state(TASK_RUNNING);
 
 		try_to_freeze();
 
-		remove_wait_queue(&rqstp->rq_wait, &wait);
 		xprt = rqstp->rq_xprt;
 		if (xprt != NULL)
 			return xprt;
@@ -786,10 +788,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		printk(KERN_ERR
 			"svc_recv: service %p, transport not NULL!\n",
 			 rqstp);
-	if (waitqueue_active(&rqstp->rq_wait))
-		printk(KERN_ERR
-			"svc_recv: service %p, wait queue active!\n",
-			 rqstp);
+
+	/* Make sure the task pointer is set! */
+	if (WARN_ON_ONCE(!rqstp->rq_task))
+		rqstp->rq_task = current_task;
 
 	err = svc_alloc_arg(rqstp);
 	if (err)
-- 
cgit v1.2.3


From 716845ebeb505353d900320b4a74e8330520410d Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 18 Aug 2014 10:34:08 +0800
Subject: regulator: core: Fix build error due to const qualifier for ops

Drop const qualifier for ops of struct regulator_desc.
Allow regulator drivers to update ops before registering regulator.

Fix below build error:
  CC [M]  drivers/regulator/mc13892-regulator.o
drivers/regulator/mc13892-regulator.c: In function 'mc13892_regulator_probe':
drivers/regulator/mc13892-regulator.c:586:3: error: assignment of member 'set_mode' in read-only object
drivers/regulator/mc13892-regulator.c:588:3: error: assignment of member 'get_mode' in read-only object
make[2]: *** [drivers/regulator/mc13892-regulator.o] Error 1
make[1]: *** [drivers/regulator] Error 2
make: *** [drivers] Error 2

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index efe058f8f746..3abda7554d82 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -246,7 +246,7 @@ struct regulator_desc {
 	int id;
 	bool continuous_voltage_range;
 	unsigned n_voltages;
-	const struct regulator_ops *ops;
+	struct regulator_ops *ops;
 	int irq;
 	enum regulator_type type;
 	struct module *owner;
-- 
cgit v1.2.3


From e5f81539f657af7e9f54ea37986fde8f92acef22 Mon Sep 17 00:00:00 2001
From: Feng Kan <fkan@apm.com>
Date: Wed, 30 Jul 2014 14:56:58 -0700
Subject: irqchip: gic: Replace hex numbers with defines.

This is to cleanup some hex numbers used in the code and replace
them with defines to make the code cleaner.

Signed-off-by: Feng Kan <fkan@apm.com>
Reviewed-by: Anup Patel <apatel@apm.com>
Link: https://lkml.kernel.org/r/1406757419-18729-2-git-send-email-fkan@apm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic-common.c | 15 +++++++++------
 drivers/irqchip/irq-gic.c        | 25 +++++++++++++------------
 include/linux/irqchip/arm-gic.h  | 15 +++++++++++++++
 3 files changed, 37 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index 60ac704d2090..61541ff24397 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -74,20 +74,22 @@ void __init gic_dist_config(void __iomem *base, int gic_irqs,
 	 * Set all global interrupts to be level triggered, active low.
 	 */
 	for (i = 32; i < gic_irqs; i += 16)
-		writel_relaxed(0, base + GIC_DIST_CONFIG + i / 4);
+		writel_relaxed(GICD_INT_ACTLOW_LVLTRIG,
+					base + GIC_DIST_CONFIG + i / 4);
 
 	/*
 	 * Set priority on all global interrupts.
 	 */
 	for (i = 32; i < gic_irqs; i += 4)
-		writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i);
+		writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i);
 
 	/*
 	 * Disable all interrupts.  Leave the PPI and SGIs alone
 	 * as they are enabled by redistributor registers.
 	 */
 	for (i = 32; i < gic_irqs; i += 32)
-		writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i / 8);
+		writel_relaxed(GICD_INT_EN_CLR_X32,
+					base + GIC_DIST_ENABLE_CLEAR + i / 8);
 
 	if (sync_access)
 		sync_access();
@@ -101,14 +103,15 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
 	 * Deal with the banked PPI and SGI interrupts - disable all
 	 * PPI interrupts, ensure all SGI interrupts are enabled.
 	 */
-	writel_relaxed(0xffff0000, base + GIC_DIST_ENABLE_CLEAR);
-	writel_relaxed(0x0000ffff, base + GIC_DIST_ENABLE_SET);
+	writel_relaxed(GICD_INT_EN_CLR_PPI, base + GIC_DIST_ENABLE_CLEAR);
+	writel_relaxed(GICD_INT_EN_SET_SGI, base + GIC_DIST_ENABLE_SET);
 
 	/*
 	 * Set priority on PPI and SGI interrupts
 	 */
 	for (i = 0; i < 32; i += 4)
-		writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
+		writel_relaxed(GICD_INT_DEF_PRI_X4,
+					base + GIC_DIST_PRI + i * 4 / 4);
 
 	if (sync_access)
 		sync_access();
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4b959e606fe8..35847453cecb 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -298,8 +298,8 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
 	status = readl_relaxed(gic_data_cpu_base(chip_data) + GIC_CPU_INTACK);
 	raw_spin_unlock(&irq_controller_lock);
 
-	gic_irq = (status & 0x3ff);
-	if (gic_irq == 1023)
+	gic_irq = (status & GICC_IAR_INT_ID_MASK);
+	if (gic_irq == GICC_INT_SPURIOUS)
 		goto out;
 
 	cascade_irq = irq_find_mapping(chip_data->domain, gic_irq);
@@ -360,7 +360,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
 	unsigned int gic_irqs = gic->gic_irqs;
 	void __iomem *base = gic_data_dist_base(gic);
 
-	writel_relaxed(0, base + GIC_DIST_CTRL);
+	writel_relaxed(GICD_DISABLE, base + GIC_DIST_CTRL);
 
 	/*
 	 * Set all global interrupts to this CPU only.
@@ -373,7 +373,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
 
 	gic_dist_config(base, gic_irqs, NULL);
 
-	writel_relaxed(1, base + GIC_DIST_CTRL);
+	writel_relaxed(GICD_ENABLE, base + GIC_DIST_CTRL);
 }
 
 static void gic_cpu_init(struct gic_chip_data *gic)
@@ -400,8 +400,8 @@ static void gic_cpu_init(struct gic_chip_data *gic)
 
 	gic_cpu_config(dist_base, NULL);
 
-	writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
-	writel_relaxed(1, base + GIC_CPU_CTRL);
+	writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK);
+	writel_relaxed(GICC_ENABLE, base + GIC_CPU_CTRL);
 }
 
 void gic_cpu_if_down(void)
@@ -467,14 +467,14 @@ static void gic_dist_restore(unsigned int gic_nr)
 	if (!dist_base)
 		return;
 
-	writel_relaxed(0, dist_base + GIC_DIST_CTRL);
+	writel_relaxed(GICD_DISABLE, dist_base + GIC_DIST_CTRL);
 
 	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
 		writel_relaxed(gic_data[gic_nr].saved_spi_conf[i],
 			dist_base + GIC_DIST_CONFIG + i * 4);
 
 	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
-		writel_relaxed(0xa0a0a0a0,
+		writel_relaxed(GICD_INT_DEF_PRI_X4,
 			dist_base + GIC_DIST_PRI + i * 4);
 
 	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
@@ -485,7 +485,7 @@ static void gic_dist_restore(unsigned int gic_nr)
 		writel_relaxed(gic_data[gic_nr].saved_spi_enable[i],
 			dist_base + GIC_DIST_ENABLE_SET + i * 4);
 
-	writel_relaxed(1, dist_base + GIC_DIST_CTRL);
+	writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL);
 }
 
 static void gic_cpu_save(unsigned int gic_nr)
@@ -539,10 +539,11 @@ static void gic_cpu_restore(unsigned int gic_nr)
 		writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4);
 
 	for (i = 0; i < DIV_ROUND_UP(32, 4); i++)
-		writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
+		writel_relaxed(GICD_INT_DEF_PRI_X4,
+					dist_base + GIC_DIST_PRI + i * 4);
 
-	writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
-	writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
+	writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK);
+	writel_relaxed(GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
 }
 
 static int gic_notifier(struct notifier_block *self, unsigned long cmd,	void *v)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 45e2d8c15bd2..5cb9d41af5be 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -21,7 +21,10 @@
 #define GIC_CPU_ACTIVEPRIO		0xd0
 #define GIC_CPU_IDENT			0xfc
 
+#define GICC_ENABLE			0x1
+#define GICC_INT_PRI_THRESHOLD		0xf0
 #define GICC_IAR_INT_ID_MASK		0x3ff
+#define GICC_INT_SPURIOUS		1023
 
 #define GIC_DIST_CTRL			0x000
 #define GIC_DIST_CTR			0x004
@@ -39,6 +42,18 @@
 #define GIC_DIST_SGI_PENDING_CLEAR	0xf10
 #define GIC_DIST_SGI_PENDING_SET	0xf20
 
+#define GICD_ENABLE			0x1
+#define GICD_DISABLE			0x0
+#define GICD_INT_ACTLOW_LVLTRIG		0x0
+#define GICD_INT_EN_CLR_X32		0xffffffff
+#define GICD_INT_EN_SET_SGI		0x0000ffff
+#define GICD_INT_EN_CLR_PPI		0xffff0000
+#define GICD_INT_DEF_PRI		0xa0
+#define GICD_INT_DEF_PRI_X4		((GICD_INT_DEF_PRI << 24) |\
+					(GICD_INT_DEF_PRI << 16) |\
+					(GICD_INT_DEF_PRI << 8) |\
+					GICD_INT_DEF_PRI)
+
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
 #define GICH_VMCR			0x8
-- 
cgit v1.2.3


From 3228950621d92f0f212378f95a6998ef3a1be0bb Mon Sep 17 00:00:00 2001
From: Feng Kan <fkan@apm.com>
Date: Wed, 30 Jul 2014 14:56:59 -0700
Subject: irqchip: gic: Preserve gic V2 bypass bits in cpu ctrl register

This change is made to preserve the GIC v2 bypass bits in the
GIC_CPU_CTRL register (also known as the GICC_CTLR register in spec).
This code will preserve all bits configured by the bootloader regarding
v2 bypass group bits. In the X-Gene platform, the bypass functionality
is not used and bypass bits should not be changed by the kernel gic
code as it could lead to incorrect behavior.

Signed-off-by: Feng Kan <fkan@apm.com>
Reviewed-by: Vinayak Kale <vkale@apm.com>
Reviewed-by: Anup Patel <apatel@apm.com>
Link: https://lkml.kernel.org/r/1406757419-18729-3-git-send-email-fkan@apm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic.c       | 25 ++++++++++++++++++++++---
 include/linux/irqchip/arm-gic.h |  1 +
 2 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 35847453cecb..2500f6ba29e1 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -353,6 +353,21 @@ static u8 gic_get_cpumask(struct gic_chip_data *gic)
 	return mask;
 }
 
+static void gic_cpu_if_up(void)
+{
+	void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
+	u32 bypass = 0;
+
+	/*
+	* Preserve bypass disable bits to be written back later
+	*/
+	bypass = readl(cpu_base + GIC_CPU_CTRL);
+	bypass &= GICC_DIS_BYPASS_MASK;
+
+	writel_relaxed(bypass | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
+}
+
+
 static void __init gic_dist_init(struct gic_chip_data *gic)
 {
 	unsigned int i;
@@ -401,13 +416,17 @@ static void gic_cpu_init(struct gic_chip_data *gic)
 	gic_cpu_config(dist_base, NULL);
 
 	writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK);
-	writel_relaxed(GICC_ENABLE, base + GIC_CPU_CTRL);
+	gic_cpu_if_up();
 }
 
 void gic_cpu_if_down(void)
 {
 	void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
-	writel_relaxed(0, cpu_base + GIC_CPU_CTRL);
+	u32 val = 0;
+
+	val = readl(cpu_base + GIC_CPU_CTRL);
+	val &= ~GICC_ENABLE;
+	writel_relaxed(val, cpu_base + GIC_CPU_CTRL);
 }
 
 #ifdef CONFIG_CPU_PM
@@ -543,7 +562,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
 					dist_base + GIC_DIST_PRI + i * 4);
 
 	writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK);
-	writel_relaxed(GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
+	gic_cpu_if_up();
 }
 
 static int gic_notifier(struct notifier_block *self, unsigned long cmd,	void *v)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 5cb9d41af5be..13eed92c7d24 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -25,6 +25,7 @@
 #define GICC_INT_PRI_THRESHOLD		0xf0
 #define GICC_IAR_INT_ID_MASK		0x3ff
 #define GICC_INT_SPURIOUS		1023
+#define GICC_DIS_BYPASS_MASK		0x1e0
 
 #define GIC_DIST_CTRL			0x000
 #define GIC_DIST_CTR			0x004
-- 
cgit v1.2.3


From 366047515c6eab2ff886bc28d1c2b0ad041d040a Mon Sep 17 00:00:00 2001
From: Lan Tianyu <tianyu.lan@intel.com>
Date: Fri, 15 Aug 2014 13:38:59 +0800
Subject: i2c: rework kernel config I2C_ACPI

Commit da3c6647(I2C/ACPI: Clean up I2C ACPI code and Add CONFIG_I2C_ACPI
config) adds a new kernel config I2C_ACPI and make I2C core built in
when the config is selected. This is wrong because distributions
etc generally compile I2C as a module and the commit broken that.
This patch is to rename I2C_ACPI to ACPI_I2C_OPREGION. New config
only controls ACPI I2C operation region code and depends on I2C=y.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
[wsa: removed unrelated change for Kconfig]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/Kconfig    | 15 ++++++---------
 drivers/i2c/Makefile   |  2 +-
 drivers/i2c/i2c-acpi.c |  2 ++
 include/linux/i2c.h    | 12 ++++++++----
 4 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index 3e3b680dc007..b51a402752c4 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -23,17 +23,14 @@ config I2C
 	  This I2C support can also be built as a module.  If so, the module
 	  will be called i2c-core.
 
-config I2C_ACPI
-	bool "I2C ACPI support"
-	select I2C
-	depends on ACPI
+config ACPI_I2C_OPREGION
+	bool "ACPI I2C Operation region support"
+	depends on I2C=y && ACPI
 	default y
 	help
-	  Say Y here if you want to enable ACPI I2C support. This includes support
-	  for automatic enumeration of I2C slave devices and support for ACPI I2C
-	  Operation Regions. Operation Regions allow firmware (BIOS) code to
-	  access I2C slave devices, such as smart batteries through an I2C host
-	  controller driver.
+	  Say Y here if you want to enable ACPI I2C operation region support.
+	  Operation Regions allow firmware (BIOS) code to access I2C slave devices,
+	  such as smart batteries through an I2C host controller driver.
 
 if I2C
 
diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index a1f590cbb435..e0228b228256 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -3,7 +3,7 @@
 #
 
 i2ccore-y := i2c-core.o
-i2ccore-$(CONFIG_I2C_ACPI)	+= i2c-acpi.o
+i2ccore-$(CONFIG_ACPI)	 	+= i2c-acpi.o
 
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
 obj-$(CONFIG_I2C)		+= i2ccore.o
diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c
index e8b61967334b..0dbc18c15c43 100644
--- a/drivers/i2c/i2c-acpi.c
+++ b/drivers/i2c/i2c-acpi.c
@@ -126,6 +126,7 @@ void acpi_i2c_register_devices(struct i2c_adapter *adap)
 		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
 }
 
+#ifdef CONFIG_ACPI_I2C_OPREGION
 static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
 		u8 cmd, u8 *data, u8 data_len)
 {
@@ -360,3 +361,4 @@ void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
 
 	acpi_bus_detach_private_data(handle);
 }
+#endif
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index ea507665896c..a95efeb53a8b 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -577,16 +577,20 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node
 }
 #endif /* CONFIG_OF */
 
-#ifdef CONFIG_I2C_ACPI
-int acpi_i2c_install_space_handler(struct i2c_adapter *adapter);
-void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter);
+#ifdef CONFIG_ACPI
 void acpi_i2c_register_devices(struct i2c_adapter *adap);
 #else
 static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { }
+#endif /* CONFIG_ACPI */
+
+#ifdef CONFIG_ACPI_I2C_OPREGION
+int acpi_i2c_install_space_handler(struct i2c_adapter *adapter);
+void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter);
+#else
 static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
 { }
 static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter)
 { return 0; }
-#endif
+#endif /* CONFIG_ACPI_I2C_OPREGION */
 
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From 7b7d8982f0169d5ac67c6c2877449fb7f6968cac Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 27 Jul 2014 14:31:53 -0700
Subject: mtd: fix linux/mtd/nand.h kernel-doc warning

Fix kernel-doc warning in <linux/mtd/nand.h>:

Warning(..//include/linux/mtd/nand.h:795): No description found for parameter 'ecc'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc:	David Woodhouse <dwmw2@infradead.org>
Cc:	Brian Norris <computersforpeace@gmail.com>
Cc:	linux-mtd@lists.infradead.org
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/nand.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 3083c53e0270..b7c11991cb09 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -766,6 +766,7 @@ struct nand_chip {
  * @options: stores various chip bit options
  * @id_len: The valid length of the @id.
  * @oobsize: OOB size
+ * @ecc: ECC correctability and step information from the datasheet.
  * @ecc.strength_ds: The ECC correctability from the datasheet, same as the
  *                   @ecc_strength_ds in nand_chip{}.
  * @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the
-- 
cgit v1.2.3


From 31f754628cbb12c983600f22d9f0fed50dfe2134 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Mon, 21 Jul 2014 19:07:22 -0700
Subject: mtd: use __packed shorthand

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdswap.c        |  2 +-
 drivers/mtd/nand/sm_common.h |  2 +-
 include/linux/mtd/cfi.h      | 22 +++++++++++-----------
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 48cf6f98df44..fc8b3d16cce7 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -145,7 +145,7 @@ struct mtdswap_dev {
 struct mtdswap_oobdata {
 	__le16 magic;
 	__le32 count;
-} __attribute__((packed));
+} __packed;
 
 #define MTDSWAP_MAGIC_CLEAN	0x2095
 #define MTDSWAP_MAGIC_DIRTY	(MTDSWAP_MAGIC_CLEAN + 1)
diff --git a/drivers/mtd/nand/sm_common.h b/drivers/mtd/nand/sm_common.h
index 00f4a83359b2..d3e028e58b0f 100644
--- a/drivers/mtd/nand/sm_common.h
+++ b/drivers/mtd/nand/sm_common.h
@@ -18,7 +18,7 @@ struct sm_oob {
 	uint8_t ecc2[3];
 	uint8_t lba_copy2[2];
 	uint8_t ecc1[3];
-} __attribute__((packed));
+} __packed;
 
 
 /* one sector is always 512 bytes, but it can consist of two nand pages */
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 37ef6b194089..299d7d31fe53 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -153,7 +153,7 @@ struct cfi_ident {
 	uint16_t MaxBufWriteSize;
 	uint8_t  NumEraseRegions;
 	uint32_t EraseRegionInfo[0]; /* Not host ordered */
-} __attribute__((packed));
+} __packed;
 
 /* Extended Query Structure for both PRI and ALT */
 
@@ -161,7 +161,7 @@ struct cfi_extquery {
 	uint8_t  pri[3];
 	uint8_t  MajorVersion;
 	uint8_t  MinorVersion;
-} __attribute__((packed));
+} __packed;
 
 /* Vendor-Specific PRI for Intel/Sharp Extended Command Set (0x0001) */
 
@@ -180,7 +180,7 @@ struct cfi_pri_intelext {
 	uint8_t  FactProtRegSize;
 	uint8_t  UserProtRegSize;
 	uint8_t  extra[0];
-} __attribute__((packed));
+} __packed;
 
 struct cfi_intelext_otpinfo {
 	uint32_t ProtRegAddr;
@@ -188,7 +188,7 @@ struct cfi_intelext_otpinfo {
 	uint8_t  FactProtRegSize;
 	uint16_t UserGroups;
 	uint8_t  UserProtRegSize;
-} __attribute__((packed));
+} __packed;
 
 struct cfi_intelext_blockinfo {
 	uint16_t NumIdentBlocks;
@@ -196,7 +196,7 @@ struct cfi_intelext_blockinfo {
 	uint16_t MinBlockEraseCycles;
 	uint8_t  BitsPerCell;
 	uint8_t  BlockCap;
-} __attribute__((packed));
+} __packed;
 
 struct cfi_intelext_regioninfo {
 	uint16_t NumIdentPartitions;
@@ -205,7 +205,7 @@ struct cfi_intelext_regioninfo {
 	uint8_t  NumOpAllowedSimEraMode;
 	uint8_t  NumBlockTypes;
 	struct cfi_intelext_blockinfo BlockTypes[1];
-} __attribute__((packed));
+} __packed;
 
 struct cfi_intelext_programming_regioninfo {
 	uint8_t  ProgRegShift;
@@ -214,7 +214,7 @@ struct cfi_intelext_programming_regioninfo {
 	uint8_t  Reserved2;
 	uint8_t  ControlInvalid;
 	uint8_t  Reserved3;
-} __attribute__((packed));
+} __packed;
 
 /* Vendor-Specific PRI for AMD/Fujitsu Extended Command Set (0x0002) */
 
@@ -233,7 +233,7 @@ struct cfi_pri_amdstd {
 	uint8_t  VppMin;
 	uint8_t  VppMax;
 	uint8_t  TopBottom;
-} __attribute__((packed));
+} __packed;
 
 /* Vendor-Specific PRI for Atmel chips (command set 0x0002) */
 
@@ -245,18 +245,18 @@ struct cfi_pri_atmel {
 	uint8_t BottomBoot;
 	uint8_t BurstMode;
 	uint8_t PageMode;
-} __attribute__((packed));
+} __packed;
 
 struct cfi_pri_query {
 	uint8_t  NumFields;
 	uint32_t ProtField[1]; /* Not host ordered */
-} __attribute__((packed));
+} __packed;
 
 struct cfi_bri_query {
 	uint8_t  PageModeReadCap;
 	uint8_t  NumFields;
 	uint32_t ConfField[1]; /* Not host ordered */
-} __attribute__((packed));
+} __packed;
 
 #define P_ID_NONE               0x0000
 #define P_ID_INTEL_EXT          0x0001
-- 
cgit v1.2.3


From b25046b1e5e3f1423434da77ccc859f2f779d1ce Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Sun, 17 Aug 2014 10:29:42 +0200
Subject: mtd: nand: fix DocBook warnings on nand_sdr_timings doc

Change the comment type (from /** to /*) to prevent DocBook from
complaining about missing description for nand_sdr_timings fields.

There is currently no need in documenting those fields because they are
fully described in the ONFI specification (which is pointed out in the
comment).

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/nand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 3083c53e0270..c300db3ae285 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -949,7 +949,7 @@ static inline int jedec_feature(struct nand_chip *chip)
 		: 0;
 }
 
-/**
+/*
  * struct nand_sdr_timings - SDR NAND chip timings
  *
  * This struct defines the timing requirements of a SDR NAND chip.
-- 
cgit v1.2.3


From 7132fe4f568721cbd5d9bce5a8a71556e9bc45b4 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Sun, 17 Aug 2014 09:24:26 -0700
Subject: Input: drv260x - add TI drv260x haptics driver

Add the TI drv260x haptics/vibrator driver.  This device uses the input
force feedback to produce a wave form to driver an ERM or LRA actuator
device.

The initial driver supports the devices real time playback mode.  But the
device has additional wave patterns in ROM. This functionality will be
added in future patchsets.

Product data sheet is located here: http://www.ti.com/product/drv2605

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 .../devicetree/bindings/input/ti,drv260x.txt       |  51 ++
 drivers/input/misc/Kconfig                         |  11 +
 drivers/input/misc/Makefile                        |   1 +
 drivers/input/misc/drv260x.c                       | 738 +++++++++++++++++++++
 include/dt-bindings/input/ti-drv260x.h             |  36 +
 include/linux/platform_data/drv260x-pdata.h        |  28 +
 6 files changed, 865 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/input/ti,drv260x.txt
 create mode 100644 drivers/input/misc/drv260x.c
 create mode 100644 include/dt-bindings/input/ti-drv260x.h
 create mode 100644 include/linux/platform_data/drv260x-pdata.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/input/ti,drv260x.txt b/Documentation/devicetree/bindings/input/ti,drv260x.txt
new file mode 100644
index 000000000000..a9c8519eb9de
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/ti,drv260x.txt
@@ -0,0 +1,51 @@
+Texas Instruments - drv260x Haptics driver family
+
+The drv260x family serial control bus communicates through I2C protocols
+
+Required properties:
+	- compatible - One of:
+		"ti,drv2604" - DRV2604
+		"ti,drv2605" - DRV2605
+		"ti,drv2605l" - DRV2605L
+	- reg -  I2C slave address
+	- vbat-supply - Required supply regulator
+	- mode - Power up mode of the chip (defined in include/dt-bindings/input/ti-drv260x.h)
+		DRV260X_LRA_MODE - Linear Resonance Actuator mode (Piezoelectric)
+		DRV260X_LRA_NO_CAL_MODE - This is a LRA Mode but there is no calibration
+				sequence during init.  And the device is configured for real
+				time playback mode (RTP mode).
+		DRV260X_ERM_MODE - Eccentric Rotating Mass mode (Rotary vibrator)
+	- library-sel - These are ROM based waveforms pre-programmed into the IC.
+				This should be set to set the library to use at power up.
+				(defined in include/dt-bindings/input/ti-drv260x.h)
+		DRV260X_LIB_EMPTY - Do not use a pre-programmed library
+		DRV260X_ERM_LIB_A - Pre-programmed Library
+		DRV260X_ERM_LIB_B - Pre-programmed Library
+		DRV260X_ERM_LIB_C - Pre-programmed Library
+		DRV260X_ERM_LIB_D - Pre-programmed Library
+		DRV260X_ERM_LIB_E - Pre-programmed Library
+		DRV260X_ERM_LIB_F - Pre-programmed Library
+		DRV260X_LIB_LRA - Pre-programmed LRA Library
+
+Optional properties:
+	- enable-gpio - gpio pin to enable/disable the device.
+	- vib-rated-mv - The rated voltage of the actuator in millivolts.
+			  If this is not set then the value will be defaulted to
+			  3.2 v.
+	- vib-overdrive-mv - The overdrive voltage of the actuator in millivolts.
+			  If this is not set then the value will be defaulted to
+			  3.2 v.
+Example:
+
+drv2605l: drv2605l@5a {
+		compatible = "ti,drv2605l";
+		reg = <0x5a>;
+		enable-gpio = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+		mode = <DRV260X_LRA_MODE>;
+		library-sel = <DRV260X_LIB_LRA>;
+		vib-rated-mv = <3200>;
+		vib-overdriver-mv = <3200>;
+};
+
+For more product information please see the link below:
+http://www.ti.com/product/drv2605
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 2ff4425a893b..41d0ae62ba05 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -676,4 +676,15 @@ config INPUT_SOC_BUTTON_ARRAY
 	  To compile this driver as a module, choose M here: the
 	  module will be called soc_button_array.
 
+config INPUT_DRV260X_HAPTICS
+	tristate "TI DRV260X haptics support"
+	depends on INPUT && I2C && GPIOLIB
+	select INPUT_FF_MEMLESS
+	select REGMAP_I2C
+	help
+	  Say Y to enable support for the TI DRV260X haptics driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called drv260x-haptics.
+
 endif
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 4955ad322a01..cd4bc2d3474f 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_INPUT_COBALT_BTNS)		+= cobalt_btns.o
 obj-$(CONFIG_INPUT_DA9052_ONKEY)	+= da9052_onkey.o
 obj-$(CONFIG_INPUT_DA9055_ONKEY)	+= da9055_onkey.o
 obj-$(CONFIG_INPUT_DM355EVM)		+= dm355evm_keys.o
+obj-$(CONFIG_INPUT_DRV260X_HAPTICS)	+= drv260x.o
 obj-$(CONFIG_INPUT_GP2A)		+= gp2ap002a00f.o
 obj-$(CONFIG_INPUT_GPIO_BEEPER)		+= gpio-beeper.o
 obj-$(CONFIG_INPUT_GPIO_TILT_POLLED)	+= gpio_tilt_polled.o
diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c
new file mode 100644
index 000000000000..e90e3b8189f7
--- /dev/null
+++ b/drivers/input/misc/drv260x.c
@@ -0,0 +1,738 @@
+/*
+ * DRV260X haptics driver family
+ *
+ * Author: Dan Murphy <dmurphy@ti.com>
+ *
+ * Copyright:   (C) 2014 Texas Instruments, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/of_gpio.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/consumer.h>
+
+#include <dt-bindings/input/ti-drv260x.h>
+#include <linux/platform_data/drv260x-pdata.h>
+
+#define DRV260X_STATUS		0x0
+#define DRV260X_MODE		0x1
+#define DRV260X_RT_PB_IN	0x2
+#define DRV260X_LIB_SEL		0x3
+#define DRV260X_WV_SEQ_1	0x4
+#define DRV260X_WV_SEQ_2	0x5
+#define DRV260X_WV_SEQ_3	0x6
+#define DRV260X_WV_SEQ_4	0x7
+#define DRV260X_WV_SEQ_5	0x8
+#define DRV260X_WV_SEQ_6	0x9
+#define DRV260X_WV_SEQ_7	0xa
+#define DRV260X_WV_SEQ_8	0xb
+#define DRV260X_GO				0xc
+#define DRV260X_OVERDRIVE_OFF	0xd
+#define DRV260X_SUSTAIN_P_OFF	0xe
+#define DRV260X_SUSTAIN_N_OFF	0xf
+#define DRV260X_BRAKE_OFF		0x10
+#define DRV260X_A_TO_V_CTRL		0x11
+#define DRV260X_A_TO_V_MIN_INPUT	0x12
+#define DRV260X_A_TO_V_MAX_INPUT	0x13
+#define DRV260X_A_TO_V_MIN_OUT	0x14
+#define DRV260X_A_TO_V_MAX_OUT	0x15
+#define DRV260X_RATED_VOLT		0x16
+#define DRV260X_OD_CLAMP_VOLT	0x17
+#define DRV260X_CAL_COMP		0x18
+#define DRV260X_CAL_BACK_EMF	0x19
+#define DRV260X_FEEDBACK_CTRL	0x1a
+#define DRV260X_CTRL1			0x1b
+#define DRV260X_CTRL2			0x1c
+#define DRV260X_CTRL3			0x1d
+#define DRV260X_CTRL4			0x1e
+#define DRV260X_CTRL5			0x1f
+#define DRV260X_LRA_LOOP_PERIOD	0x20
+#define DRV260X_VBAT_MON		0x21
+#define DRV260X_LRA_RES_PERIOD	0x22
+#define DRV260X_MAX_REG			0x23
+
+#define DRV260X_ALLOWED_R_BYTES	25
+#define DRV260X_ALLOWED_W_BYTES	2
+#define DRV260X_MAX_RW_RETRIES	5
+#define DRV260X_I2C_RETRY_DELAY 10
+
+#define DRV260X_GO_BIT				0x01
+
+/* Library Selection */
+#define DRV260X_LIB_SEL_MASK		0x07
+#define DRV260X_LIB_SEL_RAM			0x0
+#define DRV260X_LIB_SEL_OD			0x1
+#define DRV260X_LIB_SEL_40_60		0x2
+#define DRV260X_LIB_SEL_60_80		0x3
+#define DRV260X_LIB_SEL_100_140		0x4
+#define DRV260X_LIB_SEL_140_PLUS	0x5
+
+#define DRV260X_LIB_SEL_HIZ_MASK	0x10
+#define DRV260X_LIB_SEL_HIZ_EN		0x01
+#define DRV260X_LIB_SEL_HIZ_DIS		0
+
+/* Mode register */
+#define DRV260X_STANDBY				(1 << 6)
+#define DRV260X_STANDBY_MASK		0x40
+#define DRV260X_INTERNAL_TRIGGER	0x00
+#define DRV260X_EXT_TRIGGER_EDGE	0x01
+#define DRV260X_EXT_TRIGGER_LEVEL	0x02
+#define DRV260X_PWM_ANALOG_IN		0x03
+#define DRV260X_AUDIOHAPTIC			0x04
+#define DRV260X_RT_PLAYBACK			0x05
+#define DRV260X_DIAGNOSTICS			0x06
+#define DRV260X_AUTO_CAL			0x07
+
+/* Audio to Haptics Control */
+#define DRV260X_AUDIO_HAPTICS_PEAK_10MS		(0 << 2)
+#define DRV260X_AUDIO_HAPTICS_PEAK_20MS		(1 << 2)
+#define DRV260X_AUDIO_HAPTICS_PEAK_30MS		(2 << 2)
+#define DRV260X_AUDIO_HAPTICS_PEAK_40MS		(3 << 2)
+
+#define DRV260X_AUDIO_HAPTICS_FILTER_100HZ	0x00
+#define DRV260X_AUDIO_HAPTICS_FILTER_125HZ	0x01
+#define DRV260X_AUDIO_HAPTICS_FILTER_150HZ	0x02
+#define DRV260X_AUDIO_HAPTICS_FILTER_200HZ	0x03
+
+/* Min/Max Input/Output Voltages */
+#define DRV260X_AUDIO_HAPTICS_MIN_IN_VOLT	0x19
+#define DRV260X_AUDIO_HAPTICS_MAX_IN_VOLT	0x64
+#define DRV260X_AUDIO_HAPTICS_MIN_OUT_VOLT	0x19
+#define DRV260X_AUDIO_HAPTICS_MAX_OUT_VOLT	0xFF
+
+/* Feedback register */
+#define DRV260X_FB_REG_ERM_MODE			0x7f
+#define DRV260X_FB_REG_LRA_MODE			(1 << 7)
+
+#define DRV260X_BRAKE_FACTOR_MASK	0x1f
+#define DRV260X_BRAKE_FACTOR_2X		(1 << 0)
+#define DRV260X_BRAKE_FACTOR_3X		(2 << 4)
+#define DRV260X_BRAKE_FACTOR_4X		(3 << 4)
+#define DRV260X_BRAKE_FACTOR_6X		(4 << 4)
+#define DRV260X_BRAKE_FACTOR_8X		(5 << 4)
+#define DRV260X_BRAKE_FACTOR_16		(6 << 4)
+#define DRV260X_BRAKE_FACTOR_DIS	(7 << 4)
+
+#define DRV260X_LOOP_GAIN_LOW		0xf3
+#define DRV260X_LOOP_GAIN_MED		(1 << 2)
+#define DRV260X_LOOP_GAIN_HIGH		(2 << 2)
+#define DRV260X_LOOP_GAIN_VERY_HIGH	(3 << 2)
+
+#define DRV260X_BEMF_GAIN_0			0xfc
+#define DRV260X_BEMF_GAIN_1		(1 << 0)
+#define DRV260X_BEMF_GAIN_2		(2 << 0)
+#define DRV260X_BEMF_GAIN_3		(3 << 0)
+
+/* Control 1 register */
+#define DRV260X_AC_CPLE_EN			(1 << 5)
+#define DRV260X_STARTUP_BOOST		(1 << 7)
+
+/* Control 2 register */
+
+#define DRV260X_IDISS_TIME_45		0
+#define DRV260X_IDISS_TIME_75		(1 << 0)
+#define DRV260X_IDISS_TIME_150		(1 << 1)
+#define DRV260X_IDISS_TIME_225		0x03
+
+#define DRV260X_BLANK_TIME_45	(0 << 2)
+#define DRV260X_BLANK_TIME_75	(1 << 2)
+#define DRV260X_BLANK_TIME_150	(2 << 2)
+#define DRV260X_BLANK_TIME_225	(3 << 2)
+
+#define DRV260X_SAMP_TIME_150	(0 << 4)
+#define DRV260X_SAMP_TIME_200	(1 << 4)
+#define DRV260X_SAMP_TIME_250	(2 << 4)
+#define DRV260X_SAMP_TIME_300	(3 << 4)
+
+#define DRV260X_BRAKE_STABILIZER	(1 << 6)
+#define DRV260X_UNIDIR_IN			(0 << 7)
+#define DRV260X_BIDIR_IN			(1 << 7)
+
+/* Control 3 Register */
+#define DRV260X_LRA_OPEN_LOOP		(1 << 0)
+#define DRV260X_ANANLOG_IN			(1 << 1)
+#define DRV260X_LRA_DRV_MODE		(1 << 2)
+#define DRV260X_RTP_UNSIGNED_DATA	(1 << 3)
+#define DRV260X_SUPPLY_COMP_DIS		(1 << 4)
+#define DRV260X_ERM_OPEN_LOOP		(1 << 5)
+#define DRV260X_NG_THRESH_0			(0 << 6)
+#define DRV260X_NG_THRESH_2			(1 << 6)
+#define DRV260X_NG_THRESH_4			(2 << 6)
+#define DRV260X_NG_THRESH_8			(3 << 6)
+
+/* Control 4 Register */
+#define DRV260X_AUTOCAL_TIME_150MS		(0 << 4)
+#define DRV260X_AUTOCAL_TIME_250MS		(1 << 4)
+#define DRV260X_AUTOCAL_TIME_500MS		(2 << 4)
+#define DRV260X_AUTOCAL_TIME_1000MS		(3 << 4)
+
+/**
+ * struct drv260x_data -
+ * @input_dev - Pointer to the input device
+ * @client - Pointer to the I2C client
+ * @regmap - Register map of the device
+ * @work - Work item used to off load the enable/disable of the vibration
+ * @enable_gpio - Pointer to the gpio used for enable/disabling
+ * @regulator - Pointer to the regulator for the IC
+ * @magnitude - Magnitude of the vibration event
+ * @mode - The operating mode of the IC (LRA_NO_CAL, ERM or LRA)
+ * @library - The vibration library to be used
+ * @rated_voltage - The rated_voltage of the actuator
+ * @overdriver_voltage - The over drive voltage of the actuator
+**/
+struct drv260x_data {
+	struct input_dev *input_dev;
+	struct i2c_client *client;
+	struct regmap *regmap;
+	struct work_struct work;
+	struct gpio_desc *enable_gpio;
+	struct regulator *regulator;
+	u32 magnitude;
+	u32 mode;
+	u32 library;
+	int rated_voltage;
+	int overdrive_voltage;
+};
+
+static struct reg_default drv260x_reg_defs[] = {
+	{ DRV260X_STATUS, 0xe0 },
+	{ DRV260X_MODE, 0x40 },
+	{ DRV260X_RT_PB_IN, 0x00 },
+	{ DRV260X_LIB_SEL, 0x00 },
+	{ DRV260X_WV_SEQ_1, 0x01 },
+	{ DRV260X_WV_SEQ_2, 0x00 },
+	{ DRV260X_WV_SEQ_3, 0x00 },
+	{ DRV260X_WV_SEQ_4, 0x00 },
+	{ DRV260X_WV_SEQ_5, 0x00 },
+	{ DRV260X_WV_SEQ_6, 0x00 },
+	{ DRV260X_WV_SEQ_7, 0x00 },
+	{ DRV260X_WV_SEQ_8, 0x00 },
+	{ DRV260X_GO, 0x00 },
+	{ DRV260X_OVERDRIVE_OFF, 0x00 },
+	{ DRV260X_SUSTAIN_P_OFF, 0x00 },
+	{ DRV260X_SUSTAIN_N_OFF, 0x00 },
+	{ DRV260X_BRAKE_OFF, 0x00 },
+	{ DRV260X_A_TO_V_CTRL, 0x05 },
+	{ DRV260X_A_TO_V_MIN_INPUT, 0x19 },
+	{ DRV260X_A_TO_V_MAX_INPUT, 0xff },
+	{ DRV260X_A_TO_V_MIN_OUT, 0x19 },
+	{ DRV260X_A_TO_V_MAX_OUT, 0xff },
+	{ DRV260X_RATED_VOLT, 0x3e },
+	{ DRV260X_OD_CLAMP_VOLT, 0x8c },
+	{ DRV260X_CAL_COMP, 0x0c },
+	{ DRV260X_CAL_BACK_EMF, 0x6c },
+	{ DRV260X_FEEDBACK_CTRL, 0x36 },
+	{ DRV260X_CTRL1, 0x93 },
+	{ DRV260X_CTRL2, 0xfa },
+	{ DRV260X_CTRL3, 0xa0 },
+	{ DRV260X_CTRL4, 0x20 },
+	{ DRV260X_CTRL5, 0x80 },
+	{ DRV260X_LRA_LOOP_PERIOD, 0x33 },
+	{ DRV260X_VBAT_MON, 0x00 },
+	{ DRV260X_LRA_RES_PERIOD, 0x00 },
+};
+
+#define DRV260X_DEF_RATED_VOLT		0x90
+#define DRV260X_DEF_OD_CLAMP_VOLT	0x90
+
+/**
+ * Rated and Overdriver Voltages:
+ * Calculated using the formula r = v * 255 / 5.6
+ * where r is what will be written to the register
+ * and v is the rated or overdriver voltage of the actuator
+ **/
+static int drv260x_calculate_voltage(unsigned int voltage)
+{
+	return (voltage * 255 / 5600);
+}
+
+static void drv260x_worker(struct work_struct *work)
+{
+	struct drv260x_data *haptics = container_of(work, struct drv260x_data, work);
+	int error;
+
+	gpiod_set_value(haptics->enable_gpio, 1);
+	/* Data sheet says to wait 250us before trying to communicate */
+	udelay(250);
+
+	error = regmap_write(haptics->regmap,
+			     DRV260X_MODE, DRV260X_RT_PLAYBACK);
+	if (error) {
+		dev_err(&haptics->client->dev,
+			"Failed to write set mode: %d\n", error);
+	} else {
+		error = regmap_write(haptics->regmap,
+				     DRV260X_RT_PB_IN, haptics->magnitude);
+		if (error)
+			dev_err(&haptics->client->dev,
+				"Failed to set magnitude: %d\n", error);
+	}
+}
+
+static int drv260x_haptics_play(struct input_dev *input, void *data,
+				struct ff_effect *effect)
+{
+	struct drv260x_data *haptics = input_get_drvdata(input);
+
+	haptics->mode = DRV260X_LRA_NO_CAL_MODE;
+
+	if (effect->u.rumble.strong_magnitude > 0)
+		haptics->magnitude = effect->u.rumble.strong_magnitude;
+	else if (effect->u.rumble.weak_magnitude > 0)
+		haptics->magnitude = effect->u.rumble.weak_magnitude;
+	else
+		haptics->magnitude = 0;
+
+	schedule_work(&haptics->work);
+
+	return 0;
+}
+
+static void drv260x_close(struct input_dev *input)
+{
+	struct drv260x_data *haptics = input_get_drvdata(input);
+	int error;
+
+	cancel_work_sync(&haptics->work);
+
+	error = regmap_write(haptics->regmap, DRV260X_MODE, DRV260X_STANDBY);
+	if (error)
+		dev_err(&haptics->client->dev,
+			"Failed to enter standby mode: %d\n", error);
+
+	gpiod_set_value(haptics->enable_gpio, 0);
+}
+
+static const struct reg_default drv260x_lra_cal_regs[] = {
+	{ DRV260X_MODE, DRV260X_AUTO_CAL },
+	{ DRV260X_CTRL3, DRV260X_NG_THRESH_2 },
+	{ DRV260X_FEEDBACK_CTRL, DRV260X_FB_REG_LRA_MODE |
+		DRV260X_BRAKE_FACTOR_4X | DRV260X_LOOP_GAIN_HIGH },
+};
+
+static const struct reg_default drv260x_lra_init_regs[] = {
+	{ DRV260X_MODE, DRV260X_RT_PLAYBACK },
+	{ DRV260X_A_TO_V_CTRL, DRV260X_AUDIO_HAPTICS_PEAK_20MS |
+		DRV260X_AUDIO_HAPTICS_FILTER_125HZ },
+	{ DRV260X_A_TO_V_MIN_INPUT, DRV260X_AUDIO_HAPTICS_MIN_IN_VOLT },
+	{ DRV260X_A_TO_V_MAX_INPUT, DRV260X_AUDIO_HAPTICS_MAX_IN_VOLT },
+	{ DRV260X_A_TO_V_MIN_OUT, DRV260X_AUDIO_HAPTICS_MIN_OUT_VOLT },
+	{ DRV260X_A_TO_V_MAX_OUT, DRV260X_AUDIO_HAPTICS_MAX_OUT_VOLT },
+	{ DRV260X_FEEDBACK_CTRL, DRV260X_FB_REG_LRA_MODE |
+		DRV260X_BRAKE_FACTOR_2X | DRV260X_LOOP_GAIN_MED |
+		DRV260X_BEMF_GAIN_3 },
+	{ DRV260X_CTRL1, DRV260X_STARTUP_BOOST },
+	{ DRV260X_CTRL2, DRV260X_SAMP_TIME_250 },
+	{ DRV260X_CTRL3, DRV260X_NG_THRESH_2 | DRV260X_ANANLOG_IN },
+	{ DRV260X_CTRL4, DRV260X_AUTOCAL_TIME_500MS },
+};
+
+static const struct reg_default drv260x_erm_cal_regs[] = {
+	{ DRV260X_MODE, DRV260X_AUTO_CAL },
+	{ DRV260X_A_TO_V_MIN_INPUT, DRV260X_AUDIO_HAPTICS_MIN_IN_VOLT },
+	{ DRV260X_A_TO_V_MAX_INPUT, DRV260X_AUDIO_HAPTICS_MAX_IN_VOLT },
+	{ DRV260X_A_TO_V_MIN_OUT, DRV260X_AUDIO_HAPTICS_MIN_OUT_VOLT },
+	{ DRV260X_A_TO_V_MAX_OUT, DRV260X_AUDIO_HAPTICS_MAX_OUT_VOLT },
+	{ DRV260X_FEEDBACK_CTRL, DRV260X_BRAKE_FACTOR_3X |
+		DRV260X_LOOP_GAIN_MED | DRV260X_BEMF_GAIN_2 },
+	{ DRV260X_CTRL1, DRV260X_STARTUP_BOOST },
+	{ DRV260X_CTRL2, DRV260X_SAMP_TIME_250 | DRV260X_BLANK_TIME_75 |
+		DRV260X_IDISS_TIME_75 },
+	{ DRV260X_CTRL3, DRV260X_NG_THRESH_2 | DRV260X_ERM_OPEN_LOOP },
+	{ DRV260X_CTRL4, DRV260X_AUTOCAL_TIME_500MS },
+};
+
+static int drv260x_init(struct drv260x_data *haptics)
+{
+	int error;
+	unsigned int cal_buf;
+
+	error = regmap_write(haptics->regmap,
+			     DRV260X_RATED_VOLT, haptics->rated_voltage);
+	if (error) {
+		dev_err(&haptics->client->dev,
+			"Failed to write DRV260X_RATED_VOLT register: %d\n",
+			error);
+		return error;
+	}
+
+	error = regmap_write(haptics->regmap,
+			     DRV260X_OD_CLAMP_VOLT, haptics->overdrive_voltage);
+	if (error) {
+		dev_err(&haptics->client->dev,
+			"Failed to write DRV260X_OD_CLAMP_VOLT register: %d\n",
+			error);
+		return error;
+	}
+
+	switch (haptics->mode) {
+	case DRV260X_LRA_MODE:
+		error = regmap_register_patch(haptics->regmap,
+					      drv260x_lra_cal_regs,
+					      ARRAY_SIZE(drv260x_lra_cal_regs));
+		if (error) {
+			dev_err(&haptics->client->dev,
+				"Failed to write LRA calibration registers: %d\n",
+				error);
+			return error;
+		}
+
+		break;
+
+	case DRV260X_ERM_MODE:
+		error = regmap_register_patch(haptics->regmap,
+					      drv260x_erm_cal_regs,
+					      ARRAY_SIZE(drv260x_erm_cal_regs));
+		if (error) {
+			dev_err(&haptics->client->dev,
+				"Failed to write ERM calibration registers: %d\n",
+				error);
+			return error;
+		}
+
+		error = regmap_update_bits(haptics->regmap, DRV260X_LIB_SEL,
+					   DRV260X_LIB_SEL_MASK,
+					   haptics->library);
+		if (error) {
+			dev_err(&haptics->client->dev,
+				"Failed to write DRV260X_LIB_SEL register: %d\n",
+				error);
+			return error;
+		}
+
+		break;
+
+	default:
+		error = regmap_register_patch(haptics->regmap,
+					      drv260x_lra_init_regs,
+					      ARRAY_SIZE(drv260x_lra_init_regs));
+		if (error) {
+			dev_err(&haptics->client->dev,
+				"Failed to write LRA init registers: %d\n",
+				error);
+			return error;
+		}
+
+		error = regmap_update_bits(haptics->regmap, DRV260X_LIB_SEL,
+					   DRV260X_LIB_SEL_MASK,
+					   haptics->library);
+		if (error) {
+			dev_err(&haptics->client->dev,
+				"Failed to write DRV260X_LIB_SEL register: %d\n",
+				error);
+			return error;
+		}
+
+		/* No need to set GO bit here */
+		return 0;
+	}
+
+	error = regmap_write(haptics->regmap, DRV260X_GO, DRV260X_GO_BIT);
+	if (error) {
+		dev_err(&haptics->client->dev,
+			"Failed to write GO register: %d\n",
+			error);
+		return error;
+	}
+
+	do {
+		error = regmap_read(haptics->regmap, DRV260X_GO, &cal_buf);
+		if (error) {
+			dev_err(&haptics->client->dev,
+				"Failed to read GO register: %d\n",
+				error);
+			return error;
+		}
+	} while (cal_buf == DRV260X_GO_BIT);
+
+	return 0;
+}
+
+static const struct regmap_config drv260x_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = DRV260X_MAX_REG,
+	.reg_defaults = drv260x_reg_defs,
+	.num_reg_defaults = ARRAY_SIZE(drv260x_reg_defs),
+	.cache_type = REGCACHE_NONE,
+};
+
+#ifdef CONFIG_OF
+static int drv260x_parse_dt(struct device *dev,
+			    struct drv260x_data *haptics)
+{
+	struct device_node *np = dev->of_node;
+	unsigned int voltage;
+	int error;
+
+	error = of_property_read_u32(np, "mode", &haptics->mode);
+	if (error) {
+		dev_err(dev, "%s: No entry for mode\n", __func__);
+		return error;
+	}
+
+	error = of_property_read_u32(np, "library-sel", &haptics->library);
+	if (error) {
+		dev_err(dev, "%s: No entry for library selection\n",
+			__func__);
+		return error;
+	}
+
+	error = of_property_read_u32(np, "vib-rated-mv", &voltage);
+	if (!error)
+		haptics->rated_voltage = drv260x_calculate_voltage(voltage);
+
+
+	error = of_property_read_u32(np, "vib-overdrive-mv", &voltage);
+	if (!error)
+		haptics->overdrive_voltage = drv260x_calculate_voltage(voltage);
+
+	return 0;
+}
+#else
+static inline int drv260x_parse_dt(struct device *dev,
+				   struct drv260x_data *haptics)
+{
+	dev_err(dev, "no platform data defined\n");
+
+	return -EINVAL;
+}
+#endif
+
+static int drv260x_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	const struct drv260x_platform_data *pdata = dev_get_platdata(&client->dev);
+	struct drv260x_data *haptics;
+	int error;
+
+	haptics = devm_kzalloc(&client->dev, sizeof(*haptics), GFP_KERNEL);
+	if (!haptics)
+		return -ENOMEM;
+
+	haptics->rated_voltage = DRV260X_DEF_OD_CLAMP_VOLT;
+	haptics->rated_voltage = DRV260X_DEF_RATED_VOLT;
+
+	if (pdata) {
+		haptics->mode = pdata->mode;
+		haptics->library = pdata->library_selection;
+		if (pdata->vib_overdrive_voltage)
+			haptics->overdrive_voltage = drv260x_calculate_voltage(pdata->vib_overdrive_voltage);
+		if (pdata->vib_rated_voltage)
+			haptics->rated_voltage = drv260x_calculate_voltage(pdata->vib_rated_voltage);
+	} else if (client->dev.of_node) {
+		error = drv260x_parse_dt(&client->dev, haptics);
+		if (error)
+			return error;
+	} else {
+		dev_err(&client->dev, "Platform data not set\n");
+		return -ENODEV;
+	}
+
+
+	if (haptics->mode < DRV260X_LRA_MODE ||
+	    haptics->mode > DRV260X_ERM_MODE) {
+		dev_err(&client->dev,
+			"Vibrator mode is invalid: %i\n",
+			haptics->mode);
+		return -EINVAL;
+	}
+
+	if (haptics->library < DRV260X_LIB_EMPTY ||
+	    haptics->library > DRV260X_ERM_LIB_F) {
+		dev_err(&client->dev,
+			"Library value is invalid: %i\n", haptics->library);
+		return -EINVAL;
+	}
+
+	if (haptics->mode == DRV260X_LRA_MODE &&
+	    haptics->library != DRV260X_LIB_EMPTY &&
+	    haptics->library != DRV260X_LIB_LRA) {
+		dev_err(&client->dev,
+			"LRA Mode with ERM Library mismatch\n");
+		return -EINVAL;
+	}
+
+	haptics->regulator = devm_regulator_get(&client->dev, "vbat");
+	if (IS_ERR(haptics->regulator)) {
+		error = PTR_ERR(haptics->regulator);
+		dev_err(&client->dev,
+			"unable to get regulator, error: %d\n", error);
+		return error;
+	}
+
+	haptics->enable_gpio = devm_gpiod_get(&client->dev, "enable");
+	if (IS_ERR(haptics->enable_gpio)) {
+		error = PTR_ERR(haptics->enable_gpio);
+		if (error != -ENOENT && error != -ENOSYS)
+			return error;
+		haptics->enable_gpio = NULL;
+	} else {
+		gpiod_direction_output(haptics->enable_gpio, 1);
+	}
+
+	haptics->input_dev = devm_input_allocate_device(&client->dev);
+	if (!haptics->input_dev) {
+		dev_err(&client->dev, "Failed to allocate input device\n");
+		return -ENOMEM;
+	}
+
+	haptics->input_dev->name = "drv260x:haptics";
+	haptics->input_dev->dev.parent = client->dev.parent;
+	haptics->input_dev->close = drv260x_close;
+	input_set_drvdata(haptics->input_dev, haptics);
+	input_set_capability(haptics->input_dev, EV_FF, FF_RUMBLE);
+
+	error = input_ff_create_memless(haptics->input_dev, NULL,
+					drv260x_haptics_play);
+	if (error) {
+		dev_err(&client->dev, "input_ff_create() failed: %d\n",
+			error);
+		return error;
+	}
+
+	INIT_WORK(&haptics->work, drv260x_worker);
+
+	haptics->client = client;
+	i2c_set_clientdata(client, haptics);
+
+	haptics->regmap = devm_regmap_init_i2c(client, &drv260x_regmap_config);
+	if (IS_ERR(haptics->regmap)) {
+		error = PTR_ERR(haptics->regmap);
+		dev_err(&client->dev, "Failed to allocate register map: %d\n",
+			error);
+		return error;
+	}
+
+	error = drv260x_init(haptics);
+	if (error) {
+		dev_err(&client->dev, "Device init failed: %d\n", error);
+		return error;
+	}
+
+	error = input_register_device(haptics->input_dev);
+	if (error) {
+		dev_err(&client->dev, "couldn't register input device: %d\n",
+			error);
+		return error;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int drv260x_suspend(struct device *dev)
+{
+	struct drv260x_data *haptics = dev_get_drvdata(dev);
+	int ret = 0;
+
+	mutex_lock(&haptics->input_dev->mutex);
+
+	if (haptics->input_dev->users) {
+		ret = regmap_update_bits(haptics->regmap,
+					 DRV260X_MODE,
+					 DRV260X_STANDBY_MASK,
+					 DRV260X_STANDBY);
+		if (ret) {
+			dev_err(dev, "Failed to set standby mode\n");
+			goto out;
+		}
+
+		gpiod_set_value(haptics->enable_gpio, 0);
+
+		ret = regulator_disable(haptics->regulator);
+		if (ret) {
+			dev_err(dev, "Failed to disable regulator\n");
+			regmap_update_bits(haptics->regmap,
+					   DRV260X_MODE,
+					   DRV260X_STANDBY_MASK, 0);
+		}
+	}
+out:
+	mutex_unlock(&haptics->input_dev->mutex);
+	return ret;
+}
+
+static int drv260x_resume(struct device *dev)
+{
+	struct drv260x_data *haptics = dev_get_drvdata(dev);
+	int ret = 0;
+
+	mutex_lock(&haptics->input_dev->mutex);
+
+	if (haptics->input_dev->users) {
+		ret = regulator_enable(haptics->regulator);
+		if (ret) {
+			dev_err(dev, "Failed to enable regulator\n");
+			goto out;
+		}
+
+		ret = regmap_update_bits(haptics->regmap,
+					 DRV260X_MODE,
+					 DRV260X_STANDBY_MASK, 0);
+		if (ret) {
+			dev_err(dev, "Failed to unset standby mode\n");
+			regulator_disable(haptics->regulator);
+			goto out;
+		}
+
+		gpiod_set_value(haptics->enable_gpio, 1);
+	}
+
+out:
+	mutex_unlock(&haptics->input_dev->mutex);
+	return ret;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(drv260x_pm_ops, drv260x_suspend, drv260x_resume);
+
+static const struct i2c_device_id drv260x_id[] = {
+	{ "drv2605l", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, drv260x_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id drv260x_of_match[] = {
+	{ .compatible = "ti,drv2604", },
+	{ .compatible = "ti,drv2604l", },
+	{ .compatible = "ti,drv2605", },
+	{ .compatible = "ti,drv2605l", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, drv260x_of_match);
+#endif
+
+static struct i2c_driver drv260x_driver = {
+	.probe		= drv260x_probe,
+	.driver		= {
+		.name	= "drv260x-haptics",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(drv260x_of_match),
+		.pm	= &drv260x_pm_ops,
+	},
+	.id_table = drv260x_id,
+};
+module_i2c_driver(drv260x_driver);
+
+MODULE_ALIAS("platform:drv260x-haptics");
+MODULE_DESCRIPTION("TI DRV260x haptics driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Murphy <dmurphy@ti.com>");
diff --git a/include/dt-bindings/input/ti-drv260x.h b/include/dt-bindings/input/ti-drv260x.h
new file mode 100644
index 000000000000..2626e6d9f707
--- /dev/null
+++ b/include/dt-bindings/input/ti-drv260x.h
@@ -0,0 +1,36 @@
+/*
+ * DRV260X haptics driver family
+ *
+ * Author: Dan Murphy <dmurphy@ti.com>
+ *
+ * Copyright:   (C) 2014 Texas Instruments, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_TI_DRV260X_H
+#define _DT_BINDINGS_TI_DRV260X_H
+
+/* Calibration Types */
+#define DRV260X_LRA_MODE		0x00
+#define DRV260X_LRA_NO_CAL_MODE	0x01
+#define DRV260X_ERM_MODE		0x02
+
+/* Library Selection */
+#define DRV260X_LIB_EMPTY			0x00
+#define DRV260X_ERM_LIB_A			0x01
+#define DRV260X_ERM_LIB_B			0x02
+#define DRV260X_ERM_LIB_C			0x03
+#define DRV260X_ERM_LIB_D			0x04
+#define DRV260X_ERM_LIB_E			0x05
+#define DRV260X_LIB_LRA			0x06
+#define DRV260X_ERM_LIB_F			0x07
+
+#endif
diff --git a/include/linux/platform_data/drv260x-pdata.h b/include/linux/platform_data/drv260x-pdata.h
new file mode 100644
index 000000000000..0a03b0944411
--- /dev/null
+++ b/include/linux/platform_data/drv260x-pdata.h
@@ -0,0 +1,28 @@
+/*
+ * Platform data for DRV260X haptics driver family
+ *
+ * Author: Dan Murphy <dmurphy@ti.com>
+ *
+ * Copyright:   (C) 2014 Texas Instruments, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _LINUX_DRV260X_PDATA_H
+#define _LINUX_DRV260X_PDATA_H
+
+struct drv260x_platform_data {
+	u32 library_selection;
+	u32 mode;
+	u32 vib_rated_voltage;
+	u32 vib_overdrive_voltage;
+};
+
+#endif
-- 
cgit v1.2.3


From 58b84f6a97f7f8811e0636836734809ff52cad43 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 19 Aug 2014 12:00:53 -0500
Subject: gpio: move GPIOD flags outside #ifdef

The GPIOD flags are defined inside the #ifdef CONFIG_GPIOLIB
switch, making the gpiolib stubs fail if these flags are used
by a consumer. This is not correct: the stubs should compile
fine without GPIOLIB.

Reported-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index b7ce0c64c6f3..c7e17de732f3 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -16,8 +16,6 @@ struct device;
  */
 struct gpio_desc;
 
-#ifdef CONFIG_GPIOLIB
-
 #define GPIOD_FLAGS_BIT_DIR_SET		BIT(0)
 #define GPIOD_FLAGS_BIT_DIR_OUT		BIT(1)
 #define GPIOD_FLAGS_BIT_DIR_VAL		BIT(2)
@@ -34,6 +32,8 @@ enum gpiod_flags {
 			  GPIOD_FLAGS_BIT_DIR_VAL,
 };
 
+#ifdef CONFIG_GPIOLIB
+
 /* Acquire and dispose GPIOs */
 struct gpio_desc *__must_check __gpiod_get(struct device *dev,
 					 const char *con_id,
-- 
cgit v1.2.3


From df11e506d330d9a0e5a701cd2c5fcb7d461b6060 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 21 Aug 2014 10:11:34 +0800
Subject: regulator: core: Add back the const qualifier for ops of struct
 regulator_desc

Fix below build warning:
CC [M]  drivers/regulator/hi6421-regulator.o
drivers/regulator/hi6421-regulator.c:356:2: warning: initialization discards 'const' qualifier from pointer target type [enabled by default]

This is a revert of commit 716845ebeb50 ("regulator: core: Fix build error due
to const qualifier for ops"). The build error was fixed by commit 39f5460d7f9c
("regulator: core: add const to regulator_ops and fix build error in mc13892").

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 3abda7554d82..efe058f8f746 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -246,7 +246,7 @@ struct regulator_desc {
 	int id;
 	bool continuous_voltage_range;
 	unsigned n_voltages;
-	struct regulator_ops *ops;
+	const struct regulator_ops *ops;
 	int irq;
 	enum regulator_type type;
 	struct module *owner;
-- 
cgit v1.2.3


From e790d9ef6405633b007339d746b709aed43a928d Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Thu, 21 Aug 2014 18:08:05 +0200
Subject: KVM: add kvm_arch_sched_in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce preempt notifiers for architecture specific code.
Advantage over creating a new notifier in every arch is slightly simpler
code and guaranteed call order with respect to kvm_sched_in.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/kvm/arm.c         | 4 ++++
 arch/mips/kvm/mips.c       | 4 ++++
 arch/powerpc/kvm/powerpc.c | 4 ++++
 arch/s390/kvm/kvm-s390.c   | 4 ++++
 arch/x86/kvm/x86.c         | 4 ++++
 include/linux/kvm_host.h   | 2 ++
 virt/kvm/kvm_main.c        | 2 ++
 7 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a99e0cdf8ba2..9f788ebac55b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index cd7114147ae7..2362df2a79f9 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1002,6 +1002,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 				  struct kvm_translation *tr)
 {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c79284b58be..cbc432f4f0a6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -720,6 +720,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_BOOKE
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ce81eb2ab76a..a3c324ec4370 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -555,6 +555,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	/* Nothing todo */
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd718c01cdf1..7d43dc7bb906 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7171,6 +7171,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	if (type)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a4c33b34fe3f..ebd723676633 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -624,6 +624,8 @@ void kvm_arch_exit(void);
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 39b16035386f..5a0817ee996e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3124,6 +3124,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 	if (vcpu->preempted)
 		vcpu->preempted = false;
 
+	kvm_arch_sched_in(vcpu, cpu);
+
 	kvm_arch_vcpu_load(vcpu, cpu);
 }
 
-- 
cgit v1.2.3


From c200b1aa6cb460ce8c3ecf6fdc690d3949c3cc5d Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Wed, 20 Aug 2014 18:36:46 +0800
Subject: f2fs: fix incorrect calculation with total/free inode num

Theoretically, our total inodes number is the same as total node number, but
there are three node ids are reserved in f2fs, they are 0, 1 (node nid), and 2
(meta nid), and they should never be used by user, so our total/free inode
number calculated in ->statfs is wrong.

This patch indroduces F2FS_RESERVED_NODE_NUM and then fixes this issue by
recalculating total/free inode number with the macro.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c          | 2 +-
 fs/f2fs/super.c         | 4 ++--
 include/linux/f2fs_fs.h | 3 +++
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b4d964029fc7..044395c20ee9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1957,7 +1957,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
 
 	/* not used nids: 0, node, meta, (and root counted as valid node) */
-	nm_i->available_nids = nm_i->max_nid - 3;
+	nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
 	nm_i->fcnt = 0;
 	nm_i->nat_cnt = 0;
 	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ddb1e9d36363..0f61f5aa587c 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -508,8 +508,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
 	buf->f_bavail = user_block_count - valid_user_blocks(sbi);
 
-	buf->f_files = sbi->total_node_count;
-	buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
+	buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+	buf->f_ffree = buf->f_files - valid_inode_count(sbi);
 
 	buf->f_namelen = F2FS_NAME_LEN;
 	buf->f_fsid.val[0] = (u32)id;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 6ff0b0b42d47..0ed77f32c9ac 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -24,6 +24,9 @@
 #define NULL_ADDR		((block_t)0)	/* used as block_t addresses */
 #define NEW_ADDR		((block_t)-1)	/* used as block_t addresses */
 
+/* 0, 1(node nid), 2(meta nid) are reserved node id */
+#define F2FS_RESERVED_NODE_NUM		3
+
 #define F2FS_ROOT_INO(sbi)	(sbi->root_ino_num)
 #define F2FS_NODE_INO(sbi)	(sbi->node_ino_num)
 #define F2FS_META_INO(sbi)	(sbi->meta_ino_num)
-- 
cgit v1.2.3


From 8913dc0bb913ac3dc83ed5c10bac2f4e55431981 Mon Sep 17 00:00:00 2001
From: Paul Zimmerman <Paul.Zimmerman@synopsys.com>
Date: Thu, 21 Aug 2014 20:28:20 +0000
Subject: usb: gadget: document a usb_ep_dequeue() requirement

Document the requirement that the request be dequeued and its
completion routine called before usb_ep_dequeue() returns. Also
fix some capitalization issues in the existing text.

Signed-off-by: Paul Zimmerman <paulz@synopsys.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/gadget.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index c3a61853cd13..c540557b564b 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -345,12 +345,13 @@ static inline int usb_ep_queue(struct usb_ep *ep,
  * @ep:the endpoint associated with the request
  * @req:the request being canceled
  *
- * if the request is still active on the endpoint, it is dequeued and its
+ * If the request is still active on the endpoint, it is dequeued and its
  * completion routine is called (with status -ECONNRESET); else a negative
- * error code is returned.
+ * error code is returned. This is guaranteed to happen before the call to
+ * usb_ep_dequeue() returns.
  *
- * note that some hardware can't clear out write fifos (to unlink the request
- * at the head of the queue) except as part of disconnecting from usb.  such
+ * Note that some hardware can't clear out write fifos (to unlink the request
+ * at the head of the queue) except as part of disconnecting from usb. Such
  * restrictions prevent drivers from supporting configuration changes,
  * even to configuration zero (a "chapter 9" requirement).
  */
-- 
cgit v1.2.3


From 33b7f99cf003ca6c1d31c42b50e1100ad71aaec0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 15 Aug 2014 17:23:02 -0400
Subject: ftrace: Allow ftrace_ops to use the hashes from other ops

Currently the top level debug file system function tracer shares its
ftrace_ops with the function graph tracer. This was thought to be fine
because the tracers are not used together, as one can only enable
function or function_graph tracer in the current_tracer file.

But that assumption proved to be incorrect. The function profiler
can use the function graph tracer when function tracing is enabled.
Since all function graph users uses the function tracing ftrace_ops
this causes a conflict and when a user enables both function profiling
as well as the function tracer it will crash ftrace and disable it.

The quick solution so far is to move them as separate ftrace_ops like
it was earlier. The problem though is to synchronize the functions that
are traced because both function and function_graph tracer are limited
by the selections made in the set_ftrace_filter and set_ftrace_notrace
files.

To handle this, a new structure is made called ftrace_ops_hash. This
structure will now hold the filter_hash and notrace_hash, and the
ftrace_ops will point to this structure. That will allow two ftrace_ops
to share the same hashes.

Since most ftrace_ops do not share the hashes, and to keep allocation
simple, the ftrace_ops structure will include both a pointer to the
ftrace_ops_hash called func_hash, as well as the structure itself,
called local_hash. When the ops are registered, the func_hash pointer
will be initialized to point to the local_hash within the ftrace_ops
structure. Some of the ftrace internal ftrace_ops will be initialized
statically. This will allow for the function and function_graph tracer
to have separate ops but still share the same hash tables that determine
what functions they trace.

Cc: stable@vger.kernel.org # 3.16 (apply after 3.17-rc4 is out)
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  14 +++++--
 kernel/trace/ftrace.c  | 100 +++++++++++++++++++++++++------------------------
 2 files changed, 63 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 6bb5e3f2a3b4..f0b0edbf55a9 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -102,6 +102,15 @@ enum {
 	FTRACE_OPS_FL_DELETED			= 1 << 8,
 };
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+/* The hash used to know what functions callbacks trace */
+struct ftrace_ops_hash {
+	struct ftrace_hash		*notrace_hash;
+	struct ftrace_hash		*filter_hash;
+	struct mutex			regex_lock;
+};
+#endif
+
 /*
  * Note, ftrace_ops can be referenced outside of RCU protection.
  * (Although, for perf, the control ops prevent that). If ftrace_ops is
@@ -121,10 +130,9 @@ struct ftrace_ops {
 	int __percpu			*disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
 	int				nr_trampolines;
-	struct ftrace_hash		*notrace_hash;
-	struct ftrace_hash		*filter_hash;
+	struct ftrace_ops_hash		local_hash;
+	struct ftrace_ops_hash		*func_hash;
 	struct ftrace_hash		*tramp_hash;
-	struct mutex			regex_lock;
 	unsigned long			trampoline;
 #endif
 };
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1654b12c891a..c92757adba79 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -65,15 +65,17 @@
 #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-#define INIT_REGEX_LOCK(opsname)	\
-	.regex_lock	= __MUTEX_INITIALIZER(opsname.regex_lock),
+#define INIT_OPS_HASH(opsname)	\
+	.func_hash		= &opsname.local_hash,			\
+	.local_hash.regex_lock	= __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
 #else
-#define INIT_REGEX_LOCK(opsname)
+#define INIT_OPS_HASH(opsname)
 #endif
 
 static struct ftrace_ops ftrace_list_end __read_mostly = {
 	.func		= ftrace_stub,
 	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
+	INIT_OPS_HASH(ftrace_list_end)
 };
 
 /* ftrace_enabled is a method to turn ftrace on or off */
@@ -140,7 +142,8 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
 	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) {
-		mutex_init(&ops->regex_lock);
+		mutex_init(&ops->local_hash.regex_lock);
+		ops->func_hash = &ops->local_hash;
 		ops->flags |= FTRACE_OPS_FL_INITIALIZED;
 	}
 #endif
@@ -899,7 +902,7 @@ static void unregister_ftrace_profiler(void)
 static struct ftrace_ops ftrace_profile_ops __read_mostly = {
 	.func		= function_profile_call,
 	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
-	INIT_REGEX_LOCK(ftrace_profile_ops)
+	INIT_OPS_HASH(ftrace_profile_ops)
 };
 
 static int register_ftrace_profiler(void)
@@ -1081,11 +1084,12 @@ static const struct ftrace_hash empty_hash = {
 #define EMPTY_HASH	((struct ftrace_hash *)&empty_hash)
 
 static struct ftrace_ops global_ops = {
-	.func			= ftrace_stub,
-	.notrace_hash		= EMPTY_HASH,
-	.filter_hash		= EMPTY_HASH,
-	.flags			= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
-	INIT_REGEX_LOCK(global_ops)
+	.func				= ftrace_stub,
+	.local_hash.notrace_hash	= EMPTY_HASH,
+	.local_hash.filter_hash		= EMPTY_HASH,
+	INIT_OPS_HASH(global_ops)
+	.flags				= FTRACE_OPS_FL_RECURSION_SAFE |
+					  FTRACE_OPS_FL_INITIALIZED,
 };
 
 struct ftrace_page {
@@ -1226,8 +1230,8 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
 void ftrace_free_filter(struct ftrace_ops *ops)
 {
 	ftrace_ops_init(ops);
-	free_ftrace_hash(ops->filter_hash);
-	free_ftrace_hash(ops->notrace_hash);
+	free_ftrace_hash(ops->func_hash->filter_hash);
+	free_ftrace_hash(ops->func_hash->notrace_hash);
 }
 
 static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
@@ -1382,8 +1386,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 		return 0;
 #endif
 
-	filter_hash = rcu_dereference_raw_notrace(ops->filter_hash);
-	notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash);
+	filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash);
+	notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash);
 
 	if ((ftrace_hash_empty(filter_hash) ||
 	     ftrace_lookup_ip(filter_hash, ip)) &&
@@ -1554,14 +1558,14 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 	 *   gets inversed.
 	 */
 	if (filter_hash) {
-		hash = ops->filter_hash;
-		other_hash = ops->notrace_hash;
+		hash = ops->func_hash->filter_hash;
+		other_hash = ops->func_hash->notrace_hash;
 		if (ftrace_hash_empty(hash))
 			all = 1;
 	} else {
 		inc = !inc;
-		hash = ops->notrace_hash;
-		other_hash = ops->filter_hash;
+		hash = ops->func_hash->notrace_hash;
+		other_hash = ops->func_hash->filter_hash;
 		/*
 		 * If the notrace hash has no items,
 		 * then there's nothing to do.
@@ -2436,8 +2440,8 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
 	 * Filter_hash being empty will default to trace module.
 	 * But notrace hash requires a test of individual module functions.
 	 */
-	return ftrace_hash_empty(ops->filter_hash) &&
-		ftrace_hash_empty(ops->notrace_hash);
+	return ftrace_hash_empty(ops->func_hash->filter_hash) &&
+		ftrace_hash_empty(ops->func_hash->notrace_hash);
 }
 
 /*
@@ -2459,12 +2463,12 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
 		return 0;
 
 	/* The function must be in the filter */
-	if (!ftrace_hash_empty(ops->filter_hash) &&
-	    !ftrace_lookup_ip(ops->filter_hash, rec->ip))
+	if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
+	    !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
 		return 0;
 
 	/* If in notrace hash, we ignore it too */
-	if (ftrace_lookup_ip(ops->notrace_hash, rec->ip))
+	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip))
 		return 0;
 
 	return 1;
@@ -2785,10 +2789,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 	} else {
 		rec = &iter->pg->records[iter->idx++];
 		if (((iter->flags & FTRACE_ITER_FILTER) &&
-		     !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
+		     !(ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
-		     !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) ||
+		     !ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) ||
 
 		    ((iter->flags & FTRACE_ITER_ENABLED) &&
 		     !(rec->flags & FTRACE_FL_ENABLED))) {
@@ -2837,9 +2841,9 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	 * functions are enabled.
 	 */
 	if ((iter->flags & FTRACE_ITER_FILTER &&
-	     ftrace_hash_empty(ops->filter_hash)) ||
+	     ftrace_hash_empty(ops->func_hash->filter_hash)) ||
 	    (iter->flags & FTRACE_ITER_NOTRACE &&
-	     ftrace_hash_empty(ops->notrace_hash))) {
+	     ftrace_hash_empty(ops->func_hash->notrace_hash))) {
 		if (*pos > 0)
 			return t_hash_start(m, pos);
 		iter->flags |= FTRACE_ITER_PRINTALL;
@@ -3001,12 +3005,12 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 	iter->ops = ops;
 	iter->flags = flag;
 
-	mutex_lock(&ops->regex_lock);
+	mutex_lock(&ops->func_hash->regex_lock);
 
 	if (flag & FTRACE_ITER_NOTRACE)
-		hash = ops->notrace_hash;
+		hash = ops->func_hash->notrace_hash;
 	else
-		hash = ops->filter_hash;
+		hash = ops->func_hash->filter_hash;
 
 	if (file->f_mode & FMODE_WRITE) {
 		const int size_bits = FTRACE_HASH_DEFAULT_BITS;
@@ -3041,7 +3045,7 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 		file->private_data = iter;
 
  out_unlock:
-	mutex_unlock(&ops->regex_lock);
+	mutex_unlock(&ops->func_hash->regex_lock);
 
 	return ret;
 }
@@ -3279,7 +3283,7 @@ static struct ftrace_ops trace_probe_ops __read_mostly =
 {
 	.func		= function_trace_probe_call,
 	.flags		= FTRACE_OPS_FL_INITIALIZED,
-	INIT_REGEX_LOCK(trace_probe_ops)
+	INIT_OPS_HASH(trace_probe_ops)
 };
 
 static int ftrace_probe_registered;
@@ -3342,7 +3346,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 			      void *data)
 {
 	struct ftrace_func_probe *entry;
-	struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+	struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash;
 	struct ftrace_hash *hash;
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec;
@@ -3359,7 +3363,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	if (WARN_ON(not))
 		return -EINVAL;
 
-	mutex_lock(&trace_probe_ops.regex_lock);
+	mutex_lock(&trace_probe_ops.func_hash->regex_lock);
 
 	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
 	if (!hash) {
@@ -3428,7 +3432,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
  out_unlock:
 	mutex_unlock(&ftrace_lock);
  out:
-	mutex_unlock(&trace_probe_ops.regex_lock);
+	mutex_unlock(&trace_probe_ops.func_hash->regex_lock);
 	free_ftrace_hash(hash);
 
 	return count;
@@ -3446,7 +3450,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	struct ftrace_func_entry *rec_entry;
 	struct ftrace_func_probe *entry;
 	struct ftrace_func_probe *p;
-	struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+	struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash;
 	struct list_head free_list;
 	struct ftrace_hash *hash;
 	struct hlist_node *tmp;
@@ -3468,7 +3472,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 			return;
 	}
 
-	mutex_lock(&trace_probe_ops.regex_lock);
+	mutex_lock(&trace_probe_ops.func_hash->regex_lock);
 
 	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
 	if (!hash)
@@ -3521,7 +3525,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	mutex_unlock(&ftrace_lock);
 		
  out_unlock:
-	mutex_unlock(&trace_probe_ops.regex_lock);
+	mutex_unlock(&trace_probe_ops.func_hash->regex_lock);
 	free_ftrace_hash(hash);
 }
 
@@ -3717,12 +3721,12 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
-	mutex_lock(&ops->regex_lock);
+	mutex_lock(&ops->func_hash->regex_lock);
 
 	if (enable)
-		orig_hash = &ops->filter_hash;
+		orig_hash = &ops->func_hash->filter_hash;
 	else
-		orig_hash = &ops->notrace_hash;
+		orig_hash = &ops->func_hash->notrace_hash;
 
 	if (reset)
 		hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
@@ -3752,7 +3756,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
 	mutex_unlock(&ftrace_lock);
 
  out_regex_unlock:
-	mutex_unlock(&ops->regex_lock);
+	mutex_unlock(&ops->func_hash->regex_lock);
 
 	free_ftrace_hash(hash);
 	return ret;
@@ -3975,15 +3979,15 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 
 	trace_parser_put(parser);
 
-	mutex_lock(&iter->ops->regex_lock);
+	mutex_lock(&iter->ops->func_hash->regex_lock);
 
 	if (file->f_mode & FMODE_WRITE) {
 		filter_hash = !!(iter->flags & FTRACE_ITER_FILTER);
 
 		if (filter_hash)
-			orig_hash = &iter->ops->filter_hash;
+			orig_hash = &iter->ops->func_hash->filter_hash;
 		else
-			orig_hash = &iter->ops->notrace_hash;
+			orig_hash = &iter->ops->func_hash->notrace_hash;
 
 		mutex_lock(&ftrace_lock);
 		ret = ftrace_hash_move(iter->ops, filter_hash,
@@ -3994,7 +3998,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 		mutex_unlock(&ftrace_lock);
 	}
 
-	mutex_unlock(&iter->ops->regex_lock);
+	mutex_unlock(&iter->ops->func_hash->regex_lock);
 	free_ftrace_hash(iter->hash);
 	kfree(iter);
 
@@ -4611,7 +4615,7 @@ void __init ftrace_init(void)
 static struct ftrace_ops global_ops = {
 	.func			= ftrace_stub,
 	.flags			= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
-	INIT_REGEX_LOCK(global_ops)
+	INIT_OPS_HASH(global_ops)
 };
 
 static int __init ftrace_nodyn_init(void)
@@ -4713,7 +4717,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 static struct ftrace_ops control_ops = {
 	.func	= ftrace_ops_control_func,
 	.flags	= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
-	INIT_REGEX_LOCK(control_ops)
+	INIT_OPS_HASH(control_ops)
 };
 
 static inline void
-- 
cgit v1.2.3


From d4261e5650004d6d51137553ea5433d5828562dc Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 19 Aug 2014 16:02:12 +0200
Subject: bonding: create netlink event when bonding option is changed

Userspace needs to be notified if one changes some option.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Veaceslav Falico <vfalico@gmail.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_options.c | 2 ++
 include/linux/netdevice.h          | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index dc73463c2c23..d8dc17faa6b4 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -625,6 +625,8 @@ int __bond_opt_set(struct bonding *bond,
 out:
 	if (ret)
 		bond_opt_error_interpret(bond, opt, ret, val);
+	else
+		call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev);
 
 	return ret;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 38377392d082..7e2b0b8b5cd7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1982,6 +1982,7 @@ struct pcpu_sw_netstats {
 #define NETDEV_CHANGEUPPER	0x0015
 #define NETDEV_RESEND_IGMP	0x0016
 #define NETDEV_PRECHANGEMTU	0x0017 /* notify before mtu change happened */
+#define NETDEV_CHANGEINFODATA	0x0018
 
 int register_netdevice_notifier(struct notifier_block *nb);
 int unregister_netdevice_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From b5b822050ca3c4fc1f475100cc197cc00ba2d492 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Fri, 22 Aug 2014 16:17:38 +0800
Subject: f2fs: use macro for code readability

This patch introduces DEF_NIDS_PER_INODE/GET_ORPHAN_BLOCKS/F2FS_CP_PACKS macro
instead of numbers in code for readability.

change log from v1:
 o fix typo pointed out by Jaegeuk Kim.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c    | 21 ++++++++++-----------
 include/linux/f2fs_fs.h | 13 ++++++++++---
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c9c08d52ecfd..ec3b7a5381fa 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -443,8 +443,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 	struct f2fs_orphan_block *orphan_blk = NULL;
 	unsigned int nentries = 0;
 	unsigned short index;
-	unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
-		(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
+	unsigned short orphan_blocks =
+			(unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
 	struct page *page = NULL;
 	struct ino_entry *orphan = NULL;
 
@@ -837,7 +837,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
 	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
 	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
 		ckpt->cur_node_segno[i] =
 			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
 		ckpt->cur_node_blkoff[i] =
@@ -845,7 +845,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
 				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
 	}
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
 		ckpt->cur_data_segno[i] =
 			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
 		ckpt->cur_data_blkoff[i] =
@@ -860,24 +860,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 
 	/* 2 cp  + n data seg summary + orphan inode blocks */
 	data_sum_blocks = npages_for_summary_flush(sbi);
-	if (data_sum_blocks < 3)
+	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
 		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 	else
 		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 
-	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
-					/ F2FS_ORPHANS_PER_BLOCK;
+	orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
 	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
 			orphan_blocks);
 
 	if (is_umount) {
 		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
-		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
 				cp_payload_blks + data_sum_blocks +
 				orphan_blocks + NR_CURSEG_NODE_TYPE);
 	} else {
 		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
-		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
 				cp_payload_blks + data_sum_blocks +
 				orphan_blocks);
 	}
@@ -1022,8 +1021,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
 	 * for cp pack we can have max 1020*504 orphan entries
 	 */
 	sbi->n_orphans = 0;
-	sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
-				* F2FS_ORPHANS_PER_BLOCK;
+	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
+			NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
 }
 
 int __init create_checkpoint_caches(void)
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 0ed77f32c9ac..08ed2b0a96e6 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -90,6 +90,8 @@ struct f2fs_super_block {
 #define CP_ORPHAN_PRESENT_FLAG	0x00000002
 #define CP_UMOUNT_FLAG		0x00000001
 
+#define F2FS_CP_PACKS		2	/* # of checkpoint packs */
+
 struct f2fs_checkpoint {
 	__le64 checkpoint_ver;		/* checkpoint block version number */
 	__le64 user_block_count;	/* # of user blocks */
@@ -126,6 +128,9 @@ struct f2fs_checkpoint {
  */
 #define F2FS_ORPHANS_PER_BLOCK	1020
 
+#define GET_ORPHAN_BLOCKS(n)	((n + F2FS_ORPHANS_PER_BLOCK - 1) / \
+					F2FS_ORPHANS_PER_BLOCK)
+
 struct f2fs_orphan_block {
 	__le32 ino[F2FS_ORPHANS_PER_BLOCK];	/* inode numbers */
 	__le32 reserved;	/* reserved */
@@ -147,6 +152,7 @@ struct f2fs_extent {
 #define F2FS_NAME_LEN		255
 #define F2FS_INLINE_XATTR_ADDRS	50	/* 200 bytes for inline xattrs */
 #define DEF_ADDRS_PER_INODE	923	/* Address Pointers in an Inode */
+#define DEF_NIDS_PER_INODE	5	/* Node IDs in an Inode */
 #define ADDRS_PER_INODE(fi)	addrs_per_inode(fi)
 #define ADDRS_PER_BLOCK		1018	/* Address Pointers in a Direct Block */
 #define NIDS_PER_BLOCK		1018	/* Node IDs in an Indirect Block */
@@ -166,8 +172,9 @@ struct f2fs_extent {
 #define MAX_INLINE_DATA		(sizeof(__le32) * (DEF_ADDRS_PER_INODE - \
 						F2FS_INLINE_XATTR_ADDRS - 1))
 
-#define INLINE_DATA_OFFSET	(PAGE_CACHE_SIZE - sizeof(struct node_footer) \
-			- sizeof(__le32) * (DEF_ADDRS_PER_INODE + 5 - 1))
+#define INLINE_DATA_OFFSET	(PAGE_CACHE_SIZE - sizeof(struct node_footer) -\
+				sizeof(__le32) * (DEF_ADDRS_PER_INODE + \
+				DEF_NIDS_PER_INODE - 1))
 
 struct f2fs_inode {
 	__le16 i_mode;			/* file mode */
@@ -197,7 +204,7 @@ struct f2fs_inode {
 
 	__le32 i_addr[DEF_ADDRS_PER_INODE];	/* Pointers to data blocks */
 
-	__le32 i_nid[5];		/* direct(2), indirect(2),
+	__le32 i_nid[DEF_NIDS_PER_INODE];	/* direct(2), indirect(2),
 						double_indirect(1) node id */
 } __packed;
 
-- 
cgit v1.2.3


From 7c3af975257383ece54b83c0505d3e0656cb7daf Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Fri, 8 Aug 2014 11:00:57 -0400
Subject: nfs: don't sleep with inode lock in lock_and_join_requests

This handles the 'nonblock=false' case in nfs_lock_and_join_requests.
If the group is already locked and blocking is allowed, drop the inode lock
and wait for the group lock to be cleared before trying it all again.
This should fix warnings found in peterz's tree (sched/wait branch), where
might_sleep() checks are added to wait.[ch].

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Reviewed-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c        | 17 +++++++++++++++++
 fs/nfs/write.c           | 12 +++++++++++-
 include/linux/nfs_page.h |  1 +
 3 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 30c9626f96b0..4ec67f8d70aa 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -167,6 +167,23 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 	return -EAGAIN;
 }
 
+/*
+ * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it
+ * @req - a request in the group
+ *
+ * This is a blocking call to wait for the group lock to be cleared.
+ */
+void
+nfs_page_group_lock_wait(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	wait_on_bit(&head->wb_flags, PG_HEADLOCK,
+		TASK_UNINTERRUPTIBLE);
+}
+
 /*
  * nfs_page_group_unlock - unlock the head of the page group
  * @req - request in group that is to be unlocked
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e056f617adf2..175d5d073ccf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -478,13 +478,23 @@ try_again:
 		return NULL;
 	}
 
-	/* lock each request in the page group */
+	/* holding inode lock, so always make a non-blocking call to try the
+	 * page group lock */
 	ret = nfs_page_group_lock(head, true);
 	if (ret < 0) {
 		spin_unlock(&inode->i_lock);
+
+		if (!nonblock && ret == -EAGAIN) {
+			nfs_page_group_lock_wait(head);
+			nfs_release_request(head);
+			goto try_again;
+		}
+
 		nfs_release_request(head);
 		return ERR_PTR(ret);
 	}
+
+	/* lock each request in the page group */
 	subreq = head;
 	do {
 		/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 6ad2bbcad405..6c3e06ee2fb7 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -123,6 +123,7 @@ extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	void nfs_unlock_and_release_request(struct nfs_page *);
 extern int nfs_page_group_lock(struct nfs_page *, bool);
+extern void nfs_page_group_lock_wait(struct nfs_page *);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
 
-- 
cgit v1.2.3


From 989e04c5bc3ff77d65e1f0d87bf7904dfa30d41c Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Fri, 22 Aug 2014 14:15:22 -0700
Subject: tcp: improve undo on timeout

Upon timeout, undo (via both timestamps/Eifel and DSACKs) was
disabled if any retransmits were still in flight.  The concern was
perhaps that spurious retransmission sent in a previous recovery
episode may trigger DSACKs to falsely undo the current recovery.

However, this inadvertently misses undo opportunities (using either
TCP timestamps or DSACKs) when timeout occurs during a loss episode,
i.e.  recurring timeouts or timeout during fast recovery. In these
cases some retransmissions will be in flight but we should allow
undo. Furthermore, we should only reset undo_marker and undo_retrans
upon timeout if we are starting a new recovery episode. Finally,
when we do reset our undo state, we now do so in a manner similar
to tcp_enter_recovery(), so that we require a DSACK for each of
the outstsanding retransmissions. This will achieve the original
goal by requiring that we receive the same number of DSACKs as
retransmissions.

This patch increases the undo events by 50% on Google servers.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h  |  2 +-
 net/ipv4/tcp_input.c | 26 +++++++++++---------------
 2 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fa5258f322e7..e567f0dbf282 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -276,7 +276,7 @@ struct tcp_sock {
 	u32	retrans_stamp;	/* Timestamp of the last retransmit,
 				 * also used in SYN-SENT to remember stamp of
 				 * the first SYN. */
-	u32	undo_marker;	/* tracking retrans started here. */
+	u32	undo_marker;	/* snd_una upon a new recovery episode. */
 	int	undo_retrans;	/* number of undoable retransmissions. */
 	u32	total_retrans;	/* Total retransmits for entire connection */
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a906e0200ff2..aba4926ca095 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1888,21 +1888,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 	tp->sacked_out = 0;
 }
 
-static void tcp_clear_retrans_partial(struct tcp_sock *tp)
+void tcp_clear_retrans(struct tcp_sock *tp)
 {
 	tp->retrans_out = 0;
 	tp->lost_out = 0;
-
 	tp->undo_marker = 0;
 	tp->undo_retrans = -1;
+	tp->fackets_out = 0;
+	tp->sacked_out = 0;
 }
 
-void tcp_clear_retrans(struct tcp_sock *tp)
+static inline void tcp_init_undo(struct tcp_sock *tp)
 {
-	tcp_clear_retrans_partial(tp);
-
-	tp->fackets_out = 0;
-	tp->sacked_out = 0;
+	tp->undo_marker = tp->snd_una;
+	/* Retransmission still in flight may cause DSACKs later. */
+	tp->undo_retrans = tp->retrans_out ? : -1;
 }
 
 /* Enter Loss state. If we detect SACK reneging, forget all SACK information
@@ -1925,18 +1925,18 @@ void tcp_enter_loss(struct sock *sk)
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_LOSS);
+		tcp_init_undo(tp);
 	}
 	tp->snd_cwnd	   = 1;
 	tp->snd_cwnd_cnt   = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
-	tcp_clear_retrans_partial(tp);
+	tp->retrans_out = 0;
+	tp->lost_out = 0;
 
 	if (tcp_is_reno(tp))
 		tcp_reset_reno_sack(tp);
 
-	tp->undo_marker = tp->snd_una;
-
 	skb = tcp_write_queue_head(sk);
 	is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
 	if (is_reneg) {
@@ -1950,9 +1950,6 @@ void tcp_enter_loss(struct sock *sk)
 		if (skb == tcp_send_head(sk))
 			break;
 
-		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
-			tp->undo_marker = 0;
-
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
@@ -2671,8 +2668,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 	tp->prior_ssthresh = 0;
-	tp->undo_marker = tp->snd_una;
-	tp->undo_retrans = tp->retrans_out ? : -1;
+	tcp_init_undo(tp);
 
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
-- 
cgit v1.2.3


From 53f3cc46336b9e514c98556b4a009a69ed808d3b Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Sat, 23 Aug 2014 14:45:47 +0400
Subject: pata_platform: Remove useless irq_flags field

IRQ flags can be obtained from resource structure, there are no need
to use additional field in the platform_data to store these values.
This patch removes this field and convert existing users of this driver
to use IRQ flags from the resources.

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/blackfin/mach-bf537/boards/cm_bf537e.c | 3 +--
 arch/blackfin/mach-bf537/boards/cm_bf537u.c | 3 +--
 arch/blackfin/mach-bf537/boards/stamp.c     | 3 +--
 arch/blackfin/mach-bf537/boards/tcm_bf537.c | 3 +--
 arch/blackfin/mach-bf561/boards/cm_bf561.c  | 3 +--
 drivers/ata/pata_of_platform.c              | 2 --
 drivers/ata/pata_platform.c                 | 4 +---
 include/linux/ata_platform.h                | 5 -----
 8 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c
index 1e7290ef3525..1e1014df5e9e 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c
@@ -733,7 +733,6 @@ static struct platform_device bfin_mac_device = {
 
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 2,
-	.irq_type = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
@@ -750,7 +749,7 @@ static struct resource bfin_pata_resources[] = {
 	{
 		.start = PATA_INT,
 		.end = PATA_INT,
-		.flags = IORESOURCE_IRQ,
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
 	},
 };
 
diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c
index c7495dc74690..d056db9e5592 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c
@@ -587,7 +587,6 @@ static struct platform_device bfin_mac_device = {
 
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 2,
-	.irq_type = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
@@ -604,7 +603,7 @@ static struct resource bfin_pata_resources[] = {
 	{
 		.start = PATA_INT,
 		.end = PATA_INT,
-		.flags = IORESOURCE_IRQ,
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
 	},
 };
 
diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index de19b8a56007..88a19fc9844d 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -2462,7 +2462,6 @@ static struct platform_device bfin_sport0_device = {
 #define PATA_INT	IRQ_PF5
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 1,
-	.irq_flags = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
@@ -2479,7 +2478,7 @@ static struct resource bfin_pata_resources[] = {
 	{
 		.start = PATA_INT,
 		.end = PATA_INT,
-		.flags = IORESOURCE_IRQ,
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
 	},
 };
 #elif defined(CF_IDE_NAND_CARD_USE_CF_IN_COMMON_MEMORY_MODE)
diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
index 6b988ad653d8..ed309c9a62b6 100644
--- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c
+++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
@@ -589,7 +589,6 @@ static struct platform_device bfin_mac_device = {
 
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 2,
-	.irq_type = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
@@ -606,7 +605,7 @@ static struct resource bfin_pata_resources[] = {
 	{
 		.start = PATA_INT,
 		.end = PATA_INT,
-		.flags = IORESOURCE_IRQ,
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
 	},
 };
 
diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
index e862f7823e68..c6db52ba3a06 100644
--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
+++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
@@ -354,7 +354,6 @@ static struct platform_device bfin_sir0_device = {
 
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 2,
-	.irq_type = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
@@ -371,7 +370,7 @@ static struct resource bfin_pata_resources[] = {
 	{
 		.start = PATA_INT,
 		.end = PATA_INT,
-		.flags = IORESOURCE_IRQ,
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
 	},
 };
 
diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c
index 6af1c9b9a464..64965398914a 100644
--- a/drivers/ata/pata_of_platform.c
+++ b/drivers/ata/pata_of_platform.c
@@ -43,8 +43,6 @@ static int pata_of_platform_probe(struct platform_device *ofdev)
 	}
 
 	irq_res = platform_get_resource(ofdev, IORESOURCE_IRQ, 0);
-	if (irq_res)
-		irq_res->flags = 0;
 
 	prop = of_get_property(dn, "reg-shift", NULL);
 	if (prop)
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index a5579b55e332..f8cff3e247c5 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -118,7 +118,7 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res,
 	 */
 	if (irq_res && irq_res->start > 0) {
 		irq = irq_res->start;
-		irq_flags = irq_res->flags;
+		irq_flags = irq_res->flags & IRQF_TRIGGER_MASK;
 	}
 
 	/*
@@ -213,8 +213,6 @@ static int pata_platform_probe(struct platform_device *pdev)
 	 * And the IRQ
 	 */
 	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (irq_res)
-		irq_res->flags = pp_info ? pp_info->irq_flags : 0;
 
 	return __pata_platform_probe(&pdev->dev, io_res, ctl_res, irq_res,
 				     pp_info ? pp_info->ioport_shift : 0,
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index b9fde17f767c..5c618a084225 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -8,11 +8,6 @@ struct pata_platform_info {
 	 * spacing used by ata_std_ports().
 	 */
 	unsigned int ioport_shift;
-	/* 
-	 * Indicate platform specific irq types and initial
-	 * IRQ flags when call request_irq()
-	 */
-	unsigned int irq_flags;
 };
 
 extern int __pata_platform_probe(struct device *dev,
-- 
cgit v1.2.3


From 3af20efc0f83cdc65ce56ec108c0e81f602364df Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Aug 2014 18:55:39 -0700
Subject: net: phy: broadcom: extract all registers to brcmphy.h

Commit 439d39a9ac8fbbba9c04581361188f33f21ced50 ("net: phy: broadcom:
extract register definitions") added a bunch of registers to brcmphy.h
but left some to broadcom.c, move all of them to the header file since
the BCM54xx and BCM7xxx PHY drivers do share all of these registers.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 104 ---------------------------------------------
 include/linux/brcmphy.h    | 103 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 104 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 34088d60da74..6001d76d8676 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -25,110 +25,6 @@
 #define BRCM_PHY_REV(phydev) \
 	((phydev)->drv->phy_id & ~((phydev)->drv->phy_id_mask))
 
-/*
- * Broadcom LED source encodings.  These are used in BCM5461, BCM5481,
- * BCM5482, and possibly some others.
- */
-#define BCM_LED_SRC_LINKSPD1	0x0
-#define BCM_LED_SRC_LINKSPD2	0x1
-#define BCM_LED_SRC_XMITLED	0x2
-#define BCM_LED_SRC_ACTIVITYLED	0x3
-#define BCM_LED_SRC_FDXLED	0x4
-#define BCM_LED_SRC_SLAVE	0x5
-#define BCM_LED_SRC_INTR	0x6
-#define BCM_LED_SRC_QUALITY	0x7
-#define BCM_LED_SRC_RCVLED	0x8
-#define BCM_LED_SRC_MULTICOLOR1	0xa
-#define BCM_LED_SRC_OPENSHORT	0xb
-#define BCM_LED_SRC_OFF		0xe	/* Tied high */
-#define BCM_LED_SRC_ON		0xf	/* Tied low */
-
-
-/*
- * BCM5482: Shadow registers
- * Shadow values go into bits [14:10] of register 0x1c to select a shadow
- * register to access.
- */
-/* 00101: Spare Control Register 3 */
-#define BCM54XX_SHD_SCR3		0x05
-#define  BCM54XX_SHD_SCR3_DEF_CLK125	0x0001
-#define  BCM54XX_SHD_SCR3_DLLAPD_DIS	0x0002
-#define  BCM54XX_SHD_SCR3_TRDDAPD	0x0004
-
-/* 01010: Auto Power-Down */
-#define BCM54XX_SHD_APD			0x0a
-#define  BCM54XX_SHD_APD_EN		0x0020
-
-#define BCM5482_SHD_LEDS1	0x0d	/* 01101: LED Selector 1 */
-					/* LED3 / ~LINKSPD[2] selector */
-#define BCM5482_SHD_LEDS1_LED3(src)	((src & 0xf) << 4)
-					/* LED1 / ~LINKSPD[1] selector */
-#define BCM5482_SHD_LEDS1_LED1(src)	((src & 0xf) << 0)
-#define BCM54XX_SHD_RGMII_MODE	0x0b	/* 01011: RGMII Mode Selector */
-#define BCM5482_SHD_SSD		0x14	/* 10100: Secondary SerDes control */
-#define BCM5482_SHD_SSD_LEDM	0x0008	/* SSD LED Mode enable */
-#define BCM5482_SHD_SSD_EN	0x0001	/* SSD enable */
-#define BCM5482_SHD_MODE	0x1f	/* 11111: Mode Control Register */
-#define BCM5482_SHD_MODE_1000BX	0x0001	/* Enable 1000BASE-X registers */
-
-
-/*
- * EXPANSION SHADOW ACCESS REGISTERS.  (PHY REG 0x15, 0x16, and 0x17)
- */
-#define MII_BCM54XX_EXP_AADJ1CH0		0x001f
-#define  MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN	0x0200
-#define  MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF	0x0100
-#define MII_BCM54XX_EXP_AADJ1CH3		0x601f
-#define  MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ	0x0002
-#define MII_BCM54XX_EXP_EXP08			0x0F08
-#define  MII_BCM54XX_EXP_EXP08_RJCT_2MHZ	0x0001
-#define  MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE	0x0200
-#define MII_BCM54XX_EXP_EXP75			0x0f75
-#define  MII_BCM54XX_EXP_EXP75_VDACCTRL		0x003c
-#define  MII_BCM54XX_EXP_EXP75_CM_OSC		0x0001
-#define MII_BCM54XX_EXP_EXP96			0x0f96
-#define  MII_BCM54XX_EXP_EXP96_MYST		0x0010
-#define MII_BCM54XX_EXP_EXP97			0x0f97
-#define  MII_BCM54XX_EXP_EXP97_MYST		0x0c0c
-
-/*
- * BCM5482: Secondary SerDes registers
- */
-#define BCM5482_SSD_1000BX_CTL		0x00	/* 1000BASE-X Control */
-#define BCM5482_SSD_1000BX_CTL_PWRDOWN	0x0800	/* Power-down SSD */
-#define BCM5482_SSD_SGMII_SLAVE		0x15	/* SGMII Slave Register */
-#define BCM5482_SSD_SGMII_SLAVE_EN	0x0002	/* Slave mode enable */
-#define BCM5482_SSD_SGMII_SLAVE_AD	0x0001	/* Slave auto-detection */
-
-
-/*****************************************************************************/
-/* Fast Ethernet Transceiver definitions. */
-/*****************************************************************************/
-
-#define MII_BRCM_FET_INTREG		0x1a	/* Interrupt register */
-#define MII_BRCM_FET_IR_MASK		0x0100	/* Mask all interrupts */
-#define MII_BRCM_FET_IR_LINK_EN		0x0200	/* Link status change enable */
-#define MII_BRCM_FET_IR_SPEED_EN	0x0400	/* Link speed change enable */
-#define MII_BRCM_FET_IR_DUPLEX_EN	0x0800	/* Duplex mode change enable */
-#define MII_BRCM_FET_IR_ENABLE		0x4000	/* Interrupt enable */
-
-#define MII_BRCM_FET_BRCMTEST		0x1f	/* Brcm test register */
-#define MII_BRCM_FET_BT_SRE		0x0080	/* Shadow register enable */
-
-
-/*** Shadow register definitions ***/
-
-#define MII_BRCM_FET_SHDW_MISCCTRL	0x10	/* Shadow misc ctrl */
-#define MII_BRCM_FET_SHDW_MC_FAME	0x4000	/* Force Auto MDIX enable */
-
-#define MII_BRCM_FET_SHDW_AUXMODE4	0x1a	/* Auxiliary mode 4 */
-#define MII_BRCM_FET_SHDW_AM4_LED_MASK	0x0003
-#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001
-
-#define MII_BRCM_FET_SHDW_AUXSTAT2	0x1b	/* Auxiliary status 2 */
-#define MII_BRCM_FET_SHDW_AS2_APDE	0x0020	/* Auto power down enable */
-
-
 MODULE_DESCRIPTION("Broadcom PHY driver");
 MODULE_AUTHOR("Maciej W. Rozycki");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 61219b9b3445..be31bf9f60c2 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -92,4 +92,107 @@
 
 #define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL	0x0000
 
+/*
+ * Broadcom LED source encodings.  These are used in BCM5461, BCM5481,
+ * BCM5482, and possibly some others.
+ */
+#define BCM_LED_SRC_LINKSPD1	0x0
+#define BCM_LED_SRC_LINKSPD2	0x1
+#define BCM_LED_SRC_XMITLED	0x2
+#define BCM_LED_SRC_ACTIVITYLED	0x3
+#define BCM_LED_SRC_FDXLED	0x4
+#define BCM_LED_SRC_SLAVE	0x5
+#define BCM_LED_SRC_INTR	0x6
+#define BCM_LED_SRC_QUALITY	0x7
+#define BCM_LED_SRC_RCVLED	0x8
+#define BCM_LED_SRC_MULTICOLOR1	0xa
+#define BCM_LED_SRC_OPENSHORT	0xb
+#define BCM_LED_SRC_OFF		0xe	/* Tied high */
+#define BCM_LED_SRC_ON		0xf	/* Tied low */
+
+
+/*
+ * BCM5482: Shadow registers
+ * Shadow values go into bits [14:10] of register 0x1c to select a shadow
+ * register to access.
+ */
+/* 00101: Spare Control Register 3 */
+#define BCM54XX_SHD_SCR3		0x05
+#define  BCM54XX_SHD_SCR3_DEF_CLK125	0x0001
+#define  BCM54XX_SHD_SCR3_DLLAPD_DIS	0x0002
+#define  BCM54XX_SHD_SCR3_TRDDAPD	0x0004
+
+/* 01010: Auto Power-Down */
+#define BCM54XX_SHD_APD			0x0a
+#define  BCM54XX_SHD_APD_EN		0x0020
+
+#define BCM5482_SHD_LEDS1	0x0d	/* 01101: LED Selector 1 */
+					/* LED3 / ~LINKSPD[2] selector */
+#define BCM5482_SHD_LEDS1_LED3(src)	((src & 0xf) << 4)
+					/* LED1 / ~LINKSPD[1] selector */
+#define BCM5482_SHD_LEDS1_LED1(src)	((src & 0xf) << 0)
+#define BCM54XX_SHD_RGMII_MODE	0x0b	/* 01011: RGMII Mode Selector */
+#define BCM5482_SHD_SSD		0x14	/* 10100: Secondary SerDes control */
+#define BCM5482_SHD_SSD_LEDM	0x0008	/* SSD LED Mode enable */
+#define BCM5482_SHD_SSD_EN	0x0001	/* SSD enable */
+#define BCM5482_SHD_MODE	0x1f	/* 11111: Mode Control Register */
+#define BCM5482_SHD_MODE_1000BX	0x0001	/* Enable 1000BASE-X registers */
+
+
+/*
+ * EXPANSION SHADOW ACCESS REGISTERS.  (PHY REG 0x15, 0x16, and 0x17)
+ */
+#define MII_BCM54XX_EXP_AADJ1CH0		0x001f
+#define  MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN	0x0200
+#define  MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF	0x0100
+#define MII_BCM54XX_EXP_AADJ1CH3		0x601f
+#define  MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ	0x0002
+#define MII_BCM54XX_EXP_EXP08			0x0F08
+#define  MII_BCM54XX_EXP_EXP08_RJCT_2MHZ	0x0001
+#define  MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE	0x0200
+#define MII_BCM54XX_EXP_EXP75			0x0f75
+#define  MII_BCM54XX_EXP_EXP75_VDACCTRL		0x003c
+#define  MII_BCM54XX_EXP_EXP75_CM_OSC		0x0001
+#define MII_BCM54XX_EXP_EXP96			0x0f96
+#define  MII_BCM54XX_EXP_EXP96_MYST		0x0010
+#define MII_BCM54XX_EXP_EXP97			0x0f97
+#define  MII_BCM54XX_EXP_EXP97_MYST		0x0c0c
+
+/*
+ * BCM5482: Secondary SerDes registers
+ */
+#define BCM5482_SSD_1000BX_CTL		0x00	/* 1000BASE-X Control */
+#define BCM5482_SSD_1000BX_CTL_PWRDOWN	0x0800	/* Power-down SSD */
+#define BCM5482_SSD_SGMII_SLAVE		0x15	/* SGMII Slave Register */
+#define BCM5482_SSD_SGMII_SLAVE_EN	0x0002	/* Slave mode enable */
+#define BCM5482_SSD_SGMII_SLAVE_AD	0x0001	/* Slave auto-detection */
+
+
+/*****************************************************************************/
+/* Fast Ethernet Transceiver definitions. */
+/*****************************************************************************/
+
+#define MII_BRCM_FET_INTREG		0x1a	/* Interrupt register */
+#define MII_BRCM_FET_IR_MASK		0x0100	/* Mask all interrupts */
+#define MII_BRCM_FET_IR_LINK_EN		0x0200	/* Link status change enable */
+#define MII_BRCM_FET_IR_SPEED_EN	0x0400	/* Link speed change enable */
+#define MII_BRCM_FET_IR_DUPLEX_EN	0x0800	/* Duplex mode change enable */
+#define MII_BRCM_FET_IR_ENABLE		0x4000	/* Interrupt enable */
+
+#define MII_BRCM_FET_BRCMTEST		0x1f	/* Brcm test register */
+#define MII_BRCM_FET_BT_SRE		0x0080	/* Shadow register enable */
+
+
+/*** Shadow register definitions ***/
+
+#define MII_BRCM_FET_SHDW_MISCCTRL	0x10	/* Shadow misc ctrl */
+#define MII_BRCM_FET_SHDW_MC_FAME	0x4000	/* Force Auto MDIX enable */
+
+#define MII_BRCM_FET_SHDW_AUXMODE4	0x1a	/* Auxiliary mode 4 */
+#define MII_BRCM_FET_SHDW_AM4_LED_MASK	0x0003
+#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001
+
+#define MII_BRCM_FET_SHDW_AUXSTAT2	0x1b	/* Auxiliary status 2 */
+#define MII_BRCM_FET_SHDW_AS2_APDE	0x0020	/* Auto power down enable */
+
 #endif /* _LINUX_BRCMPHY_H */
-- 
cgit v1.2.3


From 705314797b8b997554b7e9d0ea7b65a497356e53 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Aug 2014 18:55:40 -0700
Subject: net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h

The shadow register 0x1C is used both by the BCM54xxx PHYs and the
BCM7xxx internal PHYs, move the accessors to a common location so both
drivers can use them.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 18 ------------------
 include/linux/brcmphy.h    | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 6001d76d8676..854f2c9a7b2b 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -29,24 +29,6 @@ MODULE_DESCRIPTION("Broadcom PHY driver");
 MODULE_AUTHOR("Maciej W. Rozycki");
 MODULE_LICENSE("GPL");
 
-/*
- * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T
- * 0x1c shadow registers.
- */
-static int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow)
-{
-	phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow));
-	return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD));
-}
-
-static int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, u16 val)
-{
-	return phy_write(phydev, MII_BCM54XX_SHD,
-			 MII_BCM54XX_SHD_WRITE |
-			 MII_BCM54XX_SHD_VAL(shadow) |
-			 MII_BCM54XX_SHD_DATA(val));
-}
-
 /* Indirect register access functions for the Expansion Registers */
 static int bcm54xx_exp_read(struct phy_device *phydev, u16 regnum)
 {
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index be31bf9f60c2..722cf26567fa 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -195,4 +195,24 @@
 #define MII_BRCM_FET_SHDW_AUXSTAT2	0x1b	/* Auxiliary status 2 */
 #define MII_BRCM_FET_SHDW_AS2_APDE	0x0020	/* Auto power down enable */
 
+/*
+ * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T
+ * 0x1c shadow registers.
+ */
+static inline int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow)
+{
+	phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow));
+	return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD));
+}
+
+static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow,
+				       u16 val)
+{
+	return phy_write(phydev, MII_BCM54XX_SHD,
+			 MII_BCM54XX_SHD_WRITE |
+			 MII_BCM54XX_SHD_VAL(shadow) |
+			 MII_BCM54XX_SHD_DATA(val));
+}
+
+
 #endif /* _LINUX_BRCMPHY_H */
-- 
cgit v1.2.3


From 66ce7fb9807b036058aa380bfd2b3851ae25ce39 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Aug 2014 18:55:43 -0700
Subject: net: phy: export phy_{read,write}_mmd_indirect

Some PHY drivers might need to access Clause 45 registers in Clause 22
compatibility mode to e.g: properly advertise EEE support when disabled
by default.

Export these two helper functions: phy_read_mmd_indirect() and
phy_write_mmd_indirect() for drivers to use them.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c |  6 ++++--
 include/linux/phy.h   | 27 +++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index c94e2a27446a..e7a5893f32ff 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -955,7 +955,7 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
  * 3) Write reg 13 // MMD Data Command for MMD DEVAD
  * 3) Read  reg 14 // Read MMD data
  */
-static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad,
+int phy_read_mmd_indirect(struct phy_device *phydev, int prtad,
 				 int devad, int addr)
 {
 	struct phy_driver *phydrv = phydev->drv;
@@ -971,6 +971,7 @@ static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad,
 	}
 	return value;
 }
+EXPORT_SYMBOL(phy_read_mmd_indirect);
 
 /**
  * phy_write_mmd_indirect - writes data to the MMD registers
@@ -988,7 +989,7 @@ static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad,
  * 3) Write reg 13 // MMD Data Command for MMD DEVAD
  * 3) Write reg 14 // Write MMD data
  */
-static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad,
+void phy_write_mmd_indirect(struct phy_device *phydev, int prtad,
 				   int devad, int addr, u32 data)
 {
 	struct phy_driver *phydrv = phydev->drv;
@@ -1002,6 +1003,7 @@ static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad,
 		phydrv->write_mmd_indirect(phydev, prtad, devad, addr, data);
 	}
 }
+EXPORT_SYMBOL(phy_write_mmd_indirect);
 
 /**
  * phy_init_eee - init and check the EEE feature
diff --git a/include/linux/phy.h b/include/linux/phy.h
index ed39956b5613..d090cfcaa167 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -597,6 +597,19 @@ static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
 			    MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff));
 }
 
+/**
+ * phy_read_mmd_indirect - reads data from the MMD registers
+ * @phydev: The PHY device bus
+ * @prtad: MMD Address
+ * @devad: MMD DEVAD
+ * @addr: PHY address on the MII bus
+ *
+ * Description: it reads data from the MMD registers (clause 22 to access to
+ * clause 45) of the specified phy address.
+ */
+int phy_read_mmd_indirect(struct phy_device *phydev, int prtad,
+			  int devad, int addr);
+
 /**
  * phy_read - Convenience function for reading a given PHY register
  * @phydev: the phy_device struct
@@ -668,6 +681,20 @@ static inline int phy_write_mmd(struct phy_device *phydev, int devad,
 	return mdiobus_write(phydev->bus, phydev->addr, regnum, val);
 }
 
+/**
+ * phy_write_mmd_indirect - writes data to the MMD registers
+ * @phydev: The PHY device
+ * @prtad: MMD Address
+ * @devad: MMD DEVAD
+ * @addr: PHY address on the MII bus
+ * @data: data to write in the MMD register
+ *
+ * Description: Write data from the MMD registers of the specified
+ * phy address.
+ */
+void phy_write_mmd_indirect(struct phy_device *phydev, int prtad,
+			    int devad, int addr, u32 data);
+
 struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
 				     bool is_c45,
 				     struct phy_c45_device_ids *c45_ids);
-- 
cgit v1.2.3


From b8f9a02924bbeb0c46ca4c19561cbe765b80e264 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Aug 2014 18:55:45 -0700
Subject: net: phy: bcm7xxx: enable EEE at the PHY level

The 28nm Gigabit PHY on BCM7xxx chips comes out of reset with absolutely
no EEE capabilities, such that we would actually return that we do not
support EEE when accessing 3.20 (MDIO_PCS_EEE_ABLE) registers.

Poke through the vendor-specific C45 register to enable EEE globally at
the PHY level, and advertise supported EEE modes.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm7xxx.c | 31 +++++++++++++++++++++++++++++++
 include/linux/brcmphy.h   |  3 +++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 29e256a4ed57..e98c510b75c5 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/bitops.h>
 #include <linux/brcmphy.h>
+#include <linux/mdio.h>
 
 /* Broadcom BCM7xxx internal PHY registers */
 #define MII_BCM7XXX_CHANNEL_WIDTH	0x2000
@@ -167,6 +168,32 @@ static int bcm7xxx_apd_enable(struct phy_device *phydev)
 	return bcm54xx_shadow_write(phydev, BCM54XX_SHD_APD, val);
 }
 
+static int bcm7xxx_eee_enable(struct phy_device *phydev)
+{
+	int val;
+
+	val = phy_read_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL,
+				    MDIO_MMD_AN, phydev->addr);
+	if (val < 0)
+		return val;
+
+	/* Enable general EEE feature at the PHY level */
+	val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X;
+
+	phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL,
+			       MDIO_MMD_AN, phydev->addr, val);
+
+	/* Advertise supported modes */
+	val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV,
+				    MDIO_MMD_AN, phydev->addr);
+
+	val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T);
+	phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV,
+			       MDIO_MMD_AN, phydev->addr, val);
+
+	return 0;
+}
+
 static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
 {
 	int ret;
@@ -179,6 +206,10 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
 	if (ret)
 		return ret;
 
+	ret = bcm7xxx_eee_enable(phydev);
+	if (ret)
+		return ret;
+
 	return bcm7xxx_apd_enable(phydev);
 }
 
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 722cf26567fa..ee1431d976fa 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -214,5 +214,8 @@ static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow,
 			 MII_BCM54XX_SHD_DATA(val));
 }
 
+#define BRCM_CL45VEN_EEE_CONTROL	0x803d
+#define LPI_FEATURE_EN			0x8000
+#define LPI_FEATURE_EN_DIG1000X		0x4000
 
 #endif /* _LINUX_BRCMPHY_H */
-- 
cgit v1.2.3


From 690e36e726d00d2528bc569809048adf61550d80 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 23 Aug 2014 12:13:41 -0700
Subject: net: Allow raw buffers to be passed into the flow dissector.

Drivers, and perhaps other entities we have not yet considered,
sometimes want to know how deep the protocol headers go before
deciding how large of an SKB to allocate and how much of the packet to
place into the linear SKB area.

For example, consider a driver which has a device which DMAs into
pools of pages and then tells the driver where the data went in the
DMA descriptor(s).  The driver can then build an SKB and reference
most of the data via SKB fragments (which are page/offset/length
triplets).

However at least some of the front of the packet should be placed into
the linear SKB area, which comes before the fragments, so that packet
processing can get at the headers efficiently.  The first thing each
protocol layer is going to do is a "pskb_may_pull()" so we might as
well aggregate as much of this as possible while we're building the
SKB in the driver.

Part of supporting this is that we don't have an SKB yet, so we want
to be able to let the flow dissector operate on a raw buffer in order
to compute the offset of the end of the headers.

So now we have a __skb_flow_dissect() which takes an explicit data
pointer and length.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h    | 18 ++++++++++++------
 include/net/flow_keys.h   | 14 ++++++++++++--
 net/core/flow_dissector.c | 40 ++++++++++++++++++++++++++--------------
 3 files changed, 50 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index abde271c18ae..18ddf9684a27 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2567,20 +2567,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 
-static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
-				       int len, void *buffer)
+static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset,
+					 int len, void *data, int hlen, void *buffer)
 {
-	int hlen = skb_headlen(skb);
-
 	if (hlen - offset >= len)
-		return skb->data + offset;
+		return data + offset;
 
-	if (skb_copy_bits(skb, offset, buffer, len) < 0)
+	if (!skb ||
+	    skb_copy_bits(skb, offset, buffer, len) < 0)
 		return NULL;
 
 	return buffer;
 }
 
+static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
+				       int len, void *buffer)
+{
+	return __skb_header_pointer(skb, offset, len, skb->data,
+				    skb_headlen(skb), buffer);
+}
+
 /**
  *	skb_needs_linearize - check if we need to linearize a given skb
  *			      depending on the given device features.
diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
index 6667a054763a..4040f63932c5 100644
--- a/include/net/flow_keys.h
+++ b/include/net/flow_keys.h
@@ -27,7 +27,17 @@ struct flow_keys {
 	u8 ip_proto;
 };
 
-bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow);
-__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto);
+bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
+			void *data, int hlen);
+static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
+{
+	return __skb_flow_dissect(skb, flow, NULL, 0);
+}
+__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
+			    void *data, int hlen_proto);
+static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
+{
+	return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
+}
 u32 flow_hash_from_keys(struct flow_keys *keys);
 #endif
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 5f362c1d0332..660c6492fb78 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -34,29 +34,40 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i
  * The function will try to retrieve the ports at offset thoff + poff where poff
  * is the protocol port offset returned from proto_ports_offset
  */
-__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
+__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
+			    void *data, int hlen)
 {
 	int poff = proto_ports_offset(ip_proto);
 
+	if (!data) {
+		data = skb->data;
+		hlen = skb_headlen(skb);
+	}
+
 	if (poff >= 0) {
 		__be32 *ports, _ports;
 
-		ports = skb_header_pointer(skb, thoff + poff,
-					   sizeof(_ports), &_ports);
+		ports = __skb_header_pointer(skb, thoff + poff,
+					     sizeof(_ports), data, hlen, &_ports);
 		if (ports)
 			return *ports;
 	}
 
 	return 0;
 }
-EXPORT_SYMBOL(skb_flow_get_ports);
+EXPORT_SYMBOL(__skb_flow_get_ports);
 
-bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
+bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, void *data, int hlen)
 {
 	int nhoff = skb_network_offset(skb);
 	u8 ip_proto;
 	__be16 proto = skb->protocol;
 
+	if (!data) {
+		data = skb->data;
+		hlen = skb_headlen(skb);
+	}
+
 	memset(flow, 0, sizeof(*flow));
 
 again:
@@ -65,7 +76,7 @@ again:
 		const struct iphdr *iph;
 		struct iphdr _iph;
 ip:
-		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
 		if (!iph || iph->ihl < 5)
 			return false;
 		nhoff += iph->ihl * 4;
@@ -83,7 +94,7 @@ ip:
 		__be32 flow_label;
 
 ipv6:
-		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
 		if (!iph)
 			return false;
 
@@ -113,7 +124,7 @@ ipv6:
 		const struct vlan_hdr *vlan;
 		struct vlan_hdr _vlan;
 
-		vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
+		vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
 		if (!vlan)
 			return false;
 
@@ -126,7 +137,7 @@ ipv6:
 			struct pppoe_hdr hdr;
 			__be16 proto;
 		} *hdr, _hdr;
-		hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
 		if (!hdr)
 			return false;
 		proto = hdr->proto;
@@ -151,7 +162,7 @@ ipv6:
 			__be16 proto;
 		} *hdr, _hdr;
 
-		hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
 		if (!hdr)
 			return false;
 		/*
@@ -171,8 +182,9 @@ ipv6:
 				const struct ethhdr *eth;
 				struct ethhdr _eth;
 
-				eth = skb_header_pointer(skb, nhoff,
-							 sizeof(_eth), &_eth);
+				eth = __skb_header_pointer(skb, nhoff,
+							   sizeof(_eth),
+							   data, hlen, &_eth);
 				if (!eth)
 					return false;
 				proto = eth->h_proto;
@@ -194,12 +206,12 @@ ipv6:
 
 	flow->n_proto = proto;
 	flow->ip_proto = ip_proto;
-	flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto);
+	flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
 	flow->thoff = (u16) nhoff;
 
 	return true;
 }
-EXPORT_SYMBOL(skb_flow_dissect);
+EXPORT_SYMBOL(__skb_flow_dissect);
 
 static u32 hashrnd __read_mostly;
 static __always_inline void __flow_hash_secret_init(void)
-- 
cgit v1.2.3


From 179033b3e064d2cd3f5f9945e76b0a0f0fbf4883 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Aug 2014 11:48:26 -0400
Subject: perf: Add PERF_EVENT_STATE_EXIT state for events with exited task

Adding new perf event state to indicate that the monitored task has
exited.  In this case the event stays alive until the owner task exits
or close the event fd while providing the last data through the read
syscall and ring buffer.

Instead it needs to propagate the error info (monitored task has died)
via poll and read  syscalls by  returning POLLHUP and 0 respectively.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140811120102.GY9918@twins.programming.kicks-ass.net
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jean Pihet <jean.pihet@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-t5y3w8jjx6tfo5w8y6oajsjq@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f0a1036b1911..893a0d07986f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -269,6 +269,7 @@ struct pmu {
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
+	PERF_EVENT_STATE_EXIT		= -3,
 	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	=  0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4575dd6e59ea..d8cb4d21a346 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3600,7 +3600,8 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
 	 * error state (i.e. because it was pinned but it couldn't be
 	 * scheduled on to the CPU at some point).
 	 */
-	if (event->state == PERF_EVENT_STATE_ERROR)
+	if ((event->state == PERF_EVENT_STATE_ERROR) ||
+	    (event->state == PERF_EVENT_STATE_EXIT))
 		return 0;
 
 	if (count < event->read_size)
@@ -3630,6 +3631,10 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	unsigned int events = POLLHUP;
 
 	poll_wait(file, &event->waitq, wait);
+
+	if (event->state == PERF_EVENT_STATE_EXIT)
+		return events;
+
 	/*
 	 * Pin the event->rb by taking event->mmap_mutex; otherwise
 	 * perf_event_set_output() can swizzle our rb and make us miss wakeups.
@@ -7588,6 +7593,9 @@ __perf_event_exit_task(struct perf_event *child_event,
 	if (child_event->parent) {
 		sync_child_event(child_event, child);
 		free_event(child_event);
+	} else {
+		child_event->state = PERF_EVENT_STATE_EXIT;
+		perf_event_wakeup(child_event);
 	}
 }
 
-- 
cgit v1.2.3


From 1b05756c48ea07ced9604ef01d11194d936da163 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 5 Aug 2014 22:02:34 +0200
Subject: netfilter: ipset: Fix warn: integer overflows 'sizeof(*map) + size *
 set->dsize'

Dan Carpenter reported that the static checker emits the warning

        net/netfilter/ipset/ip_set_list_set.c:600 init_list_set()
        warn: integer overflows 'sizeof(*map) + size * set->dsize'

Limit the maximal number of elements in list type of sets.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set_list.h | 1 +
 net/netfilter/ipset/ip_set_list_set.c       | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set_list.h b/include/linux/netfilter/ipset/ip_set_list.h
index 68c2aea897f5..fe2622a00151 100644
--- a/include/linux/netfilter/ipset/ip_set_list.h
+++ b/include/linux/netfilter/ipset/ip_set_list.h
@@ -6,5 +6,6 @@
 
 #define IP_SET_LIST_DEFAULT_SIZE	8
 #define IP_SET_LIST_MIN_SIZE		4
+#define IP_SET_LIST_MAX_SIZE		65536
 
 #endif /* __IP_SET_LIST_H */
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 3e2317f3cf68..f87adbad6076 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -597,7 +597,9 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
 	struct set_elem *e;
 	u32 i;
 
-	map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL);
+	map = kzalloc(sizeof(*map) +
+		      min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize,
+		      GFP_KERNEL);
 	if (!map)
 		return false;
 
-- 
cgit v1.2.3


From 573e8fca255a27e3573b51f9b183d62641c47a3d Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 22 Aug 2014 13:33:47 -0700
Subject: net: skb_gro_checksum_* functions

Add skb_gro_checksum_validate, skb_gro_checksum_validate_zero_check,
and skb_gro_checksum_simple_validate, and __skb_gro_checksum_complete.
These are the cognates of the normal checksum functions but are used
in the gro_receive path and operate on GRO related fields in sk_buffs.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 76 +++++++++++++++++++++++++++++++++++++++++++++--
 net/core/dev.c            | 34 ++++++++++++++++++++-
 2 files changed, 107 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7e2b0b8b5cd7..eb73444e1bd0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1883,7 +1883,13 @@ struct napi_gro_cb {
 	u16	proto;
 
 	/* Used in udp_gro_receive */
-	u16	udp_mark;
+	u8	udp_mark:1;
+
+	/* GRO checksum is valid */
+	u8	csum_valid:1;
+
+	/* Number encapsulation layers crossed */
+	u8	encapsulation;
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
@@ -2154,11 +2160,77 @@ static inline void *skb_gro_network_header(struct sk_buff *skb)
 static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
 					const void *start, unsigned int len)
 {
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (NAPI_GRO_CB(skb)->csum_valid)
 		NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum,
 						  csum_partial(start, len, 0));
 }
 
+/* GRO checksum functions. These are logical equivalents of the normal
+ * checksum functions (in skbuff.h) except that they operate on the GRO
+ * offsets and fields in sk_buff.
+ */
+
+__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
+
+static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
+						      bool zero_okay,
+						      __sum16 check)
+{
+	return (skb->ip_summed != CHECKSUM_PARTIAL &&
+		(skb->ip_summed != CHECKSUM_UNNECESSARY ||
+		 (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) &&
+		(!zero_okay || check));
+}
+
+static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
+							   __wsum psum)
+{
+	if (NAPI_GRO_CB(skb)->csum_valid &&
+	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
+		return 0;
+
+	NAPI_GRO_CB(skb)->csum = psum;
+
+	return __skb_gro_checksum_complete(skb);
+}
+
+/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level
+ * checksum or an encapsulated one during GRO. This saves work
+ * if we fallback to normal path with the packet.
+ */
+static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		if (NAPI_GRO_CB(skb)->encapsulation)
+			skb->encapsulation = 1;
+	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->encapsulation = 0;
+	}
+}
+
+#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
+				    compute_pseudo)			\
+({									\
+	__sum16 __ret = 0;						\
+	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
+		__ret = __skb_gro_checksum_validate_complete(skb,	\
+				compute_pseudo(skb, proto));		\
+	if (!__ret)							\
+		skb_gro_incr_csum_unnecessary(skb);			\
+	__ret;								\
+})
+
+#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
+	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)
+
+#define skb_gro_checksum_validate_zero_check(skb, proto, check,		\
+					     compute_pseudo)		\
+	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)
+
+#define skb_gro_checksum_simple_validate(skb)				\
+	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
diff --git a/net/core/dev.c b/net/core/dev.c
index 1421dad4cb29..b6a718ec11c1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3962,7 +3962,13 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		goto normal;
 
 	gro_list_prepare(napi, skb);
-	NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		NAPI_GRO_CB(skb)->csum = skb->csum;
+		NAPI_GRO_CB(skb)->csum_valid = 1;
+	} else {
+		NAPI_GRO_CB(skb)->csum_valid = 0;
+	}
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -3975,6 +3981,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 		NAPI_GRO_CB(skb)->udp_mark = 0;
+		NAPI_GRO_CB(skb)->encapsulation = 0;
 
 		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 		break;
@@ -4205,6 +4212,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
+/* Compute the checksum from gro_offset and return the folded value
+ * after adding in any pseudo checksum.
+ */
+__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
+{
+	__wsum wsum;
+	__sum16 sum;
+
+	wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
+
+	/* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
+	sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
+	if (likely(!sum)) {
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    !skb->csum_complete_sw)
+			netdev_rx_csum_fault(skb->dev);
+	}
+
+	NAPI_GRO_CB(skb)->csum = wsum;
+	NAPI_GRO_CB(skb)->csum_valid = 1;
+
+	return sum;
+}
+EXPORT_SYMBOL(__skb_gro_checksum_complete);
+
 /*
  * net_rps_action_and_irq_enable sends any pending IPI's for rps.
  * Note: called with local irq disabled, but exits with local irq enabled.
-- 
cgit v1.2.3


From a98406e22c12e514bac28fec0a49dc793edaf3a8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sat, 23 Aug 2014 17:03:28 +0200
Subject: random32: improvements to prandom_bytes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch addresses a couple of minor items, mostly addesssing
prandom_bytes(): 1) prandom_bytes{,_state}() should use size_t
for length arguments, 2) We can use put_unaligned() when filling
the array instead of open coding it [ perhaps some archs will
further benefit from their own arch specific implementation when
GCC cannot make up for it ], 3) Fix a typo, 4) Better use unsigned
int as type for getting the arch seed, 5) Make use of
prandom_u32_max() for timer slack.

Regarding the change to put_unaligned(), callers of prandom_bytes()
which internally invoke prandom_bytes_state(), don't bother as
they expect the array to be filled randomly and don't have any
control of the internal state what-so-ever (that's also why we
have periodic reseeding there, etc), so they really don't care.

Now for the direct callers of prandom_bytes_state(), which
are solely located in test cases for MTD devices, that is,
drivers/mtd/tests/{oobtest.c,pagetest.c,subpagetest.c}:

These tests basically fill a test write-vector through
prandom_bytes_state() with an a-priori defined seed each time
and write that to a MTD device. Later on, they set up a read-vector
and read back that blocks from the device. So in the verification
phase, the write-vector is being re-setup [ so same seed and
prandom_bytes_state() called ], and then memcmp()'ed against the
read-vector to check if the data is the same.

Akinobu, Lothar and I also tested this patch and it runs through
the 3 relevant MTD test cases w/o any errors on the nandsim device
(simulator for MTD devs) for x86_64, ppc64, ARM (i.MX28, i.MX53
and i.MX6):

  # modprobe nandsim first_id_byte=0x20 second_id_byte=0xac \
                     third_id_byte=0x00 fourth_id_byte=0x15
  # modprobe mtd_oobtest dev=0
  # modprobe mtd_pagetest dev=0
  # modprobe mtd_subpagetest dev=0

We also don't have any users depending directly on a particular
result of the PRNG (except the PRNG self-test itself), and that's
just fine as it e.g. allowed us easily to do things like upgrading
from taus88 to taus113.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Tested-by: Akinobu Mita <akinobu.mita@gmail.com>
Tested-by: Lothar Waßmann <LW@KARO-electronics.de>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/random.h |  4 ++--
 lib/random32.c         | 39 ++++++++++++++++++---------------------
 2 files changed, 20 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 57fbbffd77a0..b05856e16b75 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -26,7 +26,7 @@ unsigned int get_random_int(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
 u32 prandom_u32(void);
-void prandom_bytes(void *buf, int nbytes);
+void prandom_bytes(void *buf, size_t nbytes);
 void prandom_seed(u32 seed);
 void prandom_reseed_late(void);
 
@@ -35,7 +35,7 @@ struct rnd_state {
 };
 
 u32 prandom_u32_state(struct rnd_state *state);
-void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
+void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
 
 /**
  * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro)
diff --git a/lib/random32.c b/lib/random32.c
index c9b6bf3afe0c..0bee183fa18f 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -37,6 +37,7 @@
 #include <linux/jiffies.h>
 #include <linux/random.h>
 #include <linux/sched.h>
+#include <asm/unaligned.h>
 
 #ifdef CONFIG_RANDOM32_SELFTEST
 static void __init prandom_state_selftest(void);
@@ -96,27 +97,23 @@ EXPORT_SYMBOL(prandom_u32);
  *	This is used for pseudo-randomness with no outside seeding.
  *	For more random results, use prandom_bytes().
  */
-void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes)
+void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes)
 {
-	unsigned char *p = buf;
-	int i;
-
-	for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) {
-		u32 random = prandom_u32_state(state);
-		int j;
+	u8 *ptr = buf;
 
-		for (j = 0; j < sizeof(u32); j++) {
-			p[i + j] = random;
-			random >>= BITS_PER_BYTE;
-		}
+	while (bytes >= sizeof(u32)) {
+		put_unaligned(prandom_u32_state(state), (u32 *) ptr);
+		ptr += sizeof(u32);
+		bytes -= sizeof(u32);
 	}
-	if (i < bytes) {
-		u32 random = prandom_u32_state(state);
 
-		for (; i < bytes; i++) {
-			p[i] = random;
-			random >>= BITS_PER_BYTE;
-		}
+	if (bytes > 0) {
+		u32 rem = prandom_u32_state(state);
+		do {
+			*ptr++ = (u8) rem;
+			bytes--;
+			rem >>= BITS_PER_BYTE;
+		} while (bytes > 0);
 	}
 }
 EXPORT_SYMBOL(prandom_bytes_state);
@@ -126,7 +123,7 @@ EXPORT_SYMBOL(prandom_bytes_state);
  *	@buf: where to copy the pseudo-random bytes to
  *	@bytes: the requested number of bytes
  */
-void prandom_bytes(void *buf, int bytes)
+void prandom_bytes(void *buf, size_t bytes)
 {
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
 
@@ -137,7 +134,7 @@ EXPORT_SYMBOL(prandom_bytes);
 
 static void prandom_warmup(struct rnd_state *state)
 {
-	/* Calling RNG ten times to satify recurrence condition */
+	/* Calling RNG ten times to satisfy recurrence condition */
 	prandom_u32_state(state);
 	prandom_u32_state(state);
 	prandom_u32_state(state);
@@ -152,7 +149,7 @@ static void prandom_warmup(struct rnd_state *state)
 
 static u32 __extract_hwseed(void)
 {
-	u32 val = 0;
+	unsigned int val = 0;
 
 	(void)(arch_get_random_seed_int(&val) ||
 	       arch_get_random_int(&val));
@@ -228,7 +225,7 @@ static void __prandom_timer(unsigned long dontcare)
 	prandom_seed(entropy);
 
 	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
-	expires = 40 + (prandom_u32() % 40);
+	expires = 40 + prandom_u32_max(40);
 	seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
 
 	add_timer(&seed_timer);
-- 
cgit v1.2.3


From 4798248e4e023170e937a65a1d30fcc52496dd42 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 22 Aug 2014 16:21:53 -0700
Subject: net: Add ops->ndo_xmit_flush()

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/dlci.c              |  2 +-
 drivers/usb/gadget/function/f_ncm.c |  2 +-
 include/linux/netdevice.h           | 35 +++++++++++++++++++++++++++++++++++
 net/atm/mpc.c                       |  2 +-
 net/core/dev.c                      |  5 ++---
 net/core/netpoll.c                  |  3 +--
 net/core/pktgen.c                   |  4 +---
 net/packet/af_packet.c              |  3 +--
 net/sched/sch_teql.c                |  3 +--
 9 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 43c9960dce1c..81b22a180aad 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -193,7 +193,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev)
 	struct dlci_local *dlp = netdev_priv(dev);
 
 	if (skb)
-		dlp->slave->netdev_ops->ndo_start_xmit(skb, dlp->slave);
+		netdev_start_xmit(skb, dlp->slave);
 	return NETDEV_TX_OK;
 }
 
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index bcdc882cd415..cb5d646db6a7 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1101,7 +1101,7 @@ static void ncm_tx_tasklet(unsigned long data)
 	/* Only send if data is available. */
 	if (ncm->skb_tx_data) {
 		ncm->timer_force_tx = true;
-		ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev);
+		netdev_start_xmit(NULL, ncm->netdev);
 		ncm->timer_force_tx = false;
 	}
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eb73444e1bd0..220c50984688 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -782,6 +782,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
  *	Required can not be NULL.
  *
+ * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
+ *	A driver implements this function when it wishes to support
+ *	deferred TX queue flushing.  The idea is that the expensive
+ *	operation to trigger TX queue processing can be done after
+ *	N calls to ndo_start_xmit rather than being done every single
+ *	time.  In this regime ndo_start_xmit will be called one or more
+ *	times, and then a final ndo_xmit_flush call will be made to
+ *	have the driver tell the device about the new pending TX queue
+ *	entries.  The kernel keeps track of which queues need flushing
+ *	by monitoring skb->queue_mapping of the packets it submits to
+ *	ndo_start_xmit.  This is the queue value that will be passed
+ *	to ndo_xmit_flush.
+ *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
  *	Called to decide which queue to when device supports multiple
@@ -1005,6 +1018,7 @@ struct net_device_ops {
 	int			(*ndo_stop)(struct net_device *dev);
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
+	void			(*ndo_xmit_flush)(struct net_device *dev, u16 queue);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
 						    void *accel_priv,
@@ -3430,6 +3444,27 @@ int __init dev_proc_init(void);
 #define dev_proc_init() 0
 #endif
 
+static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
+					      struct sk_buff *skb, struct net_device *dev)
+{
+	netdev_tx_t ret;
+	u16 q;
+
+	q = skb->queue_mapping;
+	ret = ops->ndo_start_xmit(skb, dev);
+	if (dev_xmit_complete(ret) && ops->ndo_xmit_flush)
+		ops->ndo_xmit_flush(dev, q);
+
+	return ret;
+}
+
+static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	return __netdev_start_xmit(ops, skb, dev);
+}
+
 int netdev_class_create_file_ns(struct class_attribute *class_attr,
 				const void *ns);
 void netdev_class_remove_file_ns(struct class_attribute *class_attr,
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index e8e0e7a8a23d..d662da161e5a 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
 	}
 
 non_ip:
-	return mpc->old_ops->ndo_start_xmit(skb, dev);
+	return __netdev_start_xmit(mpc->old_ops, skb, dev);
 }
 
 static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
diff --git a/net/core/dev.c b/net/core/dev.c
index b6a718ec11c1..26d296c2447c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2602,7 +2602,6 @@ EXPORT_SYMBOL(netif_skb_features);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc = NETDEV_TX_OK;
 	unsigned int skb_len;
 
@@ -2667,7 +2666,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 		skb_len = skb->len;
 		trace_net_dev_start_xmit(skb, dev);
-		rc = ops->ndo_start_xmit(skb, dev);
+		rc = netdev_start_xmit(skb, dev);
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
@@ -2686,7 +2685,7 @@ gso:
 
 		skb_len = nskb->len;
 		trace_net_dev_start_xmit(nskb, dev);
-		rc = ops->ndo_start_xmit(nskb, dev);
+		rc = netdev_start_xmit(nskb, dev);
 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 907fb5e36c02..a5ad06828d67 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644);
 static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			      struct netdev_queue *txq)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
 	int status = NETDEV_TX_OK;
 	netdev_features_t features;
 
@@ -92,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		skb->vlan_tci = 0;
 	}
 
-	status = ops->ndo_start_xmit(skb, dev);
+	status = netdev_start_xmit(skb, dev);
 	if (status == NETDEV_TX_OK)
 		txq_trans_update(txq);
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8b849ddfef2e..83e2b4b19eb7 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3285,8 +3285,6 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 {
 	struct net_device *odev = pkt_dev->odev;
-	netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *)
-		= odev->netdev_ops->ndo_start_xmit;
 	struct netdev_queue *txq;
 	u16 queue_map;
 	int ret;
@@ -3339,7 +3337,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		goto unlock;
 	}
 	atomic_inc(&(pkt_dev->skb->users));
-	ret = (*xmit)(pkt_dev->skb, odev);
+	ret = netdev_start_xmit(pkt_dev->skb, odev);
 
 	switch (ret) {
 	case NETDEV_TX_OK:
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 93896d2092f6..0dfa990d4eaa 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -240,7 +240,6 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
 static int packet_direct_xmit(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
-	const struct net_device_ops *ops = dev->netdev_ops;
 	netdev_features_t features;
 	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
@@ -262,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_xmit_frozen_or_drv_stopped(txq)) {
-		ret = ops->ndo_start_xmit(skb, dev);
+		ret = netdev_start_xmit(skb, dev);
 		if (ret == NETDEV_TX_OK)
 			txq_trans_update(txq);
 	}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index bd33793b527e..64cd93ca8104 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -301,7 +301,6 @@ restart:
 	do {
 		struct net_device *slave = qdisc_dev(q);
 		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
-		const struct net_device_ops *slave_ops = slave->netdev_ops;
 
 		if (slave_txq->qdisc_sleeping != q)
 			continue;
@@ -317,7 +316,7 @@ restart:
 				unsigned int length = qdisc_pkt_len(skb);
 
 				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
-				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
+				    netdev_start_xmit(skb, slave) == NETDEV_TX_OK) {
 					txq_trans_update(slave_txq);
 					__netif_tx_unlock(slave_txq);
 					master->slaves = NEXT_SLAVE(q);
-- 
cgit v1.2.3


From d1c85c2ebe7ffe1f1b27846bd1ba0944c513d822 Mon Sep 17 00:00:00 2001
From: Zhouyi Zhou <zhouzhouyi@gmail.com>
Date: Fri, 22 Aug 2014 10:40:15 +0800
Subject: netfilter: HAVE_JUMP_LABEL instead of CONFIG_JUMP_LABEL

Use HAVE_JUMP_LABEL as elsewhere in the kernel to ensure
that the toolchain has the required support in addition to
CONFIG_JUMP_LABEL being set.

Signed-off-by: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 5 +++--
 net/netfilter/core.c      | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2077489f9887..2517ece98820 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -9,6 +9,7 @@
 #include <linux/in6.h>
 #include <linux/wait.h>
 #include <linux/list.h>
+#include <linux/static_key.h>
 #include <uapi/linux/netfilter.h>
 #ifdef CONFIG_NETFILTER
 static inline int NF_DROP_GETERR(int verdict)
@@ -99,9 +100,9 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
 
 extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 
-#if defined(CONFIG_JUMP_LABEL)
-#include <linux/static_key.h>
+#ifdef HAVE_JUMP_LABEL
 extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+
 static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
 {
 	if (__builtin_constant_p(pf) &&
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a93c97f106d4..024a2e25c8a4 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
 struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
 
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks_needed);
 #endif
@@ -72,7 +72,7 @@ int nf_register_hook(struct nf_hook_ops *reg)
 	}
 	list_add_rcu(&reg->list, elem->list.prev);
 	mutex_unlock(&nf_hook_mutex);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 	static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
 	return 0;
@@ -84,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
 	mutex_lock(&nf_hook_mutex);
 	list_del_rcu(&reg->list);
 	mutex_unlock(&nf_hook_mutex);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 	static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
 	synchronize_net();
-- 
cgit v1.2.3


From 2ee507c472939db4b146d545352b8a7c79ef47f8 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Thu, 31 Jul 2014 10:29:48 -0700
Subject: sched: Add function single_task_running to let a task check if it is
 the only task running on a cpu

This function will help an async task processing batched jobs from
workqueue decide if it wants to keep processing on more chunks of batched
work that can be delayed, or to accumulate more work for more efficient
batched processing later.

If no other tasks are running on the cpu, the batching process can take
advantgae of the available cpu cycles to a make decision to continue
processing the existing accumulated work to minimize delay,
otherwise it will yield.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/sched.h |  1 +
 kernel/sched/core.c   | 12 ++++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..e6d2c056d8e0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -167,6 +167,7 @@ extern int nr_threads;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
+extern bool single_task_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ec1a286684a5..59965ec0b7de 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2366,6 +2366,18 @@ unsigned long nr_running(void)
 	return sum;
 }
 
+/*
+ * Check if only the current task is running on the cpu.
+ */
+bool single_task_running(void)
+{
+	if (cpu_rq(smp_processor_id())->nr_running == 1)
+		return true;
+	else
+		return false;
+}
+EXPORT_SYMBOL(single_task_running);
+
 unsigned long long nr_context_switches(void)
 {
 	int i;
-- 
cgit v1.2.3


From f153566570fb9e32c2f59182883f4f66048788fb Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Aug 2014 21:48:00 +0100
Subject: iio:trigger: modify return value for iio_trigger_get

Instead of a void function, return the trigger pointer.

Whilst not in of itself a fix, this makes the following set of
7 fixes cleaner than they would otherwise be.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Cc: Stable@vger.kernel.org
---
 include/linux/iio/trigger.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 4b79ffe7b188..fa76c79a52a1 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -84,10 +84,12 @@ static inline void iio_trigger_put(struct iio_trigger *trig)
 	put_device(&trig->dev);
 }
 
-static inline void iio_trigger_get(struct iio_trigger *trig)
+static inline struct iio_trigger *iio_trigger_get(struct iio_trigger *trig)
 {
 	get_device(&trig->dev);
 	__module_get(trig->ops->owner);
+
+	return trig;
 }
 
 /**
-- 
cgit v1.2.3


From 48ea526a6877d605c961aa37fae33f3227b29424 Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Mon, 25 Aug 2014 16:06:53 +0300
Subject: net/mlx4: Use is_kdump_kernel() to detect kdump kernel

Use is_kdump_kernel() to detect kdump kernel, instead of reset_devices.

Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 071f6b234604..783dd099abd1 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -38,6 +38,7 @@
 #include <linux/completion.h>
 #include <linux/radix-tree.h>
 #include <linux/cpu_rmap.h>
+#include <linux/crash_dump.h>
 
 #include <linux/atomic.h>
 
@@ -1275,7 +1276,7 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
 /* Returns true if running in low memory profile (kdump kernel) */
 static inline bool mlx4_low_memory_profile(void)
 {
-	return reset_devices;
+	return is_kdump_kernel();
 }
 
 #endif /* MLX4_DEVICE_H */
-- 
cgit v1.2.3


From 7d5929c1f34304ca5a970cfde8044053e56aa8c9 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Mon, 25 Aug 2014 16:15:32 -0700
Subject: mtd: nand: omap: Revert to using software ECC by default

For v3.12 and prior, 1-bit Hamming code ECC via software was the
default choice. Commit c66d039197e4 in v3.13 changed the behaviour
to use 1-bit Hamming code via Hardware using a different ECC layout
i.e. (ROM code layout) than what is used by software ECC.

This ECC layout change causes NAND filesystems created in v3.12
and prior to be unusable in v3.13 and later. So revert back to
using software ECC by default if an ECC scheme is not explicitely
specified.

This defect can be observed on the following boards during legacy boot

-omap3beagle
-omap3touchbook
-overo
-am3517crane
-devkit8000
-ldp
-3430sdp

Signed-off-by: Roger Quadros <rogerq@ti.com>
Tested-by: Grazvydas Ignotas <notasas@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/board-flash.c            |  2 +-
 arch/arm/mach-omap2/gpmc-nand.c              |  3 ++-
 drivers/mtd/nand/omap2.c                     | 14 +++++++++++---
 include/linux/platform_data/mtd-nand-omap2.h | 13 +++++++++++--
 4 files changed, 25 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c
index e87f2a83d6bf..2d245c2e641c 100644
--- a/arch/arm/mach-omap2/board-flash.c
+++ b/arch/arm/mach-omap2/board-flash.c
@@ -142,7 +142,7 @@ __init board_nand_init(struct mtd_partition *nand_parts, u8 nr_parts, u8 cs,
 	board_nand_data.nr_parts	= nr_parts;
 	board_nand_data.devsize		= nand_type;
 
-	board_nand_data.ecc_opt = OMAP_ECC_HAM1_CODE_HW;
+	board_nand_data.ecc_opt = OMAP_ECC_HAM1_CODE_SW;
 	gpmc_nand_init(&board_nand_data, gpmc_t);
 }
 #endif /* CONFIG_MTD_NAND_OMAP2 || CONFIG_MTD_NAND_OMAP2_MODULE */
diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c
index 8897ad7035fd..cb7764314f17 100644
--- a/arch/arm/mach-omap2/gpmc-nand.c
+++ b/arch/arm/mach-omap2/gpmc-nand.c
@@ -49,7 +49,8 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt)
 		return 0;
 
 	/* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */
-	if (ecc_opt == OMAP_ECC_HAM1_CODE_HW)
+	if (ecc_opt == OMAP_ECC_HAM1_CODE_HW ||
+	    ecc_opt == OMAP_ECC_HAM1_CODE_SW)
 		return 1;
 	else
 		return 0;
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index f0ed92e210a1..4dd617897eee 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1794,9 +1794,12 @@ static int omap_nand_probe(struct platform_device *pdev)
 	}
 
 	/* populate MTD interface based on ECC scheme */
-	nand_chip->ecc.layout	= &omap_oobinfo;
 	ecclayout		= &omap_oobinfo;
 	switch (info->ecc_opt) {
+	case OMAP_ECC_HAM1_CODE_SW:
+		nand_chip->ecc.mode = NAND_ECC_SOFT;
+		break;
+
 	case OMAP_ECC_HAM1_CODE_HW:
 		pr_info("nand: using OMAP_ECC_HAM1_CODE_HW\n");
 		nand_chip->ecc.mode             = NAND_ECC_HW;
@@ -1848,7 +1851,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 		nand_chip->ecc.priv		= nand_bch_init(mtd,
 							nand_chip->ecc.size,
 							nand_chip->ecc.bytes,
-							&nand_chip->ecc.layout);
+							&ecclayout);
 		if (!nand_chip->ecc.priv) {
 			pr_err("nand: error: unable to use s/w BCH library\n");
 			err = -EINVAL;
@@ -1923,7 +1926,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 		nand_chip->ecc.priv		= nand_bch_init(mtd,
 							nand_chip->ecc.size,
 							nand_chip->ecc.bytes,
-							&nand_chip->ecc.layout);
+							&ecclayout);
 		if (!nand_chip->ecc.priv) {
 			pr_err("nand: error: unable to use s/w BCH library\n");
 			err = -EINVAL;
@@ -2012,6 +2015,9 @@ static int omap_nand_probe(struct platform_device *pdev)
 		goto return_error;
 	}
 
+	if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW)
+		goto scan_tail;
+
 	/* all OOB bytes from oobfree->offset till end off OOB are free */
 	ecclayout->oobfree->length = mtd->oobsize - ecclayout->oobfree->offset;
 	/* check if NAND device's OOB is enough to store ECC signatures */
@@ -2021,7 +2027,9 @@ static int omap_nand_probe(struct platform_device *pdev)
 		err = -EINVAL;
 		goto return_error;
 	}
+	nand_chip->ecc.layout = ecclayout;
 
+scan_tail:
 	/* second phase scan */
 	if (nand_scan_tail(mtd)) {
 		err = -ENXIO;
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index 660c029d694f..16ec262dfcc8 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -21,8 +21,17 @@ enum nand_io {
 };
 
 enum omap_ecc {
-	/* 1-bit  ECC calculation by GPMC, Error detection by Software */
-	OMAP_ECC_HAM1_CODE_HW = 0,
+	/*
+	 * 1-bit ECC: calculation and correction by SW
+	 * ECC stored at end of spare area
+	 */
+	OMAP_ECC_HAM1_CODE_SW = 0,
+
+	/*
+	 * 1-bit ECC: calculation by GPMC, Error detection by Software
+	 * ECC layout compatible with ROM code layout
+	 */
+	OMAP_ECC_HAM1_CODE_HW,
 	/* 4-bit  ECC calculation by GPMC, Error detection by Software */
 	OMAP_ECC_BCH4_CODE_HW_DETECTION_SW,
 	/* 4-bit  ECC calculation by GPMC, Error detection by ELM */
-- 
cgit v1.2.3


From 0b725a2ca61bedc33a2a63d0451d528b268cf975 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 25 Aug 2014 15:51:53 -0700
Subject: net: Remove ndo_xmit_flush netdev operation, use signalling instead.

As reported by Jesper Dangaard Brouer, for high packet rates the
overhead of having another indirect call in the TX path is
non-trivial.

There is the indirect call itself, and then there is all of the
reloading of the state to refetch the tail pointer value and
then write the device register.

Move to a more passive scheme, which requires very light modifications
to the device drivers.

The signal is a new skb->xmit_more value, if it is non-zero it means
that more SKBs are pending to be transmitted on the same queue as the
current SKB.  And therefore, the driver may elide the tail pointer
update.

Right now skb->xmit_more is always zero.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 36 +++++++++++--------------------
 drivers/net/virtio_net.c                  | 12 +++--------
 include/linux/netdevice.h                 | 25 ++-------------------
 include/linux/skbuff.h                    |  2 ++
 4 files changed, 19 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b9c020a05fb8..89c29b40d61c 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -136,7 +136,6 @@ static void igb_update_phy_info(unsigned long);
 static void igb_watchdog(unsigned long);
 static void igb_watchdog_task(struct work_struct *);
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
-static void igb_xmit_flush(struct net_device *netdev, u16 queue);
 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
 					  struct rtnl_link_stats64 *stats);
 static int igb_change_mtu(struct net_device *, int);
@@ -2076,7 +2075,6 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_open		= igb_open,
 	.ndo_stop		= igb_close,
 	.ndo_start_xmit		= igb_xmit_frame,
-	.ndo_xmit_flush		= igb_xmit_flush,
 	.ndo_get_stats64	= igb_get_stats64,
 	.ndo_set_rx_mode	= igb_set_rx_mode,
 	.ndo_set_mac_address	= igb_set_mac,
@@ -4917,6 +4915,14 @@ static void igb_tx_map(struct igb_ring *tx_ring,
 
 	tx_ring->next_to_use = i;
 
+	if (!skb->xmit_more) {
+		writel(i, tx_ring->tail);
+
+		/* we need this if more than one processor can write to our tail
+		 * at a time, it synchronizes IO on IA64/Altix systems
+		 */
+		mmiowb();
+	}
 	return;
 
 dma_error:
@@ -5052,20 +5058,17 @@ out_drop:
 	return NETDEV_TX_OK;
 }
 
-static struct igb_ring *__igb_tx_queue_mapping(struct igb_adapter *adapter, unsigned int r_idx)
+static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
+						    struct sk_buff *skb)
 {
+	unsigned int r_idx = skb->queue_mapping;
+
 	if (r_idx >= adapter->num_tx_queues)
 		r_idx = r_idx % adapter->num_tx_queues;
 
 	return adapter->tx_ring[r_idx];
 }
 
-static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
-						    struct sk_buff *skb)
-{
-	return __igb_tx_queue_mapping(adapter, skb->queue_mapping);
-}
-
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
 				  struct net_device *netdev)
 {
@@ -5094,21 +5097,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
 	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
 }
 
-static void igb_xmit_flush(struct net_device *netdev, u16 queue)
-{
-	struct igb_adapter *adapter = netdev_priv(netdev);
-	struct igb_ring *tx_ring;
-
-	tx_ring = __igb_tx_queue_mapping(adapter, queue);
-
-	writel(tx_ring->next_to_use, tx_ring->tail);
-
-	/* we need this if more than one processor can write to our tail
-	 * at a time, it synchronizes IO on IA64/Altix systems
-	 */
-	mmiowb();
-}
-
 /**
  *  igb_tx_timeout - Respond to a Tx Hang
  *  @netdev: network interface device structure
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 62421086d3e6..f0c2824f5e0f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -953,15 +953,10 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	return NETDEV_TX_OK;
-}
+	if (!skb->xmit_more)
+		virtqueue_kick(sq->vq);
 
-static void xmit_flush(struct net_device *dev, u16 qnum)
-{
-	struct virtnet_info *vi = netdev_priv(dev);
-	struct send_queue *sq = &vi->sq[qnum];
-
-	virtqueue_kick(sq->vq);
+	return NETDEV_TX_OK;
 }
 
 /*
@@ -1393,7 +1388,6 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
 	.ndo_start_xmit      = start_xmit,
-	.ndo_xmit_flush      = xmit_flush,
 	.ndo_validate_addr   = eth_validate_addr,
 	.ndo_set_mac_address = virtnet_set_mac_address,
 	.ndo_set_rx_mode     = virtnet_set_rx_mode,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 220c50984688..039b23786c22 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -782,19 +782,6 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
  *	Required can not be NULL.
  *
- * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
- *	A driver implements this function when it wishes to support
- *	deferred TX queue flushing.  The idea is that the expensive
- *	operation to trigger TX queue processing can be done after
- *	N calls to ndo_start_xmit rather than being done every single
- *	time.  In this regime ndo_start_xmit will be called one or more
- *	times, and then a final ndo_xmit_flush call will be made to
- *	have the driver tell the device about the new pending TX queue
- *	entries.  The kernel keeps track of which queues need flushing
- *	by monitoring skb->queue_mapping of the packets it submits to
- *	ndo_start_xmit.  This is the queue value that will be passed
- *	to ndo_xmit_flush.
- *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
  *	Called to decide which queue to when device supports multiple
@@ -1018,7 +1005,6 @@ struct net_device_ops {
 	int			(*ndo_stop)(struct net_device *dev);
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
-	void			(*ndo_xmit_flush)(struct net_device *dev, u16 queue);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
 						    void *accel_priv,
@@ -3447,15 +3433,8 @@ int __init dev_proc_init(void);
 static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
 					      struct sk_buff *skb, struct net_device *dev)
 {
-	netdev_tx_t ret;
-	u16 q;
-
-	q = skb->queue_mapping;
-	ret = ops->ndo_start_xmit(skb, dev);
-	if (dev_xmit_complete(ret) && ops->ndo_xmit_flush)
-		ops->ndo_xmit_flush(dev, q);
-
-	return ret;
+	skb->xmit_more = 0;
+	return ops->ndo_start_xmit(skb, dev);
 }
 
 static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 18ddf9684a27..9b3802a197a8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -452,6 +452,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
  *	@tc_verd: traffic control verdict
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
+ *	@xmit_more: More SKBs are pending for this queue
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -558,6 +559,7 @@ struct sk_buff {
 
 	__u16			queue_mapping;
 	kmemcheck_bitfield_begin(flags2);
+	__u8			xmit_more:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
-- 
cgit v1.2.3


From b4bbb107d73bbc0d92c9ae7fd8e69580aa9381e7 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 27 Jun 2014 11:56:58 +0200
Subject: dma-mapping: Provide write-combine allocations

Provide an implementation for dma_{alloc,free,mmap}_writecombine() when
the architecture supports DMA attributes.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/include/asm/dma-mapping.h       | 16 ----------------
 include/asm-generic/dma-mapping-common.h |  8 --------
 include/linux/dma-mapping.h              | 26 ++++++++++++++++++++++++++
 3 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index c45b61a4b4a5..85738b200023 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -265,22 +265,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size,
 			struct dma_attrs *attrs);
 
-static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
-				       dma_addr_t *dma_handle, gfp_t flag)
-{
-	DEFINE_DMA_ATTRS(attrs);
-	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
-	return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs);
-}
-
-static inline void dma_free_writecombine(struct device *dev, size_t size,
-				     void *cpu_addr, dma_addr_t dma_handle)
-{
-	DEFINE_DMA_ATTRS(attrs);
-	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
-	return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs);
-}
-
 /*
  * This can be called during early boot to increase the size of the atomic
  * coherent DMA pool above the default value of 256KiB. It must be called
diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h
index de8bf89940f8..d137431bf26f 100644
--- a/include/asm-generic/dma-mapping-common.h
+++ b/include/asm-generic/dma-mapping-common.h
@@ -205,14 +205,6 @@ dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
 
 #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL)
 
-static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
-		      void *cpu_addr, dma_addr_t dma_addr, size_t size)
-{
-	DEFINE_DMA_ATTRS(attrs);
-	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
-	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
-}
-
 int
 dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
 		       void *cpu_addr, dma_addr_t dma_addr, size_t size);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 931b70986272..d5d388160f42 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -263,6 +263,32 @@ struct dma_attrs;
 #define dma_unmap_sg_attrs(dev, sgl, nents, dir, attrs) \
 	dma_unmap_sg(dev, sgl, nents, dir)
 
+#else
+static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
+					   dma_addr_t *dma_addr, gfp_t gfp)
+{
+	DEFINE_DMA_ATTRS(attrs);
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs);
+}
+
+static inline void dma_free_writecombine(struct device *dev, size_t size,
+					 void *cpu_addr, dma_addr_t dma_addr)
+{
+	DEFINE_DMA_ATTRS(attrs);
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs);
+}
+
+static inline int dma_mmap_writecombine(struct device *dev,
+					struct vm_area_struct *vma,
+					void *cpu_addr, dma_addr_t dma_addr,
+					size_t size)
+{
+	DEFINE_DMA_ATTRS(attrs);
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
+}
 #endif /* CONFIG_HAVE_DMA_ATTRS */
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
-- 
cgit v1.2.3


From 170fd0b1f6108b48df4369afa0ee29a83e922748 Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Wed, 30 Jul 2014 14:36:18 +0300
Subject: ieee80211: Support parsing TPC report element in action frames

TPC report element is contained in spectrum management's tpc report
action frames and in radio measurement's link measurement report
action frames. Add a function which checks whether an action frame
contains this element. This may be needed by the drivers in order
to set the correct tx power value in these frames.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 63ab3873c5ed..8018c915ee63 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -838,6 +838,16 @@ enum ieee80211_vht_opmode_bits {
 
 #define WLAN_SA_QUERY_TR_ID_LEN 2
 
+/**
+ * struct ieee80211_tpc_report_ie
+ *
+ * This structure refers to "TPC Report element"
+ */
+struct ieee80211_tpc_report_ie {
+	u8 tx_power;
+	u8 link_margin;
+} __packed;
+
 struct ieee80211_mgmt {
 	__le16 frame_control;
 	__le16 duration;
@@ -973,6 +983,13 @@ struct ieee80211_mgmt {
 					u8 action_code;
 					u8 operating_mode;
 				} __packed vht_opmode_notif;
+				struct {
+					u8 action_code;
+					u8 dialog_token;
+					u8 tpc_elem_id;
+					u8 tpc_elem_length;
+					struct ieee80211_tpc_report_ie tpc;
+				} __packed tpc_report;
 			} u;
 		} __packed action;
 	} u;
@@ -1865,6 +1882,7 @@ enum ieee80211_category {
 	WLAN_CATEGORY_DLS = 2,
 	WLAN_CATEGORY_BACK = 3,
 	WLAN_CATEGORY_PUBLIC = 4,
+	WLAN_CATEGORY_RADIO_MEASUREMENT = 5,
 	WLAN_CATEGORY_HT = 7,
 	WLAN_CATEGORY_SA_QUERY = 8,
 	WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9,
@@ -2378,4 +2396,51 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim,
 #define TU_TO_JIFFIES(x)	(usecs_to_jiffies((x) * 1024))
 #define TU_TO_EXP_TIME(x)	(jiffies + TU_TO_JIFFIES(x))
 
+/**
+ * ieee80211_action_contains_tpc - checks if the frame contains TPC element
+ * @skb: the skb containing the frame, length will be checked
+ *
+ * This function checks if it's either TPC report action frame or Link
+ * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5
+ * and 8.5.7.5 accordingly.
+ */
+static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *mgmt = (void *)skb->data;
+
+	if (!ieee80211_is_action(mgmt->frame_control))
+		return false;
+
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE +
+		       sizeof(mgmt->u.action.u.tpc_report))
+		return false;
+
+	/*
+	 * TPC report - check that:
+	 * category = 0 (Spectrum Management) or 5 (Radio Measurement)
+	 * spectrum management action = 3 (TPC/Link Measurement report)
+	 * TPC report EID = 35
+	 * TPC report element length = 2
+	 *
+	 * The spectrum management's tpc_report struct is used here both for
+	 * parsing tpc_report and radio measurement's link measurement report
+	 * frame, since the relevant part is identical in both frames.
+	 */
+	if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT &&
+	    mgmt->u.action.category != WLAN_CATEGORY_RADIO_MEASUREMENT)
+		return false;
+
+	/* both spectrum mgmt and link measurement have same action code */
+	if (mgmt->u.action.u.tpc_report.action_code !=
+	    WLAN_ACTION_SPCT_TPC_RPRT)
+		return false;
+
+	if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT ||
+	    mgmt->u.action.u.tpc_report.tpc_elem_length !=
+	    sizeof(struct ieee80211_tpc_report_ie))
+		return false;
+
+	return true;
+}
+
 #endif /* LINUX_IEEE80211_H */
-- 
cgit v1.2.3


From 4a32fea9d78f2d2315c0072757b197d5a304dc8b Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Sun, 17 Aug 2014 12:30:27 -0500
Subject: scheduler: Replace __get_cpu_var with this_cpu_ptr

Convert all uses of __get_cpu_var for address calculation to use
this_cpu_ptr instead.

[Uses of __get_cpu_var with cpumask_var_t are no longer
handled by this patch]

Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/kernel_stat.h   |  4 ++--
 kernel/events/callchain.c     |  4 ++--
 kernel/events/core.c          | 24 ++++++++++++------------
 kernel/sched/sched.h          |  4 ++--
 kernel/taskstats.c            |  2 +-
 kernel/time/tick-sched.c      |  4 ++--
 kernel/user-return-notifier.c |  4 ++--
 7 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index ecbc52f9ff77..8422b4ed6882 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -44,8 +44,8 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 
 /* Must have preemption disabled for this to be meaningful. */
-#define kstat_this_cpu (&__get_cpu_var(kstat))
-#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat))
+#define kstat_this_cpu this_cpu_ptr(&kstat)
+#define kcpustat_this_cpu this_cpu_ptr(&kernel_cpustat)
 #define kstat_cpu(cpu) per_cpu(kstat, cpu)
 #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
 
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 97b67df8fbfe..c4f63e68a35c 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -137,7 +137,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
 	int cpu;
 	struct callchain_cpus_entries *entries;
 
-	*rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+	*rctx = get_recursion_context(this_cpu_ptr(callchain_recursion));
 	if (*rctx == -1)
 		return NULL;
 
@@ -153,7 +153,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
 static void
 put_callchain_entry(int rctx)
 {
-	put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
 }
 
 struct perf_callchain_entry *
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cf24b3e42ec..4d44e40a0483 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -239,7 +239,7 @@ static void perf_duration_warn(struct irq_work *w)
 	u64 avg_local_sample_len;
 	u64 local_samples_len;
 
-	local_samples_len = __get_cpu_var(running_sample_length);
+	local_samples_len = __this_cpu_read(running_sample_length);
 	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
 
 	printk_ratelimited(KERN_WARNING
@@ -261,10 +261,10 @@ void perf_sample_event_took(u64 sample_len_ns)
 		return;
 
 	/* decay the counter by 1 average sample */
-	local_samples_len = __get_cpu_var(running_sample_length);
+	local_samples_len = __this_cpu_read(running_sample_length);
 	local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
 	local_samples_len += sample_len_ns;
-	__get_cpu_var(running_sample_length) = local_samples_len;
+	__this_cpu_write(running_sample_length, local_samples_len);
 
 	/*
 	 * note: this will be biased artifically low until we have
@@ -877,7 +877,7 @@ static DEFINE_PER_CPU(struct list_head, rotation_list);
 static void perf_pmu_rotate_start(struct pmu *pmu)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-	struct list_head *head = &__get_cpu_var(rotation_list);
+	struct list_head *head = this_cpu_ptr(&rotation_list);
 
 	WARN_ON(!irqs_disabled());
 
@@ -2389,7 +2389,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
 	 * to check if we have to switch out PMU state.
 	 * cgroup event are system-wide mode only
 	 */
-	if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
+	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
 		perf_cgroup_sched_out(task, next);
 }
 
@@ -2632,11 +2632,11 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	 * to check if we have to switch in PMU state.
 	 * cgroup event are system-wide mode only
 	 */
-	if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
+	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
 		perf_cgroup_sched_in(prev, task);
 
 	/* check for system-wide branch_stack events */
-	if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
+	if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
 		perf_branch_stack_sched_in(prev, task);
 }
 
@@ -2891,7 +2891,7 @@ bool perf_event_can_stop_tick(void)
 
 void perf_event_task_tick(void)
 {
-	struct list_head *head = &__get_cpu_var(rotation_list);
+	struct list_head *head = this_cpu_ptr(&rotation_list);
 	struct perf_cpu_context *cpuctx, *tmp;
 	struct perf_event_context *ctx;
 	int throttled;
@@ -5671,7 +5671,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 				    struct perf_sample_data *data,
 				    struct pt_regs *regs)
 {
-	struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
+	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
 	struct perf_event *event;
 	struct hlist_head *head;
 
@@ -5690,7 +5690,7 @@ end:
 
 int perf_swevent_get_recursion_context(void)
 {
-	struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
+	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
 
 	return get_recursion_context(swhash->recursion);
 }
@@ -5698,7 +5698,7 @@ EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
 inline void perf_swevent_put_recursion_context(int rctx)
 {
-	struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
+	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
 
 	put_recursion_context(swhash->recursion, rctx);
 }
@@ -5727,7 +5727,7 @@ static void perf_swevent_read(struct perf_event *event)
 
 static int perf_swevent_add(struct perf_event *event, int flags)
 {
-	struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
+	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
 	struct hw_perf_event *hwc = &event->hw;
 	struct hlist_head *head;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 579712f4e9d5..77d92f8130e8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,10 +650,10 @@ static inline int cpu_of(struct rq *rq)
 DECLARE_PER_CPU(struct rq, runqueues);
 
 #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
-#define this_rq()		(&__get_cpu_var(runqueues))
+#define this_rq()		this_cpu_ptr(&runqueues)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
-#define raw_rq()		(&__raw_get_cpu_var(runqueues))
+#define raw_rq()		raw_cpu_ptr(&runqueues)
 
 static inline u64 rq_clock(struct rq *rq)
 {
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 13d2f7cd65db..b312fcc73024 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -638,7 +638,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 		fill_tgid_exit(tsk);
 	}
 
-	listeners = __this_cpu_ptr(&listener_array);
+	listeners = raw_cpu_ptr(&listener_array);
 	if (list_empty(&listeners->list))
 		return;
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 73f90932282b..3cadc112519f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -924,7 +924,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
  */
 void tick_nohz_idle_exit(void)
 {
-	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
 	ktime_t now;
 
 	local_irq_disable();
@@ -1041,7 +1041,7 @@ static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now)
 
 static inline void tick_nohz_irq_enter(void)
 {
-	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
 	ktime_t now;
 
 	if (!ts->idle_active && !ts->tick_stopped)
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c
index 394f70b17162..9586b670a5b2 100644
--- a/kernel/user-return-notifier.c
+++ b/kernel/user-return-notifier.c
@@ -14,7 +14,7 @@ static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
 void user_return_notifier_register(struct user_return_notifier *urn)
 {
 	set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
-	hlist_add_head(&urn->link, &__get_cpu_var(return_notifier_list));
+	hlist_add_head(&urn->link, this_cpu_ptr(&return_notifier_list));
 }
 EXPORT_SYMBOL_GPL(user_return_notifier_register);
 
@@ -25,7 +25,7 @@ EXPORT_SYMBOL_GPL(user_return_notifier_register);
 void user_return_notifier_unregister(struct user_return_notifier *urn)
 {
 	hlist_del(&urn->link);
-	if (hlist_empty(&__get_cpu_var(return_notifier_list)))
+	if (hlist_empty(this_cpu_ptr(&return_notifier_list)))
 		clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
 }
 EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
-- 
cgit v1.2.3


From 47405a253da4d8ca4b18ad537423083fdd790440 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Sun, 17 Aug 2014 12:30:56 -0500
Subject: percpu: Remove __this_cpu_ptr

The __this_cpu_ptr macro is no longer in use so drop it.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu-defs.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index cfd56046ecec..420032d41d27 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -257,9 +257,6 @@ do {									\
 #define __raw_get_cpu_var(var)	(*raw_cpu_ptr(&(var)))
 #define __get_cpu_var(var)	(*this_cpu_ptr(&(var)))
 
-/* keep until we have removed all uses of __this_cpu_ptr */
-#define __this_cpu_ptr(ptr)	raw_cpu_ptr(ptr)
-
 /*
  * Must be an lvalue. Since @var must be a simple identifier,
  * we force a syntax error here if it isn't.
-- 
cgit v1.2.3


From bf3baca6c54ce8a2f51687296f868dfe20d33f13 Mon Sep 17 00:00:00 2001
From: James Ban <james.ban.opensource@diasemi.com>
Date: Wed, 27 Aug 2014 11:47:07 +0900
Subject: regulator: da9211: support device tree

This is a patch for supporting device tree of DA9211/DA9213.

Signed-off-by: James Ban <james.ban.opensource@diasemi.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 .../devicetree/bindings/regulator/da9211.txt       | 63 ++++++++++++++++
 drivers/regulator/da9211-regulator.c               | 85 ++++++++++++++++++++--
 include/linux/regulator/da9211.h                   |  2 +-
 3 files changed, 142 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/regulator/da9211.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/regulator/da9211.txt b/Documentation/devicetree/bindings/regulator/da9211.txt
new file mode 100644
index 000000000000..240019a82f9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/da9211.txt
@@ -0,0 +1,63 @@
+* Dialog Semiconductor DA9211/DA9213 Voltage Regulator
+
+Required properties:
+- compatible: "dlg,da9211" or "dlg,da9213".
+- reg: I2C slave address, usually 0x68.
+- interrupts: the interrupt outputs of the controller
+- regulators: A node that houses a sub-node for each regulator within the
+  device. Each sub-node is identified using the node's name, with valid
+  values listed below. The content of each sub-node is defined by the
+  standard binding for regulators; see regulator.txt.
+  BUCKA and BUCKB.
+
+Optional properties:
+- Any optional property defined in regulator.txt
+
+Example 1) DA9211
+
+	pmic: da9211@68 {
+		compatible = "dlg,da9211";
+		reg = <0x68>;
+		interrupts = <3 27>;
+
+		regulators {
+			BUCKA {
+				regulator-name = "VBUCKA";
+				regulator-min-microvolt = < 300000>;
+				regulator-max-microvolt = <1570000>;
+				regulator-min-microamp 	= <2000000>;
+				regulator-max-microamp 	= <5000000>;
+			};
+			BUCKB {
+				regulator-name = "VBUCKB";
+				regulator-min-microvolt = < 300000>;
+				regulator-max-microvolt = <1570000>;
+				regulator-min-microamp 	= <2000000>;
+				regulator-max-microamp 	= <5000000>;
+			};
+		};
+	};
+
+Example 2) DA92113
+	pmic: da9213@68 {
+		compatible = "dlg,da9213";
+		reg = <0x68>;
+		interrupts = <3 27>;
+
+		regulators {
+			BUCKA {
+				regulator-name = "VBUCKA";
+				regulator-min-microvolt = < 300000>;
+				regulator-max-microvolt = <1570000>;
+				regulator-min-microamp 	= <3000000>;
+				regulator-max-microamp 	= <6000000>;
+			};
+			BUCKB {
+				regulator-name = "VBUCKB";
+				regulator-min-microvolt = < 300000>;
+				regulator-max-microvolt = <1570000>;
+				regulator-min-microamp 	= <3000000>;
+				regulator-max-microamp 	= <6000000>;
+			};
+		};
+	};
diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c
index a26f1d283c57..5aabbac1b524 100644
--- a/drivers/regulator/da9211-regulator.c
+++ b/drivers/regulator/da9211-regulator.c
@@ -24,6 +24,7 @@
 #include <linux/regmap.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
+#include <linux/regulator/of_regulator.h>
 #include <linux/regulator/da9211.h>
 #include "da9211-regulator.h"
 
@@ -236,6 +237,59 @@ static struct regulator_desc da9211_regulators[] = {
 	DA9211_BUCK(BUCKB),
 };
 
+#ifdef CONFIG_OF
+static struct of_regulator_match da9211_matches[] = {
+	[DA9211_ID_BUCKA] = { .name = "BUCKA" },
+	[DA9211_ID_BUCKB] = { .name = "BUCKB" },
+	};
+
+static struct da9211_pdata *da9211_parse_regulators_dt(
+		struct device *dev)
+{
+	struct da9211_pdata *pdata;
+	struct device_node *node;
+	int i, num, n;
+
+	node = of_get_child_by_name(dev->of_node, "regulators");
+	if (!node) {
+		dev_err(dev, "regulators node not found\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	num = of_regulator_match(dev, node, da9211_matches,
+				 ARRAY_SIZE(da9211_matches));
+	of_node_put(node);
+	if (num < 0) {
+		dev_err(dev, "Failed to match regulators\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	pdata->num_buck = num;
+
+	n = 0;
+	for (i = 0; i < ARRAY_SIZE(da9211_matches); i++) {
+		if (!da9211_matches[i].init_data)
+			continue;
+
+		pdata->init_data[n] = da9211_matches[i].init_data;
+
+		n++;
+	};
+
+	return pdata;
+}
+#else
+static struct da9211_pdata *da9211_parse_regulators_dt(
+		struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
 static irqreturn_t da9211_irq_handler(int irq, void *data)
 {
 	struct da9211 *chip = data;
@@ -306,7 +360,7 @@ static int da9211_regulator_init(struct da9211 *chip)
 	}
 
 	for (i = 0; i < chip->num_regulator; i++) {
-		config.init_data = &(chip->pdata->init_data[i]);
+		config.init_data = chip->pdata->init_data[i];
 		config.dev = chip->dev;
 		config.driver_data = chip;
 		config.regmap = chip->regmap;
@@ -332,6 +386,21 @@ static int da9211_regulator_init(struct da9211 *chip)
 
 	return 0;
 }
+
+static const struct i2c_device_id da9211_i2c_id[] = {
+	{"da9211", DA9211},
+	{"da9213", DA9213},
+	{},
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id da9211_dt_ids[] = {
+	{ .compatible = "dlg,da9211", .data = &da9211_i2c_id[0] },
+	{ .compatible = "dlg,da9213", .data = &da9211_i2c_id[1] },
+	{},
+};
+#endif
+
 /*
  * I2C driver interface functions
  */
@@ -377,6 +446,14 @@ static int da9211_i2c_probe(struct i2c_client *i2c,
 		return -ENODEV;
 	}
 
+	if (!chip->pdata)
+		chip->pdata = da9211_parse_regulators_dt(chip->dev);
+
+	if (IS_ERR(chip->pdata)) {
+		dev_err(chip->dev, "No regulators defined for the platform\n");
+		return PTR_ERR(chip->pdata);
+	}
+
 	chip->chip_irq = i2c->irq;
 
 	if (chip->chip_irq != 0) {
@@ -401,12 +478,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c,
 	return ret;
 }
 
-static const struct i2c_device_id da9211_i2c_id[] = {
-	{"da9211", DA9211},
-	{"da9213", DA9213},
-	{},
-};
-
 MODULE_DEVICE_TABLE(i2c, da9211_i2c_id);
 
 static struct i2c_driver da9211_regulator_driver = {
diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h
index 658c3c33f4c0..5479394fefce 100644
--- a/include/linux/regulator/da9211.h
+++ b/include/linux/regulator/da9211.h
@@ -32,6 +32,6 @@ struct da9211_pdata {
 	 * 2 : 2 phase 2 buck
 	 */
 	int num_buck;
-	struct regulator_init_data *init_data;
+	struct regulator_init_data *init_data[DA9211_MAX_REGULATORS];
 };
 #endif
-- 
cgit v1.2.3


From 6a4c264313c4ae32dc53821a9c57e0dc9696fb81 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 27 Aug 2014 06:21:23 +0930
Subject: module: rename KERNEL_PARAM_FL_NOARG to avoid confusion

Make it clear this is about kernel_param_ops, not kernel_param (which
will soon have a flags field of its own). No functional changes.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Jon Mason <jon.mason@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 2 +-
 kernel/module.c             | 2 +-
 kernel/params.c             | 6 +++---
 security/apparmor/lsm.c     | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 494f99e852da..16fdddab856a 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -42,7 +42,7 @@ struct kernel_param;
  * NOARG - the parameter allows for no argument (foo instead of foo=1)
  */
 enum {
-	KERNEL_PARAM_FL_NOARG = (1 << 0)
+	KERNEL_PARAM_OPS_FL_NOARG = (1 << 0)
 };
 
 struct kernel_param_ops {
diff --git a/kernel/module.c b/kernel/module.c
index 03214bd288e9..8a0dc91eddbc 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -135,7 +135,7 @@ static int param_set_bool_enable_only(const char *val,
 }
 
 static const struct kernel_param_ops param_ops_bool_enable_only = {
-	.flags = KERNEL_PARAM_FL_NOARG,
+	.flags = KERNEL_PARAM_OPS_FL_NOARG,
 	.set = param_set_bool_enable_only,
 	.get = param_get_bool,
 };
diff --git a/kernel/params.c b/kernel/params.c
index 34f527023794..8a484fc8bde8 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -104,7 +104,7 @@ static int parse_one(char *param,
 				return 0;
 			/* No one handled NULL, so do it here. */
 			if (!val &&
-			    !(params[i].ops->flags & KERNEL_PARAM_FL_NOARG))
+			    !(params[i].ops->flags & KERNEL_PARAM_OPS_FL_NOARG))
 				return -EINVAL;
 			pr_debug("handling %s with %p\n", param,
 				params[i].ops->set);
@@ -318,7 +318,7 @@ int param_get_bool(char *buffer, const struct kernel_param *kp)
 EXPORT_SYMBOL(param_get_bool);
 
 struct kernel_param_ops param_ops_bool = {
-	.flags = KERNEL_PARAM_FL_NOARG,
+	.flags = KERNEL_PARAM_OPS_FL_NOARG,
 	.set = param_set_bool,
 	.get = param_get_bool,
 };
@@ -369,7 +369,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
 EXPORT_SYMBOL(param_set_bint);
 
 struct kernel_param_ops param_ops_bint = {
-	.flags = KERNEL_PARAM_FL_NOARG,
+	.flags = KERNEL_PARAM_OPS_FL_NOARG,
 	.set = param_set_bint,
 	.get = param_get_int,
 };
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 998100093332..65ca451a764d 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -668,7 +668,7 @@ static int param_set_aabool(const char *val, const struct kernel_param *kp);
 static int param_get_aabool(char *buffer, const struct kernel_param *kp);
 #define param_check_aabool param_check_bool
 static struct kernel_param_ops param_ops_aabool = {
-	.flags = KERNEL_PARAM_FL_NOARG,
+	.flags = KERNEL_PARAM_OPS_FL_NOARG,
 	.set = param_set_aabool,
 	.get = param_get_aabool
 };
@@ -685,7 +685,7 @@ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp
 static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp);
 #define param_check_aalockpolicy param_check_bool
 static struct kernel_param_ops param_ops_aalockpolicy = {
-	.flags = KERNEL_PARAM_FL_NOARG,
+	.flags = KERNEL_PARAM_OPS_FL_NOARG,
 	.set = param_set_aalockpolicy,
 	.get = param_get_aalockpolicy
 };
-- 
cgit v1.2.3


From 91f9d330cc14932084c37751997213cb0e7ea882 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 27 Aug 2014 06:22:23 +0930
Subject: module: make it possible to have unsafe, tainting module params

Add flags field to struct kernel_params, and add the first flag: unsafe
parameter. Modifying a kernel parameter with the unsafe flag set, either
via the kernel command line or sysfs, will issue a warning and taint the
kernel.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Jon Mason <jon.mason@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/tty/serial/8250/8250_core.c |  2 +-
 include/linux/moduleparam.h         | 44 +++++++++++++++++++++++++++++--------
 kernel/params.c                     | 11 ++++++++++
 3 files changed, 47 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 1d42dba6121d..bd672948f2f1 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -3587,7 +3587,7 @@ static void __used s8250_options(void)
 #ifdef CONFIG_SERIAL_8250_RSA
 	__module_param_call(MODULE_PARAM_PREFIX, probe_rsa,
 		&param_array_ops, .arr = &__param_arr_probe_rsa,
-		0444, -1);
+		0444, -1, 0);
 #endif
 }
 #else
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 16fdddab856a..1e3ffb839daa 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -56,11 +56,21 @@ struct kernel_param_ops {
 	void (*free)(void *arg);
 };
 
+/*
+ * Flags available for kernel_param
+ *
+ * UNSAFE - the parameter is dangerous and setting it will taint the kernel
+ */
+enum {
+	KERNEL_PARAM_FL_UNSAFE = (1 << 0)
+};
+
 struct kernel_param {
 	const char *name;
 	const struct kernel_param_ops *ops;
 	u16 perm;
-	s16 level;
+	s8 level;
+	u8 flags;
 	union {
 		void *arg;
 		const struct kparam_string *str;
@@ -137,7 +147,7 @@ struct kparam_array
  * The ops can have NULL set or get functions.
  */
 #define module_param_cb(name, ops, arg, perm)				      \
-	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1)
+	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0)
 
 /**
  * <level>_param_cb - general callback for a module/cmdline parameter
@@ -149,7 +159,7 @@ struct kparam_array
  * The ops can have NULL set or get functions.
  */
 #define __level_param_cb(name, ops, arg, perm, level)			\
-	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, level)
+	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, level, 0)
 
 #define core_param_cb(name, ops, arg, perm)		\
 	__level_param_cb(name, ops, arg, perm, 1)
@@ -184,14 +194,14 @@ struct kparam_array
 
 /* This is the fundamental function for registering boot/module
    parameters. */
-#define __module_param_call(prefix, name, ops, arg, perm, level)	\
+#define __module_param_call(prefix, name, ops, arg, perm, level, flags)	\
 	/* Default value instead of permissions? */			\
 	static const char __param_str_##name[] = prefix #name; \
 	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
 	= { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm),	\
-	    level, { arg } }
+	    level, flags, { arg } }
 
 /* Obsolete - use module_param_cb() */
 #define module_param_call(name, set, get, arg, perm)			\
@@ -199,7 +209,7 @@ struct kparam_array
 		{ 0, (void *)set, (void *)get };			\
 	__module_param_call(MODULE_PARAM_PREFIX,			\
 			    name, &__param_ops_##name, arg,		\
-			    (perm) + sizeof(__check_old_set_param(set))*0, -1)
+			    (perm) + sizeof(__check_old_set_param(set))*0, -1, 0)
 
 /* We don't get oldget: it's often a new-style param_get_uint, etc. */
 static inline int
@@ -279,7 +289,7 @@ static inline void __kernel_param_unlock(void)
  */
 #define core_param(name, var, type, perm)				\
 	param_check_##type(name, &(var));				\
-	__module_param_call("", name, &param_ops_##type, &var, perm, -1)
+	__module_param_call("", name, &param_ops_##type, &var, perm, -1, 0)
 #endif /* !MODULE */
 
 /**
@@ -297,7 +307,7 @@ static inline void __kernel_param_unlock(void)
 		= { len, string };					\
 	__module_param_call(MODULE_PARAM_PREFIX, name,			\
 			    &param_ops_string,				\
-			    .str = &__param_string_##name, perm, -1);	\
+			    .str = &__param_string_##name, perm, -1, 0);\
 	__MODULE_PARM_TYPE(name, "string")
 
 /**
@@ -346,6 +356,22 @@ static inline void destroy_params(const struct kernel_param *params,
 #define __param_check(name, p, type) \
 	static inline type __always_unused *__check_##name(void) { return(p); }
 
+/**
+ * param_check_unsafe - Warn and taint the kernel if setting dangerous options.
+ *
+ * This gets called from all the standard param setters, but can be used from
+ * custom setters as well.
+ */
+static inline void
+param_check_unsafe(const struct kernel_param *kp)
+{
+	if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
+		pr_warn("Setting dangerous option %s - tainting kernel\n",
+			kp->name);
+		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+	}
+}
+
 extern struct kernel_param_ops param_ops_byte;
 extern int param_set_byte(const char *val, const struct kernel_param *kp);
 extern int param_get_byte(char *buffer, const struct kernel_param *kp);
@@ -444,7 +470,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp);
 	__module_param_call(MODULE_PARAM_PREFIX, name,			\
 			    &param_array_ops,				\
 			    .arr = &__param_arr_##name,			\
-			    perm, -1);					\
+			    perm, -1, 0);				\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
 extern struct kernel_param_ops param_array_ops;
diff --git a/kernel/params.c b/kernel/params.c
index 8a484fc8bde8..ad8d04563c3a 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -233,6 +233,7 @@ char *parse_args(const char *doing,
 #define STANDARD_PARAM_DEF(name, type, format, strtolfn)      		\
 	int param_set_##name(const char *val, const struct kernel_param *kp) \
 	{								\
+		param_check_unsafe(kp);					\
 		return strtolfn(val, 0, (type *)kp->arg);		\
 	}								\
 	int param_get_##name(char *buffer, const struct kernel_param *kp) \
@@ -265,6 +266,8 @@ int param_set_charp(const char *val, const struct kernel_param *kp)
 		return -ENOSPC;
 	}
 
+	param_check_unsafe(kp);
+
 	maybe_kfree_parameter(*(char **)kp->arg);
 
 	/* This is a hack.  We can't kmalloc in early boot, and we
@@ -302,6 +305,8 @@ EXPORT_SYMBOL(param_ops_charp);
 /* Actually could be a bool or an int, for historical reasons. */
 int param_set_bool(const char *val, const struct kernel_param *kp)
 {
+	param_check_unsafe(kp);
+
 	/* No equals means "set"... */
 	if (!val) val = "1";
 
@@ -331,6 +336,8 @@ int param_set_invbool(const char *val, const struct kernel_param *kp)
 	bool boolval;
 	struct kernel_param dummy;
 
+	param_check_unsafe(kp);
+
 	dummy.arg = &boolval;
 	ret = param_set_bool(val, &dummy);
 	if (ret == 0)
@@ -357,6 +364,8 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
 	bool v;
 	int ret;
 
+	param_check_unsafe(kp);
+
 	/* Match bool exactly, by re-using it. */
 	boolkp = *kp;
 	boolkp.arg = &v;
@@ -476,6 +485,8 @@ int param_set_copystring(const char *val, const struct kernel_param *kp)
 {
 	const struct kparam_string *kps = kp->str;
 
+	param_check_unsafe(kp);
+
 	if (strlen(val)+1 > kps->maxlen) {
 		pr_err("%s: string doesn't fit in %u chars.\n",
 		       kp->name, kps->maxlen-1);
-- 
cgit v1.2.3


From 3baee201b06cfaff84c2c5ddc551b192bb3eaed3 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 27 Aug 2014 06:23:23 +0930
Subject: module: add module_param_unsafe and module_param_named_unsafe

Add the helpers to be used by modules wishing to expose unsafe debugging
or testing module parameters that taint the kernel when set.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Jon Mason <jon.mason@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 1e3ffb839daa..9531f9f9729e 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -122,6 +122,12 @@ struct kparam_array
 #define module_param(name, type, perm)				\
 	module_param_named(name, name, type, perm)
 
+/**
+ * module_param_unsafe - same as module_param but taints kernel
+ */
+#define module_param_unsafe(name, type, perm)			\
+	module_param_named_unsafe(name, name, type, perm)
+
 /**
  * module_param_named - typesafe helper for a renamed module/cmdline parameter
  * @name: a valid C identifier which is the parameter name.
@@ -138,6 +144,14 @@ struct kparam_array
 	module_param_cb(name, &param_ops_##type, &value, perm);		   \
 	__MODULE_PARM_TYPE(name, #type)
 
+/**
+ * module_param_named_unsafe - same as module_param_named but taints kernel
+ */
+#define module_param_named_unsafe(name, value, type, perm)		\
+	param_check_##type(name, &(value));				\
+	module_param_cb_unsafe(name, &param_ops_##type, &value, perm);	\
+	__MODULE_PARM_TYPE(name, #type)
+
 /**
  * module_param_cb - general callback for a module/cmdline parameter
  * @name: a valid C identifier which is the parameter name.
@@ -149,6 +163,10 @@ struct kparam_array
 #define module_param_cb(name, ops, arg, perm)				      \
 	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0)
 
+#define module_param_cb_unsafe(name, ops, arg, perm)			      \
+	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1,    \
+			    KERNEL_PARAM_FL_UNSAFE)
+
 /**
  * <level>_param_cb - general callback for a module/cmdline parameter
  *                    to be evaluated before certain initcall level
-- 
cgit v1.2.3


From 7a486d3781295b5298cbf9556928a76d26896863 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 27 Aug 2014 06:25:23 +0930
Subject: param: check for tainting before calling set op.

This means every set op doesn't need to call it, and it can move into
params.c.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 16 ----------------
 kernel/params.c             | 22 +++++++++++-----------
 2 files changed, 11 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 9531f9f9729e..593501996574 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -374,22 +374,6 @@ static inline void destroy_params(const struct kernel_param *params,
 #define __param_check(name, p, type) \
 	static inline type __always_unused *__check_##name(void) { return(p); }
 
-/**
- * param_check_unsafe - Warn and taint the kernel if setting dangerous options.
- *
- * This gets called from all the standard param setters, but can be used from
- * custom setters as well.
- */
-static inline void
-param_check_unsafe(const struct kernel_param *kp)
-{
-	if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
-		pr_warn("Setting dangerous option %s - tainting kernel\n",
-			kp->name);
-		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
-	}
-}
-
 extern struct kernel_param_ops param_ops_byte;
 extern int param_set_byte(const char *val, const struct kernel_param *kp);
 extern int param_get_byte(char *buffer, const struct kernel_param *kp);
diff --git a/kernel/params.c b/kernel/params.c
index ad8d04563c3a..041b5899d5e2 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -83,6 +83,15 @@ bool parameq(const char *a, const char *b)
 	return parameqn(a, b, strlen(a)+1);
 }
 
+static void param_check_unsafe(const struct kernel_param *kp)
+{
+	if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
+		pr_warn("Setting dangerous option %s - tainting kernel\n",
+			kp->name);
+		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+	}
+}
+
 static int parse_one(char *param,
 		     char *val,
 		     const char *doing,
@@ -109,6 +118,7 @@ static int parse_one(char *param,
 			pr_debug("handling %s with %p\n", param,
 				params[i].ops->set);
 			mutex_lock(&param_lock);
+			param_check_unsafe(&params[i]);
 			err = params[i].ops->set(val, &params[i]);
 			mutex_unlock(&param_lock);
 			return err;
@@ -233,7 +243,6 @@ char *parse_args(const char *doing,
 #define STANDARD_PARAM_DEF(name, type, format, strtolfn)      		\
 	int param_set_##name(const char *val, const struct kernel_param *kp) \
 	{								\
-		param_check_unsafe(kp);					\
 		return strtolfn(val, 0, (type *)kp->arg);		\
 	}								\
 	int param_get_##name(char *buffer, const struct kernel_param *kp) \
@@ -266,8 +275,6 @@ int param_set_charp(const char *val, const struct kernel_param *kp)
 		return -ENOSPC;
 	}
 
-	param_check_unsafe(kp);
-
 	maybe_kfree_parameter(*(char **)kp->arg);
 
 	/* This is a hack.  We can't kmalloc in early boot, and we
@@ -305,8 +312,6 @@ EXPORT_SYMBOL(param_ops_charp);
 /* Actually could be a bool or an int, for historical reasons. */
 int param_set_bool(const char *val, const struct kernel_param *kp)
 {
-	param_check_unsafe(kp);
-
 	/* No equals means "set"... */
 	if (!val) val = "1";
 
@@ -336,8 +341,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp)
 	bool boolval;
 	struct kernel_param dummy;
 
-	param_check_unsafe(kp);
-
 	dummy.arg = &boolval;
 	ret = param_set_bool(val, &dummy);
 	if (ret == 0)
@@ -364,8 +367,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
 	bool v;
 	int ret;
 
-	param_check_unsafe(kp);
-
 	/* Match bool exactly, by re-using it. */
 	boolkp = *kp;
 	boolkp.arg = &v;
@@ -485,8 +486,6 @@ int param_set_copystring(const char *val, const struct kernel_param *kp)
 {
 	const struct kparam_string *kps = kp->str;
 
-	param_check_unsafe(kp);
-
 	if (strlen(val)+1 > kps->maxlen) {
 		pr_err("%s: string doesn't fit in %u chars.\n",
 		       kp->name, kps->maxlen-1);
@@ -563,6 +562,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
 		return -EPERM;
 
 	mutex_lock(&param_lock);
+	param_check_unsafe(attribute->param);
 	err = attribute->param->ops->set(buf, attribute->param);
 	mutex_unlock(&param_lock);
 	if (!err)
-- 
cgit v1.2.3


From 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Aug 2014 12:15:00 +0200
Subject: KVM: Introduce gfn_to_hva_memslot_prot

To support read-only memory regions on arm and arm64, we have a need to
resolve a gfn to an hva given a pointer to a memslot to avoid looping
through the memslots twice and to reuse the hva error checking of
gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and
refactor gfn_to_hva_prot() to use this function.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ebd723676633..6d8a658ec174 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -528,6 +528,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
+				      bool *writable);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5a0817ee996e..76c92a7249c4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
  * If writable is set to false, the hva returned by this function is only
  * allowed to be read.
  */
-unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+				      gfn_t gfn, bool *writable)
 {
-	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 	unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
 
 	if (!kvm_is_error_hva(hva) && writable)
@@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 	return hva;
 }
 
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+{
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+	return gfn_to_hva_memslot_prot(slot, gfn, writable);
+}
+
 static int kvm_read_hva(void *data, void __user *hva, int len)
 {
 	return __copy_from_user(data, hva, len);
-- 
cgit v1.2.3


From 5c21403d74af2c9cd635a34c2f9199681a5b813e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 27 Aug 2014 14:39:04 -0700
Subject: net: Update sk_buff flag bit availability comment.

We lost one when xmit_more was added.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9b3802a197a8..b69b7b512c06 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -580,7 +580,7 @@ struct sk_buff {
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
 	__u8			csum_complete_sw:1;
-	/* 2/4 bit hole (depending on ndisc_nodetype presence) */
+	/* 1/3 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
-- 
cgit v1.2.3


From 3e8a72d1dae374cf6fc1dba97cec663585845ff9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:46 -0700
Subject: net: dsa: reduce number of protocol hooks

DSA is currently registering one packet_type function per EtherType it
needs to intercept in the receive path of a DSA-enabled Ethernet device.
Right now we have three of them: trailer, DSA and eDSA, and there might
be more in the future, this will not scale to the addition of new
protocols.

This patch proceeds with adding a new layer of abstraction and two new
functions:

dsa_switch_rcv() which will dispatch into the tag-protocol specific
receive function implemented by net/dsa/tag_*.c

dsa_slave_xmit() which will dispatch into the tag-protocol specific
transmit function implemented by net/dsa/tag_*.c

When we do create the per-port slave network devices, we iterate over
the switch protocol to assign the DSA-specific receive and transmit
operations.

A new fake ethertype value is used: ETH_P_XDSA to illustrate the fact
that this is no longer going to look like ETH_P_DSA or ETH_P_TRAILER
like it used to be.

This allows us to greatly simplify the check in eth_type_trans() and
always override the skb->protocol with ETH_P_XDSA for Ethernet switches
tagged protocol, while also reducing the number repetitive slave
netdevice_ops assignments.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h     | 28 ++++++++++++---------------
 include/net/dsa.h             | 20 +++----------------
 include/uapi/linux/if_ether.h |  1 +
 net/dsa/dsa.c                 | 39 ++++++++++++++++++++-----------------
 net/dsa/dsa_priv.h            |  9 +++------
 net/dsa/slave.c               | 45 ++++++++++++++-----------------------------
 net/dsa/tag_dsa.c             |  8 ++++----
 net/dsa/tag_edsa.c            |  8 ++++----
 net/dsa/tag_trailer.c         |  8 ++++----
 net/ethernet/eth.c            |  7 ++-----
 10 files changed, 68 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 039b23786c22..1875dc71422a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1781,24 +1781,13 @@ void dev_net_set(struct net_device *dev, struct net *net)
 #endif
 }
 
-static inline bool netdev_uses_dsa_tags(struct net_device *dev)
+static inline bool netdev_uses_dsa(struct net_device *dev)
 {
-#ifdef CONFIG_NET_DSA_TAG_DSA
-	if (dev->dsa_ptr != NULL)
-		return dsa_uses_dsa_tags(dev->dsa_ptr);
-#endif
-
-	return 0;
-}
-
-static inline bool netdev_uses_trailer_tags(struct net_device *dev)
-{
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	if (dev->dsa_ptr != NULL)
-		return dsa_uses_trailer_tags(dev->dsa_ptr);
+#ifdef CONFIG_NET_DSA
+	return dev->dsa_ptr != NULL;
+#else
+	return false;
 #endif
-
-	return 0;
 }
 
 /**
@@ -1933,6 +1922,13 @@ struct udp_offload {
 	struct offload_callbacks callbacks;
 };
 
+struct dsa_device_ops {
+	netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
+	int (*rcv)(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev);
+};
+
+
 /* often modified stats are per cpu, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
 	u64     rx_packets;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6efce384451e..6e26f1e4d8ce 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -59,6 +59,8 @@ struct dsa_platform_data {
 	struct dsa_chip_data	*chip;
 };
 
+struct dsa_device_ops;
+
 struct dsa_switch_tree {
 	/*
 	 * Configuration data for the platform device that owns
@@ -71,6 +73,7 @@ struct dsa_switch_tree {
 	 * protocol to use.
 	 */
 	struct net_device	*master_netdev;
+	const struct dsa_device_ops	*ops;
 	__be16			tag_protocol;
 
 	/*
@@ -186,21 +189,4 @@ static inline void *ds_to_priv(struct dsa_switch *ds)
 	return (void *)(ds + 1);
 }
 
-/*
- * The original DSA tag format and some other tag formats have no
- * ethertype, which means that we need to add a little hack to the
- * networking receive path to make sure that received frames get
- * the right ->protocol assigned to them when one of those tag
- * formats is in use.
- */
-static inline bool dsa_uses_dsa_tags(struct dsa_switch_tree *dst)
-{
-	return !!(dst->tag_protocol == htons(ETH_P_DSA));
-}
-
-static inline bool dsa_uses_trailer_tags(struct dsa_switch_tree *dst)
-{
-	return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
-}
-
 #endif
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 0f8210b8e0bc..aa63ed023c2b 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -128,6 +128,7 @@
 #define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
 #define ETH_P_IEEE802154 0x00F6		/* IEEE802.15.4 frame		*/
 #define ETH_P_CAIF	0x00F7		/* ST-Ericsson CAIF protocol	*/
+#define ETH_P_XDSA	0x00F8		/* Multiplexed DSA protocol	*/
 
 /*
  *	This is an Ethernet frame header.
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0a49632fac47..92e71d2a2ccd 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -608,6 +608,24 @@ static void dsa_shutdown(struct platform_device *pdev)
 {
 }
 
+static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
+			  struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct dsa_switch_tree *dst = dev->dsa_ptr;
+
+	if (unlikely(dst == NULL)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return dst->ops->rcv(skb, dev, pt, orig_dev);
+}
+
+struct packet_type dsa_pack_type __read_mostly = {
+	.type	= cpu_to_be16(ETH_P_XDSA),
+	.func	= dsa_switch_rcv,
+};
+
 static const struct of_device_id dsa_of_match_table[] = {
 	{ .compatible = "marvell,dsa", },
 	{}
@@ -633,30 +651,15 @@ static int __init dsa_init_module(void)
 	if (rc)
 		return rc;
 
-#ifdef CONFIG_NET_DSA_TAG_DSA
-	dev_add_pack(&dsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
-	dev_add_pack(&edsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	dev_add_pack(&trailer_packet_type);
-#endif
+	dev_add_pack(&dsa_pack_type);
+
 	return 0;
 }
 module_init(dsa_init_module);
 
 static void __exit dsa_cleanup_module(void)
 {
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	dev_remove_pack(&trailer_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
-	dev_remove_pack(&edsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_DSA
-	dev_remove_pack(&dsa_packet_type);
-#endif
+	dev_remove_pack(&dsa_pack_type);
 	platform_driver_unregister(&dsa_driver);
 }
 module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index d4cf5cc747e3..218d75d16f6f 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -45,16 +45,13 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds,
 				    int port, char *name);
 
 /* tag_dsa.c */
-netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type dsa_packet_type;
+extern const struct dsa_device_ops dsa_netdev_ops;
 
 /* tag_edsa.c */
-netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type edsa_packet_type;
+extern const struct dsa_device_ops edsa_netdev_ops;
 
 /* tag_trailer.c */
-netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type trailer_packet_type;
+extern const struct dsa_device_ops trailer_netdev_ops;
 
 
 #endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 45a1e34c89e0..ad1a913533aa 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -171,6 +171,14 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return -EOPNOTSUPP;
 }
 
+static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch_tree *dst = p->parent->dst;
+
+	return dst->ops->xmit(skb, dev);
+}
+
 
 /* ethtool operations *******************************************************/
 static int
@@ -293,42 +301,16 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_sset_count		= dsa_slave_get_sset_count,
 };
 
-#ifdef CONFIG_NET_DSA_TAG_DSA
-static const struct net_device_ops dsa_netdev_ops = {
+static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_init		= dsa_slave_init,
 	.ndo_open	 	= dsa_slave_open,
 	.ndo_stop		= dsa_slave_close,
-	.ndo_start_xmit		= dsa_xmit,
+	.ndo_start_xmit		= dsa_slave_xmit,
 	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
 	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
 	.ndo_set_mac_address	= dsa_slave_set_mac_address,
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 };
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
-static const struct net_device_ops edsa_netdev_ops = {
-	.ndo_init		= dsa_slave_init,
-	.ndo_open	 	= dsa_slave_open,
-	.ndo_stop		= dsa_slave_close,
-	.ndo_start_xmit		= edsa_xmit,
-	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
-	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
-	.ndo_set_mac_address	= dsa_slave_set_mac_address,
-	.ndo_do_ioctl		= dsa_slave_ioctl,
-};
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-static const struct net_device_ops trailer_netdev_ops = {
-	.ndo_init		= dsa_slave_init,
-	.ndo_open	 	= dsa_slave_open,
-	.ndo_stop		= dsa_slave_close,
-	.ndo_start_xmit		= trailer_xmit,
-	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
-	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
-	.ndo_set_mac_address	= dsa_slave_set_mac_address,
-	.ndo_do_ioctl		= dsa_slave_ioctl,
-};
-#endif
 
 /* slave device setup *******************************************************/
 struct net_device *
@@ -349,21 +331,22 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
 	eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->tx_queue_len = 0;
+	slave_dev->netdev_ops = &dsa_slave_netdev_ops;
 
 	switch (ds->dst->tag_protocol) {
 #ifdef CONFIG_NET_DSA_TAG_DSA
 	case htons(ETH_P_DSA):
-		slave_dev->netdev_ops = &dsa_netdev_ops;
+		ds->dst->ops = &dsa_netdev_ops;
 		break;
 #endif
 #ifdef CONFIG_NET_DSA_TAG_EDSA
 	case htons(ETH_P_EDSA):
-		slave_dev->netdev_ops = &edsa_netdev_ops;
+		ds->dst->ops = &edsa_netdev_ops;
 		break;
 #endif
 #ifdef CONFIG_NET_DSA_TAG_TRAILER
 	case htons(ETH_P_TRAILER):
-		slave_dev->netdev_ops = &trailer_netdev_ops;
+		ds->dst->ops = &trailer_netdev_ops;
 		break;
 #endif
 	default:
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index cacce1e22f9c..d7dbc5bda5c0 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -16,7 +16,7 @@
 
 #define DSA_HLEN	4
 
-netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	u8 *dsa_header;
@@ -186,7 +186,7 @@ out:
 	return 0;
 }
 
-struct packet_type dsa_packet_type __read_mostly = {
-	.type	= cpu_to_be16(ETH_P_DSA),
-	.func	= dsa_rcv,
+const struct dsa_device_ops dsa_netdev_ops = {
+	.xmit	= dsa_xmit,
+	.rcv	= dsa_rcv,
 };
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index e70c43c25e64..6b30abe89183 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -17,7 +17,7 @@
 #define DSA_HLEN	4
 #define EDSA_HLEN	8
 
-netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	u8 *edsa_header;
@@ -205,7 +205,7 @@ out:
 	return 0;
 }
 
-struct packet_type edsa_packet_type __read_mostly = {
-	.type	= cpu_to_be16(ETH_P_EDSA),
-	.func	= edsa_rcv,
+const struct dsa_device_ops edsa_netdev_ops = {
+	.xmit	= edsa_xmit,
+	.rcv	= edsa_rcv,
 };
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 94bc260d015d..5fe9444842c5 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -14,7 +14,7 @@
 #include <linux/slab.h>
 #include "dsa_priv.h"
 
-netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct sk_buff *nskb;
@@ -114,7 +114,7 @@ out:
 	return 0;
 }
 
-struct packet_type trailer_packet_type __read_mostly = {
-	.type	= cpu_to_be16(ETH_P_TRAILER),
-	.func	= trailer_rcv,
+const struct dsa_device_ops trailer_netdev_ops = {
+	.xmit	= trailer_xmit,
+	.rcv	= trailer_rcv,
 };
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f405e0592407..5cebca134585 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -181,11 +181,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 * variants has been configured on the receiving interface,
 	 * and if so, set skb->protocol without looking at the packet.
 	 */
-	if (unlikely(netdev_uses_dsa_tags(dev)))
-		return htons(ETH_P_DSA);
-
-	if (unlikely(netdev_uses_trailer_tags(dev)))
-		return htons(ETH_P_TRAILER);
+	if (unlikely(netdev_uses_dsa(dev)))
+		return htons(ETH_P_XDSA);
 
 	if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN))
 		return eth->h_proto;
-- 
cgit v1.2.3


From 464c3668f065baeacfffa9d421959d21069389fe Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:48 -0700
Subject: net: phy: provide stub for fixed_phy_set_link_update

In preparation for updating the DSA code and avoid using ifdefs there,
provide an empty stub for fixed_phy_set_link_update when
CONFIG_FIXED_PHY is not selected.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy_fixed.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index ae612acebb53..941138664c1d 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -18,6 +18,9 @@ extern int fixed_phy_register(unsigned int irq,
 			      struct fixed_phy_status *status,
 			      struct device_node *np);
 extern void fixed_phy_del(int phy_addr);
+extern int fixed_phy_set_link_update(struct phy_device *phydev,
+			int (*link_update)(struct net_device *,
+					   struct fixed_phy_status *));
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
 				struct fixed_phy_status *status)
@@ -34,14 +37,12 @@ static inline int fixed_phy_del(int phy_addr)
 {
 	return -ENODEV;
 }
-#endif /* CONFIG_FIXED_PHY */
-
-/*
- * This function issued only by fixed_phy-aware drivers, no need
- * protect it with #ifdef
- */
-extern int fixed_phy_set_link_update(struct phy_device *phydev,
+static inline int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
-					   struct fixed_phy_status *));
+					   struct fixed_phy_status *))
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_FIXED_PHY */
 
 #endif /* __PHY_FIXED_H */
-- 
cgit v1.2.3


From 5aed85cec29882d1c4b4b2a01cb75a99efdbe4ed Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:52 -0700
Subject: net: dsa: allow switches to work without tagging

In case switch port tagging is disabled (voluntarily, or the switch just
does not support it), allow us to continue using the defined set of
dsa_device_ops in net/dsa/slave.c.

We introduce dsa_protocol_is_tagged() to check whether we need to
override skb->protocol and go through the DSA-specifif packet_type
function, or if we just go on and receive the SKB through the normal
path.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 +++---
 include/net/dsa.h         |  5 +++++
 net/dsa/slave.c           | 19 ++++++++++++++++++-
 3 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1875dc71422a..429801370d0c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1784,10 +1784,10 @@ void dev_net_set(struct net_device *dev, struct net *net)
 static inline bool netdev_uses_dsa(struct net_device *dev)
 {
 #ifdef CONFIG_NET_DSA
-	return dev->dsa_ptr != NULL;
-#else
-	return false;
+	if (dev->dsa_ptr != NULL)
+		return dsa_uses_tagged_protocol(dev->dsa_ptr);
 #endif
+	return false;
 }
 
 /**
diff --git a/include/net/dsa.h b/include/net/dsa.h
index dc357454ae3b..1035f6452d79 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -198,4 +198,9 @@ static inline void *ds_to_priv(struct dsa_switch *ds)
 	return (void *)(ds + 1);
 }
 
+static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
+{
+	return dst->tag_protocol != 0;
+}
+
 #endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 03d2894a0f8a..241c2a1684cb 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -181,6 +181,17 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 	return dst->ops->xmit(skb, dev);
 }
 
+static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff *skb,
+					struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+
+	skb->dev = p->parent->dst->master_netdev;
+	dev_queue_xmit(skb);
+
+	return NETDEV_TX_OK;
+}
+
 
 /* ethtool operations *******************************************************/
 static int
@@ -314,6 +325,11 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 };
 
+static const struct dsa_device_ops notag_netdev_ops = {
+	.xmit	= dsa_slave_notag_xmit,
+	.rcv	= NULL,
+};
+
 static void dsa_slave_adjust_link(struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -415,7 +431,8 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 		break;
 #endif
 	default:
-		BUG();
+		ds->dst->ops = &notag_netdev_ops;
+		break;
 	}
 
 	SET_NETDEV_DEV(slave_dev, parent);
-- 
cgit v1.2.3


From 97fdaab4699de3a2a91001efef60bb0622de1c53 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 26 Aug 2014 13:15:25 -0700
Subject: net: phy: broadcom: fix PHY_BCM_OUI_4

PHY_BCM_OUI_4 is missing two significant digits that actually make it an
OUI, add those missing bits so it becomes usable again for matching.

Fixes: b560a58c45c6 ("net: phy: add Broadcom BCM7xxx internal PHY driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index ee1431d976fa..921f17ca4c26 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -21,7 +21,7 @@
 #define PHY_BCM_OUI_1			0x00206000
 #define PHY_BCM_OUI_2			0x0143bc00
 #define PHY_BCM_OUI_3			0x03625c00
-#define PHY_BCM_OUI_4			0x600d0000
+#define PHY_BCM_OUI_4			0x600d8400
 #define PHY_BCM_OUI_5			0x03625e00
 
 
-- 
cgit v1.2.3


From 11bf2bbd596add62a86a74fc7aedc0b86c6ec154 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 26 Aug 2014 13:15:26 -0700
Subject: net: phy: broadcom: add new Broadcom OUI

Broadcom started to use a new OUI for its 2013 and newer products:
D4-01-29 which translates into 0xae025000 for a 32-bits OUI, add its
definition.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 921f17ca4c26..cbcfad36d4c0 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -23,7 +23,7 @@
 #define PHY_BCM_OUI_3			0x03625c00
 #define PHY_BCM_OUI_4			0x600d8400
 #define PHY_BCM_OUI_5			0x03625e00
-
+#define PHY_BCM_OUI_6			0xae025000
 
 #define PHY_BCM_FLAGS_MODE_COPPER	0x00000001
 #define PHY_BCM_FLAGS_MODE_1000BX	0x00000002
-- 
cgit v1.2.3


From 430ad68ffb5fa632a277162e5995cd6f7a39fb78 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 26 Aug 2014 13:15:27 -0700
Subject: net: phy: bcm7xxx: add BCM7250 and BCM7364 PHY entries

Add two new entries to the Broadcom BCM7xxx internal PHY driver for
BCM7250 and BCM7364 chips. Those chips share the usual 28nm process
Gigabit PHY sequence and require the same workarounds so far.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm7xxx.c | 4 ++++
 include/linux/brcmphy.h   | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 948c7086679a..09dd6e1dc6e1 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -335,6 +335,8 @@ static int bcm7xxx_dummy_config_init(struct phy_device *phydev)
 }
 
 static struct phy_driver bcm7xxx_driver[] = {
+	BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"),
+	BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"),
@@ -367,6 +369,8 @@ static struct phy_driver bcm7xxx_driver[] = {
 } };
 
 static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
+	{ PHY_ID_BCM7250, 0xfffffff0, },
+	{ PHY_ID_BCM7364, 0xfffffff0, },
 	{ PHY_ID_BCM7366, 0xfffffff0, },
 	{ PHY_ID_BCM7439, 0xfffffff0, },
 	{ PHY_ID_BCM7445, 0xfffffff0, },
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index cbcfad36d4c0..5bd35cc0d471 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -13,6 +13,8 @@
 #define PHY_ID_BCM5461			0x002060c0
 #define PHY_ID_BCM57780			0x03625d90
 
+#define PHY_ID_BCM7250			0xae025280
+#define PHY_ID_BCM7364			0xae025260
 #define PHY_ID_BCM7366			0x600d8490
 #define PHY_ID_BCM7439			0x600d8480
 #define PHY_ID_BCM7445			0x600d8510
-- 
cgit v1.2.3


From 1f58d9465c568eb47cab939bbc4f30ae51863295 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Aug 2014 13:06:30 +0200
Subject: dma-buf/fence: Fix one more kerneldoc warning

The seqno_fence_init() function's cond argument isn't described in the
kerneldoc comment. Fix that to silence a warning when building DocBook
documentation.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
---
 include/linux/seqno-fence.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/seqno-fence.h b/include/linux/seqno-fence.h
index 3d6003de4b0d..a1ba6a5ccdd6 100644
--- a/include/linux/seqno-fence.h
+++ b/include/linux/seqno-fence.h
@@ -62,6 +62,7 @@ to_seqno_fence(struct fence *fence)
  * @context: the execution context this fence is a part of
  * @seqno_ofs: the offset within @sync_buf
  * @seqno: the sequence # to signal on
+ * @cond: fence wait condition
  * @ops: the fence_ops for operations on this seqno fence
  *
  * This function initializes a struct seqno_fence with passed parameters,
-- 
cgit v1.2.3


From a8dbfeedfe47a19a4712749eb2444b1d7ea1150e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 27 Aug 2014 14:31:24 -0700
Subject: regulator: fix kernel-doc warnings in header files

Fix kernel-doc warnings in regulator header files:

Warning(..//include/linux/regulator/machine.h:140): No description found for parameter 'ramp_disable'
Warning(..//include/linux/regulator/driver.h:279): No description found for parameter 'linear_ranges'
Warning(..//include/linux/regulator/driver.h:279): No description found for parameter 'n_linear_ranges'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/driver.h  | 2 ++
 include/linux/regulator/machine.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index bbe03a1924c0..4efa1ed8a2b0 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -218,6 +218,8 @@ enum regulator_type {
  * @linear_min_sel: Minimal selector for starting linear mapping
  * @fixed_uV: Fixed voltage of rails.
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
+ * @linear_ranges: A constant table of possible voltage ranges.
+ * @n_linear_ranges: Number of entries in the @linear_ranges table.
  * @volt_table: Voltage mapping table (if table based mapping)
  *
  * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 730e638c5589..0b08d05d470b 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -85,6 +85,7 @@ struct regulator_state {
  *           bootloader then it will be enabled when the constraints are
  *           applied.
  * @apply_uV: Apply the voltage constraint when initialising.
+ * @ramp_disable: Disable ramp delay when initialising or when setting voltage.
  *
  * @input_uV: Input voltage for regulator when supplied by another regulator.
  *
-- 
cgit v1.2.3


From 4ba2968420fa9d0604b6a6a5c61bfa8d0fa84ae0 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Tue, 26 Aug 2014 19:12:21 -0500
Subject: percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t

__get_cpu_var can paper over differences in the definitions of
cpumask_var_t and either use the address of the cpumask variable
directly or perform a fetch of the address of the struct cpumask
allocated elsewhere. This is important particularly when using per cpu
cpumask_var_t declarations because in one case we have an offset into
a per cpu area to handle and in the other case we need to fetch a
pointer from the offset.

This patch introduces a new macro

this_cpu_cpumask_var_ptr()

that is defined where cpumask_var_t is defined and performs the proper
actions. All use cases where __get_cpu_var is used with cpumask_var_t
are converted to the use of this_cpu_cpumask_var_ptr().

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/perf_event_p4.h  |  2 +-
 arch/x86/kernel/apic/x2apic_cluster.c |  3 +--
 arch/x86/oprofile/op_model_p4.c       |  2 +-
 include/linux/cpumask.h               | 11 +++++++++++
 kernel/sched/deadline.c               |  2 +-
 kernel/sched/fair.c                   |  2 +-
 kernel/sched/rt.c                     |  2 +-
 7 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 85e13ccf15c4..d725382c2ae0 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -189,7 +189,7 @@ static inline int p4_ht_thread(int cpu)
 {
 #ifdef CONFIG_SMP
 	if (smp_num_siblings == 2)
-		return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
+		return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map));
 #endif
 	return 0;
 }
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6ce600f9bc78..1f5d5f2ffae6 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -42,8 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 	 * We are to modify mask, so we need an own copy
 	 * and be sure it's manipulated with irq off.
 	 */
-	ipi_mask_ptr = __raw_get_cpu_var(ipi_mask);
-	cpumask_copy(ipi_mask_ptr, mask);
+	ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask);
 
 	/*
 	 * The idea is to send one IPI per cluster.
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 98ab13058f89..ad1d91f475ab 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -372,7 +372,7 @@ static unsigned int get_stagger(void)
 {
 #ifdef CONFIG_SMP
 	int cpu = smp_processor_id();
-	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
+	return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map));
 #endif
 	return 0;
 }
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 2997af6d2ccd..0a9a6da21e74 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -666,10 +666,19 @@ static inline size_t cpumask_size(void)
  *
  * This code makes NR_CPUS length memcopy and brings to a memory corruption.
  * cpumask_copy() provide safe copy functionality.
+ *
+ * Note that there is another evil here: If you define a cpumask_var_t
+ * as a percpu variable then the way to obtain the address of the cpumask
+ * structure differently influences what this_cpu_* operation needs to be
+ * used. Please use this_cpu_cpumask_var_t in those cases. The direct use
+ * of this_cpu_ptr() or this_cpu_read() will lead to failures when the
+ * other type of cpumask_var_t implementation is configured.
  */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 typedef struct cpumask *cpumask_var_t;
 
+#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
+
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
 bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
@@ -681,6 +690,8 @@ void free_bootmem_cpumask_var(cpumask_var_t mask);
 #else
 typedef struct cpumask cpumask_var_t[1];
 
+#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
+
 static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 {
 	return true;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 255ce138b652..4a608cfaecbd 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1158,7 +1158,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
 static int find_later_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-	struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl);
+	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
 	int this_cpu = smp_processor_id();
 	int best_cpu, cpu = task_cpu(task);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bfa3c86d0d68..197d659c144c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6539,7 +6539,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	struct sched_group *group;
 	struct rq *busiest;
 	unsigned long flags;
-	struct cpumask *cpus = __get_cpu_var(load_balance_mask);
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
 
 	struct lb_env env = {
 		.sd		= sd,
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5f6edca4fafd..a4c50fce9b90 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1526,7 +1526,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
+	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
 
-- 
cgit v1.2.3


From d37aba521379203b740a2929e6e6f6bd2485f5d7 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:54:18 +0200
Subject: ARM: tegra: remove unused tegra_emc.h

The header file include/linux/platform_data/tegra_emc.h does not seem
to be used anywhere. It was orphaned by a7cbe92c "ARM: tegra: remove
tegra EMC scaling driver". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 include/linux/platform_data/tegra_emc.h | 34 ---------------------------------
 1 file changed, 34 deletions(-)
 delete mode 100644 include/linux/platform_data/tegra_emc.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/tegra_emc.h b/include/linux/platform_data/tegra_emc.h
deleted file mode 100644
index df67505e98f8..000000000000
--- a/include/linux/platform_data/tegra_emc.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2011 Google, Inc.
- *
- * Author:
- *	Colin Cross <ccross@android.com>
- *	Olof Johansson <olof@lixom.net>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef __TEGRA_EMC_H_
-#define __TEGRA_EMC_H_
-
-#define TEGRA_EMC_NUM_REGS 46
-
-struct tegra_emc_table {
-	unsigned long rate;
-	u32 regs[TEGRA_EMC_NUM_REGS];
-};
-
-struct tegra_emc_pdata {
-	int num_tables;
-	struct tegra_emc_table *tables;
-};
-
-#endif
-- 
cgit v1.2.3


From 2b8941b962a9f24d61c2b3c2e889928e6cf3d82b Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Wed, 27 Aug 2014 11:17:56 -0400
Subject: NFSD: Update some as-yet unused 4.2 error codes

Recent NFS v4.2 drafts have removed NFS4ERR_METADATA_NOTSUPP and
reassigned the error code to NFS4ERR_UNION_NOTSUPP.

I also add in the NFS4ERR_OFFLOAD_NO_REQS error code.

We're not using any of these yet, so there's no harm done.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsd.h       | 2 +-
 include/linux/nfs4.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 847daf37e566..747f3b95bd11 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -251,7 +251,7 @@ void		nfsd_lockd_shutdown(void);
 #define nfserr_deleg_revoked		cpu_to_be32(NFS4ERR_DELEG_REVOKED)
 #define nfserr_partner_notsupp		cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP)
 #define nfserr_partner_no_auth		cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH)
-#define nfserr_metadata_notsupp		cpu_to_be32(NFS4ERR_METADATA_NOTSUPP)
+#define nfserr_union_notsupp		cpu_to_be32(NFS4ERR_UNION_NOTSUPP)
 #define nfserr_offload_denied		cpu_to_be32(NFS4ERR_OFFLOAD_DENIED)
 #define nfserr_wrong_lfs		cpu_to_be32(NFS4ERR_WRONG_LFS)
 #define nfserr_badlabel		cpu_to_be32(NFS4ERR_BADLABEL)
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index a1e3064a8d99..79b2a0f5c318 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -235,10 +235,11 @@ enum nfsstat4 {
 	/* nfs42 */
 	NFS4ERR_PARTNER_NOTSUPP	= 10088,
 	NFS4ERR_PARTNER_NO_AUTH	= 10089,
-	NFS4ERR_METADATA_NOTSUPP = 10090,
+	NFS4ERR_UNION_NOTSUPP = 10090,
 	NFS4ERR_OFFLOAD_DENIED = 10091,
 	NFS4ERR_WRONG_LFS = 10092,
 	NFS4ERR_BADLABEL = 10093,
+	NFS4ERR_OFFLOAD_NO_REQS = 10094,
 };
 
 static inline bool seqid_mutating_err(u32 err)
-- 
cgit v1.2.3


From db9ee220361de03ee86388f9ea5e529eaad5323c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 27 Aug 2014 18:40:07 -0400
Subject: jbd2: fix descriptor block size handling errors with journal_csum

It turns out that there are some serious problems with the on-disk
format of journal checksum v2.  The foremost is that the function to
calculate descriptor tag size returns sizes that are too big.  This
causes alignment issues on some architectures and is compounded by the
fact that some parts of jbd2 use the structure size (incorrectly) to
determine the presence of a 64bit journal instead of checking the
feature flags.

Therefore, introduce journal checksum v3, which enlarges the
descriptor block tag format to allow for full 32-bit checksums of
journal blocks, fix the journal tag function to return the correct
sizes, and fix the jbd2 recovery code to use feature flags to
determine 64bitness.

Add a few function helpers so we don't have to open-code quite so
many pieces.

Switching to a 16-byte block size was found to increase journal size
overhead by a maximum of 0.1%, to convert a 32-bit journal with no
checksumming to a 32-bit journal with checksum v3 enabled.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reported-by: TR Reardon <thomas_reardon@hotmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/super.c      |  5 +++--
 fs/jbd2/commit.c     | 21 +++++++++++---------
 fs/jbd2/journal.c    | 56 ++++++++++++++++++++++++++++++++++------------------
 fs/jbd2/recovery.c   | 26 +++++++++++++-----------
 fs/jbd2/revoke.c     |  6 +++---
 include/linux/jbd2.h | 30 +++++++++++++++++++++++-----
 6 files changed, 95 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32b43ad154b9..0b28b36e7915 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb)
 
 	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
-		/* journal checksum v2 */
+		/* journal checksum v3 */
 		compat = 0;
-		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
+		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
 	} else {
 		/* journal checksum v1 */
 		compat = JBD2_FEATURE_COMPAT_CHECKSUM;
@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
 		jbd2_journal_clear_features(sbi->s_journal,
 				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
+				JBD2_FEATURE_INCOMPAT_CSUM_V3 |
 				JBD2_FEATURE_INCOMPAT_CSUM_V2);
 	}
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6fac74349856..b73e0215baa7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
 	struct commit_header *h;
 	__u32 csum;
 
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return;
 
 	h = (struct commit_header *)(bh->b_data);
@@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
 	return checksum;
 }
 
-static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
+static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
 				   unsigned long long block)
 {
 	tag->t_blocknr = cpu_to_be32(block & (u32)~0);
-	if (tag_bytes > JBD2_TAG_SIZE32)
+	if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT))
 		tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
 }
 
@@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j,
 	struct jbd2_journal_block_tail *tail;
 	__u32 csum;
 
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return;
 
 	tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
@@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j,
 static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
 				    struct buffer_head *bh, __u32 sequence)
 {
+	journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
 	struct page *page = bh->b_page;
 	__u8 *addr;
 	__u32 csum32;
 	__be32 seq;
 
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return;
 
 	seq = cpu_to_be32(sequence);
@@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
 			     bh->b_size);
 	kunmap_atomic(addr);
 
-	/* We only have space to store the lower 16 bits of the crc32c. */
-	tag->t_checksum = cpu_to_be16(csum32);
+	if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
+		tag3->t_checksum = cpu_to_be32(csum32);
+	else
+		tag->t_checksum = cpu_to_be16(csum32);
 }
 /*
  * jbd2_journal_commit_transaction
@@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	LIST_HEAD(io_bufs);
 	LIST_HEAD(log_bufs);
 
-	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (jbd2_journal_has_csum_v2or3(journal))
 		csum_size = sizeof(struct jbd2_journal_block_tail);
 
 	/*
@@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 			tag_flag |= JBD2_FLAG_SAME_UUID;
 
 		tag = (journal_block_tag_t *) tagp;
-		write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
+		write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
 		tag->t_flags = cpu_to_be16(tag_flag);
 		jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
 					commit_transaction->t_tid);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 67b8e303946c..19d74d86d99c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug);
 /* Checksumming functions */
 static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
 {
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return 1;
 
 	return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
@@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
 
 static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
 {
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return 1;
 
 	return sb->s_checksum == jbd2_superblock_csum(j, sb);
@@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
 
 static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
 {
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return;
 
 	sb->s_checksum = jbd2_superblock_csum(j, sb);
@@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal)
 		goto out;
 	}
 
-	if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
-	    JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+	if (jbd2_journal_has_csum_v2or3(journal) &&
+	    JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
 		/* Can't have checksum v1 and v2 on at the same time! */
 		printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
 		       "at the same time!\n");
 		goto out;
 	}
 
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
+	    JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
+		/* Can't have checksum v2 and v3 at the same time! */
+		printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
+		       "at the same time!\n");
+		goto out;
+	}
+
 	if (!jbd2_verify_csum_type(journal, sb)) {
 		printk(KERN_ERR "JBD2: Unknown checksum type\n");
 		goto out;
 	}
 
 	/* Load the checksum driver */
-	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+	if (jbd2_journal_has_csum_v2or3(journal)) {
 		journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
 		if (IS_ERR(journal->j_chksum_driver)) {
 			printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
@@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal)
 	}
 
 	/* Precompute checksum seed for all metadata */
-	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (jbd2_journal_has_csum_v2or3(journal))
 		journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
 						   sizeof(sb->s_uuid));
 
@@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 	if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
 		return 0;
 
-	/* Asking for checksumming v2 and v1?  Only give them v2. */
-	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 &&
+	/* If enabling v2 checksums, turn on v3 instead */
+	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
+		incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
+		incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
+	}
+
+	/* Asking for checksumming v3 and v1?  Only give them v3. */
+	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
 	    compat & JBD2_FEATURE_COMPAT_CHECKSUM)
 		compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
 
@@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 
 	sb = journal->j_superblock;
 
-	/* If enabling v2 checksums, update superblock */
-	if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+	/* If enabling v3 checksums, update superblock */
+	if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
 		sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
 		sb->s_feature_compat &=
 			~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
@@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 		}
 
 		/* Precompute checksum seed for all metadata */
-		if (JBD2_HAS_INCOMPAT_FEATURE(journal,
-					      JBD2_FEATURE_INCOMPAT_CSUM_V2))
+		if (jbd2_journal_has_csum_v2or3(journal))
 			journal->j_csum_seed = jbd2_chksum(journal, ~0,
 							   sb->s_uuid,
 							   sizeof(sb->s_uuid));
@@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 	/* If enabling v1 checksums, downgrade superblock */
 	if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
 		sb->s_feature_incompat &=
-			~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2);
+			~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
+				     JBD2_FEATURE_INCOMPAT_CSUM_V3);
 
 	sb->s_feature_compat    |= cpu_to_be32(compat);
 	sb->s_feature_ro_compat |= cpu_to_be32(ro);
@@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
  */
 size_t journal_tag_bytes(journal_t *journal)
 {
-	journal_block_tag_t tag;
-	size_t x = 0;
+	size_t sz;
+
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3))
+		return sizeof(journal_block_tag3_t);
+
+	sz = sizeof(journal_block_tag_t);
 
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
-		x += sizeof(tag.t_checksum);
+		sz += sizeof(__u16);
 
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
-		return x + JBD2_TAG_SIZE64;
+		return sz;
 	else
-		return x + JBD2_TAG_SIZE32;
+		return sz - sizeof(__u32);
 }
 
 /*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 00e9703d7dc6..9b329b55ffe3 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
 	__be32 provided;
 	__u32 calculated;
 
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return 1;
 
 	tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
@@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
 	int			nr = 0, size = journal->j_blocksize;
 	int			tag_bytes = journal_tag_bytes(journal);
 
-	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (jbd2_journal_has_csum_v2or3(journal))
 		size -= sizeof(struct jbd2_journal_block_tail);
 
 	tagp = &bh->b_data[sizeof(journal_header_t)];
@@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal)
 	return err;
 }
 
-static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
+static inline unsigned long long read_tag_block(journal_t *journal,
+						journal_block_tag_t *tag)
 {
 	unsigned long long block = be32_to_cpu(tag->t_blocknr);
-	if (tag_bytes > JBD2_TAG_SIZE32)
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
 		block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
 	return block;
 }
@@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
 	__be32 provided;
 	__u32 calculated;
 
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return 1;
 
 	h = buf;
@@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
 				      void *buf, __u32 sequence)
 {
+	journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
 	__u32 csum32;
 	__be32 seq;
 
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return 1;
 
 	seq = cpu_to_be32(sequence);
 	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
 	csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
 
-	return tag->t_checksum == cpu_to_be16(csum32);
+	if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
+		return tag3->t_checksum == cpu_to_be32(csum32);
+	else
+		return tag->t_checksum == cpu_to_be16(csum32);
 }
 
 static int do_one_pass(journal_t *journal,
@@ -513,8 +518,7 @@ static int do_one_pass(journal_t *journal,
 		switch(blocktype) {
 		case JBD2_DESCRIPTOR_BLOCK:
 			/* Verify checksum first */
-			if (JBD2_HAS_INCOMPAT_FEATURE(journal,
-					JBD2_FEATURE_INCOMPAT_CSUM_V2))
+			if (jbd2_journal_has_csum_v2or3(journal))
 				descr_csum_size =
 					sizeof(struct jbd2_journal_block_tail);
 			if (descr_csum_size > 0 &&
@@ -575,7 +579,7 @@ static int do_one_pass(journal_t *journal,
 					unsigned long long blocknr;
 
 					J_ASSERT(obh != NULL);
-					blocknr = read_tag_block(tag_bytes,
+					blocknr = read_tag_block(journal,
 								 tag);
 
 					/* If the block has been
@@ -814,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
 	__be32 provided;
 	__u32 calculated;
 
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return 1;
 
 	tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 198c9c10276d..d5e95a175c92 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -91,8 +91,8 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/bio.h>
-#endif
 #include <linux/log2.h>
+#endif
 
 static struct kmem_cache *jbd2_revoke_record_cache;
 static struct kmem_cache *jbd2_revoke_table_cache;
@@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal,
 	offset = *offsetp;
 
 	/* Do we need to leave space at the end for a checksum? */
-	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (jbd2_journal_has_csum_v2or3(journal))
 		csum_size = sizeof(struct jbd2_journal_revoke_tail);
 
 	/* Make sure we have a descriptor with space left for the record */
@@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
 	struct jbd2_journal_revoke_tail *tail;
 	__u32 csum;
 
-	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+	if (!jbd2_journal_has_csum_v2or3(j))
 		return;
 
 	tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d5b50a19463c..0dae71e9971c 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -159,7 +159,11 @@ typedef struct journal_header_s
  * journal_block_tag (in the descriptor).  The other h_chksum* fields are
  * not used.
  *
- * Checksum v1 and v2 are mutually exclusive features.
+ * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses
+ * journal_block_tag3_t to store a full 32-bit checksum.  Everything else
+ * is the same as v2.
+ *
+ * Checksum v1, v2, and v3 are mutually exclusive features.
  */
 struct commit_header {
 	__be32		h_magic;
@@ -179,6 +183,14 @@ struct commit_header {
  * raw struct shouldn't be used for pointer math or sizeof() - use
  * journal_tag_bytes(journal) instead to compute this.
  */
+typedef struct journal_block_tag3_s
+{
+	__be32		t_blocknr;	/* The on-disk block number */
+	__be32		t_flags;	/* See below */
+	__be32		t_blocknr_high; /* most-significant high 32bits. */
+	__be32		t_checksum;	/* crc32c(uuid+seq+block) */
+} journal_block_tag3_t;
+
 typedef struct journal_block_tag_s
 {
 	__be32		t_blocknr;	/* The on-disk block number */
@@ -187,9 +199,6 @@ typedef struct journal_block_tag_s
 	__be32		t_blocknr_high; /* most-significant high 32bits. */
 } journal_block_tag_t;
 
-#define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
-#define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t))
-
 /* Tail of descriptor block, for checksumming */
 struct jbd2_journal_block_tail {
 	__be32		t_checksum;	/* crc32c(uuid+descr_block) */
@@ -284,6 +293,7 @@ typedef struct journal_superblock_s
 #define JBD2_FEATURE_INCOMPAT_64BIT		0x00000002
 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004
 #define JBD2_FEATURE_INCOMPAT_CSUM_V2		0x00000008
+#define JBD2_FEATURE_INCOMPAT_CSUM_V3		0x00000010
 
 /* Features known to this kernel version: */
 #define JBD2_KNOWN_COMPAT_FEATURES	JBD2_FEATURE_COMPAT_CHECKSUM
@@ -291,7 +301,8 @@ typedef struct journal_superblock_s
 #define JBD2_KNOWN_INCOMPAT_FEATURES	(JBD2_FEATURE_INCOMPAT_REVOKE | \
 					JBD2_FEATURE_INCOMPAT_64BIT | \
 					JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
-					JBD2_FEATURE_INCOMPAT_CSUM_V2)
+					JBD2_FEATURE_INCOMPAT_CSUM_V2 | \
+					JBD2_FEATURE_INCOMPAT_CSUM_V3)
 
 #ifdef __KERNEL__
 
@@ -1296,6 +1307,15 @@ static inline int tid_geq(tid_t x, tid_t y)
 extern int jbd2_journal_blocks_per_page(struct inode *inode);
 extern size_t journal_tag_bytes(journal_t *journal);
 
+static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
+{
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) ||
+	    JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3))
+		return 1;
+
+	return 0;
+}
+
 /*
  * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for
  * transaction control blocks.
-- 
cgit v1.2.3


From abdc08a3a263a20e49534a36291d657bf53dda5b Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Tue, 19 Aug 2014 10:06:09 -0700
Subject: gpio: change gpiochip_request_own_desc() prototype

The current prototype of gpiochip_request_own_desc() requires to obtain
a pointer to a descriptor. This is in contradiction to all other GPIO
request schemes, and imposes an extra step of obtaining a descriptor to
drivers. Most drivers actually cannot even perform that step since the
function that does it (gpichip_get_desc()) is gpiolib-private.

Change gpiochip_request_own_desc() to return a descriptor from a
(chip, hwnum) tuple and update users of this function (currently
gpiolib-acpi only).

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/gpio/driver.txt |  3 ++-
 drivers/gpio/gpiolib-acpi.c   | 20 +++-----------------
 drivers/gpio/gpiolib.c        | 18 ++++++++++++++----
 include/linux/gpio/driver.h   |  3 ++-
 4 files changed, 21 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt
index 18790c237977..23b751a10d7b 100644
--- a/Documentation/gpio/driver.txt
+++ b/Documentation/gpio/driver.txt
@@ -178,7 +178,8 @@ does not help since it pins the module to the kernel forever (it calls
 try_module_get()). A GPIO driver can use the following functions instead
 to request and free descriptors without being pinned to the kernel forever.
 
-	int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label)
+	struct gpio_desc *gpiochip_request_own_desc(struct gpio_desc *desc,
+						    const char *label)
 
 	void gpiochip_free_own_desc(struct gpio_desc *desc)
 
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 84540025aa08..f9103e72e2a4 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -145,14 +145,8 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 	if (!handler)
 		return AE_BAD_PARAMETER;
 
-	desc = gpiochip_get_desc(chip, pin);
+	desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event");
 	if (IS_ERR(desc)) {
-		dev_err(chip->dev, "Failed to get GPIO descriptor\n");
-		return AE_ERROR;
-	}
-
-	ret = gpiochip_request_own_desc(desc, "ACPI:Event");
-	if (ret) {
 		dev_err(chip->dev, "Failed to request GPIO\n");
 		return AE_ERROR;
 	}
@@ -420,22 +414,14 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
 			}
 		}
 		if (!found) {
-			int ret;
-
-			desc = gpiochip_get_desc(chip, pin);
+			desc = gpiochip_request_own_desc(chip, pin,
+							 "ACPI:OpRegion");
 			if (IS_ERR(desc)) {
 				status = AE_ERROR;
 				mutex_unlock(&achip->conn_lock);
 				goto out;
 			}
 
-			ret = gpiochip_request_own_desc(desc, "ACPI:OpRegion");
-			if (ret) {
-				status = AE_ERROR;
-				mutex_unlock(&achip->conn_lock);
-				goto out;
-			}
-
 			switch (agpio->io_restriction) {
 			case ACPI_IO_RESTRICT_INPUT:
 				gpiod_direction_input(desc);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 15cc0bb65dda..a5831d6a9b91 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -895,12 +895,22 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested);
  * allows the GPIO chip module to be unloaded as needed (we assume that the
  * GPIO chip driver handles freeing the GPIOs it has requested).
  */
-int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label)
+struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum,
+					    const char *label)
 {
-	if (!desc || !desc->chip)
-		return -EINVAL;
+	struct gpio_desc *desc = gpiochip_get_desc(chip, hwnum);
+	int err;
+
+	if (IS_ERR(desc)) {
+		chip_err(chip, "failed to get GPIO descriptor\n");
+		return desc;
+	}
+
+	err = __gpiod_request(desc, label);
+	if (err < 0)
+		return ERR_PTR(err);
 
-	return __gpiod_request(desc, label);
+	return desc;
 }
 EXPORT_SYMBOL_GPL(gpiochip_request_own_desc);
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index e78a2373e374..a2de58fffd19 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -166,7 +166,8 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
 
 #endif /* CONFIG_GPIO_IRQCHIP */
 
-int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label);
+struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum,
+					    const char *label);
 void gpiochip_free_own_desc(struct gpio_desc *desc);
 
 #else /* CONFIG_GPIOLIB */
-- 
cgit v1.2.3


From 7179569aeb52197fd2a9909ba226c4c9cc0e2e2a Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Thu, 28 Aug 2014 12:36:04 -0700
Subject: regulator: core: Add REGULATOR_EVENT_PRE_VOLTAGE_CHANGE (and ABORT)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some cases we need to know when a regulator is about to be changed.
Add a way for clients to be notified.  Note that for set_voltage() we
don't necessarily know what voltage we'll end up with, so we tell the
client what the range will be so they can prepare.

Signed-off-by: Heiko Stübner <heiko@sntech.de>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Mark Brown <broonie+linaro@kernel.org>
---
 drivers/regulator/core.c           | 63 +++++++++++++++++++++++++++++++++-----
 include/linux/regulator/consumer.h | 20 ++++++++++++
 2 files changed, 76 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a3c3785901f5..dabc8e8862c8 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -102,7 +102,7 @@ static int _regulator_disable(struct regulator_dev *rdev);
 static int _regulator_get_voltage(struct regulator_dev *rdev);
 static int _regulator_get_current_limit(struct regulator_dev *rdev);
 static unsigned int _regulator_get_mode(struct regulator_dev *rdev);
-static void _notifier_call_chain(struct regulator_dev *rdev,
+static int _notifier_call_chain(struct regulator_dev *rdev,
 				  unsigned long event, void *data);
 static int _regulator_do_set_voltage(struct regulator_dev *rdev,
 				     int min_uV, int max_uV);
@@ -2369,6 +2369,55 @@ int regulator_is_supported_voltage(struct regulator *regulator,
 }
 EXPORT_SYMBOL_GPL(regulator_is_supported_voltage);
 
+static int _regulator_call_set_voltage(struct regulator_dev *rdev,
+				       int min_uV, int max_uV,
+				       unsigned *selector)
+{
+	struct pre_voltage_change_data data;
+	int ret;
+
+	data.old_uV = _regulator_get_voltage(rdev);
+	data.min_uV = min_uV;
+	data.max_uV = max_uV;
+	ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE,
+				   &data);
+	if (ret & NOTIFY_STOP_MASK)
+		return -EINVAL;
+
+	ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, selector);
+	if (ret >= 0)
+		return ret;
+
+	_notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE,
+			     (void *)data.old_uV);
+
+	return ret;
+}
+
+static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev,
+					   int uV, unsigned selector)
+{
+	struct pre_voltage_change_data data;
+	int ret;
+
+	data.old_uV = _regulator_get_voltage(rdev);
+	data.min_uV = uV;
+	data.max_uV = uV;
+	ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE,
+				   &data);
+	if (ret & NOTIFY_STOP_MASK)
+		return -EINVAL;
+
+	ret = rdev->desc->ops->set_voltage_sel(rdev, selector);
+	if (ret >= 0)
+		return ret;
+
+	_notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE,
+			     (void *)data.old_uV);
+
+	return ret;
+}
+
 static int _regulator_do_set_voltage(struct regulator_dev *rdev,
 				     int min_uV, int max_uV)
 {
@@ -2396,8 +2445,8 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev,
 	}
 
 	if (rdev->desc->ops->set_voltage) {
-		ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV,
-						   &selector);
+		ret = _regulator_call_set_voltage(rdev, min_uV, max_uV,
+						  &selector);
 
 		if (ret >= 0) {
 			if (rdev->desc->ops->list_voltage)
@@ -2432,8 +2481,8 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev,
 				if (old_selector == selector)
 					ret = 0;
 				else
-					ret = rdev->desc->ops->set_voltage_sel(
-								rdev, ret);
+					ret = _regulator_call_set_voltage_sel(
+						rdev, best_val, selector);
 			} else {
 				ret = -EINVAL;
 			}
@@ -3079,11 +3128,11 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier);
 /* notify regulator consumers and downstream regulator consumers.
  * Note mutex must be held by caller.
  */
-static void _notifier_call_chain(struct regulator_dev *rdev,
+static int _notifier_call_chain(struct regulator_dev *rdev,
 				  unsigned long event, void *data)
 {
 	/* call rdev chain first */
-	blocking_notifier_call_chain(&rdev->notifier, event, data);
+	return blocking_notifier_call_chain(&rdev->notifier, event, data);
 }
 
 /**
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index f8a8733068a7..d347c805f923 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -93,7 +93,12 @@ struct regmap;
  * OVER_TEMP      Regulator over temp.
  * FORCE_DISABLE  Regulator forcibly shut down by software.
  * VOLTAGE_CHANGE Regulator voltage changed.
+ *                Data passed is old voltage cast to (void *).
  * DISABLE        Regulator was disabled.
+ * PRE_VOLTAGE_CHANGE   Regulator is about to have voltage changed.
+ *                      Data passed is "struct pre_voltage_change_data"
+ * ABORT_VOLTAGE_CHANGE Regulator voltage change failed for some reason.
+ *                      Data passed is old voltage cast to (void *).
  *
  * NOTE: These events can be OR'ed together when passed into handler.
  */
@@ -106,6 +111,21 @@ struct regmap;
 #define REGULATOR_EVENT_FORCE_DISABLE		0x20
 #define REGULATOR_EVENT_VOLTAGE_CHANGE		0x40
 #define REGULATOR_EVENT_DISABLE 		0x80
+#define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE	0x100
+#define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE	0x200
+
+/**
+ * struct pre_voltage_change_data - Data sent with PRE_VOLTAGE_CHANGE event
+ *
+ * @old_uV: Current voltage before change.
+ * @min_uV: Min voltage we'll change to.
+ * @max_uV: Max voltage we'll change to.
+ */
+struct pre_voltage_change_data {
+	unsigned long old_uV;
+	unsigned long min_uV;
+	unsigned long max_uV;
+};
 
 struct regulator;
 
-- 
cgit v1.2.3


From 656473003bc7e056c3bbd4a4d9832dad01e86f76 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 29 Aug 2014 14:01:17 +0200
Subject: KVM: forward declare structs in kvm_types.h

Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid
"'struct foo' declared inside parameter list" warnings (and consequent
breakage due to conflicting types).

Move them from individual files to a generic place in linux/kvm_types.h.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/include/asm/kvm_host.h     |  7 ++-----
 arch/arm64/include/asm/kvm_host.h   |  6 ++----
 arch/ia64/include/asm/kvm_host.h    |  3 ---
 arch/mips/include/asm/kvm_host.h    |  5 -----
 arch/powerpc/include/asm/kvm_host.h |  5 -----
 arch/s390/include/asm/kvm_host.h    |  5 +++--
 arch/x86/include/asm/kvm_host.h     |  4 ----
 include/linux/kvm_host.h            |  6 ------
 include/linux/kvm_types.h           | 14 ++++++++++++++
 9 files changed, 21 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6dfb404f6c46..4843397b812c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -40,7 +42,6 @@
 
 #include <kvm/arm_vgic.h>
 
-struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -149,20 +150,17 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 u64 kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
@@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 
 int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
 unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-struct kvm_one_reg;
 int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e10c45a578e3..766147baae0b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
 #ifndef __ARM64_KVM_HOST_H__
 #define __ARM64_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -41,7 +43,6 @@
 
 #define KVM_VCPU_MAX_FEATURES 3
 
-struct kvm_vcpu;
 int kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(long ext);
@@ -164,18 +165,15 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index db95f570705f..fccc09d04649 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -234,9 +234,6 @@ struct kvm_vm_data {
 #define KVM_REQ_PTC_G		32
 #define KVM_REQ_RESUME		33
 
-struct kvm;
-struct kvm_vcpu;
-
 struct kvm_mmio_req {
 	uint64_t addr;          /*  physical address		*/
 	uint64_t size;          /*  size in bytes		*/
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 7a3fc67bd7f9..b93bc80ed7e7 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -96,11 +96,6 @@
 #define CAUSEB_DC			27
 #define CAUSEF_DC			(_ULCAST_(1) << 27)
 
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-struct kvm_interrupt;
-
 extern atomic_t kvm_mips_instance;
 extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn);
 extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 98d9dd50d063..0e597283c5c6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,7 +53,6 @@
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
-struct kvm;
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
 			       unsigned long start, unsigned long end);
@@ -76,10 +75,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 /* Physical Address Mask - allowed range of real mode RAM access */
 #define KVM_PAM			0x0fffffffffffffffULL
 
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-
 struct lppaca;
 struct slb_shadow;
 struct dtl_entry;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 773bef7614d8..d71291d3fb6f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
 
 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+
+#include <linux/types.h>
 #include <linux/hrtimer.h>
 #include <linux/interrupt.h>
+#include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <asm/debug.h>
@@ -431,8 +434,6 @@ static inline bool kvm_is_error_hva(unsigned long addr)
 }
 
 #define ASYNC_PF_PER_VCPU	64
-struct kvm_vcpu;
-struct kvm_async_pf;
 struct kvm_arch_async_pf {
 	unsigned long pfault_token;
 };
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ac0f90e26a0b..567fface45f8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 
 #define ASYNC_PF_PER_VCPU 64
 
-struct kvm_vcpu;
-struct kvm;
-struct kvm_async_pf;
-
 enum kvm_reg {
 	VCPU_REGS_RAX = 0,
 	VCPU_REGS_RCX = 1,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ebd723676633..e1cb915a1096 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -140,8 +140,6 @@ static inline bool is_error_page(struct page *page)
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
 
-struct kvm;
-struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
 extern spinlock_t kvm_lock;
@@ -325,8 +323,6 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
-struct kvm_irq_routing_table;
-
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
@@ -1036,8 +1032,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 
 extern bool kvm_rebooting;
 
-struct kvm_device_ops;
-
 struct kvm_device {
 	struct kvm_device_ops *ops;
 	struct kvm *kvm;
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b0bcce0ddc95..b606bb689a3e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -17,6 +17,20 @@
 #ifndef __KVM_TYPES_H__
 #define __KVM_TYPES_H__
 
+struct kvm;
+struct kvm_async_pf;
+struct kvm_device_ops;
+struct kvm_interrupt;
+struct kvm_irq_routing_table;
+struct kvm_memory_slot;
+struct kvm_one_reg;
+struct kvm_run;
+struct kvm_userspace_memory_region;
+struct kvm_vcpu;
+struct kvm_vcpu_init;
+
+enum kvm_mr_change;
+
 #include <asm/types.h>
 
 /*
-- 
cgit v1.2.3


From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Thu, 28 Aug 2014 15:13:03 +0200
Subject: KVM: remove garbage arg to *hardware_{en,dis}able
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the beggining was on_each_cpu(), which required an unused argument to
kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten.

Remove unnecessary arguments that stem from this.

Signed-off-by: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/include/asm/kvm_host.h     |  2 +-
 arch/arm/kvm/arm.c                  |  2 +-
 arch/arm64/include/asm/kvm_host.h   |  2 +-
 arch/ia64/kvm/kvm-ia64.c            |  4 ++--
 arch/mips/include/asm/kvm_host.h    |  2 +-
 arch/mips/kvm/mips.c                |  2 +-
 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/kvm/powerpc.c          |  2 +-
 arch/s390/include/asm/kvm_host.h    |  2 +-
 arch/s390/kvm/kvm-s390.c            |  2 +-
 arch/x86/include/asm/kvm_host.h     |  4 ++--
 arch/x86/kvm/svm.c                  |  4 ++--
 arch/x86/kvm/vmx.c                  |  4 ++--
 arch/x86/kvm/x86.c                  | 12 ++++++------
 include/linux/kvm_host.h            |  4 ++--
 virt/kvm/kvm_main.c                 |  4 ++--
 16 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index aea259e9431f..032a8538318a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 132bb0d9c5ad..005a7b5fd0aa 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -87,7 +87,7 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b5045e3e05f8..be9970a59497 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 	}
 }
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5e14dcaf844e..ec6b9acb6bea 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	long  status;
 	long  tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
 
 	long status;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 0b24d6622ec1..f2c249796ea8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -762,7 +762,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
 extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 0ec7490d70bd..e3b21e51ff7e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -77,7 +77,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 237cc0cc80a2..604000882352 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -682,7 +682,7 @@ struct kvm_vcpu_arch {
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 72c3fc085207..da505237a664 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -384,7 +384,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 }
 EXPORT_SYMBOL_GPL(kvmppc_ld);
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index f6dd90684b97..a76a124dff48 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -452,7 +452,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 extern char sie_exit;
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_check_processor_compat(void *rtn) {}
 static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b8fe1ae777db..628e992eeded 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -100,7 +100,7 @@ int test_vfacility(unsigned long nr)
 }
 
 /* Section: not file related */
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	/* every s390 is virtualization enabled ;-) */
 	return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 567fface45f8..73e4149eda33 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -661,8 +661,8 @@ struct msr_data {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	int (*hardware_enable)(void *dummy);
-	void (*hardware_disable)(void *dummy);
+	int (*hardware_enable)(void);
+	void (*hardware_disable)(void);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7cd230e55118..8ef704037370 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -622,7 +622,7 @@ static int has_svm(void)
 	return 1;
 }
 
-static void svm_hardware_disable(void *garbage)
+static void svm_hardware_disable(void)
 {
 	/* Make sure we clean up behind us */
 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage)
 	amd_pmu_disable_virt();
 }
 
-static int svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void)
 {
 
 	struct svm_cpu_data *sd;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d70550d0bcff..671ca5edc709 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2728,7 +2728,7 @@ static void kvm_cpu_vmxon(u64 addr)
 			: "memory", "cc");
 }
 
-static int hardware_enable(void *garbage)
+static int hardware_enable(void)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2792,7 +2792,7 @@ static void kvm_cpu_vmxoff(void)
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 }
 
-static void hardware_disable(void *garbage)
+static void hardware_disable(void)
 {
 	if (vmm_exclusive) {
 		vmclear_local_loaded_vmcss();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c10408ef9ab1..a375dfc42f6a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -6959,7 +6959,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
 	kvm_rip_write(vcpu, 0);
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
@@ -6970,7 +6970,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	bool stable, backwards_tsc = false;
 
 	kvm_shared_msr_cpu_online();
-	ret = kvm_x86_ops->hardware_enable(garbage);
+	ret = kvm_x86_ops->hardware_enable();
 	if (ret != 0)
 		return ret;
 
@@ -7051,10 +7051,10 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
-	kvm_x86_ops->hardware_disable(garbage);
-	drop_user_return_notifiers(garbage);
+	kvm_x86_ops->hardware_disable();
+	drop_user_return_notifiers();
 }
 
 int kvm_arch_hardware_setup(void)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e1cb915a1096..e098dce179df 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -630,8 +630,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
-int kvm_arch_hardware_enable(void *garbage);
-void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1d03967def40..7176929a4cda 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk)
 
 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
 
-	r = kvm_arch_hardware_enable(NULL);
+	r = kvm_arch_hardware_enable();
 
 	if (r) {
 		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk)
 	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
 	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
-	kvm_arch_hardware_disable(NULL);
+	kvm_arch_hardware_disable();
 }
 
 static void hardware_disable(void)
-- 
cgit v1.2.3


From 10c51b56232d24f150e39884a9e749fd99cbc60c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 27 Aug 2014 11:11:27 +0200
Subject: net: add skb_get_tx_queue() helper

Replace occurences of skb_get_queue_mapping() and follow-up
netdev_get_tx_queue() with an actual helper function.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++++++
 net/core/netpoll.c        | 2 +-
 net/core/pktgen.c         | 4 +---
 net/packet/af_packet.c    | 4 +---
 net/sched/sch_generic.c   | 6 ++++--
 5 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 429801370d0c..dfc1d8b8bd0f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1747,6 +1747,12 @@ struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
 	return &dev->_tx[index];
 }
 
+static inline struct netdev_queue *skb_get_tx_queue(const struct net_device *dev,
+						    const struct sk_buff *skb)
+{
+	return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+}
+
 static inline void netdev_for_each_tx_queue(struct net_device *dev,
 					    void (*f)(struct net_device *,
 						      struct netdev_queue *,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a5ad06828d67..12b1df976562 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -115,7 +115,7 @@ static void queue_process(struct work_struct *work)
 			continue;
 		}
 
-		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+		txq = skb_get_tx_queue(dev, skb);
 
 		local_irq_save(flags);
 		HARD_TX_LOCK(dev, txq, smp_processor_id());
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 83e2b4b19eb7..d81b540096c3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3286,7 +3286,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 {
 	struct net_device *odev = pkt_dev->odev;
 	struct netdev_queue *txq;
-	u16 queue_map;
 	int ret;
 
 	/* If device is offline, then don't send */
@@ -3324,8 +3323,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->delay && pkt_dev->last_ok)
 		spin(pkt_dev, pkt_dev->next_tx);
 
-	queue_map = skb_get_queue_mapping(pkt_dev->skb);
-	txq = netdev_get_tx_queue(odev, queue_map);
+	txq = skb_get_tx_queue(odev, pkt_dev->skb);
 
 	local_bh_disable();
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0dfa990d4eaa..b7a7f5a721bd 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -243,7 +243,6 @@ static int packet_direct_xmit(struct sk_buff *skb)
 	netdev_features_t features;
 	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
-	u16 queue_map;
 
 	if (unlikely(!netif_running(dev) ||
 		     !netif_carrier_ok(dev)))
@@ -254,8 +253,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
 	    __skb_linearize(skb))
 		goto drop;
 
-	queue_map = skb_get_queue_mapping(skb);
-	txq = netdev_get_tx_queue(dev, queue_map);
+	txq = skb_get_tx_queue(dev, skb);
 
 	local_bh_disable();
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fc04fe93c2da..05b3f5d104af 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -63,7 +63,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 	if (unlikely(skb)) {
 		/* check the reason of requeuing without tx lock first */
-		txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb));
+		txq = skb_get_tx_queue(txq->dev, skb);
 		if (!netif_xmit_frozen_or_stopped(txq)) {
 			q->gso_skb = NULL;
 			q->q.qlen--;
@@ -183,10 +183,12 @@ static inline int qdisc_restart(struct Qdisc *q)
 	skb = dequeue_skb(q);
 	if (unlikely(!skb))
 		return 0;
+
 	WARN_ON_ONCE(skb_dst_is_noref(skb));
+
 	root_lock = qdisc_lock(q);
 	dev = qdisc_dev(q);
-	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+	txq = skb_get_tx_queue(dev, skb);
 
 	return sch_direct_xmit(skb, q, dev, txq, root_lock);
 }
-- 
cgit v1.2.3


From b95089d00c04712a9d4655d5c638930ac24b7bd3 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 27 Aug 2014 16:47:48 +0300
Subject: net/mlx4: Move the tunnel steering helper function to mlx4_core

Move the function which we use to set VXLAN DMFS (flow-steering) rules
from mlx4_en to mlx4_core. This refactoring will allow the mlx4_ib driver
to call the helper for the use case of user-space RAW Ethernet QPs, such
that they can serve VXLAN traffic too.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 31 ++-------------------
 drivers/net/ethernet/mellanox/mlx4/mcg.c       | 38 ++++++++++++++++++++++++++
 include/linux/mlx4/device.h                    |  3 ++
 3 files changed, 43 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index bb536aa613f4..abddcf8c40aa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -474,39 +474,12 @@ static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *ad
 				    int qpn, u64 *reg_id)
 {
 	int err;
-	struct mlx4_spec_list spec_eth_outer = { {NULL} };
-	struct mlx4_spec_list spec_vxlan     = { {NULL} };
-	struct mlx4_spec_list spec_eth_inner = { {NULL} };
-
-	struct mlx4_net_trans_rule rule = {
-		.queue_mode = MLX4_NET_TRANS_Q_FIFO,
-		.exclusive = 0,
-		.allow_loopback = 1,
-		.promisc_mode = MLX4_FS_REGULAR,
-		.priority = MLX4_DOMAIN_NIC,
-	};
-
-	__be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
 
 	if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
 		return 0; /* do nothing */
 
-	rule.port = priv->port;
-	rule.qpn = qpn;
-	INIT_LIST_HEAD(&rule.list);
-
-	spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH;
-	memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN);
-	memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
-
-	spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN;    /* any vxlan header */
-	spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH;	 /* any inner eth header */
-
-	list_add_tail(&spec_eth_outer.list, &rule.list);
-	list_add_tail(&spec_vxlan.list,     &rule.list);
-	list_add_tail(&spec_eth_inner.list, &rule.list);
-
-	err = mlx4_flow_attach(priv->mdev->dev, &rule, reg_id);
+	err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn,
+				    MLX4_DOMAIN_NIC, reg_id);
 	if (err) {
 		en_err(priv, "failed to add vxlan steering rule, err %d\n", err);
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index d80e7a6fac74..ca0f98c95105 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -1020,6 +1020,44 @@ int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
 }
 EXPORT_SYMBOL_GPL(mlx4_flow_detach);
 
+int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr,
+			  int port, int qpn, u16 prio, u64 *reg_id)
+{
+	int err;
+	struct mlx4_spec_list spec_eth_outer = { {NULL} };
+	struct mlx4_spec_list spec_vxlan     = { {NULL} };
+	struct mlx4_spec_list spec_eth_inner = { {NULL} };
+
+	struct mlx4_net_trans_rule rule = {
+		.queue_mode = MLX4_NET_TRANS_Q_FIFO,
+		.exclusive = 0,
+		.allow_loopback = 1,
+		.promisc_mode = MLX4_FS_REGULAR,
+	};
+
+	__be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+	rule.port = port;
+	rule.qpn = qpn;
+	rule.priority = prio;
+	INIT_LIST_HEAD(&rule.list);
+
+	spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH;
+	memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN);
+	memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+
+	spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN;    /* any vxlan header */
+	spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH;	 /* any inner eth header */
+
+	list_add_tail(&spec_eth_outer.list, &rule.list);
+	list_add_tail(&spec_vxlan.list,     &rule.list);
+	list_add_tail(&spec_eth_inner.list, &rule.list);
+
+	err = mlx4_flow_attach(dev, &rule, reg_id);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_tunnel_steer_add);
+
 int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
 				      u32 max_range_qpn)
 {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 071f6b234604..511c6e0d21a9 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1196,6 +1196,9 @@ int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev,
 				  enum mlx4_net_trans_rule_id id);
 int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id);
 
+int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr,
+			  int port, int qpn, u16 prio, u64 *reg_id);
+
 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port,
 			  int i, int val);
 
-- 
cgit v1.2.3


From 18fe8db5f2b53e4ac67b47048f24f50c57a2a759 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:44:31 +0200
Subject: include/linux/cycx_x25.h: Remove unused header

The header file include/linux/cycx_x25.h does not seem to be used
anywhere. It was orphaned by 6fcdf4facb "wanrouter: delete now
orphaned header content, files/drivers". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cycx_x25.h | 125 -----------------------------------------------
 1 file changed, 125 deletions(-)
 delete mode 100644 include/linux/cycx_x25.h

(limited to 'include/linux')

diff --git a/include/linux/cycx_x25.h b/include/linux/cycx_x25.h
deleted file mode 100644
index 362bf19d6cf1..000000000000
--- a/include/linux/cycx_x25.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#ifndef	_CYCX_X25_H
-#define	_CYCX_X25_H
-/*
-* cycx_x25.h	Cyclom X.25 firmware API definitions.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdla_x25.h by Gene Kozin <74604.152@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 2000/04/02	acme		dprintk and cycx_debug
-* 1999/01/03	acme		judicious use of data types
-* 1999/01/02	acme		#define X25_ACK_N3	0x4411
-* 1998/12/28	acme		cleanup: lot'o'things removed
-*					 commands listed,
-*					 TX25Cmd & TX25Config structs
-*					 typedef'ed
-*/
-#ifndef PACKED
-#define PACKED __attribute__((packed))
-#endif 
-
-/* X.25 shared memory layout. */
-#define	X25_MBOX_OFFS	0x300	/* general mailbox block */
-#define	X25_RXMBOX_OFFS	0x340	/* receive mailbox */
-
-/* Debug */
-#define dprintk(level, format, a...) if (cycx_debug >= level) printk(format, ##a)
-
-extern unsigned int cycx_debug;
-
-/* Data Structures */
-/* X.25 Command Block. */
-struct cycx_x25_cmd {
-	u16 command;
-	u16 link;	/* values: 0 or 1 */
-	u16 len;	/* values: 0 thru 0x205 (517) */
-	u32 buf;
-} PACKED;
-
-/* Defines for the 'command' field. */
-#define X25_CONNECT_REQUEST             0x4401
-#define X25_CONNECT_RESPONSE            0x4402
-#define X25_DISCONNECT_REQUEST          0x4403
-#define X25_DISCONNECT_RESPONSE         0x4404
-#define X25_DATA_REQUEST                0x4405
-#define X25_ACK_TO_VC			0x4406
-#define X25_INTERRUPT_RESPONSE          0x4407
-#define X25_CONFIG                      0x4408
-#define X25_CONNECT_INDICATION          0x4409
-#define X25_CONNECT_CONFIRM             0x440A
-#define X25_DISCONNECT_INDICATION       0x440B
-#define X25_DISCONNECT_CONFIRM          0x440C
-#define X25_DATA_INDICATION             0x440E
-#define X25_INTERRUPT_INDICATION        0x440F
-#define X25_ACK_FROM_VC			0x4410
-#define X25_ACK_N3			0x4411
-#define X25_CONNECT_COLLISION           0x4413
-#define X25_N3WIN                       0x4414
-#define X25_LINE_ON                     0x4415
-#define X25_LINE_OFF                    0x4416
-#define X25_RESET_REQUEST               0x4417
-#define X25_LOG                         0x4500
-#define X25_STATISTIC                   0x4600
-#define X25_TRACE                       0x4700
-#define X25_N2TRACEXC                   0x4702
-#define X25_N3TRACEXC                   0x4703
-
-/**
- *	struct cycx_x25_config - cyclom2x x25 firmware configuration
- *	@link - link number
- *	@speed - line speed
- *	@clock - internal/external
- *	@n2 - # of level 2 retransm.(values: 1 thru FF)
- *	@n2win - level 2 window (values: 1 thru 7)
- *	@n3win - level 3 window (values: 1 thru 7)
- *	@nvc - # of logical channels (values: 1 thru 64)
- *	@pktlen - level 3 packet length - log base 2 of size
- *	@locaddr - my address
- *	@remaddr - remote address
- *	@t1 - time, in seconds
- *	@t2 - time, in seconds
- *	@t21 - time, in seconds
- *	@npvc - # of permanent virt. circuits (1 thru nvc)
- *	@t23 - time, in seconds
- *	@flags - see dosx25.doc, in portuguese, for details
- */
-struct cycx_x25_config {
-	u8  link;
-	u8  speed;
-	u8  clock;
-	u8  n2;
-	u8  n2win;
-	u8  n3win;
-	u8  nvc;
-	u8  pktlen;
-	u8  locaddr;
-	u8  remaddr;
-	u16 t1;
-	u16 t2;
-	u8  t21;
-	u8  npvc;
-	u8  t23;
-	u8  flags;
-} PACKED;
-
-struct cycx_x25_stats {
-	u16 rx_crc_errors;
-	u16 rx_over_errors;
-	u16 n2_tx_frames;
-	u16 n2_rx_frames;
-	u16 tx_timeouts;
-	u16 rx_timeouts;
-	u16 n3_tx_packets;
-	u16 n3_rx_packets;
-	u16 tx_aborts;
-	u16 rx_aborts;
-} PACKED;
-#endif	/* _CYCX_X25_H */
-- 
cgit v1.2.3


From fbd74659d4513816a6249b0db491e8d831803520 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:44:32 +0200
Subject: include/linux/i82593.h: Remove unused header

The header file include/linux/i82593.h does not seem to be used
anywhere. It was orphaned by 8a594170 "drivers/net: delete intel
i825xx based znet notebook driver". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/i82593.h | 229 -------------------------------------------------
 1 file changed, 229 deletions(-)
 delete mode 100644 include/linux/i82593.h

(limited to 'include/linux')

diff --git a/include/linux/i82593.h b/include/linux/i82593.h
deleted file mode 100644
index afac5c7a323d..000000000000
--- a/include/linux/i82593.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Definitions for Intel 82593 CSMA/CD Core LAN Controller
- * The definitions are taken from the 1992 users manual with Intel
- * order number 297125-001.
- *
- * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp
- *
- * Copyright 1994, Anders Klemets <klemets@it.kth.se>
- *
- * HISTORY
- * i82593.h,v
- * Revision 1.4  2005/11/4  09:15:00  baroniunas
- * Modified copyright with permission of author as follows:
- *
- *   "If I82539.H is the only file with my copyright statement
- *    that is included in the Source Forge project, then you have
- *    my approval to change the copyright statement to be a GPL
- *    license, in the way you proposed on October 10."
- *
- * Revision 1.1  1996/07/17 15:23:12  root
- * Initial revision
- *
- * Revision 1.3  1995/04/05  15:13:58  adj
- * Initial alpha release
- *
- * Revision 1.2  1994/06/16  23:57:31  klemets
- * Mirrored all the fields in the configuration block.
- *
- * Revision 1.1  1994/06/02  20:25:34  klemets
- * Initial revision
- *
- *
- */
-#ifndef	_I82593_H
-#define	_I82593_H
-
-/* Intel 82593 CSMA/CD Core LAN Controller */
-
-/* Port 0 Command Register definitions */
-
-/* Execution operations */
-#define OP0_NOP			0	/* CHNL = 0 */
-#define OP0_SWIT_TO_PORT_1	0	/* CHNL = 1 */
-#define OP0_IA_SETUP		1
-#define OP0_CONFIGURE		2
-#define OP0_MC_SETUP		3
-#define OP0_TRANSMIT		4
-#define OP0_TDR			5
-#define OP0_DUMP		6
-#define OP0_DIAGNOSE		7
-#define OP0_TRANSMIT_NO_CRC	9
-#define OP0_RETRANSMIT		12
-#define OP0_ABORT		13
-/* Reception operations */
-#define OP0_RCV_ENABLE		8
-#define OP0_RCV_DISABLE		10
-#define OP0_STOP_RCV		11
-/* Status pointer control operations */
-#define OP0_FIX_PTR		15	/* CHNL = 1 */
-#define OP0_RLS_PTR		15	/* CHNL = 0 */
-#define OP0_RESET		14
-
-#define CR0_CHNL		(1 << 4)	/* 0=Channel 0, 1=Channel 1 */
-#define CR0_STATUS_0		0x00
-#define CR0_STATUS_1		0x20
-#define CR0_STATUS_2		0x40
-#define CR0_STATUS_3		0x60
-#define CR0_INT_ACK		(1 << 7)	/* 0=No ack, 1=acknowledge */
-
-/* Port 0 Status Register definitions */
-
-#define SR0_NO_RESULT		0		/* dummy */
-#define SR0_EVENT_MASK		0x0f
-#define SR0_IA_SETUP_DONE	1
-#define SR0_CONFIGURE_DONE	2
-#define SR0_MC_SETUP_DONE	3
-#define SR0_TRANSMIT_DONE	4
-#define SR0_TDR_DONE		5
-#define SR0_DUMP_DONE		6
-#define SR0_DIAGNOSE_PASSED	7
-#define SR0_TRANSMIT_NO_CRC_DONE 9
-#define SR0_RETRANSMIT_DONE	12
-#define SR0_EXECUTION_ABORTED	13
-#define SR0_END_OF_FRAME	8
-#define SR0_RECEPTION_ABORTED	10
-#define SR0_DIAGNOSE_FAILED	15
-#define SR0_STOP_REG_HIT	11
-
-#define SR0_CHNL		(1 << 4)
-#define SR0_EXECUTION		(1 << 5)
-#define SR0_RECEPTION		(1 << 6)
-#define SR0_INTERRUPT		(1 << 7)
-#define SR0_BOTH_RX_TX		(SR0_EXECUTION | SR0_RECEPTION)
-
-#define SR3_EXEC_STATE_MASK	0x03
-#define SR3_EXEC_IDLE		0
-#define SR3_TX_ABORT_IN_PROGRESS 1
-#define SR3_EXEC_ACTIVE		2
-#define SR3_ABORT_IN_PROGRESS	3
-#define SR3_EXEC_CHNL		(1 << 2)
-#define SR3_STP_ON_NO_RSRC	(1 << 3)
-#define SR3_RCVING_NO_RSRC	(1 << 4)
-#define SR3_RCV_STATE_MASK	0x60
-#define SR3_RCV_IDLE		0x00
-#define SR3_RCV_READY		0x20
-#define SR3_RCV_ACTIVE		0x40
-#define SR3_RCV_STOP_IN_PROG	0x60
-#define SR3_RCV_CHNL		(1 << 7)
-
-/* Port 1 Command Register definitions */
-
-#define OP1_NOP			0
-#define OP1_SWIT_TO_PORT_0	1
-#define OP1_INT_DISABLE		2
-#define OP1_INT_ENABLE		3
-#define OP1_SET_TS		5
-#define OP1_RST_TS		7
-#define OP1_POWER_DOWN		8
-#define OP1_RESET_RING_MNGMT	11
-#define OP1_RESET		14
-#define OP1_SEL_RST		15
-
-#define CR1_STATUS_4		0x00
-#define CR1_STATUS_5		0x20
-#define CR1_STATUS_6		0x40
-#define CR1_STOP_REG_UPDATE	(1 << 7)
-
-/* Receive frame status bits */
-
-#define	RX_RCLD			(1 << 0)
-#define RX_IA_MATCH		(1 << 1)
-#define	RX_NO_AD_MATCH		(1 << 2)
-#define RX_NO_SFD		(1 << 3)
-#define RX_SRT_FRM		(1 << 7)
-#define RX_OVRRUN		(1 << 8)
-#define RX_ALG_ERR		(1 << 10)
-#define RX_CRC_ERR		(1 << 11)
-#define RX_LEN_ERR		(1 << 12)
-#define RX_RCV_OK		(1 << 13)
-#define RX_TYP_LEN		(1 << 15)
-
-/* Transmit status bits */
-
-#define TX_NCOL_MASK		0x0f
-#define TX_FRTL			(1 << 4)
-#define TX_MAX_COL		(1 << 5)
-#define TX_HRT_BEAT		(1 << 6)
-#define TX_DEFER		(1 << 7)
-#define TX_UND_RUN		(1 << 8)
-#define TX_LOST_CTS		(1 << 9)
-#define TX_LOST_CRS		(1 << 10)
-#define TX_LTCOL		(1 << 11)
-#define TX_OK			(1 << 13)
-#define TX_COLL			(1 << 15)
-
-struct i82593_conf_block {
-  u_char fifo_limit : 4,
-  	 forgnesi   : 1,
-  	 fifo_32    : 1,
-  	 d6mod      : 1,
-  	 throttle_enb : 1;
-  u_char throttle   : 6,
-	 cntrxint   : 1,
-	 contin	    : 1;
-  u_char addr_len   : 3,
-  	 acloc 	    : 1,
- 	 preamb_len : 2,
-  	 loopback   : 2;
-  u_char lin_prio   : 3,
-	 tbofstop   : 1,
-	 exp_prio   : 3,
-	 bof_met    : 1;
-  u_char	    : 4,
-	 ifrm_spc   : 4;
-  u_char	    : 5,
-	 slottim_low : 3;
-  u_char slottim_hi : 3,
-		    : 1,
-	 max_retr   : 4;
-  u_char prmisc     : 1,
-	 bc_dis     : 1,
-  		    : 1,
-	 crs_1	    : 1,
-	 nocrc_ins  : 1,
-	 crc_1632   : 1,
-  	 	    : 1,
-  	 crs_cdt    : 1;
-  u_char cs_filter  : 3,
-	 crs_src    : 1,
-	 cd_filter  : 3,
-		    : 1;
-  u_char	    : 2,
-  	 min_fr_len : 6;
-  u_char lng_typ    : 1,
-	 lng_fld    : 1,
-	 rxcrc_xf   : 1,
-	 artx	    : 1,
-	 sarec	    : 1,
-	 tx_jabber  : 1,	/* why is this called max_len in the manual? */
-	 hash_1	    : 1,
-  	 lbpkpol    : 1;
-  u_char	    : 6,
-  	 fdx	    : 1,
-  	  	    : 1;
-  u_char dummy_6    : 6,	/* supposed to be ones */
-  	 mult_ia    : 1,
-  	 dis_bof    : 1;
-  u_char dummy_1    : 1,	/* supposed to be one */
-	 tx_ifs_retrig : 2,
-	 mc_all     : 1,
-	 rcv_mon    : 2,
-	 frag_acpt  : 1,
-  	 tstrttrs   : 1;
-  u_char fretx	    : 1,
-	 runt_eop   : 1,
-	 hw_sw_pin  : 1,
-	 big_endn   : 1,
-	 syncrqs    : 1,
-	 sttlen     : 1,
-	 tx_eop     : 1,
-  	 rx_eop	    : 1;
-  u_char rbuf_size  : 5,
-	 rcvstop    : 1,
-  	 	    : 2;
-};
-
-#define I82593_MAX_MULTICAST_ADDRESSES	128	/* Hardware hashed filter */
-
-#endif /* _I82593_H */
-- 
cgit v1.2.3


From 6fb7c3778f0fba0bad099c30e834c413c4f8bcb5 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:44:33 +0200
Subject: include/linux/phonedev.h: Remove unused header

The header file include/linux/phonedev.h does not seem to be used
anywhere. It was orphaned by 7326446c "Staging: remove telephony
drivers". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phonedev.h | 25 -------------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 include/linux/phonedev.h

(limited to 'include/linux')

diff --git a/include/linux/phonedev.h b/include/linux/phonedev.h
deleted file mode 100644
index 4269de99e320..000000000000
--- a/include/linux/phonedev.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __LINUX_PHONEDEV_H
-#define __LINUX_PHONEDEV_H
-
-#include <linux/types.h>
-
-#ifdef __KERNEL__
-
-#include <linux/poll.h>
-
-struct phone_device {
-	struct phone_device *next;
-	const struct file_operations *f_op;
-	int (*open) (struct phone_device *, struct file *);
-	int board;		/* Device private index */
-	int minor;
-};
-
-extern int phonedev_init(void);
-#define PHONE_MAJOR	100
-extern int phone_register_device(struct phone_device *, int unit);
-#define PHONE_UNIT_ANY	-1
-extern void phone_unregister_device(struct phone_device *);
-
-#endif
-#endif
-- 
cgit v1.2.3


From de20fe8e2cc3c4ca13fdb529e6720d9d199333fe Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 27 Aug 2014 21:26:35 -0700
Subject: net: Allocate a new 16 bits for flags in skbuff

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b69b7b512c06..3c9574c80933 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -598,6 +598,10 @@ struct sk_buff {
 		__u32		reserved_tailroom;
 	};
 
+	kmemcheck_bitfield_begin(flags3);
+	/* 16 bit hole */
+	kmemcheck_bitfield_end(flags3);
+
 	__be16			inner_protocol;
 	__u16			inner_transport_header;
 	__u16			inner_network_header;
-- 
cgit v1.2.3


From 77cffe23c1f88835f6bd7b47bfa0c060c2969828 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 27 Aug 2014 21:26:46 -0700
Subject: net: Clarification of CHECKSUM_UNNECESSARY

This patch:
 - Clarifies the specific requirements of devices returning
   CHECKSUM_UNNECESSARY (comments in skbuff.h).
 - Adds csum_level field to skbuff. This is used to express how
   many checksums are covered by CHECKSUM_UNNECESSARY (stores n - 1).
   This replaces the overloading of skb->encapsulation, that field is
   is now only used to indicate inner headers are valid.
 - Change __skb_checksum_validate_needed to "consume" each checksum
   as indicated by csum_level as layers of the the packet are parsed.
 - Remove skb_pop_rcv_encapsulation, no longer needed in the new
   csum_level model.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c    |  2 --
 include/linux/skbuff.h | 74 ++++++++++++++++++++++++++++++++++----------------
 net/ipv4/gre_demux.c   |  1 -
 3 files changed, 51 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index beb377b2d4b7..67527f3d3be2 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1158,8 +1158,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	if (!vs)
 		goto drop;
 
-	skb_pop_rcv_encapsulation(skb);
-
 	vs->rcv(vs, skb, vxh->vx_vni);
 	return 0;
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3c9574c80933..c93b5859a772 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -47,11 +47,29 @@
  *
  *   The hardware you're dealing with doesn't calculate the full checksum
  *   (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums
- *   for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will
- *   set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still
- *   undefined in this case though. It is a bad option, but, unfortunately,
- *   nowadays most vendors do this. Apparently with the secret goal to sell
- *   you new devices, when you will add new protocol to your host, f.e. IPv6 8)
+ *   for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY
+ *   if their checksums are okay. skb->csum is still undefined in this case
+ *   though. It is a bad option, but, unfortunately, nowadays most vendors do
+ *   this. Apparently with the secret goal to sell you new devices, when you
+ *   will add new protocol to your host, f.e. IPv6 8)
+ *
+ *   CHECKSUM_UNNECESSARY is applicable to following protocols:
+ *     TCP: IPv6 and IPv4.
+ *     UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a
+ *       zero UDP checksum for either IPv4 or IPv6, the networking stack
+ *       may perform further validation in this case.
+ *     GRE: only if the checksum is present in the header.
+ *     SCTP: indicates the CRC in SCTP header has been validated.
+ *
+ *   skb->csum_level indicates the number of consecutive checksums found in
+ *   the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
+ *   For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet
+ *   and a device is able to verify the checksums for UDP (possibly zero),
+ *   GRE (checksum flag is set), and TCP-- skb->csum_level would be set to
+ *   two. If the device were only able to verify the UDP checksum and not
+ *   GRE, either because it doesn't support GRE checksum of because GRE
+ *   checksum is bad, skb->csum_level would be set to zero (TCP checksum is
+ *   not considered in this case).
  *
  * CHECKSUM_COMPLETE:
  *
@@ -112,6 +130,9 @@
 #define CHECKSUM_COMPLETE	2
 #define CHECKSUM_PARTIAL	3
 
+/* Maximum value in skb->csum_level */
+#define SKB_MAX_CSUM_LEVEL	3
+
 #define SKB_DATA_ALIGN(X)	ALIGN(X, SMP_CACHE_BYTES)
 #define SKB_WITH_OVERHEAD(X)	\
 	((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@@ -571,11 +592,7 @@ struct sk_buff {
 	__u8			wifi_acked:1;
 	__u8			no_fcs:1;
 	__u8			head_frag:1;
-	/* Encapsulation protocol and NIC drivers should use
-	 * this flag to indicate to each other if the skb contains
-	 * encapsulated packet or not and maybe use the inner packet
-	 * headers if needed
-	 */
+	/* Indicates the inner headers are valid in the skbuff. */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
@@ -599,7 +616,8 @@ struct sk_buff {
 	};
 
 	kmemcheck_bitfield_begin(flags3);
-	/* 16 bit hole */
+	__u8			csum_level:2;
+	/* 14 bit hole */
 	kmemcheck_bitfield_end(flags3);
 
 	__be16			inner_protocol;
@@ -1866,18 +1884,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 	return pskb_may_pull(skb, skb_network_offset(skb) + len);
 }
 
-static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
-{
-	/* Only continue with checksum unnecessary if device indicated
-	 * it is valid across encapsulation (skb->encapsulation was set).
-	 */
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
-		skb->ip_summed = CHECKSUM_NONE;
-
-	skb->encapsulation = 0;
-	skb->csum_valid = 0;
-}
-
 /*
  * CPUs often take a performance hit when accessing unaligned memory
  * locations. The actual performance hit varies, it can be small if the
@@ -2798,6 +2804,27 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
 	       0 : __skb_checksum_complete(skb);
 }
 
+static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		if (skb->csum_level == 0)
+			skb->ip_summed = CHECKSUM_NONE;
+		else
+			skb->csum_level--;
+	}
+}
+
+static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+			skb->csum_level++;
+	} else if (skb->ip_summed == CHECKSUM_NONE) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->csum_level = 0;
+	}
+}
+
 /* Check if we need to perform checksum complete validation.
  *
  * Returns true if checksum complete is needed, false otherwise
@@ -2809,6 +2836,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
 {
 	if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
 		skb->csum_valid = 1;
+		__skb_decr_checksum_unnecessary(skb);
 		return false;
 	}
 
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 7c1a8ff974dd..0485bf7f8f03 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -125,7 +125,6 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 			*csum_err = true;
 			return -EINVAL;
 		}
-		skb_pop_rcv_encapsulation(skb);
 		options++;
 	}
 
-- 
cgit v1.2.3


From 662880f4420340aad4f9a62a349c6c9d4faa1a5d Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 27 Aug 2014 21:26:56 -0700
Subject: net: Allow GRO to use and set levels of checksum unnecessary

Allow GRO path to "consume" checksums provided in CHECKSUM_UNNECESSARY
and to report new checksums verfied for use in fallback to normal
path.

Change GRO checksum path to track csum_level using a csum_cnt field
in NAPI_GRO_CB. On GRO initialization, if ip_summed is
CHECKSUM_UNNECESSARY set NAPI_GRO_CB(skb)->csum_cnt to
skb->csum_level + 1. For each checksum verified, decrement
NAPI_GRO_CB(skb)->csum_cnt while its greater than zero. If a checksum
is verfied and NAPI_GRO_CB(skb)->csum_cnt == 0, we have verified a
deeper checksum than originally indicated in skbuf so increment
csum_level (or initialize to CHECKSUM_UNNECESSARY if ip_summed is
CHECKSUM_NONE or CHECKSUM_COMPLETE).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 26 ++++++++++++--------------
 net/core/dev.c            | 24 ++++++++++++++++--------
 net/ipv4/gre_offload.c    |  7 ++-----
 net/ipv4/udp_offload.c    |  5 +++--
 4 files changed, 33 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dfc1d8b8bd0f..456eb1fe51e8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1883,8 +1883,8 @@ struct napi_gro_cb {
 	/* GRO checksum is valid */
 	u8	csum_valid:1;
 
-	/* Number encapsulation layers crossed */
-	u8	encapsulation;
+	/* Number of checksums via CHECKSUM_UNNECESSARY */
+	u8	csum_cnt:3;
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
@@ -2179,8 +2179,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
 						      __sum16 check)
 {
 	return (skb->ip_summed != CHECKSUM_PARTIAL &&
-		(skb->ip_summed != CHECKSUM_UNNECESSARY ||
-		 (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) &&
+		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 		(!zero_okay || check));
 }
 
@@ -2196,18 +2195,17 @@ static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
 	return __skb_gro_checksum_complete(skb);
 }
 
-/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level
- * checksum or an encapsulated one during GRO. This saves work
- * if we fallback to normal path with the packet.
- */
 static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-		if (NAPI_GRO_CB(skb)->encapsulation)
-			skb->encapsulation = 1;
-	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		skb->encapsulation = 0;
+	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
+		/* Consume a checksum from CHECKSUM_UNNECESSARY */
+		NAPI_GRO_CB(skb)->csum_cnt--;
+	} else {
+		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
+		 * verified a new top level checksum or an encapsulated one
+		 * during GRO. This saves work if we fallback to normal path.
+		 */
+		__skb_incr_checksum_unnecessary(skb);
 	}
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 26d296c2447c..a6077ef56345 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3962,13 +3962,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 
 	gro_list_prepare(napi, skb);
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		NAPI_GRO_CB(skb)->csum = skb->csum;
-		NAPI_GRO_CB(skb)->csum_valid = 1;
-	} else {
-		NAPI_GRO_CB(skb)->csum_valid = 0;
-	}
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
 		if (ptype->type != type || !ptype->callbacks.gro_receive)
@@ -3980,7 +3973,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 		NAPI_GRO_CB(skb)->udp_mark = 0;
-		NAPI_GRO_CB(skb)->encapsulation = 0;
+
+		/* Setup for GRO checksum validation */
+		switch (skb->ip_summed) {
+		case CHECKSUM_COMPLETE:
+			NAPI_GRO_CB(skb)->csum = skb->csum;
+			NAPI_GRO_CB(skb)->csum_valid = 1;
+			NAPI_GRO_CB(skb)->csum_cnt = 0;
+			break;
+		case CHECKSUM_UNNECESSARY:
+			NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
+			NAPI_GRO_CB(skb)->csum_valid = 0;
+			break;
+		default:
+			NAPI_GRO_CB(skb)->csum_cnt = 0;
+			NAPI_GRO_CB(skb)->csum_valid = 0;
+		}
 
 		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 		break;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d1bd16937d93..a4d7965fb880 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -172,12 +172,9 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
 	}
 
 	/* Don't bother verifying checksum if we're going to flush anyway. */
-	if (greh->flags & GRE_CSUM) {
-		if (!NAPI_GRO_CB(skb)->flush &&
-		    skb_gro_checksum_simple_validate(skb))
+	if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush &&
+	    skb_gro_checksum_simple_validate(skb))
 			goto out_unlock;
-		NAPI_GRO_CB(skb)->encapsulation++;
-	}
 
 	flush = 0;
 
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 8ed460e3753c..a6adff98382a 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -238,12 +238,13 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
 	int flush = 1;
 
 	if (NAPI_GRO_CB(skb)->udp_mark ||
-	    (!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid))
+	    (skb->ip_summed != CHECKSUM_PARTIAL &&
+	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+	     !NAPI_GRO_CB(skb)->csum_valid))
 		goto out;
 
 	/* mark that this skb passed once through the udp gro layer */
 	NAPI_GRO_CB(skb)->udp_mark = 1;
-	NAPI_GRO_CB(skb)->encapsulation++;
 
 	rcu_read_lock();
 	uo_priv = rcu_dereference(udp_offload_base);
-- 
cgit v1.2.3


From 4e00517945bed110f1b8de580cce97626e9ef0b5 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sun, 31 Aug 2014 21:10:51 +0200
Subject: regulator: max1586: add device-tree support

Add device-tree support to max1586.
The driver can still be used with the legacy platform data, or the new
device-tree way.

This work is heavily inspired by the device-tree support of its cousin
max8660 driver.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max1586.c       | 81 ++++++++++++++++++++++++++++++++++++++-
 include/linux/regulator/max1586.h |  2 +-
 2 files changed, 80 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c
index d23d0577754b..5c04a7159baa 100644
--- a/drivers/regulator/max1586.c
+++ b/drivers/regulator/max1586.c
@@ -24,6 +24,8 @@
 #include <linux/regulator/driver.h>
 #include <linux/slab.h>
 #include <linux/regulator/max1586.h>
+#include <linux/of_device.h>
+#include <linux/regulator/of_regulator.h>
 
 #define MAX1586_V3_MAX_VSEL 31
 #define MAX1586_V6_MAX_VSEL 3
@@ -157,13 +159,87 @@ static struct regulator_desc max1586_reg[] = {
 	},
 };
 
+int of_get_max1586_platform_data(struct device *dev,
+				 struct max1586_platform_data *pdata)
+{
+	struct max1586_subdev_data *sub;
+	struct of_regulator_match rmatch[ARRAY_SIZE(max1586_reg)];
+	struct device_node *np = dev->of_node;
+	int i, matched;
+
+	if (of_property_read_u32(np, "v3-gain",
+				 &pdata->v3_gain) < 0) {
+		dev_err(dev, "%s has no 'v3-gain' property\n", np->full_name);
+		return -EINVAL;
+	}
+
+	np = of_get_child_by_name(np, "regulators");
+	if (!np) {
+		dev_err(dev, "missing 'regulators' subnode in DT\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(rmatch); i++)
+		rmatch[i].name = max1586_reg[i].name;
+
+	matched = of_regulator_match(dev, np, rmatch, ARRAY_SIZE(rmatch));
+	of_node_put(np);
+	/*
+	 * If matched is 0, ie. neither Output_V3 nor Output_V6 have been found,
+	 * return 0, which signals the normal situation where no subregulator is
+	 * available. This is normal because the max1586 doesn't provide any
+	 * readback support, so the subregulators can't report any status
+	 * anyway.  If matched < 0, return the error.
+	 */
+	if (matched <= 0)
+		return matched;
+
+	pdata->subdevs = devm_kzalloc(dev, sizeof(struct max1586_subdev_data) *
+						matched, GFP_KERNEL);
+	if (!pdata->subdevs)
+		return -ENOMEM;
+
+	pdata->num_subdevs = matched;
+	sub = pdata->subdevs;
+
+	for (i = 0; i < matched; i++) {
+		sub->id = i;
+		sub->name = rmatch[i].of_node->name;
+		sub->platform_data = rmatch[i].init_data;
+		sub++;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id max1586_of_match[] = {
+	{ .compatible = "maxim,max1586", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, max1586_of_match);
+
 static int max1586_pmic_probe(struct i2c_client *client,
 					const struct i2c_device_id *i2c_id)
 {
-	struct max1586_platform_data *pdata = dev_get_platdata(&client->dev);
+	struct max1586_platform_data *pdata, pdata_of;
 	struct regulator_config config = { };
 	struct max1586_data *max1586;
-	int i, id;
+	int i, id, ret;
+	const struct of_device_id *match;
+
+	pdata = dev_get_platdata(&client->dev);
+	if (client->dev.of_node && !pdata) {
+		match = of_match_device(of_match_ptr(max1586_of_match),
+					&client->dev);
+		if (!match) {
+			dev_err(&client->dev, "Error: No device match found\n");
+			return -ENODEV;
+		}
+		ret = of_get_max1586_platform_data(&client->dev, &pdata_of);
+		if (ret < 0)
+			return ret;
+		pdata = &pdata_of;
+	}
 
 	max1586 = devm_kzalloc(&client->dev, sizeof(struct max1586_data),
 			GFP_KERNEL);
@@ -229,6 +305,7 @@ static struct i2c_driver max1586_pmic_driver = {
 	.driver		= {
 		.name	= "max1586",
 		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(max1586_of_match),
 	},
 	.id_table	= max1586_id,
 };
diff --git a/include/linux/regulator/max1586.h b/include/linux/regulator/max1586.h
index de9a7fae20be..cedd0febe882 100644
--- a/include/linux/regulator/max1586.h
+++ b/include/linux/regulator/max1586.h
@@ -40,7 +40,7 @@
  */
 struct max1586_subdev_data {
 	int				id;
-	char				*name;
+	const char			*name;
 	struct regulator_init_data	*platform_data;
 };
 
-- 
cgit v1.2.3


From 068765ba7987e73d4381edfe47b70aa121c7155c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 1 Sep 2014 13:47:49 +0200
Subject: PM / sleep: Mechanism for aborting system suspends unconditionally

It sometimes may be necessary to abort a system suspend in
progress or wake up the system from suspend-to-idle even if the
pm_wakeup_event()/pm_stay_awake() mechanism is not enabled.

For this purpose, introduce a new global variable pm_abort_suspend
and make pm_wakeup_pending() check its value.  Also add routines
for manipulating that variable.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeup.c | 16 +++++++++++++++-
 include/linux/suspend.h     |  4 ++++
 kernel/power/process.c      |  1 +
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index eb1bd2ecad8b..c2744b30d5d9 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -24,6 +24,9 @@
  */
 bool events_check_enabled __read_mostly;
 
+/* If set and the system is suspending, terminate the suspend. */
+static bool pm_abort_suspend __read_mostly;
+
 /*
  * Combined counters of registered wakeup events and wakeup events in progress.
  * They need to be modified together atomically, so it's better to use one
@@ -719,7 +722,18 @@ bool pm_wakeup_pending(void)
 		pm_print_active_wakeup_sources();
 	}
 
-	return ret;
+	return ret || pm_abort_suspend;
+}
+
+void pm_system_wakeup(void)
+{
+	pm_abort_suspend = true;
+	freeze_wake();
+}
+
+void pm_wakeup_clear(void)
+{
+	pm_abort_suspend = false;
 }
 
 /**
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 519064e0c943..06a9910827c2 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
 extern bool events_check_enabled;
 
 extern bool pm_wakeup_pending(void);
+extern void pm_system_wakeup(void);
+extern void pm_wakeup_clear(void);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
@@ -418,6 +420,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
 static inline bool pm_wakeup_pending(void) { return false; }
+static inline void pm_system_wakeup(void) {}
+static inline void pm_wakeup_clear(void) {}
 
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 4ee194eb524b..7b323221b9ee 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -129,6 +129,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
+	pm_wakeup_clear();
 	printk("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
-- 
cgit v1.2.3


From cab303be91dc47942bc25de33dc1140123540800 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 28 Aug 2014 11:44:31 +0200
Subject: genirq: Add sanity checks for PM options on shared interrupt lines

Account the IRQF_NO_SUSPEND and IRQF_RESUME_EARLY actions on shared
interrupt lines and yell loudly if there is a mismatch.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/irqdesc.h | 10 ++++++++++
 kernel/irq/internals.h  | 10 ++++++++++
 kernel/irq/manage.c     |  4 ++++
 kernel/irq/pm.c         | 36 ++++++++++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 472c021a2d4f..cb1a31e448ae 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -36,6 +36,11 @@ struct irq_desc;
  * @threads_oneshot:	bitfield to handle shared oneshot threads
  * @threads_active:	number of irqaction threads currently running
  * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
+ * @nr_actions:		number of installed actions on this descriptor
+ * @no_suspend_depth:	number of irqactions on a irq descriptor with
+ *			IRQF_NO_SUSPEND set
+ * @force_resume_depth:	number of irqactions on a irq descriptor with
+ *			IRQF_FORCE_RESUME set
  * @dir:		/proc/irq/ procfs entry
  * @name:		flow handler name for /proc/interrupts output
  */
@@ -68,6 +73,11 @@ struct irq_desc {
 	unsigned long		threads_oneshot;
 	atomic_t		threads_active;
 	wait_queue_head_t       wait_for_threads;
+#ifdef CONFIG_PM_SLEEP
+	unsigned int		nr_actions;
+	unsigned int		no_suspend_depth;
+	unsigned int		force_resume_depth;
+#endif
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry	*dir;
 #endif
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index af2821178900..c402502a5111 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -194,3 +194,13 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d
 	__this_cpu_inc(*desc->kstat_irqs);
 	__this_cpu_inc(kstat.irqs_sum);
 }
+
+#ifdef CONFIG_PM_SLEEP
+void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action);
+void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action);
+#else
+static inline void
+irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { }
+static inline void
+irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { }
+#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index fa564e8db996..0a9104b4608b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1200,6 +1200,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	new->irq = irq;
 	*old_ptr = new;
 
+	irq_pm_install_action(desc, new);
+
 	/* Reset broken irq detection when installing new handler */
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
@@ -1318,6 +1320,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	/* Found it - now remove it from the list of entries: */
 	*action_ptr = action->next;
 
+	irq_pm_remove_action(desc, action);
+
 	/* If this was the last handler, shut down the IRQ line: */
 	if (!desc->action) {
 		irq_shutdown(desc);
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index b84141dcee5e..1b1b67a73218 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -13,6 +13,42 @@
 
 #include "internals.h"
 
+/*
+ * Called from __setup_irq() with desc->lock held after @action has
+ * been installed in the action chain.
+ */
+void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action)
+{
+	desc->nr_actions++;
+
+	if (action->flags & IRQF_FORCE_RESUME)
+		desc->force_resume_depth++;
+
+	WARN_ON_ONCE(desc->force_resume_depth &&
+		     desc->force_resume_depth != desc->nr_actions);
+
+	if (action->flags & IRQF_NO_SUSPEND)
+		desc->no_suspend_depth++;
+
+	WARN_ON_ONCE(desc->no_suspend_depth &&
+		     desc->no_suspend_depth != desc->nr_actions);
+}
+
+/*
+ * Called from __free_irq() with desc->lock held after @action has
+ * been removed from the action chain.
+ */
+void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
+{
+	desc->nr_actions--;
+
+	if (action->flags & IRQF_FORCE_RESUME)
+		desc->force_resume_depth--;
+
+	if (action->flags & IRQF_NO_SUSPEND)
+		desc->no_suspend_depth--;
+}
+
 static void suspend_device_irq(struct irq_desc *desc, int irq)
 {
 	if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
-- 
cgit v1.2.3


From b76f16748fa61801b1a1fd3ffb6f25ee228a35e0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 29 Aug 2014 13:54:09 +0200
Subject: genirq: Mark wakeup sources as armed on suspend

This allows us to utilize this information in the irq_may_run() check
without adding another conditional to the fast path.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/irq.h | 8 ++++++++
 kernel/irq/pm.c     | 5 +++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 62af59242ddc..03f48d936f66 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -173,6 +173,7 @@ struct irq_data {
  * IRQD_IRQ_DISABLED		- Disabled state of the interrupt
  * IRQD_IRQ_MASKED		- Masked state of the interrupt
  * IRQD_IRQ_INPROGRESS		- In progress state of the interrupt
+ * IRQD_WAKEUP_ARMED		- Wakeup mode armed
  */
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
@@ -186,6 +187,7 @@ enum {
 	IRQD_IRQ_DISABLED		= (1 << 16),
 	IRQD_IRQ_MASKED			= (1 << 17),
 	IRQD_IRQ_INPROGRESS		= (1 << 18),
+	IRQD_WAKEUP_ARMED		= (1 << 19),
 };
 
 static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
@@ -257,6 +259,12 @@ static inline bool irqd_irq_inprogress(struct irq_data *d)
 	return d->state_use_accessors & IRQD_IRQ_INPROGRESS;
 }
 
+static inline bool irqd_is_wakeup_armed(struct irq_data *d)
+{
+	return d->state_use_accessors & IRQD_WAKEUP_ARMED;
+}
+
+
 /*
  * Functions for chained handlers which can be enabled/disabled by the
  * standard disable_irq/enable_irq calls. Must be called with
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index cf0ce0163db9..766930eaeed9 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -54,6 +54,9 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq)
 	if (!desc->action || desc->no_suspend_depth)
 		return false;
 
+	if (irqd_is_wakeup_set(&desc->irq_data))
+		irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
+
 	desc->istate |= IRQS_SUSPENDED;
 	__disable_irq(desc, irq);
 
@@ -101,6 +104,8 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs);
 
 static void resume_irq(struct irq_desc *desc, int irq)
 {
+	irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED);
+
 	if (desc->istate & IRQS_SUSPENDED)
 		goto resume;
 
-- 
cgit v1.2.3


From 9ce7a25849e80cfb264f4995f832b932c1987e1a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 29 Aug 2014 14:00:16 +0200
Subject: genirq: Simplify wakeup mechanism

Currently we suspend wakeup interrupts by lazy disabling them and
check later whether the interrupt has fired, but that's not sufficient
for suspend to idle as there is no way to check that once we
transitioned into the CPU idle state.

So we change the mechanism in the following way:

1) Leave the wakeup interrupts enabled across suspend

2) Add a check to irq_may_run() which is called at the beginning of
   each flow handler whether the interrupt is an armed wakeup source.

   This check is basically free as it just extends the existing check
   for IRQD_IRQ_INPROGRESS. So no new conditional in the hot path.

   If the IRQD_WAKEUP_ARMED flag is set, then the interrupt is
   disabled, marked as pending/suspended and the pm core is notified
   about the wakeup event.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[ rjw: syscore.c and put irq_pm_check_wakeup() into pm.c ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/syscore.c    |  7 +++---
 include/linux/interrupt.h |  5 -----
 kernel/irq/chip.c         | 20 ++++++++++++++++-
 kernel/irq/internals.h    |  2 ++
 kernel/irq/pm.c           | 55 +++++++++++++++++++++++++----------------------
 5 files changed, 53 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index dbb8350ea8dc..8d98a329f6ea 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c
@@ -9,7 +9,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
-#include <linux/interrupt.h>
+#include <linux/suspend.h>
 #include <trace/events/power.h>
 
 static LIST_HEAD(syscore_ops_list);
@@ -54,9 +54,8 @@ int syscore_suspend(void)
 	pr_debug("Checking wakeup interrupts\n");
 
 	/* Return error code if there are any wakeup interrupts pending. */
-	ret = check_wakeup_irqs();
-	if (ret)
-		return ret;
+	if (pm_wakeup_pending())
+		return -EBUSY;
 
 	WARN_ONCE(!irqs_disabled(),
 		"Interrupts enabled before system core suspend.\n");
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 698ad053d064..69517a24bc50 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -193,11 +193,6 @@ extern void irq_wake_thread(unsigned int irq, void *dev_id);
 /* The following three functions are for the core kernel use only. */
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
-#ifdef CONFIG_PM_SLEEP
-extern int check_wakeup_irqs(void);
-#else
-static inline int check_wakeup_irqs(void) { return 0; }
-#endif
 
 /**
  * struct irq_affinity_notify - context for notification of IRQ affinity changes
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6baf86085571..e7917ff8a486 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -344,8 +344,26 @@ static bool irq_check_poll(struct irq_desc *desc)
 
 static bool irq_may_run(struct irq_desc *desc)
 {
-	if (!irqd_irq_inprogress(&desc->irq_data))
+	unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED;
+
+	/*
+	 * If the interrupt is not in progress and is not an armed
+	 * wakeup interrupt, proceed.
+	 */
+	if (!irqd_has_set(&desc->irq_data, mask))
 		return true;
+
+	/*
+	 * If the interrupt is an armed wakeup source, mark it pending
+	 * and suspended, disable it and notify the pm core about the
+	 * event.
+	 */
+	if (irq_pm_check_wakeup(desc))
+		return false;
+
+	/*
+	 * Handle a potential concurrent poll on a different core.
+	 */
 	return irq_check_poll(desc);
 }
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index c402502a5111..4332d766619d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -196,9 +196,11 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d
 }
 
 #ifdef CONFIG_PM_SLEEP
+bool irq_pm_check_wakeup(struct irq_desc *desc);
 void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action);
 void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action);
 #else
+static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; }
 static inline void
 irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { }
 static inline void
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 766930eaeed9..3ca532592704 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -9,10 +9,24 @@
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/suspend.h>
 #include <linux/syscore_ops.h>
 
 #include "internals.h"
 
+bool irq_pm_check_wakeup(struct irq_desc *desc)
+{
+	if (irqd_is_wakeup_armed(&desc->irq_data)) {
+		irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED);
+		desc->istate |= IRQS_SUSPENDED | IRQS_PENDING;
+		desc->depth++;
+		irq_disable(desc);
+		pm_system_wakeup();
+		return true;
+	}
+	return false;
+}
+
 /*
  * Called from __setup_irq() with desc->lock held after @action has
  * been installed in the action chain.
@@ -54,8 +68,16 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq)
 	if (!desc->action || desc->no_suspend_depth)
 		return false;
 
-	if (irqd_is_wakeup_set(&desc->irq_data))
+	if (irqd_is_wakeup_set(&desc->irq_data)) {
 		irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
+		/*
+		 * We return true here to force the caller to issue
+		 * synchronize_irq(). We need to make sure that the
+		 * IRQD_WAKEUP_ARMED is visible before we return from
+		 * suspend_device_irqs().
+		 */
+		return true;
+	}
 
 	desc->istate |= IRQS_SUSPENDED;
 	__disable_irq(desc, irq);
@@ -79,9 +101,13 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq)
  * for this purpose.
  *
  * So we disable all interrupts and mark them IRQS_SUSPENDED except
- * for those which are unused and those which are marked as not
+ * for those which are unused, those which are marked as not
  * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND
- * set.
+ * set and those which are marked as active wakeup sources.
+ *
+ * The active wakeup sources are handled by the flow handler entry
+ * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the
+ * interrupt and notifies the pm core about the wakeup.
  */
 void suspend_device_irqs(void)
 {
@@ -173,26 +199,3 @@ void resume_device_irqs(void)
 	resume_irqs(false);
 }
 EXPORT_SYMBOL_GPL(resume_device_irqs);
-
-/**
- * check_wakeup_irqs - check if any wake-up interrupts are pending
- */
-int check_wakeup_irqs(void)
-{
-	struct irq_desc *desc;
-	int irq;
-
-	for_each_irq_desc(irq, desc) {
-		/*
-		 * Only interrupts which are marked as wakeup source
-		 * and have not been disabled before the suspend check
-		 * can abort suspend.
-		 */
-		if (irqd_is_wakeup_set(&desc->irq_data)) {
-			if (desc->depth == 1 && desc->istate & IRQS_PENDING)
-				return -EBUSY;
-		}
-	}
-
-	return 0;
-}
-- 
cgit v1.2.3


From 10b3ad8c21bb4b135768c30dd4c51a1c744da699 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 29 Aug 2014 21:07:24 -0700
Subject: net: Do txq_trans_update() in netdev_start_xmit()

That way we don't have to audit every call site to make sure it is
doing this properly.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/dlci.c    |  6 ++++--
 include/linux/netdevice.h | 10 ++++++++--
 net/core/dev.c            |  7 ++-----
 net/core/netpoll.c        |  4 +---
 net/core/pktgen.c         |  3 +--
 net/packet/af_packet.c    |  7 ++-----
 net/sched/sch_teql.c      |  3 +--
 7 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 81b22a180aad..6427e8283419 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -192,8 +192,10 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dlci_local *dlp = netdev_priv(dev);
 
-	if (skb)
-		netdev_start_xmit(skb, dlp->slave);
+	if (skb) {
+		struct netdev_queue *txq = skb_get_tx_queue(dev, skb);
+		netdev_start_xmit(skb, dlp->slave, txq);
+	}
 	return NETDEV_TX_OK;
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 456eb1fe51e8..16171802ea7d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3437,11 +3437,17 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
 	return ops->ndo_start_xmit(skb, dev);
 }
 
-static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
+					    struct netdev_queue *txq)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int rc;
 
-	return __netdev_start_xmit(ops, skb, dev);
+	rc = __netdev_start_xmit(ops, skb, dev);
+	if (rc == NETDEV_TX_OK)
+		txq_trans_update(txq);
+
+	return rc;
 }
 
 int netdev_class_create_file_ns(struct class_attribute *class_attr,
diff --git a/net/core/dev.c b/net/core/dev.c
index a6077ef56345..6392adaaa22f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2666,10 +2666,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 		skb_len = skb->len;
 		trace_net_dev_start_xmit(skb, dev);
-		rc = netdev_start_xmit(skb, dev);
+		rc = netdev_start_xmit(skb, dev, txq);
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
-		if (rc == NETDEV_TX_OK)
-			txq_trans_update(txq);
 		return rc;
 	}
 
@@ -2685,7 +2683,7 @@ gso:
 
 		skb_len = nskb->len;
 		trace_net_dev_start_xmit(nskb, dev);
-		rc = netdev_start_xmit(nskb, dev);
+		rc = netdev_start_xmit(nskb, dev, txq);
 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
@@ -2694,7 +2692,6 @@ gso:
 			skb->next = nskb;
 			return rc;
 		}
-		txq_trans_update(txq);
 		if (unlikely(netif_xmit_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 12b1df976562..05bc57edaa81 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -91,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		skb->vlan_tci = 0;
 	}
 
-	status = netdev_start_xmit(skb, dev);
-	if (status == NETDEV_TX_OK)
-		txq_trans_update(txq);
+	status = netdev_start_xmit(skb, dev, txq);
 
 out:
 	return status;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d81b540096c3..34bd2ff9f121 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3335,11 +3335,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		goto unlock;
 	}
 	atomic_inc(&(pkt_dev->skb->users));
-	ret = netdev_start_xmit(pkt_dev->skb, odev);
+	ret = netdev_start_xmit(pkt_dev->skb, odev, txq);
 
 	switch (ret) {
 	case NETDEV_TX_OK:
-		txq_trans_update(txq);
 		pkt_dev->last_ok = 1;
 		pkt_dev->sofar++;
 		pkt_dev->seq_num++;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b7a7f5a721bd..fe305a05a8fc 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -258,11 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb)
 	local_bh_disable();
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_xmit_frozen_or_drv_stopped(txq)) {
-		ret = netdev_start_xmit(skb, dev);
-		if (ret == NETDEV_TX_OK)
-			txq_trans_update(txq);
-	}
+	if (!netif_xmit_frozen_or_drv_stopped(txq))
+		ret = netdev_start_xmit(skb, dev, txq);
 	HARD_TX_UNLOCK(dev, txq);
 
 	local_bh_enable();
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 64cd93ca8104..193dc2cba1ec 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -316,8 +316,7 @@ restart:
 				unsigned int length = qdisc_pkt_len(skb);
 
 				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
-				    netdev_start_xmit(skb, slave) == NETDEV_TX_OK) {
-					txq_trans_update(slave_txq);
+				    netdev_start_xmit(skb, slave, slave_txq) == NETDEV_TX_OK) {
 					__netif_tx_unlock(slave_txq);
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
-- 
cgit v1.2.3


From fa2dbdc253c2aee2a760c64de454cb62469ec11d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 29 Aug 2014 21:55:22 -0700
Subject: net: Pass a "more" indication down into netdev_start_xmit() code
 paths.

For now it will always be false.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/dlci.c    | 2 +-
 include/linux/netdevice.h | 9 +++++----
 net/atm/mpc.c             | 2 +-
 net/core/dev.c            | 2 +-
 net/core/netpoll.c        | 2 +-
 net/core/pktgen.c         | 2 +-
 net/packet/af_packet.c    | 2 +-
 net/sched/sch_teql.c      | 3 ++-
 8 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 6427e8283419..ae6ecf401189 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -194,7 +194,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (skb) {
 		struct netdev_queue *txq = skb_get_tx_queue(dev, skb);
-		netdev_start_xmit(skb, dlp->slave, txq);
+		netdev_start_xmit(skb, dlp->slave, txq, false);
 	}
 	return NETDEV_TX_OK;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 16171802ea7d..5050218c5b7f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3431,19 +3431,20 @@ int __init dev_proc_init(void);
 #endif
 
 static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
-					      struct sk_buff *skb, struct net_device *dev)
+					      struct sk_buff *skb, struct net_device *dev,
+					      bool more)
 {
-	skb->xmit_more = 0;
+	skb->xmit_more = more ? 1 : 0;
 	return ops->ndo_start_xmit(skb, dev);
 }
 
 static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
-					    struct netdev_queue *txq)
+					    struct netdev_queue *txq, bool more)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc;
 
-	rc = __netdev_start_xmit(ops, skb, dev);
+	rc = __netdev_start_xmit(ops, skb, dev, more);
 	if (rc == NETDEV_TX_OK)
 		txq_trans_update(txq);
 
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index d662da161e5a..0e982222d425 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
 	}
 
 non_ip:
-	return __netdev_start_xmit(mpc->old_ops, skb, dev);
+	return __netdev_start_xmit(mpc->old_ops, skb, dev, false);
 }
 
 static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
diff --git a/net/core/dev.c b/net/core/dev.c
index ab7bb809711e..f0ed5a611a97 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2610,7 +2610,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 	len = skb->len;
 	trace_net_dev_start_xmit(skb, dev);
-	rc = netdev_start_xmit(skb, dev, txq);
+	rc = netdev_start_xmit(skb, dev, txq, false);
 	trace_net_dev_xmit(skb, rc, dev, len);
 
 	return rc;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 05bc57edaa81..e6645b4f330a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -91,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		skb->vlan_tci = 0;
 	}
 
-	status = netdev_start_xmit(skb, dev, txq);
+	status = netdev_start_xmit(skb, dev, txq, false);
 
 out:
 	return status;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 34bd2ff9f121..5b36a9428c59 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3335,7 +3335,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		goto unlock;
 	}
 	atomic_inc(&(pkt_dev->skb->users));
-	ret = netdev_start_xmit(pkt_dev->skb, odev, txq);
+	ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false);
 
 	switch (ret) {
 	case NETDEV_TX_OK:
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index fe305a05a8fc..87d20f48ff06 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -259,7 +259,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_xmit_frozen_or_drv_stopped(txq))
-		ret = netdev_start_xmit(skb, dev, txq);
+		ret = netdev_start_xmit(skb, dev, txq, false);
 	HARD_TX_UNLOCK(dev, txq);
 
 	local_bh_enable();
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 193dc2cba1ec..aaa8d03ed054 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -316,7 +316,8 @@ restart:
 				unsigned int length = qdisc_pkt_len(skb);
 
 				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
-				    netdev_start_xmit(skb, slave, slave_txq) == NETDEV_TX_OK) {
+				    netdev_start_xmit(skb, slave, slave_txq, false) ==
+				    NETDEV_TX_OK) {
 					__netif_tx_unlock(slave_txq);
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
-- 
cgit v1.2.3


From 50cbe9ab5f8d92d2d4a327b56e96559d8f63a1fa Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 30 Aug 2014 19:13:51 -0700
Subject: net: Validate xmit SKBs right when we pull them out of the qdisc.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 net/core/dev.c            | 6 +-----
 net/sched/sch_generic.c   | 5 ++++-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5050218c5b7f..47c49ba2dcf4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2827,6 +2827,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
+struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq);
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 704a5434f77d..75bc5b068a13 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2656,7 +2656,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur
 	return skb;
 }
 
-static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
 {
 	netdev_features_t features;
 
@@ -2719,10 +2719,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 {
 	int rc = NETDEV_TX_OK;
 
-	skb = validate_xmit_skb(skb, dev);
-	if (!skb)
-		return rc;
-
 	if (likely(!skb->next))
 		return xmit_one(skb, dev, txq, false);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 05b3f5d104af..f178798a5836 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -70,8 +70,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 		} else
 			skb = NULL;
 	} else {
-		if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
+		if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) {
 			skb = q->dequeue(q);
+			if (skb)
+				skb = validate_xmit_skb(skb, qdisc_dev(q));
+		}
 	}
 
 	return skb;
-- 
cgit v1.2.3


From ce93718fb7cdbc064c3000ff59e4d3200bdfa744 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 30 Aug 2014 19:22:20 -0700
Subject: net: Don't keep around original SKB when we software segment GSO
 frames.

Just maintain the list properly by returning the head of the remaining
SKB list from dev_hard_start_xmit().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 +--
 net/core/dev.c            | 79 +++++++++--------------------------------------
 net/sched/sch_generic.c   |  2 +-
 3 files changed, 17 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 47c49ba2dcf4..202c25a9aadf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2828,8 +2828,8 @@ int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
 struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq);
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+				    struct netdev_queue *txq, int *ret);
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 75bc5b068a13..c89da4f306b1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2485,52 +2485,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 	return 0;
 }
 
-struct dev_gso_cb {
-	void (*destructor)(struct sk_buff *skb);
-};
-
-#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
-
-static void dev_gso_skb_destructor(struct sk_buff *skb)
-{
-	struct dev_gso_cb *cb;
-
-	kfree_skb_list(skb->next);
-	skb->next = NULL;
-
-	cb = DEV_GSO_CB(skb);
-	if (cb->destructor)
-		cb->destructor(skb);
-}
-
-/**
- *	dev_gso_segment - Perform emulated hardware segmentation on skb.
- *	@skb: buffer to segment
- *	@features: device features as applicable to this skb
- *
- *	This function segments the given skb and stores the list of segments
- *	in skb->next.
- */
-static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
-{
-	struct sk_buff *segs;
-
-	segs = skb_gso_segment(skb, features);
-
-	/* Verifying header integrity only. */
-	if (!segs)
-		return 0;
-
-	if (IS_ERR(segs))
-		return PTR_ERR(segs);
-
-	skb->next = segs;
-	DEV_GSO_CB(skb)->destructor = skb->destructor;
-	skb->destructor = dev_gso_skb_destructor;
-
-	return 0;
-}
-
 /* If MPLS offload request, verify we are testing hardware MPLS features
  * instead of standard features for the netdev.
  */
@@ -2682,8 +2636,13 @@ struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
 		features &= dev->hw_enc_features;
 
 	if (netif_needs_gso(skb, features)) {
-		if (unlikely(dev_gso_segment(skb, features)))
-			goto out_kfree_skb;
+		struct sk_buff *segs;
+
+		segs = skb_gso_segment(skb, features);
+		kfree_skb(skb);
+		if (IS_ERR(segs))
+			segs = NULL;
+		skb = segs;
 	} else {
 		if (skb_needs_linearize(skb, features) &&
 		    __skb_linearize(skb))
@@ -2714,26 +2673,16 @@ out_null:
 	return NULL;
 }
 
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq)
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+				    struct netdev_queue *txq, int *ret)
 {
-	int rc = NETDEV_TX_OK;
-
-	if (likely(!skb->next))
-		return xmit_one(skb, dev, txq, false);
-
-	skb->next = xmit_list(skb->next, dev, txq, &rc);
-	if (likely(skb->next == NULL)) {
-		skb->destructor = DEV_GSO_CB(skb)->destructor;
-		consume_skb(skb);
-		return rc;
+	if (likely(!skb->next)) {
+		*ret = xmit_one(skb, dev, txq, false);
+		return skb;
 	}
 
-	kfree_skb(skb);
-
-	return rc;
+	return xmit_list(skb, dev, txq, ret);
 }
-EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
 
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
@@ -2945,7 +2894,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 
 			if (!netif_xmit_stopped(txq)) {
 				__this_cpu_inc(xmit_recursion);
-				rc = dev_hard_start_xmit(skb, dev, txq);
+				skb = dev_hard_start_xmit(skb, dev, txq, &rc);
 				__this_cpu_dec(xmit_recursion);
 				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f178798a5836..a8bf9f9928bd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -129,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_xmit_frozen_or_stopped(txq))
-		ret = dev_hard_start_xmit(skb, dev, txq);
+		skb = dev_hard_start_xmit(skb, dev, txq, &ret);
 
 	HARD_TX_UNLOCK(dev, txq);
 
-- 
cgit v1.2.3


From 5a21232983aa7acfe7fd26170832a9e0a4a7b4ae Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 31 Aug 2014 15:12:41 -0700
Subject: net: Support for csum_bad in skbuff

This flag indicates that an invalid checksum was detected in the
packet. __skb_mark_checksum_bad helper function was added to set this.

Checksums can be marked bad from a driver or the GRO path (the latter
is implemented in this patch). csum_bad is checked in
__skb_checksum_validate_complete (i.e. calling that when ip_summed ==
CHECKSUM_NONE).

csum_bad works in conjunction with ip_summed value. In the case that
ip_summed is CHECKSUM_NONE and csum_bad is set, this implies that the
first (or next) checksum encountered in the packet is bad. When
ip_summed is CHECKSUM_UNNECESSARY, the first checksum after the last
one validated is bad. For example, if ip_summed == CHECKSUM_UNNECESSARY,
csum_level == 1, and csum_bad is set-- then the third checksum in the
packet is bad. In the normal path, the packet will be dropped when
processing the protocol layer of the bad checksum:
__skb_decr_checksum_unnecessary called twice for the good checksums
changing ip_summed to CHECKSUM_NONE so that
__skb_checksum_validate_complete is called to validate the third
checksum and that will fail since csum_bad is set.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 +++-
 include/linux/skbuff.h    | 21 ++++++++++++++++++++-
 net/core/dev.c            |  2 +-
 3 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 202c25a9aadf..a0ab6d9d400a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2216,7 +2216,9 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
 	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
 		__ret = __skb_gro_checksum_validate_complete(skb,	\
 				compute_pseudo(skb, proto));		\
-	if (!__ret)							\
+	if (__ret)							\
+		__skb_mark_checksum_bad(skb);				\
+	else								\
 		skb_gro_incr_csum_unnecessary(skb);			\
 	__ret;								\
 })
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c93b5859a772..23710a243439 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -617,7 +617,8 @@ struct sk_buff {
 
 	kmemcheck_bitfield_begin(flags3);
 	__u8			csum_level:2;
-	/* 14 bit hole */
+	__u8			csum_bad:1;
+	/* 13 bit hole */
 	kmemcheck_bitfield_end(flags3);
 
 	__be16			inner_protocol;
@@ -2825,6 +2826,21 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
 	}
 }
 
+static inline void __skb_mark_checksum_bad(struct sk_buff *skb)
+{
+	/* Mark current checksum as bad (typically called from GRO
+	 * path). In the case that ip_summed is CHECKSUM_NONE
+	 * this must be the first checksum encountered in the packet.
+	 * When ip_summed is CHECKSUM_UNNECESSARY, this is the first
+	 * checksum after the last one validated. For UDP, a zero
+	 * checksum can not be marked as bad.
+	 */
+
+	if (skb->ip_summed == CHECKSUM_NONE ||
+	    skb->ip_summed == CHECKSUM_UNNECESSARY)
+		skb->csum_bad = 1;
+}
+
 /* Check if we need to perform checksum complete validation.
  *
  * Returns true if checksum complete is needed, false otherwise
@@ -2866,6 +2882,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
 			skb->csum_valid = 1;
 			return 0;
 		}
+	} else if (skb->csum_bad) {
+		/* ip_summed == CHECKSUM_NONE in this case */
+		return 1;
 	}
 
 	skb->csum = psum;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6857d57aa294..3774afc3bebf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3918,7 +3918,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
 
-	if (skb_is_gso(skb) || skb_has_frag_list(skb))
+	if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
 		goto normal;
 
 	gro_list_prepare(napi, skb);
-- 
cgit v1.2.3


From d96535a17dbbafd567961d14c08c0984ddda9c3c Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 31 Aug 2014 15:12:42 -0700
Subject: net: Infrastructure for checksum unnecessary conversions

For normal path, added skb_checksum_try_convert which is called
to attempt to convert CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE. The
primary condition to allow this is that ip_summed is CHECKSUM_NONE
and csum_valid is true, which will be the state after consuming
a CHECKSUM_UNNECESSARY.

For GRO path, added skb_gro_checksum_try_convert which is the GRO
analogue of skb_checksum_try_convert. The primary condition to allow
this is that NAPI_GRO_CB(skb)->csum_cnt == 0 and
NAPI_GRO_CB(skb)->csum_valid is set. This implies that we have consumed
all available CHECKSUM_UNNECESSARY checksums in the GRO path.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 include/linux/skbuff.h    | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a0ab6d9d400a..5be20a7bbb0d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2233,6 +2233,26 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
 #define skb_gro_checksum_simple_validate(skb)				\
 	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
 
+static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
+{
+	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+		!NAPI_GRO_CB(skb)->csum_valid);
+}
+
+static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
+					      __sum16 check, __wsum pseudo)
+{
+	NAPI_GRO_CB(skb)->csum = ~pseudo;
+	NAPI_GRO_CB(skb)->csum_valid = 1;
+}
+
+#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo)	\
+do {									\
+	if (__skb_gro_checksum_convert_check(skb))			\
+		__skb_gro_checksum_convert(skb, check,			\
+					   compute_pseudo(skb, proto));	\
+} while (0)
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 23710a243439..02529fcad1ac 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2942,6 +2942,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
 #define skb_checksum_simple_validate(skb)				\
 	__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
 
+static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
+{
+	return (skb->ip_summed == CHECKSUM_NONE &&
+		skb->csum_valid && !skb->csum_bad);
+}
+
+static inline void __skb_checksum_convert(struct sk_buff *skb,
+					  __sum16 check, __wsum pseudo)
+{
+	skb->csum = ~pseudo;
+	skb->ip_summed = CHECKSUM_COMPLETE;
+}
+
+#define skb_checksum_try_convert(skb, proto, check, compute_pseudo)	\
+do {									\
+	if (__skb_checksum_convert_check(skb))				\
+		__skb_checksum_convert(skb, check,			\
+				       compute_pseudo(skb, proto));	\
+} while (0)
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
-- 
cgit v1.2.3


From 2abb7cdc0dc84e99b76ef983a1ae1978922aa9b3 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 31 Aug 2014 15:12:43 -0700
Subject: udp: Add support for doing checksum unnecessary conversion

Add support for doing CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE
conversion in UDP tunneling path.

In the normal UDP path, we call skb_checksum_try_convert after locating
the UDP socket. The check is that checksum conversion is enabled for
the socket (new flag in UDP socket) and that checksum field is
non-zero.

In the UDP GRO path, we call skb_gro_checksum_try_convert after
checksum is validated and checksum field is non-zero. Since this is
already in GRO we assume that checksum conversion is always wanted.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h    | 16 +++++++++++++++-
 net/ipv4/udp.c         |  4 ++++
 net/ipv4/udp_offload.c | 25 +++++++++++++++++--------
 net/ipv6/udp.c         |  4 ++++
 net/ipv6/udp_offload.c | 24 +++++++++++++++++-------
 5 files changed, 57 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 247cfdcc4b08..ee3277593222 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -49,7 +49,11 @@ struct udp_sock {
 	unsigned int	 corkflag;	/* Cork is required */
 	__u8		 encap_type;	/* Is this an Encapsulation socket? */
 	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
-			 no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
+			 no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
+			 convert_csum:1;/* On receive, convert checksum
+					 * unnecessary to checksum complete
+					 * if possible.
+					 */
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -98,6 +102,16 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
 	return udp_sk(sk)->no_check6_rx;
 }
 
+static inline void udp_set_convert_csum(struct sock *sk, bool val)
+{
+	udp_sk(sk)->convert_csum = val;
+}
+
+static inline bool udp_get_convert_csum(struct sock *sk)
+{
+	return udp_sk(sk)->convert_csum;
+}
+
 #define udp_portaddr_for_each_entry(__sk, node, list) \
 	hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3549c21fe5f7..0da3849fd35b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1788,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (sk != NULL) {
 		int ret;
 
+		if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
+			skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+						 inet_compute_pseudo);
+
 		ret = udp_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index a6adff98382a..84e0e05c9c0e 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -290,16 +290,25 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
-	/* Don't bother verifying checksum if we're going to flush anyway. */
-	if (unlikely(!uh) ||
-	    (!NAPI_GRO_CB(skb)->flush &&
-	     skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
-						  inet_gro_compute_pseudo))) {
-		NAPI_GRO_CB(skb)->flush = 1;
-		return NULL;
-	}
+	if (unlikely(!uh))
+		goto flush;
 
+	/* Don't bother verifying checksum if we're going to flush anyway. */
+	if (!NAPI_GRO_CB(skb)->flush)
+		goto skip;
+
+	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
+						 inet_gro_compute_pseudo))
+		goto flush;
+	else if (uh->check)
+		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+					     inet_gro_compute_pseudo);
+skip:
 	return udp_gro_receive(head, skb, uh);
+
+flush:
+	NAPI_GRO_CB(skb)->flush = 1;
+	return NULL;
 }
 
 int udp_gro_complete(struct sk_buff *skb, int nhoff)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 12fcce8fba46..f6ba535b6feb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 			goto csum_error;
 		}
 
+		if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
+			skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+						 ip6_compute_pseudo);
+
 		ret = udpv6_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b13e377e9c53..89cb9a9b8537 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -134,16 +134,26 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
+	if (unlikely(!uh))
+		goto flush;
+
 	/* Don't bother verifying checksum if we're going to flush anyway. */
-	if (unlikely(!uh) ||
-	    (!NAPI_GRO_CB(skb)->flush &&
-	     skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
-						  ip6_gro_compute_pseudo))) {
-		NAPI_GRO_CB(skb)->flush = 1;
-		return NULL;
-	}
+	if (!NAPI_GRO_CB(skb)->flush)
+		goto skip;
 
+	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
+						 ip6_gro_compute_pseudo))
+		goto flush;
+	else if (uh->check)
+		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+					     ip6_gro_compute_pseudo);
+
+skip:
 	return udp_gro_receive(head, skb, uh);
+
+flush:
+	NAPI_GRO_CB(skb)->flush = 1;
+	return NULL;
 }
 
 int udp6_gro_complete(struct sk_buff *skb, int nhoff)
-- 
cgit v1.2.3


From 0dbc8b7afef6e4fddcfebcbacbeb269a0a3b06d5 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 1 Sep 2014 15:15:40 +0200
Subject: gpio: move varargs hack outside #ifdef GPIOLIB

commit 39b2bbe3d715cf5013b5c48695ccdd25bd3bf120
"gpio: add flags argument to gpiod_get*() functions"
added a dynamic flags argument to all the GPIOD getter
functions, however this did not cover the stubs so
when people used gpiod stubs to compile out descriptor
code, compilation failed.

Solve this by:
- Also rename all the stub functions __gpiod_*
- Moving the vararg hack outside of #ifdef CONFIG_GPIOLIB
  so these will always be available.

Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 105 ++++++++++++++++++++++++------------------
 1 file changed, 60 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index c7e17de732f3..12f146fa6604 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -38,60 +38,32 @@ enum gpiod_flags {
 struct gpio_desc *__must_check __gpiod_get(struct device *dev,
 					 const char *con_id,
 					 enum gpiod_flags flags);
-#define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags)
-#define gpiod_get(varargs...) __gpiod_get(varargs, 0)
 struct gpio_desc *__must_check __gpiod_get_index(struct device *dev,
 					       const char *con_id,
 					       unsigned int idx,
 					       enum gpiod_flags flags);
-#define __gpiod_get_index(dev, con_id, index, flags, ...)		\
-	__gpiod_get_index(dev, con_id, index, flags)
-#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, 0)
 struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev,
 						  const char *con_id,
 						  enum gpiod_flags flags);
-#define __gpiod_get_optional(dev, con_id, flags, ...)			\
-	__gpiod_get_optional(dev, con_id, flags)
-#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, 0)
 struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev,
 							const char *con_id,
 							unsigned int index,
 							enum gpiod_flags flags);
-#define __gpiod_get_index_optional(dev, con_id, index, flags, ...)	\
-	__gpiod_get_index_optional(dev, con_id, index, flags)
-#define gpiod_get_index_optional(varargs...)				\
-	__gpiod_get_index_optional(varargs, 0)
-
 void gpiod_put(struct gpio_desc *desc);
 
 struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev,
 					      const char *con_id,
 					      enum gpiod_flags flags);
-#define __devm_gpiod_get(dev, con_id, flags, ...)			\
-	__devm_gpiod_get(dev, con_id, flags)
-#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, 0)
 struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev,
 						    const char *con_id,
 						    unsigned int idx,
 						    enum gpiod_flags flags);
-#define __devm_gpiod_get_index(dev, con_id, index, flags, ...)		\
-	__devm_gpiod_get_index(dev, con_id, index, flags)
-#define devm_gpiod_get_index(varargs...) __devm_gpiod_get_index(varargs, 0)
 struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev,
 						       const char *con_id,
 						       enum gpiod_flags flags);
-#define __devm_gpiod_get_optional(dev, con_id, flags, ...)		\
-	__devm_gpiod_get_optional(dev, con_id, flags)
-#define devm_gpiod_get_optional(varargs...)				\
-	__devm_gpiod_get_optional(varargs, 0)
 struct gpio_desc *__must_check
 __devm_gpiod_get_index_optional(struct device *dev, const char *con_id,
 			      unsigned int index, enum gpiod_flags flags);
-#define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...)	\
-	__devm_gpiod_get_index_optional(dev, con_id, index, flags)
-#define devm_gpiod_get_index_optional(varargs...)			\
-	__devm_gpiod_get_index_optional(varargs, 0)
-
 void devm_gpiod_put(struct device *dev, struct gpio_desc *desc);
 
 int gpiod_get_direction(const struct gpio_desc *desc);
@@ -124,27 +96,31 @@ int desc_to_gpio(const struct gpio_desc *desc);
 
 #else /* CONFIG_GPIOLIB */
 
-static inline struct gpio_desc *__must_check gpiod_get(struct device *dev,
-						       const char *con_id)
+static inline struct gpio_desc *__must_check __gpiod_get(struct device *dev,
+						const char *con_id,
+						enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
-static inline struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
-							     const char *con_id,
-							     unsigned int idx)
+static inline struct gpio_desc *__must_check
+__gpiod_get_index(struct device *dev,
+		  const char *con_id,
+		  unsigned int idx,
+		  enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct gpio_desc *__must_check
-gpiod_get_optional(struct device *dev, const char *con_id)
+__gpiod_get_optional(struct device *dev, const char *con_id,
+		     enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct gpio_desc *__must_check
-gpiod_get_index_optional(struct device *dev, const char *con_id,
-			 unsigned int index)
+__gpiod_get_index_optional(struct device *dev, const char *con_id,
+			   unsigned int index, enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -157,28 +133,33 @@ static inline void gpiod_put(struct gpio_desc *desc)
 	WARN_ON(1);
 }
 
-static inline struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
-							    const char *con_id)
+static inline struct gpio_desc *__must_check
+__devm_gpiod_get(struct device *dev,
+		 const char *con_id,
+		 enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
 static inline
-struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
-						    const char *con_id,
-						    unsigned int idx)
+struct gpio_desc *__must_check
+__devm_gpiod_get_index(struct device *dev,
+		       const char *con_id,
+		       unsigned int idx,
+		       enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct gpio_desc *__must_check
-devm_gpiod_get_optional(struct device *dev, const char *con_id)
+__devm_gpiod_get_optional(struct device *dev, const char *con_id,
+			  enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct gpio_desc *__must_check
-devm_gpiod_get_index_optional(struct device *dev, const char *con_id,
-			      unsigned int index)
+__devm_gpiod_get_index_optional(struct device *dev, const char *con_id,
+				unsigned int index, enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -303,9 +284,43 @@ static inline int desc_to_gpio(const struct gpio_desc *desc)
 	return -EINVAL;
 }
 
-
 #endif /* CONFIG_GPIOLIB */
 
+/*
+ * Vararg-hacks! This is done to transition the kernel to always pass
+ * the options flags argument to the below functions. During a transition
+ * phase these vararg macros make both old-and-newstyle code compile,
+ * but when all calls to the elder API are removed, these should go away
+ * and the __gpiod_get() etc functions above be renamed just gpiod_get()
+ * etc.
+ */
+#define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags)
+#define gpiod_get(varargs...) __gpiod_get(varargs, 0)
+#define __gpiod_get_index(dev, con_id, index, flags, ...)		\
+	__gpiod_get_index(dev, con_id, index, flags)
+#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, 0)
+#define __gpiod_get_optional(dev, con_id, flags, ...)			\
+	__gpiod_get_optional(dev, con_id, flags)
+#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, 0)
+#define __gpiod_get_index_optional(dev, con_id, index, flags, ...)	\
+	__gpiod_get_index_optional(dev, con_id, index, flags)
+#define gpiod_get_index_optional(varargs...)				\
+	__gpiod_get_index_optional(varargs, 0)
+#define __devm_gpiod_get(dev, con_id, flags, ...)			\
+	__devm_gpiod_get(dev, con_id, flags)
+#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, 0)
+#define __devm_gpiod_get_index(dev, con_id, index, flags, ...)		\
+	__devm_gpiod_get_index(dev, con_id, index, flags)
+#define devm_gpiod_get_index(varargs...) __devm_gpiod_get_index(varargs, 0)
+#define __devm_gpiod_get_optional(dev, con_id, flags, ...)		\
+	__devm_gpiod_get_optional(dev, con_id, flags)
+#define devm_gpiod_get_optional(varargs...)				\
+	__devm_gpiod_get_optional(varargs, 0)
+#define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...)	\
+	__devm_gpiod_get_index_optional(dev, con_id, index, flags)
+#define devm_gpiod_get_index_optional(varargs...)			\
+	__devm_gpiod_get_index_optional(varargs, 0)
+
 #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS)
 
 int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
-- 
cgit v1.2.3


From fbff66108352d19b5cffa7dce26d7638c9dd4d70 Mon Sep 17 00:00:00 2001
From: Mark Rustad <mark.d.rustad@intel.com>
Date: Thu, 28 Aug 2014 04:43:09 -0700
Subject: security: Silence shadow warning

Renaming an unused formal parameter in the static inline function
security_inode_init_security eliminates many W=2 warnings.

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/security.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 623f90e5f38d..3b3aeb1b74cb 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2108,7 +2108,7 @@ static inline int security_dentry_init_security(struct dentry *dentry,
 static inline int security_inode_init_security(struct inode *inode,
 						struct inode *dir,
 						const struct qstr *qstr,
-						const initxattrs initxattrs,
+						const initxattrs xattrs,
 						void *fs_data)
 {
 	return 0;
-- 
cgit v1.2.3


From ea2fdf842365066c82ab941086c6a1741ced4f2a Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Thu, 28 Aug 2014 13:58:53 +0200
Subject: usb: phy: samsung: remove old common USB PHY code

drivers/usb/phy/phy-samsung-usb[2,3] drivers got replaced by
drivers/phy/phy-samsung-usb[2,3] ones and the old common Samsung
USB PHY code is no longer used.

Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Reviewed-by: Vivek Gautam <gautam.vivek@samsung.com>
Reviewed-by: Jingoo Han <jg1.han@samsung.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
Cc: Kamil Debski <k.debski@samsung.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-samsung-usb.c            | 241 --------------------
 drivers/usb/phy/phy-samsung-usb.h            | 317 ---------------------------
 include/linux/platform_data/samsung-usbphy.h |  27 ---
 3 files changed, 585 deletions(-)
 delete mode 100644 drivers/usb/phy/phy-samsung-usb.c
 delete mode 100644 drivers/usb/phy/phy-samsung-usb.h
 delete mode 100644 include/linux/platform_data/samsung-usbphy.h

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-samsung-usb.c b/drivers/usb/phy/phy-samsung-usb.c
deleted file mode 100644
index ac025ca08425..000000000000
--- a/drivers/usb/phy/phy-samsung-usb.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/* linux/drivers/usb/phy/phy-samsung-usb.c
- *
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *              http://www.samsung.com
- *
- * Author: Praveen Paneri <p.paneri@samsung.com>
- *
- * Samsung USB-PHY helper driver with common function calls;
- * interacts with Samsung USB 2.0 PHY controller driver and later
- * with Samsung USB 3.0 PHY driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/usb/samsung_usb_phy.h>
-
-#include "phy-samsung-usb.h"
-
-int samsung_usbphy_parse_dt(struct samsung_usbphy *sphy)
-{
-	struct device_node *usbphy_sys;
-
-	/* Getting node for system controller interface for usb-phy */
-	usbphy_sys = of_get_child_by_name(sphy->dev->of_node, "usbphy-sys");
-	if (!usbphy_sys) {
-		dev_err(sphy->dev, "No sys-controller interface for usb-phy\n");
-		return -ENODEV;
-	}
-
-	sphy->pmuregs = of_iomap(usbphy_sys, 0);
-
-	if (sphy->pmuregs == NULL) {
-		dev_err(sphy->dev, "Can't get usb-phy pmu control register\n");
-		goto err0;
-	}
-
-	sphy->sysreg = of_iomap(usbphy_sys, 1);
-
-	/*
-	 * Not returning error code here, since this situation is not fatal.
-	 * Few SoCs may not have this switch available
-	 */
-	if (sphy->sysreg == NULL)
-		dev_warn(sphy->dev, "Can't get usb-phy sysreg cfg register\n");
-
-	of_node_put(usbphy_sys);
-
-	return 0;
-
-err0:
-	of_node_put(usbphy_sys);
-	return -ENXIO;
-}
-EXPORT_SYMBOL_GPL(samsung_usbphy_parse_dt);
-
-/*
- * Set isolation here for phy.
- * Here 'on = true' would mean USB PHY block is isolated, hence
- * de-activated and vice-versa.
- */
-void samsung_usbphy_set_isolation_4210(struct samsung_usbphy *sphy, bool on)
-{
-	void __iomem *reg = NULL;
-	u32 reg_val;
-	u32 en_mask = 0;
-
-	if (!sphy->pmuregs) {
-		dev_warn(sphy->dev, "Can't set pmu isolation\n");
-		return;
-	}
-
-	if (sphy->phy_type == USB_PHY_TYPE_DEVICE) {
-		reg = sphy->pmuregs + sphy->drv_data->devphy_reg_offset;
-		en_mask = sphy->drv_data->devphy_en_mask;
-	} else if (sphy->phy_type == USB_PHY_TYPE_HOST) {
-		reg = sphy->pmuregs + sphy->drv_data->hostphy_reg_offset;
-		en_mask = sphy->drv_data->hostphy_en_mask;
-	}
-
-	reg_val = readl(reg);
-
-	if (on)
-		reg_val &= ~en_mask;
-	else
-		reg_val |= en_mask;
-
-	writel(reg_val, reg);
-
-	if (sphy->drv_data->cpu_type == TYPE_EXYNOS4X12) {
-		writel(reg_val, sphy->pmuregs + EXYNOS4X12_PHY_HSIC_CTRL0);
-		writel(reg_val, sphy->pmuregs + EXYNOS4X12_PHY_HSIC_CTRL1);
-	}
-}
-EXPORT_SYMBOL_GPL(samsung_usbphy_set_isolation_4210);
-
-/*
- * Configure the mode of working of usb-phy here: HOST/DEVICE.
- */
-void samsung_usbphy_cfg_sel(struct samsung_usbphy *sphy)
-{
-	u32 reg;
-
-	if (!sphy->sysreg) {
-		dev_warn(sphy->dev, "Can't configure specified phy mode\n");
-		return;
-	}
-
-	reg = readl(sphy->sysreg);
-
-	if (sphy->phy_type == USB_PHY_TYPE_DEVICE)
-		reg &= ~EXYNOS_USB20PHY_CFG_HOST_LINK;
-	else if (sphy->phy_type == USB_PHY_TYPE_HOST)
-		reg |= EXYNOS_USB20PHY_CFG_HOST_LINK;
-
-	writel(reg, sphy->sysreg);
-}
-EXPORT_SYMBOL_GPL(samsung_usbphy_cfg_sel);
-
-/*
- * PHYs are different for USB Device and USB Host.
- * This make sure that correct PHY type is selected before
- * any operation on PHY.
- */
-int samsung_usbphy_set_type(struct usb_phy *phy,
-				enum samsung_usb_phy_type phy_type)
-{
-	struct samsung_usbphy *sphy = phy_to_sphy(phy);
-
-	sphy->phy_type = phy_type;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(samsung_usbphy_set_type);
-
-int samsung_usbphy_rate_to_clksel_64xx(struct samsung_usbphy *sphy,
-							unsigned long rate)
-{
-	unsigned int clksel;
-
-	switch (rate) {
-	case 12 * MHZ:
-		clksel = PHYCLK_CLKSEL_12M;
-		break;
-	case 24 * MHZ:
-		clksel = PHYCLK_CLKSEL_24M;
-		break;
-	case 48 * MHZ:
-		clksel = PHYCLK_CLKSEL_48M;
-		break;
-	default:
-		dev_err(sphy->dev,
-			"Invalid reference clock frequency: %lu\n", rate);
-		return -EINVAL;
-	}
-
-	return clksel;
-}
-EXPORT_SYMBOL_GPL(samsung_usbphy_rate_to_clksel_64xx);
-
-int samsung_usbphy_rate_to_clksel_4x12(struct samsung_usbphy *sphy,
-							unsigned long rate)
-{
-	unsigned int clksel;
-
-	switch (rate) {
-	case 9600 * KHZ:
-		clksel = FSEL_CLKSEL_9600K;
-		break;
-	case 10 * MHZ:
-		clksel = FSEL_CLKSEL_10M;
-		break;
-	case 12 * MHZ:
-		clksel = FSEL_CLKSEL_12M;
-		break;
-	case 19200 * KHZ:
-		clksel = FSEL_CLKSEL_19200K;
-		break;
-	case 20 * MHZ:
-		clksel = FSEL_CLKSEL_20M;
-		break;
-	case 24 * MHZ:
-		clksel = FSEL_CLKSEL_24M;
-		break;
-	case 50 * MHZ:
-		clksel = FSEL_CLKSEL_50M;
-		break;
-	default:
-		dev_err(sphy->dev,
-			"Invalid reference clock frequency: %lu\n", rate);
-		return -EINVAL;
-	}
-
-	return clksel;
-}
-EXPORT_SYMBOL_GPL(samsung_usbphy_rate_to_clksel_4x12);
-
-/*
- * Returns reference clock frequency selection value
- */
-int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy)
-{
-	struct clk *ref_clk;
-	unsigned long rate;
-	int refclk_freq;
-
-	/*
-	 * In exynos5250 USB host and device PHY use
-	 * external crystal clock XXTI
-	 */
-	if (sphy->drv_data->cpu_type == TYPE_EXYNOS5250)
-		ref_clk = clk_get(sphy->dev, "ext_xtal");
-	else
-		ref_clk = clk_get(sphy->dev, "xusbxti");
-	if (IS_ERR(ref_clk)) {
-		dev_err(sphy->dev, "Failed to get reference clock\n");
-		return PTR_ERR(ref_clk);
-	}
-
-	rate = clk_get_rate(ref_clk);
-	refclk_freq = sphy->drv_data->rate_to_clksel(sphy, rate);
-
-	clk_put(ref_clk);
-
-	return refclk_freq;
-}
-EXPORT_SYMBOL_GPL(samsung_usbphy_get_refclk_freq);
diff --git a/drivers/usb/phy/phy-samsung-usb.h b/drivers/usb/phy/phy-samsung-usb.h
deleted file mode 100644
index 4eef45555971..000000000000
--- a/drivers/usb/phy/phy-samsung-usb.h
+++ /dev/null
@@ -1,317 +0,0 @@
-/* linux/drivers/usb/phy/phy-samsung-usb.h
- *
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *              http://www.samsung.com
- *
- * Samsung USB-PHY transceiver; talks to S3C HS OTG controller, EHCI-S5P and
- * OHCI-EXYNOS controllers.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/usb/phy.h>
-
-/* Register definitions */
-
-#define SAMSUNG_PHYPWR				(0x00)
-#define PHYPWR_NORMAL_MASK			(0x19 << 0)
-#define PHYPWR_OTG_DISABLE			(0x1 << 4)
-#define PHYPWR_ANALOG_POWERDOWN			(0x1 << 3)
-#define PHYPWR_FORCE_SUSPEND			(0x1 << 1)
-/* For Exynos4 */
-#define PHYPWR_NORMAL_MASK_PHY0			(0x39 << 0)
-#define PHYPWR_SLEEP_PHY0			(0x1 << 5)
-
-#define SAMSUNG_PHYCLK				(0x04)
-#define PHYCLK_MODE_USB11			(0x1 << 6)
-#define PHYCLK_EXT_OSC				(0x1 << 5)
-#define PHYCLK_COMMON_ON_N			(0x1 << 4)
-#define PHYCLK_ID_PULL				(0x1 << 2)
-#define PHYCLK_CLKSEL_MASK			(0x3 << 0)
-#define PHYCLK_CLKSEL_48M			(0x0 << 0)
-#define PHYCLK_CLKSEL_12M			(0x2 << 0)
-#define PHYCLK_CLKSEL_24M			(0x3 << 0)
-
-#define SAMSUNG_RSTCON				(0x08)
-#define RSTCON_PHYLINK_SWRST			(0x1 << 2)
-#define RSTCON_HLINK_SWRST			(0x1 << 1)
-#define RSTCON_SWRST				(0x1 << 0)
-
-/* EXYNOS4X12 */
-#define EXYNOS4X12_PHY_HSIC_CTRL0		(0x04)
-#define EXYNOS4X12_PHY_HSIC_CTRL1		(0x08)
-#define PHYPWR_NORMAL_MASK_HSIC1		(0x7 << 12)
-#define PHYPWR_NORMAL_MASK_HSIC0		(0x7 << 9)
-#define PHYPWR_NORMAL_MASK_PHY1			(0x7 << 6)
-#define RSTCON_HOSTPHY_SWRST			(0xf << 3)
-
-/* EXYNOS5 */
-#define EXYNOS5_PHY_HOST_CTRL0			(0x00)
-#define HOST_CTRL0_PHYSWRSTALL			(0x1 << 31)
-#define HOST_CTRL0_REFCLKSEL_MASK		(0x3 << 19)
-#define HOST_CTRL0_REFCLKSEL_XTAL		(0x0 << 19)
-#define HOST_CTRL0_REFCLKSEL_EXTL		(0x1 << 19)
-#define HOST_CTRL0_REFCLKSEL_CLKCORE		(0x2 << 19)
-#define HOST_CTRL0_FSEL_MASK			(0x7 << 16)
-#define HOST_CTRL0_FSEL(_x)			((_x) << 16)
-#define FSEL_CLKSEL_50M				(0x7)
-#define FSEL_CLKSEL_24M				(0x5)
-#define FSEL_CLKSEL_20M				(0x4)
-#define FSEL_CLKSEL_19200K			(0x3)
-#define FSEL_CLKSEL_12M				(0x2)
-#define FSEL_CLKSEL_10M				(0x1)
-#define FSEL_CLKSEL_9600K			(0x0)
-#define HOST_CTRL0_TESTBURNIN			(0x1 << 11)
-#define HOST_CTRL0_RETENABLE			(0x1 << 10)
-#define HOST_CTRL0_COMMONON_N			(0x1 << 9)
-#define HOST_CTRL0_SIDDQ			(0x1 << 6)
-#define HOST_CTRL0_FORCESLEEP			(0x1 << 5)
-#define HOST_CTRL0_FORCESUSPEND			(0x1 << 4)
-#define HOST_CTRL0_WORDINTERFACE		(0x1 << 3)
-#define HOST_CTRL0_UTMISWRST			(0x1 << 2)
-#define HOST_CTRL0_LINKSWRST			(0x1 << 1)
-#define HOST_CTRL0_PHYSWRST			(0x1 << 0)
-
-#define EXYNOS5_PHY_HOST_TUNE0			(0x04)
-
-#define EXYNOS5_PHY_HSIC_CTRL1			(0x10)
-
-#define EXYNOS5_PHY_HSIC_TUNE1			(0x14)
-
-#define EXYNOS5_PHY_HSIC_CTRL2			(0x20)
-
-#define EXYNOS5_PHY_HSIC_TUNE2			(0x24)
-#define HSIC_CTRL_REFCLKSEL_MASK		(0x3 << 23)
-#define HSIC_CTRL_REFCLKSEL			(0x2 << 23)
-#define HSIC_CTRL_REFCLKDIV_MASK		(0x7f << 16)
-#define HSIC_CTRL_REFCLKDIV(_x)			((_x) << 16)
-#define HSIC_CTRL_REFCLKDIV_12			(0x24 << 16)
-#define HSIC_CTRL_REFCLKDIV_15			(0x1c << 16)
-#define HSIC_CTRL_REFCLKDIV_16			(0x1a << 16)
-#define HSIC_CTRL_REFCLKDIV_19_2		(0x15 << 16)
-#define HSIC_CTRL_REFCLKDIV_20			(0x14 << 16)
-#define HSIC_CTRL_SIDDQ				(0x1 << 6)
-#define HSIC_CTRL_FORCESLEEP			(0x1 << 5)
-#define HSIC_CTRL_FORCESUSPEND			(0x1 << 4)
-#define HSIC_CTRL_WORDINTERFACE			(0x1 << 3)
-#define HSIC_CTRL_UTMISWRST			(0x1 << 2)
-#define HSIC_CTRL_PHYSWRST			(0x1 << 0)
-
-#define EXYNOS5_PHY_HOST_EHCICTRL		(0x30)
-#define HOST_EHCICTRL_ENAINCRXALIGN		(0x1 << 29)
-#define HOST_EHCICTRL_ENAINCR4			(0x1 << 28)
-#define HOST_EHCICTRL_ENAINCR8			(0x1 << 27)
-#define HOST_EHCICTRL_ENAINCR16			(0x1 << 26)
-
-#define EXYNOS5_PHY_HOST_OHCICTRL		(0x34)
-#define HOST_OHCICTRL_SUSPLGCY			(0x1 << 3)
-#define HOST_OHCICTRL_APPSTARTCLK		(0x1 << 2)
-#define HOST_OHCICTRL_CNTSEL			(0x1 << 1)
-#define HOST_OHCICTRL_CLKCKTRST			(0x1 << 0)
-
-#define EXYNOS5_PHY_OTG_SYS			(0x38)
-#define OTG_SYS_PHYLINK_SWRESET			(0x1 << 14)
-#define OTG_SYS_LINKSWRST_UOTG			(0x1 << 13)
-#define OTG_SYS_PHY0_SWRST			(0x1 << 12)
-#define OTG_SYS_REFCLKSEL_MASK			(0x3 << 9)
-#define OTG_SYS_REFCLKSEL_XTAL			(0x0 << 9)
-#define OTG_SYS_REFCLKSEL_EXTL			(0x1 << 9)
-#define OTG_SYS_REFCLKSEL_CLKCORE		(0x2 << 9)
-#define OTG_SYS_IDPULLUP_UOTG			(0x1 << 8)
-#define OTG_SYS_COMMON_ON			(0x1 << 7)
-#define OTG_SYS_FSEL_MASK			(0x7 << 4)
-#define OTG_SYS_FSEL(_x)			((_x) << 4)
-#define OTG_SYS_FORCESLEEP			(0x1 << 3)
-#define OTG_SYS_OTGDISABLE			(0x1 << 2)
-#define OTG_SYS_SIDDQ_UOTG			(0x1 << 1)
-#define OTG_SYS_FORCESUSPEND			(0x1 << 0)
-
-#define EXYNOS5_PHY_OTG_TUNE			(0x40)
-
-/* EXYNOS5: USB 3.0 DRD */
-#define EXYNOS5_DRD_LINKSYSTEM			(0x04)
-#define LINKSYSTEM_FLADJ_MASK			(0x3f << 1)
-#define LINKSYSTEM_FLADJ(_x)			((_x) << 1)
-#define LINKSYSTEM_XHCI_VERSION_CONTROL		(0x1 << 27)
-
-#define EXYNOS5_DRD_PHYUTMI			(0x08)
-#define PHYUTMI_OTGDISABLE			(0x1 << 6)
-#define PHYUTMI_FORCESUSPEND			(0x1 << 1)
-#define PHYUTMI_FORCESLEEP			(0x1 << 0)
-
-#define EXYNOS5_DRD_PHYPIPE			(0x0c)
-
-#define EXYNOS5_DRD_PHYCLKRST			(0x10)
-#define PHYCLKRST_SSC_REFCLKSEL_MASK		(0xff << 23)
-#define PHYCLKRST_SSC_REFCLKSEL(_x)		((_x) << 23)
-#define PHYCLKRST_SSC_RANGE_MASK		(0x03 << 21)
-#define PHYCLKRST_SSC_RANGE(_x)			((_x) << 21)
-#define PHYCLKRST_SSC_EN			(0x1 << 20)
-#define PHYCLKRST_REF_SSP_EN			(0x1 << 19)
-#define PHYCLKRST_REF_CLKDIV2			(0x1 << 18)
-#define PHYCLKRST_MPLL_MULTIPLIER_MASK		(0x7f << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_100MHZ_REF	(0x19 << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_50M_REF	(0x02 << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF	(0x68 << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF	(0x7d << 11)
-#define PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF	(0x02 << 11)
-#define PHYCLKRST_FSEL_MASK			(0x3f << 5)
-#define PHYCLKRST_FSEL(_x)			((_x) << 5)
-#define PHYCLKRST_FSEL_PAD_100MHZ		(0x27 << 5)
-#define PHYCLKRST_FSEL_PAD_24MHZ		(0x2a << 5)
-#define PHYCLKRST_FSEL_PAD_20MHZ		(0x31 << 5)
-#define PHYCLKRST_FSEL_PAD_19_2MHZ		(0x38 << 5)
-#define PHYCLKRST_RETENABLEN			(0x1 << 4)
-#define PHYCLKRST_REFCLKSEL_MASK		(0x03 << 2)
-#define PHYCLKRST_REFCLKSEL_PAD_REFCLK		(0x2 << 2)
-#define PHYCLKRST_REFCLKSEL_EXT_REFCLK		(0x3 << 2)
-#define PHYCLKRST_PORTRESET			(0x1 << 1)
-#define PHYCLKRST_COMMONONN			(0x1 << 0)
-
-#define EXYNOS5_DRD_PHYREG0			(0x14)
-
-#define EXYNOS5_DRD_PHYREG1			(0x18)
-
-#define EXYNOS5_DRD_PHYPARAM0			(0x1c)
-#define PHYPARAM0_REF_USE_PAD			(0x1 << 31)
-#define PHYPARAM0_REF_LOSLEVEL_MASK		(0x1f << 26)
-#define PHYPARAM0_REF_LOSLEVEL			(0x9 << 26)
-
-#define EXYNOS5_DRD_PHYPARAM1			(0x20)
-#define PHYPARAM1_PCS_TXDEEMPH_MASK		(0x1f << 0)
-#define PHYPARAM1_PCS_TXDEEMPH			(0x1c)
-
-#define EXYNOS5_DRD_PHYTERM			(0x24)
-
-#define EXYNOS5_DRD_PHYTEST			(0x28)
-#define PHYTEST_POWERDOWN_SSP			(0x1 << 3)
-#define PHYTEST_POWERDOWN_HSP			(0x1 << 2)
-
-#define EXYNOS5_DRD_PHYADP			(0x2c)
-
-#define EXYNOS5_DRD_PHYBATCHG			(0x30)
-#define PHYBATCHG_UTMI_CLKSEL			(0x1 << 2)
-
-#define EXYNOS5_DRD_PHYRESUME			(0x34)
-
-#define EXYNOS5_DRD_LINKPORT			(0x44)
-
-#ifndef MHZ
-#define MHZ (1000*1000)
-#endif
-
-#ifndef KHZ
-#define KHZ (1000)
-#endif
-
-#define EXYNOS_USBHOST_PHY_CTRL_OFFSET		(0x4)
-#define S3C64XX_USBPHY_ENABLE			(0x1 << 16)
-#define EXYNOS_USBPHY_ENABLE			(0x1 << 0)
-#define EXYNOS_USB20PHY_CFG_HOST_LINK		(0x1 << 0)
-
-enum samsung_cpu_type {
-	TYPE_S3C64XX,
-	TYPE_EXYNOS4210,
-	TYPE_EXYNOS4X12,
-	TYPE_EXYNOS5250,
-};
-
-struct samsung_usbphy;
-
-/*
- * struct samsung_usbphy_drvdata - driver data for various SoC variants
- * @cpu_type: machine identifier
- * @devphy_en_mask: device phy enable mask for PHY CONTROL register
- * @hostphy_en_mask: host phy enable mask for PHY CONTROL register
- * @devphy_reg_offset: offset to DEVICE PHY CONTROL register from
- *		       mapped address of system controller.
- * @hostphy_reg_offset: offset to HOST PHY CONTROL register from
- *		       mapped address of system controller.
- *
- *	Here we have a separate mask for device type phy.
- *	Having different masks for host and device type phy helps
- *	in setting independent masks in case of SoCs like S5PV210,
- *	in which PHY0 and PHY1 enable bits belong to same register
- *	placed at position 0 and 1 respectively.
- *	Although for newer SoCs like exynos these bits belong to
- *	different registers altogether placed at position 0.
- */
-struct samsung_usbphy_drvdata {
-	int cpu_type;
-	int devphy_en_mask;
-	int hostphy_en_mask;
-	u32 devphy_reg_offset;
-	u32 hostphy_reg_offset;
-	int (*rate_to_clksel)(struct samsung_usbphy *, unsigned long);
-	void (*set_isolation)(struct samsung_usbphy *, bool);
-	void (*phy_enable)(struct samsung_usbphy *);
-	void (*phy_disable)(struct samsung_usbphy *);
-};
-
-/*
- * struct samsung_usbphy - transceiver driver state
- * @phy: transceiver structure
- * @plat: platform data
- * @dev: The parent device supplied to the probe function
- * @clk: usb phy clock
- * @regs: usb phy controller registers memory base
- * @pmuregs: USB device PHY_CONTROL register memory base
- * @sysreg: USB2.0 PHY_CFG register memory base
- * @ref_clk_freq: reference clock frequency selection
- * @drv_data: driver data available for different SoCs
- * @phy_type: Samsung SoCs specific phy types:	#HOST
- *						#DEVICE
- * @phy_usage: usage count for phy
- * @lock: lock for phy operations
- */
-struct samsung_usbphy {
-	struct usb_phy	phy;
-	struct samsung_usbphy_data *plat;
-	struct device	*dev;
-	struct clk	*clk;
-	void __iomem	*regs;
-	void __iomem	*pmuregs;
-	void __iomem	*sysreg;
-	int		ref_clk_freq;
-	const struct samsung_usbphy_drvdata *drv_data;
-	enum samsung_usb_phy_type phy_type;
-	atomic_t	phy_usage;
-	spinlock_t	lock;
-};
-
-#define phy_to_sphy(x)		container_of((x), struct samsung_usbphy, phy)
-
-static const struct of_device_id samsung_usbphy_dt_match[];
-
-static inline const struct samsung_usbphy_drvdata
-*samsung_usbphy_get_driver_data(struct platform_device *pdev)
-{
-	if (pdev->dev.of_node) {
-		const struct of_device_id *match;
-		match = of_match_node(samsung_usbphy_dt_match,
-							pdev->dev.of_node);
-		return match->data;
-	}
-
-	return (struct samsung_usbphy_drvdata *)
-				platform_get_device_id(pdev)->driver_data;
-}
-
-extern int samsung_usbphy_parse_dt(struct samsung_usbphy *sphy);
-extern void samsung_usbphy_set_isolation_4210(struct samsung_usbphy *sphy,
-								bool on);
-extern void samsung_usbphy_cfg_sel(struct samsung_usbphy *sphy);
-extern int samsung_usbphy_set_type(struct usb_phy *phy,
-					enum samsung_usb_phy_type phy_type);
-extern int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy);
-extern int samsung_usbphy_rate_to_clksel_64xx(struct samsung_usbphy *sphy,
-							unsigned long rate);
-extern int samsung_usbphy_rate_to_clksel_4x12(struct samsung_usbphy *sphy,
-							unsigned long rate);
diff --git a/include/linux/platform_data/samsung-usbphy.h b/include/linux/platform_data/samsung-usbphy.h
deleted file mode 100644
index 1bd24cba982b..000000000000
--- a/include/linux/platform_data/samsung-usbphy.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2012 Samsung Electronics Co.Ltd
- *		http://www.samsung.com/
- * Author: Praveen Paneri <p.paneri@samsung.com>
- *
- * Defines platform data for samsung usb phy driver.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef __SAMSUNG_USBPHY_PLATFORM_H
-#define __SAMSUNG_USBPHY_PLATFORM_H
-
-/**
- * samsung_usbphy_data - Platform data for USB PHY driver.
- * @pmu_isolation: Function to control usb phy isolation in PMU.
- */
-struct samsung_usbphy_data {
-	void (*pmu_isolation)(int on);
-};
-
-extern void samsung_usbphy_set_pdata(struct samsung_usbphy_data *pd);
-
-#endif /* __SAMSUNG_USBPHY_PLATFORM_H */
-- 
cgit v1.2.3


From 9067359faf890b3a18ab38c792d458fba77b32b4 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 2 Sep 2014 02:03:12 -0700
Subject: Revert "leds: convert blink timer to workqueue"

This reverts commit 8b37e1bef5a6b60e949e28a4db3006e4b00bd758.

It's broken as it changes led_blink_set() in a way that it can now sleep
(while synchronously waiting for workqueue to be cancelled). That's a
problem, because it's possible that this function gets called from atomic
context (tpt_trig_timer() takes a readlock and thus disables preemption).

This has been brought up 3 weeks ago already [1] but no proper fix has
materialized, and I keep seeing the problem since 3.17-rc1.

[1] https://lkml.org/lkml/2014/8/16/128

 BUG: sleeping function called from invalid context at kernel/workqueue.c:2650
 in_atomic(): 1, irqs_disabled(): 0, pid: 2335, name: wpa_supplicant
 5 locks held by wpa_supplicant/2335:
  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff814c7c92>] rtnl_lock+0x12/0x20
  #1:  (&wdev->mtx){+.+.+.}, at: [<ffffffffc06e649c>] cfg80211_mgd_wext_siwessid+0x5c/0x180 [cfg80211]
  #2:  (&local->mtx){+.+.+.}, at: [<ffffffffc0817dea>] ieee80211_prep_connection+0x17a/0x9a0 [mac80211]
  #3:  (&local->chanctx_mtx){+.+.+.}, at: [<ffffffffc08081ed>] ieee80211_vif_use_channel+0x5d/0x2a0 [mac80211]
  #4:  (&trig->leddev_list_lock){.+.+..}, at: [<ffffffffc081e68c>] tpt_trig_timer+0xec/0x170 [mac80211]
 CPU: 0 PID: 2335 Comm: wpa_supplicant Not tainted 3.17.0-rc3 #1
 Hardware name: LENOVO 7470BN2/7470BN2, BIOS 6DET38WW (2.02 ) 12/19/2008
  ffff8800360b5a50 ffff8800751f76d8 ffffffff8159e97f ffff8800360b5a30
  ffff8800751f76e8 ffffffff810739a5 ffff8800751f77b0 ffffffff8106862f
  ffffffff810685d0 0aa2209200000000 ffff880000000004 ffff8800361c59d0
 Call Trace:
  [<ffffffff8159e97f>] dump_stack+0x4d/0x66
  [<ffffffff810739a5>] __might_sleep+0xe5/0x120
  [<ffffffff8106862f>] flush_work+0x5f/0x270
  [<ffffffff810685d0>] ? mod_delayed_work_on+0x80/0x80
  [<ffffffff810945ca>] ? mark_held_locks+0x6a/0x90
  [<ffffffff81068a5f>] ? __cancel_work_timer+0x6f/0x100
  [<ffffffff810946ed>] ? trace_hardirqs_on_caller+0xfd/0x1c0
  [<ffffffff81068a6b>] __cancel_work_timer+0x7b/0x100
  [<ffffffff81068b0e>] cancel_delayed_work_sync+0xe/0x10
  [<ffffffff8147cf3b>] led_blink_set+0x1b/0x40
  [<ffffffffc081e6b0>] tpt_trig_timer+0x110/0x170 [mac80211]
  [<ffffffffc081ecdd>] ieee80211_mod_tpt_led_trig+0x9d/0x160 [mac80211]
  [<ffffffffc07e4278>] __ieee80211_recalc_idle+0x98/0x140 [mac80211]
  [<ffffffffc07e59ce>] ieee80211_idle_off+0xe/0x10 [mac80211]
  [<ffffffffc0804e5b>] ieee80211_add_chanctx+0x3b/0x220 [mac80211]
  [<ffffffffc08062e4>] ieee80211_new_chanctx+0x44/0xf0 [mac80211]
  [<ffffffffc080838a>] ieee80211_vif_use_channel+0x1fa/0x2a0 [mac80211]
  [<ffffffffc0817df8>] ieee80211_prep_connection+0x188/0x9a0 [mac80211]
  [<ffffffffc081c246>] ieee80211_mgd_auth+0x256/0x2e0 [mac80211]
  [<ffffffffc07eab33>] ieee80211_auth+0x13/0x20 [mac80211]
  [<ffffffffc06cb006>] cfg80211_mlme_auth+0x106/0x270 [cfg80211]
  [<ffffffffc06ce085>] cfg80211_conn_do_work+0x155/0x3b0 [cfg80211]
  [<ffffffffc06cf670>] cfg80211_connect+0x3f0/0x540 [cfg80211]
  [<ffffffffc06e6148>] cfg80211_mgd_wext_connect+0x158/0x1f0 [cfg80211]
  [<ffffffffc06e651e>] cfg80211_mgd_wext_siwessid+0xde/0x180 [cfg80211]
  [<ffffffffc06e36c0>] ? cfg80211_wext_giwessid+0x50/0x50 [cfg80211]
  [<ffffffffc06e36dd>] cfg80211_wext_siwessid+0x1d/0x40 [cfg80211]
  [<ffffffff81584d0c>] ioctl_standard_iw_point+0x14c/0x3e0
  [<ffffffff810946ed>] ? trace_hardirqs_on_caller+0xfd/0x1c0
  [<ffffffff8158502a>] ioctl_standard_call+0x8a/0xd0
  [<ffffffff81584fa0>] ? ioctl_standard_iw_point+0x3e0/0x3e0
  [<ffffffff81584b76>] wireless_process_ioctl.constprop.10+0xb6/0x100
  [<ffffffff8158521d>] wext_handle_ioctl+0x5d/0xb0
  [<ffffffff814cfb29>] dev_ioctl+0x329/0x620
  [<ffffffff810946ed>] ? trace_hardirqs_on_caller+0xfd/0x1c0
  [<ffffffff8149c7f2>] sock_ioctl+0x142/0x2e0
  [<ffffffff811b0140>] do_vfs_ioctl+0x300/0x520
  [<ffffffff815a67fb>] ? sysret_check+0x1b/0x56
  [<ffffffff810946ed>] ? trace_hardirqs_on_caller+0xfd/0x1c0
  [<ffffffff811b03e1>] SyS_ioctl+0x81/0xa0
  [<ffffffff815a67d6>] system_call_fastpath+0x1a/0x1f
 wlan0: send auth to 00:0b:6b:3c:8c:e4 (try 1/3)
 wlan0: authenticated
 wlan0: associate with 00:0b:6b:3c:8c:e4 (try 1/3)
 wlan0: RX AssocResp from 00:0b:6b:3c:8c:e4 (capab=0x431 status=0 aid=2)
 wlan0: associated
 IPv6: ADDRCONF(NETDEV_CHANGE): wlan0: link becomes ready
 cfg80211: Calling CRDA for country: NA
 wlan0: Limiting TX power to 27 (27 - 0) dBm as advertised by 00:0b:6b:3c:8c:e4

 =================================
 [ INFO: inconsistent lock state ]
 3.17.0-rc3 #1 Not tainted
 ---------------------------------
 inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
 swapper/0/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
  ((&(&led_cdev->blink_work)->work)){+.?...}, at: [<ffffffff810685d0>] flush_work+0x0/0x270
 {SOFTIRQ-ON-W} state was registered at:
   [<ffffffff81094dbe>] __lock_acquire+0x30e/0x1a30
   [<ffffffff81096c81>] lock_acquire+0x91/0x110
   [<ffffffff81068608>] flush_work+0x38/0x270
   [<ffffffff81068a6b>] __cancel_work_timer+0x7b/0x100
   [<ffffffff81068b0e>] cancel_delayed_work_sync+0xe/0x10
   [<ffffffff8147cf3b>] led_blink_set+0x1b/0x40
   [<ffffffffc081e6b0>] tpt_trig_timer+0x110/0x170 [mac80211]
   [<ffffffffc081ecdd>] ieee80211_mod_tpt_led_trig+0x9d/0x160 [mac80211]
   [<ffffffffc07e4278>] __ieee80211_recalc_idle+0x98/0x140 [mac80211]
   [<ffffffffc07e59ce>] ieee80211_idle_off+0xe/0x10 [mac80211]
   [<ffffffffc0804e5b>] ieee80211_add_chanctx+0x3b/0x220 [mac80211]
   [<ffffffffc08062e4>] ieee80211_new_chanctx+0x44/0xf0 [mac80211]
   [<ffffffffc080838a>] ieee80211_vif_use_channel+0x1fa/0x2a0 [mac80211]
   [<ffffffffc0817df8>] ieee80211_prep_connection+0x188/0x9a0 [mac80211]
   [<ffffffffc081c246>] ieee80211_mgd_auth+0x256/0x2e0 [mac80211]
   [<ffffffffc07eab33>] ieee80211_auth+0x13/0x20 [mac80211]
   [<ffffffffc06cb006>] cfg80211_mlme_auth+0x106/0x270 [cfg80211]
   [<ffffffffc06ce085>] cfg80211_conn_do_work+0x155/0x3b0 [cfg80211]
   [<ffffffffc06cf670>] cfg80211_connect+0x3f0/0x540 [cfg80211]
   [<ffffffffc06e6148>] cfg80211_mgd_wext_connect+0x158/0x1f0 [cfg80211]
   [<ffffffffc06e651e>] cfg80211_mgd_wext_siwessid+0xde/0x180 [cfg80211]
   [<ffffffffc06e36dd>] cfg80211_wext_siwessid+0x1d/0x40 [cfg80211]
   [<ffffffff81584d0c>] ioctl_standard_iw_point+0x14c/0x3e0
   [<ffffffff8158502a>] ioctl_standard_call+0x8a/0xd0
   [<ffffffff81584b76>] wireless_process_ioctl.constprop.10+0xb6/0x100
   [<ffffffff8158521d>] wext_handle_ioctl+0x5d/0xb0
   [<ffffffff814cfb29>] dev_ioctl+0x329/0x620
   [<ffffffff8149c7f2>] sock_ioctl+0x142/0x2e0
   [<ffffffff811b0140>] do_vfs_ioctl+0x300/0x520
   [<ffffffff811b03e1>] SyS_ioctl+0x81/0xa0
   [<ffffffff815a67d6>] system_call_fastpath+0x1a/0x1f
 irq event stamp: 493416
 hardirqs last  enabled at (493416): [<ffffffff81068a5f>] __cancel_work_timer+0x6f/0x100
 hardirqs last disabled at (493415): [<ffffffff81067e9f>] try_to_grab_pending+0x1f/0x160
 softirqs last  enabled at (493408): [<ffffffff81053ced>] _local_bh_enable+0x1d/0x50
 softirqs last disabled at (493409): [<ffffffff81054c75>] irq_exit+0xa5/0xb0

 other info that might help us debug this:
  Possible unsafe locking scenario:

        CPU0
        ----
   lock((&(&led_cdev->blink_work)->work));
   <Interrupt>
     lock((&(&led_cdev->blink_work)->work));

  *** DEADLOCK ***

 2 locks held by swapper/0/0:
  #0:  (((&tpt_trig->timer))){+.-...}, at: [<ffffffff810b4c50>] call_timer_fn+0x0/0x180
  #1:  (&trig->leddev_list_lock){.+.?..}, at: [<ffffffffc081e68c>] tpt_trig_timer+0xec/0x170 [mac80211]

 stack backtrace:
 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.17.0-rc3 #1
 Hardware name: LENOVO 7470BN2/7470BN2, BIOS 6DET38WW (2.02 ) 12/19/2008
  ffffffff8246eb30 ffff88007c203b00 ffffffff8159e97f ffffffff81a194c0
  ffff88007c203b50 ffffffff81599c29 0000000000000001 ffffffff00000001
  ffff880000000000 0000000000000006 ffffffff81a194c0 ffffffff81093ad0
 Call Trace:
  <IRQ>  [<ffffffff8159e97f>] dump_stack+0x4d/0x66
  [<ffffffff81599c29>] print_usage_bug+0x1f4/0x205
  [<ffffffff81093ad0>] ? check_usage_backwards+0x140/0x140
  [<ffffffff810944d3>] mark_lock+0x223/0x2b0
  [<ffffffff81094d60>] __lock_acquire+0x2b0/0x1a30
  [<ffffffff81096c81>] lock_acquire+0x91/0x110
  [<ffffffff810685d0>] ? mod_delayed_work_on+0x80/0x80
  [<ffffffffc081e5a0>] ? __ieee80211_get_rx_led_name+0x10/0x10 [mac80211]
  [<ffffffff81068608>] flush_work+0x38/0x270
  [<ffffffff810685d0>] ? mod_delayed_work_on+0x80/0x80
  [<ffffffff810945ca>] ? mark_held_locks+0x6a/0x90
  [<ffffffff81068a5f>] ? __cancel_work_timer+0x6f/0x100
  [<ffffffffc081e5a0>] ? __ieee80211_get_rx_led_name+0x10/0x10 [mac80211]
  [<ffffffff8109469d>] ? trace_hardirqs_on_caller+0xad/0x1c0
  [<ffffffffc081e5a0>] ? __ieee80211_get_rx_led_name+0x10/0x10 [mac80211]
  [<ffffffff81068a6b>] __cancel_work_timer+0x7b/0x100
  [<ffffffff81068b0e>] cancel_delayed_work_sync+0xe/0x10
  [<ffffffff8147cf3b>] led_blink_set+0x1b/0x40
  [<ffffffffc081e6b0>] tpt_trig_timer+0x110/0x170 [mac80211]
  [<ffffffff810b4cc5>] call_timer_fn+0x75/0x180
  [<ffffffff810b4c50>] ? process_timeout+0x10/0x10
  [<ffffffffc081e5a0>] ? __ieee80211_get_rx_led_name+0x10/0x10 [mac80211]
  [<ffffffff810b50ac>] run_timer_softirq+0x1fc/0x2f0
  [<ffffffff81054805>] __do_softirq+0x115/0x2e0
  [<ffffffff81054c75>] irq_exit+0xa5/0xb0
  [<ffffffff810049b3>] do_IRQ+0x53/0xf0
  [<ffffffff815a74af>] common_interrupt+0x6f/0x6f
  <EOI>  [<ffffffff8147b56e>] ? cpuidle_enter_state+0x6e/0x180
  [<ffffffff8147b732>] cpuidle_enter+0x12/0x20
  [<ffffffff8108bba0>] cpu_startup_entry+0x330/0x360
  [<ffffffff8158fb51>] rest_init+0xc1/0xd0
  [<ffffffff8158fa90>] ? csum_partial_copy_generic+0x170/0x170
  [<ffffffff81af3ff2>] start_kernel+0x44f/0x45a
  [<ffffffff81af399c>] ? set_init_arg+0x53/0x53
  [<ffffffff81af35ad>] x86_64_start_reservations+0x2a/0x2c
  [<ffffffff81af36a0>] x86_64_start_kernel+0xf1/0xf4

Cc: Vincent Donnefort <vdonnefort@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/led-class.c | 14 +++++++-------
 drivers/leds/led-core.c  | 11 +++++------
 include/linux/leds.h     |  3 ++-
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 129729d35478..aa29198fca3e 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -15,10 +15,10 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/device.h>
+#include <linux/timer.h>
 #include <linux/err.h>
 #include <linux/ctype.h>
 #include <linux/leds.h>
-#include <linux/workqueue.h>
 #include "leds.h"
 
 static struct class *leds_class;
@@ -97,10 +97,9 @@ static const struct attribute_group *led_groups[] = {
 	NULL,
 };
 
-static void led_work_function(struct work_struct *ws)
+static void led_timer_function(unsigned long data)
 {
-	struct led_classdev *led_cdev =
-		container_of(ws, struct led_classdev, blink_work.work);
+	struct led_classdev *led_cdev = (void *)data;
 	unsigned long brightness;
 	unsigned long delay;
 
@@ -144,8 +143,7 @@ static void led_work_function(struct work_struct *ws)
 		}
 	}
 
-	queue_delayed_work(system_wq, &led_cdev->blink_work,
-			   msecs_to_jiffies(delay));
+	mod_timer(&led_cdev->blink_timer, jiffies + msecs_to_jiffies(delay));
 }
 
 static void set_brightness_delayed(struct work_struct *ws)
@@ -233,7 +231,9 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
 
 	INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed);
 
-	INIT_DELAYED_WORK(&led_cdev->blink_work, led_work_function);
+	init_timer(&led_cdev->blink_timer);
+	led_cdev->blink_timer.function = led_timer_function;
+	led_cdev->blink_timer.data = (unsigned long)led_cdev;
 
 #ifdef CONFIG_LEDS_TRIGGERS
 	led_trigger_set_default(led_cdev);
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 4bb116867b88..71b40d3bf776 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/rwsem.h>
 #include <linux/leds.h>
-#include <linux/workqueue.h>
 #include "leds.h"
 
 DECLARE_RWSEM(leds_list_lock);
@@ -52,7 +51,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
 		return;
 	}
 
-	queue_delayed_work(system_wq, &led_cdev->blink_work, 1);
+	mod_timer(&led_cdev->blink_timer, jiffies + 1);
 }
 
 
@@ -76,7 +75,7 @@ void led_blink_set(struct led_classdev *led_cdev,
 		   unsigned long *delay_on,
 		   unsigned long *delay_off)
 {
-	cancel_delayed_work_sync(&led_cdev->blink_work);
+	del_timer_sync(&led_cdev->blink_timer);
 
 	led_cdev->flags &= ~LED_BLINK_ONESHOT;
 	led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
@@ -91,7 +90,7 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev,
 			   int invert)
 {
 	if ((led_cdev->flags & LED_BLINK_ONESHOT) &&
-	     delayed_work_pending(&led_cdev->blink_work))
+	     timer_pending(&led_cdev->blink_timer))
 		return;
 
 	led_cdev->flags |= LED_BLINK_ONESHOT;
@@ -108,7 +107,7 @@ EXPORT_SYMBOL(led_blink_set_oneshot);
 
 void led_stop_software_blink(struct led_classdev *led_cdev)
 {
-	cancel_delayed_work_sync(&led_cdev->blink_work);
+	del_timer_sync(&led_cdev->blink_timer);
 	led_cdev->blink_delay_on = 0;
 	led_cdev->blink_delay_off = 0;
 }
@@ -117,7 +116,7 @@ EXPORT_SYMBOL_GPL(led_stop_software_blink);
 void led_set_brightness(struct led_classdev *led_cdev,
 			enum led_brightness brightness)
 {
-	/* delay brightness setting if need to stop soft-blink work */
+	/* delay brightness setting if need to stop soft-blink timer */
 	if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) {
 		led_cdev->delayed_set_value = brightness;
 		schedule_work(&led_cdev->set_brightness_work);
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6a599dce7f9d..e43686472197 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -15,6 +15,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
+#include <linux/timer.h>
 #include <linux/workqueue.h>
 
 struct device;
@@ -68,7 +69,7 @@ struct led_classdev {
 	const char		*default_trigger;	/* Trigger to use */
 
 	unsigned long		 blink_delay_on, blink_delay_off;
-	struct delayed_work	 blink_work;
+	struct timer_list	 blink_timer;
 	int			 blink_brightness;
 
 	struct work_struct	set_brightness_work;
-- 
cgit v1.2.3


From 5835d96e9ce4efdba8c6cefffc2f1575925456de Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Sep 2014 14:46:04 -0400
Subject: percpu: implement [__]alloc_percpu_gfp()

Now that pcpu_alloc_area() can allocate only from populated areas,
it's easy to add atomic allocation support to [__]alloc_percpu().
Update pcpu_alloc() so that it accepts @gfp and skips all the blocking
operations and allocates only from the populated areas if @gfp doesn't
contain GFP_KERNEL.  New interface functions [__]alloc_percpu_gfp()
are added.

While this means that atomic allocations are possible, this isn't
complete yet as there's no mechanism to ensure that certain amount of
populated areas is kept available and atomic allocations may keep
failing under certain conditions.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h |  9 +++++--
 mm/percpu.c            | 64 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 46 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 6f61b61b7996..d1b416da25ed 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -122,11 +122,16 @@ extern void __init setup_per_cpu_areas(void);
 #endif
 extern void __init percpu_init_late(void);
 
+extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp);
 extern void __percpu *__alloc_percpu(size_t size, size_t align);
 extern void free_percpu(void __percpu *__pdata);
 extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 
-#define alloc_percpu(type)	\
-	(typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type))
+#define alloc_percpu_gfp(type, gfp)					\
+	(typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type),	\
+						__alignof__(type), gfp)
+#define alloc_percpu(type)						\
+	(typeof(type) __percpu *)__alloc_percpu(sizeof(type),		\
+						__alignof__(type))
 
 #endif /* __LINUX_PERCPU_H */
diff --git a/mm/percpu.c b/mm/percpu.c
index 577d84fb3002..c52b93117dc2 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -151,11 +151,6 @@ static struct pcpu_chunk *pcpu_first_chunk;
 static struct pcpu_chunk *pcpu_reserved_chunk;
 static int pcpu_reserved_chunk_limit;
 
-/*
- * Free path accesses and alters only the index data structures and can be
- * safely called from atomic context.  When memory needs to be returned to
- * the system, free path schedules reclaim_work.
- */
 static DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
 static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]pop */
 
@@ -727,20 +722,21 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
  * @reserved: allocate from the reserved chunk if available
+ * @gfp: allocation flags
  *
- * Allocate percpu area of @size bytes aligned at @align.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * Allocate percpu area of @size bytes aligned at @align.  If @gfp doesn't
+ * contain %GFP_KERNEL, the allocation is atomic.
  *
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
+static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
+				 gfp_t gfp)
 {
 	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
 	const char *err;
+	bool is_atomic = !(gfp & GFP_KERNEL);
 	int slot, off, new_alloc, cpu, ret;
 	unsigned long flags;
 	void __percpu *ptr;
@@ -773,14 +769,15 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 
 		while ((new_alloc = pcpu_need_to_extend(chunk))) {
 			spin_unlock_irqrestore(&pcpu_lock, flags);
-			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
+			if (is_atomic ||
+			    pcpu_extend_area_map(chunk, new_alloc) < 0) {
 				err = "failed to extend area map of reserved chunk";
 				goto fail;
 			}
 			spin_lock_irqsave(&pcpu_lock, flags);
 		}
 
-		off = pcpu_alloc_area(chunk, size, align, false);
+		off = pcpu_alloc_area(chunk, size, align, is_atomic);
 		if (off >= 0)
 			goto area_found;
 
@@ -797,6 +794,8 @@ restart:
 
 			new_alloc = pcpu_need_to_extend(chunk);
 			if (new_alloc) {
+				if (is_atomic)
+					continue;
 				spin_unlock_irqrestore(&pcpu_lock, flags);
 				if (pcpu_extend_area_map(chunk,
 							 new_alloc) < 0) {
@@ -811,7 +810,7 @@ restart:
 				goto restart;
 			}
 
-			off = pcpu_alloc_area(chunk, size, align, false);
+			off = pcpu_alloc_area(chunk, size, align, is_atomic);
 			if (off >= 0)
 				goto area_found;
 		}
@@ -824,6 +823,9 @@ restart:
 	 * tasks to create chunks simultaneously.  Serialize and create iff
 	 * there's still no empty chunk after grabbing the mutex.
 	 */
+	if (is_atomic)
+		goto fail;
+
 	mutex_lock(&pcpu_alloc_mutex);
 
 	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
@@ -846,7 +848,7 @@ area_found:
 	spin_unlock_irqrestore(&pcpu_lock, flags);
 
 	/* populate if not all pages are already there */
-	if (true) {
+	if (!is_atomic) {
 		int page_start, page_end, rs, re;
 
 		mutex_lock(&pcpu_alloc_mutex);
@@ -884,9 +886,9 @@ area_found:
 fail_unlock:
 	spin_unlock_irqrestore(&pcpu_lock, flags);
 fail:
-	if (warn_limit) {
-		pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
-			   "%s\n", size, align, err);
+	if (!is_atomic && warn_limit) {
+		pr_warning("PERCPU: allocation failed, size=%zu align=%zu atomic=%d, %s\n",
+			   size, align, is_atomic, err);
 		dump_stack();
 		if (!--warn_limit)
 			pr_info("PERCPU: limit reached, disable warning\n");
@@ -895,22 +897,34 @@ fail:
 }
 
 /**
- * __alloc_percpu - allocate dynamic percpu area
+ * __alloc_percpu_gfp - allocate dynamic percpu area
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
+ * @gfp: allocation flags
  *
- * Allocate zero-filled percpu area of @size bytes aligned at @align.
- * Might sleep.  Might trigger writeouts.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * Allocate zero-filled percpu area of @size bytes aligned at @align.  If
+ * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can
+ * be called from any context but is a lot more likely to fail.
  *
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
+void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp)
+{
+	return pcpu_alloc(size, align, false, gfp);
+}
+EXPORT_SYMBOL_GPL(__alloc_percpu_gfp);
+
+/**
+ * __alloc_percpu - allocate dynamic percpu area
+ * @size: size of area to allocate in bytes
+ * @align: alignment of area (max PAGE_SIZE)
+ *
+ * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL).
+ */
 void __percpu *__alloc_percpu(size_t size, size_t align)
 {
-	return pcpu_alloc(size, align, false);
+	return pcpu_alloc(size, align, false, GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(__alloc_percpu);
 
@@ -932,7 +946,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
  */
 void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 {
-	return pcpu_alloc(size, align, true);
+	return pcpu_alloc(size, align, true, GFP_KERNEL);
 }
 
 /**
-- 
cgit v1.2.3


From 1a4d76076cda69b0abf15463a8cebc172406da25 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Sep 2014 14:46:05 -0400
Subject: percpu: implement asynchronous chunk population

The percpu allocator now supports atomic allocations by only
allocating from already populated areas but the mechanism to ensure
that there's adequate amount of populated areas was missing.

This patch expands pcpu_balance_work so that in addition to freeing
excess free chunks it also populates chunks to maintain an adequate
level of populated areas.  pcpu_alloc() schedules pcpu_balance_work if
the amount of free populated areas is too low or after an atomic
allocation failure.

* PERPCU_DYNAMIC_RESERVE is increased by two pages to account for
  PCPU_EMPTY_POP_PAGES_LOW.

* pcpu_async_enabled is added to gate both async jobs -
  chunk->map_extend_work and pcpu_balance_work - so that we don't end
  up scheduling them while the needed subsystems aren't up yet.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h |   4 +-
 mm/percpu.c            | 117 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 115 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d1b416da25ed..a3aa63e47637 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -48,9 +48,9 @@
  * intelligent way to determine this would be nice.
  */
 #if BITS_PER_LONG > 32
-#define PERCPU_DYNAMIC_RESERVE		(20 << 10)
+#define PERCPU_DYNAMIC_RESERVE		(28 << 10)
 #else
-#define PERCPU_DYNAMIC_RESERVE		(12 << 10)
+#define PERCPU_DYNAMIC_RESERVE		(20 << 10)
 #endif
 
 extern void *pcpu_base_addr;
diff --git a/mm/percpu.c b/mm/percpu.c
index 28a830590b4c..867efd38d879 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -78,6 +78,8 @@
 #define PCPU_DFL_MAP_ALLOC		16	/* start a map with 16 ents */
 #define PCPU_ATOMIC_MAP_MARGIN_LOW	32
 #define PCPU_ATOMIC_MAP_MARGIN_HIGH	64
+#define PCPU_EMPTY_POP_PAGES_LOW	2
+#define PCPU_EMPTY_POP_PAGES_HIGH	4
 
 #ifdef CONFIG_SMP
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
@@ -168,9 +170,22 @@ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
  */
 static int pcpu_nr_empty_pop_pages;
 
-/* balance work is used to populate or destroy chunks asynchronously */
+/*
+ * Balance work is used to populate or destroy chunks asynchronously.  We
+ * try to keep the number of populated free pages between
+ * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one
+ * empty chunk.
+ */
 static void pcpu_balance_workfn(struct work_struct *work);
 static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn);
+static bool pcpu_async_enabled __read_mostly;
+static bool pcpu_atomic_alloc_failed;
+
+static void pcpu_schedule_balance_work(void)
+{
+	if (pcpu_async_enabled)
+		schedule_work(&pcpu_balance_work);
+}
 
 static bool pcpu_addr_in_first_chunk(void *addr)
 {
@@ -386,7 +401,8 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
 		margin = 3;
 
 		if (chunk->map_alloc <
-		    chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW)
+		    chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW &&
+		    pcpu_async_enabled)
 			schedule_work(&chunk->map_extend_work);
 	} else {
 		margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
@@ -1005,6 +1021,9 @@ area_found:
 	if (chunk != pcpu_reserved_chunk)
 		pcpu_nr_empty_pop_pages -= occ_pages;
 
+	if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
+		pcpu_schedule_balance_work();
+
 	/* clear the areas and return address relative to base address */
 	for_each_possible_cpu(cpu)
 		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
@@ -1023,6 +1042,11 @@ fail:
 		if (!--warn_limit)
 			pr_info("PERCPU: limit reached, disable warning\n");
 	}
+	if (is_atomic) {
+		/* see the flag handling in pcpu_blance_workfn() */
+		pcpu_atomic_alloc_failed = true;
+		pcpu_schedule_balance_work();
+	}
 	return NULL;
 }
 
@@ -1080,7 +1104,7 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 }
 
 /**
- * pcpu_balance_workfn - reclaim fully free chunks, workqueue function
+ * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
  * Reclaim all fully free chunks except for the first one.
@@ -1090,7 +1114,12 @@ static void pcpu_balance_workfn(struct work_struct *work)
 	LIST_HEAD(to_free);
 	struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
 	struct pcpu_chunk *chunk, *next;
+	int slot, nr_to_pop, ret;
 
+	/*
+	 * There's no reason to keep around multiple unused chunks and VM
+	 * areas can be scarce.  Destroy all free chunks except for one.
+	 */
 	mutex_lock(&pcpu_alloc_mutex);
 	spin_lock_irq(&pcpu_lock);
 
@@ -1118,6 +1147,74 @@ static void pcpu_balance_workfn(struct work_struct *work)
 		pcpu_destroy_chunk(chunk);
 	}
 
+	/*
+	 * Ensure there are certain number of free populated pages for
+	 * atomic allocs.  Fill up from the most packed so that atomic
+	 * allocs don't increase fragmentation.  If atomic allocation
+	 * failed previously, always populate the maximum amount.  This
+	 * should prevent atomic allocs larger than PAGE_SIZE from keeping
+	 * failing indefinitely; however, large atomic allocs are not
+	 * something we support properly and can be highly unreliable and
+	 * inefficient.
+	 */
+retry_pop:
+	if (pcpu_atomic_alloc_failed) {
+		nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
+		/* best effort anyway, don't worry about synchronization */
+		pcpu_atomic_alloc_failed = false;
+	} else {
+		nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
+				  pcpu_nr_empty_pop_pages,
+				  0, PCPU_EMPTY_POP_PAGES_HIGH);
+	}
+
+	for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
+		int nr_unpop = 0, rs, re;
+
+		if (!nr_to_pop)
+			break;
+
+		spin_lock_irq(&pcpu_lock);
+		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
+			nr_unpop = pcpu_unit_pages - chunk->nr_populated;
+			if (nr_unpop)
+				break;
+		}
+		spin_unlock_irq(&pcpu_lock);
+
+		if (!nr_unpop)
+			continue;
+
+		/* @chunk can't go away while pcpu_alloc_mutex is held */
+		pcpu_for_each_unpop_region(chunk, rs, re, 0, pcpu_unit_pages) {
+			int nr = min(re - rs, nr_to_pop);
+
+			ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+			if (!ret) {
+				nr_to_pop -= nr;
+				spin_lock_irq(&pcpu_lock);
+				pcpu_chunk_populated(chunk, rs, rs + nr);
+				spin_unlock_irq(&pcpu_lock);
+			} else {
+				nr_to_pop = 0;
+			}
+
+			if (!nr_to_pop)
+				break;
+		}
+	}
+
+	if (nr_to_pop) {
+		/* ran out of chunks to populate, create a new one and retry */
+		chunk = pcpu_create_chunk();
+		if (chunk) {
+			spin_lock_irq(&pcpu_lock);
+			pcpu_chunk_relocate(chunk, -1);
+			spin_unlock_irq(&pcpu_lock);
+			goto retry_pop;
+		}
+	}
+
 	mutex_unlock(&pcpu_alloc_mutex);
 }
 
@@ -1160,7 +1257,7 @@ void free_percpu(void __percpu *ptr)
 
 		list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
 			if (pos != chunk) {
-				schedule_work(&pcpu_balance_work);
+				pcpu_schedule_balance_work();
 				break;
 			}
 	}
@@ -2187,3 +2284,15 @@ void __init percpu_init_late(void)
 		spin_unlock_irqrestore(&pcpu_lock, flags);
 	}
 }
+
+/*
+ * Percpu allocator is initialized early during boot when neither slab or
+ * workqueue is available.  Plug async management until everything is up
+ * and running.
+ */
+static int __init percpu_enable_async(void)
+{
+	pcpu_async_enabled = true;
+	return 0;
+}
+subsys_initcall(percpu_enable_async);
-- 
cgit v1.2.3


From d07e9c178f188f76851aca8ab49a78da13dda006 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 29 Aug 2014 15:13:21 +0200
Subject: PM / domains: Make generic_pm_domain.name const

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_domain.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 7c1d252b20c0..ebc4c76ffb73 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -60,7 +60,7 @@ struct generic_pm_domain {
 	struct mutex lock;
 	struct dev_power_governor *gov;
 	struct work_struct power_off_work;
-	char *name;
+	const char *name;
 	unsigned int in_progress;	/* Number of devices being suspended now */
 	atomic_t sd_count;	/* Number of subdomains with power "on" */
 	enum gpd_status status;	/* Current state of the domain */
-- 
cgit v1.2.3


From 76ba59f8366f2d9282cb5bda9de75b4b68cbe55f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 26 Aug 2014 11:03:16 +0100
Subject: genirq: Add irq_domain-aware core IRQ handler

Calling irq_find_mapping from outside a irq_{enter,exit} section is
unsafe and produces ugly messages if CONFIG_PROVE_RCU is enabled:
If coming from the idle state, the rcu_read_lock call in irq_find_mapping
will generate an unpleasant warning:

<quote>
===============================
[ INFO: suspicious RCU usage. ]
3.16.0-rc1+ #135 Not tainted
-------------------------------
include/linux/rcupdate.h:871 rcu_read_lock() used illegally while idle!

other info that might help us debug this:

RCU used illegally from idle CPU!
rcu_scheduler_active = 1, debug_locks = 0
RCU used illegally from extended quiescent state!
1 lock held by swapper/0/0:
 #0:  (rcu_read_lock){......}, at: [<ffffffc00010206c>]
irq_find_mapping+0x4c/0x198
</quote>

As this issue is fairly widespread and involves at least three
different architectures, a possible solution is to add a new
handle_domain_irq entry point into the generic IRQ code that
the interrupt controller code can call.

This new function takes an irq_domain, and calls into irq_find_domain
inside the irq_{enter,exit} block. An additional "lookup" parameter is
used to allow non-domain architecture code to be replaced by this as well.

Interrupt controllers can then be updated to use the new mechanism.

This code is sitting behind a new CONFIG_HANDLE_DOMAIN_IRQ, as not all
architectures implement set_irq_regs (yes, mn10300, I'm looking at you...).

Reported-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1409047421-27649-2-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 include/linux/irqdesc.h | 19 +++++++++++++++++++
 kernel/irq/Kconfig      |  3 +++
 kernel/irq/irqdesc.c    | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 472c021a2d4f..ff24667cd86c 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -12,6 +12,8 @@ struct irq_affinity_notify;
 struct proc_dir_entry;
 struct module;
 struct irq_desc;
+struct irq_domain;
+struct pt_regs;
 
 /**
  * struct irq_desc - interrupt descriptor
@@ -118,6 +120,23 @@ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *de
 
 int generic_handle_irq(unsigned int irq);
 
+#ifdef CONFIG_HANDLE_DOMAIN_IRQ
+/*
+ * Convert a HW interrupt number to a logical one using a IRQ domain,
+ * and handle the result interrupt number. Return -EINVAL if
+ * conversion failed. Providing a NULL domain indicates that the
+ * conversion has already been done.
+ */
+int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
+			bool lookup, struct pt_regs *regs);
+
+static inline int handle_domain_irq(struct irq_domain *domain,
+				    unsigned int hwirq, struct pt_regs *regs)
+{
+	return __handle_domain_irq(domain, hwirq, true, regs);
+}
+#endif
+
 /* Test to see if a driver has successfully requested an irq */
 static inline int irq_has_action(unsigned int irq)
 {
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index d269cecdfbf0..225086b2652e 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -55,6 +55,9 @@ config GENERIC_IRQ_CHIP
 config IRQ_DOMAIN
 	bool
 
+config HANDLE_DOMAIN_IRQ
+	bool
+
 config IRQ_DOMAIN_DEBUG
 	bool "Expose hardware/virtual IRQ mapping via debugfs"
 	depends on IRQ_DOMAIN && DEBUG_FS
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1487a123db5c..a1782f88f0af 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -14,6 +14,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/radix-tree.h>
 #include <linux/bitmap.h>
+#include <linux/irqdomain.h>
 
 #include "internals.h"
 
@@ -336,6 +337,47 @@ int generic_handle_irq(unsigned int irq)
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
 
+#ifdef CONFIG_HANDLE_DOMAIN_IRQ
+/**
+ * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain
+ * @domain:	The domain where to perform the lookup
+ * @hwirq:	The HW irq number to convert to a logical one
+ * @lookup:	Whether to perform the domain lookup or not
+ * @regs:	Register file coming from the low-level handling code
+ *
+ * Returns:	0 on success, or -EINVAL if conversion has failed
+ */
+int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
+			bool lookup, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	unsigned int irq = hwirq;
+	int ret = 0;
+
+	irq_enter();
+
+#ifdef CONFIG_IRQ_DOMAIN
+	if (lookup)
+		irq = irq_find_mapping(domain, hwirq);
+#endif
+
+	/*
+	 * Some hardware gives randomly wrong interrupts.  Rather
+	 * than crashing, do something sensible.
+	 */
+	if (unlikely(!irq || irq >= nr_irqs)) {
+		ack_bad_irq(irq);
+		ret = -EINVAL;
+	} else {
+		generic_handle_irq(irq);
+	}
+
+	irq_exit();
+	set_irq_regs(old_regs);
+	return ret;
+}
+#endif
+
 /* Dynamic interrupt handling */
 
 /**
-- 
cgit v1.2.3


From a4412fc9486ec85686c6c7929e7e829f62ae377e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Mon, 21 Jul 2014 18:49:14 -0700
Subject: seccomp,x86,arm,mips,s390: Remove nr parameter from secure_computing

The secure_computing function took a syscall number parameter, but
it only paid any attention to that parameter if seccomp mode 1 was
enabled.  Rather than coming up with a kludge to get the parameter
to work in mode 2, just remove the parameter.

To avoid churn in arches that don't have seccomp filters (and may
not even support syscall_get_nr right now), this leaves the
parameter in secure_computing_strict, which is now a real function.

For ARM, this is a bit ugly due to the fact that ARM conditionally
supports seccomp filters.  Fixing that would probably only be a
couple of lines of code, but it should be coordinated with the audit
maintainers.

This will be a slight slowdown on some arches.  The right fix is to
pass in all of seccomp_data instead of trying to make just the
syscall nr part be fast.

This is a prerequisite for making two-phase seccomp work cleanly.

Cc: Russell King <linux@arm.linux.org.uk>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: linux-s390@vger.kernel.org
Cc: x86@kernel.org
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm/kernel/ptrace.c      |  7 ++++-
 arch/mips/kernel/ptrace.c     |  2 +-
 arch/s390/kernel/ptrace.c     |  2 +-
 arch/x86/kernel/ptrace.c      |  2 +-
 arch/x86/kernel/vsyscall_64.c |  2 +-
 include/linux/seccomp.h       | 21 +++++++-------
 kernel/seccomp.c              | 64 ++++++++++++++++++++++++++++++-------------
 7 files changed, 66 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 0c27ed6f3f23..5e772a21ab97 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -933,8 +933,13 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
 	current_thread_info()->syscall = scno;
 
 	/* Do the secure computing check first; failures should be fast. */
-	if (secure_computing(scno) == -1)
+#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+	if (secure_computing() == -1)
 		return -1;
+#else
+	/* XXX: remove this once OABI gets fixed */
+	secure_computing_strict(scno);
+#endif
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 645b3c4fcfba..f7aac5b57b4b 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -770,7 +770,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
 	long ret = 0;
 	user_exit();
 
-	if (secure_computing(syscall) == -1)
+	if (secure_computing() == -1)
 		return -1;
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 5dc7ad9e2fbf..bebacad48305 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -803,7 +803,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	long ret = 0;
 
 	/* Do the secure computing check first. */
-	if (secure_computing(regs->gprs[2])) {
+	if (secure_computing()) {
 		/* seccomp failures shouldn't expose any additional code. */
 		ret = -1;
 		goto out;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 678c0ada3b3c..93c182a00506 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1471,7 +1471,7 @@ long syscall_trace_enter(struct pt_regs *regs)
 		regs->flags |= X86_EFLAGS_TF;
 
 	/* do the secure computing check first */
-	if (secure_computing(regs->orig_ax)) {
+	if (secure_computing()) {
 		/* seccomp failures shouldn't expose any additional code. */
 		ret = -1L;
 		goto out;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e1e1e80fc6a6..957779f4eb40 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -216,7 +216,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	 */
 	regs->orig_ax = syscall_nr;
 	regs->ax = -ENOSYS;
-	tmp = secure_computing(syscall_nr);
+	tmp = secure_computing();
 	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
 		warn_bad_vsyscall(KERN_DEBUG, regs,
 				  "seccomp tried to change syscall nr or ip");
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 5d586a45a319..aa3c040230be 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -27,19 +27,17 @@ struct seccomp {
 	struct seccomp_filter *filter;
 };
 
-extern int __secure_computing(int);
-static inline int secure_computing(int this_syscall)
+#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+extern int __secure_computing(void);
+static inline int secure_computing(void)
 {
 	if (unlikely(test_thread_flag(TIF_SECCOMP)))
-		return  __secure_computing(this_syscall);
+		return  __secure_computing();
 	return 0;
 }
-
-/* A wrapper for architectures supporting only SECCOMP_MODE_STRICT. */
-static inline void secure_computing_strict(int this_syscall)
-{
-	BUG_ON(secure_computing(this_syscall) != 0);
-}
+#else
+extern void secure_computing_strict(int this_syscall);
+#endif
 
 extern long prctl_get_seccomp(void);
 extern long prctl_set_seccomp(unsigned long, char __user *);
@@ -56,8 +54,11 @@ static inline int seccomp_mode(struct seccomp *s)
 struct seccomp { };
 struct seccomp_filter { };
 
-static inline int secure_computing(int this_syscall) { return 0; }
+#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+static inline int secure_computing(void) { return 0; }
+#else
 static inline void secure_computing_strict(int this_syscall) { return; }
+#endif
 
 static inline long prctl_get_seccomp(void)
 {
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 44eb005c6695..5e738e0dd2e9 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -23,8 +23,11 @@
 
 /* #define SECCOMP_DEBUG 1 */
 
-#ifdef CONFIG_SECCOMP_FILTER
+#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
 #include <asm/syscall.h>
+#endif
+
+#ifdef CONFIG_SECCOMP_FILTER
 #include <linux/filter.h>
 #include <linux/pid.h>
 #include <linux/ptrace.h>
@@ -172,7 +175,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  *
  * Returns valid seccomp BPF response codes.
  */
-static u32 seccomp_run_filters(int syscall)
+static u32 seccomp_run_filters(void)
 {
 	struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
 	struct seccomp_data sd;
@@ -564,10 +567,43 @@ static int mode1_syscalls_32[] = {
 };
 #endif
 
-int __secure_computing(int this_syscall)
+static void __secure_computing_strict(int this_syscall)
+{
+	int *syscall_whitelist = mode1_syscalls;
+#ifdef CONFIG_COMPAT
+	if (is_compat_task())
+		syscall_whitelist = mode1_syscalls_32;
+#endif
+	do {
+		if (*syscall_whitelist == this_syscall)
+			return;
+	} while (*++syscall_whitelist);
+
+#ifdef SECCOMP_DEBUG
+	dump_stack();
+#endif
+	audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+	do_exit(SIGKILL);
+}
+
+#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+void secure_computing_strict(int this_syscall)
+{
+	int mode = current->seccomp.mode;
+
+	if (mode == 0)
+		return;
+	else if (mode == SECCOMP_MODE_STRICT)
+		__secure_computing_strict(this_syscall);
+	else
+		BUG();
+}
+#else
+int __secure_computing(void)
 {
+	struct pt_regs *regs = task_pt_regs(current);
+	int this_syscall = syscall_get_nr(current, regs);
 	int exit_sig = 0;
-	int *syscall;
 	u32 ret;
 
 	/*
@@ -578,23 +614,12 @@ int __secure_computing(int this_syscall)
 
 	switch (current->seccomp.mode) {
 	case SECCOMP_MODE_STRICT:
-		syscall = mode1_syscalls;
-#ifdef CONFIG_COMPAT
-		if (is_compat_task())
-			syscall = mode1_syscalls_32;
-#endif
-		do {
-			if (*syscall == this_syscall)
-				return 0;
-		} while (*++syscall);
-		exit_sig = SIGKILL;
-		ret = SECCOMP_RET_KILL;
-		break;
+		__secure_computing_strict(this_syscall);
+		return 0;
 #ifdef CONFIG_SECCOMP_FILTER
 	case SECCOMP_MODE_FILTER: {
 		int data;
-		struct pt_regs *regs = task_pt_regs(current);
-		ret = seccomp_run_filters(this_syscall);
+		ret = seccomp_run_filters();
 		data = ret & SECCOMP_RET_DATA;
 		ret &= SECCOMP_RET_ACTION;
 		switch (ret) {
@@ -652,9 +677,10 @@ int __secure_computing(int this_syscall)
 #ifdef CONFIG_SECCOMP_FILTER
 skip:
 	audit_seccomp(this_syscall, exit_sig, ret);
-#endif
 	return -1;
+#endif
 }
+#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
 
 long prctl_get_seccomp(void)
 {
-- 
cgit v1.2.3


From 13aa72f0fd0a9f98a41cefb662487269e2f1ad65 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Mon, 21 Jul 2014 18:49:15 -0700
Subject: seccomp: Refactor the filter callback and the API

The reason I did this is to add a seccomp API that will be usable
for an x86 fast path.  The x86 entry code needs to use a rather
expensive slow path for a syscall that might be visible to things
like ptrace.  By splitting seccomp into two phases, we can check
whether we need the slow path and then use the fast path in if the
filter allows the syscall or just returns some errno.

As a side effect, I think the new code is much easier to understand
than the old code.

This has one user-visible effect: the audit record written for
SECCOMP_RET_TRACE is now a simple indication that SECCOMP_RET_TRACE
happened.  It used to depend in a complicated way on what the tracer
did.  I couldn't make much sense of it.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/seccomp.h |   6 ++
 kernel/seccomp.c        | 190 +++++++++++++++++++++++++++++++-----------------
 2 files changed, 130 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index aa3c040230be..38851085e481 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -35,6 +35,12 @@ static inline int secure_computing(void)
 		return  __secure_computing();
 	return 0;
 }
+
+#define SECCOMP_PHASE1_OK	0
+#define SECCOMP_PHASE1_SKIP	1
+
+extern u32 seccomp_phase1(void);
+int seccomp_phase2(u32 phase1_result);
 #else
 extern void secure_computing_strict(int this_syscall);
 #endif
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5e738e0dd2e9..6c8528ce9df9 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -21,8 +21,6 @@
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 
-/* #define SECCOMP_DEBUG 1 */
-
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
 #include <asm/syscall.h>
 #endif
@@ -601,10 +599,21 @@ void secure_computing_strict(int this_syscall)
 #else
 int __secure_computing(void)
 {
-	struct pt_regs *regs = task_pt_regs(current);
-	int this_syscall = syscall_get_nr(current, regs);
-	int exit_sig = 0;
-	u32 ret;
+	u32 phase1_result = seccomp_phase1();
+
+	if (likely(phase1_result == SECCOMP_PHASE1_OK))
+		return 0;
+	else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
+		return -1;
+	else
+		return seccomp_phase2(phase1_result);
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
+static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs)
+{
+	u32 filter_ret, action;
+	int data;
 
 	/*
 	 * Make sure that any changes to mode from another thread have
@@ -612,73 +621,122 @@ int __secure_computing(void)
 	 */
 	rmb();
 
-	switch (current->seccomp.mode) {
+	filter_ret = seccomp_run_filters();
+	data = filter_ret & SECCOMP_RET_DATA;
+	action = filter_ret & SECCOMP_RET_ACTION;
+
+	switch (action) {
+	case SECCOMP_RET_ERRNO:
+		/* Set the low-order 16-bits as a errno. */
+		syscall_set_return_value(current, regs,
+					 -data, 0);
+		goto skip;
+
+	case SECCOMP_RET_TRAP:
+		/* Show the handler the original registers. */
+		syscall_rollback(current, regs);
+		/* Let the filter pass back 16 bits of data. */
+		seccomp_send_sigsys(this_syscall, data);
+		goto skip;
+
+	case SECCOMP_RET_TRACE:
+		return filter_ret;  /* Save the rest for phase 2. */
+
+	case SECCOMP_RET_ALLOW:
+		return SECCOMP_PHASE1_OK;
+
+	case SECCOMP_RET_KILL:
+	default:
+		audit_seccomp(this_syscall, SIGSYS, action);
+		do_exit(SIGSYS);
+	}
+
+	unreachable();
+
+skip:
+	audit_seccomp(this_syscall, 0, action);
+	return SECCOMP_PHASE1_SKIP;
+}
+#endif
+
+/**
+ * seccomp_phase1() - run fast path seccomp checks on the current syscall
+ *
+ * This only reads pt_regs via the syscall_xyz helpers.  The only change
+ * it will make to pt_regs is via syscall_set_return_value, and it will
+ * only do that if it returns SECCOMP_PHASE1_SKIP.
+ *
+ * It may also call do_exit or force a signal; these actions must be
+ * safe.
+ *
+ * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
+ * be processed normally.
+ *
+ * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
+ * invoked.  In this case, seccomp_phase1 will have set the return value
+ * using syscall_set_return_value.
+ *
+ * If it returns anything else, then the return value should be passed
+ * to seccomp_phase2 from a context in which ptrace hooks are safe.
+ */
+u32 seccomp_phase1(void)
+{
+	int mode = current->seccomp.mode;
+	struct pt_regs *regs = task_pt_regs(current);
+	int this_syscall = syscall_get_nr(current, regs);
+
+	switch (mode) {
 	case SECCOMP_MODE_STRICT:
-		__secure_computing_strict(this_syscall);
-		return 0;
+		__secure_computing_strict(this_syscall);  /* may call do_exit */
+		return SECCOMP_PHASE1_OK;
 #ifdef CONFIG_SECCOMP_FILTER
-	case SECCOMP_MODE_FILTER: {
-		int data;
-		ret = seccomp_run_filters();
-		data = ret & SECCOMP_RET_DATA;
-		ret &= SECCOMP_RET_ACTION;
-		switch (ret) {
-		case SECCOMP_RET_ERRNO:
-			/* Set the low-order 16-bits as a errno. */
-			syscall_set_return_value(current, regs,
-						 -data, 0);
-			goto skip;
-		case SECCOMP_RET_TRAP:
-			/* Show the handler the original registers. */
-			syscall_rollback(current, regs);
-			/* Let the filter pass back 16 bits of data. */
-			seccomp_send_sigsys(this_syscall, data);
-			goto skip;
-		case SECCOMP_RET_TRACE:
-			/* Skip these calls if there is no tracer. */
-			if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
-				syscall_set_return_value(current, regs,
-							 -ENOSYS, 0);
-				goto skip;
-			}
-			/* Allow the BPF to provide the event message */
-			ptrace_event(PTRACE_EVENT_SECCOMP, data);
-			/*
-			 * The delivery of a fatal signal during event
-			 * notification may silently skip tracer notification.
-			 * Terminating the task now avoids executing a system
-			 * call that may not be intended.
-			 */
-			if (fatal_signal_pending(current))
-				break;
-			if (syscall_get_nr(current, regs) < 0)
-				goto skip;  /* Explicit request to skip. */
-
-			return 0;
-		case SECCOMP_RET_ALLOW:
-			return 0;
-		case SECCOMP_RET_KILL:
-		default:
-			break;
-		}
-		exit_sig = SIGSYS;
-		break;
-	}
+	case SECCOMP_MODE_FILTER:
+		return __seccomp_phase1_filter(this_syscall, regs);
 #endif
 	default:
 		BUG();
 	}
+}
 
-#ifdef SECCOMP_DEBUG
-	dump_stack();
-#endif
-	audit_seccomp(this_syscall, exit_sig, ret);
-	do_exit(exit_sig);
-#ifdef CONFIG_SECCOMP_FILTER
-skip:
-	audit_seccomp(this_syscall, exit_sig, ret);
-	return -1;
-#endif
+/**
+ * seccomp_phase2() - finish slow path seccomp work for the current syscall
+ * @phase1_result: The return value from seccomp_phase1()
+ *
+ * This must be called from a context in which ptrace hooks can be used.
+ *
+ * Returns 0 if the syscall should be processed or -1 to skip the syscall.
+ */
+int seccomp_phase2(u32 phase1_result)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	u32 action = phase1_result & SECCOMP_RET_ACTION;
+	int data = phase1_result & SECCOMP_RET_DATA;
+
+	BUG_ON(action != SECCOMP_RET_TRACE);
+
+	audit_seccomp(syscall_get_nr(current, regs), 0, action);
+
+	/* Skip these calls if there is no tracer. */
+	if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
+		syscall_set_return_value(current, regs,
+					 -ENOSYS, 0);
+		return -1;
+	}
+
+	/* Allow the BPF to provide the event message */
+	ptrace_event(PTRACE_EVENT_SECCOMP, data);
+	/*
+	 * The delivery of a fatal signal during event
+	 * notification may silently skip tracer notification.
+	 * Terminating the task now avoids executing a system
+	 * call that may not be intended.
+	 */
+	if (fatal_signal_pending(current))
+		do_exit(SIGSYS);
+	if (syscall_get_nr(current, regs) < 0)
+		return -1;  /* Explicit request to skip. */
+
+	return 0;
 }
 #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
 
-- 
cgit v1.2.3


From d39bd00deabe57420f2a3669eb71b0e0c4997184 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Mon, 21 Jul 2014 18:49:16 -0700
Subject: seccomp: Allow arch code to provide seccomp_data

populate_seccomp_data is expensive: it works by inspecting
task_pt_regs and various other bits to piece together all the
information, and it's does so in multiple partially redundant steps.

Arch-specific code in the syscall entry path can do much better.

Admittedly this adds a bit of additional room for error, but the
speedup should be worth it.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/seccomp.h |  2 +-
 kernel/seccomp.c        | 32 +++++++++++++++++++-------------
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 38851085e481..a19ddacdac30 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -39,7 +39,7 @@ static inline int secure_computing(void)
 #define SECCOMP_PHASE1_OK	0
 #define SECCOMP_PHASE1_SKIP	1
 
-extern u32 seccomp_phase1(void);
+extern u32 seccomp_phase1(struct seccomp_data *sd);
 int seccomp_phase2(u32 phase1_result);
 #else
 extern void secure_computing_strict(int this_syscall);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 6c8528ce9df9..1285cb205d49 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -173,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  *
  * Returns valid seccomp BPF response codes.
  */
-static u32 seccomp_run_filters(void)
+static u32 seccomp_run_filters(struct seccomp_data *sd)
 {
 	struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
-	struct seccomp_data sd;
+	struct seccomp_data sd_local;
 	u32 ret = SECCOMP_RET_ALLOW;
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
@@ -186,14 +186,17 @@ static u32 seccomp_run_filters(void)
 	/* Make sure cross-thread synced filter points somewhere sane. */
 	smp_read_barrier_depends();
 
-	populate_seccomp_data(&sd);
+	if (!sd) {
+		populate_seccomp_data(&sd_local);
+		sd = &sd_local;
+	}
 
 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
 	for (; f; f = f->prev) {
-		u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd);
+		u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
 
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
@@ -599,7 +602,7 @@ void secure_computing_strict(int this_syscall)
 #else
 int __secure_computing(void)
 {
-	u32 phase1_result = seccomp_phase1();
+	u32 phase1_result = seccomp_phase1(NULL);
 
 	if (likely(phase1_result == SECCOMP_PHASE1_OK))
 		return 0;
@@ -610,7 +613,7 @@ int __secure_computing(void)
 }
 
 #ifdef CONFIG_SECCOMP_FILTER
-static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs)
+static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
 {
 	u32 filter_ret, action;
 	int data;
@@ -621,20 +624,20 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs)
 	 */
 	rmb();
 
-	filter_ret = seccomp_run_filters();
+	filter_ret = seccomp_run_filters(sd);
 	data = filter_ret & SECCOMP_RET_DATA;
 	action = filter_ret & SECCOMP_RET_ACTION;
 
 	switch (action) {
 	case SECCOMP_RET_ERRNO:
 		/* Set the low-order 16-bits as a errno. */
-		syscall_set_return_value(current, regs,
+		syscall_set_return_value(current, task_pt_regs(current),
 					 -data, 0);
 		goto skip;
 
 	case SECCOMP_RET_TRAP:
 		/* Show the handler the original registers. */
-		syscall_rollback(current, regs);
+		syscall_rollback(current, task_pt_regs(current));
 		/* Let the filter pass back 16 bits of data. */
 		seccomp_send_sigsys(this_syscall, data);
 		goto skip;
@@ -661,11 +664,14 @@ skip:
 
 /**
  * seccomp_phase1() - run fast path seccomp checks on the current syscall
+ * @arg sd: The seccomp_data or NULL
  *
  * This only reads pt_regs via the syscall_xyz helpers.  The only change
  * it will make to pt_regs is via syscall_set_return_value, and it will
  * only do that if it returns SECCOMP_PHASE1_SKIP.
  *
+ * If sd is provided, it will not read pt_regs at all.
+ *
  * It may also call do_exit or force a signal; these actions must be
  * safe.
  *
@@ -679,11 +685,11 @@ skip:
  * If it returns anything else, then the return value should be passed
  * to seccomp_phase2 from a context in which ptrace hooks are safe.
  */
-u32 seccomp_phase1(void)
+u32 seccomp_phase1(struct seccomp_data *sd)
 {
 	int mode = current->seccomp.mode;
-	struct pt_regs *regs = task_pt_regs(current);
-	int this_syscall = syscall_get_nr(current, regs);
+	int this_syscall = sd ? sd->nr :
+		syscall_get_nr(current, task_pt_regs(current));
 
 	switch (mode) {
 	case SECCOMP_MODE_STRICT:
@@ -691,7 +697,7 @@ u32 seccomp_phase1(void)
 		return SECCOMP_PHASE1_OK;
 #ifdef CONFIG_SECCOMP_FILTER
 	case SECCOMP_MODE_FILTER:
-		return __seccomp_phase1_filter(this_syscall, regs);
+		return __seccomp_phase1_filter(this_syscall, sd);
 #endif
 	default:
 		BUG();
-- 
cgit v1.2.3


From 26ae4980b5e4739af93543a147facb421fb78ae8 Mon Sep 17 00:00:00 2001
From: Aaron Sierra <asierra@xes-inc.com>
Date: Fri, 15 Aug 2014 16:07:48 -0500
Subject: fsl_ifc: Fix csor_ext position in fsl_ifc_regs

According to Freescale manuals, the IFC_CSORn_EXT register is located
immediately _after_ the bank's IFC_CSORn register.

This patch adjusts the csor_ext member of and reserved register arrays
immediately surrounding the csor_cs structure to provide proper access
to this register.

Signed-off-by: Aaron Sierra <asierra@xes-inc.com>
Acked-by: Prabhakar Kushwaha <prabhakar@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 include/linux/fsl_ifc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h
index f49ddb1b2273..84d60cb841b1 100644
--- a/include/linux/fsl_ifc.h
+++ b/include/linux/fsl_ifc.h
@@ -781,13 +781,13 @@ struct fsl_ifc_regs {
 		__be32 amask;
 		u32 res4[0x2];
 	} amask_cs[FSL_IFC_BANK_COUNT];
-	u32 res5[0x17];
+	u32 res5[0x18];
 	struct {
-		__be32 csor_ext;
 		__be32 csor;
+		__be32 csor_ext;
 		u32 res6;
 	} csor_cs[FSL_IFC_BANK_COUNT];
-	u32 res7[0x19];
+	u32 res7[0x18];
 	struct {
 		__be32 ftim[4];
 		u32 res8[0x8];
-- 
cgit v1.2.3


From 940001762ac514810e305aab356983829e5fa82a Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 3 Sep 2014 09:22:36 +0800
Subject: lib/rhashtable: allow user to set the minimum shifts of shrinking

Although rhashtable library allows user to specify a quiet big size
for user's created hash table, the table may be shrunk to a
very small size - HASH_MIN_SIZE(4) after object is removed from
the table at the first time. Subsequently, even if the total amount
of objects saved in the table is quite lower than user's initial
setting in a long time, the hash table size is still dynamically
adjusted by rhashtable_shrink() or rhashtable_expand() each time
object is inserted or removed from the table. However, as
synchronize_rcu() has to be called when table is shrunk or
expanded by the two functions, we should permit user to set the
minimum table size through configuring the minimum number of shifts
according to user specific requirement, avoiding these expensive
actions of shrinking or expanding because of calling synchronize_rcu().

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  2 ++
 lib/rhashtable.c           | 12 ++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 36826c0166c5..fb298e9d6d3a 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -44,6 +44,7 @@ struct rhashtable;
  * @head_offset: Offset of rhash_head in struct to be hashed
  * @hash_rnd: Seed to use while hashing
  * @max_shift: Maximum number of shifts while expanding
+ * @min_shift: Minimum number of shifts while shrinking
  * @hashfn: Function to hash key
  * @obj_hashfn: Function to hash object
  * @grow_decision: If defined, may return true if table should expand
@@ -57,6 +58,7 @@ struct rhashtable_params {
 	size_t			head_offset;
 	u32			hash_rnd;
 	size_t			max_shift;
+	size_t			min_shift;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
 	bool			(*grow_decision)(const struct rhashtable *ht,
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index a2c78810ebc1..8dfec3f26d4c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -298,7 +298,7 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags)
 
 	ASSERT_RHT_MUTEX(ht);
 
-	if (tbl->size <= HASH_MIN_SIZE)
+	if (ht->shift <= ht->p.min_shift)
 		return 0;
 
 	ntbl = bucket_table_alloc(tbl->size / 2, flags);
@@ -506,9 +506,10 @@ void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash,
 }
 EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
 
-static size_t rounded_hashtable_size(unsigned int nelem)
+static size_t rounded_hashtable_size(struct rhashtable_params *params)
 {
-	return max(roundup_pow_of_two(nelem * 4 / 3), HASH_MIN_SIZE);
+	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+		   1UL << params->min_shift);
 }
 
 /**
@@ -566,8 +567,11 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	    (!params->key_len && !params->obj_hashfn))
 		return -EINVAL;
 
+	params->min_shift = max_t(size_t, params->min_shift,
+				  ilog2(HASH_MIN_SIZE));
+
 	if (params->nelem_hint)
-		size = rounded_hashtable_size(params->nelem_hint);
+		size = rounded_hashtable_size(params);
 
 	tbl = bucket_table_alloc(size, GFP_KERNEL);
 	if (tbl == NULL)
-- 
cgit v1.2.3


From 03e9f0cac5da6af85758276cb4624caf5911f2b9 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 3 Sep 2014 13:02:56 +0200
Subject: pinctrl: clean up after enable refactoring

commit 2243a87d90b42eb38bc281957df3e57c712b5e56
"pinctrl: avoid duplicated calling enable_pinmux_setting for a pin"
removed the .disable callback from the struct pinmux_ops,
making the .enable() callback the only remaining callback.

However .enable() is a bad name as it seems to imply that a
muxing can also be disabled. Rename the callback to .set_mux()
and also take this opportunity to clean out any remaining
mentions of .disable() from the documentation.

Acked-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
Acked-by: Fan Wu <fwu@marvell.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt                    | 14 ++------------
 drivers/pinctrl/berlin/berlin.c              |  8 ++++----
 drivers/pinctrl/mvebu/pinctrl-mvebu.c        |  6 +++---
 drivers/pinctrl/nomadik/pinctrl-abx500.c     |  6 +++---
 drivers/pinctrl/nomadik/pinctrl-nomadik.c    |  6 +++---
 drivers/pinctrl/pinctrl-adi2.c               |  6 +++---
 drivers/pinctrl/pinctrl-as3722.c             |  4 ++--
 drivers/pinctrl/pinctrl-at91.c               |  6 +++---
 drivers/pinctrl/pinctrl-bcm281xx.c           |  8 ++++----
 drivers/pinctrl/pinctrl-bcm2835.c            |  4 ++--
 drivers/pinctrl/pinctrl-imx.c                |  6 +++---
 drivers/pinctrl/pinctrl-imx1-core.c          |  6 +++---
 drivers/pinctrl/pinctrl-lantiq.c             |  8 ++++----
 drivers/pinctrl/pinctrl-mxs.c                |  6 +++---
 drivers/pinctrl/pinctrl-palmas.c             |  5 +++--
 drivers/pinctrl/pinctrl-rockchip.c           |  6 +++---
 drivers/pinctrl/pinctrl-single.c             |  4 ++--
 drivers/pinctrl/pinctrl-st.c                 |  6 +++---
 drivers/pinctrl/pinctrl-tb10x.c              |  4 ++--
 drivers/pinctrl/pinctrl-tegra-xusb.c         |  8 ++++----
 drivers/pinctrl/pinctrl-tegra.c              |  7 ++++---
 drivers/pinctrl/pinctrl-tz1090-pdc.c         |  7 ++++---
 drivers/pinctrl/pinctrl-tz1090.c             |  6 +++---
 drivers/pinctrl/pinctrl-u300.c               |  6 +++---
 drivers/pinctrl/pinmux.c                     | 10 +++++-----
 drivers/pinctrl/qcom/pinctrl-msm.c           |  8 ++++----
 drivers/pinctrl/samsung/pinctrl-exynos5440.c |  7 ++++---
 drivers/pinctrl/samsung/pinctrl-samsung.c    |  7 ++++---
 drivers/pinctrl/sh-pfc/pinctrl.c             |  6 +++---
 drivers/pinctrl/sirf/pinctrl-sirf.c          |  7 ++++---
 drivers/pinctrl/spear/pinctrl-spear.c        |  4 ++--
 drivers/pinctrl/sunxi/pinctrl-sunxi.c        |  8 ++++----
 drivers/pinctrl/vt8500/pinctrl-wmt.c         |  8 ++++----
 include/linux/pinctrl/pinmux.h               |  7 +++----
 34 files changed, 110 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 23f1590f49fe..b8f2147b96dd 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -702,7 +702,7 @@ static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector,
 	return 0;
 }
 
-int foo_enable(struct pinctrl_dev *pctldev, unsigned selector,
+int foo_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
 		unsigned group)
 {
 	u8 regbit = (1 << selector + group);
@@ -711,21 +711,11 @@ int foo_enable(struct pinctrl_dev *pctldev, unsigned selector,
 	return 0;
 }
 
-void foo_disable(struct pinctrl_dev *pctldev, unsigned selector,
-		unsigned group)
-{
-	u8 regbit = (1 << selector + group);
-
-	writeb((readb(MUX) & ~(regbit)), MUX)
-	return 0;
-}
-
 struct pinmux_ops foo_pmxops = {
 	.get_functions_count = foo_get_functions_count,
 	.get_function_name = foo_get_fname,
 	.get_function_groups = foo_get_groups,
-	.enable = foo_enable,
-	.disable = foo_disable,
+	.set_mux = foo_set_mux,
 };
 
 /* Pinmux operations are handled by some pin controller */
diff --git a/drivers/pinctrl/berlin/berlin.c b/drivers/pinctrl/berlin/berlin.c
index 86db2235ab00..61f1bf0e60ba 100644
--- a/drivers/pinctrl/berlin/berlin.c
+++ b/drivers/pinctrl/berlin/berlin.c
@@ -170,9 +170,9 @@ berlin_pinctrl_find_function_by_name(struct berlin_pinctrl *pctrl,
 	return NULL;
 }
 
-static int berlin_pinmux_enable(struct pinctrl_dev *pctrl_dev,
-				unsigned function,
-				unsigned group)
+static int berlin_pinmux_set(struct pinctrl_dev *pctrl_dev,
+			     unsigned function,
+			     unsigned group)
 {
 	struct berlin_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctrl_dev);
 	const struct berlin_desc_group *group_desc = pctrl->desc->groups + group;
@@ -197,7 +197,7 @@ static const struct pinmux_ops berlin_pinmux_ops = {
 	.get_functions_count	= &berlin_pinmux_get_functions_count,
 	.get_function_name	= &berlin_pinmux_get_function_name,
 	.get_function_groups	= &berlin_pinmux_get_function_groups,
-	.enable			= &berlin_pinmux_enable,
+	.set_mux		= &berlin_pinmux_set,
 };
 
 static int berlin_pinctrl_add_function(struct berlin_pinctrl *pctrl,
diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c
index 9908374f8f92..f3b426cdaf8f 100644
--- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c
+++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c
@@ -259,8 +259,8 @@ static int mvebu_pinmux_get_groups(struct pinctrl_dev *pctldev, unsigned fid,
 	return 0;
 }
 
-static int mvebu_pinmux_enable(struct pinctrl_dev *pctldev, unsigned fid,
-			unsigned gid)
+static int mvebu_pinmux_set(struct pinctrl_dev *pctldev, unsigned fid,
+			    unsigned gid)
 {
 	struct mvebu_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
 	struct mvebu_pinctrl_function *func = &pctl->functions[fid];
@@ -344,7 +344,7 @@ static const struct pinmux_ops mvebu_pinmux_ops = {
 	.get_function_groups = mvebu_pinmux_get_groups,
 	.gpio_request_enable = mvebu_pinmux_gpio_request_enable,
 	.gpio_set_direction = mvebu_pinmux_gpio_set_direction,
-	.enable = mvebu_pinmux_enable,
+	.set_mux = mvebu_pinmux_set,
 };
 
 static int mvebu_pinctrl_get_groups_count(struct pinctrl_dev *pctldev)
diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c
index a53a689a2bfa..b0289824cf73 100644
--- a/drivers/pinctrl/nomadik/pinctrl-abx500.c
+++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c
@@ -709,8 +709,8 @@ static int abx500_pmx_get_func_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function,
-			     unsigned group)
+static int abx500_pmx_set(struct pinctrl_dev *pctldev, unsigned function,
+			  unsigned group)
 {
 	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
 	struct gpio_chip *chip = &pct->chip;
@@ -784,7 +784,7 @@ static const struct pinmux_ops abx500_pinmux_ops = {
 	.get_functions_count = abx500_pmx_get_funcs_cnt,
 	.get_function_name = abx500_pmx_get_func_name,
 	.get_function_groups = abx500_pmx_get_func_groups,
-	.enable = abx500_pmx_enable,
+	.set_mux = abx500_pmx_set,
 	.gpio_request_enable = abx500_gpio_request_enable,
 	.gpio_disable_free = abx500_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
index e7cab07eef47..c093d75a022a 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
@@ -1647,8 +1647,8 @@ static int nmk_pmx_get_func_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int nmk_pmx_enable(struct pinctrl_dev *pctldev, unsigned function,
-			  unsigned group)
+static int nmk_pmx_set(struct pinctrl_dev *pctldev, unsigned function,
+		       unsigned group)
 {
 	struct nmk_pinctrl *npct = pinctrl_dev_get_drvdata(pctldev);
 	const struct nmk_pingroup *g;
@@ -1810,7 +1810,7 @@ static const struct pinmux_ops nmk_pinmux_ops = {
 	.get_functions_count = nmk_pmx_get_funcs_cnt,
 	.get_function_name = nmk_pmx_get_func_name,
 	.get_function_groups = nmk_pmx_get_func_groups,
-	.enable = nmk_pmx_enable,
+	.set_mux = nmk_pmx_set,
 	.gpio_request_enable = nmk_gpio_request_enable,
 	.gpio_disable_free = nmk_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c
index b092b93c67a1..e02943b0285d 100644
--- a/drivers/pinctrl/pinctrl-adi2.c
+++ b/drivers/pinctrl/pinctrl-adi2.c
@@ -619,8 +619,8 @@ static struct pinctrl_ops adi_pctrl_ops = {
 	.get_group_pins = adi_get_group_pins,
 };
 
-static int adi_pinmux_enable(struct pinctrl_dev *pctldev, unsigned func_id,
-	unsigned group_id)
+static int adi_pinmux_set(struct pinctrl_dev *pctldev, unsigned func_id,
+			  unsigned group_id)
 {
 	struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev);
 	struct gpio_port *port;
@@ -698,7 +698,7 @@ static int adi_pinmux_request_gpio(struct pinctrl_dev *pctldev,
 }
 
 static struct pinmux_ops adi_pinmux_ops = {
-	.enable = adi_pinmux_enable,
+	.set_mux = adi_pinmux_set,
 	.get_functions_count = adi_pinmux_get_funcs_count,
 	.get_function_name = adi_pinmux_get_func_name,
 	.get_function_groups = adi_pinmux_get_groups,
diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c
index 0e4ec91f4d49..1f790a4b83fe 100644
--- a/drivers/pinctrl/pinctrl-as3722.c
+++ b/drivers/pinctrl/pinctrl-as3722.c
@@ -230,7 +230,7 @@ static int as3722_pinctrl_get_func_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int as3722_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function,
+static int as3722_pinctrl_set(struct pinctrl_dev *pctldev, unsigned function,
 		unsigned group)
 {
 	struct as3722_pctrl_info *as_pci = pinctrl_dev_get_drvdata(pctldev);
@@ -327,7 +327,7 @@ static const struct pinmux_ops as3722_pinmux_ops = {
 	.get_functions_count	= as3722_pinctrl_get_funcs_count,
 	.get_function_name	= as3722_pinctrl_get_func_name,
 	.get_function_groups	= as3722_pinctrl_get_func_groups,
-	.enable			= as3722_pinctrl_enable,
+	.set_mux		= as3722_pinctrl_set,
 	.gpio_request_enable	= as3722_pinctrl_gpio_request_enable,
 	.gpio_set_direction	= as3722_pinctrl_gpio_set_direction,
 };
diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c
index af1ba4fc150d..4839c36a3a1c 100644
--- a/drivers/pinctrl/pinctrl-at91.c
+++ b/drivers/pinctrl/pinctrl-at91.c
@@ -554,8 +554,8 @@ static void at91_mux_gpio_enable(void __iomem *pio, unsigned mask, bool input)
 	writel_relaxed(mask, pio + (input ? PIO_ODR : PIO_OER));
 }
 
-static int at91_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector,
-			   unsigned group)
+static int at91_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+			unsigned group)
 {
 	struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
 	const struct at91_pmx_pin *pins_conf = info->groups[group].pins_conf;
@@ -684,7 +684,7 @@ static const struct pinmux_ops at91_pmx_ops = {
 	.get_functions_count	= at91_pmx_get_funcs_count,
 	.get_function_name	= at91_pmx_get_func_name,
 	.get_function_groups	= at91_pmx_get_groups,
-	.enable			= at91_pmx_enable,
+	.set_mux		= at91_pmx_set,
 	.gpio_request_enable	= at91_gpio_request_enable,
 	.gpio_disable_free	= at91_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-bcm281xx.c b/drivers/pinctrl/pinctrl-bcm281xx.c
index c5ca9e633fff..a26e0c2ba33e 100644
--- a/drivers/pinctrl/pinctrl-bcm281xx.c
+++ b/drivers/pinctrl/pinctrl-bcm281xx.c
@@ -1055,9 +1055,9 @@ static int bcm281xx_pinctrl_get_fcn_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int bcm281xx_pinmux_enable(struct pinctrl_dev *pctldev,
-				  unsigned function,
-				  unsigned group)
+static int bcm281xx_pinmux_set(struct pinctrl_dev *pctldev,
+			       unsigned function,
+			       unsigned group)
 {
 	struct bcm281xx_pinctrl_data *pdata = pinctrl_dev_get_drvdata(pctldev);
 	const struct bcm281xx_pin_function *f = &pdata->functions[function];
@@ -1084,7 +1084,7 @@ static struct pinmux_ops bcm281xx_pinctrl_pinmux_ops = {
 	.get_functions_count = bcm281xx_pinctrl_get_fcns_count,
 	.get_function_name = bcm281xx_pinctrl_get_fcn_name,
 	.get_function_groups = bcm281xx_pinctrl_get_fcn_groups,
-	.enable = bcm281xx_pinmux_enable,
+	.set_mux = bcm281xx_pinmux_set,
 };
 
 static int bcm281xx_pinctrl_pin_config_get(struct pinctrl_dev *pctldev,
diff --git a/drivers/pinctrl/pinctrl-bcm2835.c b/drivers/pinctrl/pinctrl-bcm2835.c
index 5bcfd7ace0cd..eabba02f71f9 100644
--- a/drivers/pinctrl/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/pinctrl-bcm2835.c
@@ -830,7 +830,7 @@ static int bcm2835_pmx_get_function_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int bcm2835_pmx_enable(struct pinctrl_dev *pctldev,
+static int bcm2835_pmx_set(struct pinctrl_dev *pctldev,
 		unsigned func_selector,
 		unsigned group_selector)
 {
@@ -869,7 +869,7 @@ static const struct pinmux_ops bcm2835_pmx_ops = {
 	.get_functions_count = bcm2835_pmx_get_functions_count,
 	.get_function_name = bcm2835_pmx_get_function_name,
 	.get_function_groups = bcm2835_pmx_get_function_groups,
-	.enable = bcm2835_pmx_enable,
+	.set_mux = bcm2835_pmx_set,
 	.gpio_disable_free = bcm2835_pmx_gpio_disable_free,
 	.gpio_set_direction = bcm2835_pmx_gpio_set_direction,
 };
diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c
index 946d594a64dd..570e5acb4a6a 100644
--- a/drivers/pinctrl/pinctrl-imx.c
+++ b/drivers/pinctrl/pinctrl-imx.c
@@ -179,8 +179,8 @@ static const struct pinctrl_ops imx_pctrl_ops = {
 
 };
 
-static int imx_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector,
-			   unsigned group)
+static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+		       unsigned group)
 {
 	struct imx_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev);
 	const struct imx_pinctrl_soc_info *info = ipctl->info;
@@ -298,7 +298,7 @@ static const struct pinmux_ops imx_pmx_ops = {
 	.get_functions_count = imx_pmx_get_funcs_count,
 	.get_function_name = imx_pmx_get_func_name,
 	.get_function_groups = imx_pmx_get_groups,
-	.enable = imx_pmx_enable,
+	.set_mux = imx_pmx_set,
 };
 
 static int imx_pinconf_get(struct pinctrl_dev *pctldev,
diff --git a/drivers/pinctrl/pinctrl-imx1-core.c b/drivers/pinctrl/pinctrl-imx1-core.c
index 483420757c9f..176a3e62f1cf 100644
--- a/drivers/pinctrl/pinctrl-imx1-core.c
+++ b/drivers/pinctrl/pinctrl-imx1-core.c
@@ -298,8 +298,8 @@ static const struct pinctrl_ops imx1_pctrl_ops = {
 
 };
 
-static int imx1_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector,
-			   unsigned group)
+static int imx1_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+			unsigned group)
 {
 	struct imx1_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev);
 	const struct imx1_pinctrl_soc_info *info = ipctl->info;
@@ -385,7 +385,7 @@ static const struct pinmux_ops imx1_pmx_ops = {
 	.get_functions_count = imx1_pmx_get_funcs_count,
 	.get_function_name = imx1_pmx_get_func_name,
 	.get_function_groups = imx1_pmx_get_groups,
-	.enable = imx1_pmx_enable,
+	.set_mux = imx1_pmx_set,
 };
 
 static int imx1_pinconf_get(struct pinctrl_dev *pctldev,
diff --git a/drivers/pinctrl/pinctrl-lantiq.c b/drivers/pinctrl/pinctrl-lantiq.c
index d22ca252b80d..296e5b37f768 100644
--- a/drivers/pinctrl/pinctrl-lantiq.c
+++ b/drivers/pinctrl/pinctrl-lantiq.c
@@ -257,9 +257,9 @@ static int match_group_mux(const struct ltq_pin_group *grp,
 	return ret;
 }
 
-static int ltq_pmx_enable(struct pinctrl_dev *pctrldev,
-				unsigned func,
-				unsigned group)
+static int ltq_pmx_set(struct pinctrl_dev *pctrldev,
+		       unsigned func,
+		       unsigned group)
 {
 	struct ltq_pinmux_info *info = pinctrl_dev_get_drvdata(pctrldev);
 	const struct ltq_pin_group *pin_grp = &info->grps[group];
@@ -316,7 +316,7 @@ static const struct pinmux_ops ltq_pmx_ops = {
 	.get_functions_count	= ltq_pmx_func_count,
 	.get_function_name	= ltq_pmx_func_name,
 	.get_function_groups	= ltq_pmx_get_groups,
-	.enable			= ltq_pmx_enable,
+	.set_mux		= ltq_pmx_set,
 	.gpio_request_enable	= ltq_pmx_gpio_request_enable,
 };
 
diff --git a/drivers/pinctrl/pinctrl-mxs.c b/drivers/pinctrl/pinctrl-mxs.c
index 40c76f26998c..67035091f8fd 100644
--- a/drivers/pinctrl/pinctrl-mxs.c
+++ b/drivers/pinctrl/pinctrl-mxs.c
@@ -195,8 +195,8 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int mxs_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned selector,
-			      unsigned group)
+static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
+			       unsigned group)
 {
 	struct mxs_pinctrl_data *d = pinctrl_dev_get_drvdata(pctldev);
 	struct mxs_group *g = &d->soc->groups[group];
@@ -223,7 +223,7 @@ static const struct pinmux_ops mxs_pinmux_ops = {
 	.get_functions_count = mxs_pinctrl_get_funcs_count,
 	.get_function_name = mxs_pinctrl_get_func_name,
 	.get_function_groups = mxs_pinctrl_get_func_groups,
-	.enable = mxs_pinctrl_enable,
+	.set_mux = mxs_pinctrl_set_mux,
 };
 
 static int mxs_pinconf_get(struct pinctrl_dev *pctldev,
diff --git a/drivers/pinctrl/pinctrl-palmas.c b/drivers/pinctrl/pinctrl-palmas.c
index f13d0e78a41c..e3079d3d19fe 100644
--- a/drivers/pinctrl/pinctrl-palmas.c
+++ b/drivers/pinctrl/pinctrl-palmas.c
@@ -685,7 +685,8 @@ static int palmas_pinctrl_get_func_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int palmas_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function,
+static int palmas_pinctrl_set_mux(struct pinctrl_dev *pctldev,
+		unsigned function,
 		unsigned group)
 {
 	struct palmas_pctrl_chip_info *pci = pinctrl_dev_get_drvdata(pctldev);
@@ -742,7 +743,7 @@ static const struct pinmux_ops palmas_pinmux_ops = {
 	.get_functions_count = palmas_pinctrl_get_funcs_count,
 	.get_function_name = palmas_pinctrl_get_func_name,
 	.get_function_groups = palmas_pinctrl_get_func_groups,
-	.enable = palmas_pinctrl_enable,
+	.set_mux = palmas_pinctrl_set_mux,
 };
 
 static int palmas_pinconf_get(struct pinctrl_dev *pctldev,
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index 5e8b2e04cd7a..b91b966add8f 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -813,8 +813,8 @@ static int rockchip_pmx_get_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int rockchip_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector,
-							    unsigned group)
+static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+			    unsigned group)
 {
 	struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
 	const unsigned int *pins = info->groups[group].pins;
@@ -889,7 +889,7 @@ static const struct pinmux_ops rockchip_pmx_ops = {
 	.get_functions_count	= rockchip_pmx_get_funcs_count,
 	.get_function_name	= rockchip_pmx_get_func_name,
 	.get_function_groups	= rockchip_pmx_get_groups,
-	.enable			= rockchip_pmx_enable,
+	.set_mux		= rockchip_pmx_set,
 	.gpio_set_direction	= rockchip_pmx_gpio_set_direction,
 };
 
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 784de13facf3..9032422ad18f 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -447,7 +447,7 @@ static int pcs_get_function(struct pinctrl_dev *pctldev, unsigned pin,
 	return 0;
 }
 
-static int pcs_enable(struct pinctrl_dev *pctldev, unsigned fselector,
+static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector,
 	unsigned group)
 {
 	struct pcs_device *pcs;
@@ -519,7 +519,7 @@ static const struct pinmux_ops pcs_pinmux_ops = {
 	.get_functions_count = pcs_get_functions_count,
 	.get_function_name = pcs_get_function_name,
 	.get_function_groups = pcs_get_function_groups,
-	.enable = pcs_enable,
+	.set_mux = pcs_sex_mux,
 	.gpio_request_enable = pcs_request_gpio,
 };
 
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index 5475374d803f..6c4c41bed1e3 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -914,8 +914,8 @@ static struct st_pio_control *st_get_pio_control(
 	return &bank->pc;
 }
 
-static int st_pmx_enable(struct pinctrl_dev *pctldev, unsigned fselector,
-		unsigned group)
+static int st_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned fselector,
+			unsigned group)
 {
 	struct st_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
 	struct st_pinconf *conf = info->groups[group].pin_conf;
@@ -951,7 +951,7 @@ static struct pinmux_ops st_pmxops = {
 	.get_functions_count	= st_pmx_get_funcs_count,
 	.get_function_name	= st_pmx_get_fname,
 	.get_function_groups	= st_pmx_get_groups,
-	.enable			= st_pmx_enable,
+	.set_mux		= st_pmx_set_mux,
 	.gpio_set_direction	= st_pmx_set_gpio_direction,
 };
 
diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c
index 71c5d4f0c538..3b9bfcf717ac 100644
--- a/drivers/pinctrl/pinctrl-tb10x.c
+++ b/drivers/pinctrl/pinctrl-tb10x.c
@@ -697,7 +697,7 @@ static void tb10x_gpio_disable_free(struct pinctrl_dev *pctl,
 	mutex_unlock(&state->mutex);
 }
 
-static int tb10x_pctl_enable(struct pinctrl_dev *pctl,
+static int tb10x_pctl_set_mux(struct pinctrl_dev *pctl,
 			unsigned func_selector, unsigned group_selector)
 {
 	struct tb10x_pinctrl *state = pinctrl_dev_get_drvdata(pctl);
@@ -744,7 +744,7 @@ static struct pinmux_ops tb10x_pinmux_ops = {
 	.get_function_groups = tb10x_get_function_groups,
 	.gpio_request_enable = tb10x_gpio_request_enable,
 	.gpio_disable_free = tb10x_gpio_disable_free,
-	.enable = tb10x_pctl_enable,
+	.set_mux = tb10x_pctl_set_mux,
 };
 
 static struct pinctrl_desc tb10x_pindesc = {
diff --git a/drivers/pinctrl/pinctrl-tegra-xusb.c b/drivers/pinctrl/pinctrl-tegra-xusb.c
index a06620474845..4648c9e5c582 100644
--- a/drivers/pinctrl/pinctrl-tegra-xusb.c
+++ b/drivers/pinctrl/pinctrl-tegra-xusb.c
@@ -281,9 +281,9 @@ static int tegra_xusb_padctl_get_function_groups(struct pinctrl_dev *pinctrl,
 	return 0;
 }
 
-static int tegra_xusb_padctl_pinmux_enable(struct pinctrl_dev *pinctrl,
-					   unsigned int function,
-					   unsigned int group)
+static int tegra_xusb_padctl_pinmux_set(struct pinctrl_dev *pinctrl,
+					unsigned int function,
+					unsigned int group)
 {
 	struct tegra_xusb_padctl *padctl = pinctrl_dev_get_drvdata(pinctrl);
 	const struct tegra_xusb_padctl_lane *lane;
@@ -311,7 +311,7 @@ static const struct pinmux_ops tegra_xusb_padctl_pinmux_ops = {
 	.get_functions_count = tegra_xusb_padctl_get_functions_count,
 	.get_function_name = tegra_xusb_padctl_get_function_name,
 	.get_function_groups = tegra_xusb_padctl_get_function_groups,
-	.enable = tegra_xusb_padctl_pinmux_enable,
+	.set_mux = tegra_xusb_padctl_pinmux_set,
 };
 
 static int tegra_xusb_padctl_pinconf_group_get(struct pinctrl_dev *pinctrl,
diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c
index 150af5503c09..e5949d51bc52 100644
--- a/drivers/pinctrl/pinctrl-tegra.c
+++ b/drivers/pinctrl/pinctrl-tegra.c
@@ -262,8 +262,9 @@ static int tegra_pinctrl_get_func_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int tegra_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function,
-			       unsigned group)
+static int tegra_pinctrl_set_mux(struct pinctrl_dev *pctldev,
+				 unsigned function,
+				 unsigned group)
 {
 	struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);
 	const struct tegra_pingroup *g;
@@ -294,7 +295,7 @@ static const struct pinmux_ops tegra_pinmux_ops = {
 	.get_functions_count = tegra_pinctrl_get_funcs_count,
 	.get_function_name = tegra_pinctrl_get_func_name,
 	.get_function_groups = tegra_pinctrl_get_func_groups,
-	.enable = tegra_pinctrl_enable,
+	.set_mux = tegra_pinctrl_set_mux,
 };
 
 static int tegra_pinconf_reg(struct tegra_pmx *pmx,
diff --git a/drivers/pinctrl/pinctrl-tz1090-pdc.c b/drivers/pinctrl/pinctrl-tz1090-pdc.c
index 41e81a35cabb..3bb6a3b78864 100644
--- a/drivers/pinctrl/pinctrl-tz1090-pdc.c
+++ b/drivers/pinctrl/pinctrl-tz1090-pdc.c
@@ -547,8 +547,9 @@ static void tz1090_pdc_pinctrl_mux(struct tz1090_pdc_pmx *pmx,
 	__global_unlock2(flags);
 }
 
-static int tz1090_pdc_pinctrl_enable(struct pinctrl_dev *pctldev,
-				     unsigned int function, unsigned int group)
+static int tz1090_pdc_pinctrl_set_mux(struct pinctrl_dev *pctldev,
+				      unsigned int function,
+				      unsigned int group)
 {
 	struct tz1090_pdc_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);
 	const struct tz1090_pdc_pingroup *grp = &tz1090_pdc_groups[group];
@@ -634,7 +635,7 @@ static struct pinmux_ops tz1090_pdc_pinmux_ops = {
 	.get_functions_count	= tz1090_pdc_pinctrl_get_funcs_count,
 	.get_function_name	= tz1090_pdc_pinctrl_get_func_name,
 	.get_function_groups	= tz1090_pdc_pinctrl_get_func_groups,
-	.enable			= tz1090_pdc_pinctrl_enable,
+	.set_mux		= tz1090_pdc_pinctrl_set_mux,
 	.gpio_request_enable	= tz1090_pdc_pinctrl_gpio_request_enable,
 	.gpio_disable_free	= tz1090_pdc_pinctrl_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-tz1090.c b/drivers/pinctrl/pinctrl-tz1090.c
index 24082216842e..48d36413b99f 100644
--- a/drivers/pinctrl/pinctrl-tz1090.c
+++ b/drivers/pinctrl/pinctrl-tz1090.c
@@ -1415,8 +1415,8 @@ found_mux:
  * the effect is the same as enabling the function on each individual pin in the
  * group.
  */
-static int tz1090_pinctrl_enable(struct pinctrl_dev *pctldev,
-				 unsigned int function, unsigned int group)
+static int tz1090_pinctrl_set_mux(struct pinctrl_dev *pctldev,
+				  unsigned int function, unsigned int group)
 {
 	struct tz1090_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);
 	struct tz1090_pingroup *grp;
@@ -1517,7 +1517,7 @@ static struct pinmux_ops tz1090_pinmux_ops = {
 	.get_functions_count	= tz1090_pinctrl_get_funcs_count,
 	.get_function_name	= tz1090_pinctrl_get_func_name,
 	.get_function_groups	= tz1090_pinctrl_get_func_groups,
-	.enable			= tz1090_pinctrl_enable,
+	.set_mux		= tz1090_pinctrl_set_mux,
 	.gpio_request_enable	= tz1090_pinctrl_gpio_request_enable,
 	.gpio_disable_free	= tz1090_pinctrl_gpio_disable_free,
 };
diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c
index 0959bb36450f..e9c7113d81f2 100644
--- a/drivers/pinctrl/pinctrl-u300.c
+++ b/drivers/pinctrl/pinctrl-u300.c
@@ -955,8 +955,8 @@ static void u300_pmx_endisable(struct u300_pmx *upmx, unsigned selector,
 	}
 }
 
-static int u300_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector,
-			   unsigned group)
+static int u300_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
+			    unsigned group)
 {
 	struct u300_pmx *upmx;
 
@@ -994,7 +994,7 @@ static const struct pinmux_ops u300_pmx_ops = {
 	.get_functions_count = u300_pmx_get_funcs_count,
 	.get_function_name = u300_pmx_get_func_name,
 	.get_function_groups = u300_pmx_get_groups,
-	.enable = u300_pmx_enable,
+	.set_mux = u300_pmx_set_mux,
 };
 
 static int u300_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin,
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index c055daf9a80f..b874458dcb88 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -41,7 +41,7 @@ int pinmux_check_ops(struct pinctrl_dev *pctldev)
 	    !ops->get_functions_count ||
 	    !ops->get_function_name ||
 	    !ops->get_function_groups ||
-	    !ops->enable) {
+	    !ops->set_mux) {
 		dev_err(pctldev->dev, "pinmux ops lacks necessary functions\n");
 		return -EINVAL;
 	}
@@ -445,15 +445,15 @@ int pinmux_enable_setting(struct pinctrl_setting const *setting)
 		desc->mux_setting = &(setting->data.mux);
 	}
 
-	ret = ops->enable(pctldev, setting->data.mux.func,
-			  setting->data.mux.group);
+	ret = ops->set_mux(pctldev, setting->data.mux.func,
+			   setting->data.mux.group);
 
 	if (ret)
-		goto err_enable;
+		goto err_set_mux;
 
 	return 0;
 
-err_enable:
+err_set_mux:
 	for (i = 0; i < num_pins; i++) {
 		desc = pin_desc_get(pctldev, pins[i]);
 		if (desc)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 80a64cad907d..01eab47a746f 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -134,9 +134,9 @@ static int msm_get_function_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int msm_pinmux_enable(struct pinctrl_dev *pctldev,
-			     unsigned function,
-			     unsigned group)
+static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev,
+			      unsigned function,
+			      unsigned group)
 {
 	struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev);
 	const struct msm_pingroup *g;
@@ -170,7 +170,7 @@ static const struct pinmux_ops msm_pinmux_ops = {
 	.get_functions_count	= msm_get_functions_count,
 	.get_function_name	= msm_get_function_name,
 	.get_function_groups	= msm_get_function_groups,
-	.enable			= msm_pinmux_enable,
+	.set_mux		= msm_pinmux_set_mux,
 };
 
 static int msm_config_reg(struct msm_pinctrl *pctrl,
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos5440.c b/drivers/pinctrl/samsung/pinctrl-exynos5440.c
index 603da2f9dd95..b995ec2c5d16 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos5440.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos5440.c
@@ -364,8 +364,9 @@ static void exynos5440_pinmux_setup(struct pinctrl_dev *pctldev, unsigned select
 }
 
 /* enable a specified pinmux by writing to registers */
-static int exynos5440_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector,
-					unsigned group)
+static int exynos5440_pinmux_set_mux(struct pinctrl_dev *pctldev,
+				     unsigned selector,
+				     unsigned group)
 {
 	exynos5440_pinmux_setup(pctldev, selector, group, true);
 	return 0;
@@ -387,7 +388,7 @@ static const struct pinmux_ops exynos5440_pinmux_ops = {
 	.get_functions_count	= exynos5440_get_functions_count,
 	.get_function_name	= exynos5440_pinmux_get_fname,
 	.get_function_groups	= exynos5440_pinmux_get_groups,
-	.enable			= exynos5440_pinmux_enable,
+	.set_mux		= exynos5440_pinmux_set_mux,
 	.gpio_set_direction	= exynos5440_pinmux_gpio_set_direction,
 };
 
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c
index b07406da333c..4a47691c32b1 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.c
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.c
@@ -401,8 +401,9 @@ static void samsung_pinmux_setup(struct pinctrl_dev *pctldev, unsigned selector,
 }
 
 /* enable a specified pinmux by writing to registers */
-static int samsung_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector,
-					unsigned group)
+static int samsung_pinmux_set_mux(struct pinctrl_dev *pctldev,
+				  unsigned selector,
+				  unsigned group)
 {
 	samsung_pinmux_setup(pctldev, selector, group, true);
 	return 0;
@@ -413,7 +414,7 @@ static const struct pinmux_ops samsung_pinmux_ops = {
 	.get_functions_count	= samsung_get_functions_count,
 	.get_function_name	= samsung_pinmux_get_fname,
 	.get_function_groups	= samsung_pinmux_get_groups,
-	.enable			= samsung_pinmux_enable,
+	.set_mux		= samsung_pinmux_set_mux,
 };
 
 /* set or get the pin config settings for a specified pin */
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c
index 11db3ee39d40..910deaefa0ac 100644
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -312,8 +312,8 @@ static int sh_pfc_get_function_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int sh_pfc_func_enable(struct pinctrl_dev *pctldev, unsigned selector,
-			      unsigned group)
+static int sh_pfc_func_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
+			       unsigned group)
 {
 	struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev);
 	struct sh_pfc *pfc = pmx->pfc;
@@ -442,7 +442,7 @@ static const struct pinmux_ops sh_pfc_pinmux_ops = {
 	.get_functions_count	= sh_pfc_get_functions_count,
 	.get_function_name	= sh_pfc_get_function_name,
 	.get_function_groups	= sh_pfc_get_function_groups,
-	.enable			= sh_pfc_func_enable,
+	.set_mux		= sh_pfc_func_set_mux,
 	.gpio_request_enable	= sh_pfc_gpio_request_enable,
 	.gpio_disable_free	= sh_pfc_gpio_disable_free,
 	.gpio_set_direction	= sh_pfc_gpio_set_direction,
diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c
index 45f93f952a39..6f252fd7cb0c 100644
--- a/drivers/pinctrl/sirf/pinctrl-sirf.c
+++ b/drivers/pinctrl/sirf/pinctrl-sirf.c
@@ -179,8 +179,9 @@ static void sirfsoc_pinmux_endisable(struct sirfsoc_pmx *spmx,
 	}
 }
 
-static int sirfsoc_pinmux_enable(struct pinctrl_dev *pmxdev, unsigned selector,
-	unsigned group)
+static int sirfsoc_pinmux_set_mux(struct pinctrl_dev *pmxdev,
+				unsigned selector,
+				unsigned group)
 {
 	struct sirfsoc_pmx *spmx;
 
@@ -237,7 +238,7 @@ static int sirfsoc_pinmux_request_gpio(struct pinctrl_dev *pmxdev,
 }
 
 static struct pinmux_ops sirfsoc_pinmux_ops = {
-	.enable = sirfsoc_pinmux_enable,
+	.set_mux = sirfsoc_pinmux_set_mux,
 	.get_functions_count = sirfsoc_pinmux_get_funcs_count,
 	.get_function_name = sirfsoc_pinmux_get_func_name,
 	.get_function_groups = sirfsoc_pinmux_get_groups,
diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c
index f72cc4e192bd..abdb05ac43dc 100644
--- a/drivers/pinctrl/spear/pinctrl-spear.c
+++ b/drivers/pinctrl/spear/pinctrl-spear.c
@@ -268,7 +268,7 @@ static int spear_pinctrl_endisable(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
-static int spear_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function,
+static int spear_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned function,
 		unsigned group)
 {
 	return spear_pinctrl_endisable(pctldev, function, group, true);
@@ -338,7 +338,7 @@ static const struct pinmux_ops spear_pinmux_ops = {
 	.get_functions_count = spear_pinctrl_get_funcs_count,
 	.get_function_name = spear_pinctrl_get_func_name,
 	.get_function_groups = spear_pinctrl_get_func_groups,
-	.enable = spear_pinctrl_enable,
+	.set_mux = spear_pinctrl_set_mux,
 	.gpio_request_enable = gpio_request_enable,
 	.gpio_disable_free = gpio_disable_free,
 };
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index 3df66e366c87..ef9d804e55de 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -393,9 +393,9 @@ static void sunxi_pmx_set(struct pinctrl_dev *pctldev,
 	spin_unlock_irqrestore(&pctl->lock, flags);
 }
 
-static int sunxi_pmx_enable(struct pinctrl_dev *pctldev,
-			    unsigned function,
-			    unsigned group)
+static int sunxi_pmx_set_mux(struct pinctrl_dev *pctldev,
+			     unsigned function,
+			     unsigned group)
 {
 	struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
 	struct sunxi_pinctrl_group *g = pctl->groups + group;
@@ -441,7 +441,7 @@ static const struct pinmux_ops sunxi_pmx_ops = {
 	.get_functions_count	= sunxi_pmx_get_funcs_cnt,
 	.get_function_name	= sunxi_pmx_get_func_name,
 	.get_function_groups	= sunxi_pmx_get_func_groups,
-	.enable			= sunxi_pmx_enable,
+	.set_mux		= sunxi_pmx_set_mux,
 	.gpio_set_direction	= sunxi_pmx_gpio_set_direction,
 };
 
diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c
index 8cea355f9a81..d055d63309e4 100644
--- a/drivers/pinctrl/vt8500/pinctrl-wmt.c
+++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c
@@ -131,9 +131,9 @@ static int wmt_set_pinmux(struct wmt_pinctrl_data *data, unsigned func,
 	return 0;
 }
 
-static int wmt_pmx_enable(struct pinctrl_dev *pctldev,
-			  unsigned func_selector,
-			  unsigned group_selector)
+static int wmt_pmx_set_mux(struct pinctrl_dev *pctldev,
+			   unsigned func_selector,
+			   unsigned group_selector)
 {
 	struct wmt_pinctrl_data *data = pinctrl_dev_get_drvdata(pctldev);
 	u32 pinnum = data->pins[group_selector].number;
@@ -168,7 +168,7 @@ static struct pinmux_ops wmt_pinmux_ops = {
 	.get_functions_count = wmt_pmx_get_functions_count,
 	.get_function_name = wmt_pmx_get_function_name,
 	.get_function_groups = wmt_pmx_get_function_groups,
-	.enable = wmt_pmx_enable,
+	.set_mux = wmt_pmx_set_mux,
 	.gpio_disable_free = wmt_pmx_gpio_disable_free,
 	.gpio_set_direction = wmt_pmx_gpio_set_direction,
 };
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index 3097aafbeb24..511bda9ed4bf 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -39,13 +39,12 @@ struct pinctrl_dev;
  *	name can be used with the generic @pinctrl_ops to retrieve the
  *	actual pins affected. The applicable groups will be returned in
  *	@groups and the number of groups in @num_groups
- * @enable: enable a certain muxing function with a certain pin group. The
+ * @set_mux: enable a certain muxing function with a certain pin group. The
  *	driver does not need to figure out whether enabling this function
  *	conflicts some other use of the pins in that group, such collisions
  *	are handled by the pinmux subsystem. The @func_selector selects a
  *	certain function whereas @group_selector selects a certain set of pins
  *	to be used. On simple controllers the latter argument may be ignored
- * @disable: disable a certain muxing selector with a certain pin group
  * @gpio_request_enable: requests and enables GPIO on a certain pin.
  *	Implement this only if you can mux every pin individually as GPIO. The
  *	affected GPIO range is passed along with an offset(pin number) into that
@@ -68,8 +67,8 @@ struct pinmux_ops {
 				  unsigned selector,
 				  const char * const **groups,
 				  unsigned * const num_groups);
-	int (*enable) (struct pinctrl_dev *pctldev, unsigned func_selector,
-		       unsigned group_selector);
+	int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector,
+			unsigned group_selector);
 	int (*gpio_request_enable) (struct pinctrl_dev *pctldev,
 				    struct pinctrl_gpio_range *range,
 				    unsigned offset);
-- 
cgit v1.2.3


From 3af0dbd592fe0a92002f16e341519ba03e92adf7 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Mon, 1 Sep 2014 11:19:52 +0800
Subject: gpio: mcp23s08 to support both device tree and platform data

Device tree is not enabled in some architecture where gpio
driver mcp23s08 is still required.

v2-changes:
- Parse device tree properties into platform data other than
  individual variables.
v3-changes:
- Use of_node in gpio_chip device structure, because the
  struct device * always has an of_node which is NULL when
  OF is not used.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/Kconfig         |  1 -
 drivers/gpio/gpio-mcp23s08.c | 64 +++++++++++++++++++++++---------------------
 include/linux/spi/mcp23s08.h | 18 +++++++++++++
 3 files changed, 52 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index ec27ec0be8c2..ec398bee7c60 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -806,7 +806,6 @@ config GPIO_MAX7301
 
 config GPIO_MCP23S08
 	tristate "Microchip MCP23xxx I/O expander"
-	depends on OF_GPIO
 	depends on (SPI_MASTER && !I2C) || I2C
 	help
 	  SPI/I2C driver for Microchip MCP23S08/MCP23S17/MCP23008/MCP23017
diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c
index 6f183d9b487e..8488e2fd307c 100644
--- a/drivers/gpio/gpio-mcp23s08.c
+++ b/drivers/gpio/gpio-mcp23s08.c
@@ -479,7 +479,7 @@ static int mcp23s08_irq_setup(struct mcp23s08 *mcp)
 
 	mutex_init(&mcp->irq_lock);
 
-	mcp->irq_domain = irq_domain_add_linear(chip->of_node, chip->ngpio,
+	mcp->irq_domain = irq_domain_add_linear(chip->dev->of_node, chip->ngpio,
 						&irq_domain_simple_ops, mcp);
 	if (!mcp->irq_domain)
 		return -ENODEV;
@@ -581,7 +581,7 @@ done:
 
 static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
 			      void *data, unsigned addr, unsigned type,
-			      unsigned base, unsigned pullups)
+			      struct mcp23s08_platform_data *pdata, int cs)
 {
 	int status;
 	bool mirror = false;
@@ -635,7 +635,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
 		return -EINVAL;
 	}
 
-	mcp->chip.base = base;
+	mcp->chip.base = pdata->base;
 	mcp->chip.can_sleep = true;
 	mcp->chip.dev = dev;
 	mcp->chip.owner = THIS_MODULE;
@@ -648,11 +648,9 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
 	if (status < 0)
 		goto fail;
 
-	mcp->irq_controller = of_property_read_bool(mcp->chip.of_node,
-						"interrupt-controller");
+	mcp->irq_controller = pdata->irq_controller;
 	if (mcp->irq && mcp->irq_controller && (type == MCP_TYPE_017))
-		mirror = of_property_read_bool(mcp->chip.of_node,
-						"microchip,irq-mirror");
+		mirror = pdata->mirror;
 
 	if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN) || mirror) {
 		/* mcp23s17 has IOCON twice, make sure they are in sync */
@@ -668,7 +666,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
 	}
 
 	/* configure ~100K pullups */
-	status = mcp->ops->write(mcp, MCP_GPPU, pullups);
+	status = mcp->ops->write(mcp, MCP_GPPU, pdata->chip[cs].pullups);
 	if (status < 0)
 		goto fail;
 
@@ -768,25 +766,29 @@ MODULE_DEVICE_TABLE(of, mcp23s08_i2c_of_match);
 static int mcp230xx_probe(struct i2c_client *client,
 				    const struct i2c_device_id *id)
 {
-	struct mcp23s08_platform_data *pdata;
+	struct mcp23s08_platform_data *pdata, local_pdata;
 	struct mcp23s08 *mcp;
-	int status, base, pullups;
+	int status;
 	const struct of_device_id *match;
 
 	match = of_match_device(of_match_ptr(mcp23s08_i2c_of_match),
 					&client->dev);
-	pdata = dev_get_platdata(&client->dev);
-	if (match || !pdata) {
-		base = -1;
-		pullups = 0;
+	if (match) {
+		pdata = &local_pdata;
+		pdata->base = -1;
+		pdata->chip[0].pullups = 0;
+		pdata->irq_controller =	of_property_read_bool(
+					client->dev.of_node,
+					"interrupt-controller");
+		pdata->mirror = of_property_read_bool(client->dev.of_node,
+						      "microchip,irq-mirror");
 		client->irq = irq_of_parse_and_map(client->dev.of_node, 0);
 	} else {
-		if (!gpio_is_valid(pdata->base)) {
+		pdata = dev_get_platdata(&client->dev);
+		if (!pdata || !gpio_is_valid(pdata->base)) {
 			dev_dbg(&client->dev, "invalid platform data\n");
 			return -EINVAL;
 		}
-		base = pdata->base;
-		pullups = pdata->chip[0].pullups;
 	}
 
 	mcp = kzalloc(sizeof(*mcp), GFP_KERNEL);
@@ -795,7 +797,7 @@ static int mcp230xx_probe(struct i2c_client *client,
 
 	mcp->irq = client->irq;
 	status = mcp23s08_probe_one(mcp, &client->dev, client, client->addr,
-				    id->driver_data, base, pullups);
+				    id->driver_data, pdata, 0);
 	if (status)
 		goto fail;
 
@@ -863,14 +865,12 @@ static void mcp23s08_i2c_exit(void) { }
 
 static int mcp23s08_probe(struct spi_device *spi)
 {
-	struct mcp23s08_platform_data	*pdata;
+	struct mcp23s08_platform_data	*pdata, local_pdata;
 	unsigned			addr;
 	int				chips = 0;
 	struct mcp23s08_driver_data	*data;
 	int				status, type;
-	unsigned			base = -1,
-					ngpio = 0,
-					pullups[ARRAY_SIZE(pdata->chip)];
+	unsigned			ngpio = 0;
 	const struct			of_device_id *match;
 	u32				spi_present_mask = 0;
 
@@ -893,11 +893,18 @@ static int mcp23s08_probe(struct spi_device *spi)
 			return -ENODEV;
 		}
 
+		pdata = &local_pdata;
+		pdata->base = -1;
 		for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) {
-			pullups[addr] = 0;
+			pdata->chip[addr].pullups = 0;
 			if (spi_present_mask & (1 << addr))
 				chips++;
 		}
+		pdata->irq_controller =	of_property_read_bool(
+					spi->dev.of_node,
+					"interrupt-controller");
+		pdata->mirror = of_property_read_bool(spi->dev.of_node,
+						      "microchip,irq-mirror");
 	} else {
 		type = spi_get_device_id(spi)->driver_data;
 		pdata = dev_get_platdata(&spi->dev);
@@ -917,10 +924,7 @@ static int mcp23s08_probe(struct spi_device *spi)
 				return -EINVAL;
 			}
 			spi_present_mask |= 1 << addr;
-			pullups[addr] = pdata->chip[addr].pullups;
 		}
-
-		base = pdata->base;
 	}
 
 	if (!chips)
@@ -938,13 +942,13 @@ static int mcp23s08_probe(struct spi_device *spi)
 		chips--;
 		data->mcp[addr] = &data->chip[chips];
 		status = mcp23s08_probe_one(data->mcp[addr], &spi->dev, spi,
-					    0x40 | (addr << 1), type, base,
-					    pullups[addr]);
+					    0x40 | (addr << 1), type, pdata,
+					    addr);
 		if (status < 0)
 			goto fail;
 
-		if (base != -1)
-			base += (type == MCP_TYPE_S17) ? 16 : 8;
+		if (pdata->base != -1)
+			pdata->base += (type == MCP_TYPE_S17) ? 16 : 8;
 		ngpio += (type == MCP_TYPE_S17) ? 16 : 8;
 	}
 	data->ngpio = ngpio;
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 2d676d5aaa89..aa07d7b32568 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -22,4 +22,22 @@ struct mcp23s08_platform_data {
 	 * base to base+15 (or base+31 for s17 variant).
 	 */
 	unsigned	base;
+	/* Marks the device as a interrupt controller.
+	 * NOTE: The interrupt functionality is only supported for i2c
+	 * versions of the chips. The spi chips can also do the interrupts,
+	 * but this is not supported by the linux driver yet.
+	 */
+	bool		irq_controller;
+
+	/* Sets the mirror flag in the IOCON register. Devices
+	 * with two interrupt outputs (these are the devices ending with 17 and
+	 * those that have 16 IOs) have two IO banks: IO 0-7 form bank 1 and
+	 * IO 8-15 are bank 2. These chips have two different interrupt outputs:
+	 * One for bank 1 and another for bank 2. If irq-mirror is set, both
+	 * interrupts are generated regardless of the bank that an input change
+	 * occurred on. If it is not set, the interrupt are only generated for
+	 * the bank they belong to.
+	 * On devices with only one interrupt output this property is useless.
+	 */
+	bool		mirror;
 };
-- 
cgit v1.2.3


From 0d37899363b0e5486f8800231b7edd75e8b60942 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 3 Sep 2014 20:01:55 +0200
Subject: pinctrl: generic: Fix PIN_CONFIG_DRIVE_OPEN_SOURCE source/drain doc
 mismatch

PIN_CONFIG_DRIVE_OPEN_SOURCE enables open source, not open drain.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinconf-generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index a15f10727eb8..d578a60eff23 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -57,7 +57,7 @@
  *	which are then pulled up with an external resistor. Setting this
  *	config will enable open drain mode, the argument is ignored.
  * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source
- *	(open emitter). Setting this config will enable open drain mode, the
+ *	(open emitter). Setting this config will enable open source mode, the
  *	argument is ignored.
  * @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current
  *	passed as argument. The argument is in mA.
-- 
cgit v1.2.3


From 87fed556d08d21dd7dd3e0222c94c187e4c2d5e2 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Wed, 3 Sep 2014 10:35:13 +0200
Subject: bcma: get info about flash type SoC booted from
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is an ongoing work on cleaning MIPS's nvram support so it could be
re-used on other platforms (bcm53xx to say precisely).
This will require a bit of extra logic in bcma this patch implements.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_mips.c     | 62 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/bcma/bcma_regs.h |  5 ++++
 2 files changed, 67 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index 11115bbe115c..004d6aa671ce 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -21,6 +21,14 @@
 #include <linux/serial_reg.h>
 #include <linux/time.h>
 
+enum bcma_boot_dev {
+	BCMA_BOOT_DEV_UNK = 0,
+	BCMA_BOOT_DEV_ROM,
+	BCMA_BOOT_DEV_PARALLEL,
+	BCMA_BOOT_DEV_SERIAL,
+	BCMA_BOOT_DEV_NAND,
+};
+
 static const char * const part_probes[] = { "bcm47xxpart", NULL };
 
 static struct physmap_flash_data bcma_pflash_data = {
@@ -229,11 +237,51 @@ u32 bcma_cpu_clock(struct bcma_drv_mips *mcore)
 }
 EXPORT_SYMBOL(bcma_cpu_clock);
 
+static enum bcma_boot_dev bcma_boot_dev(struct bcma_bus *bus)
+{
+	struct bcma_drv_cc *cc = &bus->drv_cc;
+	u8 cc_rev = cc->core->id.rev;
+
+	if (cc_rev == 42) {
+		struct bcma_device *core;
+
+		core = bcma_find_core(bus, BCMA_CORE_NS_ROM);
+		if (core) {
+			switch (bcma_aread32(core, BCMA_IOST) &
+				BCMA_NS_ROM_IOST_BOOT_DEV_MASK) {
+			case BCMA_NS_ROM_IOST_BOOT_DEV_NOR:
+				return BCMA_BOOT_DEV_SERIAL;
+			case BCMA_NS_ROM_IOST_BOOT_DEV_NAND:
+				return BCMA_BOOT_DEV_NAND;
+			case BCMA_NS_ROM_IOST_BOOT_DEV_ROM:
+			default:
+				return BCMA_BOOT_DEV_ROM;
+			}
+		}
+	} else {
+		if (cc_rev == 38) {
+			if (cc->status & BCMA_CC_CHIPST_5357_NAND_BOOT)
+				return BCMA_BOOT_DEV_NAND;
+			else if (cc->status & BIT(5))
+				return BCMA_BOOT_DEV_ROM;
+		}
+
+		if ((cc->capabilities & BCMA_CC_CAP_FLASHT) ==
+		    BCMA_CC_FLASHT_PARA)
+			return BCMA_BOOT_DEV_PARALLEL;
+		else
+			return BCMA_BOOT_DEV_SERIAL;
+	}
+
+	return BCMA_BOOT_DEV_SERIAL;
+}
+
 static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore)
 {
 	struct bcma_bus *bus = mcore->core->bus;
 	struct bcma_drv_cc *cc = &bus->drv_cc;
 	struct bcma_pflash *pflash = &cc->pflash;
+	enum bcma_boot_dev boot_dev;
 
 	switch (cc->capabilities & BCMA_CC_CAP_FLASHT) {
 	case BCMA_CC_FLASHT_STSER:
@@ -269,6 +317,20 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore)
 			bcma_nflash_init(cc);
 		}
 	}
+
+	/* Determine flash type this SoC boots from */
+	boot_dev = bcma_boot_dev(bus);
+	switch (boot_dev) {
+	case BCMA_BOOT_DEV_PARALLEL:
+	case BCMA_BOOT_DEV_SERIAL:
+		/* TODO: Init NVRAM using BCMA_SOC_FLASH2 window */
+		break;
+	case BCMA_BOOT_DEV_NAND:
+		/* TODO: Init NVRAM using BCMA_SOC_FLASH1 window */
+		break;
+	default:
+		break;
+	}
 }
 
 void bcma_core_mips_early_init(struct bcma_drv_mips *mcore)
diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h
index 917dcd7965e7..e64ae7bf80a1 100644
--- a/include/linux/bcma/bcma_regs.h
+++ b/include/linux/bcma/bcma_regs.h
@@ -39,6 +39,11 @@
 #define  BCMA_RESET_CTL_RESET		0x0001
 #define BCMA_RESET_ST			0x0804
 
+#define BCMA_NS_ROM_IOST_BOOT_DEV_MASK	0x0003
+#define BCMA_NS_ROM_IOST_BOOT_DEV_NOR	0x0000
+#define BCMA_NS_ROM_IOST_BOOT_DEV_NAND	0x0001
+#define BCMA_NS_ROM_IOST_BOOT_DEV_ROM	0x0002
+
 /* BCMA PCI config space registers. */
 #define BCMA_PCI_PMCSR			0x44
 #define  BCMA_PCI_PE			0x100
-- 
cgit v1.2.3


From 40bea039593dfc7f3f9814dab844f6db43ae580b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 13 Aug 2014 18:50:16 +0200
Subject: nohz: Restore NMI safe local irq work for local nohz kick

The local nohz kick is currently used by perf which needs it to be
NMI-safe. Recent commit though (7d1311b93e58ed55f3a31cc8f94c4b8fe988a2b9)
changed its implementation to fire the local kick using the remote kick
API. It was convenient to make the code more generic but the remote kick
isn't NMI-safe.

As a result:

	WARNING: CPU: 3 PID: 18062 at kernel/irq_work.c:72 irq_work_queue_on+0x11e/0x140()
	CPU: 3 PID: 18062 Comm: trinity-subchil Not tainted 3.16.0+ #34
	0000000000000009 00000000903774d1 ffff880244e06c00 ffffffff9a7f1e37
	0000000000000000 ffff880244e06c38 ffffffff9a0791dd ffff880244fce180
	0000000000000003 ffff880244e06d58 ffff880244e06ef8 0000000000000000
	Call Trace:
	<NMI>  [<ffffffff9a7f1e37>] dump_stack+0x4e/0x7a
	[<ffffffff9a0791dd>] warn_slowpath_common+0x7d/0xa0
	[<ffffffff9a07930a>] warn_slowpath_null+0x1a/0x20
	[<ffffffff9a17ca1e>] irq_work_queue_on+0x11e/0x140
	[<ffffffff9a10a2c7>] tick_nohz_full_kick_cpu+0x57/0x90
	[<ffffffff9a186cd5>] __perf_event_overflow+0x275/0x350
	[<ffffffff9a184f80>] ? perf_event_task_disable+0xa0/0xa0
	[<ffffffff9a01a4cf>] ? x86_perf_event_set_period+0xbf/0x150
	[<ffffffff9a187934>] perf_event_overflow+0x14/0x20
	[<ffffffff9a020386>] intel_pmu_handle_irq+0x206/0x410
	[<ffffffff9a0b54d3>] ? arch_vtime_task_switch+0x63/0x130
	[<ffffffff9a01937b>] perf_event_nmi_handler+0x2b/0x50
	[<ffffffff9a007b72>] nmi_handle+0xd2/0x390
	[<ffffffff9a007aa5>] ? nmi_handle+0x5/0x390
	[<ffffffff9a0d131b>] ? lock_release+0xab/0x330
	[<ffffffff9a008062>] default_do_nmi+0x72/0x1c0
	[<ffffffff9a0c925f>] ? cpuacct_account_field+0xcf/0x200
	[<ffffffff9a008268>] do_nmi+0xb8/0x100

Lets fix this by restoring the use of local irq work for the nohz local
kick.

Reported-by: Catalin Iacob <iacobcatalin@gmail.com>
Reported-and-tested-by: Dave Jones <davej@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/tick.h     |  7 +------
 kernel/time/tick-sched.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 059052306831..9a82c7dc3fdd 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -183,13 +183,8 @@ static inline bool tick_nohz_full_cpu(int cpu)
 
 extern void tick_nohz_init(void);
 extern void __tick_nohz_full_check(void);
+extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_cpu(int cpu);
-
-static inline void tick_nohz_full_kick(void)
-{
-	tick_nohz_full_kick_cpu(smp_processor_id());
-}
-
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 99aa6ee3908f..f654a8a298fa 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -224,6 +224,20 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 	.func = nohz_full_kick_work_func,
 };
 
+/*
+ * Kick this CPU if it's full dynticks in order to force it to
+ * re-evaluate its dependency on the tick and restart it if necessary.
+ * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
+ * is NMI safe.
+ */
+void tick_nohz_full_kick(void)
+{
+	if (!tick_nohz_full_cpu(smp_processor_id()))
+		return;
+
+	irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+}
+
 /*
  * Kick the CPU if it's full dynticks in order to force it to
  * re-evaluate its dependency on the tick and restart it if necessary.
-- 
cgit v1.2.3


From 8d38821cbcf51292cd5a23469d03bd38932a3ba9 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 1 Aug 2014 14:15:10 +0200
Subject: resources: Add device-managed request/release_resource()

Provide device-managed implementations of the request_resource() and
release_resource() functions.  Upon failure to request a resource, the new
devm_request_resource() function will output an error message for
consistent error reporting.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
---
 Documentation/driver-model/devres.txt |  2 +
 include/linux/ioport.h                |  5 +++
 kernel/resource.c                     | 70 +++++++++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index d14710b04439..befc3fe12ba6 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -264,8 +264,10 @@ IIO
 IO region
   devm_release_mem_region()
   devm_release_region()
+  devm_release_resource()
   devm_request_mem_region()
   devm_request_region()
+  devm_request_resource()
 
 IOMAP
   devm_ioport_map()
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 142ec544167c..2c5250222278 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -215,6 +215,11 @@ static inline int __deprecated check_region(resource_size_t s,
 
 /* Wrappers for managed devices */
 struct device;
+
+extern int devm_request_resource(struct device *dev, struct resource *root,
+				 struct resource *new);
+extern void devm_release_resource(struct device *dev, struct resource *new);
+
 #define devm_request_region(dev,start,n,name) \
 	__devm_request_region(dev, &ioport_resource, (start), (n), (name))
 #define devm_request_mem_region(dev,start,n,name) \
diff --git a/kernel/resource.c b/kernel/resource.c
index da14b8d09296..ca24f19f9d18 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1248,6 +1248,76 @@ int release_mem_region_adjustable(struct resource *parent,
 /*
  * Managed region resource
  */
+static void devm_resource_release(struct device *dev, void *ptr)
+{
+	struct resource **r = ptr;
+
+	release_resource(*r);
+}
+
+/**
+ * devm_request_resource() - request and reserve an I/O or memory resource
+ * @dev: device for which to request the resource
+ * @root: root of the resource tree from which to request the resource
+ * @new: descriptor of the resource to request
+ *
+ * This is a device-managed version of request_resource(). There is usually
+ * no need to release resources requested by this function explicitly since
+ * that will be taken care of when the device is unbound from its driver.
+ * If for some reason the resource needs to be released explicitly, because
+ * of ordering issues for example, drivers must call devm_release_resource()
+ * rather than the regular release_resource().
+ *
+ * When a conflict is detected between any existing resources and the newly
+ * requested resource, an error message will be printed.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int devm_request_resource(struct device *dev, struct resource *root,
+			  struct resource *new)
+{
+	struct resource *conflict, **ptr;
+
+	ptr = devres_alloc(devm_resource_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	*ptr = new;
+
+	conflict = request_resource_conflict(root, new);
+	if (conflict) {
+		dev_err(dev, "resource collision: %pR conflicts with %s %pR\n",
+			new, conflict->name, conflict);
+		devres_free(ptr);
+		return -EBUSY;
+	}
+
+	devres_add(dev, ptr);
+	return 0;
+}
+EXPORT_SYMBOL(devm_request_resource);
+
+static int devm_resource_match(struct device *dev, void *res, void *data)
+{
+	struct resource **ptr = res;
+
+	return *ptr == data;
+}
+
+/**
+ * devm_release_resource() - release a previously requested resource
+ * @dev: device for which to release the resource
+ * @new: descriptor of the resource to release
+ *
+ * Releases a resource previously requested using devm_request_resource().
+ */
+void devm_release_resource(struct device *dev, struct resource *new)
+{
+	WARN_ON(devres_release(dev, devm_resource_release, devm_resource_match,
+			       new));
+}
+EXPORT_SYMBOL(devm_release_resource);
+
 struct region_devres {
 	struct resource *parent;
 	resource_size_t start;
-- 
cgit v1.2.3


From efd01a72e7ec99ed583151fbf16b176cd2158967 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 5 Aug 2014 14:08:55 +0200
Subject: PCI/AER: Make <linux/aer.h> standalone includable

The header file references u16 and u32 types, but they are not defined in
the header nor does the header pull in the necessary includes for them.
This causes build breakage when the file is included without any of the
dependencies being satisfied from somewhere else.

Fix this by including linux/types.h (for u16 and u32).

[bhelgaas: removed pci_dev declaration (already added by 5ccb8225abf2)]
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/aer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/aer.h b/include/linux/aer.h
index c826d1c28f9c..4fef65e57023 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -7,6 +7,8 @@
 #ifndef _AER_H_
 #define _AER_H_
 
+#include <linux/types.h>
+
 #define AER_NONFATAL			0
 #define AER_FATAL			1
 #define AER_CORRECTABLE			2
-- 
cgit v1.2.3


From 3b5e6454aaf6b4439b19400d8365e2ec2d24e411 Mon Sep 17 00:00:00 2001
From: Gioh Kim <gioh.kim@lge.com>
Date: Thu, 4 Sep 2014 22:04:42 -0400
Subject: fs/buffer.c: support buffer cache allocations with gfp modifiers

A buffer cache is allocated from movable area because it is referred
for a while and released soon.  But some filesystems are taking buffer
cache for a long time and it can disturb page migration.

New APIs are introduced to allocate buffer cache with user specific
flag.  *_gfp APIs are for user want to set page allocation flag for
page cache allocation.  And *_unmovable APIs are for the user wants to
allocate page cache from non-movable area.

Signed-off-by: Gioh Kim <gioh.kim@lge.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/buffer.c                 | 45 +++++++++++++++++++++++++------------------
 include/linux/buffer_head.h | 47 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 68 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 8f05111bbb8b..9a6029e0dd71 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
  */
 static int
 grow_dev_page(struct block_device *bdev, sector_t block,
-		pgoff_t index, int size, int sizebits)
+	      pgoff_t index, int size, int sizebits, gfp_t gfp)
 {
 	struct inode *inode = bdev->bd_inode;
 	struct page *page;
@@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	int ret = 0;		/* Will call free_more_memory() */
 	gfp_t gfp_mask;
 
-	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
-	gfp_mask |= __GFP_MOVABLE;
+	gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
+
 	/*
 	 * XXX: __getblk_slow() can not really deal with failure and
 	 * will endlessly loop on improvised global reclaim.  Prefer
@@ -1058,7 +1058,7 @@ failed:
  * that page was dirty, the buffers are set dirty also.
  */
 static int
-grow_buffers(struct block_device *bdev, sector_t block, int size)
+grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
 {
 	pgoff_t index;
 	int sizebits;
@@ -1085,11 +1085,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
 	}
 
 	/* Create a page with the proper size buffers.. */
-	return grow_dev_page(bdev, block, index, size, sizebits);
+	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
 }
 
-static struct buffer_head *
-__getblk_slow(struct block_device *bdev, sector_t block, int size)
+struct buffer_head *
+__getblk_slow(struct block_device *bdev, sector_t block,
+	     unsigned size, gfp_t gfp)
 {
 	/* Size must be multiple of hard sectorsize */
 	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1111,13 +1112,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
 		if (bh)
 			return bh;
 
-		ret = grow_buffers(bdev, block, size);
+		ret = grow_buffers(bdev, block, size, gfp);
 		if (ret < 0)
 			return NULL;
 		if (ret == 0)
 			free_more_memory();
 	}
 }
+EXPORT_SYMBOL(__getblk_slow);
 
 /*
  * The relationship between dirty buffers and dirty pages:
@@ -1371,24 +1373,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
 EXPORT_SYMBOL(__find_get_block);
 
 /*
- * __getblk will locate (and, if necessary, create) the buffer_head
+ * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
  * which corresponds to the passed block_device, block and size. The
  * returned buffer has its reference count incremented.
  *
- * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
- * attempt is failing.  FIXME, perhaps?
+ * __getblk_gfp() will lock up the machine if grow_dev_page's
+ * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
  */
 struct buffer_head *
-__getblk(struct block_device *bdev, sector_t block, unsigned size)
+__getblk_gfp(struct block_device *bdev, sector_t block,
+	     unsigned size, gfp_t gfp)
 {
 	struct buffer_head *bh = __find_get_block(bdev, block, size);
 
 	might_sleep();
 	if (bh == NULL)
-		bh = __getblk_slow(bdev, block, size);
+		bh = __getblk_slow(bdev, block, size, gfp);
 	return bh;
 }
-EXPORT_SYMBOL(__getblk);
+EXPORT_SYMBOL(__getblk_gfp);
 
 /*
  * Do async read-ahead on a buffer..
@@ -1404,24 +1407,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
 EXPORT_SYMBOL(__breadahead);
 
 /**
- *  __bread() - reads a specified block and returns the bh
+ *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
  *  @block: number of block
  *  @size: size (in bytes) to read
- * 
+ *  @gfp: page allocation flag
+ *
  *  Reads a specified block, and returns buffer head that contains it.
+ *  The page cache can be allocated from non-movable area
+ *  not to prevent page migration if you set gfp to zero.
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread(struct block_device *bdev, sector_t block, unsigned size)
+__bread_gfp(struct block_device *bdev, sector_t block,
+		   unsigned size, gfp_t gfp)
 {
-	struct buffer_head *bh = __getblk(bdev, block, size);
+	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
 
 	if (likely(bh) && !buffer_uptodate(bh))
 		bh = __bread_slow(bh);
 	return bh;
 }
-EXPORT_SYMBOL(__bread);
+EXPORT_SYMBOL(__bread_gfp);
 
 /*
  * invalidate_bh_lrus() is called rarely - but not only at unmount.
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 324329ceea1e..73b45225a7ca 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
 			unsigned size);
-struct buffer_head *__getblk(struct block_device *bdev, sector_t block,
-			unsigned size);
+struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
+				  unsigned size, gfp_t gfp);
 void __brelse(struct buffer_head *);
 void __bforget(struct buffer_head *);
 void __breadahead(struct block_device *, sector_t block, unsigned int size);
-struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size);
+struct buffer_head *__bread_gfp(struct block_device *,
+				sector_t block, unsigned size, gfp_t gfp);
 void invalidate_bh_lrus(void);
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
@@ -295,7 +296,13 @@ static inline void bforget(struct buffer_head *bh)
 static inline struct buffer_head *
 sb_bread(struct super_block *sb, sector_t block)
 {
-	return __bread(sb->s_bdev, block, sb->s_blocksize);
+	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
+}
+
+static inline struct buffer_head *
+sb_bread_unmovable(struct super_block *sb, sector_t block)
+{
+	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
 }
 
 static inline void
@@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb, sector_t block)
 static inline struct buffer_head *
 sb_getblk(struct super_block *sb, sector_t block)
 {
-	return __getblk(sb->s_bdev, block, sb->s_blocksize);
+	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
 }
 
 static inline struct buffer_head *
@@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh)
 		__lock_buffer(bh);
 }
 
+static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
+						   sector_t block,
+						   unsigned size)
+{
+	return __getblk_gfp(bdev, block, size, 0);
+}
+
+static inline struct buffer_head *__getblk(struct block_device *bdev,
+					   sector_t block,
+					   unsigned size)
+{
+	return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
+}
+
+/**
+ *  __bread() - reads a specified block and returns the bh
+ *  @bdev: the block_device to read from
+ *  @block: number of block
+ *  @size: size (in bytes) to read
+ *
+ *  Reads a specified block, and returns buffer head that contains it.
+ *  The page cache is allocated from movable area so that it can be migrated.
+ *  It returns NULL if the block was unreadable.
+ */
+static inline struct buffer_head *
+__bread(struct block_device *bdev, sector_t block, unsigned size)
+{
+	return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
+}
+
 extern int __set_page_dirty_buffers(struct page *page);
 
 #else /* CONFIG_BLOCK */
-- 
cgit v1.2.3


From a9fe8e29945d56f35235a3a0fba99b4cf181d211 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 2 Sep 2014 15:49:26 +0200
Subject: ipv4: implement igmp_qrv sysctl to tune igmp robustness variable

As in IPv6 people might increase the igmp query robustness variable to
make sure unsolicited state change reports aren't lost on the network. Add
and document this new knob to igmp code.

RFCs allow tuning this parameter back to first IGMP RFC, so we also use
this setting for all counters, including source specific multicast.

Also take over sysctl value when upping the interface and don't reuse
the last one seen on the interface.

Cc: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  5 +++++
 include/linux/igmp.h                   |  1 +
 net/ipv4/igmp.c                        | 31 +++++++++++++++----------------
 net/ipv4/sysctl_net_ipv4.c             | 10 ++++++++++
 4 files changed, 31 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index cfc71ac0f764..db2383cb1df9 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -844,6 +844,11 @@ igmp_max_memberships - INTEGER
 
 	conf/all/*	  is special, changes the settings for all interfaces
 
+igmp_qrv - INTEGER
+	 Controls the IGMP query robustness variable (see RFC2236 8.1).
+	 Default: 2 (as specified by RFC2236 8.1)
+	 Minimum: 1 (as specified by RFC6636 4.5)
+
 log_martians - BOOLEAN
 	Log packets with impossible addresses to kernel log.
 	log_martians for the interface will be enabled if at least one of
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index f47550d75f85..2c677afeea47 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -39,6 +39,7 @@ static inline struct igmpv3_query *
 
 extern int sysctl_igmp_max_memberships;
 extern int sysctl_igmp_max_msf;
+extern int sysctl_igmp_qrv;
 
 struct ip_sf_socklist {
 	unsigned int		sl_max;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 890c4258804c..4146153d875d 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -117,7 +117,7 @@
 #define IGMP_V2_Unsolicited_Report_Interval	(10*HZ)
 #define IGMP_V3_Unsolicited_Report_Interval	(1*HZ)
 #define IGMP_Query_Response_Interval		(10*HZ)
-#define IGMP_Unsolicited_Report_Count		2
+#define IGMP_Query_Robustness_Variable		2
 
 
 #define IGMP_Initial_Report_Delay		(1)
@@ -756,8 +756,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
 {
 	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
 		return;
-	in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv :
-		IGMP_Unsolicited_Report_Count;
+	in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv;
 	igmp_ifc_start_timer(in_dev, 1);
 }
 
@@ -1086,8 +1085,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	pmc->interface = im->interface;
 	in_dev_hold(in_dev);
 	pmc->multiaddr = im->multiaddr;
-	pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
-		IGMP_Unsolicited_Report_Count;
+	pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
 	pmc->sfmode = im->sfmode;
 	if (pmc->sfmode == MCAST_INCLUDE) {
 		struct ip_sf_list *psf;
@@ -1226,8 +1224,7 @@ static void igmp_group_added(struct ip_mc_list *im)
 	}
 	/* else, v3 */
 
-	im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
-		IGMP_Unsolicited_Report_Count;
+	im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
 	igmp_ifc_event(in_dev);
 #endif
 }
@@ -1322,7 +1319,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
 	setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
-	im->unsolicit_count = IGMP_Unsolicited_Report_Count;
+	im->unsolicit_count = sysctl_igmp_qrv;
 #endif
 
 	im->next_rcu = in_dev->mc_list;
@@ -1460,7 +1457,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
 			(unsigned long)in_dev);
 	setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
 			(unsigned long)in_dev);
-	in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
+	in_dev->mr_qrv = sysctl_igmp_qrv;
 #endif
 
 	spin_lock_init(&in_dev->mc_tomb_lock);
@@ -1474,6 +1471,9 @@ void ip_mc_up(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
+#ifdef CONFIG_IP_MULTICAST
+	in_dev->mr_qrv = sysctl_igmp_qrv;
+#endif
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
 	for_each_pmc_rtnl(in_dev, pmc)
@@ -1540,7 +1540,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
  */
 int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
 int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
-
+#ifdef CONFIG_IP_MULTICAST
+int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable;
+#endif
 
 static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
 	__be32 *psfsrc)
@@ -1575,8 +1577,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
 #ifdef CONFIG_IP_MULTICAST
 		if (psf->sf_oldin &&
 		    !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
-			psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
-				IGMP_Unsolicited_Report_Count;
+			psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
 			psf->sf_next = pmc->tomb;
 			pmc->tomb = psf;
 			rv = 1;
@@ -1639,8 +1640,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 		/* filter mode change */
 		pmc->sfmode = MCAST_INCLUDE;
 #ifdef CONFIG_IP_MULTICAST
-		pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
-			IGMP_Unsolicited_Report_Count;
+		pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
 		in_dev->mr_ifc_count = pmc->crcount;
 		for (psf = pmc->sources; psf; psf = psf->sf_next)
 			psf->sf_crcount = 0;
@@ -1818,8 +1818,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 #ifdef CONFIG_IP_MULTICAST
 		/* else no filters; keep old mode for reports */
 
-		pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
-			IGMP_Unsolicited_Report_Count;
+		pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
 		in_dev->mr_ifc_count = pmc->crcount;
 		for (psf = pmc->sources; psf; psf = psf->sf_next)
 			psf->sf_crcount = 0;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 79a007c52558..45d156dacd61 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -450,6 +450,16 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+#ifdef CONFIG_IP_MULTICAST
+	{
+		.procname	= "igmp_qrv",
+		.data		= &sysctl_igmp_qrv,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one
+	},
+#endif
 	{
 		.procname	= "inet_peer_threshold",
 		.data		= &inet_peer_threshold,
-- 
cgit v1.2.3


From 6b44f519017b219a12b37173c7eef8dfce2c0100 Mon Sep 17 00:00:00 2001
From: Scot Doyle <lkml14@scotdoyle.com>
Date: Sun, 24 Aug 2014 17:12:27 +0000
Subject: sched/wait: Document timeout corner case

The timeout may elapse without 0 being returned, such as when waiting
on an unused queue. Document this possibility.

Signed-off-by: Scot Doyle <lkml14@scotdoyle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/alpine.LNX.2.11.1408241710070.6462@localhost.localdomain
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6fb1ba5f9b2f..034f6fc7c65f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -280,9 +280,11 @@ do {									\
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  *
- * The function returns 0 if the @timeout elapsed, or the remaining
- * jiffies (at least 1) if the @condition evaluated to %true before
- * the @timeout elapsed.
+ * Returns:
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * or the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed.
  */
 #define wait_event_timeout(wq, condition, timeout)			\
 ({									\
@@ -363,9 +365,11 @@ do {									\
  * change the result of the wait condition.
  *
  * Returns:
- * 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by
- * a signal, or the remaining jiffies (at least 1) if the @condition
- * evaluated to %true before the @timeout elapsed.
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
+ * interrupted by a signal.
  */
 #define wait_event_interruptible_timeout(wq, condition, timeout)	\
 ({									\
-- 
cgit v1.2.3


From 2740f0cf8ec8bc7ee6a58f68841759e367dda98f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 3 Sep 2014 15:24:58 +0300
Subject: cfg80211: add Intel Mobile Communications copyright

Our legal structure changed at some point (see wikipedia), but
we forgot to immediately switch over to the new copyright
notice.

For files that we have modified in the time since the change,
add the proper copyright notice now.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 include/net/cfg80211.h    | 1 +
 net/wireless/chan.c       | 1 +
 net/wireless/core.c       | 1 +
 net/wireless/nl80211.c    | 1 +
 net/wireless/reg.c        | 1 +
 net/wireless/scan.c       | 1 +
 net/wireless/util.c       | 1 +
 8 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8018c915ee63..77d4f26e0235 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -6,6 +6,7 @@
  * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
  * Copyright (c) 2005, Devicescape Software, Inc.
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
+ * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ab21299c8f4d..6be68bb31918 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4,6 +4,7 @@
  * 802.11 device and configuration interface
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 992b34070bcb..72d81e2154d5 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -4,6 +4,7 @@
  * any point in time.
  *
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014  Intel Mobile Communications GmbH
  */
 
 #include <linux/export.h>
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a207eb116829..9698fe709251 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -2,6 +2,7 @@
  * This is the linux wireless configuration interface.
  *
  * Copyright 2006-2010		Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014  Intel Mobile Communications GmbH
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3011401f52c0..d876146498dd 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2,6 +2,7 @@
  * This is the new netlink-based wireless configuration interface.
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014  Intel Mobile Communications GmbH
  */
 
 #include <linux/if.h>
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1afdf45db38f..8bfc9b172a49 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3,6 +3,7 @@
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2008-2011	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
+ * Copyright 2013-2014  Intel Mobile Communications GmbH
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 620a4b40d466..bda39f149810 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -2,6 +2,7 @@
  * cfg80211 scan result handling
  *
  * Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014  Intel Mobile Communications GmbH
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 728f1c0dc70d..a8b2816ffcb9 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -2,6 +2,7 @@
  * Wireless utility functions
  *
  * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014  Intel Mobile Communications GmbH
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
-- 
cgit v1.2.3


From 60a3b2253c413cf601783b070507d7dd6620c954 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 2 Sep 2014 22:53:44 +0200
Subject: net: bpf: make eBPF interpreter images read-only

With eBPF getting more extended and exposure to user space is on it's way,
hardening the memory range the interpreter uses to steer its command flow
seems appropriate.  This patch moves the to be interpreted bytecode to
read-only pages.

In case we execute a corrupted BPF interpreter image for some reason e.g.
caused by an attacker which got past a verifier stage, it would not only
provide arbitrary read/write memory access but arbitrary function calls
as well. After setting up the BPF interpreter image, its contents do not
change until destruction time, thus we can setup the image on immutable
made pages in order to mitigate modifications to that code. The idea
is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks").

This is possible because bpf_prog is not part of sk_filter anymore.
After setup bpf_prog cannot be altered during its life-time. This prevents
any modifications to the entire bpf_prog structure (incl. function/JIT
image pointer).

Every eBPF program (including classic BPF that are migrated) have to call
bpf_prog_select_runtime() to select either interpreter or a JIT image
as a last setup step, and they all are being freed via bpf_prog_free(),
including non-JIT. Therefore, we can easily integrate this into the
eBPF life-time, plus since we directly allocate a bpf_prog, we have no
performance penalty.

Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual
inspection of kernel_page_tables.  Brad Spengler proposed the same idea
via Twitter during development of this patch.

Joint work with Hannes Frederic Sowa.

Suggested-by: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/net/bpf_jit_32.c       |  3 +-
 arch/mips/net/bpf_jit.c         |  3 +-
 arch/powerpc/net/bpf_jit_comp.c |  3 +-
 arch/s390/net/bpf_jit_comp.c    |  2 +-
 arch/sparc/net/bpf_jit_comp.c   |  3 +-
 arch/x86/net/bpf_jit_comp.c     | 18 ++++------
 include/linux/filter.h          | 49 ++++++++++++++++++++++---
 kernel/bpf/core.c               | 80 +++++++++++++++++++++++++++++++++++++++--
 kernel/seccomp.c                |  7 ++--
 lib/test_bpf.c                  |  2 +-
 net/core/filter.c               |  6 ++--
 11 files changed, 144 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index a37b989a2f91..a76623bcf722 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -930,5 +930,6 @@ void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
 		module_free(NULL, fp->bpf_func);
-	kfree(fp);
+
+	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 05a56619ece2..cfa83cf2447d 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1427,5 +1427,6 @@ void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
 		module_free(NULL, fp->bpf_func);
-	kfree(fp);
+
+	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 3afa6f4c1957..40c53ff59124 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -697,5 +697,6 @@ void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
 		module_free(NULL, fp->bpf_func);
-	kfree(fp);
+
+	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 61e45b7c04d7..f2833c5b218a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -887,5 +887,5 @@ void bpf_jit_free(struct bpf_prog *fp)
 	module_free(NULL, header);
 
 free_filter:
-	kfree(fp);
+	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 1f76c22a6a75..f7a736b645e8 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -812,5 +812,6 @@ void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
 		module_free(NULL, fp->bpf_func);
-	kfree(fp);
+
+	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b08a98c59530..39ccfbb4a723 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -972,23 +972,17 @@ out:
 	kfree(addrs);
 }
 
-static void bpf_jit_free_deferred(struct work_struct *work)
+void bpf_jit_free(struct bpf_prog *fp)
 {
-	struct bpf_prog *fp = container_of(work, struct bpf_prog, work);
 	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
 	struct bpf_binary_header *header = (void *)addr;
 
+	if (!fp->jited)
+		goto free_filter;
+
 	set_memory_rw(addr, header->pages);
 	module_free(NULL, header);
-	kfree(fp);
-}
 
-void bpf_jit_free(struct bpf_prog *fp)
-{
-	if (fp->jited) {
-		INIT_WORK(&fp->work, bpf_jit_free_deferred);
-		schedule_work(&fp->work);
-	} else {
-		kfree(fp);
-	}
+free_filter:
+	bpf_prog_unlock_free(fp);
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a5227ab8ccb1..c78994593355 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -9,6 +9,11 @@
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/filter.h>
+#include <asm/cacheflush.h>
+
+struct sk_buff;
+struct sock;
+struct seccomp_data;
 
 /* Internally used and optimized filter representation with extended
  * instruction set based on top of classic BPF.
@@ -320,20 +325,23 @@ struct sock_fprog_kern {
 	struct sock_filter	*filter;
 };
 
-struct sk_buff;
-struct sock;
-struct seccomp_data;
+struct bpf_work_struct {
+	struct bpf_prog *prog;
+	struct work_struct work;
+};
 
 struct bpf_prog {
+	u32			pages;		/* Number of allocated pages */
 	u32			jited:1,	/* Is our filter JIT'ed? */
 				len:31;		/* Number of filter blocks */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
+	struct bpf_work_struct	*work;		/* Deferred free work struct */
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
 					    const struct bpf_insn *filter);
+	/* Instructions for interpreter */
 	union {
 		struct sock_filter	insns[0];
 		struct bpf_insn		insnsi[0];
-		struct work_struct	work;
 	};
 };
 
@@ -353,6 +361,26 @@ static inline unsigned int bpf_prog_size(unsigned int proglen)
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
+{
+	set_memory_ro((unsigned long)fp, fp->pages);
+}
+
+static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
+{
+	set_memory_rw((unsigned long)fp, fp->pages);
+}
+#else
+static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
+{
+}
+
+static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
+{
+}
+#endif /* CONFIG_DEBUG_SET_MODULE_RONX */
+
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
 void bpf_prog_select_runtime(struct bpf_prog *fp);
@@ -361,6 +389,17 @@ void bpf_prog_free(struct bpf_prog *fp);
 int bpf_convert_filter(struct sock_filter *prog, int len,
 		       struct bpf_insn *new_prog, int *new_len);
 
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
+struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
+				  gfp_t gfp_extra_flags);
+void __bpf_prog_free(struct bpf_prog *fp);
+
+static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
+{
+	bpf_prog_unlock_ro(fp);
+	__bpf_prog_free(fp);
+}
+
 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
 void bpf_prog_destroy(struct bpf_prog *fp);
 
@@ -450,7 +489,7 @@ static inline void bpf_jit_compile(struct bpf_prog *fp)
 
 static inline void bpf_jit_free(struct bpf_prog *fp)
 {
-	kfree(fp);
+	bpf_prog_unlock_free(fp);
 }
 #endif /* CONFIG_BPF_JIT */
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7f0dbcbb34af..b54bb2c2e494 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -22,6 +22,7 @@
  */
 #include <linux/filter.h>
 #include <linux/skbuff.h>
+#include <linux/vmalloc.h>
 #include <asm/unaligned.h>
 
 /* Registers */
@@ -63,6 +64,67 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
 	return NULL;
 }
 
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+{
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
+			  gfp_extra_flags;
+	struct bpf_work_struct *ws;
+	struct bpf_prog *fp;
+
+	size = round_up(size, PAGE_SIZE);
+	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+	if (fp == NULL)
+		return NULL;
+
+	ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags);
+	if (ws == NULL) {
+		vfree(fp);
+		return NULL;
+	}
+
+	fp->pages = size / PAGE_SIZE;
+	fp->work = ws;
+
+	return fp;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_alloc);
+
+struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
+				  gfp_t gfp_extra_flags)
+{
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
+			  gfp_extra_flags;
+	struct bpf_prog *fp;
+
+	BUG_ON(fp_old == NULL);
+
+	size = round_up(size, PAGE_SIZE);
+	if (size <= fp_old->pages * PAGE_SIZE)
+		return fp_old;
+
+	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+	if (fp != NULL) {
+		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
+		fp->pages = size / PAGE_SIZE;
+
+		/* We keep fp->work from fp_old around in the new
+		 * reallocated structure.
+		 */
+		fp_old->work = NULL;
+		__bpf_prog_free(fp_old);
+	}
+
+	return fp;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_realloc);
+
+void __bpf_prog_free(struct bpf_prog *fp)
+{
+	kfree(fp->work);
+	vfree(fp);
+}
+EXPORT_SYMBOL_GPL(__bpf_prog_free);
+
 /* Base function for offset calculation. Needs to go into .text section,
  * therefore keeping it non-static as well; will also be used by JITs
  * anyway later on, so do not let the compiler omit it.
@@ -523,12 +585,26 @@ void bpf_prog_select_runtime(struct bpf_prog *fp)
 
 	/* Probe if internal BPF can be JITed */
 	bpf_int_jit_compile(fp);
+	/* Lock whole bpf_prog as read-only */
+	bpf_prog_lock_ro(fp);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
-/* free internal BPF program */
+static void bpf_prog_free_deferred(struct work_struct *work)
+{
+	struct bpf_work_struct *ws;
+
+	ws = container_of(work, struct bpf_work_struct, work);
+	bpf_jit_free(ws->prog);
+}
+
+/* Free internal BPF program */
 void bpf_prog_free(struct bpf_prog *fp)
 {
-	bpf_jit_free(fp);
+	struct bpf_work_struct *ws = fp->work;
+
+	INIT_WORK(&ws->work, bpf_prog_free_deferred);
+	ws->prog = fp;
+	schedule_work(&ws->work);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_free);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 44eb005c6695..84922befea84 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -395,16 +395,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 	if (!filter)
 		goto free_prog;
 
-	filter->prog = kzalloc(bpf_prog_size(new_len),
-			       GFP_KERNEL|__GFP_NOWARN);
+	filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN);
 	if (!filter->prog)
 		goto free_filter;
 
 	ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
 	if (ret)
 		goto free_filter_prog;
-	kfree(fp);
 
+	kfree(fp);
 	atomic_set(&filter->usage, 1);
 	filter->prog->len = new_len;
 
@@ -413,7 +412,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 	return filter;
 
 free_filter_prog:
-	kfree(filter->prog);
+	__bpf_prog_free(filter->prog);
 free_filter:
 	kfree(filter);
 free_prog:
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 8c66c6aace04..9a67456ba29a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err)
 		break;
 
 	case INTERNAL:
-		fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL);
+		fp = bpf_prog_alloc(bpf_prog_size(flen), 0);
 		if (fp == NULL) {
 			pr_cont("UNEXPECTED_FAIL no memory left\n");
 			*err = -ENOMEM;
diff --git a/net/core/filter.c b/net/core/filter.c
index d814b8a89d0f..37f8eb06fdee 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 
 	/* Expand fp for appending the new filter representation. */
 	old_fp = fp;
-	fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL);
+	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
 	if (!fp) {
 		/* The old_fp is still around in case we couldn't
 		 * allocate new memory, so uncharge on that one.
@@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL);
+	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
 	if (!fp)
 		return -ENOMEM;
 
@@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	prog = kmalloc(bpf_fsize, GFP_KERNEL);
+	prog = bpf_prog_alloc(bpf_fsize, 0);
 	if (!prog)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From f0db9b073415848709dd59a6394969882f517da9 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <_govind@gmx.com>
Date: Wed, 3 Sep 2014 03:17:20 +0530
Subject: ethtool: Add generic options for tunables

This patch adds new ethtool cmd, ETHTOOL_GTUNABLE & ETHTOOL_STUNABLE for getting
tunable values from driver.

Add get_tunable and set_tunable to ethtool_ops. Driver implements these
functions for getting/setting tunable value.

Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h      |  4 +++
 include/uapi/linux/ethtool.h | 28 +++++++++++++++
 net/core/ethtool.c           | 81 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e658229fee39..c1a2d60dfb82 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -257,6 +257,10 @@ struct ethtool_ops {
 				     struct ethtool_eeprom *, u8 *);
 	int	(*get_eee)(struct net_device *, struct ethtool_eee *);
 	int	(*set_eee)(struct net_device *, struct ethtool_eee *);
+	int	(*get_tunable)(struct net_device *,
+			       const struct ethtool_tunable *, void *);
+	int	(*set_tunable)(struct net_device *,
+			       const struct ethtool_tunable *, const void *);
 
 
 };
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index e3c7a719c76b..7a364f2f3d3f 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -209,6 +209,32 @@ struct ethtool_value {
 	__u32	data;
 };
 
+enum tunable_id {
+	ETHTOOL_ID_UNSPEC,
+	ETHTOOL_RX_COPYBREAK,
+};
+
+enum tunable_type_id {
+	ETHTOOL_TUNABLE_UNSPEC,
+	ETHTOOL_TUNABLE_U8,
+	ETHTOOL_TUNABLE_U16,
+	ETHTOOL_TUNABLE_U32,
+	ETHTOOL_TUNABLE_U64,
+	ETHTOOL_TUNABLE_STRING,
+	ETHTOOL_TUNABLE_S8,
+	ETHTOOL_TUNABLE_S16,
+	ETHTOOL_TUNABLE_S32,
+	ETHTOOL_TUNABLE_S64,
+};
+
+struct ethtool_tunable {
+	__u32	cmd;
+	__u32	id;
+	__u32	type_id;
+	__u32	len;
+	void	*data[0];
+};
+
 /**
  * struct ethtool_regs - hardware register dump
  * @cmd: Command number = %ETHTOOL_GREGS
@@ -1152,6 +1178,8 @@ enum ethtool_sfeatures_retval_bits {
 
 #define ETHTOOL_GRSSH		0x00000046 /* Get RX flow hash configuration */
 #define ETHTOOL_SRSSH		0x00000047 /* Set RX flow hash configuration */
+#define ETHTOOL_GTUNABLE	0x00000048 /* Get tunable configuration */
+#define ETHTOOL_STUNABLE	0x00000049 /* Set tunable configuration */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 17cb912793fa..27e61b886520 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1621,6 +1621,80 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
 				      modinfo.eeprom_len);
 }
 
+static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
+{
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		if (tuna->len != sizeof(u32) ||
+		    tuna->type_id != ETHTOOL_TUNABLE_U32)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
+{
+	int ret;
+	struct ethtool_tunable tuna;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void *data;
+
+	if (!ops->get_tunable)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+		return -EFAULT;
+	ret = ethtool_tunable_valid(&tuna);
+	if (ret)
+		return ret;
+	data = kmalloc(tuna.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+	ret = ops->get_tunable(dev, &tuna, data);
+	if (ret)
+		goto out;
+	useraddr += sizeof(tuna);
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, data, tuna.len))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
+{
+	int ret;
+	struct ethtool_tunable tuna;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void *data;
+
+	if (!ops->set_tunable)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+		return -EFAULT;
+	ret = ethtool_tunable_valid(&tuna);
+	if (ret)
+		return ret;
+	data = kmalloc(tuna.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+	useraddr += sizeof(tuna);
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr, tuna.len))
+		goto out;
+	ret = ops->set_tunable(dev, &tuna, data);
+
+out:
+	kfree(data);
+	return ret;
+}
+
 /* The main entry point in this file.  Called from net/core/dev_ioctl.c */
 
 int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -1670,6 +1744,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GCHANNELS:
 	case ETHTOOL_GET_TS_INFO:
 	case ETHTOOL_GEEE:
+	case ETHTOOL_GTUNABLE:
 		break;
 	default:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -1857,6 +1932,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GMODULEEEPROM:
 		rc = ethtool_get_module_eeprom(dev, useraddr);
 		break;
+	case ETHTOOL_GTUNABLE:
+		rc = ethtool_get_tunable(dev, useraddr);
+		break;
+	case ETHTOOL_STUNABLE:
+		rc = ethtool_set_tunable(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From e793c0f70e9bdf4a2e71c151a1a3cf85c4db92ad Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Thu, 4 Sep 2014 23:44:36 +0900
Subject: net: treewide: Fix typo found in DocBook/networking.xml

This patch fix spelling typo found in DocBook/networking.xml.
It is because the neworking.xml is generated from comments
in the source, I have to fix typo in comments within the source.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  | 4 ++--
 include/net/wimax.h        | 2 +-
 include/trace/events/irq.h | 4 ++--
 net/core/datagram.c        | 2 +-
 net/core/gen_estimator.c   | 2 +-
 net/core/gen_stats.c       | 2 +-
 net/core/skbuff.c          | 4 ++--
 net/core/sock.c            | 4 ++--
 net/socket.c               | 2 +-
 9 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 38377392d082..c8e388e5fccc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3176,7 +3176,7 @@ static inline int __dev_uc_sync(struct net_device *dev,
 }
 
 /**
- *  __dev_uc_unsync - Remove synchonized addresses from device
+ *  __dev_uc_unsync - Remove synchronized addresses from device
  *  @dev:  device to sync
  *  @unsync: function to call if address should be removed
  *
@@ -3220,7 +3220,7 @@ static inline int __dev_mc_sync(struct net_device *dev,
 }
 
 /**
- *  __dev_mc_unsync - Remove synchonized addresses from device
+ *  __dev_mc_unsync - Remove synchronized addresses from device
  *  @dev:  device to sync
  *  @unsync: function to call if address should be removed
  *
diff --git a/include/net/wimax.h b/include/net/wimax.h
index e52ef5357e08..c52b68577cb0 100644
--- a/include/net/wimax.h
+++ b/include/net/wimax.h
@@ -290,7 +290,7 @@ struct wimax_dev;
  *     This operation has to be synchronous, and return only when the
  *     reset is complete. In case of having had to resort to bus/cold
  *     reset implying a device disconnection, the call is allowed to
- *     return inmediately.
+ *     return immediately.
  *     NOTE: wimax_dev->mutex is NOT locked when this op is being
  *     called; however, wimax_dev->mutex_reset IS locked to ensure
  *     serialization of calls to wimax_reset().
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 1c09820df585..3608bebd3d9c 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -107,7 +107,7 @@ DECLARE_EVENT_CLASS(softirq,
  * @vec_nr:  softirq vector number
  *
  * When used in combination with the softirq_exit tracepoint
- * we can determine the softirq handler runtine.
+ * we can determine the softirq handler routine.
  */
 DEFINE_EVENT(softirq, softirq_entry,
 
@@ -121,7 +121,7 @@ DEFINE_EVENT(softirq, softirq_entry,
  * @vec_nr:  softirq vector number
  *
  * When used in combination with the softirq_entry tracepoint
- * we can determine the softirq handler runtine.
+ * we can determine the softirq handler routine.
  */
 DEFINE_EVENT(softirq, softirq_exit,
 
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 488dd1a825c0..fdbc9a81d4c2 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -775,7 +775,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
 EXPORT_SYMBOL(__skb_checksum_complete);
 
 /**
- *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
+ *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
  *	@skb: skbuff
  *	@hlen: hardware length
  *	@iov: io vector
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6b5b6e7013ca..9d33dfffca19 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -197,7 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  * as destination. A new timer with the interval specified in the
  * configuration TLV is created. Upon each interval, the latest statistics
  * will be read from &bstats and the estimated rate will be stored in
- * &rate_est with the statistics lock grabed during this period.
+ * &rate_est with the statistics lock grabbed during this period.
  *
  * Returns 0 on success or a negative error code.
  *
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 9d3d9e78397b..2ddbce4cce14 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -206,7 +206,7 @@ EXPORT_SYMBOL(gnet_stats_copy_queue);
  * @st: application specific statistics data
  * @len: length of data
  *
- * Appends the application sepecific statistics to the top level TLV created by
+ * Appends the application specific statistics to the top level TLV created by
  * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping
  * handle is in backward compatibility mode.
  *
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 163b673f9e62..da1378a3e2c7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2647,7 +2647,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
  * skb_seq_read() will return the remaining part of the block.
  *
  * Note 1: The size of each block of data returned can be arbitrary,
- *       this limitation is the cost for zerocopy seqeuental
+ *       this limitation is the cost for zerocopy sequential
  *       reads of potentially non linear data.
  *
  * Note 2: Fragment lists within fragments are not implemented
@@ -2781,7 +2781,7 @@ EXPORT_SYMBOL(skb_find_text);
 /**
  * skb_append_datato_frags - append the user data to a skb
  * @sk: sock  structure
- * @skb: skb structure to be appened with user data.
+ * @skb: skb structure to be appended with user data.
  * @getfrag: call back function to be used for getting the user data
  * @from: pointer to user message iov
  * @length: length of the iov message
diff --git a/net/core/sock.c b/net/core/sock.c
index 29870571c42f..d372b4bd3f99 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(sk_ns_capable);
 /**
  * sk_capable - Socket global capability test
  * @sk: Socket to use a capability on or through
- * @cap: The global capbility to use
+ * @cap: The global capability to use
  *
  * Test to see if the opener of the socket had when the socket was
  * created and the current process has the capability @cap in all user
@@ -183,7 +183,7 @@ EXPORT_SYMBOL(sk_capable);
  * @sk: Socket to use a capability on or through
  * @cap: The capability to use
  *
- * Test to see if the opener of the socket had when the socke was created
+ * Test to see if the opener of the socket had when the socket was created
  * and the current process has the capability @cap over the network namespace
  * the socket is a member of.
  */
diff --git a/net/socket.c b/net/socket.c
index 4eb09b34b2d3..2e2586e2dee1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2601,7 +2601,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
  *
  *	This function is called by a protocol handler that wants to
  *	advertise its address family, and have it linked into the
- *	socket interface. The value ops->family coresponds to the
+ *	socket interface. The value ops->family corresponds to the
  *	socket system call protocol family.
  */
 int sock_register(const struct net_proto_family *ops)
-- 
cgit v1.2.3


From 62bccb8cdb69051b95a55ab0c489e3cab261c8ef Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 4 Sep 2014 13:31:35 -0400
Subject: net-timestamp: Make the clone operation stand-alone from phy
 timestamping

The phy timestamping takes a different path than the regular timestamping
does in that it will create a clone first so that the packets needing to be
timestamped can be placed in a queue, or the context block could be used.

In order to support these use cases I am pulling the core of the code out
so it can be used in other drivers beyond just phy devices.

In addition I have added a destructor named sock_efree which is meant to
provide a simple way for dropping the reference to skb exceptions that
aren't part of either the receive or send windows for the socket, and I
have removed some duplication in spots where this destructor could be used
in place of sock_edemux.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83640.c |  6 +++---
 include/linux/skbuff.h    |  2 ++
 include/net/sock.h        |  1 +
 net/core/skbuff.c         | 32 +++++++++++++++++++++++++-------
 net/core/sock.c           |  6 ++++++
 net/core/timestamping.c   | 14 +++-----------
 6 files changed, 40 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index d5991ac46ab0..87648b306551 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1148,7 +1148,7 @@ static void dp83640_remove(struct phy_device *phydev)
 		kfree_skb(skb);
 
 	while ((skb = skb_dequeue(&dp83640->tx_queue)) != NULL)
-		skb_complete_tx_timestamp(skb, NULL);
+		kfree_skb(skb);
 
 	clock = dp83640_clock_get(dp83640->clock);
 
@@ -1405,7 +1405,7 @@ static void dp83640_txtstamp(struct phy_device *phydev,
 
 	case HWTSTAMP_TX_ONESTEP_SYNC:
 		if (is_sync(skb, type)) {
-			skb_complete_tx_timestamp(skb, NULL);
+			kfree_skb(skb);
 			return;
 		}
 		/* fall through */
@@ -1416,7 +1416,7 @@ static void dp83640_txtstamp(struct phy_device *phydev,
 
 	case HWTSTAMP_TX_OFF:
 	default:
-		skb_complete_tx_timestamp(skb, NULL);
+		kfree_skb(skb);
 		break;
 	}
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 02529fcad1ac..1cf0cfaef10a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2690,6 +2690,8 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
+struct sk_buff *skb_clone_sk(struct sk_buff *skb);
+
 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
 
 void skb_clone_tx_timestamp(struct sk_buff *skb);
diff --git a/include/net/sock.h b/include/net/sock.h
index 3fde6130789d..e02be37a3d91 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1574,6 +1574,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 void sock_wfree(struct sk_buff *skb);
 void skb_orphan_partial(struct sk_buff *skb);
 void sock_rfree(struct sk_buff *skb);
+void sock_efree(struct sk_buff *skb);
 void sock_edemux(struct sk_buff *skb);
 
 int sock_setsockopt(struct socket *sock, int level, int op,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 697e696c914b..a936a401564e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3511,6 +3511,27 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_dequeue_err_skb);
 
+struct sk_buff *skb_clone_sk(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	struct sk_buff *clone;
+
+	if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
+		return NULL;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (!clone) {
+		sock_put(sk);
+		return NULL;
+	}
+
+	clone->sk = sk;
+	clone->destructor = sock_efree;
+
+	return clone;
+}
+EXPORT_SYMBOL(skb_clone_sk);
+
 static void __skb_complete_tx_timestamp(struct sk_buff *skb,
 					struct sock *sk,
 					int tstype)
@@ -3540,14 +3561,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 {
 	struct sock *sk = skb->sk;
 
-	skb->sk = NULL;
+	/* take a reference to prevent skb_orphan() from freeing the socket */
+	sock_hold(sk);
 
-	if (hwtstamps) {
-		*skb_hwtstamps(skb) = *hwtstamps;
-		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
-	} else {
-		kfree_skb(skb);
-	}
+	*skb_hwtstamps(skb) = *hwtstamps;
+	__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
 
 	sock_put(sk);
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index f1a638ee93d9..d04005c51724 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1637,6 +1637,12 @@ void sock_rfree(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_rfree);
 
+void sock_efree(struct sk_buff *skb)
+{
+	sock_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_efree);
+
 void sock_edemux(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index f48a59fd8f39..43d3dd62fcc8 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -36,10 +36,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
 {
 	struct phy_device *phydev;
 	struct sk_buff *clone;
-	struct sock *sk = skb->sk;
 	unsigned int type;
 
-	if (!sk)
+	if (!skb->sk)
 		return;
 
 	type = classify(skb);
@@ -48,16 +47,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
 
 	phydev = skb->dev->phydev;
 	if (likely(phydev->drv->txtstamp)) {
-		if (!atomic_inc_not_zero(&sk->sk_refcnt))
+		clone = skb_clone_sk(skb);
+		if (!clone)
 			return;
-
-		clone = skb_clone(skb, GFP_ATOMIC);
-		if (!clone) {
-			sock_put(sk);
-			return;
-		}
-
-		clone->sk = sk;
 		phydev->drv->txtstamp(phydev, clone, type);
 	}
 }
-- 
cgit v1.2.3


From 56193d1bce2b2759cb4bdcc00cd05544894a0c90 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 5 Sep 2014 19:20:26 -0400
Subject: net: Add function for parsing the header length out of linear
 ethernet frames

This patch updates some of the flow_dissector api so that it can be used to
parse the length of ethernet buffers stored in fragments.  Most of the
changes needed were to __skb_get_poff as it needed to be updated to support
sending a linear buffer instead of a skb.

I have split __skb_get_poff into two functions, the first is skb_get_poff
and it retains the functionality of the original __skb_get_poff.  The other
function is __skb_get_poff which now works much like __skb_flow_dissect in
relation to skb_flow_dissect in that it provides the same functionality but
works with just a data buffer and hlen instead of needing an skb.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  1 +
 include/linux/skbuff.h      |  4 +++-
 include/net/flow_keys.h     |  2 ++
 net/core/filter.c           |  2 +-
 net/core/flow_dissector.c   | 46 +++++++++++++++++++++++++++++++--------------
 net/ethernet/eth.c          | 27 ++++++++++++++++++++++++++
 6 files changed, 66 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 9c5529dc6d07..733980fce8e3 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -29,6 +29,7 @@
 #include <asm/bitsperlong.h>
 
 #ifdef __KERNEL__
+u32 eth_get_headlen(void *data, unsigned int max_len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1cf0cfaef10a..07c9fdd0c126 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3218,7 +3218,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
 
 int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
 
-u32 __skb_get_poff(const struct sk_buff *skb);
+u32 skb_get_poff(const struct sk_buff *skb);
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+		   const struct flow_keys *keys, int hlen);
 
 /**
  * skb_head_is_locked - Determine if the skb->head is locked down
diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
index 9a03f73c4974..7ee2df083542 100644
--- a/include/net/flow_keys.h
+++ b/include/net/flow_keys.h
@@ -40,4 +40,6 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8
 	return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
 }
 u32 flow_hash_from_keys(struct flow_keys *keys);
+unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len,
+			   __be16 protocol);
 #endif
diff --git a/net/core/filter.c b/net/core/filter.c
index 37f8eb06fdee..fa5b7d0f77ac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -113,7 +113,7 @@ static unsigned int pkt_type_offset(void)
 
 static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
-	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+	return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
 }
 
 static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 12f48ca7a0b0..8560dea58803 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -13,6 +13,7 @@
 #include <linux/if_pppox.h>
 #include <linux/ppp_defs.h>
 #include <net/flow_keys.h>
+#include <scsi/fc/fc_fcoe.h>
 
 /* copy saddr & daddr, possibly using 64bit load/store
  * Equivalent to :	flow->src = iph->saddr;
@@ -117,6 +118,13 @@ ipv6:
 		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
 		nhoff += sizeof(struct ipv6hdr);
 
+		/* skip the flow label processing if skb is NULL.  The
+		 * assumption here is that if there is no skb we are not
+		 * looking for flow info as much as we are length.
+		 */
+		if (!skb)
+			break;
+
 		flow_label = ip6_flowlabel(iph);
 		if (flow_label) {
 			/* Awesome, IPv6 packet has a flow label so we can
@@ -165,6 +173,9 @@ ipv6:
 			return false;
 		}
 	}
+	case htons(ETH_P_FCOE):
+		flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
+		/* fall through */
 	default:
 		return false;
 	}
@@ -316,26 +327,18 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(__skb_tx_hash);
 
-/* __skb_get_poff() returns the offset to the payload as far as it could
- * be dissected. The main user is currently BPF, so that we can dynamically
- * truncate packets without needing to push actual payload to the user
- * space and can analyze headers only, instead.
- */
-u32 __skb_get_poff(const struct sk_buff *skb)
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+		   const struct flow_keys *keys, int hlen)
 {
-	struct flow_keys keys;
-	u32 poff = 0;
+	u32 poff = keys->thoff;
 
-	if (!skb_flow_dissect(skb, &keys))
-		return 0;
-
-	poff += keys.thoff;
-	switch (keys.ip_proto) {
+	switch (keys->ip_proto) {
 	case IPPROTO_TCP: {
 		const struct tcphdr *tcph;
 		struct tcphdr _tcph;
 
-		tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph);
+		tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
+					    data, hlen, &_tcph);
 		if (!tcph)
 			return poff;
 
@@ -369,6 +372,21 @@ u32 __skb_get_poff(const struct sk_buff *skb)
 	return poff;
 }
 
+/* skb_get_poff() returns the offset to the payload as far as it could
+ * be dissected. The main user is currently BPF, so that we can dynamically
+ * truncate packets without needing to push actual payload to the user
+ * space and can analyze headers only, instead.
+ */
+u32 skb_get_poff(const struct sk_buff *skb)
+{
+	struct flow_keys keys;
+
+	if (!skb_flow_dissect(skb, &keys))
+		return 0;
+
+	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
+}
+
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 5cebca134585..33a140e15834 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -145,6 +145,33 @@ int eth_rebuild_header(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(eth_rebuild_header);
 
+/**
+ * eth_get_headlen - determine the the length of header for an ethernet frame
+ * @data: pointer to start of frame
+ * @len: total length of frame
+ *
+ * Make a best effort attempt to pull the length for all of the headers for
+ * a given frame in a linear buffer.
+ */
+u32 eth_get_headlen(void *data, unsigned int len)
+{
+	const struct ethhdr *eth = (const struct ethhdr *)data;
+	struct flow_keys keys;
+
+	/* this should never happen, but better safe than sorry */
+	if (len < sizeof(*eth))
+		return len;
+
+	/* parse any remaining L2/L3 headers, check for L4 */
+	if (!__skb_flow_dissect(NULL, &keys, data,
+				eth->h_proto, sizeof(*eth), len))
+		return max_t(u32, keys.thoff, sizeof(*eth));
+
+	/* parse for any L4 headers */
+	return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
+}
+EXPORT_SYMBOL(eth_get_headlen);
+
 /**
  * eth_type_trans - determine the packet's protocol ID.
  * @skb: received socket data
-- 
cgit v1.2.3


From dec38b5ce6a9edb406c60c2670b26a1a4262fdb9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Sat, 6 Sep 2014 01:11:12 +0100
Subject: regulator: isl9305: Add Intersil ISL9305/H driver

The ISL9305 and ISL9305H are mini-PMICs offering two DCDC regulators and
two LDO regulators. While there are some register differences between them
these do not affect the current Linux driver as the relevant features are
not yet supported.

Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/isl9305.txt      |  36 +++
 drivers/regulator/Kconfig                          |   6 +
 drivers/regulator/Makefile                         |   1 +
 drivers/regulator/isl9305.c                        | 247 +++++++++++++++++++++
 include/linux/platform_data/isl9305.h              |  30 +++
 5 files changed, 320 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/isl9305.txt
 create mode 100644 drivers/regulator/isl9305.c
 create mode 100644 include/linux/platform_data/isl9305.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/regulator/isl9305.txt b/Documentation/devicetree/bindings/regulator/isl9305.txt
new file mode 100644
index 000000000000..a626fc1bbf0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/isl9305.txt
@@ -0,0 +1,36 @@
+Intersil ISL9305/ISL9305H voltage regulator
+
+Required properties:
+
+- compatible: "isl,isl9305" or "isl,isl9305h"
+- reg: I2C slave address, usually 0x68.
+- regulators: A node that houses a sub-node for each regulator within the
+  device. Each sub-node is identified using the node's name, with valid
+  values being "dcd1", "dcd2", "ldo1" and "ldo2". The content of each sub-node
+  is defined by the standard binding for regulators; see regulator.txt.
+- VINDCD1-supply: A phandle to a regulator node supplying VINDCD1.
+  VINDCD2-supply: A phandle to a regulator node supplying VINDCD2.
+  VINLDO1-supply: A phandle to a regulator node supplying VINLDO1.
+  VINLDO2-supply: A phandle to a regulator node supplying VINLDO2.
+
+Optional properties:
+- Per-regulator optional properties are defined in regulator.txt
+
+Example
+
+	pmic: isl9305@68 {
+		compatible = "isl,isl9305";
+		reg = <0x68>;
+
+		VINDCD1-supply = <&system_power>;
+		VINDCD2-supply = <&system_power>;
+		VINLDO1-supply = <&system_power>;
+		VINLDO2-supply = <&system_power>;
+
+		regulators {
+			dcd1 {
+			        regulator-name = "VDD_DSP";
+				regulator-always-on;
+                        };
+		};
+	};
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index e6b98ed4c12f..3c8b6f401e4f 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -250,6 +250,12 @@ config REGULATOR_HI6421
 	  21 general purpose LDOs, 3 dedicated LDOs, and 5 BUCKs. All
 	  of them come with support to either ECO (idle) or sleep mode.
 
+config REGULATOR_ISL9305
+	tristate "Intersil ISL9305 regulator"
+	depends on I2C
+	help
+	  This driver supports ISL9305 voltage regulator chip.
+
 config REGULATOR_ISL6271A
 	tristate "Intersil ISL6271A Power regulator"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 5513e2c141b1..e12fee8c943b 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_REGULATOR_FAN53555) += fan53555.o
 obj-$(CONFIG_REGULATOR_GPIO) += gpio-regulator.o
 obj-$(CONFIG_REGULATOR_HI6421) += hi6421-regulator.o
 obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o
+obj-$(CONFIG_REGULATOR_ISL9305) += isl9305.o
 obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
 obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o
 obj-$(CONFIG_REGULATOR_LP872X) += lp872x.o
diff --git a/drivers/regulator/isl9305.c b/drivers/regulator/isl9305.c
new file mode 100644
index 000000000000..b0d12d186b68
--- /dev/null
+++ b/drivers/regulator/isl9305.c
@@ -0,0 +1,247 @@
+/*
+ * isl9305 - Intersil ISL9305 DCDC regulator
+ *
+ * Copyright 2014 Linaro Ltd
+ *
+ * Author: Mark Brown <broonie@kernel.org>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/of.h>
+#include <linux/platform_data/isl9305.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/slab.h>
+
+/*
+ * Registers
+ */
+#define ISL9305_DCD1OUT          0x0
+#define ISL9305_DCD2OUT          0x1
+#define ISL9305_LDO1OUT          0x2
+#define ISL9305_LDO2OUT          0x3
+#define ISL9305_DCD_PARAMETER    0x4
+#define ISL9305_SYSTEM_PARAMETER 0x5
+#define ISL9305_DCD_SRCTL        0x6
+
+#define ISL9305_MAX_REG ISL9305_DCD_SRCTL
+
+/*
+ * DCD_PARAMETER
+ */
+#define ISL9305_DCD_PHASE   0x40
+#define ISL9305_DCD2_ULTRA  0x20
+#define ISL9305_DCD1_ULTRA  0x10
+#define ISL9305_DCD2_BLD    0x08
+#define ISL9305_DCD1_BLD    0x04
+#define ISL9305_DCD2_MODE   0x02
+#define ISL9305_DCD1_MODE   0x01
+
+/*
+ * SYSTEM_PARAMETER
+ */
+#define ISL9305_I2C_EN      0x40
+#define ISL9305_DCDPOR_MASK 0x30
+#define ISL9305_LDO2_EN     0x08
+#define ISL9305_LDO1_EN     0x04
+#define ISL9305_DCD2_EN     0x02
+#define ISL9305_DCD1_EN     0x01
+
+/*
+ * DCD_SRCTL
+ */
+#define ISL9305_DCD2SR_MASK 0xc0
+#define ISL9305_DCD1SR_MASK 0x07
+
+static const struct regulator_ops isl9305_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.list_voltage = regulator_list_voltage_linear,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+};
+
+static const struct regulator_desc isl9305_regulators[] = {
+	[ISL9305_DCD1] = {
+		.name =		"DCD1",
+		.n_voltages =	0x70,
+		.min_uV =	825000,
+		.uV_step =	25000,
+		.vsel_reg =	ISL9305_DCD1OUT,
+		.vsel_mask =	0x7f,
+		.enable_reg =	ISL9305_SYSTEM_PARAMETER,
+		.enable_mask =	ISL9305_DCD1_EN,
+		.supply_name =	"VINDCD1",
+		.ops =		&isl9305_ops,
+	},
+	[ISL9305_DCD2] = {
+		.name =		"DCD2",
+		.n_voltages =	0x70,
+		.min_uV =	825000,
+		.uV_step =	25000,
+		.vsel_reg =	ISL9305_DCD2OUT,
+		.vsel_mask =	0x7f,
+		.enable_reg =	ISL9305_SYSTEM_PARAMETER,
+		.enable_mask =	ISL9305_DCD2_EN,
+		.supply_name =	"VINDCD2",
+		.ops =		&isl9305_ops,
+	},
+	[ISL9305_LDO1] = {
+		.name =		"LDO1",
+		.n_voltages =	0x37,
+		.min_uV =	900000,
+		.uV_step =	50000,
+		.vsel_reg =	ISL9305_LDO1OUT,
+		.vsel_mask =	0x3f,
+		.enable_reg =	ISL9305_SYSTEM_PARAMETER,
+		.enable_mask =	ISL9305_LDO1_EN,
+		.supply_name =	"VINLDO1",
+		.ops =		&isl9305_ops,
+	},
+	[ISL9305_LDO2] = {
+		.name =		"LDO2",
+		.n_voltages =	0x37,
+		.min_uV =	900000,
+		.uV_step =	50000,
+		.vsel_reg =	ISL9305_LDO2OUT,
+		.vsel_mask =	0x3f,
+		.enable_reg =	ISL9305_SYSTEM_PARAMETER,
+		.enable_mask =	ISL9305_LDO2_EN,
+		.supply_name =	"VINLDO2",
+		.ops =		&isl9305_ops,
+	},
+};
+
+#ifdef CONFIG_OF
+static struct of_regulator_match isl9305_reg_matches[] = {
+	[ISL9305_DCD1] = { .name = "dcd1" },
+	[ISL9305_DCD2] = { .name = "dcd2" },
+	[ISL9305_LDO1] = { .name = "ldo1" },
+	[ISL9305_LDO2] = { .name = "ldo2" },
+};
+
+static struct of_regulator_match *isl9305_parse_dt(struct i2c_client *i2c)
+{
+	struct device_node *node = i2c->dev.of_node;
+	struct of_regulator_match *matches;
+	struct device_node *regs;
+	int count;
+
+	regs = of_get_child_by_name(node, "regulators");
+	if (!regs)
+		return NULL;
+
+	matches = devm_kmemdup(&i2c->dev, isl9305_reg_matches,
+			       sizeof(isl9305_reg_matches), GFP_KERNEL);
+	if (!matches)
+		return NULL;
+
+	count = of_regulator_match(&i2c->dev, regs, matches,
+				   ARRAY_SIZE(isl9305_reg_matches));
+	of_node_put(regs);
+	if ((count < 0) || (count > ARRAY_SIZE(isl9305_reg_matches)))
+		return NULL;
+
+	return matches;
+}
+#else
+static struct of_regulator_match *isl9305_parse_dt(struct i2c_client *i2c)
+{
+	return NULL;
+}
+#endif
+
+static const struct regmap_config isl9305_regmap = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = ISL9305_MAX_REG,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static int isl9305_i2c_probe(struct i2c_client *i2c,
+			     const struct i2c_device_id *id)
+{
+	struct regulator_config config = { };
+	struct isl9305_pdata *pdata = i2c->dev.platform_data;
+	struct of_regulator_match *of_matches;
+	struct regulator_dev *rdev;
+	struct regmap *regmap;
+	int i, ret;
+
+	of_matches = isl9305_parse_dt(i2c);
+
+	regmap = devm_regmap_init_i2c(i2c, &isl9305_regmap);
+	if (IS_ERR(regmap)) {
+		ret = PTR_ERR(regmap);
+		dev_err(&i2c->dev, "Failed to create regmap: %d\n", ret);
+		return ret;
+	}
+
+	config.dev = &i2c->dev;
+
+	for (i = 0; i < ARRAY_SIZE(isl9305_regulators); i++) {
+		config.of_node = NULL;
+		config.init_data = NULL;
+
+		if (of_matches) {
+			config.init_data = of_matches[i].init_data;
+			config.of_node = of_matches[i].of_node;
+		}
+
+		if (!config.init_data && pdata)
+			config.init_data = pdata->init_data[i];
+
+		rdev = devm_regulator_register(&i2c->dev,
+					       &isl9305_regulators[i],
+					       &config);
+		if (IS_ERR(rdev)) {
+			ret = PTR_ERR(rdev);
+			dev_err(&i2c->dev, "Failed to register %s: %d\n",
+				isl9305_regulators[i].name, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id isl9305_dt_ids[] = {
+	{ .compatible = "isl,isl9305" },
+	{ .compatible = "isl,isl9305h" },
+	{},
+};
+#endif
+
+static const struct i2c_device_id isl9305_i2c_id[] = {
+	{ "isl9305", },
+	{ "isl9305h", },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, isl9305_i2c_id);
+
+static struct i2c_driver isl9305_regulator_driver = {
+	.driver = {
+		.name = "isl9305",
+		.owner = THIS_MODULE,
+		.of_match_table	= of_match_ptr(isl9305_dt_ids),
+	},
+	.probe = isl9305_i2c_probe,
+	.id_table = isl9305_i2c_id,
+};
+
+module_i2c_driver(isl9305_regulator_driver);
+
+MODULE_AUTHOR("Mark Brown");
+MODULE_DESCRIPTION("Intersil ISL9305 DCDC regulator");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/isl9305.h b/include/linux/platform_data/isl9305.h
new file mode 100644
index 000000000000..1419133fa69e
--- /dev/null
+++ b/include/linux/platform_data/isl9305.h
@@ -0,0 +1,30 @@
+/*
+ * isl9305 - Intersil ISL9305 DCDC regulator
+ *
+ * Copyright 2014 Linaro Ltd
+ *
+ * Author: Mark Brown <broonie@kernel.org>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#ifndef __ISL9305_H
+#define __ISL9305_H
+
+#define ISL9305_DCD1 0
+#define ISL9305_DCD2 1
+#define ISL9305_LDO1 2
+#define ISL9305_LDO2 3
+
+#define ISL9305_MAX_REGULATOR ISL9305_LDO2
+
+struct regulator_init_data;
+
+struct isl9305_pdata {
+	struct regulator_init_data *init_data[ISL9305_MAX_REGULATOR];
+};
+
+#endif
-- 
cgit v1.2.3


From a8adcc9012d8502e06ba7b3f966bad8f2c58edc3 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Wed, 27 Aug 2014 23:44:08 +0530
Subject: power_supply: Add boot and calibration attributes

Usually PMIC's come with coulomb counting mechanism which can be
used to implement a Fuel Gauginig solution in Software itself.
One of key input to these SW Fuel Gauge solutioons is the boot up
parameters like boot voltage and boot current.

This patch adds the VOLTAGE_BOOT and CURRENT_BOOT power supply attributes
to report bootup voltage and current.

This patch also adds CALIBRATE power supply attribute which useful is
for calibrating the battery/coulomb counter.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 Documentation/power/power_supply_class.txt | 6 ++++++
 drivers/power/power_supply_sysfs.c         | 3 +++
 include/linux/power_supply.h               | 5 +++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index 48cff881cb8a..82dacc06e355 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -101,6 +101,10 @@ VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that
 these ones should be used if hardware could only guess (measure and
 retain) the thresholds of a given power supply.
 
+VOLTAGE_BOOT - Reports the voltage measured during boot
+
+CURRENT_BOOT - Reports the current measured during boot
+
 CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when
 battery considered full/empty.
 
@@ -123,6 +127,8 @@ the current drawn from a charging source.
 CHARGE_TERM_CURRENT - Charge termination current used to detect the end of charge
 condition.
 
+CALIBRATE - battery or coulomb counter calibration status
+
 CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger.
 CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the
 power supply object.
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 750a20275664..7e4726d37211 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -149,9 +149,11 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(voltage_now),
 	POWER_SUPPLY_ATTR(voltage_avg),
 	POWER_SUPPLY_ATTR(voltage_ocv),
+	POWER_SUPPLY_ATTR(voltage_boot),
 	POWER_SUPPLY_ATTR(current_max),
 	POWER_SUPPLY_ATTR(current_now),
 	POWER_SUPPLY_ATTR(current_avg),
+	POWER_SUPPLY_ATTR(current_boot),
 	POWER_SUPPLY_ATTR(power_now),
 	POWER_SUPPLY_ATTR(power_avg),
 	POWER_SUPPLY_ATTR(charge_full_design),
@@ -193,6 +195,7 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(type),
 	POWER_SUPPLY_ATTR(scope),
 	POWER_SUPPLY_ATTR(charge_term_current),
+	POWER_SUPPLY_ATTR(calibrate),
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_ATTR(model_name),
 	POWER_SUPPLY_ATTR(manufacturer),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index f3dea41dbcd2..de59a28b1b5b 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -102,9 +102,11 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_VOLTAGE_AVG,
 	POWER_SUPPLY_PROP_VOLTAGE_OCV,
+	POWER_SUPPLY_PROP_VOLTAGE_BOOT,
 	POWER_SUPPLY_PROP_CURRENT_MAX,
 	POWER_SUPPLY_PROP_CURRENT_NOW,
 	POWER_SUPPLY_PROP_CURRENT_AVG,
+	POWER_SUPPLY_PROP_CURRENT_BOOT,
 	POWER_SUPPLY_PROP_POWER_NOW,
 	POWER_SUPPLY_PROP_POWER_AVG,
 	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
@@ -146,6 +148,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */
 	POWER_SUPPLY_PROP_SCOPE,
 	POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT,
+	POWER_SUPPLY_PROP_CALIBRATE,
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_PROP_MODEL_NAME,
 	POWER_SUPPLY_PROP_MANUFACTURER,
@@ -291,6 +294,7 @@ static inline bool power_supply_is_amp_property(enum power_supply_property psp)
 	case POWER_SUPPLY_PROP_CURRENT_MAX:
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
 	case POWER_SUPPLY_PROP_CURRENT_AVG:
+	case POWER_SUPPLY_PROP_CURRENT_BOOT:
 		return 1;
 	default:
 		break;
@@ -315,6 +319,7 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp)
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
 	case POWER_SUPPLY_PROP_VOLTAGE_AVG:
 	case POWER_SUPPLY_PROP_VOLTAGE_OCV:
+	case POWER_SUPPLY_PROP_VOLTAGE_BOOT:
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
 	case POWER_SUPPLY_PROP_POWER_NOW:
-- 
cgit v1.2.3


From 521d24ee598bd8a8b71d7ac76ce2c0da0e548406 Mon Sep 17 00:00:00 2001
From: Pranith Kumar <bobby.prani@gmail.com>
Date: Tue, 8 Jul 2014 18:26:18 -0400
Subject: rcu: Return bool type in rcu_lockdep_current_cpu_online()

Return true instead of 1 in rcu_lockdep_current_cpu_online() as this
has bool as return type.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d231aa17b1d7..7e47e44bce03 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -349,7 +349,7 @@ bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
 static inline bool rcu_lockdep_current_cpu_online(void)
 {
-	return 1;
+	return true;
 }
 #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
 
-- 
cgit v1.2.3


From 85b39d305bfe809a11ff2770d380be3e2465beec Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 8 Jul 2014 15:17:59 -0700
Subject: rcu: Uninline rcu_read_lock_held()

This commit uninlines rcu_read_lock_held(). According to "size vmlinux"
this saves 28549 in .text:

	- 5541731 3014560 14757888 23314179
	+ 5513182 3026848 14757888 23297918

Note: it looks as if the data grows by 12288 bytes but this is not true,
it does not actually grow. But .data starts with ALIGN(THREAD_SIZE) and
since .text shrinks the padding grows, and thus .data grows too as it
seen by /bin/size. diff System.map:

	- ffffffff81510000 D _sdata
	- ffffffff81510000 D init_thread_union
	+ ffffffff81509000 D _sdata
	+ ffffffff8150c000 D init_thread_union

Perhaps we can change vmlinux.lds.S to .data itself, so that /bin/size
can't "wrongly" report that .data grows if .text shinks.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 36 +-----------------------------------
 kernel/rcu/update.c      | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7e47e44bce03..321ed0d4e675 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -371,41 +371,7 @@ extern struct lockdep_map rcu_sched_lock_map;
 extern struct lockdep_map rcu_callback_map;
 int debug_lockdep_rcu_enabled(void);
 
-/**
- * rcu_read_lock_held() - might we be in RCU read-side critical section?
- *
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
- * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
- * this assumes we are in an RCU read-side critical section unless it can
- * prove otherwise.  This is useful for debug checks in functions that
- * require that they be called within an RCU read-side critical section.
- *
- * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
- * and while lockdep is disabled.
- *
- * Note that rcu_read_lock() and the matching rcu_read_unlock() must
- * occur in the same context, for example, it is illegal to invoke
- * rcu_read_unlock() in process context if the matching rcu_read_lock()
- * was invoked from within an irq handler.
- *
- * Note that rcu_read_lock() is disallowed if the CPU is either idle or
- * offline from an RCU perspective, so check for those as well.
- */
-static inline int rcu_read_lock_held(void)
-{
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
-	return lock_is_held(&rcu_lock_map);
-}
-
-/*
- * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
- * hell.
- */
+int rcu_read_lock_held(void);
 int rcu_read_lock_bh_held(void);
 
 /**
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..ea8ea7b16e11 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -136,6 +136,38 @@ int notrace debug_lockdep_rcu_enabled(void)
 }
 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
 
+/**
+ * rcu_read_lock_held() - might we be in RCU read-side critical section?
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
+ * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
+ * this assumes we are in an RCU read-side critical section unless it can
+ * prove otherwise.  This is useful for debug checks in functions that
+ * require that they be called within an RCU read-side critical section.
+ *
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
+ * Note that rcu_read_lock() and the matching rcu_read_unlock() must
+ * occur in the same context, for example, it is illegal to invoke
+ * rcu_read_unlock() in process context if the matching rcu_read_lock()
+ * was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
+ */
+int rcu_read_lock_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	if (!rcu_is_watching())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
+	return lock_is_held(&rcu_lock_map);
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_held);
+
 /**
  * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  *
-- 
cgit v1.2.3


From eea203fea3484598280a07fe503e025e886297fb Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 14 Jul 2014 09:16:15 -0400
Subject: rcu: Use pr_alert/pr_cont for printing logs

User pr_alert/pr_cont for printing the logs from rcutorture module directly
instead of writing it to a buffer and then printing it. This allows us from not
having to allocate such buffers. Also remove a resulting empty function.

I tested this using the parse-torture.sh script as follows:

$ dmesg | grep torture > log.txt
$ bash parse-torture.sh log.txt test
$

There were no warnings which means that parsing went fine.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/torture.h |   2 +-
 kernel/rcu/rcutorture.c | 127 +++++++++++++++++++++---------------------------
 kernel/torture.c        |  16 +++---
 3 files changed, 64 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 5ca58fcbaf1b..fec46f8c08eb 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -51,7 +51,7 @@
 
 /* Definitions for online/offline exerciser. */
 int torture_onoff_init(long ooholdoff, long oointerval);
-char *torture_onoff_stats(char *page);
+void torture_onoff_stats(void);
 bool torture_onoff_failures(void);
 
 /* Low-rider random number generator. */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 7e67711cbae8..ff4f0c756dee 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -242,7 +242,7 @@ struct rcu_torture_ops {
 	void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
 	void (*cb_barrier)(void);
 	void (*fqs)(void);
-	void (*stats)(char *page);
+	void (*stats)(void);
 	int irq_capable;
 	int can_boost;
 	const char *name;
@@ -525,21 +525,21 @@ static void srcu_torture_barrier(void)
 	srcu_barrier(&srcu_ctl);
 }
 
-static void srcu_torture_stats(char *page)
+static void srcu_torture_stats(void)
 {
 	int cpu;
 	int idx = srcu_ctl.completed & 0x1;
 
-	page += sprintf(page, "%s%s per-CPU(idx=%d):",
-		       torture_type, TORTURE_FLAG, idx);
+	pr_alert("%s%s per-CPU(idx=%d):",
+		 torture_type, TORTURE_FLAG, idx);
 	for_each_possible_cpu(cpu) {
 		long c0, c1;
 
 		c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx];
 		c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx];
-		page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1);
+		pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
 	}
-	sprintf(page, "\n");
+	pr_cont("\n");
 }
 
 static void srcu_torture_synchronize_expedited(void)
@@ -1031,10 +1031,15 @@ rcu_torture_reader(void *arg)
 }
 
 /*
- * Create an RCU-torture statistics message in the specified buffer.
+ * Print torture statistics.  Caller must ensure that there is only
+ * one call to this function at a given time!!!  This is normally
+ * accomplished by relying on the module system to only have one copy
+ * of the module loaded, and then by giving the rcu_torture_stats
+ * kthread full control (or the init/cleanup functions when rcu_torture_stats
+ * thread is not running).
  */
 static void
-rcu_torture_printk(char *page)
+rcu_torture_stats_print(void)
 {
 	int cpu;
 	int i;
@@ -1052,55 +1057,60 @@ rcu_torture_printk(char *page)
 		if (pipesummary[i] != 0)
 			break;
 	}
-	page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG);
-	page += sprintf(page,
-		       "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
-		       rcu_torture_current,
-		       rcu_torture_current_version,
-		       list_empty(&rcu_torture_freelist),
-		       atomic_read(&n_rcu_torture_alloc),
-		       atomic_read(&n_rcu_torture_alloc_fail),
-		       atomic_read(&n_rcu_torture_free));
-	page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ",
-		       atomic_read(&n_rcu_torture_mberror),
-		       n_rcu_torture_boost_ktrerror,
-		       n_rcu_torture_boost_rterror);
-	page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ",
-		       n_rcu_torture_boost_failure,
-		       n_rcu_torture_boosts,
-		       n_rcu_torture_timers);
-	page = torture_onoff_stats(page);
-	page += sprintf(page, "barrier: %ld/%ld:%ld",
-		       n_barrier_successes,
-		       n_barrier_attempts,
-		       n_rcu_torture_barrier_error);
-	page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG);
+
+	pr_alert("%s%s ", torture_type, TORTURE_FLAG);
+	pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
+		rcu_torture_current,
+		rcu_torture_current_version,
+		list_empty(&rcu_torture_freelist),
+		atomic_read(&n_rcu_torture_alloc),
+		atomic_read(&n_rcu_torture_alloc_fail),
+		atomic_read(&n_rcu_torture_free));
+	pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ",
+		atomic_read(&n_rcu_torture_mberror),
+		n_rcu_torture_boost_ktrerror,
+		n_rcu_torture_boost_rterror);
+	pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
+		n_rcu_torture_boost_failure,
+		n_rcu_torture_boosts,
+		n_rcu_torture_timers);
+	torture_onoff_stats();
+	pr_cont("barrier: %ld/%ld:%ld\n",
+		n_barrier_successes,
+		n_barrier_attempts,
+		n_rcu_torture_barrier_error);
+
+	pr_alert("%s%s ", torture_type, TORTURE_FLAG);
 	if (atomic_read(&n_rcu_torture_mberror) != 0 ||
 	    n_rcu_torture_barrier_error != 0 ||
 	    n_rcu_torture_boost_ktrerror != 0 ||
 	    n_rcu_torture_boost_rterror != 0 ||
 	    n_rcu_torture_boost_failure != 0 ||
 	    i > 1) {
-		page += sprintf(page, "!!! ");
+		pr_cont("%s", "!!! ");
 		atomic_inc(&n_rcu_torture_error);
 		WARN_ON_ONCE(1);
 	}
-	page += sprintf(page, "Reader Pipe: ");
+	pr_cont("Reader Pipe: ");
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
-		page += sprintf(page, " %ld", pipesummary[i]);
-	page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG);
-	page += sprintf(page, "Reader Batch: ");
+		pr_cont(" %ld", pipesummary[i]);
+	pr_cont("\n");
+
+	pr_alert("%s%s ", torture_type, TORTURE_FLAG);
+	pr_cont("Reader Batch: ");
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
-		page += sprintf(page, " %ld", batchsummary[i]);
-	page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG);
-	page += sprintf(page, "Free-Block Circulation: ");
+		pr_cont(" %ld", batchsummary[i]);
+	pr_cont("\n");
+
+	pr_alert("%s%s ", torture_type, TORTURE_FLAG);
+	pr_cont("Free-Block Circulation: ");
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
-		page += sprintf(page, " %d",
-			       atomic_read(&rcu_torture_wcount[i]));
+		pr_cont(" %d", atomic_read(&rcu_torture_wcount[i]));
 	}
-	page += sprintf(page, "\n");
+	pr_cont("\n");
+
 	if (cur_ops->stats)
-		cur_ops->stats(page);
+		cur_ops->stats();
 	if (rtcv_snap == rcu_torture_current_version &&
 	    rcu_torture_current != NULL) {
 		int __maybe_unused flags;
@@ -1109,40 +1119,15 @@ rcu_torture_printk(char *page)
 
 		rcutorture_get_gp_data(cur_ops->ttype,
 				       &flags, &gpnum, &completed);
-		page += sprintf(page,
-				"??? Writer stall state %d g%lu c%lu f%#x\n",
-				rcu_torture_writer_state,
-				gpnum, completed, flags);
+		pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
+			 rcu_torture_writer_state,
+			 gpnum, completed, flags);
 		show_rcu_gp_kthreads();
 		rcutorture_trace_dump();
 	}
 	rtcv_snap = rcu_torture_current_version;
 }
 
-/*
- * Print torture statistics.  Caller must ensure that there is only
- * one call to this function at a given time!!!  This is normally
- * accomplished by relying on the module system to only have one copy
- * of the module loaded, and then by giving the rcu_torture_stats
- * kthread full control (or the init/cleanup functions when rcu_torture_stats
- * thread is not running).
- */
-static void
-rcu_torture_stats_print(void)
-{
-	int size = nr_cpu_ids * 200 + 8192;
-	char *buf;
-
-	buf = kmalloc(size, GFP_KERNEL);
-	if (!buf) {
-		pr_err("rcu-torture: Out of memory, need: %d", size);
-		return;
-	}
-	rcu_torture_printk(buf);
-	pr_alert("%s", buf);
-	kfree(buf);
-}
-
 /*
  * Periodically prints torture statistics, if periodic statistics printing
  * was specified via the stat_interval module parameter.
diff --git a/kernel/torture.c b/kernel/torture.c
index d600af21f022..ede8b25ec1ae 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -211,18 +211,16 @@ EXPORT_SYMBOL_GPL(torture_onoff_cleanup);
 /*
  * Print online/offline testing statistics.
  */
-char *torture_onoff_stats(char *page)
+void torture_onoff_stats(void)
 {
 #ifdef CONFIG_HOTPLUG_CPU
-	page += sprintf(page,
-		       "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
-		       n_online_successes, n_online_attempts,
-		       n_offline_successes, n_offline_attempts,
-		       min_online, max_online,
-		       min_offline, max_offline,
-		       sum_online, sum_offline, HZ);
+	pr_cont("onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
+		n_online_successes, n_online_attempts,
+		n_offline_successes, n_offline_attempts,
+		min_online, max_online,
+		min_offline, max_offline,
+		sum_online, sum_offline, HZ);
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
-	return page;
 }
 EXPORT_SYMBOL_GPL(torture_onoff_stats);
 
-- 
cgit v1.2.3


From 8315f42295d2667a7f942f154b73a86fd7cb2227 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 27 Jun 2014 13:42:20 -0700
Subject: rcu: Add call_rcu_tasks()

This commit adds a new RCU-tasks flavor of RCU, which provides
call_rcu_tasks().  This RCU flavor's quiescent states are voluntary
context switch (not preemption!) and userspace execution (not the idle
loop -- use some sort of schedule_on_each_cpu() if you need to handle the
idle tasks.  Note that unlike other RCU flavors, these quiescent states
occur in tasks, not necessarily CPUs.  Includes fixes from Steven Rostedt.

This RCU flavor is assumed to have very infrequent latency-tolerant
updaters.  This assumption permits significant simplifications, including
a single global callback list protected by a single global lock, along
with a single task-private linked list containing all tasks that have not
yet passed through a quiescent state.  If experience shows this assumption
to be incorrect, the required additional complexity will be added.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/init_task.h |   9 +++
 include/linux/rcupdate.h  |  36 ++++++++++
 include/linux/sched.h     |  23 ++++---
 init/Kconfig              |  10 +++
 kernel/rcu/tiny.c         |   2 +
 kernel/rcu/tree.c         |   2 +
 kernel/rcu/update.c       | 171 ++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 242 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 2bb4c4f3531a..dffd9258ee60 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -117,6 +117,14 @@ extern struct group_info init_groups;
 #else
 #define INIT_TASK_RCU_PREEMPT(tsk)
 #endif
+#ifdef CONFIG_TASKS_RCU
+#define INIT_TASK_RCU_TASKS(tsk)					\
+	.rcu_tasks_holdout = false,					\
+	.rcu_tasks_holdout_list =					\
+		LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list),
+#else
+#define INIT_TASK_RCU_TASKS(tsk)
+#endif
 
 extern struct cred init_cred;
 
@@ -224,6 +232,7 @@ extern struct task_group root_task_group;
 	INIT_FTRACE_GRAPH						\
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
+	INIT_TASK_RCU_TASKS(tsk)					\
 	INIT_CPUSET_SEQ(tsk)						\
 	INIT_RT_MUTEXES(tsk)						\
 	INIT_VTIME(tsk)							\
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d231aa17b1d7..3432063f4c87 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -197,6 +197,26 @@ void call_rcu_sched(struct rcu_head *head,
 
 void synchronize_sched(void);
 
+/**
+ * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_tasks() assumes
+ * that the read-side critical sections end at a voluntary context
+ * switch (not a preemption!), entry into idle, or transition to usermode
+ * execution.  As such, there are no read-side primitives analogous to
+ * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
+ * to determine that all tasks have passed through a safe state, not so
+ * much for data-strcuture synchronization.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
+ */
+void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head));
+
 #ifdef CONFIG_PREEMPT_RCU
 
 void __rcu_read_lock(void);
@@ -294,6 +314,22 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 		rcu_irq_exit(); \
 	} while (0)
 
+/*
+ * Note a voluntary context switch for RCU-tasks benefit.  This is a
+ * macro rather than an inline function to avoid #include hell.
+ */
+#ifdef CONFIG_TASKS_RCU
+#define rcu_note_voluntary_context_switch(t) \
+	do { \
+		preempt_disable(); /* Exclude synchronize_sched(); */ \
+		if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
+			ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
+		preempt_enable(); \
+	} while (0)
+#else /* #ifdef CONFIG_TASKS_RCU */
+#define rcu_note_voluntary_context_switch(t)	do { } while (0)
+#endif /* #else #ifdef CONFIG_TASKS_RCU */
+
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
 bool __rcu_is_watching(void);
 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..eaacac4ae77d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1270,6 +1270,11 @@ struct task_struct {
 #ifdef CONFIG_TREE_PREEMPT_RCU
 	struct rcu_node *rcu_blocked_node;
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_TASKS_RCU
+	unsigned long rcu_tasks_nvcsw;
+	bool rcu_tasks_holdout;
+	struct list_head rcu_tasks_holdout_list;
+#endif /* #ifdef CONFIG_TASKS_RCU */
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	struct sched_info sched_info;
@@ -2000,28 +2005,24 @@ extern void task_clear_jobctl_pending(struct task_struct *task,
 				      unsigned int mask);
 
 #ifdef CONFIG_PREEMPT_RCU
-
 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
 #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
+#ifdef CONFIG_PREEMPT_RCU
 	p->rcu_read_lock_nesting = 0;
 	p->rcu_read_unlock_special = 0;
-#ifdef CONFIG_TREE_PREEMPT_RCU
 	p->rcu_blocked_node = NULL;
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 	INIT_LIST_HEAD(&p->rcu_node_entry);
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
+#ifdef CONFIG_TASKS_RCU
+	p->rcu_tasks_holdout = false;
+	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
+#endif /* #ifdef CONFIG_TASKS_RCU */
 }
 
-#else
-
-static inline void rcu_copy_process(struct task_struct *p)
-{
-}
-
-#endif
-
 static inline void tsk_restore_flags(struct task_struct *task,
 				unsigned long orig_flags, unsigned long flags)
 {
diff --git a/init/Kconfig b/init/Kconfig
index e84c6423a2e5..c4539c4e177f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -507,6 +507,16 @@ config PREEMPT_RCU
 	  This option enables preemptible-RCU code that is common between
 	  TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU.
 
+config TASKS_RCU
+	bool "Task_based RCU implementation using voluntary context switch"
+	default n
+	help
+	  This option enables a task-based RCU implementation that uses
+	  only voluntary context switch (not preemption!), idle, and
+	  user-mode execution as quiescent states.
+
+	  If unsure, say N.
+
 config RCU_STALL_COMMON
 	def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
 	help
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index d9efcc13008c..717f00854fc0 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -254,6 +254,8 @@ void rcu_check_callbacks(int cpu, int user)
 		rcu_sched_qs(cpu);
 	else if (!in_softirq())
 		rcu_bh_qs(cpu);
+	if (user)
+		rcu_note_voluntary_context_switch(current);
 }
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1b70cb6fbe3c..8ad91d1e317d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2410,6 +2410,8 @@ void rcu_check_callbacks(int cpu, int user)
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
 		invoke_rcu_core();
+	if (user)
+		rcu_note_voluntary_context_switch(current);
 	trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..19b3dacb0753 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,7 @@
 #include <linux/hardirq.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
 
 #define CREATE_TRACE_POINTS
 
@@ -347,3 +348,173 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Simple variant of RCU whose quiescent states are voluntary context switch,
+ * user-space execution, and idle.  As such, grace periods can take one good
+ * long time.  There are no read-side primitives similar to rcu_read_lock()
+ * and rcu_read_unlock() because this implementation is intended to get
+ * the system into a safe state for some of the manipulations involved in
+ * tracing and the like.  Finally, this implementation does not support
+ * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
+ * per-CPU callback lists will be needed.
+ */
+
+/* Global list of callbacks and associated lock. */
+static struct rcu_head *rcu_tasks_cbs_head;
+static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
+
+/* Post an RCU-tasks callback. */
+void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
+{
+	unsigned long flags;
+
+	rhp->next = NULL;
+	rhp->func = func;
+	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+	*rcu_tasks_cbs_tail = rhp;
+	rcu_tasks_cbs_tail = &rhp->next;
+	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks);
+
+/* See if the current task has stopped holding out, remove from list if so. */
+static void check_holdout_task(struct task_struct *t)
+{
+	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
+	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
+	    !ACCESS_ONCE(t->on_rq)) {
+		ACCESS_ONCE(t->rcu_tasks_holdout) = false;
+		list_del_rcu(&t->rcu_tasks_holdout_list);
+		put_task_struct(t);
+	}
+}
+
+/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
+static int __noreturn rcu_tasks_kthread(void *arg)
+{
+	unsigned long flags;
+	struct task_struct *g, *t;
+	struct rcu_head *list;
+	struct rcu_head *next;
+	LIST_HEAD(rcu_tasks_holdouts);
+
+	/* FIXME: Add housekeeping affinity. */
+
+	/*
+	 * Each pass through the following loop makes one check for
+	 * newly arrived callbacks, and, if there are some, waits for
+	 * one RCU-tasks grace period and then invokes the callbacks.
+	 * This loop is terminated by the system going down.  ;-)
+	 */
+	for (;;) {
+
+		/* Pick up any new callbacks. */
+		raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+		list = rcu_tasks_cbs_head;
+		rcu_tasks_cbs_head = NULL;
+		rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+		raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+
+		/* If there were none, wait a bit and start over. */
+		if (!list) {
+			schedule_timeout_interruptible(HZ);
+			WARN_ON(signal_pending(current));
+			continue;
+		}
+
+		/*
+		 * Wait for all pre-existing t->on_rq and t->nvcsw
+		 * transitions to complete.  Invoking synchronize_sched()
+		 * suffices because all these transitions occur with
+		 * interrupts disabled.  Without this synchronize_sched(),
+		 * a read-side critical section that started before the
+		 * grace period might be incorrectly seen as having started
+		 * after the grace period.
+		 *
+		 * This synchronize_sched() also dispenses with the
+		 * need for a memory barrier on the first store to
+		 * ->rcu_tasks_holdout, as it forces the store to happen
+		 * after the beginning of the grace period.
+		 */
+		synchronize_sched();
+
+		/*
+		 * There were callbacks, so we need to wait for an
+		 * RCU-tasks grace period.  Start off by scanning
+		 * the task list for tasks that are not already
+		 * voluntarily blocked.  Mark these tasks and make
+		 * a list of them in rcu_tasks_holdouts.
+		 */
+		rcu_read_lock();
+		for_each_process_thread(g, t) {
+			if (t != current && ACCESS_ONCE(t->on_rq) &&
+			    !is_idle_task(t)) {
+				get_task_struct(t);
+				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
+				ACCESS_ONCE(t->rcu_tasks_holdout) = true;
+				list_add(&t->rcu_tasks_holdout_list,
+					 &rcu_tasks_holdouts);
+			}
+		}
+		rcu_read_unlock();
+
+		/*
+		 * Each pass through the following loop scans the list
+		 * of holdout tasks, removing any that are no longer
+		 * holdouts.  When the list is empty, we are done.
+		 */
+		while (!list_empty(&rcu_tasks_holdouts)) {
+			schedule_timeout_interruptible(HZ);
+			WARN_ON(signal_pending(current));
+			rcu_read_lock();
+			list_for_each_entry_rcu(t, &rcu_tasks_holdouts,
+						rcu_tasks_holdout_list)
+				check_holdout_task(t);
+			rcu_read_unlock();
+		}
+
+		/*
+		 * Because ->on_rq and ->nvcsw are not guaranteed
+		 * to have a full memory barriers prior to them in the
+		 * schedule() path, memory reordering on other CPUs could
+		 * cause their RCU-tasks read-side critical sections to
+		 * extend past the end of the grace period.  However,
+		 * because these ->nvcsw updates are carried out with
+		 * interrupts disabled, we can use synchronize_sched()
+		 * to force the needed ordering on all such CPUs.
+		 *
+		 * This synchronize_sched() also confines all
+		 * ->rcu_tasks_holdout accesses to be within the grace
+		 * period, avoiding the need for memory barriers for
+		 * ->rcu_tasks_holdout accesses.
+		 */
+		synchronize_sched();
+
+		/* Invoke the callbacks. */
+		while (list) {
+			next = list->next;
+			local_bh_disable();
+			list->func(list);
+			local_bh_enable();
+			list = next;
+			cond_resched();
+		}
+	}
+}
+
+/* Spawn rcu_tasks_kthread() at boot time. */
+static int __init rcu_spawn_tasks_kthread(void)
+{
+	struct task_struct __maybe_unused *t;
+
+	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
+	BUG_ON(IS_ERR(t));
+	return 0;
+}
+early_initcall(rcu_spawn_tasks_kthread);
+
+#endif /* #ifdef CONFIG_TASKS_RCU */
-- 
cgit v1.2.3


From bde6c3aa993066acb0d6ce32ecabe03b9d5df92d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 Jul 2014 11:26:57 -0700
Subject: rcu: Provide cond_resched_rcu_qs() to force quiescent states in long
 loops

RCU-tasks requires the occasional voluntary context switch
from CPU-bound in-kernel tasks.  In some cases, this requires
instrumenting cond_resched().  However, there is some reluctance
to countenance unconditionally instrumenting cond_resched() (see
http://lwn.net/Articles/603252/), so this commit creates a separate
cond_resched_rcu_qs() that may be used in place of cond_resched() in
locations prone to long-duration in-kernel looping.

This commit currently instruments only RCU-tasks.  Future possibilities
include also instrumenting RCU, RCU-bh, and RCU-sched in order to reduce
IPI usage.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 fs/file.c                |  2 +-
 include/linux/rcupdate.h | 13 +++++++++++++
 kernel/rcu/rcutorture.c  |  4 ++--
 kernel/rcu/tree.c        | 12 ++++++------
 kernel/rcu/tree_plugin.h |  2 +-
 mm/mlock.c               |  2 +-
 6 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file.c b/fs/file.c
index 66923fe3176e..1cafc4c9275b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -367,7 +367,7 @@ static struct fdtable *close_files(struct files_struct * files)
 				struct file * file = xchg(&fdt->fd[i], NULL);
 				if (file) {
 					filp_close(file, files);
-					cond_resched();
+					cond_resched_rcu_qs();
 				}
 			}
 			i++;
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 3432063f4c87..473350462d04 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -330,6 +330,19 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 #define rcu_note_voluntary_context_switch(t)	do { } while (0)
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
+/**
+ * cond_resched_rcu_qs - Report potential quiescent states to RCU
+ *
+ * This macro resembles cond_resched(), except that it is defined to
+ * report potential quiescent states to RCU-tasks even if the cond_resched()
+ * machinery were to be shut off, as some advocate for PREEMPT kernels.
+ */
+#define cond_resched_rcu_qs() \
+do { \
+	rcu_note_voluntary_context_switch(current); \
+	cond_resched(); \
+} while (0)
+
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
 bool __rcu_is_watching(void);
 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 948a7693748e..178716713e11 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -667,7 +667,7 @@ static int rcu_torture_boost(void *arg)
 				}
 				call_rcu_time = jiffies;
 			}
-			cond_resched();
+			cond_resched_rcu_qs();
 			stutter_wait("rcu_torture_boost");
 			if (torture_must_stop())
 				goto checkwait;
@@ -1019,7 +1019,7 @@ rcu_torture_reader(void *arg)
 		__this_cpu_inc(rcu_torture_batch[completed]);
 		preempt_enable();
 		cur_ops->readunlock(idx);
-		cond_resched();
+		cond_resched_rcu_qs();
 		stutter_wait("rcu_torture_reader");
 	} while (!torture_must_stop());
 	if (irqreader && cur_ops->irq_capable) {
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8ad91d1e317d..e23dad0661e2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1647,7 +1647,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 					    rnp->level, rnp->grplo,
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
-		cond_resched();
+		cond_resched_rcu_qs();
 	}
 
 	mutex_unlock(&rsp->onoff_mutex);
@@ -1736,7 +1736,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 		/* smp_mb() provided by prior unlock-lock pair. */
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
-		cond_resched();
+		cond_resched_rcu_qs();
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq(&rnp->lock);
@@ -1785,7 +1785,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			/* Locking provides needed memory barrier. */
 			if (rcu_gp_init(rsp))
 				break;
-			cond_resched();
+			cond_resched_rcu_qs();
 			flush_signals(current);
 			trace_rcu_grace_period(rsp->name,
 					       ACCESS_ONCE(rsp->gpnum),
@@ -1828,10 +1828,10 @@ static int __noreturn rcu_gp_kthread(void *arg)
 				trace_rcu_grace_period(rsp->name,
 						       ACCESS_ONCE(rsp->gpnum),
 						       TPS("fqsend"));
-				cond_resched();
+				cond_resched_rcu_qs();
 			} else {
 				/* Deal with stray signal. */
-				cond_resched();
+				cond_resched_rcu_qs();
 				flush_signals(current);
 				trace_rcu_grace_period(rsp->name,
 						       ACCESS_ONCE(rsp->gpnum),
@@ -2434,7 +2434,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
 	struct rcu_node *rnp;
 
 	rcu_for_each_leaf_node(rsp, rnp) {
-		cond_resched();
+		cond_resched_rcu_qs();
 		mask = 0;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		smp_mb__after_unlock_lock();
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a7997e272564..7672586d3920 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1848,7 +1848,7 @@ static int rcu_oom_notify(struct notifier_block *self,
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
-		cond_resched();
+		cond_resched_rcu_qs();
 	}
 	put_online_cpus();
 
diff --git a/mm/mlock.c b/mm/mlock.c
index ce84cb0b83ef..ab3150c26711 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -789,7 +789,7 @@ static int do_mlockall(int flags)
 
 		/* Ignore errors */
 		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
-		cond_resched();
+		cond_resched_rcu_qs();
 	}
 out:
 	return 0;
-- 
cgit v1.2.3


From 53c6d4edf874d3cbc031a53738c6cba9277faea5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 Jul 2014 12:22:23 -0700
Subject: rcu: Add synchronous grace-period waiting for RCU-tasks

It turns out to be easier to add the synchronous grace-period waiting
functions to RCU-tasks than to work around their absense in rcutorture,
so this commit adds them.  The key point is that the existence of
call_rcu_tasks() means that rcutorture needs an rcu_barrier_tasks().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  2 ++
 kernel/rcu/update.c      | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 473350462d04..640152fedcde 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -216,6 +216,8 @@ void synchronize_sched(void);
  * memory ordering guarantees.
  */
 void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head));
+void synchronize_rcu_tasks(void);
+void rcu_barrier_tasks(void);
 
 #ifdef CONFIG_PREEMPT_RCU
 
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 19b3dacb0753..5fd1ddbfcc55 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -381,6 +381,61 @@ void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
 }
 EXPORT_SYMBOL_GPL(call_rcu_tasks);
 
+/**
+ * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-tasks
+ * grace period has elapsed, in other words after all currently
+ * executing rcu-tasks read-side critical sections have elapsed.  These
+ * read-side critical sections are delimited by calls to schedule(),
+ * cond_resched_rcu_qs(), idle execution, userspace execution, calls
+ * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function
+ * preambles and profiling hooks.  The synchronize_rcu_tasks() function
+ * is not (yet) intended for heavy use from multiple CPUs.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-tasks read-side critical section whose beginning
+ * preceded the call to synchronize_rcu_tasks().  In addition, each CPU
+ * having an RCU-tasks read-side critical section that extends beyond
+ * the return from synchronize_rcu_tasks() is guaranteed to have executed
+ * a full memory barrier after the beginning of synchronize_rcu_tasks()
+ * and before the beginning of that RCU-tasks read-side critical section.
+ * Note that these guarantees include CPUs that are offline, idle, or
+ * executing in user mode, as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
+ * (but again only if the system has more than one CPU).
+ */
+void synchronize_rcu_tasks(void)
+{
+	/* Complain if the scheduler has not started.  */
+	rcu_lockdep_assert(!rcu_scheduler_active,
+			   "synchronize_rcu_tasks called too soon");
+
+	/* Wait for the grace period. */
+	wait_rcu_gp(call_rcu_tasks);
+}
+
+/**
+ * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
+ *
+ * Although the current implementation is guaranteed to wait, it is not
+ * obligated to, for example, if there are no pending callbacks.
+ */
+void rcu_barrier_tasks(void)
+{
+	/* There is only one callback queue, so this is easy.  ;-) */
+	synchronize_rcu_tasks();
+}
+
 /* See if the current task has stopped holding out, remove from list if so. */
 static void check_holdout_task(struct task_struct *t)
 {
-- 
cgit v1.2.3


From 3f95aa81d265223fdb13ea2b59883766a05adbdf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 4 Aug 2014 06:10:23 -0700
Subject: rcu: Make TASKS_RCU handle tasks that are almost done exiting

Once a task has passed exit_notify() in the do_exit() code path, it
is no longer on the task lists, and is therefore no longer visible
to rcu_tasks_kthread().  This means that an almost-exited task might
be preempted while within a trampoline, and this task won't be waited
on by rcu_tasks_kthread().  This commit fixes this bug by adding an
srcu_struct.  An exiting task does srcu_read_lock() just before calling
exit_notify(), and does the corresponding srcu_read_unlock() after
doing the final preempt_disable().  This means that rcu_tasks_kthread()
can do synchronize_srcu() to wait for all mostly-exited tasks to reach
their final preempt_disable() region, and then use synchronize_sched()
to wait for those tasks to finish exiting.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  3 +++
 kernel/exit.c            |  3 +++
 kernel/rcu/update.c      | 21 +++++++++++++++++++++
 3 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 640152fedcde..54b2ebb20313 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -321,6 +321,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
  * macro rather than an inline function to avoid #include hell.
  */
 #ifdef CONFIG_TASKS_RCU
+#define TASKS_RCU(x) x
+extern struct srcu_struct tasks_rcu_exit_srcu;
 #define rcu_note_voluntary_context_switch(t) \
 	do { \
 		preempt_disable(); /* Exclude synchronize_sched(); */ \
@@ -329,6 +331,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 		preempt_enable(); \
 	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
+#define TASKS_RCU(x) do { } while (0)
 #define rcu_note_voluntary_context_switch(t)	do { } while (0)
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 32c58f7433a3..d13f2eec4bb8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,7 @@ void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
 
@@ -775,6 +776,7 @@ void do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
+	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
 #ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
+	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
 	/*
 	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5fd1ddbfcc55..403fc4ae539e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head;
 static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
 static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
 
+/* Track exiting tasks in order to allow them to be waited for. */
+DEFINE_SRCU(tasks_rcu_exit_srcu);
+
+/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
+static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3;
+module_param(rcu_task_stall_timeout, int, 0644);
+
 /* Post an RCU-tasks callback. */
 void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
 {
@@ -517,6 +524,15 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 		}
 		rcu_read_unlock();
 
+		/*
+		 * Wait for tasks that are in the process of exiting.
+		 * This does only part of the job, ensuring that all
+		 * tasks that were previously exiting reach the point
+		 * where they have disabled preemption, allowing the
+		 * later synchronize_sched() to finish the job.
+		 */
+		synchronize_srcu(&tasks_rcu_exit_srcu);
+
 		/*
 		 * Each pass through the following loop scans the list
 		 * of holdout tasks, removing any that are no longer
@@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 		 * ->rcu_tasks_holdout accesses to be within the grace
 		 * period, avoiding the need for memory barriers for
 		 * ->rcu_tasks_holdout accesses.
+		 *
+		 * In addition, this synchronize_sched() waits for exiting
+		 * tasks to complete their final preempt_disable() region
+		 * of execution, cleaning up after the synchronize_srcu()
+		 * above.
 		 */
 		synchronize_sched();
 
-- 
cgit v1.2.3


From 69c604557ce34015629b325b85ff1a4996038a3b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 Jul 2014 11:59:36 -0700
Subject: rcutorture: Add torture tests for RCU-tasks

This commit adds torture tests for RCU-tasks.  It also fixes a bug that
would segfault for an RCU flavor lacking a callback-barrier function.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h |  1 +
 kernel/rcu/rcutorture.c  | 50 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 54b2ebb20313..a3123f53a4ce 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -55,6 +55,7 @@ enum rcutorture_type {
 	RCU_FLAVOR,
 	RCU_BH_FLAVOR,
 	RCU_SCHED_FLAVOR,
+	RCU_TASKS_FLAVOR,
 	SRCU_FLAVOR,
 	INVALID_RCU_FLAVOR
 };
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 178716713e11..75b1abf78c48 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -601,6 +601,52 @@ static struct rcu_torture_ops sched_ops = {
 	.name		= "sched"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Definitions for RCU-tasks torture testing.
+ */
+
+static int tasks_torture_read_lock(void)
+{
+	return 0;
+}
+
+static void tasks_torture_read_unlock(int idx)
+{
+}
+
+static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
+{
+	call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
+}
+
+static struct rcu_torture_ops tasks_ops = {
+	.ttype		= RCU_TASKS_FLAVOR,
+	.init		= rcu_sync_torture_init,
+	.readlock	= tasks_torture_read_lock,
+	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
+	.readunlock	= tasks_torture_read_unlock,
+	.completed	= rcu_no_completed,
+	.deferred_free	= rcu_tasks_torture_deferred_free,
+	.sync		= synchronize_rcu_tasks,
+	.exp_sync	= synchronize_rcu_tasks,
+	.call		= call_rcu_tasks,
+	.cb_barrier	= rcu_barrier_tasks,
+	.fqs		= NULL,
+	.stats		= NULL,
+	.irq_capable	= 1,
+	.name		= "tasks"
+};
+
+#define RCUTORTURE_TASKS_OPS &tasks_ops,
+
+#else /* #ifdef CONFIG_TASKS_RCU */
+
+#define RCUTORTURE_TASKS_OPS
+
+#endif /* #else #ifdef CONFIG_TASKS_RCU */
+
 /*
  * RCU torture priority-boost testing.  Runs one real-time thread per
  * CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -1295,7 +1341,8 @@ static int rcu_torture_barrier_cbs(void *arg)
 		if (atomic_dec_and_test(&barrier_cbs_count))
 			wake_up(&barrier_wq);
 	} while (!torture_must_stop());
-	cur_ops->cb_barrier();
+	if (cur_ops->cb_barrier != NULL)
+		cur_ops->cb_barrier();
 	destroy_rcu_head_on_stack(&rcu);
 	torture_kthread_stopping("rcu_torture_barrier_cbs");
 	return 0;
@@ -1534,6 +1581,7 @@ rcu_torture_init(void)
 	int firsterr = 0;
 	static struct rcu_torture_ops *torture_ops[] = {
 		&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
+		RCUTORTURE_TASKS_OPS
 	};
 
 	if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
-- 
cgit v1.2.3


From 176f8f7a52cc6d09d686f0d900abda6942a52fbb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 4 Aug 2014 17:43:50 -0700
Subject: rcu: Make TASKS_RCU handle nohz_full= CPUs

Currently TASKS_RCU would ignore a CPU running a task in nohz_full=
usermode execution.  There would be neither a context switch nor a
scheduling-clock interrupt to tell TASKS_RCU that the task in question
had passed through a quiescent state.  The grace period would therefore
extend indefinitely.  This commit therefore makes RCU's dyntick-idle
subsystem record the task_struct structure of the task that is running
in dyntick-idle mode on each CPU.  The TASKS_RCU grace period can
then access this information and record a quiescent state on
behalf of any CPU running in dyntick-idle usermode.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/init_task.h |  3 ++-
 include/linux/sched.h     |  2 ++
 kernel/rcu/tree.c         |  2 ++
 kernel/rcu/tree.h         |  2 ++
 kernel/rcu/tree_plugin.h  | 16 ++++++++++++++++
 kernel/rcu/update.c       |  4 +++-
 6 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index dffd9258ee60..03b274873b06 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -121,7 +121,8 @@ extern struct group_info init_groups;
 #define INIT_TASK_RCU_TASKS(tsk)					\
 	.rcu_tasks_holdout = false,					\
 	.rcu_tasks_holdout_list =					\
-		LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list),
+		LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list),		\
+	.rcu_tasks_idle_cpu = -1,
 #else
 #define INIT_TASK_RCU_TASKS(tsk)
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eaacac4ae77d..ec8b34722bcc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1274,6 +1274,7 @@ struct task_struct {
 	unsigned long rcu_tasks_nvcsw;
 	bool rcu_tasks_holdout;
 	struct list_head rcu_tasks_holdout_list;
+	int rcu_tasks_idle_cpu;
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -2020,6 +2021,7 @@ static inline void rcu_copy_process(struct task_struct *p)
 #ifdef CONFIG_TASKS_RCU
 	p->rcu_tasks_holdout = false;
 	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
+	p->rcu_tasks_idle_cpu = -1;
 #endif /* #ifdef CONFIG_TASKS_RCU */
 }
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e23dad0661e2..c880f5387b1f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -526,6 +526,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 	atomic_inc(&rdtp->dynticks);
 	smp_mb__after_atomic();  /* Force ordering with next sojourn. */
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+	rcu_dynticks_task_enter();
 
 	/*
 	 * It is illegal to enter an extended quiescent state while
@@ -642,6 +643,7 @@ void rcu_irq_exit(void)
 static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
 			       int user)
 {
+	rcu_dynticks_task_exit();
 	smp_mb__before_atomic();  /* Force ordering w/previous sojourn. */
 	atomic_inc(&rdtp->dynticks);
 	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 6a86eb7bac45..3a92000c354f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -605,6 +605,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
 static void rcu_bind_gp_kthread(void);
 static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
 static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
+static void rcu_dynticks_task_enter(void);
+static void rcu_dynticks_task_exit(void);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7672586d3920..e466b40052a7 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -3036,3 +3036,19 @@ static void rcu_bind_gp_kthread(void)
 		housekeeping_affine(current);
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
+
+/* Record the current task on dyntick-idle entry. */
+static void rcu_dynticks_task_enter(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+	ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
+
+/* Record no current task on dyntick-idle exit. */
+static void rcu_dynticks_task_exit(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+	ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index e1d71741958f..2658de4a5975 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -463,7 +463,9 @@ static void check_holdout_task(struct task_struct *t,
 {
 	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
 	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
-	    !ACCESS_ONCE(t->on_rq)) {
+	    !ACCESS_ONCE(t->on_rq) ||
+	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
+	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
 		ACCESS_ONCE(t->rcu_tasks_holdout) = false;
 		list_del_rcu(&t->rcu_tasks_holdout_list);
 		put_task_struct(t);
-- 
cgit v1.2.3


From 01a81330344b09028881c953a51d1106a9e63518 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 5 Aug 2014 05:23:35 -0700
Subject: rcu: Remove redundant preempt_disable() from
 rcu_note_voluntary_context_switch()

In theory, synchronize_sched() requires a read-side critical section
to order against.  In practice, preemption can be thought of as
being disabled across every machine instruction, at least for those
machine instructions that are not in the idle loop and not on offline
CPUs.  So this commit removes the redundant preempt_disable() from
rcu_note_voluntary_context_switch().

Please note that the single instruction in question is the store of
zero to ->rcu_tasks_holdout.  The "if" is simply a performance optimization
that avoids unnecessary stores.  To see this, keep in mind that both
the "if" condition and the store are in a quiescent state.  Therefore,
even if the task is preempted for a full grace period (presumably due
to its having done a context switch beforehand), the store will be
recording a legitimate quiescent state.

Reported-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Conflicts:
	include/linux/rcupdate.h
---
 include/linux/rcupdate.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a3123f53a4ce..132e1e34cdca 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -326,10 +326,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 extern struct srcu_struct tasks_rcu_exit_srcu;
 #define rcu_note_voluntary_context_switch(t) \
 	do { \
-		preempt_disable(); /* Exclude synchronize_sched(); */ \
 		if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
 			ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
-		preempt_enable(); \
 	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define TASKS_RCU(x) do { } while (0)
-- 
cgit v1.2.3


From 1d082fd061884a587c490c4fc8a2056ce1e47624 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 14 Aug 2014 16:01:53 -0700
Subject: rcu: Remove local_irq_disable() in rcu_preempt_note_context_switch()

The rcu_preempt_note_context_switch() function is on a scheduling fast
path, so it would be good to avoid disabling irqs.  The reason that irqs
are disabled is to synchronize process-level and irq-handler access to
the task_struct ->rcu_read_unlock_special bitmask.  This commit therefore
makes ->rcu_read_unlock_special instead be a union of bools with a short
allowing single-access checks in RCU's __rcu_read_unlock().  This results
in the process-level and irq-handler accesses being simple loads and
stores, so that irqs need no longer be disabled.  This commit therefore
removes the irq disabling from rcu_preempt_note_context_switch().

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/init_task.h |  2 +-
 include/linux/sched.h     | 16 +++++++++-------
 kernel/rcu/tree_plugin.h  | 32 +++++++++++++++-----------------
 kernel/rcu/update.c       |  2 +-
 4 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 03b274873b06..77fc43f8fb72 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -111,7 +111,7 @@ extern struct group_info init_groups;
 #ifdef CONFIG_PREEMPT_RCU
 #define INIT_TASK_RCU_PREEMPT(tsk)					\
 	.rcu_read_lock_nesting = 0,					\
-	.rcu_read_unlock_special = 0,					\
+	.rcu_read_unlock_special.s = 0,					\
 	.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),		\
 	INIT_TASK_RCU_TREE_PREEMPT()
 #else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ec8b34722bcc..42888d715fb1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1212,6 +1212,13 @@ struct sched_dl_entity {
 	struct hrtimer dl_timer;
 };
 
+union rcu_special {
+	struct {
+		bool blocked;
+		bool need_qs;
+	} b;
+	short s;
+};
 struct rcu_node;
 
 enum perf_event_task_context {
@@ -1264,7 +1271,7 @@ struct task_struct {
 
 #ifdef CONFIG_PREEMPT_RCU
 	int rcu_read_lock_nesting;
-	char rcu_read_unlock_special;
+	union rcu_special rcu_read_unlock_special;
 	struct list_head rcu_node_entry;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TREE_PREEMPT_RCU
@@ -2005,16 +2012,11 @@ extern void task_clear_jobctl_trapping(struct task_struct *task);
 extern void task_clear_jobctl_pending(struct task_struct *task,
 				      unsigned int mask);
 
-#ifdef CONFIG_PREEMPT_RCU
-#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
-#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-
 static inline void rcu_copy_process(struct task_struct *p)
 {
 #ifdef CONFIG_PREEMPT_RCU
 	p->rcu_read_lock_nesting = 0;
-	p->rcu_read_unlock_special = 0;
+	p->rcu_read_unlock_special.s = 0;
 	p->rcu_blocked_node = NULL;
 	INIT_LIST_HEAD(&p->rcu_node_entry);
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e466b40052a7..0981c0cd70fe 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -155,9 +155,8 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
  *
- * Unlike the other rcu_*_qs() functions, callers to this function
- * must disable irqs in order to protect the assignment to
- * ->rcu_read_unlock_special.
+ * As with the other rcu_*_qs() functions, callers to this function
+ * must disable preemption.
  */
 static void rcu_preempt_qs(int cpu)
 {
@@ -166,7 +165,7 @@ static void rcu_preempt_qs(int cpu)
 	if (rdp->passed_quiesce == 0)
 		trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
 	rdp->passed_quiesce = 1;
-	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+	current->rcu_read_unlock_special.b.need_qs = false;
 }
 
 /*
@@ -190,14 +189,14 @@ static void rcu_preempt_note_context_switch(int cpu)
 	struct rcu_node *rnp;
 
 	if (t->rcu_read_lock_nesting > 0 &&
-	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
+	    !t->rcu_read_unlock_special.b.blocked) {
 
 		/* Possibly blocking in an RCU read-side critical section. */
 		rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
 		rnp = rdp->mynode;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		smp_mb__after_unlock_lock();
-		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
+		t->rcu_read_unlock_special.b.blocked = true;
 		t->rcu_blocked_node = rnp;
 
 		/*
@@ -239,7 +238,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 				       : rnp->gpnum + 1);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	} else if (t->rcu_read_lock_nesting < 0 &&
-		   t->rcu_read_unlock_special) {
+		   t->rcu_read_unlock_special.s) {
 
 		/*
 		 * Complete exit from RCU read-side critical section on
@@ -257,9 +256,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	local_irq_save(flags);
 	rcu_preempt_qs(cpu);
-	local_irq_restore(flags);
 }
 
 /*
@@ -340,7 +337,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 	bool drop_boost_mutex = false;
 #endif /* #ifdef CONFIG_RCU_BOOST */
 	struct rcu_node *rnp;
-	int special;
+	union rcu_special special;
 
 	/* NMI handlers cannot block and cannot safely manipulate state. */
 	if (in_nmi())
@@ -350,12 +347,13 @@ void rcu_read_unlock_special(struct task_struct *t)
 
 	/*
 	 * If RCU core is waiting for this CPU to exit critical section,
-	 * let it know that we have done so.
+	 * let it know that we have done so.  Because irqs are disabled,
+	 * t->rcu_read_unlock_special cannot change.
 	 */
 	special = t->rcu_read_unlock_special;
-	if (special & RCU_READ_UNLOCK_NEED_QS) {
+	if (special.b.need_qs) {
 		rcu_preempt_qs(smp_processor_id());
-		if (!t->rcu_read_unlock_special) {
+		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
 			return;
 		}
@@ -368,8 +366,8 @@ void rcu_read_unlock_special(struct task_struct *t)
 	}
 
 	/* Clean up if blocked during RCU read-side critical section. */
-	if (special & RCU_READ_UNLOCK_BLOCKED) {
-		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
+	if (special.b.blocked) {
+		t->rcu_read_unlock_special.b.blocked = false;
 
 		/*
 		 * Remove this task from the list it blocked on.  The
@@ -658,7 +656,7 @@ static void rcu_preempt_check_callbacks(int cpu)
 	}
 	if (t->rcu_read_lock_nesting > 0 &&
 	    per_cpu(rcu_preempt_data, cpu).qs_pending)
-		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
+		t->rcu_read_unlock_special.b.need_qs = true;
 }
 
 #ifdef CONFIG_RCU_BOOST
@@ -941,7 +939,7 @@ void exit_rcu(void)
 		return;
 	t->rcu_read_lock_nesting = 1;
 	barrier();
-	t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
+	t->rcu_read_unlock_special.b.blocked = true;
 	__rcu_read_unlock();
 }
 
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 9487b4898e51..6fb911558562 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -93,7 +93,7 @@ void __rcu_read_unlock(void)
 		barrier();  /* critical section before exit code. */
 		t->rcu_read_lock_nesting = INT_MIN;
 		barrier();  /* assign before ->rcu_read_unlock_special load */
-		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
 			rcu_read_unlock_special(t);
 		barrier();  /* ->rcu_read_unlock_special load before assign */
 		t->rcu_read_lock_nesting = 0;
-- 
cgit v1.2.3


From 284a8c93af47306beed967a303d84730b32bab39 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 14 Aug 2014 16:38:46 -0700
Subject: rcu: Per-CPU operation cleanups to rcu_*_qs() functions

The rcu_bh_qs(), rcu_preempt_qs(), and rcu_sched_qs() functions use
old-style per-CPU variable access and write to ->passed_quiesce even
if it is already set.  This commit therefore updates to use the new-style
per-CPU variable access functions and avoids the spurious writes.
This commit also eliminates the "cpu" argument to these functions because
they are always invoked on the indicated CPU.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  4 ++--
 include/linux/rcutiny.h  |  2 +-
 kernel/rcu/tiny.c        | 10 +++++-----
 kernel/rcu/tree.c        | 34 ++++++++++++++++++----------------
 kernel/rcu/tree_plugin.h | 27 +++++++++++++++------------
 kernel/softirq.c         |  2 +-
 6 files changed, 42 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 132e1e34cdca..2fab0e37afe0 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -261,8 +261,8 @@ static inline int rcu_preempt_depth(void)
 
 /* Internal to kernel */
 void rcu_init(void);
-void rcu_sched_qs(int cpu);
-void rcu_bh_qs(int cpu);
+void rcu_sched_qs(void);
+void rcu_bh_qs(void);
 void rcu_check_callbacks(int cpu, int user);
 struct notifier_block;
 void rcu_idle_enter(void);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index d40a6a451330..38cc5b1e252d 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -80,7 +80,7 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 
 static inline void rcu_note_context_switch(int cpu)
 {
-	rcu_sched_qs(cpu);
+	rcu_sched_qs();
 }
 
 /*
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 717f00854fc0..61b8d2ccc2cb 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval)
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
 	}
-	rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+	rcu_sched_qs(); /* implies rcu_bh_inc() */
 	barrier();
 	rcu_dynticks_nesting = newval;
 }
@@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
  * are at it, given that any rcu quiescent state is also an rcu_bh
  * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
  */
-void rcu_sched_qs(int cpu)
+void rcu_sched_qs(void)
 {
 	unsigned long flags;
 
@@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu)
 /*
  * Record an rcu_bh quiescent state.
  */
-void rcu_bh_qs(int cpu)
+void rcu_bh_qs(void)
 {
 	unsigned long flags;
 
@@ -251,9 +251,9 @@ void rcu_check_callbacks(int cpu, int user)
 {
 	RCU_TRACE(check_cpu_stalls());
 	if (user || rcu_is_cpu_rrupt_from_idle())
-		rcu_sched_qs(cpu);
+		rcu_sched_qs();
 	else if (!in_softirq())
-		rcu_bh_qs(cpu);
+		rcu_bh_qs();
 	if (user)
 		rcu_note_voluntary_context_switch(current);
 }
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c880f5387b1f..4c340625ffd4 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -188,22 +188,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  * one since the start of the grace period, this just sets a flag.
  * The caller must have disabled preemption.
  */
-void rcu_sched_qs(int cpu)
+void rcu_sched_qs(void)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
-
-	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
-	rdp->passed_quiesce = 1;
+	if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
+		trace_rcu_grace_period(TPS("rcu_sched"),
+				       __this_cpu_read(rcu_sched_data.gpnum),
+				       TPS("cpuqs"));
+		__this_cpu_write(rcu_sched_data.passed_quiesce, 1);
+	}
 }
 
-void rcu_bh_qs(int cpu)
+void rcu_bh_qs(void)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
-
-	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
-	rdp->passed_quiesce = 1;
+	if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
+		trace_rcu_grace_period(TPS("rcu_bh"),
+				       __this_cpu_read(rcu_bh_data.gpnum),
+				       TPS("cpuqs"));
+		__this_cpu_write(rcu_bh_data.passed_quiesce, 1);
+	}
 }
 
 static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
@@ -278,7 +280,7 @@ static void rcu_momentary_dyntick_idle(void)
 void rcu_note_context_switch(int cpu)
 {
 	trace_rcu_utilization(TPS("Start context switch"));
-	rcu_sched_qs(cpu);
+	rcu_sched_qs();
 	rcu_preempt_note_context_switch(cpu);
 	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
 		rcu_momentary_dyntick_idle();
@@ -2395,8 +2397,8 @@ void rcu_check_callbacks(int cpu, int user)
 		 * at least not while the corresponding CPU is online.
 		 */
 
-		rcu_sched_qs(cpu);
-		rcu_bh_qs(cpu);
+		rcu_sched_qs();
+		rcu_bh_qs();
 
 	} else if (!in_softirq()) {
 
@@ -2407,7 +2409,7 @@ void rcu_check_callbacks(int cpu, int user)
 		 * critical section, so note it.
 		 */
 
-		rcu_bh_qs(cpu);
+		rcu_bh_qs();
 	}
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0981c0cd70fe..25e692a36280 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -158,14 +158,16 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * As with the other rcu_*_qs() functions, callers to this function
  * must disable preemption.
  */
-static void rcu_preempt_qs(int cpu)
-{
-	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-
-	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
-	rdp->passed_quiesce = 1;
-	current->rcu_read_unlock_special.b.need_qs = false;
+static void rcu_preempt_qs(void)
+{
+	if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
+		trace_rcu_grace_period(TPS("rcu_preempt"),
+				       __this_cpu_read(rcu_preempt_data.gpnum),
+				       TPS("cpuqs"));
+		__this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
+		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
+		current->rcu_read_unlock_special.b.need_qs = false;
+	}
 }
 
 /*
@@ -256,7 +258,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs(cpu);
+	rcu_preempt_qs();
 }
 
 /*
@@ -352,7 +354,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 	 */
 	special = t->rcu_read_unlock_special;
 	if (special.b.need_qs) {
-		rcu_preempt_qs(smp_processor_id());
+		rcu_preempt_qs();
 		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
 			return;
@@ -651,11 +653,12 @@ static void rcu_preempt_check_callbacks(int cpu)
 	struct task_struct *t = current;
 
 	if (t->rcu_read_lock_nesting == 0) {
-		rcu_preempt_qs(cpu);
+		rcu_preempt_qs();
 		return;
 	}
 	if (t->rcu_read_lock_nesting > 0 &&
-	    per_cpu(rcu_preempt_data, cpu).qs_pending)
+	    per_cpu(rcu_preempt_data, cpu).qs_pending &&
+	    !per_cpu(rcu_preempt_data, cpu).passed_quiesce)
 		t->rcu_read_unlock_special.b.need_qs = true;
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5918d227730f..348ec763b104 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -278,7 +278,7 @@ restart:
 		pending >>= softirq_bit;
 	}
 
-	rcu_bh_qs(smp_processor_id());
+	rcu_bh_qs();
 	local_irq_disable();
 
 	pending = local_softirq_pending();
-- 
cgit v1.2.3


From 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 8 Sep 2014 09:51:29 +0900
Subject: percpu_counter: add @gfp to percpu_counter_init()

Percpu allocator now supports allocation mask.  Add @gfp to
percpu_counter_init() so that !GFP_KERNEL allocation masks can be used
with percpu_counters too.

We could have left percpu_counter_init() alone and added
percpu_counter_init_gfp(); however, the number of users isn't that
high and introducing _gfp variants to all percpu data structures would
be quite ugly, so let's just do the conversion.  This is the one with
the most users.  Other percpu data structures are a lot easier to
convert.

This patch doesn't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: "David S. Miller" <davem@davemloft.net>
Cc: x86@kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kvm/mmu.c             |  2 +-
 fs/btrfs/disk-io.c             |  8 ++++----
 fs/btrfs/extent-tree.c         |  2 +-
 fs/ext2/super.c                |  6 +++---
 fs/ext3/super.c                |  6 +++---
 fs/ext4/super.c                | 14 +++++++++-----
 fs/file_table.c                |  2 +-
 fs/quota/dquot.c               |  2 +-
 fs/super.c                     |  3 ++-
 include/linux/percpu_counter.h | 10 ++++++----
 include/net/dst_ops.h          |  2 +-
 include/net/inet_frag.h        |  2 +-
 lib/flex_proportions.c         |  4 ++--
 lib/percpu_counter.c           |  4 ++--
 lib/proportions.c              |  6 +++---
 mm/backing-dev.c               |  2 +-
 mm/mmap.c                      |  2 +-
 mm/nommu.c                     |  2 +-
 mm/shmem.c                     |  2 +-
 net/dccp/proto.c               |  2 +-
 net/ipv4/tcp.c                 |  4 ++--
 net/ipv4/tcp_memcontrol.c      |  2 +-
 net/sctp/protocol.c            |  2 +-
 23 files changed, 49 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 931467881da7..5bd53f206f4f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4534,7 +4534,7 @@ int kvm_mmu_module_init(void)
 	if (!mmu_page_header_cache)
 		goto nomem;
 
-	if (percpu_counter_init(&kvm_total_used_mmu_pages, 0))
+	if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
 		goto nomem;
 
 	register_shrinker(&mmu_shrinker);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08e65e9cf2aa..61dae01788d7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1180,7 +1180,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
 	if (!writers)
 		return ERR_PTR(-ENOMEM);
 
-	ret = percpu_counter_init(&writers->counter, 0);
+	ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL);
 	if (ret < 0) {
 		kfree(writers);
 		return ERR_PTR(ret);
@@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb,
 		goto fail_srcu;
 	}
 
-	ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+	ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
 	if (ret) {
 		err = ret;
 		goto fail_bdi;
@@ -2193,13 +2193,13 @@ int open_ctree(struct super_block *sb,
 	fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
 					(1 + ilog2(nr_cpu_ids));
 
-	ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+	ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
 	if (ret) {
 		err = ret;
 		goto fail_dirty_metadata_bytes;
 	}
 
-	ret = percpu_counter_init(&fs_info->bio_counter, 0);
+	ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL);
 	if (ret) {
 		err = ret;
 		goto fail_delalloc_bytes;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 813537f362f9..94ec71eda86b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3493,7 +3493,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	if (!found)
 		return -ENOMEM;
 
-	ret = percpu_counter_init(&found->total_bytes_pinned, 0);
+	ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
 	if (ret) {
 		kfree(found);
 		return ret;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index b88edc05c230..170dc41e8bf4 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1067,14 +1067,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	ext2_rsv_window_add(sb, &sbi->s_rsv_window_head);
 
 	err = percpu_counter_init(&sbi->s_freeblocks_counter,
-				ext2_count_free_blocks(sb));
+				ext2_count_free_blocks(sb), GFP_KERNEL);
 	if (!err) {
 		err = percpu_counter_init(&sbi->s_freeinodes_counter,
-				ext2_count_free_inodes(sb));
+				ext2_count_free_inodes(sb), GFP_KERNEL);
 	}
 	if (!err) {
 		err = percpu_counter_init(&sbi->s_dirs_counter,
-				ext2_count_dirs(sb));
+				ext2_count_dirs(sb), GFP_KERNEL);
 	}
 	if (err) {
 		ext2_msg(sb, KERN_ERR, "error: insufficient memory");
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 08cdfe5461e3..eba021b88440 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2039,14 +2039,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount2;
 	}
 	err = percpu_counter_init(&sbi->s_freeblocks_counter,
-			ext3_count_free_blocks(sb));
+			ext3_count_free_blocks(sb), GFP_KERNEL);
 	if (!err) {
 		err = percpu_counter_init(&sbi->s_freeinodes_counter,
-				ext3_count_free_inodes(sb));
+				ext3_count_free_inodes(sb), GFP_KERNEL);
 	}
 	if (!err) {
 		err = percpu_counter_init(&sbi->s_dirs_counter,
-				ext3_count_dirs(sb));
+				ext3_count_dirs(sb), GFP_KERNEL);
 	}
 	if (err) {
 		ext3_msg(sb, KERN_ERR, "error: insufficient memory");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32b43ad154b9..e25ca8fdde7d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3891,7 +3891,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	/* Register extent status tree shrinker */
 	ext4_es_register_shrinker(sbi);
 
-	if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) {
+	err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
+	if (err) {
 		ext4_msg(sb, KERN_ERR, "insufficient memory");
 		goto failed_mount3;
 	}
@@ -4105,17 +4106,20 @@ no_journal:
 	block = ext4_count_free_clusters(sb);
 	ext4_free_blocks_count_set(sbi->s_es, 
 				   EXT4_C2B(sbi, block));
-	err = percpu_counter_init(&sbi->s_freeclusters_counter, block);
+	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
+				  GFP_KERNEL);
 	if (!err) {
 		unsigned long freei = ext4_count_free_inodes(sb);
 		sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
-		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei);
+		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
+					  GFP_KERNEL);
 	}
 	if (!err)
 		err = percpu_counter_init(&sbi->s_dirs_counter,
-					  ext4_count_dirs(sb));
+					  ext4_count_dirs(sb), GFP_KERNEL);
 	if (!err)
-		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
+		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
+					  GFP_KERNEL);
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "insufficient memory");
 		goto failed_mount6;
diff --git a/fs/file_table.c b/fs/file_table.c
index 385bfd31512a..0bab12b20460 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -331,5 +331,5 @@ void __init files_init(unsigned long mempages)
 
 	n = (mempages * (PAGE_SIZE / 1024)) / 10;
 	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
-	percpu_counter_init(&nr_files, 0);
+	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
 } 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index f2d0eee9d1f1..8b663b2d9562 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2725,7 +2725,7 @@ static int __init dquot_init(void)
 		panic("Cannot create dquot hash table");
 
 	for (i = 0; i < _DQST_DQSTAT_LAST; i++) {
-		ret = percpu_counter_init(&dqstats.counter[i], 0);
+		ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL);
 		if (ret)
 			panic("Cannot create dquot stat counters");
 	}
diff --git a/fs/super.c b/fs/super.c
index b9a214d2fe98..1b836107acee 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -175,7 +175,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 		goto fail;
 
 	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-		if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0)
+		if (percpu_counter_init(&s->s_writers.counter[i], 0,
+					GFP_KERNEL) < 0)
 			goto fail;
 		lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
 				 &type->s_writers_key[i], 0);
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index d5dd4657c8d6..50e50095c8d1 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -12,6 +12,7 @@
 #include <linux/threads.h>
 #include <linux/percpu.h>
 #include <linux/types.h>
+#include <linux/gfp.h>
 
 #ifdef CONFIG_SMP
 
@@ -26,14 +27,14 @@ struct percpu_counter {
 
 extern int percpu_counter_batch;
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 			  struct lock_class_key *key);
 
-#define percpu_counter_init(fbc, value)					\
+#define percpu_counter_init(fbc, value, gfp)				\
 	({								\
 		static struct lock_class_key __key;			\
 									\
-		__percpu_counter_init(fbc, value, &__key);		\
+		__percpu_counter_init(fbc, value, gfp, &__key);		\
 	})
 
 void percpu_counter_destroy(struct percpu_counter *fbc);
@@ -89,7 +90,8 @@ struct percpu_counter {
 	s64 count;
 };
 
-static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount)
+static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+				      gfp_t gfp)
 {
 	fbc->count = amount;
 	return 0;
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 2f26dfb8450e..1f99a1de0e4f 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -63,7 +63,7 @@ static inline void dst_entries_add(struct dst_ops *dst, int val)
 
 static inline int dst_entries_init(struct dst_ops *dst)
 {
-	return percpu_counter_init(&dst->pcpuc_entries, 0);
+	return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL);
 }
 
 static inline void dst_entries_destroy(struct dst_ops *dst)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 65a8855e99fe..8d1765577acc 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -151,7 +151,7 @@ static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i)
 
 static inline void init_frag_mem_limit(struct netns_frags *nf)
 {
-	percpu_counter_init(&nf->mem, 0);
+	percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
 }
 
 static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
index ebf3bac460b0..b9d026bfcf38 100644
--- a/lib/flex_proportions.c
+++ b/lib/flex_proportions.c
@@ -40,7 +40,7 @@ int fprop_global_init(struct fprop_global *p)
 
 	p->period = 0;
 	/* Use 1 to avoid dealing with periods with 0 events... */
-	err = percpu_counter_init(&p->events, 1);
+	err = percpu_counter_init(&p->events, 1, GFP_KERNEL);
 	if (err)
 		return err;
 	seqcount_init(&p->sequence);
@@ -172,7 +172,7 @@ int fprop_local_init_percpu(struct fprop_local_percpu *pl)
 {
 	int err;
 
-	err = percpu_counter_init(&pl->events, 0);
+	err = percpu_counter_init(&pl->events, 0, GFP_KERNEL);
 	if (err)
 		return err;
 	pl->period = 0;
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 3fde78275cd1..48144cdae819 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -112,7 +112,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 			  struct lock_class_key *key)
 {
 	unsigned long flags __maybe_unused;
@@ -120,7 +120,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 	raw_spin_lock_init(&fbc->lock);
 	lockdep_set_class(&fbc->lock, key);
 	fbc->count = amount;
-	fbc->counters = alloc_percpu(s32);
+	fbc->counters = alloc_percpu_gfp(s32, gfp);
 	if (!fbc->counters)
 		return -ENOMEM;
 
diff --git a/lib/proportions.c b/lib/proportions.c
index 05df84801b56..ca95f8d54384 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift)
 	pd->index = 0;
 	pd->pg[0].shift = shift;
 	mutex_init(&pd->mutex);
-	err = percpu_counter_init(&pd->pg[0].events, 0);
+	err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL);
 	if (err)
 		goto out;
 
-	err = percpu_counter_init(&pd->pg[1].events, 0);
+	err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL);
 	if (err)
 		percpu_counter_destroy(&pd->pg[0].events);
 
@@ -193,7 +193,7 @@ int prop_local_init_percpu(struct prop_local_percpu *pl)
 	raw_spin_lock_init(&pl->lock);
 	pl->shift = 0;
 	pl->period = 0;
-	return percpu_counter_init(&pl->events, 0);
+	return percpu_counter_init(&pl->events, 0, GFP_KERNEL);
 }
 
 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1706cbbdf5f0..f19a818be2d3 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -455,7 +455,7 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi_wb_init(&bdi->wb, bdi);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
-		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
+		err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL);
 		if (err)
 			goto err;
 	}
diff --git a/mm/mmap.c b/mm/mmap.c
index c1f2ea4a0b99..d7ec93e25fa1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3196,7 +3196,7 @@ void __init mmap_init(void)
 {
 	int ret;
 
-	ret = percpu_counter_init(&vm_committed_as, 0);
+	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
 	VM_BUG_ON(ret);
 }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index a881d9673c6b..bd1808e194a7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -539,7 +539,7 @@ void __init mmap_init(void)
 {
 	int ret;
 
-	ret = percpu_counter_init(&vm_committed_as, 0);
+	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
 	VM_BUG_ON(ret);
 	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 0e5fb225007c..d4bc55d3f107 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2993,7 +2993,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 #endif
 
 	spin_lock_init(&sbinfo->stat_lock);
-	if (percpu_counter_init(&sbinfo->used_blocks, 0))
+	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
 		goto failed;
 	sbinfo->free_inodes = sbinfo->max_inodes;
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index de2c1e719305..e421eddf67b4 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1115,7 +1115,7 @@ static int __init dccp_init(void)
 
 	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
 		     FIELD_SIZEOF(struct sk_buff, cb));
-	rc = percpu_counter_init(&dccp_orphan_count, 0);
+	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
 	if (rc)
 		goto out_fail;
 	rc = -ENOBUFS;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 541f26a67ba2..d59c2604c247 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3188,8 +3188,8 @@ void __init tcp_init(void)
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
-	percpu_counter_init(&tcp_sockets_allocated, 0);
-	percpu_counter_init(&tcp_orphan_count, 0);
+	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
+	percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 3af522622fad..1d191357bf88 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -32,7 +32,7 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 		res_parent = &parent_cg->memory_allocated;
 
 	res_counter_init(&cg_proto->memory_allocated, res_parent);
-	percpu_counter_init(&cg_proto->sockets_allocated, 0);
+	percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
 
 	return 0;
 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6240834f4b95..f00a85a3fddd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1341,7 +1341,7 @@ static __init int sctp_init(void)
 	if (!sctp_chunk_cachep)
 		goto err_chunk_cachep;
 
-	status = percpu_counter_init(&sctp_sockets_allocated, 0);
+	status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL);
 	if (status)
 		goto err_percpu_counter_init;
 
-- 
cgit v1.2.3


From 20ae00792c6f1f18fc4fc5965445a145df92827e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 8 Sep 2014 09:51:30 +0900
Subject: proportions: add @gfp to init functions

Percpu allocator now supports allocation mask.  Add @gfp to
[flex_]proportions init functions so that !GFP_KERNEL allocation masks
can be used with them too.

This patch doesn't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/flex_proportions.h |  5 +++--
 include/linux/proportions.h      |  5 +++--
 lib/flex_proportions.c           |  8 ++++----
 lib/proportions.c                | 10 +++++-----
 mm/backing-dev.c                 |  2 +-
 mm/page-writeback.c              |  2 +-
 6 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h
index 4ebc49fae391..0d348e011a6e 100644
--- a/include/linux/flex_proportions.h
+++ b/include/linux/flex_proportions.h
@@ -10,6 +10,7 @@
 #include <linux/percpu_counter.h>
 #include <linux/spinlock.h>
 #include <linux/seqlock.h>
+#include <linux/gfp.h>
 
 /*
  * When maximum proportion of some event type is specified, this is the
@@ -32,7 +33,7 @@ struct fprop_global {
 	seqcount_t sequence;
 };
 
-int fprop_global_init(struct fprop_global *p);
+int fprop_global_init(struct fprop_global *p, gfp_t gfp);
 void fprop_global_destroy(struct fprop_global *p);
 bool fprop_new_period(struct fprop_global *p, int periods);
 
@@ -79,7 +80,7 @@ struct fprop_local_percpu {
 	raw_spinlock_t lock;	/* Protect period and numerator */
 };
 
-int fprop_local_init_percpu(struct fprop_local_percpu *pl);
+int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp);
 void fprop_local_destroy_percpu(struct fprop_local_percpu *pl);
 void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl);
 void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl,
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index 26a8a4ed9b07..00e8e8fa7358 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -12,6 +12,7 @@
 #include <linux/percpu_counter.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/gfp.h>
 
 struct prop_global {
 	/*
@@ -40,7 +41,7 @@ struct prop_descriptor {
 	struct mutex mutex;		/* serialize the prop_global switch */
 };
 
-int prop_descriptor_init(struct prop_descriptor *pd, int shift);
+int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp);
 void prop_change_shift(struct prop_descriptor *pd, int new_shift);
 
 /*
@@ -61,7 +62,7 @@ struct prop_local_percpu {
 	raw_spinlock_t lock;		/* protect the snapshot state */
 };
 
-int prop_local_init_percpu(struct prop_local_percpu *pl);
+int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp);
 void prop_local_destroy_percpu(struct prop_local_percpu *pl);
 void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl);
 void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl,
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
index b9d026bfcf38..8f25652f40d4 100644
--- a/lib/flex_proportions.c
+++ b/lib/flex_proportions.c
@@ -34,13 +34,13 @@
  */
 #include <linux/flex_proportions.h>
 
-int fprop_global_init(struct fprop_global *p)
+int fprop_global_init(struct fprop_global *p, gfp_t gfp)
 {
 	int err;
 
 	p->period = 0;
 	/* Use 1 to avoid dealing with periods with 0 events... */
-	err = percpu_counter_init(&p->events, 1, GFP_KERNEL);
+	err = percpu_counter_init(&p->events, 1, gfp);
 	if (err)
 		return err;
 	seqcount_init(&p->sequence);
@@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p,
  */
 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
-int fprop_local_init_percpu(struct fprop_local_percpu *pl)
+int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp)
 {
 	int err;
 
-	err = percpu_counter_init(&pl->events, 0, GFP_KERNEL);
+	err = percpu_counter_init(&pl->events, 0, gfp);
 	if (err)
 		return err;
 	pl->period = 0;
diff --git a/lib/proportions.c b/lib/proportions.c
index ca95f8d54384..6f724298f67a 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -73,7 +73,7 @@
 #include <linux/proportions.h>
 #include <linux/rcupdate.h>
 
-int prop_descriptor_init(struct prop_descriptor *pd, int shift)
+int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
 {
 	int err;
 
@@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift)
 	pd->index = 0;
 	pd->pg[0].shift = shift;
 	mutex_init(&pd->mutex);
-	err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL);
+	err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
 	if (err)
 		goto out;
 
-	err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL);
+	err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
 	if (err)
 		percpu_counter_destroy(&pd->pg[0].events);
 
@@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
 
 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
-int prop_local_init_percpu(struct prop_local_percpu *pl)
+int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
 {
 	raw_spin_lock_init(&pl->lock);
 	pl->shift = 0;
 	pl->period = 0;
-	return percpu_counter_init(&pl->events, 0, GFP_KERNEL);
+	return percpu_counter_init(&pl->events, 0, gfp);
 }
 
 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f19a818be2d3..64ec49d1772b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -470,7 +470,7 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->write_bandwidth = INIT_BW;
 	bdi->avg_write_bandwidth = INIT_BW;
 
-	err = fprop_local_init_percpu(&bdi->completions);
+	err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL);
 
 	if (err) {
 err:
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 91d73ef1744d..508599403721 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1777,7 +1777,7 @@ void __init page_writeback_init(void)
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 
-	fprop_global_init(&writeout_completions);
+	fprop_global_init(&writeout_completions, GFP_KERNEL);
 }
 
 /**
-- 
cgit v1.2.3


From a34375ef9e65340a138fc0be287de5c940d260fc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 8 Sep 2014 09:51:30 +0900
Subject: percpu-refcount: add @gfp to percpu_ref_init()

Percpu allocator now supports allocation mask.  Add @gfp to
percpu_ref_init() so that !GFP_KERNEL allocation masks can be used
with percpu_refs too.

This patch doesn't make any functional difference.

v2: blk-mq conversion was missing.  Updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Kent Overstreet <koverstreet@google.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Nicholas A. Bellinger <nab@linux-iscsi.org>
Cc: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c                   | 3 ++-
 drivers/target/target_core_tpg.c | 3 ++-
 fs/aio.c                         | 4 ++--
 include/linux/percpu-refcount.h  | 3 ++-
 kernel/cgroup.c                  | 6 +++---
 lib/percpu-refcount.c            | 6 ++++--
 6 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5189cb1e478a..702df07b980d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1776,7 +1776,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!q)
 		goto err_hctxs;
 
-	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
+	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
+			    GFP_KERNEL))
 		goto err_map;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index fddfae61222f..4ab6da338585 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -819,7 +819,8 @@ int core_tpg_add_lun(
 {
 	int ret;
 
-	ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release);
+	ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release,
+			      GFP_KERNEL);
 	if (ret < 0)
 		return ret;
 
diff --git a/fs/aio.c b/fs/aio.c
index bd7ec2cc2674..93fbcc0f5696 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -666,10 +666,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 
-	if (percpu_ref_init(&ctx->users, free_ioctx_users))
+	if (percpu_ref_init(&ctx->users, free_ioctx_users, GFP_KERNEL))
 		goto err;
 
-	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, GFP_KERNEL))
 		goto err;
 
 	ctx->cpu = alloc_percpu(struct kioctx_cpu);
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 3dfbf237cd8f..ee8325122dbd 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -49,6 +49,7 @@
 #include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/rcupdate.h>
+#include <linux/gfp.h>
 
 struct percpu_ref;
 typedef void (percpu_ref_func_t)(struct percpu_ref *);
@@ -66,7 +67,7 @@ struct percpu_ref {
 };
 
 int __must_check percpu_ref_init(struct percpu_ref *ref,
-				 percpu_ref_func_t *release);
+				 percpu_ref_func_t *release, gfp_t gfp);
 void percpu_ref_reinit(struct percpu_ref *ref);
 void percpu_ref_exit(struct percpu_ref *ref);
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7dc8788cfd52..589b4d89a0a5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1628,7 +1628,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 		goto out;
 	root_cgrp->id = ret;
 
-	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release);
+	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, GFP_KERNEL);
 	if (ret)
 		goto out;
 
@@ -4487,7 +4487,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
 
 	init_and_link_css(css, ss, cgrp);
 
-	err = percpu_ref_init(&css->refcnt, css_release);
+	err = percpu_ref_init(&css->refcnt, css_release, GFP_KERNEL);
 	if (err)
 		goto err_free_css;
 
@@ -4555,7 +4555,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 		goto out_unlock;
 	}
 
-	ret = percpu_ref_init(&cgrp->self.refcnt, css_release);
+	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, GFP_KERNEL);
 	if (ret)
 		goto out_free_cgrp;
 
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index fe5a3342e960..ff9903264a91 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -40,6 +40,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref)
  * percpu_ref_init - initialize a percpu refcount
  * @ref: percpu_ref to initialize
  * @release: function which will be called when refcount hits 0
+ * @gfp: allocation mask to use
  *
  * Initializes the refcount in single atomic counter mode with a refcount of 1;
  * analagous to atomic_set(ref, 1).
@@ -47,11 +48,12 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref)
  * Note that @release must not sleep - it may potentially be called from RCU
  * callback context by percpu_ref_kill().
  */
-int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
+int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
+		    gfp_t gfp)
 {
 	atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
 
-	ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned);
+	ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned, gfp);
 	if (!ref->pcpu_count_ptr)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From e78c3496790ee8a36522a838b59b388e8a709e65 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 16 Aug 2014 13:40:10 -0400
Subject: time, signal: Protect resource use statistics with seqlock

Both times() and clock_gettime(CLOCK_PROCESS_CPUTIME_ID) have scalability
issues on large systems, due to both functions being serialized with a
lock.

The lock protects against reporting a wrong value, due to a thread in the
task group exiting, its statistics reporting up to the signal struct, and
that exited task's statistics being counted twice (or not at all).

Protecting that with a lock results in times() and clock_gettime() being
completely serialized on large systems.

This can be fixed by using a seqlock around the events that gather and
propagate statistics. As an additional benefit, the protection code can
be moved into thread_group_cputime(), slightly simplifying the calling
functions.

In the case of posix_cpu_clock_get_task() things can be simplified a
lot, because the calling function already ensures that the task sticks
around, and the rest is now taken care of in thread_group_cputime().

This way the statistics reporting code can run lockless.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Daeseok Youn <daeseok.youn@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guillaume Morin <guillaume@morinfr.org>
Cc: Ionut Alexa <ionut.m.alexa@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Michal Schmidt <mschmidt@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: umgwanakikbuti@gmail.com
Cc: fweisbec@gmail.com
Cc: srao@redhat.com
Cc: lwoodman@redhat.com
Cc: atheurer@redhat.com
Link: http://lkml.kernel.org/r/20140816134010.26a9b572@annuminas.surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h          |  1 +
 kernel/exit.c                  |  4 ++++
 kernel/fork.c                  |  1 +
 kernel/sched/cputime.c         | 33 ++++++++++++++++++++-------------
 kernel/sys.c                   |  2 --
 kernel/time/posix-cpu-timers.c | 14 --------------
 6 files changed, 26 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..dd9eb4807389 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -645,6 +645,7 @@ struct signal_struct {
 	 * Live threads maintain their own counters and add to these
 	 * in __exit_signal, except for the group leader.
 	 */
+	seqlock_t stats_lock;
 	cputime_t utime, stime, cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
diff --git a/kernel/exit.c b/kernel/exit.c
index b93d46dab6fc..fa09b86609db 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -127,6 +127,7 @@ static void __exit_signal(struct task_struct *tsk)
 	 * the signal_struct.
 	 */
 	task_cputime(tsk, &utime, &stime);
+	write_seqlock(&sig->stats_lock);
 	sig->utime += utime;
 	sig->stime += stime;
 	sig->gtime += task_gtime(tsk);
@@ -140,6 +141,7 @@ static void __exit_signal(struct task_struct *tsk)
 	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 	sig->nr_threads--;
 	__unhash_process(tsk, group_dead);
+	write_sequnlock(&sig->stats_lock);
 
 	/*
 	 * Do this under ->siglock, we can race with another thread
@@ -1042,6 +1044,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
+		write_seqlock(&psig->stats_lock);
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
 		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1064,6 +1067,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 			psig->cmaxrss = maxrss;
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
+		write_sequnlock(&psig->stats_lock);
 		spin_unlock_irq(&p->real_parent->sighand->siglock);
 	}
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 0cf9cdb6e491..9387ae8ab048 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1068,6 +1068,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
+	seqlock_init(&sig->stats_lock);
 
 	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	sig->real_timer.function = it_real_fn;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3e52836359ba..49b7cfe98f7a 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -288,18 +288,28 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 	struct signal_struct *sig = tsk->signal;
 	cputime_t utime, stime;
 	struct task_struct *t;
-
-	times->utime = sig->utime;
-	times->stime = sig->stime;
-	times->sum_exec_runtime = sig->sum_sched_runtime;
+	unsigned int seq, nextseq;
 
 	rcu_read_lock();
-	for_each_thread(tsk, t) {
-		task_cputime(t, &utime, &stime);
-		times->utime += utime;
-		times->stime += stime;
-		times->sum_exec_runtime += task_sched_runtime(t);
-	}
+	/* Attempt a lockless read on the first round. */
+	nextseq = 0;
+	do {
+		seq = nextseq;
+		read_seqbegin_or_lock(&sig->stats_lock, &seq);
+		times->utime = sig->utime;
+		times->stime = sig->stime;
+		times->sum_exec_runtime = sig->sum_sched_runtime;
+
+		for_each_thread(tsk, t) {
+			task_cputime(t, &utime, &stime);
+			times->utime += utime;
+			times->stime += stime;
+			times->sum_exec_runtime += task_sched_runtime(t);
+		}
+		/* If lockless access failed, take the lock. */
+		nextseq = 1;
+	} while (need_seqretry(&sig->stats_lock, seq));
+	done_seqretry(&sig->stats_lock, seq);
 	rcu_read_unlock();
 }
 
@@ -611,9 +621,6 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
-/*
- * Must be called with siglock held.
- */
 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime;
diff --git a/kernel/sys.c b/kernel/sys.c
index ce8129192a26..b6636643cbd1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -862,11 +862,9 @@ void do_sys_times(struct tms *tms)
 {
 	cputime_t tgutime, tgstime, cutime, cstime;
 
-	spin_lock_irq(&current->sighand->siglock);
 	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
 	cutime = current->signal->cutime;
 	cstime = current->signal->cstime;
-	spin_unlock_irq(&current->sighand->siglock);
 	tms->tms_utime = cputime_to_clock_t(tgutime);
 	tms->tms_stime = cputime_to_clock_t(tgstime);
 	tms->tms_cutime = cputime_to_clock_t(cutime);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 3b8946416a5f..492b986195d5 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -272,22 +272,8 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk,
 		if (same_thread_group(tsk, current))
 			err = cpu_clock_sample(which_clock, tsk, &rtn);
 	} else {
-		unsigned long flags;
-		struct sighand_struct *sighand;
-
-		/*
-		 * while_each_thread() is not yet entirely RCU safe,
-		 * keep locking the group while sampling process
-		 * clock for now.
-		 */
-		sighand = lock_task_sighand(tsk, &flags);
-		if (!sighand)
-			return err;
-
 		if (tsk == current || thread_group_leader(tsk))
 			err = cpu_clock_sample_group(which_clock, tsk, &rtn);
-
-		unlock_task_sighand(tsk, &flags);
 	}
 
 	if (!err)
-- 
cgit v1.2.3


From 0b750b3baa2d64f1b77aecc10f20deeb28efe60d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 5 Sep 2014 18:08:47 +0200
Subject: HID: usbhid: add always-poll quirk

Add quirk to make sure that a device is always polled for input events
even if it hasn't been opened.

This is needed for devices that disconnects from the bus unless the
interrupt endpoint has been polled at least once or when not responding
to an input event (e.g. after having shut down X).

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/hid-core.c | 26 +++++++++++++++++++++++---
 include/linux/hid.h           |  1 +
 2 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 79cf503e37bf..ddd547ad6d7e 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -82,7 +82,7 @@ static int hid_start_in(struct hid_device *hid)
 	struct usbhid_device *usbhid = hid->driver_data;
 
 	spin_lock_irqsave(&usbhid->lock, flags);
-	if (hid->open > 0 &&
+	if ((hid->open > 0 || hid->quirks & HID_QUIRK_ALWAYS_POLL) &&
 			!test_bit(HID_DISCONNECTED, &usbhid->iofl) &&
 			!test_bit(HID_SUSPENDED, &usbhid->iofl) &&
 			!test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) {
@@ -292,6 +292,8 @@ static void hid_irq_in(struct urb *urb)
 	case 0:			/* success */
 		usbhid_mark_busy(usbhid);
 		usbhid->retry_delay = 0;
+		if ((hid->quirks & HID_QUIRK_ALWAYS_POLL) && !hid->open)
+			break;
 		hid_input_report(urb->context, HID_INPUT_REPORT,
 				 urb->transfer_buffer,
 				 urb->actual_length, 1);
@@ -735,8 +737,10 @@ void usbhid_close(struct hid_device *hid)
 	if (!--hid->open) {
 		spin_unlock_irq(&usbhid->lock);
 		hid_cancel_delayed_stuff(usbhid);
-		usb_kill_urb(usbhid->urbin);
-		usbhid->intf->needs_remote_wakeup = 0;
+		if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) {
+			usb_kill_urb(usbhid->urbin);
+			usbhid->intf->needs_remote_wakeup = 0;
+		}
 	} else {
 		spin_unlock_irq(&usbhid->lock);
 	}
@@ -1134,6 +1138,19 @@ static int usbhid_start(struct hid_device *hid)
 
 	set_bit(HID_STARTED, &usbhid->iofl);
 
+	if (hid->quirks & HID_QUIRK_ALWAYS_POLL) {
+		ret = usb_autopm_get_interface(usbhid->intf);
+		if (ret)
+			goto fail;
+		usbhid->intf->needs_remote_wakeup = 1;
+		ret = hid_start_in(hid);
+		if (ret) {
+			dev_err(&hid->dev,
+				"failed to start in urb: %d\n", ret);
+		}
+		usb_autopm_put_interface(usbhid->intf);
+	}
+
 	/* Some keyboards don't work until their LEDs have been set.
 	 * Since BIOSes do set the LEDs, it must be safe for any device
 	 * that supports the keyboard boot protocol.
@@ -1166,6 +1183,9 @@ static void usbhid_stop(struct hid_device *hid)
 	if (WARN_ON(!usbhid))
 		return;
 
+	if (hid->quirks & HID_QUIRK_ALWAYS_POLL)
+		usbhid->intf->needs_remote_wakeup = 0;
+
 	clear_bit(HID_STARTED, &usbhid->iofl);
 	spin_lock_irq(&usbhid->lock);	/* Sync with error and led handlers */
 	set_bit(HID_DISCONNECTED, &usbhid->iofl);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index f53c4a9cca1d..26ee25fced27 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -287,6 +287,7 @@ struct hid_item {
 #define HID_QUIRK_HIDINPUT_FORCE		0x00000080
 #define HID_QUIRK_NO_EMPTY_INPUT		0x00000100
 #define HID_QUIRK_NO_INIT_INPUT_REPORTS		0x00000200
+#define HID_QUIRK_ALWAYS_POLL			0x00000400
 #define HID_QUIRK_SKIP_OUTPUT_REPORTS		0x00010000
 #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID		0x00020000
 #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP	0x00040000
-- 
cgit v1.2.3


From c8d6591752e96c550cb98b781326d72d8eedcc79 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Wed, 3 Sep 2014 06:48:37 -0700
Subject: mac80211: support DTPC IE (from Cisco Client eXtensions)

Linux already supports 802.11h, where the access point can tell the
client to reduce its transmission power. However, 802.11h is only
defined for 5 GHz, where the need for this is much smaller than on
2.4 GHz.

Cisco has their own solution, called DTPC (Dynamic Transmit Power
Control). Cisco APs on a controller sometimes but not always send
802.11h; they always send DTPC, even on 2.4 GHz. This patch adds support
for parsing and honoring the DTPC IE in addition to the 802.11h
element (they do not always contain the same limits, so both must
be honored); the format is not documented, but very simple.

Tested (on top of wireless.git and on 3.16.1) against a Cisco Aironet
1142 joined to a Cisco 2504 WLC, by setting various transmit power
levels for the given access points and observing the results.
The Wireshark 802.11 dissector agrees with the interpretation of the
element, except for negative numbers, which seem to never happen
anyway.

Signed-off-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/linux/ieee80211.h  |  3 ++-
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mlme.c        | 60 +++++++++++++++++++++++++++++++++++++---------
 net/mac80211/util.c        | 25 +++++++++++++++++++
 4 files changed, 77 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 77d4f26e0235..61a09f0644aa 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1824,7 +1824,8 @@ enum ieee80211_eid {
 	WLAN_EID_DMG_TSPEC = 146,
 	WLAN_EID_DMG_AT = 147,
 	WLAN_EID_DMG_CAP = 148,
-	/* 149-150 reserved for Cisco */
+	/* 149 reserved for Cisco */
+	WLAN_EID_CISCO_VENDOR_SPECIFIC = 150,
 	WLAN_EID_DMG_OPERATION = 151,
 	WLAN_EID_DMG_BSS_PARAM_CHANGE = 152,
 	WLAN_EID_DMG_BEAM_REFINEMENT = 153,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6b7bdd8fae29..c2aaec4dfcf0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1369,6 +1369,7 @@ struct ieee802_11_elems {
 	const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
 	const u8 *country_elem;
 	const u8 *pwr_constr_elem;
+	const u8 *cisco_dtpc_elem;
 	const struct ieee80211_timeout_interval_ie *timeout_int;
 	const u8 *opmode_notif;
 	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0b812a88f95f..a7b92f5f7161 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1230,14 +1230,30 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
 	return have_chan_pwr;
 }
 
+static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata,
+				      struct ieee80211_channel *channel,
+				      const u8 *cisco_dtpc_ie,
+				      int *pwr_level)
+{
+	/* From practical testing, the first data byte of the DTPC element
+	 * seems to contain the requested dBm level, and the CLI on Cisco
+	 * APs clearly state the range is -127 to 127 dBm, which indicates
+	 * a signed byte, although it seemingly never actually goes negative.
+	 * The other byte seems to always be zero.
+	 */
+	*pwr_level = (__s8)cisco_dtpc_ie[4];
+}
+
 static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_channel *channel,
 				       struct ieee80211_mgmt *mgmt,
 				       const u8 *country_ie, u8 country_ie_len,
-				       const u8 *pwr_constr_ie)
+				       const u8 *pwr_constr_ie,
+				       const u8 *cisco_dtpc_ie)
 {
-	bool has_80211h_pwr = false;
-	int chan_pwr, pwr_reduction_80211h;
+	bool has_80211h_pwr = false, has_cisco_pwr = false;
+	int chan_pwr = 0, pwr_reduction_80211h = 0;
+	int pwr_level_cisco, pwr_level_80211h;
 	int new_ap_level;
 
 	if (country_ie && pwr_constr_ie &&
@@ -1246,16 +1262,35 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
 		has_80211h_pwr = ieee80211_find_80211h_pwr_constr(
 			sdata, channel, country_ie, country_ie_len,
 			pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h);
-		new_ap_level = max_t(int, 0, chan_pwr - pwr_reduction_80211h);
+		pwr_level_80211h =
+			max_t(int, 0, chan_pwr - pwr_reduction_80211h);
+	}
+
+	if (cisco_dtpc_ie) {
+		ieee80211_find_cisco_dtpc(
+			sdata, channel, cisco_dtpc_ie, &pwr_level_cisco);
+		has_cisco_pwr = true;
 	}
 
-	if (!has_80211h_pwr)
+	if (!has_80211h_pwr && !has_cisco_pwr)
 		return 0;
 
-	sdata_info(sdata,
-		   "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
-		   new_ap_level, chan_pwr, pwr_reduction_80211h,
-		   sdata->u.mgd.bssid);
+	/* If we have both 802.11h and Cisco DTPC, apply both limits
+	 * by picking the smallest of the two power levels advertised.
+	 */
+	if (has_80211h_pwr &&
+	    (!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) {
+		sdata_info(sdata,
+			   "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
+			   pwr_level_80211h, chan_pwr, pwr_reduction_80211h,
+			   sdata->u.mgd.bssid);
+		new_ap_level = pwr_level_80211h;
+	} else {  /* has_cisco_pwr is always true here. */
+		sdata_info(sdata,
+			   "Limiting TX power to %d dBm as advertised by %pM\n",
+			   pwr_level_cisco, sdata->u.mgd.bssid);
+		new_ap_level = pwr_level_cisco;
+	}
 
 	if (sdata->ap_power_level == new_ap_level)
 		return 0;
@@ -2923,7 +2958,9 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 /*
  * This is the canonical list of information elements we care about,
  * the filter code also gives us all changes to the Microsoft OUI
- * (00:50:F2) vendor IE which is used for WMM which we need to track.
+ * (00:50:F2) vendor IE which is used for WMM which we need to track,
+ * as well as the DTPC IE (part of the Cisco OUI) used for signaling
+ * changes to requested client power.
  *
  * We implement beacon filtering in software since that means we can
  * avoid processing the frame here and in cfg80211, and userspace
@@ -3232,7 +3269,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt,
 					       elems.country_elem,
 					       elems.country_elem_len,
-					       elems.pwr_constr_elem);
+					       elems.pwr_constr_elem,
+					       elems.cisco_dtpc_elem);
 
 	ieee80211_bss_info_change_notify(sdata, changed);
 }
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b444f27a788e..3c61060a4d2b 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1015,6 +1015,31 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			}
 			elems->pwr_constr_elem = pos;
 			break;
+		case WLAN_EID_CISCO_VENDOR_SPECIFIC:
+			/* Lots of different options exist, but we only care
+			 * about the Dynamic Transmit Power Control element.
+			 * First check for the Cisco OUI, then for the DTPC
+			 * tag (0x00).
+			 */
+			if (elen < 4) {
+				elem_parse_failed = true;
+				break;
+			}
+
+			if (pos[0] != 0x00 || pos[1] != 0x40 ||
+			    pos[2] != 0x96 || pos[3] != 0x00)
+				break;
+
+			if (elen != 6) {
+				elem_parse_failed = true;
+				break;
+			}
+
+			if (calc_crc)
+				crc = crc32_be(crc, pos - 2, elen + 2);
+
+			elems->cisco_dtpc_elem = pos;
+			break;
 		case WLAN_EID_TIMEOUT_INTERVAL:
 			if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
 				elems->timeout_int = (void *)pos;
-- 
cgit v1.2.3


From 4930d247af29f849cd1bddd65be2400684dc886e Mon Sep 17 00:00:00 2001
From: Gaël PORTAY <gael.portay@gmail.com>
Date: Sat, 6 Sep 2014 19:52:35 +0200
Subject: ARM: at91/tclib: move initialization from alloc to probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move resource retrieval from atmel_tc_alloc to tc_probe to avoid lately
reporting resource related issues when a TC block user request a TC block.

Moreover, resources retrieval are usually done in the probe function,
thus moving them add some consistency with other drivers.

Initialization is done once, ie not every time a tc block is requested.
If it fails, the device is not appended to the list of tc blocks.

Furhermore, the device id is retrieved at probe as well, avoiding parsing
DT every time the user requests of tc block.

Signed-off-by: Gaël PORTAY <gael.portay@gmail.com>
Acked-by: Thierry Reding <thierry.reding@gmail.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/clocksource/tcb_clksrc.c |  2 +-
 drivers/misc/atmel_tclib.c       | 71 +++++++++++++---------------------------
 drivers/pwm/pwm-atmel-tcb.c      |  2 +-
 include/linux/atmel_tc.h         |  8 +++--
 4 files changed, 29 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index a8d7ea14f183..f922e81d531b 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -279,7 +279,7 @@ static int __init tcb_clksrc_init(void)
 	int i;
 	int ret;
 
-	tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK, clksrc.name);
+	tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK);
 	if (!tc) {
 		pr_debug("can't alloc TC for clocksource\n");
 		return -ENODEV;
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
index b514a2d4485b..d505d1e0857b 100644
--- a/drivers/misc/atmel_tclib.c
+++ b/drivers/misc/atmel_tclib.c
@@ -35,60 +35,31 @@ static LIST_HEAD(tc_list);
 /**
  * atmel_tc_alloc - allocate a specified TC block
  * @block: which block to allocate
- * @name: name to be associated with the iomem resource
  *
  * Caller allocates a block.  If it is available, a pointer to a
  * pre-initialized struct atmel_tc is returned. The caller can access
  * the registers directly through the "regs" field.
  */
-struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name)
+struct atmel_tc *atmel_tc_alloc(unsigned block)
 {
 	struct atmel_tc		*tc;
 	struct platform_device	*pdev = NULL;
-	struct resource		*r;
-	size_t			size;
 
 	spin_lock(&tc_list_lock);
 	list_for_each_entry(tc, &tc_list, node) {
-		if (tc->pdev->dev.of_node) {
-			if (of_alias_get_id(tc->pdev->dev.of_node, "tcb")
-					== block) {
-				pdev = tc->pdev;
-				break;
-			}
-		} else if (tc->pdev->id == block) {
+		if (tc->allocated)
+			continue;
+
+		if ((tc->pdev->dev.of_node && tc->id == block) ||
+		    (tc->pdev->id == block)) {
 			pdev = tc->pdev;
+			tc->allocated = true;
 			break;
 		}
 	}
-
-	if (!pdev || tc->iomem)
-		goto fail;
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!r)
-		goto fail;
-
-	size = resource_size(r);
-	r = request_mem_region(r->start, size, name);
-	if (!r)
-		goto fail;
-
-	tc->regs = ioremap(r->start, size);
-	if (!tc->regs)
-		goto fail_ioremap;
-
-	tc->iomem = r;
-
-out:
 	spin_unlock(&tc_list_lock);
-	return tc;
 
-fail_ioremap:
-	release_mem_region(r->start, size);
-fail:
-	tc = NULL;
-	goto out;
+	return pdev ? tc : NULL;
 }
 EXPORT_SYMBOL_GPL(atmel_tc_alloc);
 
@@ -96,19 +67,14 @@ EXPORT_SYMBOL_GPL(atmel_tc_alloc);
  * atmel_tc_free - release a specified TC block
  * @tc: Timer/counter block that was returned by atmel_tc_alloc()
  *
- * This reverses the effect of atmel_tc_alloc(), unmapping the I/O
- * registers, invalidating the resource returned by that routine and
- * making the TC available to other drivers.
+ * This reverses the effect of atmel_tc_alloc(), invalidating the resource
+ * returned by that routine and making the TC available to other drivers.
  */
 void atmel_tc_free(struct atmel_tc *tc)
 {
 	spin_lock(&tc_list_lock);
-	if (tc->regs) {
-		iounmap(tc->regs);
-		release_mem_region(tc->iomem->start, resource_size(tc->iomem));
-		tc->regs = NULL;
-		tc->iomem = NULL;
-	}
+	if (tc->allocated)
+		tc->allocated = false;
 	spin_unlock(&tc_list_lock);
 }
 EXPORT_SYMBOL_GPL(atmel_tc_free);
@@ -142,9 +108,7 @@ static int __init tc_probe(struct platform_device *pdev)
 	struct atmel_tc *tc;
 	struct clk	*clk;
 	int		irq;
-
-	if (!platform_get_resource(pdev, IORESOURCE_MEM, 0))
-		return -EINVAL;
+	struct resource	*r;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -160,12 +124,21 @@ static int __init tc_probe(struct platform_device *pdev)
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tc->regs = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(tc->regs))
+		return PTR_ERR(tc->regs);
+
 	/* Now take SoC information if available */
 	if (pdev->dev.of_node) {
 		const struct of_device_id *match;
 		match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node);
 		if (match)
 			tc->tcb_config = match->data;
+
+		tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb");
+	} else {
+		tc->id = pdev->id;
 	}
 
 	tc->clk[0] = clk;
diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
index f3dcd02390f1..d56e5b717431 100644
--- a/drivers/pwm/pwm-atmel-tcb.c
+++ b/drivers/pwm/pwm-atmel-tcb.c
@@ -379,7 +379,7 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	tc = atmel_tc_alloc(tcblock, "tcb-pwm");
+	tc = atmel_tc_alloc(tcblock);
 	if (tc == NULL) {
 		dev_err(&pdev->dev, "failed to allocate Timer Counter Block\n");
 		return -ENOMEM;
diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h
index 89a931babecf..d8aa88461c64 100644
--- a/include/linux/atmel_tc.h
+++ b/include/linux/atmel_tc.h
@@ -44,12 +44,13 @@ struct atmel_tcb_config {
 /**
  * struct atmel_tc - information about a Timer/Counter Block
  * @pdev: physical device
- * @iomem: resource associated with the I/O register
  * @regs: mapping through which the I/O registers can be accessed
+ * @id: block id
  * @tcb_config: configuration data from SoC
  * @irq: irq for each of the three channels
  * @clk: internal clock source for each of the three channels
  * @node: list node, for tclib internal use
+ * @allocated: if already used, for tclib internal use
  *
  * On some platforms, each TC channel has its own clocks and IRQs,
  * while on others, all TC channels share the same clock and IRQ.
@@ -61,15 +62,16 @@ struct atmel_tcb_config {
  */
 struct atmel_tc {
 	struct platform_device	*pdev;
-	struct resource		*iomem;
 	void __iomem		*regs;
+	int                     id;
 	const struct atmel_tcb_config *tcb_config;
 	int			irq[3];
 	struct clk		*clk[3];
 	struct list_head	node;
+	bool			allocated;
 };
 
-extern struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name);
+extern struct atmel_tc *atmel_tc_alloc(unsigned block);
 extern void atmel_tc_free(struct atmel_tc *tc);
 
 /* platform-specific ATMEL_TC_TIMER_CLOCKx divisors (0 means 32KiHz) */
-- 
cgit v1.2.3


From 84f462371cc07272a17e2ae96c3540f795db273a Mon Sep 17 00:00:00 2001
From: Gaël PORTAY <gael.portay@gmail.com>
Date: Sat, 6 Sep 2014 19:52:36 +0200
Subject: ARM: at91/tclib: mask interruptions at shutdown and probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shutdown properly the timer counter block by masking interruptions. Otherwise,
a segmentation may happen when kexec-ing a new kernel (see backtrace below).
An interruption may happen before the handler is set, leading to a kernel
segmentation fault.

Furthermore, we make sure the interruptions are masked when the driver is
initialized. This will prevent freshly kexec-ed kernel from crashing when
launched from a kernel which does not properly mask interruptions at shutdown.

The backtrace below happened after kexec-ing a new kernel, from a kernel
that did not shut down properly leaving interruptions unmasked.

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: Oops: 80000005 [#1] ARM
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 3.16.0+ #144
task: c1828aa0 ti: c182a000 task.ti: c182a000
PC is at 0x0
LR is at ch2_irq+0x28/0x30
pc : [<00000000>]    lr : [<c01db904>]    psr: 000000d3
sp : c182bd38  ip : c182bd48  fp : c182bd44
r10: c0373390  r9 : c1825b00  r8 : 60000053
r7 : 00000000  r6 : 00000000  r5 : 00000013  r4 : c036e800
r3 : 00000000  r2 : 00002004  r1 : c036e760  r0 : c036e760
Flags: nzcv  IRQs off  FIQs off  Mode SVC_32  ISA ARM  Segment kernel
Control: 0005317f  Table: 20004000  DAC: 00000017
Process swapper (pid: 1, stack limit = 0xc182a1c0)
Stack: (0xc182bd38 to 0xc182c000)
bd20:                                                       c182bd7c c182bd48
bd40: c0045430 c01db8ec 00000000 c18c6f40 c182bd74 c1825b00 c035cec4 00000000
bd60: c182be2c 60000053 c1825b34 00000000 c182bd94 c182bd80 c0045570 c0045408
bd80: 00000000 c1825b00 c182bdac c182bd98 c0047f34 c0045550 00000013 c036619c
bda0: c182bdc4 c182bdb0 c0044da4 c0047e98 0000007f 00000013 c182bde4 c182bdc8
bdc0: c0009e34 c0044d8c fefff000 c0046728 60000053 ffffffff c182bdf4 c182bde8
bde0: c00086a8 c0009ddc c182be74 c182bdf8 c000cb80 c0008674 00000000 00000013
be00: 00000000 00014200 c1825b00 c036e800 00000013 c035ed98 60000053 c1825b34
be20: 00000000 c182be74 c182be20 c182be40 c0047994 c0046728 60000053 ffffffff
be40: 00000013 c036e800 c182be64 c1825b00 00000013 c036e800 c035ed98 c03874bc
be60: 00000004 c036e700 c182be94 c182be78 c004689c c0046398 c036e760 c18c6080
be80: 00000000 c035ed10 c182bedc c182be98 c0348b08 c004684c 0000000c c034dac8
bea0: 004c4b3f c028c338 c036e760 00000013 c014ecc8 c18e67e0 c035b9c0 c0348884
bec0: c035b9c0 c182a020 00000000 00000000 c182bf54 c182bee0 c00089fc c0348894
bee0: c00da51c c1ffcc78 c182bf0c c182bef8 c002d100 c002d09c c1ffcc78 00000000
bf00: c182bf54 c182bf10 c002d308 c0336570 c182bf3c c0334e44 00000003 00000003
bf20: 00000030 c0334b44 c0044d74 00000003 00000003 c034dac8 c0350a94 c0373440
bf40: c0373440 00000030 c182bf94 c182bf58 c0336d24 c000890c 00000003 00000003
bf60: c0336560 c182bf64 c182bf64 6e616e0d 00000000 c0272fc8 00000000 00000000
bf80: 00000000 00000000 c182bfac c182bf98 c0272fd8 c0336bd8 c182a000 00000000
bfa0: 00000000 c182bfb0 c00095d0 c0272fd8 00000000 00000000 00000000 00000000
bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 374d27cd 33cc33e4
Backtrace:
[<c01db8dc>] (ch2_irq) from [<c0045430>] (handle_irq_event_percpu+0x38/0x148)
[<c00453f8>] (handle_irq_event_percpu) from [<c0045570>] (handle_irq_event+0x30/0x40)
 r10:00000000 r9:c1825b34 r8:60000053 r7:c182be2c r6:00000000 r5:c035cec4
 r4:c1825b00
[<c0045540>] (handle_irq_event) from [<c0047f34>] (handle_fasteoi_irq+0xac/0x11c)
 r4:c1825b00 r3:00000000
[<c0047e88>] (handle_fasteoi_irq) from [<c0044da4>] (generic_handle_irq+0x28/0x38)
 r5:c036619c r4:00000013
[<c0044d7c>] (generic_handle_irq) from [<c0009e34>] (handle_IRQ+0x68/0x88)
 r4:00000013 r3:0000007f
[<c0009dcc>] (handle_IRQ) from [<c00086a8>] (at91_aic_handle_irq+0x44/0x4c)
 r6:ffffffff r5:60000053 r4:c0046728 r3:fefff000
[<c0008664>] (at91_aic_handle_irq) from [<c000cb80>] (__irq_svc+0x40/0x4c)
Exception stack(0xc182bdf8 to 0xc182be40)
bde0:                                                       00000000 00000013
be00: 00000000 00014200 c1825b00 c036e800 00000013 c035ed98 60000053 c1825b34
be20: 00000000 c182be74 c182be20 c182be40 c0047994 c0046728 60000053 ffffffff
[<c0046388>] (__setup_irq) from [<c004689c>] (setup_irq+0x60/0x8c)
 r10:c036e700 r9:00000004 r8:c03874bc r7:c035ed98 r6:c036e800 r5:00000013
 r4:c1825b00
[<c004683c>] (setup_irq) from [<c0348b08>] (tcb_clksrc_init+0x284/0x31c)
 r6:c035ed10 r5:00000000 r4:c18c6080 r3:c036e760
[<c0348884>] (tcb_clksrc_init) from [<c00089fc>] (do_one_initcall+0x100/0x1b4)
 r10:00000000 r9:00000000 r8:c182a020 r7:c035b9c0 r6:c0348884 r5:c035b9c0
 r4:c18e67e0
[<c00088fc>] (do_one_initcall) from [<c0336d24>] (kernel_init_freeable+0x15c/0x224)
 r9:00000030 r8:c0373440 r7:c0373440 r6:c0350a94 r5:c034dac8 r4:00000003
[<c0336bc8>] (kernel_init_freeable) from [<c0272fd8>] (kernel_init+0x10/0xec)
 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c0272fc8 r4:00000000
[<c0272fc8>] (kernel_init) from [<c00095d0>] (ret_from_fork+0x14/0x24)
 r4:00000000 r3:c182a000
Code: bad PC value
---[ end trace 5b30f0017e282e47 ]---
Kernel panic - not syncing: Fatal exception in interrupt

Signed-off-by: Gaël PORTAY <gael.portay@gmail.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/misc/atmel_tclib.c | 16 ++++++++++++++++
 include/linux/atmel_tc.h   |  5 +++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
index d505d1e0857b..0ca05c3ec8d6 100644
--- a/drivers/misc/atmel_tclib.c
+++ b/drivers/misc/atmel_tclib.c
@@ -109,6 +109,7 @@ static int __init tc_probe(struct platform_device *pdev)
 	struct clk	*clk;
 	int		irq;
 	struct resource	*r;
+	unsigned int	i;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -157,18 +158,33 @@ static int __init tc_probe(struct platform_device *pdev)
 	if (tc->irq[2] < 0)
 		tc->irq[2] = irq;
 
+	for (i = 0; i < 3; i++)
+		writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
+
 	spin_lock(&tc_list_lock);
 	list_add_tail(&tc->node, &tc_list);
 	spin_unlock(&tc_list_lock);
 
+	platform_set_drvdata(pdev, tc);
+
 	return 0;
 }
 
+static void tc_shutdown(struct platform_device *pdev)
+{
+	int i;
+	struct atmel_tc *tc = platform_get_drvdata(pdev);
+
+	for (i = 0; i < 3; i++)
+		writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
+}
+
 static struct platform_driver tc_driver = {
 	.driver = {
 		.name	= "atmel_tcb",
 		.of_match_table	= of_match_ptr(atmel_tcb_dt_ids),
 	},
+	.shutdown = tc_shutdown,
 };
 
 static int __init tc_init(void)
diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h
index d8aa88461c64..b87c1c7c242a 100644
--- a/include/linux/atmel_tc.h
+++ b/include/linux/atmel_tc.h
@@ -260,5 +260,10 @@ extern const u8 atmel_tc_divisors[5];
 #define     ATMEL_TC_LDRAS	(1 <<  5)	/* RA loading */
 #define     ATMEL_TC_LDRBS	(1 <<  6)	/* RB loading */
 #define     ATMEL_TC_ETRGS	(1 <<  7)	/* external trigger */
+#define     ATMEL_TC_ALL_IRQ	(ATMEL_TC_COVFS	| ATMEL_TC_LOVRS | \
+				 ATMEL_TC_CPAS | ATMEL_TC_CPBS | \
+				 ATMEL_TC_CPCS | ATMEL_TC_LDRAS | \
+				 ATMEL_TC_LDRBS | ATMEL_TC_ETRGS) \
+				 /* all IRQs */
 
 #endif
-- 
cgit v1.2.3


From ff9ea323816dc1c8ac7144afd4eab3ac97704430 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 8 Sep 2014 08:03:56 +0900
Subject: block, bdi: an active gendisk always has a request_queue associated
 with it

bdev_get_queue() returns the request_queue associated with the
specified block_device.  blk_get_backing_dev_info() makes use of
bdev_get_queue() to determine the associated bdi given a block_device.

All the callers of bdev_get_queue() including
blk_get_backing_dev_info() assume that bdev_get_queue() may return
NULL and implement NULL handling; however, bdev_get_queue() requires
the passed in block_device is opened and attached to its gendisk.
Because an active gendisk always has a valid request_queue associated
with it, bdev_get_queue() can never return NULL and neither can
blk_get_backing_dev_info().

Make it clear that neither of the two functions can return NULL and
remove NULL handling from all the callers.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Chris Mason <clm@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       | 10 +++-------
 block/compat_ioctl.c   |  4 ----
 block/ioctl.c          |  4 ----
 fs/block_dev.c         |  2 --
 fs/btrfs/disk-io.c     |  2 +-
 fs/xfs/xfs_buf.c       |  2 --
 include/linux/blkdev.h |  2 +-
 7 files changed, 5 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 93603e6ff479..817446175489 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q)
  * @bdev:	device
  *
  * Locates the passed device's request queue and returns the address of its
- * backing_dev_info
- *
- * Will return NULL if the request queue cannot be located.
+ * backing_dev_info.  This function can only be called if @bdev is opened
+ * and the return value is never NULL.
  */
 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 {
-	struct backing_dev_info *ret = NULL;
 	struct request_queue *q = bdev_get_queue(bdev);
 
-	if (q)
-		ret = &q->backing_dev_info;
-	return ret;
+	return &q->backing_dev_info;
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
 
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 18b282ce361e..f678c733df40 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -709,8 +709,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 		if (!arg)
 			return -EINVAL;
 		bdi = blk_get_backing_dev_info(bdev);
-		if (bdi == NULL)
-			return -ENOTTY;
 		return compat_put_long(arg,
 				       (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
 	case BLKROGET: /* compatible */
@@ -731,8 +729,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 		bdi = blk_get_backing_dev_info(bdev);
-		if (bdi == NULL)
-			return -ENOTTY;
 		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
 		return 0;
 	case BLKGETSIZE:
diff --git a/block/ioctl.c b/block/ioctl.c
index d6cda8147c91..6c7bf903742f 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -356,8 +356,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		if (!arg)
 			return -EINVAL;
 		bdi = blk_get_backing_dev_info(bdev);
-		if (bdi == NULL)
-			return -ENOTTY;
 		return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
 	case BLKROGET:
 		return put_int(arg, bdev_read_only(bdev) != 0);
@@ -386,8 +384,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		if(!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 		bdi = blk_get_backing_dev_info(bdev);
-		if (bdi == NULL)
-			return -ENOTTY;
 		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
 		return 0;
 	case BLKBSZSET:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6d7274619bf9..d3251eca6429 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1173,8 +1173,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (!ret) {
 				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
 				bdi = blk_get_backing_dev_info(bdev);
-				if (bdi == NULL)
-					bdi = &default_backing_dev_info;
 				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
 			}
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d0ed9e664f7d..39ff591ae1b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1694,7 +1694,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
 		if (!device->bdev)
 			continue;
 		bdi = blk_get_backing_dev_info(device->bdev);
-		if (bdi && bdi_congested(bdi, bdi_bits)) {
+		if (bdi_congested(bdi, bdi_bits)) {
 			ret = 1;
 			break;
 		}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cd7b8ca9b064..497fcde381d7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1678,8 +1678,6 @@ xfs_alloc_buftarg(
 	btp->bt_dev =  bdev->bd_dev;
 	btp->bt_bdev = bdev;
 	btp->bt_bdi = blk_get_backing_dev_info(bdev);
-	if (!btp->bt_bdi)
-		goto error;
 
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 518b46555b80..e267bf0db559 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,7 +865,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
-	return bdev->bd_disk->queue;
+	return bdev->bd_disk->queue;	/* this is never NULL */
 }
 
 /*
-- 
cgit v1.2.3


From e36f1dfce0b45d347927568efe1088821758cc3c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 8 Sep 2014 08:03:57 +0900
Subject: bdi: remove unused stuff

Two flags and one bdi_writeback field are no longer used.  Remove
them.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index e488e9459a93..2103a7fa3fd8 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -28,12 +28,10 @@ struct dentry;
  * Bits in backing_dev_info.state
  */
 enum bdi_state {
-	BDI_wb_alloc,		/* Default embedded wb allocated */
 	BDI_async_congested,	/* The async (write) queue is getting full */
 	BDI_sync_congested,	/* The sync queue is getting full */
 	BDI_registered,		/* bdi_register() was done */
 	BDI_writeback_running,	/* Writeback is in progress */
-	BDI_unused,		/* Available bits start here */
 };
 
 typedef int (congested_fn)(void *, int);
@@ -50,7 +48,6 @@ enum bdi_stat_item {
 
 struct bdi_writeback {
 	struct backing_dev_info *bdi;	/* our parent bdi */
-	unsigned int nr;
 
 	unsigned long last_old_flush;	/* last old data flush */
 
-- 
cgit v1.2.3


From 018a17bdc8658ad448497c84d4ba21b6985820ec Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 8 Sep 2014 08:04:01 +0900
Subject: bdi: reimplement bdev_inode_switch_bdi()

A block_device may be attached to different gendisks and thus
different bdis over time.  bdev_inode_switch_bdi() is used to switch
the associated bdi.  The function assumes that the inode could be
dirty and transfers it between bdis if so.  This is a bit nasty in
that it reaches into bdi internals.

This patch reimplements the function so that it writes out the inode
if dirty.  This is a lot simpler and can be implemented without
exposing bdi internals.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/block_dev.c              | 32 +++++++++++---------------------
 include/linux/backing-dev.h |  1 -
 mm/backing-dev.c            |  2 +-
 3 files changed, 12 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index d3251eca6429..cc8d68ac29aa 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -50,32 +50,22 @@ inline struct block_device *I_BDEV(struct inode *inode)
 EXPORT_SYMBOL(I_BDEV);
 
 /*
- * Move the inode from its current bdi to a new bdi. If the inode is dirty we
- * need to move it onto the dirty list of @dst so that the inode is always on
- * the right list.
+ * Move the inode from its current bdi to a new bdi.  Make sure the inode
+ * is clean before moving so that it doesn't linger on the old bdi.
  */
 static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
-	struct backing_dev_info *old = inode->i_data.backing_dev_info;
-	bool wakeup_bdi = false;
-
-	if (unlikely(dst == old))		/* deadlock avoidance */
-		return;
-	bdi_lock_two(&old->wb, &dst->wb);
-	spin_lock(&inode->i_lock);
-	inode->i_data.backing_dev_info = dst;
-	if (inode->i_state & I_DIRTY) {
-		if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb))
-			wakeup_bdi = true;
-		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+	while (true) {
+		spin_lock(&inode->i_lock);
+		if (!(inode->i_state & I_DIRTY)) {
+			inode->i_data.backing_dev_info = dst;
+			spin_unlock(&inode->i_lock);
+			return;
+		}
+		spin_unlock(&inode->i_lock);
+		WARN_ON_ONCE(write_inode_now(inode, true));
 	}
-	spin_unlock(&inode->i_lock);
-	spin_unlock(&old->wb.list_lock);
-	spin_unlock(&dst->wb.list_lock);
-
-	if (wakeup_bdi)
-		bdi_wakeup_thread_delayed(dst);
 }
 
 /* Kill _all_ buffers and pagecache , dirty or not.. */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2103a7fa3fd8..5da6012b7a14 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -121,7 +121,6 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi);
 void bdi_writeback_workfn(struct work_struct *work);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
-void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
 
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index b65fe93ad612..7d63d5e9d3de 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -40,7 +40,7 @@ LIST_HEAD(bdi_list);
 /* bdi_wq serves all asynchronous writeback tasks */
 struct workqueue_struct *bdi_wq;
 
-void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
+static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
 {
 	if (wb1 < wb2) {
 		spin_lock(&wb1->list_lock);
-- 
cgit v1.2.3


From e676253b19b2d269cccf67fdb1592120a0cd0676 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Date: Tue, 5 Aug 2014 11:45:59 +0200
Subject: serial/8250: Add support for RS485 IOCTLs

This patch allow the users of the 8250 infrastructure to define a
handler for RS485 configration.

If no handler is defined the 8250 driver will work as usual.

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Acked-by: Alan Cox <alan@linux.intel.com>
--
v2:Change suggested by Alan "One Thousand Gnomes":
- Move rs485 structure further down on the uart_8250_port structure

 drivers/tty/serial/8250/8250_core.c | 39 +++++++++++++++++++++++++++++++++++++
 include/linux/serial_8250.h         |  3 +++
 2 files changed, 42 insertions(+)
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c | 39 +++++++++++++++++++++++++++++++++++++
 include/linux/serial_8250.h         |  3 +++
 2 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 1d42dba6121d..b28ed1beb50f 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -2843,6 +2843,42 @@ serial8250_verify_port(struct uart_port *port, struct serial_struct *ser)
 	return 0;
 }
 
+static int serial8250_ioctl(struct uart_port *port, unsigned int cmd,
+			   unsigned long arg)
+{
+	struct uart_8250_port *up =
+		container_of(port, struct uart_8250_port, port);
+	int ret;
+	struct serial_rs485 rs485_config;
+
+	if (!up->rs485_config)
+		return -ENOIOCTLCMD;
+
+	switch (cmd) {
+	case TIOCSRS485:
+		if (copy_from_user(&rs485_config, (void __user *)arg,
+				   sizeof(rs485_config)))
+			return -EFAULT;
+
+		ret = up->rs485_config(up, &rs485_config);
+		if (ret)
+			return ret;
+
+		memcpy(&up->rs485, &rs485_config, sizeof(rs485_config));
+
+		return 0;
+	case TIOCGRS485:
+		if (copy_to_user((void __user *)arg, &up->rs485,
+				 sizeof(up->rs485)))
+			return -EFAULT;
+		return 0;
+	default:
+		break;
+	}
+
+	return -ENOIOCTLCMD;
+}
+
 static const char *
 serial8250_type(struct uart_port *port)
 {
@@ -2872,6 +2908,7 @@ static struct uart_ops serial8250_pops = {
 	.request_port	= serial8250_request_port,
 	.config_port	= serial8250_config_port,
 	.verify_port	= serial8250_verify_port,
+	.ioctl		= serial8250_ioctl,
 #ifdef CONFIG_CONSOLE_POLL
 	.poll_get_char = serial8250_get_poll_char,
 	.poll_put_char = serial8250_put_poll_char,
@@ -3388,6 +3425,8 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 		uart->port.fifosize	= up->port.fifosize;
 		uart->tx_loadsz		= up->tx_loadsz;
 		uart->capabilities	= up->capabilities;
+		uart->rs485_config	= up->rs485_config;
+		uart->rs485		= up->rs485;
 
 		/* Take tx_loadsz from fifosize if it wasn't set separately */
 		if (uart->port.fifosize && !uart->tx_loadsz)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index f93649e22c43..6dd671765312 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -96,10 +96,13 @@ struct uart_8250_port {
 	unsigned char		msr_saved_flags;
 
 	struct uart_8250_dma	*dma;
+	struct serial_rs485     rs485;
 
 	/* 8250 specific callbacks */
 	int			(*dl_read)(struct uart_8250_port *);
 	void			(*dl_write)(struct uart_8250_port *, int);
+	int			(*rs485_config)(struct uart_8250_port *,
+						struct serial_rs485 *rs485);
 };
 
 static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up)
-- 
cgit v1.2.3


From 55e15c949fd05d247a889df0ed0177a676fec665 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Sep 2014 12:52:17 +0200
Subject: PM / domains: Remove the pm_genpd_add|remove_callbacks APIs

There are no users of these APIs. To simplify the generic power domain
let's remove them.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 106 --------------------------------------------
 include/linux/pm_domain.h   |  19 --------
 2 files changed, 125 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index eee55c1e5fde..e613e3cb96d8 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1743,112 +1743,6 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 	return ret;
 }
 
-/**
- * pm_genpd_add_callbacks - Add PM domain callbacks to a given device.
- * @dev: Device to add the callbacks to.
- * @ops: Set of callbacks to add.
- * @td: Timing data to add to the device along with the callbacks (optional).
- *
- * Every call to this routine should be balanced with a call to
- * __pm_genpd_remove_callbacks() and they must not be nested.
- */
-int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops,
-			   struct gpd_timing_data *td)
-{
-	struct generic_pm_domain_data *gpd_data_new, *gpd_data = NULL;
-	int ret = 0;
-
-	if (!(dev && ops))
-		return -EINVAL;
-
-	gpd_data_new = __pm_genpd_alloc_dev_data(dev);
-	if (!gpd_data_new)
-		return -ENOMEM;
-
-	pm_runtime_disable(dev);
-	device_pm_lock();
-
-	ret = dev_pm_get_subsys_data(dev);
-	if (ret)
-		goto out;
-
-	spin_lock_irq(&dev->power.lock);
-
-	if (dev->power.subsys_data->domain_data) {
-		gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
-	} else {
-		gpd_data = gpd_data_new;
-		dev->power.subsys_data->domain_data = &gpd_data->base;
-	}
-	gpd_data->refcount++;
-	gpd_data->ops = *ops;
-	if (td)
-		gpd_data->td = *td;
-
-	spin_unlock_irq(&dev->power.lock);
-
- out:
-	device_pm_unlock();
-	pm_runtime_enable(dev);
-
-	if (gpd_data != gpd_data_new)
-		__pm_genpd_free_dev_data(dev, gpd_data_new);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks);
-
-/**
- * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device.
- * @dev: Device to remove the callbacks from.
- * @clear_td: If set, clear the device's timing data too.
- *
- * This routine can only be called after pm_genpd_add_callbacks().
- */
-int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td)
-{
-	struct generic_pm_domain_data *gpd_data = NULL;
-	bool remove = false;
-	int ret = 0;
-
-	if (!(dev && dev->power.subsys_data))
-		return -EINVAL;
-
-	pm_runtime_disable(dev);
-	device_pm_lock();
-
-	spin_lock_irq(&dev->power.lock);
-
-	if (dev->power.subsys_data->domain_data) {
-		gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
-		gpd_data->ops = (struct gpd_dev_ops){ NULL };
-		if (clear_td)
-			gpd_data->td = (struct gpd_timing_data){ 0 };
-
-		if (--gpd_data->refcount == 0) {
-			dev->power.subsys_data->domain_data = NULL;
-			remove = true;
-		}
-	} else {
-		ret = -EINVAL;
-	}
-
-	spin_unlock_irq(&dev->power.lock);
-
-	device_pm_unlock();
-	pm_runtime_enable(dev);
-
-	if (ret)
-		return ret;
-
-	dev_pm_put_subsys_data(dev);
-	if (remove)
-		__pm_genpd_free_dev_data(dev, gpd_data);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks);
-
 /**
  * pm_genpd_attach_cpuidle - Connect the given PM domain with cpuidle.
  * @genpd: PM domain to be connected with cpuidle.
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index ebc4c76ffb73..9ae2b9e88d61 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -151,10 +151,6 @@ extern int pm_genpd_add_subdomain_names(const char *master_name,
 					const char *subdomain_name);
 extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 				     struct generic_pm_domain *target);
-extern int pm_genpd_add_callbacks(struct device *dev,
-				  struct gpd_dev_ops *ops,
-				  struct gpd_timing_data *td);
-extern int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td);
 extern int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state);
 extern int pm_genpd_name_attach_cpuidle(const char *name, int state);
 extern int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd);
@@ -217,16 +213,6 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 {
 	return -ENOSYS;
 }
-static inline int pm_genpd_add_callbacks(struct device *dev,
-					 struct gpd_dev_ops *ops,
-					 struct gpd_timing_data *td)
-{
-	return -ENOSYS;
-}
-static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td)
-{
-	return -ENOSYS;
-}
 static inline int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int st)
 {
 	return -ENOSYS;
@@ -281,11 +267,6 @@ static inline int pm_genpd_name_add_device(const char *domain_name,
 	return __pm_genpd_name_add_device(domain_name, dev, NULL);
 }
 
-static inline int pm_genpd_remove_callbacks(struct device *dev)
-{
-	return __pm_genpd_remove_callbacks(dev, true);
-}
-
 #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME
 extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd);
 extern void pm_genpd_poweroff_unused(void);
-- 
cgit v1.2.3


From 67da6d4bf43c4208433ef8f3ee487401b4dc9c74 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Sep 2014 12:52:18 +0200
Subject: PM / domains: Ignore callbacks for subsys generic_pm_domain_data

In a step of simplifying the generic power domain let's move away from
using these callbacks.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 131 +++-----------------------------------------
 include/linux/pm_domain.h   |   1 -
 2 files changed, 8 insertions(+), 124 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e613e3cb96d8..aa5b14c1e643 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -25,10 +25,6 @@
 	__routine = genpd->dev_ops.callback; 			\
 	if (__routine) {					\
 		__ret = __routine(dev); 			\
-	} else {						\
-		__routine = dev_gpd_data(dev)->ops.callback;	\
-		if (__routine) 					\
-			__ret = __routine(dev);			\
 	}							\
 	__ret;							\
 })
@@ -1871,10 +1867,6 @@ static int pm_genpd_default_save_state(struct device *dev)
 {
 	int (*cb)(struct device *__dev);
 
-	cb = dev_gpd_data(dev)->ops.save_state;
-	if (cb)
-		return cb(dev);
-
 	if (dev->type && dev->type->pm)
 		cb = dev->type->pm->runtime_suspend;
 	else if (dev->class && dev->class->pm)
@@ -1898,10 +1890,6 @@ static int pm_genpd_default_restore_state(struct device *dev)
 {
 	int (*cb)(struct device *__dev);
 
-	cb = dev_gpd_data(dev)->ops.restore_state;
-	if (cb)
-		return cb(dev);
-
 	if (dev->type && dev->type->pm)
 		cb = dev->type->pm->runtime_resume;
 	else if (dev->class && dev->class->pm)
@@ -1917,109 +1905,6 @@ static int pm_genpd_default_restore_state(struct device *dev)
 	return cb ? cb(dev) : 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-
-/**
- * pm_genpd_default_suspend - Default "device suspend" for PM domians.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_suspend(struct device *dev)
-{
-	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend;
-
-	return cb ? cb(dev) : pm_generic_suspend(dev);
-}
-
-/**
- * pm_genpd_default_suspend_late - Default "late device suspend" for PM domians.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_suspend_late(struct device *dev)
-{
-	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend_late;
-
-	return cb ? cb(dev) : pm_generic_suspend_late(dev);
-}
-
-/**
- * pm_genpd_default_resume_early - Default "early device resume" for PM domians.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_resume_early(struct device *dev)
-{
-	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume_early;
-
-	return cb ? cb(dev) : pm_generic_resume_early(dev);
-}
-
-/**
- * pm_genpd_default_resume - Default "device resume" for PM domians.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_resume(struct device *dev)
-{
-	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume;
-
-	return cb ? cb(dev) : pm_generic_resume(dev);
-}
-
-/**
- * pm_genpd_default_freeze - Default "device freeze" for PM domians.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_freeze(struct device *dev)
-{
-	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze;
-
-	return cb ? cb(dev) : pm_generic_freeze(dev);
-}
-
-/**
- * pm_genpd_default_freeze_late - Default "late device freeze" for PM domians.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_freeze_late(struct device *dev)
-{
-	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late;
-
-	return cb ? cb(dev) : pm_generic_freeze_late(dev);
-}
-
-/**
- * pm_genpd_default_thaw_early - Default "early device thaw" for PM domians.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_thaw_early(struct device *dev)
-{
-	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early;
-
-	return cb ? cb(dev) : pm_generic_thaw_early(dev);
-}
-
-/**
- * pm_genpd_default_thaw - Default "device thaw" for PM domians.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_thaw(struct device *dev)
-{
-	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw;
-
-	return cb ? cb(dev) : pm_generic_thaw(dev);
-}
-
-#else /* !CONFIG_PM_SLEEP */
-
-#define pm_genpd_default_suspend	NULL
-#define pm_genpd_default_suspend_late	NULL
-#define pm_genpd_default_resume_early	NULL
-#define pm_genpd_default_resume		NULL
-#define pm_genpd_default_freeze		NULL
-#define pm_genpd_default_freeze_late	NULL
-#define pm_genpd_default_thaw_early	NULL
-#define pm_genpd_default_thaw		NULL
-
-#endif /* !CONFIG_PM_SLEEP */
-
 /**
  * pm_genpd_init - Initialize a generic I/O PM domain object.
  * @genpd: PM domain object to initialize.
@@ -2071,14 +1956,14 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->domain.ops.complete = pm_genpd_complete;
 	genpd->dev_ops.save_state = pm_genpd_default_save_state;
 	genpd->dev_ops.restore_state = pm_genpd_default_restore_state;
-	genpd->dev_ops.suspend = pm_genpd_default_suspend;
-	genpd->dev_ops.suspend_late = pm_genpd_default_suspend_late;
-	genpd->dev_ops.resume_early = pm_genpd_default_resume_early;
-	genpd->dev_ops.resume = pm_genpd_default_resume;
-	genpd->dev_ops.freeze = pm_genpd_default_freeze;
-	genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late;
-	genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early;
-	genpd->dev_ops.thaw = pm_genpd_default_thaw;
+	genpd->dev_ops.suspend = pm_generic_suspend;
+	genpd->dev_ops.suspend_late = pm_generic_suspend_late;
+	genpd->dev_ops.resume_early = pm_generic_resume_early;
+	genpd->dev_ops.resume = pm_generic_resume;
+	genpd->dev_ops.freeze = pm_generic_freeze;
+	genpd->dev_ops.freeze_late = pm_generic_freeze_late;
+	genpd->dev_ops.thaw_early = pm_generic_thaw_early;
+	genpd->dev_ops.thaw = pm_generic_thaw;
 	mutex_lock(&gpd_list_lock);
 	list_add(&genpd->gpd_list_node, &gpd_list);
 	mutex_unlock(&gpd_list_lock);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 9ae2b9e88d61..436ea149d40c 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -108,7 +108,6 @@ struct gpd_timing_data {
 
 struct generic_pm_domain_data {
 	struct pm_domain_data base;
-	struct gpd_dev_ops ops;
 	struct gpd_timing_data td;
 	struct notifier_block nb;
 	struct mutex lock;
-- 
cgit v1.2.3


From 1e0407ca54d28db8e5f02e437ff21cc6416c0be8 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Sep 2014 12:52:19 +0200
Subject: PM / domains: Remove system PM callbacks from gpd_dev_ops

There no users of these callbacks, let's simplify the generic power
domain by removing them.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 64 ++++++---------------------------------------
 include/linux/pm_domain.h   |  8 ------
 2 files changed, 8 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index aa5b14c1e643..e777609ccc77 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -770,46 +770,6 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd,
 	return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev);
 }
 
-static int genpd_suspend_dev(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, suspend, dev);
-}
-
-static int genpd_suspend_late(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, suspend_late, dev);
-}
-
-static int genpd_resume_early(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, resume_early, dev);
-}
-
-static int genpd_resume_dev(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, resume, dev);
-}
-
-static int genpd_freeze_dev(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, freeze, dev);
-}
-
-static int genpd_freeze_late(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, freeze_late, dev);
-}
-
-static int genpd_thaw_early(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, thaw_early, dev);
-}
-
-static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, thaw, dev);
-}
-
 /**
  * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters.
  * @genpd: PM domain to power off, if possible.
@@ -991,7 +951,7 @@ static int pm_genpd_suspend(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_suspend_dev(genpd, dev);
+	return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev);
 }
 
 /**
@@ -1012,7 +972,7 @@ static int pm_genpd_suspend_late(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_suspend_late(genpd, dev);
+	return genpd->suspend_power_off ? 0 : pm_generic_suspend_late(dev);
 }
 
 /**
@@ -1099,7 +1059,7 @@ static int pm_genpd_resume_early(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_resume_early(genpd, dev);
+	return genpd->suspend_power_off ? 0 : pm_generic_resume_early(dev);
 }
 
 /**
@@ -1120,7 +1080,7 @@ static int pm_genpd_resume(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_resume_dev(genpd, dev);
+	return genpd->suspend_power_off ? 0 : pm_generic_resume(dev);
 }
 
 /**
@@ -1141,7 +1101,7 @@ static int pm_genpd_freeze(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_freeze_dev(genpd, dev);
+	return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev);
 }
 
 /**
@@ -1163,7 +1123,7 @@ static int pm_genpd_freeze_late(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_freeze_late(genpd, dev);
+	return genpd->suspend_power_off ? 0 : pm_generic_freeze_late(dev);
 }
 
 /**
@@ -1227,7 +1187,7 @@ static int pm_genpd_thaw_early(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_thaw_early(genpd, dev);
+	return genpd->suspend_power_off ? 0 : pm_generic_thaw_early(dev);
 }
 
 /**
@@ -1248,7 +1208,7 @@ static int pm_genpd_thaw(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_thaw_dev(genpd, dev);
+	return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev);
 }
 
 /**
@@ -1956,14 +1916,6 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->domain.ops.complete = pm_genpd_complete;
 	genpd->dev_ops.save_state = pm_genpd_default_save_state;
 	genpd->dev_ops.restore_state = pm_genpd_default_restore_state;
-	genpd->dev_ops.suspend = pm_generic_suspend;
-	genpd->dev_ops.suspend_late = pm_generic_suspend_late;
-	genpd->dev_ops.resume_early = pm_generic_resume_early;
-	genpd->dev_ops.resume = pm_generic_resume;
-	genpd->dev_ops.freeze = pm_generic_freeze;
-	genpd->dev_ops.freeze_late = pm_generic_freeze_late;
-	genpd->dev_ops.thaw_early = pm_generic_thaw_early;
-	genpd->dev_ops.thaw = pm_generic_thaw;
 	mutex_lock(&gpd_list_lock);
 	list_add(&genpd->gpd_list_node, &gpd_list);
 	mutex_unlock(&gpd_list_lock);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 436ea149d40c..b6343d4b9b04 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -35,14 +35,6 @@ struct gpd_dev_ops {
 	int (*stop)(struct device *dev);
 	int (*save_state)(struct device *dev);
 	int (*restore_state)(struct device *dev);
-	int (*suspend)(struct device *dev);
-	int (*suspend_late)(struct device *dev);
-	int (*resume_early)(struct device *dev);
-	int (*resume)(struct device *dev);
-	int (*freeze)(struct device *dev);
-	int (*freeze_late)(struct device *dev);
-	int (*thaw_early)(struct device *dev);
-	int (*thaw)(struct device *dev);
 	bool (*active_wakeup)(struct device *dev);
 };
 
-- 
cgit v1.2.3


From c5d79ec2a5715489cff16a0d1cf4fa9108a5509e Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Sep 2014 12:52:22 +0200
Subject: PM / domains: Remove dev_irq_safe from genpd config

The genpd dev_irq_safe configuration somewhat overlaps with the runtime
PM pm_runtime_irq_safe() option. Also, currently genpd don't have a
good way to deal with these device. So, until we figured out if and how
to support this in genpd, let's remove the option to configure it.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 4 ----
 include/linux/pm_domain.h   | 1 -
 2 files changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e777609ccc77..4298a2bcd228 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -615,8 +615,6 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	might_sleep_if(!genpd->dev_irq_safe);
-
 	stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
 	if (stop_ok && !stop_ok(dev))
 		return -EBUSY;
@@ -661,8 +659,6 @@ static int pm_genpd_runtime_resume(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	might_sleep_if(!genpd->dev_irq_safe);
-
 	/* If power.irq_safe, the PM domain is never powered off. */
 	if (dev->power.irq_safe)
 		return genpd_start_dev_no_timing(genpd, dev);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index b6343d4b9b04..360c94ceeebb 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -63,7 +63,6 @@ struct generic_pm_domain {
 	unsigned int suspended_count;	/* System suspend device counter */
 	unsigned int prepared_count;	/* Suspend counter of prepared devices */
 	bool suspend_power_off;	/* Power status before system suspend */
-	bool dev_irq_safe;	/* Device callbacks are IRQ-safe */
 	int (*power_off)(struct generic_pm_domain *domain);
 	s64 power_off_latency_ns;
 	int (*power_on)(struct generic_pm_domain *domain);
-- 
cgit v1.2.3


From d47e6464ae6c96735d4706f5cb0537fe717b6b00 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Sep 2014 12:52:24 +0200
Subject: PM / domains: Remove pm_genpd_syscore_switch() API

The pm_genpd_syscore_poweroff() API and pm_genpd_syscore_poweron() API
makes the pm_genpd_syscore_switch() API redundant.

Moreover, since there are no active users, let's just remove it.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 17 ++++++++++++++---
 include/linux/pm_domain.h   | 16 ++++------------
 2 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index b910d0f6ff60..601e35b2fa71 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1292,13 +1292,13 @@ static void pm_genpd_complete(struct device *dev)
 }
 
 /**
- * pm_genpd_syscore_switch - Switch power during system core suspend or resume.
+ * genpd_syscore_switch - Switch power during system core suspend or resume.
  * @dev: Device that normally is marked as "always on" to switch power for.
  *
  * This routine may only be called during the system core (syscore) suspend or
  * resume phase for devices whose "always on" flags are set.
  */
-void pm_genpd_syscore_switch(struct device *dev, bool suspend)
+static void genpd_syscore_switch(struct device *dev, bool suspend)
 {
 	struct generic_pm_domain *genpd;
 
@@ -1314,7 +1314,18 @@ void pm_genpd_syscore_switch(struct device *dev, bool suspend)
 		genpd->suspended_count--;
 	}
 }
-EXPORT_SYMBOL_GPL(pm_genpd_syscore_switch);
+
+void pm_genpd_syscore_poweroff(struct device *dev)
+{
+	genpd_syscore_switch(dev, true);
+}
+EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweroff);
+
+void pm_genpd_syscore_poweron(struct device *dev)
+{
+	genpd_syscore_switch(dev, false);
+}
+EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron);
 
 #else
 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 360c94ceeebb..3997af691600 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -266,19 +266,11 @@ static inline void pm_genpd_poweroff_unused(void) {}
 #endif
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP
-extern void pm_genpd_syscore_switch(struct device *dev, bool suspend);
+extern void pm_genpd_syscore_poweroff(struct device *dev);
+extern void pm_genpd_syscore_poweron(struct device *dev);
 #else
-static inline void pm_genpd_syscore_switch(struct device *dev, bool suspend) {}
+static inline void pm_genpd_syscore_poweroff(struct device *dev) {}
+static inline void pm_genpd_syscore_poweron(struct device *dev) {}
 #endif
 
-static inline void pm_genpd_syscore_poweroff(struct device *dev)
-{
-	pm_genpd_syscore_switch(dev, true);
-}
-
-static inline void pm_genpd_syscore_poweron(struct device *dev)
-{
-	pm_genpd_syscore_switch(dev, false);
-}
-
 #endif /* _LINUX_PM_DOMAIN_H */
-- 
cgit v1.2.3


From d971f0b0eaaf3f2086bf21bbd64f7ea7e2f28459 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Sep 2014 12:52:25 +0200
Subject: PM / domains: Remove genpd_queue_power_off_work() API

There are no active users of this API. Let's remove it and if future
needs shows up we could consider to have a get/put API instead.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 5 ++++-
 include/linux/pm_domain.h   | 2 --
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 601e35b2fa71..d71d0458d41f 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -422,7 +422,7 @@ static bool genpd_abort_poweroff(struct generic_pm_domain *genpd)
  * Queue up the execution of pm_genpd_poweroff() unless it's already been done
  * before.
  */
-void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
+static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
 {
 	queue_work(pm_wq, &genpd->power_off_work);
 }
@@ -729,6 +729,9 @@ static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static inline void
+genpd_queue_power_off_work(struct generic_pm_domain *genpd) {}
+
 static inline void genpd_power_off_work_fn(struct work_struct *work) {}
 
 #define pm_genpd_runtime_suspend	NULL
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 3997af691600..c19c90b839b8 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -258,10 +258,8 @@ static inline int pm_genpd_name_add_device(const char *domain_name,
 }
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME
-extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd);
 extern void pm_genpd_poweroff_unused(void);
 #else
-static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {}
 static inline void pm_genpd_poweroff_unused(void) {}
 #endif
 
-- 
cgit v1.2.3


From 0f574d4c3a7a325cbbef28ee738dedca9851e957 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Sep 2014 12:52:30 +0200
Subject: PM / domains: Remove default_stop_ok() API

There are currently no need to export default_stop_ok() as an API,
instead let's keep it local to the PM domain governor.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain_governor.c | 7 ++-----
 include/linux/pm_domain.h            | 6 ------
 2 files changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index a089e3bcdfbc..d88a62e104d4 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -42,7 +42,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
  * default_stop_ok - Default PM domain governor routine for stopping devices.
  * @dev: Device to check.
  */
-bool default_stop_ok(struct device *dev)
+static bool default_stop_ok(struct device *dev)
 {
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
 	unsigned long flags;
@@ -229,10 +229,7 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain)
 
 #else /* !CONFIG_PM_RUNTIME */
 
-bool default_stop_ok(struct device *dev)
-{
-	return false;
-}
+static inline bool default_stop_ok(struct device *dev) { return false; }
 
 #define default_power_down_ok	NULL
 #define always_on_power_down_ok	NULL
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index c19c90b839b8..3b59f296e6ec 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -151,8 +151,6 @@ extern void pm_genpd_init(struct generic_pm_domain *genpd,
 extern int pm_genpd_poweron(struct generic_pm_domain *genpd);
 extern int pm_genpd_name_poweron(const char *domain_name);
 
-extern bool default_stop_ok(struct device *dev);
-
 extern struct dev_power_governor pm_domain_always_on_gov;
 #else
 
@@ -231,10 +229,6 @@ static inline int pm_genpd_name_poweron(const char *domain_name)
 {
 	return -ENOSYS;
 }
-static inline bool default_stop_ok(struct device *dev)
-{
-	return false;
-}
 #define simple_qos_governor NULL
 #define pm_domain_always_on_gov NULL
 #endif
-- 
cgit v1.2.3


From ae3c511c2d72161b11e93866203b59a3a37dfac7 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Sep 2014 12:52:31 +0200
Subject: PM / domains: Keep declaration of dev_power_governors together

This is a pure code cleanup in the header file for the PM domain. No
functional change.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_domain.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 3b59f296e6ec..aa03586c94a2 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -117,8 +117,6 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev)
 	return to_gpd_data(dev->power.subsys_data->domain_data);
 }
 
-extern struct dev_power_governor simple_qos_governor;
-
 extern struct generic_pm_domain *dev_to_genpd(struct device *dev);
 extern int __pm_genpd_add_device(struct generic_pm_domain *genpd,
 				 struct device *dev,
@@ -151,6 +149,7 @@ extern void pm_genpd_init(struct generic_pm_domain *genpd,
 extern int pm_genpd_poweron(struct generic_pm_domain *genpd);
 extern int pm_genpd_name_poweron(const char *domain_name);
 
+extern struct dev_power_governor simple_qos_governor;
 extern struct dev_power_governor pm_domain_always_on_gov;
 #else
 
-- 
cgit v1.2.3


From 8a949b07e4062cbd07e04e6a47249e69ca65b944 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Tue, 2 Sep 2014 17:39:19 -0400
Subject: serial: core: Document lock requirement for UPF_* flags updates

The flags field of struct uart_port can only be safely modified
if the port mutex is held; no other lock prevents concurrent
changes from corrupting the field.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index cf3a1e789bf5..8cb267b1fcd5 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -152,6 +152,7 @@ struct uart_port {
 	unsigned long		sysrq;			/* sysrq timeout */
 #endif
 
+	/* flags must be updated while holding port mutex */
 	upf_t			flags;
 
 #define UPF_FOURPORT		((__force upf_t) (1 << 1))
-- 
cgit v1.2.3


From b99b121b2aa42e60e5b73fdd3a49863337839c7b Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 5 Sep 2014 21:02:37 +0200
Subject: tty: serial: 8250_core: allow to overwrite & export
 serial8250_startup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The OMAP version of the 8250 can actually use 1:1 serial8250_startup().
However it needs to be extended by a wake up irq which should to be
requested & enabled at ->startup() time and disabled at ->shutdown() time.

v2…v3: properly copy callbacks
v1…v2: add shutdown callback

Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c | 25 +++++++++++++++++++++++--
 include/linux/serial_8250.h         |  2 ++
 include/linux/serial_core.h         |  2 ++
 3 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 90e0d5e2b7ee..872c020607a8 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -1930,7 +1930,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
 
 #endif /* CONFIG_CONSOLE_POLL */
 
-static int serial8250_startup(struct uart_port *port)
+int serial8250_do_startup(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 	unsigned long flags;
@@ -2181,8 +2181,16 @@ dont_test_tx_en:
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(serial8250_do_startup);
 
-static void serial8250_shutdown(struct uart_port *port)
+static int serial8250_startup(struct uart_port *port)
+{
+	if (port->startup)
+		return port->startup(port);
+	return serial8250_do_startup(port);
+}
+
+void serial8250_do_shutdown(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 	unsigned long flags;
@@ -2232,6 +2240,15 @@ static void serial8250_shutdown(struct uart_port *port)
 	if (port->irq)
 		serial_unlink_irq_chain(up);
 }
+EXPORT_SYMBOL_GPL(serial8250_do_shutdown);
+
+static void serial8250_shutdown(struct uart_port *port)
+{
+	if (port->shutdown)
+		port->shutdown(port);
+	else
+		serial8250_do_shutdown(port);
+}
 
 static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud)
 {
@@ -3466,6 +3483,10 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 		/*  Possibly override set_termios call */
 		if (up->port.set_termios)
 			uart->port.set_termios = up->port.set_termios;
+		if (up->port.startup)
+			uart->port.startup = up->port.startup;
+		if (up->port.shutdown)
+			uart->port.shutdown = up->port.shutdown;
 		if (up->port.pm)
 			uart->port.pm = up->port.pm;
 		if (up->port.handle_break)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 6dd671765312..6fc9d7bee05e 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -124,6 +124,8 @@ extern void serial8250_early_out(struct uart_port *port, int offset, int value);
 extern int setup_early_serial8250_console(char *cmdline);
 extern void serial8250_do_set_termios(struct uart_port *port,
 		struct ktermios *termios, struct ktermios *old);
+extern int serial8250_do_startup(struct uart_port *port);
+extern void serial8250_do_shutdown(struct uart_port *port);
 extern void serial8250_do_pm(struct uart_port *port, unsigned int state,
 			     unsigned int oldstate);
 extern int fsl8250_handle_irq(struct uart_port *port);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 8cb267b1fcd5..3bd7d55eebce 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -122,6 +122,8 @@ struct uart_port {
 	void			(*set_termios)(struct uart_port *,
 				               struct ktermios *new,
 				               struct ktermios *old);
+	int			(*startup)(struct uart_port *port);
+	void			(*shutdown)(struct uart_port *port);
 	int			(*handle_irq)(struct uart_port *);
 	void			(*pm)(struct uart_port *, unsigned int state,
 				      unsigned int old);
-- 
cgit v1.2.3


From 413fffc3a1db7f270afdf1ecb35c1edc013acc68 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 28 Aug 2014 11:22:23 +0530
Subject: cpufreq: Add support for per-policy driver data

Drivers supporting multiple clusters or multiple 'struct cpufreq_policy'
instances may need to keep per-policy data. If the core doesn't provide support
for that, they might do it in the most unoptimized way: 'per-cpu' data.

This patch adds another field in struct cpufreq_policy: 'driver_data'. It isn't
accessed by core and is for driver's internal use only.

Tested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 7d1955afa62c..138336b6bb04 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -112,6 +112,9 @@ struct cpufreq_policy {
 	spinlock_t		transition_lock;
 	wait_queue_head_t	transition_wait;
 	struct task_struct	*transition_task; /* Task which is doing the transition */
+
+	/* For cpufreq driver's internal use */
+	void			*driver_data;
 };
 
 /* Only for ACPI */
-- 
cgit v1.2.3


From 8ca28610e5e37193cd61fefa4310941e28de10ca Mon Sep 17 00:00:00 2001
From: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Date: Thu, 7 Aug 2014 15:14:06 +0200
Subject: mmc: include linux/types.h for bool definition in atmel-mci.h

This patch adds an include of linux/types.h to make sure bool is defined
before utilized in this header file.

Signed-off-by: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Acked-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/atmel-mci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
index 4c7a4b2104bf..91b77f8d495d 100644
--- a/include/linux/atmel-mci.h
+++ b/include/linux/atmel-mci.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_ATMEL_MCI_H
 #define __LINUX_ATMEL_MCI_H
 
+#include <linux/types.h>
+
 #define ATMCI_MAX_NR_SLOTS	2
 
 /**
-- 
cgit v1.2.3


From b3683994843a0ede0e19daccd1ac32a46b21eb39 Mon Sep 17 00:00:00 2001
From: Yi Sun <yi.y.sun@intel.com>
Date: Wed, 13 Aug 2014 13:34:01 +0800
Subject: mmc: Correct the value of MMC_NUM_PHY_PARTITION

eMMC card can support up to 7 physical partitions, including 2 boot,
1 RPMB and 4 GPs. Change MMC_NUM_PHY_PARTITION from 6 to 7, which is
the correct value.

Signed-off-by: Yi Sun <yi.y.sun@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index d424b9de3aff..bde5147a4221 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -214,11 +214,12 @@ enum mmc_blk_status {
 };
 
 /* The number of MMC physical partitions.  These consist of:
- * boot partitions (2), general purpose partitions (4) in MMC v4.4.
+ * boot partitions (2), general purpose partitions (4) and
+ * RPMB partition (1) in MMC v4.4.
  */
 #define MMC_NUM_BOOT_PARTITION	2
 #define MMC_NUM_GP_PARTITION	4
-#define MMC_NUM_PHY_PARTITION	6
+#define MMC_NUM_PHY_PARTITION	7
 #define MAX_MMC_PART_NAME_LEN	20
 
 /*
-- 
cgit v1.2.3


From 3d705d14fe4c72be83bae1610680e209ee226b9d Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 19 Aug 2014 10:45:51 +0200
Subject: mmc: implement Driver Stage Register handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some eMMC and SD cards implement a DSR register that allows to tune
raise/fall times and drive strength of the CMD and DATA outputs.
The values to use depend on the card in use and the host.
It might be needed to reduce the drive strength to prevent voltage peaks
above the host's specification.

Implement a 'dsr' devicetree property that allows to specify the value
to set the DSR to. For non-dt setups the new members of mmc_host can be
set by board code.

This patch was initially authored by Sascha Hauer. It contains
improvements authored by Markus Niebel and Uwe Kleine-König.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Markus Niebel <Markus.Niebel@tq-group.com>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/mmc.txt |  2 ++
 drivers/mmc/core/host.c                       |  8 ++++++++
 drivers/mmc/core/mmc.c                        |  8 ++++++++
 drivers/mmc/core/mmc_ops.c                    | 20 ++++++++++++++++++++
 drivers/mmc/core/mmc_ops.h                    |  1 +
 drivers/mmc/core/sd.c                         |  8 ++++++++
 include/linux/mmc/card.h                      |  3 ++-
 include/linux/mmc/host.h                      |  3 +++
 8 files changed, 52 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt
index 431716e37a39..b52628b18a53 100644
--- a/Documentation/devicetree/bindings/mmc/mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc.txt
@@ -40,6 +40,8 @@ Optional properties:
 - mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported
 - mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported
 - mmc-hs400-1_2v: eMMC HS400 mode(1.2V I/O) is supported
+- dsr: Value the card's (optional) Driver Stage Register (DSR) should be
+  programmed with. Valid range: [0 .. 0xffff].
 
 *NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line
 polarity properties, we have to fix the meaning of the "normal" and "inverted"
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 95cceae96944..d572b2beb65a 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -452,6 +452,14 @@ int mmc_of_parse(struct mmc_host *host)
 	if (of_find_property(np, "mmc-hs400-1_2v", &len))
 		host->caps2 |= MMC_CAP2_HS400_1_2V | MMC_CAP2_HS200_1_2V_SDR;
 
+	host->dsr_req = !of_property_read_u32(np, "dsr", &host->dsr);
+	if (host->dsr_req && (host->dsr & ~0xffff)) {
+		dev_err(host->parent,
+			"device tree specified broken value for DSR: 0x%x, ignoring\n",
+			host->dsr);
+		host->dsr_req = 0;
+	}
+
 	return 0;
 
 out:
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 1eda8dd8c867..31c43165f8bc 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -162,6 +162,7 @@ static int mmc_decode_csd(struct mmc_card *card)
 	csd->read_partial = UNSTUFF_BITS(resp, 79, 1);
 	csd->write_misalign = UNSTUFF_BITS(resp, 78, 1);
 	csd->read_misalign = UNSTUFF_BITS(resp, 77, 1);
+	csd->dsr_imp = UNSTUFF_BITS(resp, 76, 1);
 	csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3);
 	csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4);
 	csd->write_partial = UNSTUFF_BITS(resp, 21, 1);
@@ -1271,6 +1272,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			goto free_card;
 	}
 
+	/*
+	 * handling only for cards supporting DSR and hosts requesting
+	 * DSR configuration
+	 */
+	if (card->csd.dsr_imp && host->dsr_req)
+		mmc_set_dsr(host);
+
 	/*
 	 * Select card, as all following commands rely on that.
 	 */
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index f51b5ba3bbea..ba0275e90617 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -93,6 +93,26 @@ int mmc_deselect_cards(struct mmc_host *host)
 	return _mmc_select_card(host, NULL);
 }
 
+/*
+ * Write the value specified in the device tree or board code into the optional
+ * 16 bit Driver Stage Register. This can be used to tune raise/fall times and
+ * drive strength of the DAT and CMD outputs. The actual meaning of a given
+ * value is hardware dependant.
+ * The presence of the DSR register can be determined from the CSD register,
+ * bit 76.
+ */
+int mmc_set_dsr(struct mmc_host *host)
+{
+	struct mmc_command cmd = {0};
+
+	cmd.opcode = MMC_SET_DSR;
+
+	cmd.arg = (host->dsr << 16) | 0xffff;
+	cmd.flags = MMC_RSP_NONE | MMC_CMD_AC;
+
+	return mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
+}
+
 int mmc_go_idle(struct mmc_host *host)
 {
 	int err;
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 80ae9f4e0293..390dac665b2a 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -14,6 +14,7 @@
 
 int mmc_select_card(struct mmc_card *card);
 int mmc_deselect_cards(struct mmc_host *host);
+int mmc_set_dsr(struct mmc_host *host);
 int mmc_go_idle(struct mmc_host *host);
 int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr);
 int mmc_all_send_cid(struct mmc_host *host, u32 *cid);
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 0c44510bf717..25913889cbaa 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -127,6 +127,7 @@ static int mmc_decode_csd(struct mmc_card *card)
 		csd->read_partial = UNSTUFF_BITS(resp, 79, 1);
 		csd->write_misalign = UNSTUFF_BITS(resp, 78, 1);
 		csd->read_misalign = UNSTUFF_BITS(resp, 77, 1);
+		csd->dsr_imp = UNSTUFF_BITS(resp, 76, 1);
 		csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3);
 		csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4);
 		csd->write_partial = UNSTUFF_BITS(resp, 21, 1);
@@ -953,6 +954,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 		mmc_decode_cid(card);
 	}
 
+	/*
+	 * handling only for cards supporting DSR and hosts requesting
+	 * DSR configuration
+	 */
+	if (card->csd.dsr_imp && host->dsr_req)
+		mmc_set_dsr(host);
+
 	/*
 	 * Select card, as all following commands rely on that.
 	 */
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index bde5147a4221..0ea795f5feb9 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -42,7 +42,8 @@ struct mmc_csd {
 	unsigned int		read_partial:1,
 				read_misalign:1,
 				write_partial:1,
-				write_misalign:1;
+				write_misalign:1,
+				dsr_imp:1;
 };
 
 struct mmc_ext_csd {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 7960424d0bc0..4cbf61476999 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -365,6 +365,9 @@ struct mmc_host {
 
 	unsigned int		slotno;	/* used for sdio acpi binding */
 
+	int			dsr_req;	/* DSR value is valid */
+	u32			dsr;	/* optional driver stage (DSR) value */
+
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
-- 
cgit v1.2.3


From 384b2cbd56a02efb16358ed7c0c039e4afca5ed0 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 24 Aug 2014 19:58:48 -0700
Subject: mmc: tmio: care about DMA tx/rx addr offset

Basically, SD_BUF0 Tx/Rx addresses are same
in normal TMIO controller,
but, it is different on Renesas R-Car SDHI controller
if it uses DMAC
(Rx address needs to add 0x2000 to Tx address)

This patch adds new .dma_rx_offset and cares it

Tested-by: Nguyen Xuan Nui <nx-nui@jinso.co.jp>
Tested-by: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Acked-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sh_mobile_sdhi.c | 3 +++
 drivers/mmc/host/tmio_mmc_dma.c   | 2 +-
 include/linux/mfd/tmio.h          | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index 2dafe28c8df9..f0818cd0242c 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -39,6 +39,7 @@ struct sh_mobile_sdhi_of_data {
 	unsigned long tmio_flags;
 	unsigned long capabilities;
 	unsigned long capabilities2;
+	dma_addr_t dma_rx_offset;
 };
 
 static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = {
@@ -56,6 +57,7 @@ static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = {
 	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
 	.capabilities2	= MMC_CAP2_NO_MULTI_READ,
+	.dma_rx_offset	= 0x2000,
 };
 
 static const struct of_device_id sh_mobile_sdhi_of_match[] = {
@@ -228,6 +230,7 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
 		mmc_data->flags |= of_data->tmio_flags;
 		mmc_data->capabilities |= of_data->capabilities;
 		mmc_data->capabilities2 |= of_data->capabilities2;
+		dma_priv->dma_rx_offset = of_data->dma_rx_offset;
 	}
 
 	/* SD control register space size is 0x100, 0x200 for bus_shift=1 */
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index eb8f1d5c34b1..bd646b041085 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -312,7 +312,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
 		if (pdata->dma->chan_priv_rx)
 			cfg.slave_id = pdata->dma->slave_id_rx;
 		cfg.direction = DMA_DEV_TO_MEM;
-		cfg.src_addr = cfg.dst_addr;
+		cfg.src_addr = cfg.dst_addr + pdata->dma->dma_rx_offset;
 		cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
 		cfg.dst_addr = 0;
 		ret = dmaengine_slave_config(host->chan_rx, &cfg);
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 8f6f2e91e7ae..777e29b1ad0f 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -96,6 +96,7 @@ struct tmio_mmc_dma {
 	int slave_id_tx;
 	int slave_id_rx;
 	int alignment_shift;
+	dma_addr_t dma_rx_offset;
 	bool (*filter)(struct dma_chan *chan, void *arg);
 };
 
-- 
cgit v1.2.3


From b8d11962c2d83c984d5afd091e5b725ad2fd5607 Mon Sep 17 00:00:00 2001
From: Shinobu Uehara <shinobu.uehara.xc@renesas.com>
Date: Sun, 24 Aug 2014 20:00:25 -0700
Subject: mmc: tmio: control multiple block transfer mode

Renesas SDHI has "Multiple Block Transfer Mode" settings
on SD_CMD register which controls CMD12 automatically.

This patch cares it, because
CMD12 is not needed when CMD53 (= SD_IO_RW_EXTENDED)

[Kuninori Morimoto: tidyuped for upstreaming
                    enabled this flags for all SH-Mobile/R-Car]

Tested-by: Nguyen Xuan Nui <nx-nui@jinso.co.jp>
Tested-by: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Signed-off-by: Shinobu Uehara <shinobu.uehara.xc@renesas.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sh_mobile_sdhi.c |  5 +++++
 drivers/mmc/host/tmio_mmc_pio.c   | 10 ++++++++++
 include/linux/mfd/tmio.h          |  6 ++++++
 3 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index f0818cd0242c..4727586b063e 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -225,6 +225,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
 	 */
 	mmc_data->flags |= TMIO_MMC_SDIO_IRQ;
 
+	/*
+	 * All SDHI have CMD12 controll bit
+	 */
+	mmc_data->flags |= TMIO_MMC_HAVE_CMD12_CTRL;
+
 	if (of_id && of_id->data) {
 		const struct sh_mobile_sdhi_of_data *of_data = of_id->data;
 		mmc_data->flags |= of_data->tmio_flags;
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 3d0ad737abea..c2804827be9f 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -44,6 +44,7 @@
 #include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
+#include <linux/mmc/sdio.h>
 #include <linux/scatterlist.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
@@ -310,6 +311,7 @@ static void tmio_mmc_done_work(struct work_struct *work)
 #define TRANSFER_READ  0x1000
 #define TRANSFER_MULTI 0x2000
 #define SECURITY_CMD   0x4000
+#define NO_CMD12_ISSUE 0x4000 /* TMIO_MMC_HAVE_CMD12_CTRL */
 
 static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd)
 {
@@ -346,6 +348,14 @@ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command
 		if (data->blocks > 1) {
 			sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x100);
 			c |= TRANSFER_MULTI;
+
+			/*
+			 * Disable auto CMD12 at IO_RW_EXTENDED when
+			 * multiple block transfer
+			 */
+			if ((host->pdata->flags & TMIO_MMC_HAVE_CMD12_CTRL) &&
+			    (cmd->opcode == SD_IO_RW_EXTENDED))
+				c |= NO_CMD12_ISSUE;
 		}
 		if (data->flags & MMC_DATA_READ)
 			c |= TRANSFER_READ;
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 777e29b1ad0f..7432d95b08e2 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -83,6 +83,12 @@
  */
 #define TMIO_MMC_HAVE_HIGH_REG		(1 << 6)
 
+/*
+ * Some controllers have CMD12 automatically
+ * issue/non-issue register
+ */
+#define TMIO_MMC_HAVE_CMD12_CTRL	(1 << 7)
+
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
 void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
-- 
cgit v1.2.3


From 6b98757e53cb0e93b02db4067c14afcb32c90615 Mon Sep 17 00:00:00 2001
From: Shinobu Uehara <shinobu.uehara.xc@renesas.com>
Date: Sun, 24 Aug 2014 20:00:52 -0700
Subject: mmc: tmio: add TMIO_MMC_SDIO_STATUS_QUIRK

Renesas R-Car SDHI should set reserved bits
on CTL_SDIO_STATUS register when writing.
This patch adds new TMIO_MMC_SDIO_STATUS_QUIRK flags
for this purpose

[Kuninori Morimoto: tidyuped for upstreaming
                    enabled this flags for all SH-Mobile/R-Car]

Tested-by: Nguyen Xuan Nui <nx-nui@jinso.co.jp>
Tested-by: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Signed-off-by: Shinobu Uehara <shinobu.uehara.xc@renesas.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++
 drivers/mmc/host/tmio_mmc_pio.c   | 7 ++++++-
 include/linux/mfd/tmio.h          | 5 +++++
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index 4727586b063e..ebcfc659b799 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -230,6 +230,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
 	 */
 	mmc_data->flags |= TMIO_MMC_HAVE_CMD12_CTRL;
 
+	/*
+	 * All SDHI need SDIO_INFO1 reserved bit
+	 */
+	mmc_data->flags |= TMIO_MMC_SDIO_STATUS_QUIRK;
+
 	if (of_id && of_id->data) {
 		const struct sh_mobile_sdhi_of_data *of_data = of_id->data;
 		mmc_data->flags |= of_data->tmio_flags;
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index c2804827be9f..c5ffa92ccb18 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -665,6 +665,7 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid)
 	struct mmc_host *mmc = host->mmc;
 	struct tmio_mmc_data *pdata = host->pdata;
 	unsigned int ireg, status;
+	unsigned int sdio_status;
 
 	if (!(pdata->flags & TMIO_MMC_SDIO_IRQ))
 		return IRQ_HANDLED;
@@ -672,7 +673,11 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid)
 	status = sd_ctrl_read16(host, CTL_SDIO_STATUS);
 	ireg = status & TMIO_SDIO_MASK_ALL & ~host->sdcard_irq_mask;
 
-	sd_ctrl_write16(host, CTL_SDIO_STATUS, status & ~TMIO_SDIO_MASK_ALL);
+	sdio_status = status & ~TMIO_SDIO_MASK_ALL;
+	if (pdata->flags & TMIO_MMC_SDIO_STATUS_QUIRK)
+		sdio_status |= 6;
+
+	sd_ctrl_write16(host, CTL_SDIO_STATUS, sdio_status);
 
 	if (mmc->caps & MMC_CAP_SDIO_IRQ && ireg & TMIO_SDIO_STAT_IOIRQ)
 		mmc_signal_sdio_irq(mmc);
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 7432d95b08e2..a7493ae01b58 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -89,6 +89,11 @@
  */
 #define TMIO_MMC_HAVE_CMD12_CTRL	(1 << 7)
 
+/*
+ * Some controllers needs to set 1 on SDIO status reserved bits
+ */
+#define TMIO_MMC_SDIO_STATUS_QUIRK	(1 << 8)
+
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
 void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
-- 
cgit v1.2.3


From e85dd04ea8c8d32ba8eae278959d28df34338e9d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 24 Aug 2014 20:01:54 -0700
Subject: mmc: tmio: remove Renesas specific #ifdef

This patch adds new TMIO_MMC_HAVE_CTL_DMA_REG flag,
and remove Renesas specific #ifdef from tmio driver

Tested-by: Nguyen Xuan Nui <nx-nui@jinso.co.jp>
Tested-by: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++
 drivers/mmc/host/tmio_mmc_dma.c   | 6 ++----
 include/linux/mfd/tmio.h          | 5 +++++
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index ebcfc659b799..a077da02bb8e 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -235,6 +235,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
 	 */
 	mmc_data->flags |= TMIO_MMC_SDIO_STATUS_QUIRK;
 
+	/*
+	 * All SDHI have DMA control register
+	 */
+	mmc_data->flags |= TMIO_MMC_HAVE_CTL_DMA_REG;
+
 	if (of_id && of_id->data) {
 		const struct sh_mobile_sdhi_of_data *of_data = of_id->data;
 		mmc_data->flags |= of_data->tmio_flags;
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index bd646b041085..7d077388b9eb 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -28,10 +28,8 @@ void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
 	if (!host->chan_tx || !host->chan_rx)
 		return;
 
-#if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE)
-	/* Switch DMA mode on or off - SuperH specific? */
-	sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0);
-#endif
+	if (host->pdata->flags & TMIO_MMC_HAVE_CTL_DMA_REG)
+		sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0);
 }
 
 void tmio_mmc_abort_dma(struct tmio_mmc_host *host)
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index a7493ae01b58..adcb0cdb2fdb 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -94,6 +94,11 @@
  */
 #define TMIO_MMC_SDIO_STATUS_QUIRK	(1 << 8)
 
+/*
+ * Some controllers have DMA enable/disable register
+ */
+#define TMIO_MMC_HAVE_CTL_DMA_REG	(1 << 9)
+
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
 void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
-- 
cgit v1.2.3


From da29fe2bf573f0ae56fdc2e790387cb73fc8c6f8 Mon Sep 17 00:00:00 2001
From: Shinobu Uehara <shinobu.uehara.xc@renesas.com>
Date: Sun, 24 Aug 2014 20:03:00 -0700
Subject: mmc: tmio: add actual clock support as option

Some controller is supporting actual clock on SD_CLK_CTRL :: DIV[7:0].
Renesas SH-Mobile SDHI doesn't support,
but, Renesas R-Car SDHI supports it.
This patch adds new TMIO_MMC_CLK_ACTUAL flag for it.

[Kuninori Morimoto: tidyuped for upstreaming]

Tested-by: Nguyen Xuan Nui <nx-nui@jinso.co.jp>
Tested-by: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Signed-off-by: Shinobu Uehara <shinobu.uehara.xc@renesas.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sh_mobile_sdhi.c | 6 ++++--
 drivers/mmc/host/tmio_mmc_pio.c   | 5 +++++
 include/linux/mfd/tmio.h          | 5 +++++
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index a077da02bb8e..d5d493719491 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -49,12 +49,14 @@ static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = {
 };
 
 static const struct sh_mobile_sdhi_of_data of_rcar_gen1_compatible = {
-	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE,
+	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE |
+			  TMIO_MMC_CLK_ACTUAL,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
 };
 
 static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = {
-	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE,
+	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE |
+			  TMIO_MMC_CLK_ACTUAL,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
 	.capabilities2	= MMC_CAP2_NO_MULTI_READ,
 	.dma_rx_offset	= 0x2000,
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 7cfe939992de..ba454131f9a8 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -159,6 +159,11 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
 		for (clock = host->mmc->f_min, clk = 0x80000080;
 			new_clock >= (clock<<1); clk >>= 1)
 			clock <<= 1;
+
+		/* 1/1 clock is option */
+		if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) &&
+		    ((clk >> 22) & 0x1))
+			clk |= 0xff;
 	}
 
 	if (host->set_clk_div)
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index adcb0cdb2fdb..90436d523e5e 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -99,6 +99,11 @@
  */
 #define TMIO_MMC_HAVE_CTL_DMA_REG	(1 << 9)
 
+/*
+ * Some controllers allows to set SDx actual clock
+ */
+#define TMIO_MMC_CLK_ACTUAL		(1 << 10)
+
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
 void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
-- 
cgit v1.2.3


From 51da2240906cb94e8f6ba55e403b6206df6fb2dd Mon Sep 17 00:00:00 2001
From: Yuvaraj CD <yuvaraj.cd@gmail.com>
Date: Fri, 22 Aug 2014 19:17:50 +0530
Subject: mmc: dw_mmc: use mmc_regulator_get_supply to handle regulators

This patch makes use of mmc_regulator_get_supply() to handle
the vmmc and vqmmc regulators.Also it moves the code handling
the these regulators to dw_mci_set_ios().It turned on the vmmc
and vqmmc during MMC_POWER_UP and MMC_POWER_ON,and turned off
during MMC_POWER_OFF.

Signed-off-by: Yuvaraj Kumar C D <yuvaraj.cd@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc.c  | 72 ++++++++++++++++++++++------------------------
 include/linux/mmc/dw_mmc.h |  2 +-
 2 files changed, 36 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 7f227e964265..aadb0d6aa63f 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -936,6 +936,7 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	struct dw_mci_slot *slot = mmc_priv(mmc);
 	const struct dw_mci_drv_data *drv_data = slot->host->drv_data;
 	u32 regs;
+	int ret;
 
 	switch (ios->bus_width) {
 	case MMC_BUS_WIDTH_4:
@@ -974,12 +975,38 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	switch (ios->power_mode) {
 	case MMC_POWER_UP:
+		if (!IS_ERR(mmc->supply.vmmc)) {
+			ret = mmc_regulator_set_ocr(mmc, mmc->supply.vmmc,
+					ios->vdd);
+			if (ret) {
+				dev_err(slot->host->dev,
+					"failed to enable vmmc regulator\n");
+				/*return, if failed turn on vmmc*/
+				return;
+			}
+		}
+		if (!IS_ERR(mmc->supply.vqmmc) && !slot->host->vqmmc_enabled) {
+			ret = regulator_enable(mmc->supply.vqmmc);
+			if (ret < 0)
+				dev_err(slot->host->dev,
+					"failed to enable vqmmc regulator\n");
+			else
+				slot->host->vqmmc_enabled = true;
+		}
 		set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags);
 		regs = mci_readl(slot->host, PWREN);
 		regs |= (1 << slot->id);
 		mci_writel(slot->host, PWREN, regs);
 		break;
 	case MMC_POWER_OFF:
+		if (!IS_ERR(mmc->supply.vmmc))
+			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
+
+		if (!IS_ERR(mmc->supply.vqmmc) && slot->host->vqmmc_enabled) {
+			regulator_disable(mmc->supply.vqmmc);
+			slot->host->vqmmc_enabled = false;
+		}
+
 		regs = mci_readl(slot->host, PWREN);
 		regs &= ~(1 << slot->id);
 		mci_writel(slot->host, PWREN, regs);
@@ -2110,7 +2137,13 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 		mmc->f_max = freq[1];
 	}
 
-	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+	/*if there are external regulators, get them*/
+	ret = mmc_regulator_get_supply(mmc);
+	if (ret == -EPROBE_DEFER)
+		goto err_setup_bus;
+
+	if (!mmc->ocr_avail)
+		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
 	if (host->pdata->caps)
 		mmc->caps = host->pdata->caps;
@@ -2176,7 +2209,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 
 err_setup_bus:
 	mmc_free_host(mmc);
-	return -EINVAL;
+	return ret;
 }
 
 static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id)
@@ -2469,24 +2502,6 @@ int dw_mci_probe(struct dw_mci *host)
 		}
 	}
 
-	host->vmmc = devm_regulator_get_optional(host->dev, "vmmc");
-	if (IS_ERR(host->vmmc)) {
-		ret = PTR_ERR(host->vmmc);
-		if (ret == -EPROBE_DEFER)
-			goto err_clk_ciu;
-
-		dev_info(host->dev, "no vmmc regulator found: %d\n", ret);
-		host->vmmc = NULL;
-	} else {
-		ret = regulator_enable(host->vmmc);
-		if (ret) {
-			if (ret != -EPROBE_DEFER)
-				dev_err(host->dev,
-					"regulator_enable fail: %d\n", ret);
-			goto err_clk_ciu;
-		}
-	}
-
 	host->quirks = host->pdata->quirks;
 
 	spin_lock_init(&host->lock);
@@ -2630,8 +2645,6 @@ err_workqueue:
 err_dmaunmap:
 	if (host->use_dma && host->dma_ops->exit)
 		host->dma_ops->exit(host);
-	if (host->vmmc)
-		regulator_disable(host->vmmc);
 
 err_clk_ciu:
 	if (!IS_ERR(host->ciu_clk))
@@ -2667,9 +2680,6 @@ void dw_mci_remove(struct dw_mci *host)
 	if (host->use_dma && host->dma_ops->exit)
 		host->dma_ops->exit(host);
 
-	if (host->vmmc)
-		regulator_disable(host->vmmc);
-
 	if (!IS_ERR(host->ciu_clk))
 		clk_disable_unprepare(host->ciu_clk);
 
@@ -2686,9 +2696,6 @@ EXPORT_SYMBOL(dw_mci_remove);
  */
 int dw_mci_suspend(struct dw_mci *host)
 {
-	if (host->vmmc)
-		regulator_disable(host->vmmc);
-
 	return 0;
 }
 EXPORT_SYMBOL(dw_mci_suspend);
@@ -2697,15 +2704,6 @@ int dw_mci_resume(struct dw_mci *host)
 {
 	int i, ret;
 
-	if (host->vmmc) {
-		ret = regulator_enable(host->vmmc);
-		if (ret) {
-			dev_err(host->dev,
-				"failed to enable regulator: %d\n", ret);
-			return ret;
-		}
-	}
-
 	if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) {
 		ret = -ENODEV;
 		return ret;
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 29ce014ab421..84e2827d0f0b 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -188,7 +188,7 @@ struct dw_mci {
 	/* Workaround flags */
 	u32			quirks;
 
-	struct regulator	*vmmc;	/* Power regulator */
+	bool			vqmmc_enabled;
 	unsigned long		irq_flags; /* IRQ flags */
 	int			irq;
 };
-- 
cgit v1.2.3


From 0173055842cd1d9ed3984e70891c22dbf2f29372 Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Fri, 22 Aug 2014 19:17:51 +0530
Subject: mmc: dw_mmc: Support voltage changes

For UHS cards we need the ability to switch voltages from 3.3V to
1.8V.  Add support to the dw_mmc driver to handle this.  Note that
dw_mmc needs a little bit of extra code since the interface needs a
special bit programmed to the CMD register while CMD11 is progressing.
This means adding a few extra states to the state machine to track.

Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Yuvaraj Kumar C D <yuvaraj.cd@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc.c  | 137 ++++++++++++++++++++++++++++++++++++++++++---
 drivers/mmc/host/dw_mmc.h  |   5 +-
 include/linux/mmc/dw_mmc.h |   2 +
 3 files changed, 134 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index aadb0d6aa63f..23719249182b 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -29,6 +29,7 @@
 #include <linux/irq.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
+#include <linux/mmc/sd.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/dw_mmc.h>
 #include <linux/bitops.h>
@@ -234,10 +235,13 @@ err:
 }
 #endif /* defined(CONFIG_DEBUG_FS) */
 
+static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg);
+
 static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
 {
 	struct mmc_data	*data;
 	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci *host = slot->host;
 	const struct dw_mci_drv_data *drv_data = slot->host->drv_data;
 	u32 cmdr;
 	cmd->error = -EINPROGRESS;
@@ -253,6 +257,34 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
 	else if (cmd->opcode != MMC_SEND_STATUS && cmd->data)
 		cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
 
+	if (cmd->opcode == SD_SWITCH_VOLTAGE) {
+		u32 clk_en_a;
+
+		/* Special bit makes CMD11 not die */
+		cmdr |= SDMMC_CMD_VOLT_SWITCH;
+
+		/* Change state to continue to handle CMD11 weirdness */
+		WARN_ON(slot->host->state != STATE_SENDING_CMD);
+		slot->host->state = STATE_SENDING_CMD11;
+
+		/*
+		 * We need to disable low power mode (automatic clock stop)
+		 * while doing voltage switch so we don't confuse the card,
+		 * since stopping the clock is a specific part of the UHS
+		 * voltage change dance.
+		 *
+		 * Note that low power mode (SDMMC_CLKEN_LOW_PWR) will be
+		 * unconditionally turned back on in dw_mci_setup_bus() if it's
+		 * ever called with a non-zero clock.  That shouldn't happen
+		 * until the voltage change is all done.
+		 */
+		clk_en_a = mci_readl(host, CLKENA);
+		clk_en_a &= ~(SDMMC_CLKEN_LOW_PWR << slot->id);
+		mci_writel(host, CLKENA, clk_en_a);
+		mci_send_cmd(slot, SDMMC_CMD_UPD_CLK |
+			     SDMMC_CMD_PRV_DAT_WAIT, 0);
+	}
+
 	if (cmd->flags & MMC_RSP_PRESENT) {
 		/* We expect a response, so set this bit */
 		cmdr |= SDMMC_CMD_RESP_EXP;
@@ -775,11 +807,15 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit)
 	unsigned int clock = slot->clock;
 	u32 div;
 	u32 clk_en_a;
+	u32 sdmmc_cmd_bits = SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT;
+
+	/* We must continue to set bit 28 in CMD until the change is complete */
+	if (host->state == STATE_WAITING_CMD11_DONE)
+		sdmmc_cmd_bits |= SDMMC_CMD_VOLT_SWITCH;
 
 	if (!clock) {
 		mci_writel(host, CLKENA, 0);
-		mci_send_cmd(slot,
-			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+		mci_send_cmd(slot, sdmmc_cmd_bits, 0);
 	} else if (clock != host->current_speed || force_clkinit) {
 		div = host->bus_hz / clock;
 		if (host->bus_hz % clock && host->bus_hz > clock)
@@ -803,15 +839,13 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit)
 		mci_writel(host, CLKSRC, 0);
 
 		/* inform CIU */
-		mci_send_cmd(slot,
-			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+		mci_send_cmd(slot, sdmmc_cmd_bits, 0);
 
 		/* set clock to desired speed */
 		mci_writel(host, CLKDIV, div);
 
 		/* inform CIU */
-		mci_send_cmd(slot,
-			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+		mci_send_cmd(slot, sdmmc_cmd_bits, 0);
 
 		/* enable clock; only low power if no SDIO */
 		clk_en_a = SDMMC_CLKEN_ENABLE << slot->id;
@@ -820,8 +854,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit)
 		mci_writel(host, CLKENA, clk_en_a);
 
 		/* inform CIU */
-		mci_send_cmd(slot,
-			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+		mci_send_cmd(slot, sdmmc_cmd_bits, 0);
 
 		/* keep the clock with reflecting clock dividor */
 		slot->__clk_old = clock << div;
@@ -897,6 +930,17 @@ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
 
 	slot->mrq = mrq;
 
+	if (host->state == STATE_WAITING_CMD11_DONE) {
+		dev_warn(&slot->mmc->class_dev,
+			 "Voltage change didn't complete\n");
+		/*
+		 * this case isn't expected to happen, so we can
+		 * either crash here or just try to continue on
+		 * in the closest possible state
+		 */
+		host->state = STATE_IDLE;
+	}
+
 	if (host->state == STATE_IDLE) {
 		host->state = STATE_SENDING_CMD;
 		dw_mci_start_request(host, slot);
@@ -973,6 +1017,9 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	/* Slot specific timing and width adjustment */
 	dw_mci_setup_bus(slot, false);
 
+	if (slot->host->state == STATE_WAITING_CMD11_DONE && ios->clock != 0)
+		slot->host->state = STATE_IDLE;
+
 	switch (ios->power_mode) {
 	case MMC_POWER_UP:
 		if (!IS_ERR(mmc->supply.vmmc)) {
@@ -1016,6 +1063,59 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	}
 }
 
+static int dw_mci_card_busy(struct mmc_host *mmc)
+{
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	u32 status;
+
+	/*
+	 * Check the busy bit which is low when DAT[3:0]
+	 * (the data lines) are 0000
+	 */
+	status = mci_readl(slot->host, STATUS);
+
+	return !!(status & SDMMC_STATUS_BUSY);
+}
+
+static int dw_mci_switch_voltage(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci *host = slot->host;
+	u32 uhs;
+	u32 v18 = SDMMC_UHS_18V << slot->id;
+	int min_uv, max_uv;
+	int ret;
+
+	/*
+	 * Program the voltage.  Note that some instances of dw_mmc may use
+	 * the UHS_REG for this.  For other instances (like exynos) the UHS_REG
+	 * does no harm but you need to set the regulator directly.  Try both.
+	 */
+	uhs = mci_readl(host, UHS_REG);
+	if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) {
+		min_uv = 2700000;
+		max_uv = 3600000;
+		uhs &= ~v18;
+	} else {
+		min_uv = 1700000;
+		max_uv = 1950000;
+		uhs |= v18;
+	}
+	if (!IS_ERR(mmc->supply.vqmmc)) {
+		ret = regulator_set_voltage(mmc->supply.vqmmc, min_uv, max_uv);
+
+		if (ret) {
+			dev_err(&mmc->class_dev,
+					 "Regulator set error %d: %d - %d\n",
+					 ret, min_uv, max_uv);
+			return ret;
+		}
+	}
+	mci_writel(host, UHS_REG, uhs);
+
+	return 0;
+}
+
 static int dw_mci_get_ro(struct mmc_host *mmc)
 {
 	int read_only;
@@ -1158,6 +1258,9 @@ static const struct mmc_host_ops dw_mci_ops = {
 	.get_cd			= dw_mci_get_cd,
 	.enable_sdio_irq	= dw_mci_enable_sdio_irq,
 	.execute_tuning		= dw_mci_execute_tuning,
+	.card_busy		= dw_mci_card_busy,
+	.start_signal_voltage_switch = dw_mci_switch_voltage,
+
 };
 
 static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
@@ -1181,7 +1284,11 @@ static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
 		dw_mci_start_request(host, slot);
 	} else {
 		dev_vdbg(host->dev, "list empty\n");
-		host->state = STATE_IDLE;
+
+		if (host->state == STATE_SENDING_CMD11)
+			host->state = STATE_WAITING_CMD11_DONE;
+		else
+			host->state = STATE_IDLE;
 	}
 
 	spin_unlock(&host->lock);
@@ -1292,8 +1399,10 @@ static void dw_mci_tasklet_func(unsigned long priv)
 
 		switch (state) {
 		case STATE_IDLE:
+		case STATE_WAITING_CMD11_DONE:
 			break;
 
+		case STATE_SENDING_CMD11:
 		case STATE_SENDING_CMD:
 			if (!test_and_clear_bit(EVENT_CMD_COMPLETE,
 						&host->pending_events))
@@ -1894,6 +2003,14 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
 	}
 
 	if (pending) {
+		/* Check volt switch first, since it can look like an error */
+		if ((host->state == STATE_SENDING_CMD11) &&
+		    (pending & SDMMC_INT_VOLT_SWITCH)) {
+			mci_writel(host, RINTSTS, SDMMC_INT_VOLT_SWITCH);
+			pending &= ~SDMMC_INT_VOLT_SWITCH;
+			dw_mci_cmd_interrupt(host, pending);
+		}
+
 		if (pending & DW_MCI_CMD_ERROR_FLAGS) {
 			mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS);
 			host->cmd_status = pending;
@@ -1999,7 +2116,9 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 
 					switch (host->state) {
 					case STATE_IDLE:
+					case STATE_WAITING_CMD11_DONE:
 						break;
+					case STATE_SENDING_CMD11:
 					case STATE_SENDING_CMD:
 						mrq->cmd->error = -ENOMEDIUM;
 						if (!mrq->data)
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 08fd956d81f3..01b99e8a9190 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -99,6 +99,7 @@
 #define SDMMC_INT_HLE			BIT(12)
 #define SDMMC_INT_FRUN			BIT(11)
 #define SDMMC_INT_HTO			BIT(10)
+#define SDMMC_INT_VOLT_SWITCH		BIT(10) /* overloads bit 10! */
 #define SDMMC_INT_DRTO			BIT(9)
 #define SDMMC_INT_RTO			BIT(8)
 #define SDMMC_INT_DCRC			BIT(7)
@@ -113,6 +114,7 @@
 /* Command register defines */
 #define SDMMC_CMD_START			BIT(31)
 #define SDMMC_CMD_USE_HOLD_REG	BIT(29)
+#define SDMMC_CMD_VOLT_SWITCH		BIT(28)
 #define SDMMC_CMD_CCS_EXP		BIT(23)
 #define SDMMC_CMD_CEATA_RD		BIT(22)
 #define SDMMC_CMD_UPD_CLK		BIT(21)
@@ -130,6 +132,7 @@
 /* Status register defines */
 #define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FFF)
 #define SDMMC_STATUS_DMA_REQ		BIT(31)
+#define SDMMC_STATUS_BUSY		BIT(9)
 /* FIFOTH register defines */
 #define SDMMC_SET_FIFOTH(m, r, t)	(((m) & 0x7) << 28 | \
 					 ((r) & 0xFFF) << 16 | \
@@ -150,7 +153,7 @@
 #define SDMMC_GET_VERID(x)		((x) & 0xFFFF)
 /* Card read threshold */
 #define SDMMC_SET_RD_THLD(v, x)		(((v) & 0x1FFF) << 16 | (x))
-
+#define SDMMC_UHS_18V			BIT(0)
 /* All ctrl reset bits */
 #define SDMMC_CTRL_ALL_RESET_FLAGS \
 	(SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET)
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 84e2827d0f0b..001366927cf4 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -26,6 +26,8 @@ enum dw_mci_state {
 	STATE_DATA_BUSY,
 	STATE_SENDING_STOP,
 	STATE_DATA_ERROR,
+	STATE_SENDING_CMD11,
+	STATE_WAITING_CMD11_DONE,
 };
 
 enum {
-- 
cgit v1.2.3


From e99783a45220a2c5f5a598e0e81213ecf2dbcf2f Mon Sep 17 00:00:00 2001
From: Chanho Min <chanho.min@lge.com>
Date: Sat, 30 Aug 2014 12:40:40 +0900
Subject: mmc: sdhci: handle busy-end interrupt during command

It is fully legal for a controller to start handling busy-end interrupt
before it has signaled that the command has completed. So make sure
we do things in the proper order, Or it results that command interrupt
is ignored so it can cause unexpected operations. This is founded at some
toshiba emmc with the bellow warning.

"mmc0: Got command interrupt 0x00000001 even though
no command operation was in progress."

This issue has been also reported by Youssef TRIKI:
It is not specific to Toshiba devices, and happens with eMMC devices
as well as SD card which support Auto-CMD12 rather than CMD23.

Also, similar patch is submitted by:
Gwendal Grignou <gwendal@chromium.org>

Changes since v1:
 Fixed conflict with the next of git.linaro.org/people/ulf.hansson/mmc.git
 and Tested if issue is fixed again.

Signed-off-by: Hankyung Yu <hankyung.yu@lge.com>
Signed-off-by: Chanho Min <chanho.min@lge.com>
Tested-by: Youssef TRIKI <youssef.triki@st.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci.c  | 17 +++++++++++++++--
 include/linux/mmc/sdhci.h |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index b5e22b8c4885..335cdf3b5224 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1016,6 +1016,7 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
 	mod_timer(&host->timer, timeout);
 
 	host->cmd = cmd;
+	host->busy_handle = 0;
 
 	sdhci_prepare_data(host, cmd);
 
@@ -2261,8 +2262,12 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
 		if (host->cmd->data)
 			DBG("Cannot wait for busy signal when also "
 				"doing a data transfer");
-		else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ))
+		else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ)
+				&& !host->busy_handle) {
+			/* Mark that command complete before busy is ended */
+			host->busy_handle = 1;
 			return;
+		}
 
 		/* The controller does not support the end-of-busy IRQ,
 		 * fall through and take the SDHCI_INT_RESPONSE */
@@ -2330,7 +2335,15 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 				return;
 			}
 			if (intmask & SDHCI_INT_DATA_END) {
-				sdhci_finish_command(host);
+				/*
+				 * Some cards handle busy-end interrupt
+				 * before the command completed, so make
+				 * sure we do things in the proper order.
+				 */
+				if (host->busy_handle)
+					sdhci_finish_command(host);
+				else
+					host->busy_handle = 1;
 				return;
 			}
 		}
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 09ebe57d5ce9..0aa85ca0c788 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -146,6 +146,7 @@ struct sdhci_host {
 	struct mmc_command *cmd;	/* Current command */
 	struct mmc_data *data;	/* Current data request */
 	unsigned int data_early:1;	/* Data finished before cmd */
+	unsigned int busy_handle:1;	/* Handling the order of Busy-end */
 
 	struct sg_mapping_iter sg_miter;	/* SG state for PIO */
 	unsigned int blocks;	/* remaining PIO blocks */
-- 
cgit v1.2.3


From 2e47e84245adcb1b3872210678b6146f674fb3ff Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 2 Sep 2014 19:08:53 -0700
Subject: mmc: Add .multi_io_quirk callback for multi I/O HW bug

Historically, we have been using MMC_CAP* to handle host HW issues and
currently the block layer uses MMC_CAP2_NO_MULTI_READ flag for a multi
I/O HW bug workaround.

There are a few tweaks needed to make MMC_CAP2_NO_MULTI_READ suite all
situations. Therefore let's add an optional host ops callback to enable
host drivers to return the number of blocks it allows per request.

In a future patch and when host drivers have converted to the new
callback, MMC_CAP2_NO_MULTI_READ shall be removed.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/card/block.c | 10 ++++++++++
 include/linux/mmc/host.h |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index ede41f05c392..adab9038f6f7 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1402,6 +1402,16 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 		if (card->host->caps2 & MMC_CAP2_NO_MULTI_READ &&
 		    rq_data_dir(req) == READ)
 			brq->data.blocks = 1;
+
+		/*
+		 * Some controllers have HW issues while operating
+		 * in multiple I/O mode
+		 */
+		if (card->host->ops->multi_io_quirk)
+			brq->data.blocks = card->host->ops->multi_io_quirk(card,
+						(rq_data_dir(req) == READ) ?
+						MMC_DATA_READ : MMC_DATA_WRITE,
+						brq->data.blocks);
 	}
 
 	if (brq->data.blocks > 1 || do_rel_wr) {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 4cbf61476999..10e2bd6985ae 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -139,6 +139,13 @@ struct mmc_host_ops {
 	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 	void	(*hw_reset)(struct mmc_host *host);
 	void	(*card_event)(struct mmc_host *host);
+
+	/*
+	 * Optional callback to support controllers with HW issues for multiple
+	 * I/O. Returns the number of supported blocks for the request.
+	 */
+	int	(*multi_io_quirk)(struct mmc_card *card,
+				  unsigned int direction, int blk_size);
 };
 
 struct mmc_card;
-- 
cgit v1.2.3


From bbf0208d39121bd8873b032459cb2b5f35e14593 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 8 Sep 2014 23:45:25 -0700
Subject: mmc: use .multi_io_quirk on tmio_mmc

Now, tmio_mmc can use .multi_io_quirk callback
instead of MMC_CAP2_NO_MULTI_READ flags.
let's use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/tmio_mmc_pio.c | 13 +++++++++++++
 include/linux/mfd/tmio.h        |  3 +++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index ba454131f9a8..ff5ff0f725c9 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -970,12 +970,25 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc)
 	return ret;
 }
 
+static int tmio_multi_io_quirk(struct mmc_card *card,
+			       unsigned int direction, int blk_size)
+{
+	struct tmio_mmc_host *host = mmc_priv(card->host);
+	struct tmio_mmc_data *pdata = host->pdata;
+
+	if (pdata->multi_io_quirk)
+		return pdata->multi_io_quirk(card, direction, blk_size);
+
+	return blk_size;
+}
+
 static const struct mmc_host_ops tmio_mmc_ops = {
 	.request	= tmio_mmc_request,
 	.set_ios	= tmio_mmc_set_ios,
 	.get_ro         = tmio_mmc_get_ro,
 	.get_cd		= mmc_gpio_get_cd,
 	.enable_sdio_irq = tmio_mmc_enable_sdio_irq,
+	.multi_io_quirk	= tmio_multi_io_quirk,
 };
 
 static int tmio_mmc_init_ocr(struct tmio_mmc_host *host)
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 90436d523e5e..57388171610d 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -5,6 +5,7 @@
 #include <linux/fb.h>
 #include <linux/io.h>
 #include <linux/jiffies.h>
+#include <linux/mmc/card.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
@@ -142,6 +143,8 @@ struct tmio_mmc_data {
 	/* clock management callbacks */
 	int (*clk_enable)(struct platform_device *pdev, unsigned int *f);
 	void (*clk_disable)(struct platform_device *pdev);
+	int (*multi_io_quirk)(struct mmc_card *card,
+			      unsigned int direction, int blk_size);
 };
 
 /*
-- 
cgit v1.2.3


From 0abb71feb228ddbd17e0dfa13216541e036bb549 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 8 Sep 2014 23:46:49 -0700
Subject: mmc: remove MMC_CAP2_NO_MULTI_READ flags

Now, mmc framework uses multi_io_quirk
for I/O HW bug workaround.
MMC_CAP2_NO_MULTI_READ flag is no longer needed

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/card/block.c | 5 -----
 include/linux/mmc/host.h | 1 -
 2 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index adab9038f6f7..413f984cb76d 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1398,11 +1398,6 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 		if (disable_multi)
 			brq->data.blocks = 1;
 
-		/* Some controllers can't do multiblock reads due to hw bugs */
-		if (card->host->caps2 & MMC_CAP2_NO_MULTI_READ &&
-		    rq_data_dir(req) == READ)
-			brq->data.blocks = 1;
-
 		/*
 		 * Some controllers have HW issues while operating
 		 * in multiple I/O mode
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 10e2bd6985ae..797ae657dc3d 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -272,7 +272,6 @@ struct mmc_host {
 
 #define MMC_CAP2_BOOTPART_NOACC	(1 << 0)	/* Boot partition no access */
 #define MMC_CAP2_FULL_PWR_CYCLE	(1 << 2)	/* Can do full power cycle */
-#define MMC_CAP2_NO_MULTI_READ	(1 << 3)	/* Multiblock reads don't work */
 #define MMC_CAP2_HS200_1_8V_SDR	(1 << 5)        /* can support */
 #define MMC_CAP2_HS200_1_2V_SDR	(1 << 6)        /* can support */
 #define MMC_CAP2_HS200		(MMC_CAP2_HS200_1_8V_SDR | \
-- 
cgit v1.2.3


From 9d2fa2428ae149ba3a5b7a4ceb0a9e11f1882b3b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 27 Aug 2014 13:00:51 +0200
Subject: mmc: slot-gpio: add gpiod variant to get wp GPIO

This makes it possible to get the write protect (read only)
GPIO line from a GPIO descriptor. Written to exactly mirror
the card detect function.

Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/slot-gpio.c  | 48 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/slot-gpio.h |  3 +++
 2 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 908c2b29e79f..e3fce4493fab 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -325,6 +325,54 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id,
 }
 EXPORT_SYMBOL(mmc_gpiod_request_cd);
 
+/**
+ * mmc_gpiod_request_ro - request a gpio descriptor for write protection
+ * @host: mmc host
+ * @con_id: function within the GPIO consumer
+ * @idx: index of the GPIO to obtain in the consumer
+ * @override_active_level: ignore %GPIO_ACTIVE_LOW flag
+ * @debounce: debounce time in microseconds
+ *
+ * Use this function in place of mmc_gpio_request_ro() to use the GPIO
+ * descriptor API.  Note that it is paired with mmc_gpiod_free_ro() not
+ * mmc_gpio_free_ro().
+ *
+ * Returns zero on success, else an error.
+ */
+int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
+			 unsigned int idx, bool override_active_level,
+			 unsigned int debounce)
+{
+	struct mmc_gpio *ctx;
+	struct gpio_desc *desc;
+	int ret;
+
+	ret = mmc_gpio_alloc(host);
+	if (ret < 0)
+		return ret;
+
+	ctx = host->slot.handler_priv;
+
+	if (!con_id)
+		con_id = ctx->ro_label;
+
+	desc = devm_gpiod_get_index(host->parent, con_id, idx, GPIOD_IN);
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
+	if (debounce) {
+		ret = gpiod_set_debounce(desc, debounce);
+		if (ret < 0)
+			return ret;
+	}
+
+	ctx->override_ro_active_level = override_active_level;
+	ctx->ro_gpio = desc;
+
+	return 0;
+}
+EXPORT_SYMBOL(mmc_gpiod_request_ro);
+
 /**
  * mmc_gpiod_free_cd - free the card-detection gpio descriptor
  * @host: mmc host
diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index d2433381e828..a0d0442c15bf 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -25,6 +25,9 @@ void mmc_gpio_free_cd(struct mmc_host *host);
 int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id,
 			 unsigned int idx, bool override_active_level,
 			 unsigned int debounce);
+int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
+			 unsigned int idx, bool override_active_level,
+			 unsigned int debounce);
 void mmc_gpiod_free_cd(struct mmc_host *host);
 void mmc_gpiod_request_cd_irq(struct mmc_host *host);
 
-- 
cgit v1.2.3


From 3034a146820c26fe6da66a45f6340fe87fe0983a Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <d.kasatkin@samsung.com>
Date: Fri, 27 Jun 2014 18:15:44 +0300
Subject: ima: pass 'opened' flag to identify newly created files

Empty files and missing xattrs do not guarantee that a file was
just created.  This patch passes FILE_CREATED flag to IMA to
reliably identify new files.

Signed-off-by: Dmitry Kasatkin <d.kasatkin@samsung.com>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org>  3.14+
---
 fs/namei.c                            |  2 +-
 fs/nfsd/vfs.c                         |  2 +-
 include/linux/ima.h                   |  4 ++--
 security/integrity/ima/ima.h          |  4 ++--
 security/integrity/ima/ima_appraise.c |  4 ++--
 security/integrity/ima/ima_main.c     | 16 ++++++++--------
 6 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 985c6f368485..005771f97189 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3058,7 +3058,7 @@ opened:
 	error = open_check_o_direct(file);
 	if (error)
 		goto exit_fput;
-	error = ima_file_check(file, op->acc_mode);
+	error = ima_file_check(file, op->acc_mode, *opened);
 	if (error)
 		goto exit_fput;
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 140c496f612c..d49c778faecb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -709,7 +709,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 		host_err = PTR_ERR(*filp);
 		*filp = NULL;
 	} else {
-		host_err = ima_file_check(*filp, may_flags);
+		host_err = ima_file_check(*filp, may_flags, 0);
 
 		if (may_flags & NFSD_MAY_64BIT_COOKIE)
 			(*filp)->f_mode |= FMODE_64BITHASH;
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 7cf5e9b32550..120ccc53fcb7 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -15,7 +15,7 @@ struct linux_binprm;
 
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
-extern int ima_file_check(struct file *file, int mask);
+extern int ima_file_check(struct file *file, int mask, int opened);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern int ima_module_check(struct file *file);
@@ -27,7 +27,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
-static inline int ima_file_check(struct file *file, int mask)
+static inline int ima_file_check(struct file *file, int mask, int opened)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 57da4bd7ba0c..0fb456c20eda 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -177,7 +177,7 @@ void ima_delete_rules(void);
 int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
 			     struct file *file, const unsigned char *filename,
 			     struct evm_ima_xattr_data *xattr_value,
-			     int xattr_len);
+			     int xattr_len, int opened);
 int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
@@ -193,7 +193,7 @@ static inline int ima_appraise_measurement(int func,
 					   struct file *file,
 					   const unsigned char *filename,
 					   struct evm_ima_xattr_data *xattr_value,
-					   int xattr_len)
+					   int xattr_len, int opened)
 {
 	return INTEGRITY_UNKNOWN;
 }
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index a4605d677248..225fd944a4ef 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -183,7 +183,7 @@ int ima_read_xattr(struct dentry *dentry,
 int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
 			     struct file *file, const unsigned char *filename,
 			     struct evm_ima_xattr_data *xattr_value,
-			     int xattr_len)
+			     int xattr_len, int opened)
 {
 	static const char op[] = "appraise_data";
 	char *cause = "unknown";
@@ -203,7 +203,7 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
 
 		cause = "missing-hash";
 		status = INTEGRITY_NOLABEL;
-		if (inode->i_size == 0) {
+		if (opened & FILE_CREATED) {
 			iint->flags |= IMA_NEW_FILE;
 			status = INTEGRITY_PASS;
 		}
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 0a2298f90c9c..f82cf9b8e92b 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -157,7 +157,7 @@ void ima_file_free(struct file *file)
 }
 
 static int process_measurement(struct file *file, const char *filename,
-			       int mask, int function)
+			       int mask, int function, int opened)
 {
 	struct inode *inode = file_inode(file);
 	struct integrity_iint_cache *iint;
@@ -226,7 +226,7 @@ static int process_measurement(struct file *file, const char *filename,
 				      xattr_value, xattr_len);
 	if (action & IMA_APPRAISE_SUBMASK)
 		rc = ima_appraise_measurement(_func, iint, file, pathname,
-					      xattr_value, xattr_len);
+					      xattr_value, xattr_len, opened);
 	if (action & IMA_AUDIT)
 		ima_audit_measurement(iint, pathname);
 	kfree(pathbuf);
@@ -255,7 +255,7 @@ out:
 int ima_file_mmap(struct file *file, unsigned long prot)
 {
 	if (file && (prot & PROT_EXEC))
-		return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK);
+		return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK, 0);
 	return 0;
 }
 
@@ -277,7 +277,7 @@ int ima_bprm_check(struct linux_binprm *bprm)
 	return process_measurement(bprm->file,
 				   (strcmp(bprm->filename, bprm->interp) == 0) ?
 				   bprm->filename : bprm->interp,
-				   MAY_EXEC, BPRM_CHECK);
+				   MAY_EXEC, BPRM_CHECK, 0);
 }
 
 /**
@@ -290,12 +290,12 @@ int ima_bprm_check(struct linux_binprm *bprm)
  * On success return 0.  On integrity appraisal error, assuming the file
  * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
  */
-int ima_file_check(struct file *file, int mask)
+int ima_file_check(struct file *file, int mask, int opened)
 {
 	ima_rdwr_violation_check(file);
 	return process_measurement(file, NULL,
 				   mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
-				   FILE_CHECK);
+				   FILE_CHECK, opened);
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
@@ -318,7 +318,7 @@ int ima_module_check(struct file *file)
 #endif
 		return 0;	/* We rely on module signature checking */
 	}
-	return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK);
+	return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK, 0);
 }
 
 int ima_fw_from_file(struct file *file, char *buf, size_t size)
@@ -329,7 +329,7 @@ int ima_fw_from_file(struct file *file, char *buf, size_t size)
 			return -EACCES;	/* INTEGRITY_UNKNOWN */
 		return 0;
 	}
-	return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK);
+	return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK, 0);
 }
 
 static int __init init_ima(void)
-- 
cgit v1.2.3


From ef979a26e3d521d51dbd9950e46a69e303073171 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Tue, 9 Sep 2014 08:56:48 +0800
Subject: usb: gadget: add reset API at usb_gadget_driver

Adding reset API for UDC bus reset handler is useful for below
two issues.

Current disconnect API at usb_gadget_driver is also invoked at
udc's bus reset handler, but the document says it is invoked when
the host is disconnected.

Besides, we may expect the gadget_driver to do different things
for host sends bus reset and host disconnects gadget, eg, we may not
want to flush dirty page for mass storage at bus reset, and want to
do it at disconnection.

Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/gadget.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index c540557b564b..598a6e9b2850 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -817,6 +817,8 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget)
  *	Called in a context that permits sleeping.
  * @suspend: Invoked on USB suspend.  May be called in_interrupt.
  * @resume: Invoked on USB resume.  May be called in_interrupt.
+ * @reset: Invoked on USB bus reset. It is mandatory for all gadget drivers
+ *	and should be called in_interrupt.
  * @driver: Driver model state for this driver.
  *
  * Devices are disabled till a gadget driver successfully bind()s, which
@@ -874,6 +876,7 @@ struct usb_gadget_driver {
 	void			(*disconnect)(struct usb_gadget *);
 	void			(*suspend)(struct usb_gadget *);
 	void			(*resume)(struct usb_gadget *);
+	void			(*reset)(struct usb_gadget *);
 
 	/* FIXME support safe rmmod */
 	struct device_driver	driver;
-- 
cgit v1.2.3


From 02ab695bb37ee9ad515df0d0790d5977505dd04a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 4 Sep 2014 22:17:17 -0700
Subject: net: filter: add "load 64-bit immediate" eBPF instruction

add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register.
All previous instructions were 8-byte. This is first 16-byte instruction.
Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction:
insn[0].code = BPF_LD | BPF_DW | BPF_IMM
insn[0].dst_reg = destination register
insn[0].imm = lower 32-bit
insn[1].code = 0
insn[1].imm = upper 32-bit
All unused fields must be zero.

Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM
which loads 32-bit immediate value into a register.

x64 JITs it as single 'movabsq %rax, imm64'
arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn

Note that old eBPF programs are binary compatible with new interpreter.

It helps eBPF programs load 64-bit constant into a register with one
instruction instead of using two registers and 4 instructions:
BPF_MOV32_IMM(R1, imm32)
BPF_ALU64_IMM(BPF_LSH, R1, 32)
BPF_MOV32_IMM(R2, imm32)
BPF_ALU64_REG(BPF_OR, R1, R2)

User space generated programs will use this instruction to load constants only.

To tell kernel that user space needs a pointer the _pseudo_ variant of
this instruction may be added later, which will use extra bits of encoding
to indicate what type of pointer user space is asking kernel to provide.
For example 'off' or 'src_reg' fields can be used for such purpose.
src_reg = 1 could mean that user space is asking kernel to validate and
load in-kernel map pointer.
src_reg = 2 could mean that user space needs readonly data section pointer
src_reg = 3 could mean that user space needs a pointer to per-cpu local data
All such future pseudo instructions will not be carrying the actual pointer
as part of the instruction, but rather will be treated as a request to kernel
to provide one. The kernel will verify the request_for_a_pointer, then
will drop _pseudo_ marking and will store actual internal pointer inside
the instruction, so the end result is the interpreter and JITs never
see pseudo BPF_LD_IMM64 insns and only operate on generic BPF_LD_IMM64 that
loads 64-bit immediate into a register. User space never operates on direct
pointers and verifier can easily recognize request_for_pointer vs other
instructions.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt |  8 +++++++-
 arch/x86/net/bpf_jit_comp.c         | 17 +++++++++++++++++
 include/linux/filter.h              | 18 ++++++++++++++++++
 kernel/bpf/core.c                   |  5 +++++
 lib/test_bpf.c                      | 21 +++++++++++++++++++++
 5 files changed, 68 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index c48a9704bda8..81916ab5d96f 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -951,7 +951,7 @@ Size modifier is one of ...
 
 Mode modifier is one of:
 
-  BPF_IMM  0x00  /* classic BPF only, reserved in eBPF */
+  BPF_IMM  0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
   BPF_ABS  0x20
   BPF_IND  0x40
   BPF_MEM  0x60
@@ -995,6 +995,12 @@ BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
 Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
 2 byte atomic increments are not supported.
 
+eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists
+of two consecutive 'struct bpf_insn' 8-byte blocks and interpreted as single
+instruction that loads 64-bit immediate value into a dst_reg.
+Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
+32-bit immediate value into a register.
+
 Testing
 -------
 
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 39ccfbb4a723..06f8c17f5484 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -393,6 +393,23 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
 			break;
 
+		case BPF_LD | BPF_IMM | BPF_DW:
+			if (insn[1].code != 0 || insn[1].src_reg != 0 ||
+			    insn[1].dst_reg != 0 || insn[1].off != 0) {
+				/* verifier must catch invalid insns */
+				pr_err("invalid BPF_LD_IMM64 insn\n");
+				return -EINVAL;
+			}
+
+			/* movabsq %rax, imm64 */
+			EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+			EMIT(insn[0].imm, 4);
+			EMIT(insn[1].imm, 4);
+
+			insn++;
+			i++;
+			break;
+
 			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
 		case BPF_ALU | BPF_MOD | BPF_X:
 		case BPF_ALU | BPF_DIV | BPF_X:
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c78994593355..bf323da77950 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -166,6 +166,24 @@ enum {
 		.off   = 0,					\
 		.imm   = IMM })
 
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM)					\
+	BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_LD | BPF_DW | BPF_IMM,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = (__u32) (IMM) }),			\
+	((struct bpf_insn) {					\
+		.code  = 0, /* zero is reserved opcode */	\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = ((__u64) (IMM)) >> 32 })
+
 /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
 
 #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b54bb2c2e494..2c2bfaacce66 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -242,6 +242,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 		[BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
 		[BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
 		[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
+		[BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW,
 	};
 	void *ptr;
 	int off;
@@ -301,6 +302,10 @@ select_insn:
 	ALU64_MOV_K:
 		DST = IMM;
 		CONT;
+	LD_IMM_DW:
+		DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
+		insn++;
+		CONT;
 	ALU64_ARSH_X:
 		(*(s64 *) &DST) >>= SRC;
 		CONT;
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 9a67456ba29a..413890815d3e 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1735,6 +1735,27 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 1, 0 } },
 	},
+	{
+		"load 64-bit immediate",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0x567800001234L),
+			BPF_MOV64_REG(R2, R1),
+			BPF_MOV64_REG(R3, R2),
+			BPF_ALU64_IMM(BPF_RSH, R2, 32),
+			BPF_ALU64_IMM(BPF_LSH, R3, 32),
+			BPF_ALU64_IMM(BPF_RSH, R3, 32),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
 };
 
 static struct net_device dev;
-- 
cgit v1.2.3


From daedfb22451dd02b35c0549566cbb7cc06bdd53b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 4 Sep 2014 22:17:18 -0700
Subject: net: filter: split filter.h and expose eBPF to user space

allow user space to generate eBPF programs

uapi/linux/bpf.h: eBPF instruction set definition

linux/filter.h: the rest

This patch only moves macro definitions, but practically it freezes existing
eBPF instruction set, though new instructions can still be added in the future.

These eBPF definitions cannot go into uapi/linux/filter.h, since the names
may conflict with existing applications.

Full eBPF ISA description is in Documentation/networking/filter.txt

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h    | 56 +---------------------------------------
 include/uapi/linux/Kbuild |  1 +
 include/uapi/linux/bpf.h  | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 55 deletions(-)
 create mode 100644 include/uapi/linux/bpf.h

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index bf323da77950..8f82ef3f1cdd 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -10,58 +10,12 @@
 #include <linux/workqueue.h>
 #include <uapi/linux/filter.h>
 #include <asm/cacheflush.h>
+#include <uapi/linux/bpf.h>
 
 struct sk_buff;
 struct sock;
 struct seccomp_data;
 
-/* Internally used and optimized filter representation with extended
- * instruction set based on top of classic BPF.
- */
-
-/* instruction classes */
-#define BPF_ALU64	0x07	/* alu mode in double word width */
-
-/* ld/ldx fields */
-#define BPF_DW		0x18	/* double word */
-#define BPF_XADD	0xc0	/* exclusive add */
-
-/* alu/jmp fields */
-#define BPF_MOV		0xb0	/* mov reg to reg */
-#define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */
-
-/* change endianness of a register */
-#define BPF_END		0xd0	/* flags for endianness conversion: */
-#define BPF_TO_LE	0x00	/* convert to little-endian */
-#define BPF_TO_BE	0x08	/* convert to big-endian */
-#define BPF_FROM_LE	BPF_TO_LE
-#define BPF_FROM_BE	BPF_TO_BE
-
-#define BPF_JNE		0x50	/* jump != */
-#define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
-#define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
-#define BPF_CALL	0x80	/* function call */
-#define BPF_EXIT	0x90	/* function return */
-
-/* Register numbers */
-enum {
-	BPF_REG_0 = 0,
-	BPF_REG_1,
-	BPF_REG_2,
-	BPF_REG_3,
-	BPF_REG_4,
-	BPF_REG_5,
-	BPF_REG_6,
-	BPF_REG_7,
-	BPF_REG_8,
-	BPF_REG_9,
-	BPF_REG_10,
-	__MAX_BPF_REG,
-};
-
-/* BPF has 10 general purpose 64-bit registers and stack frame. */
-#define MAX_BPF_REG	__MAX_BPF_REG
-
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
  * calls in BPF_CALL instruction.
@@ -322,14 +276,6 @@ enum {
 #define SK_RUN_FILTER(filter, ctx) \
 	(*filter->prog->bpf_func)(ctx, filter->prog->insnsi)
 
-struct bpf_insn {
-	__u8	code;		/* opcode */
-	__u8	dst_reg:4;	/* dest register */
-	__u8	src_reg:4;	/* source register */
-	__s16	off;		/* signed offset */
-	__s32	imm;		/* signed immediate constant */
-};
-
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 24e9033f8b3f..fb3f7b675229 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -67,6 +67,7 @@ header-y += bfs_fs.h
 header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
+header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
 header-y += btrfs.h
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
new file mode 100644
index 000000000000..479ed0b6be16
--- /dev/null
+++ b/include/uapi/linux/bpf.h
@@ -0,0 +1,65 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64	0x07	/* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW		0x18	/* double word */
+#define BPF_XADD	0xc0	/* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV		0xb0	/* mov reg to reg */
+#define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END		0xd0	/* flags for endianness conversion: */
+#define BPF_TO_LE	0x00	/* convert to little-endian */
+#define BPF_TO_BE	0x08	/* convert to big-endian */
+#define BPF_FROM_LE	BPF_TO_LE
+#define BPF_FROM_BE	BPF_TO_BE
+
+#define BPF_JNE		0x50	/* jump != */
+#define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
+#define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
+#define BPF_CALL	0x80	/* function call */
+#define BPF_EXIT	0x90	/* function return */
+
+/* Register numbers */
+enum {
+	BPF_REG_0 = 0,
+	BPF_REG_1,
+	BPF_REG_2,
+	BPF_REG_3,
+	BPF_REG_4,
+	BPF_REG_5,
+	BPF_REG_6,
+	BPF_REG_7,
+	BPF_REG_8,
+	BPF_REG_9,
+	BPF_REG_10,
+	__MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG	__MAX_BPF_REG
+
+struct bpf_insn {
+	__u8	code;		/* opcode */
+	__u8	dst_reg:4;	/* dest register */
+	__u8	src_reg:4;	/* source register */
+	__s16	off;		/* signed offset */
+	__s32	imm;		/* signed immediate constant */
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3


From dc8ecdd3a3fccf73fcb07711cde064ce5727f9d1 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Mon, 1 Sep 2014 23:11:06 +0200
Subject: bcma: move bus struct setup into early part of host specific code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change is important for SoC host. In future we will want to know
chip ID (needed for early MIPS boot) before doing cores scanning.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/host_pci.c   | 3 +++
 drivers/bcma/host_soc.c   | 3 +++
 drivers/bcma/main.c       | 2 --
 drivers/bcma/scan.c       | 7 -------
 include/linux/bcma/bcma.h | 1 -
 5 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index f032ed6dd459..1e5ac0a79696 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -208,6 +208,9 @@ static int bcma_host_pci_probe(struct pci_dev *dev,
 	bus->boardinfo.vendor = bus->host_pci->subsystem_vendor;
 	bus->boardinfo.type = bus->host_pci->subsystem_device;
 
+	/* Initialize struct, detect chip */
+	bcma_init_bus(bus);
+
 	/* Register */
 	err = bcma_bus_register(bus);
 	if (err)
diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c
index 1edd7e064621..379e0d4ebe72 100644
--- a/drivers/bcma/host_soc.c
+++ b/drivers/bcma/host_soc.c
@@ -178,6 +178,9 @@ int __init bcma_host_soc_register(struct bcma_soc *soc)
 	bus->hosttype = BCMA_HOSTTYPE_SOC;
 	bus->ops = &bcma_host_soc_ops;
 
+	/* Initialize struct, detect chip */
+	bcma_init_bus(bus);
+
 	/* Register */
 	err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips);
 	if (err)
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 0ff8d58831ef..9f6b0cb9a12c 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -334,8 +334,6 @@ int __init bcma_bus_early_register(struct bcma_bus *bus,
 	struct bcma_device *core;
 	struct bcma_device_id match;
 
-	bcma_init_bus(bus);
-
 	match.manuf = BCMA_MANUF_BCM;
 	match.id = bcma_cc_core_id(bus);
 	match.class = BCMA_CL_SIM;
diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c
index e9bd77249a4c..e2b990303042 100644
--- a/drivers/bcma/scan.c
+++ b/drivers/bcma/scan.c
@@ -438,9 +438,6 @@ void bcma_init_bus(struct bcma_bus *bus)
 	s32 tmp;
 	struct bcma_chipinfo *chipinfo = &(bus->chipinfo);
 
-	if (bus->init_done)
-		return;
-
 	INIT_LIST_HEAD(&bus->cores);
 	bus->nr_cores = 0;
 
@@ -452,8 +449,6 @@ void bcma_init_bus(struct bcma_bus *bus)
 	chipinfo->pkg = (tmp & BCMA_CC_ID_PKG) >> BCMA_CC_ID_PKG_SHIFT;
 	bcma_info(bus, "Found chip with id 0x%04X, rev 0x%02X and package 0x%02X\n",
 		  chipinfo->id, chipinfo->rev, chipinfo->pkg);
-
-	bus->init_done = true;
 }
 
 int bcma_bus_scan(struct bcma_bus *bus)
@@ -463,8 +458,6 @@ int bcma_bus_scan(struct bcma_bus *bus)
 
 	int err, core_num = 0;
 
-	bcma_init_bus(bus);
-
 	erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM);
 	if (bus->hosttype == BCMA_HOSTTYPE_SOC) {
 		eromptr = ioremap_nocache(erombase, BCMA_CORE_SIZE);
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 0272e49135d0..c1ba87d1548e 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -332,7 +332,6 @@ struct bcma_bus {
 	struct bcma_device *mapped_core;
 	struct list_head cores;
 	u8 nr_cores;
-	u8 init_done:1;
 	u8 num;
 
 	struct bcma_drv_cc drv_cc;
-- 
cgit v1.2.3


From a395135ddebb0a06052b84c309eb6cb68b79c797 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Mon, 1 Sep 2014 23:11:07 +0200
Subject: bcma: use separated function to initialize bus on SoC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is required to split SoC bus init into two phases. The later one
(which includes scanning) should be called when kalloc is available.

Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/mips/bcm47xx/setup.c     |  4 ++++
 drivers/bcma/host_soc.c       | 11 +++++++++--
 include/linux/bcma/bcma_soc.h |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 2b63e7e7d3d3..fff6ed4978c7 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -201,6 +201,10 @@ static void __init bcm47xx_register_bcma(void)
 		pr_warn("bcm47xx: someone else already registered a bcma SPROM callback handler (err %d)\n", err);
 
 	err = bcma_host_soc_register(&bcm47xx_bus.bcma);
+	if (err)
+		panic("Failed to register BCMA bus (err %d)", err);
+
+	err = bcma_host_soc_init(&bcm47xx_bus.bcma);
 	if (err)
 		panic("Failed to initialize BCMA bus (err %d)", err);
 
diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c
index 379e0d4ebe72..718e054dd727 100644
--- a/drivers/bcma/host_soc.c
+++ b/drivers/bcma/host_soc.c
@@ -165,7 +165,6 @@ static const struct bcma_host_ops bcma_host_soc_ops = {
 int __init bcma_host_soc_register(struct bcma_soc *soc)
 {
 	struct bcma_bus *bus = &soc->bus;
-	int err;
 
 	/* iomap only first core. We have to read some register on this core
 	 * to scan the bus.
@@ -181,7 +180,15 @@ int __init bcma_host_soc_register(struct bcma_soc *soc)
 	/* Initialize struct, detect chip */
 	bcma_init_bus(bus);
 
-	/* Register */
+	return 0;
+}
+
+int __init bcma_host_soc_init(struct bcma_soc *soc)
+{
+	struct bcma_bus *bus = &soc->bus;
+	int err;
+
+	/* Scan bus and initialize it */
 	err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips);
 	if (err)
 		iounmap(bus->mmio);
diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h
index 4203c5593b9f..f24d245f8394 100644
--- a/include/linux/bcma/bcma_soc.h
+++ b/include/linux/bcma/bcma_soc.h
@@ -10,6 +10,7 @@ struct bcma_soc {
 };
 
 int __init bcma_host_soc_register(struct bcma_soc *soc);
+int __init bcma_host_soc_init(struct bcma_soc *soc);
 
 int bcma_bus_register(struct bcma_bus *bus);
 
-- 
cgit v1.2.3


From 23a2f39c8f4035eade7f226eb7ada30c78d9eee3 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Mon, 8 Sep 2014 22:53:35 +0200
Subject: bcma: store more alternative addresses

Each core could have more than one alternative address. There are cores
with 8 alternative addresses for different functions. The PHY control
in the Chip common B core is done through the 2. alternative address
and not the first one.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
CC: linux-usb@vger.kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/scan.c         | 9 +++++----
 drivers/usb/host/bcma-hcd.c | 2 +-
 include/linux/bcma/bcma.h   | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c
index e2b990303042..06be7282cbc4 100644
--- a/drivers/bcma/scan.c
+++ b/drivers/bcma/scan.c
@@ -276,7 +276,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr,
 			      struct bcma_device *core)
 {
 	u32 tmp;
-	u8 i, j;
+	u8 i, j, k;
 	s32 cia, cib;
 	u8 ports[2], wrappers[2];
 
@@ -367,6 +367,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr,
 	core->addr = tmp;
 
 	/* get & parse slave ports */
+	k = 0;
 	for (i = 0; i < ports[1]; i++) {
 		for (j = 0; ; j++) {
 			tmp = bcma_erom_get_addr_desc(bus, eromptr,
@@ -376,9 +377,9 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr,
 				/* pr_debug("erom: slave port %d "
 				 * "has %d descriptors\n", i, j); */
 				break;
-			} else {
-				if (i == 0 && j == 0)
-					core->addr1 = tmp;
+			} else if (k < ARRAY_SIZE(core->addr_s)) {
+				core->addr_s[k] = tmp;
+				k++;
 			}
 		}
 	}
diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c
index 205f4a336583..cd6d0afb6b8f 100644
--- a/drivers/usb/host/bcma-hcd.c
+++ b/drivers/usb/host/bcma-hcd.c
@@ -237,7 +237,7 @@ static int bcma_hcd_probe(struct bcma_device *dev)
 	bcma_hcd_init_chip(dev);
 
 	/* In AI chips EHCI is addrspace 0, OHCI is 1 */
-	ohci_addr = dev->addr1;
+	ohci_addr = dev->addr_s[0];
 	if ((chipinfo->id == 0x5357 || chipinfo->id == 0x4749)
 	    && chipinfo->rev == 0)
 		ohci_addr = 0x18009000;
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index c1ba87d1548e..7fc16c991291 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -267,7 +267,7 @@ struct bcma_device {
 	u8 core_unit;
 
 	u32 addr;
-	u32 addr1;
+	u32 addr_s[8];
 	u32 wrap;
 
 	void __iomem *io_addr;
-- 
cgit v1.2.3


From 1716bcf3f76fe71e98d4851a3eb73ea3d93d4773 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Mon, 8 Sep 2014 22:53:36 +0200
Subject: bcma: add support for chipcommon B core

This core is used on BCM4708 to configure the PCIe and USB3 PHYs and it
contains the addresses to the Device Management unit. This will be used
by the PCIe driver first.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/Makefile                       |  1 +
 drivers/bcma/bcma_private.h                 |  4 ++
 drivers/bcma/driver_chipcommon_b.c          | 61 +++++++++++++++++++++++++++++
 drivers/bcma/main.c                         | 10 +++++
 drivers/bcma/scan.c                         |  1 +
 include/linux/bcma/bcma.h                   |  1 +
 include/linux/bcma/bcma_driver_chipcommon.h |  8 ++++
 7 files changed, 86 insertions(+)
 create mode 100644 drivers/bcma/driver_chipcommon_b.c

(limited to 'include/linux')

diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile
index 91290f7f61b8..838b4b9d352f 100644
--- a/drivers/bcma/Makefile
+++ b/drivers/bcma/Makefile
@@ -1,5 +1,6 @@
 bcma-y					+= main.o scan.o core.o sprom.o
 bcma-y					+= driver_chipcommon.o driver_chipcommon_pmu.o
+bcma-y					+= driver_chipcommon_b.o
 bcma-$(CONFIG_BCMA_SFLASH)		+= driver_chipcommon_sflash.o
 bcma-$(CONFIG_BCMA_NFLASH)		+= driver_chipcommon_nflash.o
 bcma-y					+= driver_pci.o
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 09b632ad0fe2..b40be43c6f31 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -50,6 +50,10 @@ void bcma_chipco_serial_init(struct bcma_drv_cc *cc);
 extern struct platform_device bcma_pflash_dev;
 #endif /* CONFIG_BCMA_DRIVER_MIPS */
 
+/* driver_chipcommon_b.c */
+int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb);
+void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb);
+
 /* driver_chipcommon_pmu.c */
 u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc);
 u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc);
diff --git a/drivers/bcma/driver_chipcommon_b.c b/drivers/bcma/driver_chipcommon_b.c
new file mode 100644
index 000000000000..c20b5f4ff290
--- /dev/null
+++ b/drivers/bcma/driver_chipcommon_b.c
@@ -0,0 +1,61 @@
+/*
+ * Broadcom specific AMBA
+ * ChipCommon B Unit driver
+ *
+ * Copyright 2014, Hauke Mehrtens <hauke@hauke-m.de>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include "bcma_private.h"
+#include <linux/export.h>
+#include <linux/bcma/bcma.h>
+
+static bool bcma_wait_reg(struct bcma_bus *bus, void __iomem *addr, u32 mask,
+			  u32 value, int timeout)
+{
+	unsigned long deadline = jiffies + timeout;
+	u32 val;
+
+	do {
+		val = readl(addr);
+		if ((val & mask) == value)
+			return true;
+		cpu_relax();
+		udelay(10);
+	} while (!time_after_eq(jiffies, deadline));
+
+	bcma_err(bus, "Timeout waiting for register %p\n", addr);
+
+	return false;
+}
+
+void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value)
+{
+	struct bcma_bus *bus = ccb->core->bus;
+
+	writel(offset, ccb->mii + 0x00);
+	bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100);
+	writel(value, ccb->mii + 0x04);
+	bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100);
+}
+EXPORT_SYMBOL_GPL(bcma_chipco_b_mii_write);
+
+int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb)
+{
+	if (ccb->setup_done)
+		return 0;
+
+	ccb->setup_done = 1;
+	ccb->mii = ioremap_nocache(ccb->core->addr_s[1], BCMA_CORE_SIZE);
+	if (!ccb->mii)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb)
+{
+	if (ccb->mii)
+		iounmap(ccb->mii);
+}
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 297a2d46985a..c421403cab43 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -173,6 +173,7 @@ static int bcma_register_devices(struct bcma_bus *bus)
 		switch (core->id.id) {
 		case BCMA_CORE_4706_CHIPCOMMON:
 		case BCMA_CORE_CHIPCOMMON:
+		case BCMA_CORE_NS_CHIPCOMMON_B:
 		case BCMA_CORE_PCI:
 		case BCMA_CORE_PCIE:
 		case BCMA_CORE_PCIE2:
@@ -287,6 +288,13 @@ int bcma_bus_register(struct bcma_bus *bus)
 		bcma_core_chipcommon_init(&bus->drv_cc);
 	}
 
+	/* Init CC core */
+	core = bcma_find_core(bus, BCMA_CORE_NS_CHIPCOMMON_B);
+	if (core) {
+		bus->drv_cc_b.core = core;
+		bcma_core_chipcommon_b_init(&bus->drv_cc_b);
+	}
+
 	/* Init MIPS core */
 	core = bcma_find_core(bus, BCMA_CORE_MIPS_74K);
 	if (core) {
@@ -341,6 +349,8 @@ void bcma_bus_unregister(struct bcma_bus *bus)
 	else if (err)
 		bcma_err(bus, "Can not unregister GPIO driver: %i\n", err);
 
+	bcma_core_chipcommon_b_free(&bus->drv_cc_b);
+
 	cores[0] = bcma_find_core(bus, BCMA_CORE_MIPS_74K);
 	cores[1] = bcma_find_core(bus, BCMA_CORE_PCIE);
 	cores[2] = bcma_find_core(bus, BCMA_CORE_4706_MAC_GBIT_COMMON);
diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c
index 06be7282cbc4..b3a403c136fb 100644
--- a/drivers/bcma/scan.c
+++ b/drivers/bcma/scan.c
@@ -314,6 +314,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr,
 		/* Some specific cores don't need wrappers */
 		switch (core->id.id) {
 		case BCMA_CORE_4706_MAC_GBIT_COMMON:
+		case BCMA_CORE_NS_CHIPCOMMON_B:
 		/* Not used yet: case BCMA_CORE_OOB_ROUTER: */
 			break;
 		default:
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 7fc16c991291..634597917670 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -335,6 +335,7 @@ struct bcma_bus {
 	u8 num;
 
 	struct bcma_drv_cc drv_cc;
+	struct bcma_drv_cc_b drv_cc_b;
 	struct bcma_drv_pci drv_pci[2];
 	struct bcma_drv_pcie2 drv_pcie2;
 	struct bcma_drv_mips drv_mips;
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 63d105cd14a3..db6fa217f98b 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -644,6 +644,12 @@ struct bcma_drv_cc {
 #endif
 };
 
+struct bcma_drv_cc_b {
+	struct bcma_device *core;
+	u8 setup_done:1;
+	void __iomem *mii;
+};
+
 /* Register access */
 #define bcma_cc_read32(cc, offset) \
 	bcma_read32((cc)->core, offset)
@@ -699,4 +705,6 @@ extern void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid);
 
 extern u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc);
 
+void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value);
+
 #endif /* LINUX_BCMA_DRIVER_CC_H_ */
-- 
cgit v1.2.3


From d0449b90f80f263e17e8b3ce31442e45121dc46c Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 22 Aug 2014 10:18:42 -0400
Subject: locks: Remove unused conf argument from lm_grant

This argument is always NULL so don't pass it around.

[jlayton: remove dependencies on previous patches in series]

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 fs/dlm/plock.c     |  8 ++++----
 fs/lockd/svclock.c | 12 +++---------
 include/linux/fs.h |  2 +-
 3 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index f704458ea5f5..e0ab3a93eeff 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -30,7 +30,7 @@ struct plock_op {
 
 struct plock_xop {
 	struct plock_op xop;
-	void *callback;
+	int (*callback)(struct file_lock *fl, int result);
 	void *fl;
 	void *file;
 	struct file_lock flc;
@@ -190,7 +190,7 @@ static int dlm_plock_callback(struct plock_op *op)
 	struct file *file;
 	struct file_lock *fl;
 	struct file_lock *flc;
-	int (*notify)(void *, void *, int) = NULL;
+	int (*notify)(struct file_lock *fl, int result) = NULL;
 	struct plock_xop *xop = (struct plock_xop *)op;
 	int rv = 0;
 
@@ -209,7 +209,7 @@ static int dlm_plock_callback(struct plock_op *op)
 	notify = xop->callback;
 
 	if (op->info.rv) {
-		notify(fl, NULL, op->info.rv);
+		notify(fl, op->info.rv);
 		goto out;
 	}
 
@@ -228,7 +228,7 @@ static int dlm_plock_callback(struct plock_op *op)
 			  (unsigned long long)op->info.number, file, fl);
 	}
 
-	rv = notify(fl, NULL, 0);
+	rv = notify(fl, 0);
 	if (rv) {
 		/* XXX: We need to cancel the fs lock here: */
 		log_print("dlm_plock_callback: lock granted after lock request "
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ab798a88ec1d..2a6170133c1d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -667,22 +667,16 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
  * deferred rpc for GETLK and SETLK.
  */
 static void
-nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf,
-			     int result)
+nlmsvc_update_deferred_block(struct nlm_block *block, int result)
 {
 	block->b_flags |= B_GOT_CALLBACK;
 	if (result == 0)
 		block->b_granted = 1;
 	else
 		block->b_flags |= B_TIMED_OUT;
-	if (conf) {
-		if (block->b_fl)
-			__locks_copy_lock(block->b_fl, conf);
-	}
 }
 
-static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
-					int result)
+static int nlmsvc_grant_deferred(struct file_lock *fl, int result)
 {
 	struct nlm_block *block;
 	int rc = -ENOENT;
@@ -697,7 +691,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
 					rc = -ENOLCK;
 					break;
 				}
-				nlmsvc_update_deferred_block(block, conf, result);
+				nlmsvc_update_deferred_block(block, result);
 			} else if (result == 0)
 				block->b_granted = 1;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 94187721ad41..908af4f81680 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -869,7 +869,7 @@ struct lock_manager_operations {
 	int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
 	unsigned long (*lm_owner_key)(struct file_lock *);
 	void (*lm_notify)(struct file_lock *);	/* unblock callback */
-	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
+	int (*lm_grant)(struct file_lock *, int);
 	void (*lm_break)(struct file_lock *);
 	int (*lm_change)(struct file_lock **, int);
 };
-- 
cgit v1.2.3


From 3fe0fff18fe87c6a2179837de68d1174903c6367 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Fri, 22 Aug 2014 10:18:42 -0400
Subject: locks: Rename __locks_copy_lock() to locks_copy_conflock()

Jeff advice, " Right now __locks_copy_lock is only used to copy
conflocks. It would be good to rename that to something more
distinct (i.e.locks_copy_conflock), to make it clear that we're
generating a conflock there."

v5: change order from 3/6 to 2/6
v4: new patch only renaming function name

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 fs/locks.c         | 10 +++++-----
 include/linux/fs.h |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index bb08857f90b5..ec9becd02d3d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -281,7 +281,7 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
 /*
  * Initialize a new lock from an existing file_lock structure.
  */
-void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
+void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
 {
 	new->fl_owner = fl->fl_owner;
 	new->fl_pid = fl->fl_pid;
@@ -293,14 +293,14 @@ void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
 	new->fl_ops = NULL;
 	new->fl_lmops = NULL;
 }
-EXPORT_SYMBOL(__locks_copy_lock);
+EXPORT_SYMBOL(locks_copy_conflock);
 
 void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
 	/* "new" must be a freshly-initialized lock */
 	WARN_ON_ONCE(new->fl_ops);
 
-	__locks_copy_lock(new, fl);
+	locks_copy_conflock(new, fl);
 	new->fl_file = fl->fl_file;
 	new->fl_ops = fl->fl_ops;
 	new->fl_lmops = fl->fl_lmops;
@@ -735,7 +735,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 			break;
 	}
 	if (cfl) {
-		__locks_copy_lock(fl, cfl);
+		locks_copy_conflock(fl, cfl);
 		if (cfl->fl_nspid)
 			fl->fl_pid = pid_vnr(cfl->fl_nspid);
 	} else
@@ -941,7 +941,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 			if (!posix_locks_conflict(request, fl))
 				continue;
 			if (conflock)
-				__locks_copy_lock(conflock, fl);
+				locks_copy_conflock(conflock, fl);
 			error = -EAGAIN;
 			if (!(request->fl_flags & FL_SLEEP))
 				goto out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 908af4f81680..5ab86f44b697 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -966,7 +966,7 @@ void locks_free_lock(struct file_lock *fl);
 extern void locks_init_lock(struct file_lock *);
 extern struct file_lock * locks_alloc_lock(void);
 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
-extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
+extern void locks_copy_conflock(struct file_lock *, struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
 extern void locks_remove_file(struct file *);
 extern void locks_release_private(struct file_lock *);
@@ -1026,7 +1026,7 @@ static inline void locks_init_lock(struct file_lock *fl)
 	return;
 }
 
-static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
 {
 	return;
 }
-- 
cgit v1.2.3


From 5c97d7b1479982a48cf2129062b880c2555049ac Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Fri, 22 Aug 2014 10:18:43 -0400
Subject: locks: New ops in lock_manager_operations for get/put owner

NFSD or other lockmanager may increase the owner's reference,
so adds two new options for copying and releasing owner.

v5: change order from 2/6 to 3/6
v4: rename lm_copy_owner/lm_release_owner to lm_get_owner/lm_put_owner

Reviewed-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 fs/locks.c         | 12 ++++++++++--
 include/linux/fs.h |  2 ++
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index ec9becd02d3d..5e83f3a99377 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -230,8 +230,12 @@ void locks_release_private(struct file_lock *fl)
 			fl->fl_ops->fl_release_private(fl);
 		fl->fl_ops = NULL;
 	}
-	fl->fl_lmops = NULL;
 
+	if (fl->fl_lmops) {
+		if (fl->fl_lmops->lm_put_owner)
+			fl->fl_lmops->lm_put_owner(fl);
+		fl->fl_lmops = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(locks_release_private);
 
@@ -274,8 +278,12 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
 			fl->fl_ops->fl_copy_lock(new, fl);
 		new->fl_ops = fl->fl_ops;
 	}
-	if (fl->fl_lmops)
+
+	if (fl->fl_lmops) {
+		if (fl->fl_lmops->lm_get_owner)
+			fl->fl_lmops->lm_get_owner(new, fl);
 		new->fl_lmops = fl->fl_lmops;
+	}
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5ab86f44b697..3b07ce2698de 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -868,6 +868,8 @@ struct file_lock_operations {
 struct lock_manager_operations {
 	int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
 	unsigned long (*lm_owner_key)(struct file_lock *);
+	void (*lm_get_owner)(struct file_lock *, struct file_lock *);
+	void (*lm_put_owner)(struct file_lock *);
 	void (*lm_notify)(struct file_lock *);	/* unblock callback */
 	int (*lm_grant)(struct file_lock *, int);
 	void (*lm_break)(struct file_lock *);
-- 
cgit v1.2.3


From 09802fd2a8caea2a2147fca8d7975697c5de573d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Fri, 22 Aug 2014 10:18:44 -0400
Subject: lockd: rip out deferred lock handling from testlock codepath

As Kinglong points out, the nlm_block->b_fl field is no longer used at
all. Also, vfs_test_lock in the generic locking code will only return
FILE_LOCK_DEFERRED if FL_SLEEP is set, and it isn't here.

The only other place that returns that value is the DLM lock code, but
it only does that in dlm_posix_lock, never in dlm_posix_get.

Remove all of the deferred locking code from the testlock codepath
since it doesn't appear to ever be used anyway.

I do have a small concern that this might cause a behavior change in the
case where you have a block already sitting on the list when the
testlock request comes in, but that looks like it doesn't really work
properly anyway. I think it's best to just pass that down to
vfs_test_lock and let the filesystem report that instead of trying to
infer what's going on with the lock by looking at an existing block.

Cc: cluster-devel@redhat.com
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Kinglong Mee <kinglongmee@gmail.com>
---
 fs/lockd/svclock.c          | 55 +++++----------------------------------------
 include/linux/lockd/lockd.h |  1 -
 2 files changed, 6 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index acfa94d5b489..13db95f54176 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -245,7 +245,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
 	block->b_daemon = rqstp->rq_server;
 	block->b_host   = host;
 	block->b_file   = file;
-	block->b_fl = NULL;
 	file->f_count++;
 
 	/* Add to file's list of blocks */
@@ -295,7 +294,6 @@ static void nlmsvc_free_block(struct kref *kref)
 	nlmsvc_freegrantargs(block->b_call);
 	nlmsvc_release_call(block->b_call);
 	nlm_release_file(block->b_file);
-	kfree(block->b_fl);
 	kfree(block);
 }
 
@@ -508,7 +506,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 		struct nlm_host *host, struct nlm_lock *lock,
 		struct nlm_lock *conflock, struct nlm_cookie *cookie)
 {
-	struct nlm_block 	*block = NULL;
 	int			error;
 	__be32			ret;
 
@@ -519,63 +516,26 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
-	/* Get existing block (in case client is busy-waiting) */
-	block = nlmsvc_lookup_block(file, lock);
-
-	if (block == NULL) {
-		struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
-
-		if (conf == NULL)
-			return nlm_granted;
-		block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
-		if (block == NULL) {
-			kfree(conf);
-			return nlm_granted;
-		}
-		block->b_fl = conf;
-	}
-	if (block->b_flags & B_QUEUED) {
-		dprintk("lockd: nlmsvc_testlock deferred block %p flags %d fl %p\n",
-			block, block->b_flags, block->b_fl);
-		if (block->b_flags & B_TIMED_OUT) {
-			nlmsvc_unlink_block(block);
-			ret = nlm_lck_denied;
-			goto out;
-		}
-		if (block->b_flags & B_GOT_CALLBACK) {
-			nlmsvc_unlink_block(block);
-			if (block->b_fl != NULL
-					&& block->b_fl->fl_type != F_UNLCK) {
-				lock->fl = *block->b_fl;
-				goto conf_lock;
-			} else {
-				ret = nlm_granted;
-				goto out;
-			}
-		}
-		ret = nlm_drop_reply;
-		goto out;
-	}
-
 	if (locks_in_grace(SVC_NET(rqstp))) {
 		ret = nlm_lck_denied_grace_period;
 		goto out;
 	}
+
 	error = vfs_test_lock(file->f_file, &lock->fl);
-	if (error == FILE_LOCK_DEFERRED) {
-		ret = nlmsvc_defer_lock_rqst(rqstp, block);
-		goto out;
-	}
 	if (error) {
+		/* We can't currently deal with deferred test requests */
+		if (error == FILE_LOCK_DEFERRED)
+			WARN_ON_ONCE(1);
+
 		ret = nlm_lck_denied_nolocks;
 		goto out;
 	}
+
 	if (lock->fl.fl_type == F_UNLCK) {
 		ret = nlm_granted;
 		goto out;
 	}
 
-conf_lock:
 	dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
 		lock->fl.fl_type, (long long)lock->fl.fl_start,
 		(long long)lock->fl.fl_end);
@@ -589,8 +549,6 @@ conf_lock:
 	locks_release_private(&lock->fl);
 	ret = nlm_lck_denied;
 out:
-	if (block)
-		nlmsvc_release_block(block);
 	return ret;
 }
 
@@ -661,7 +619,6 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
  * This is a callback from the filesystem for VFS file lock requests.
  * It will be used if lm_grant is defined and the filesystem can not
  * respond to the request immediately.
- * For GETLK request it will copy the reply to the nlm_block.
  * For SETLK or SETLKW request it will get the local posix lock.
  * In all cases it will move the block to the head of nlm_blocked q where
  * nlmsvc_retry_blocked() can send back a reply for SETLKW or revisit the
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 219d79627c05..ff82a32871b5 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -178,7 +178,6 @@ struct nlm_block {
 	unsigned char		b_granted;	/* VFS granted lock */
 	struct nlm_file *	b_file;		/* file in question */
 	struct cache_req *	b_cache_req;	/* deferred request handling */
-	struct file_lock *	b_fl;		/* set for GETLK */
 	struct cache_deferred_req * b_deferred_req;
 	unsigned int		b_flags;	/* block flags */
 #define B_QUEUED		1	/* lock queued */
-- 
cgit v1.2.3


From 699688a416524c3cea9eafaca69fc6c06c13c02e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Fri, 22 Aug 2014 10:18:44 -0400
Subject: locks: remove lock_may_read and lock_may_write

There are no callers of these functions.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 fs/locks.c         | 80 ------------------------------------------------------
 include/linux/fs.h | 14 ----------
 2 files changed, 94 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index c3991dc80137..5200ffd2ba9b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2597,86 +2597,6 @@ static int __init proc_locks_init(void)
 module_init(proc_locks_init);
 #endif
 
-/**
- *	lock_may_read - checks that the region is free of locks
- *	@inode: the inode that is being read
- *	@start: the first byte to read
- *	@len: the number of bytes to read
- *
- *	Emulates Windows locking requirements.  Whole-file
- *	mandatory locks (share modes) can prohibit a read and
- *	byte-range POSIX locks can prohibit a read if they overlap.
- *
- *	N.B. this function is only ever called
- *	from knfsd and ownership of locks is never checked.
- */
-int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
-{
-	struct file_lock *fl;
-	int result = 1;
-
-	spin_lock(&inode->i_lock);
-	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
-		if (IS_POSIX(fl)) {
-			if (fl->fl_type == F_RDLCK)
-				continue;
-			if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
-				continue;
-		} else if (IS_FLOCK(fl)) {
-			if (!(fl->fl_type & LOCK_MAND))
-				continue;
-			if (fl->fl_type & LOCK_READ)
-				continue;
-		} else
-			continue;
-		result = 0;
-		break;
-	}
-	spin_unlock(&inode->i_lock);
-	return result;
-}
-
-EXPORT_SYMBOL(lock_may_read);
-
-/**
- *	lock_may_write - checks that the region is free of locks
- *	@inode: the inode that is being written
- *	@start: the first byte to write
- *	@len: the number of bytes to write
- *
- *	Emulates Windows locking requirements.  Whole-file
- *	mandatory locks (share modes) can prohibit a write and
- *	byte-range POSIX locks can prohibit a write if they overlap.
- *
- *	N.B. this function is only ever called
- *	from knfsd and ownership of locks is never checked.
- */
-int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
-{
-	struct file_lock *fl;
-	int result = 1;
-
-	spin_lock(&inode->i_lock);
-	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
-		if (IS_POSIX(fl)) {
-			if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
-				continue;
-		} else if (IS_FLOCK(fl)) {
-			if (!(fl->fl_type & LOCK_MAND))
-				continue;
-			if (fl->fl_type & LOCK_WRITE)
-				continue;
-		} else
-			continue;
-		result = 0;
-		break;
-	}
-	spin_unlock(&inode->i_lock);
-	return result;
-}
-
-EXPORT_SYMBOL(lock_may_write);
-
 static int __init filelock_init(void)
 {
 	int i;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3b07ce2698de..458f733c96bd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -985,8 +985,6 @@ extern void lease_get_mtime(struct inode *, struct timespec *time);
 extern int generic_setlease(struct file *, long, struct file_lock **);
 extern int vfs_setlease(struct file *, long, struct file_lock **);
 extern int lease_modify(struct file_lock **, int);
-extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
-extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
 #else /* !CONFIG_FILE_LOCKING */
 static inline int fcntl_getlk(struct file *file, unsigned int cmd,
 			      struct flock __user *user)
@@ -1117,18 +1115,6 @@ static inline int lease_modify(struct file_lock **before, int arg)
 {
 	return -EINVAL;
 }
-
-static inline int lock_may_read(struct inode *inode, loff_t start,
-				unsigned long len)
-{
-	return 1;
-}
-
-static inline int lock_may_write(struct inode *inode, loff_t start,
-				 unsigned long len)
-{
-	return 1;
-}
 #endif /* !CONFIG_FILE_LOCKING */
 
 
-- 
cgit v1.2.3


From 1c994a0909a556508c2cc26ab5d9e13c5ce33aa0 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Wed, 27 Aug 2014 06:49:41 -0400
Subject: locks: consolidate "nolease" routines

GFS2 and NFS have setlease routines that always just return -EINVAL.
Turn that into a generic routine that can live in fs/libfs.c.

Cc: <linux-nfs@vger.kernel.org>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: <cluster-devel@redhat.com>
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/gfs2/file.c     | 22 +---------------------
 fs/libfs.c         | 16 ++++++++++++++++
 fs/nfs/file.c      | 13 +------------
 fs/nfs/internal.h  |  1 -
 fs/nfs/nfs4file.c  |  2 +-
 include/linux/fs.h |  1 +
 6 files changed, 20 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 26b3f952e6b1..2c02478a86b0 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -912,26 +912,6 @@ out_uninit:
 
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
 
-/**
- * gfs2_setlease - acquire/release a file lease
- * @file: the file pointer
- * @arg: lease type
- * @fl: file lock
- *
- * We don't currently have a way to enforce a lease across the whole
- * cluster; until we do, disable leases (by just returning -EINVAL),
- * unless the administrator has requested purely local locking.
- *
- * Locking: called under i_lock
- *
- * Returns: errno
- */
-
-static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
-{
-	return -EINVAL;
-}
-
 /**
  * gfs2_lock - acquire/release a posix lock on a file
  * @file: the file pointer
@@ -1069,7 +1049,7 @@ const struct file_operations gfs2_file_fops = {
 	.flock		= gfs2_flock,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
-	.setlease	= gfs2_setlease,
+	.setlease	= simple_nosetlease,
 	.fallocate	= gfs2_fallocate,
 };
 
diff --git a/fs/libfs.c b/fs/libfs.c
index 88e3e00e2eca..29012a303ef8 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1075,3 +1075,19 @@ struct inode *alloc_anon_inode(struct super_block *s)
 	return inode;
 }
 EXPORT_SYMBOL(alloc_anon_inode);
+
+/**
+ * simple_nosetlease - generic helper for prohibiting leases
+ * @filp: file pointer
+ * @arg: type of lease to obtain
+ * @flp: new lease supplied for insertion
+ *
+ * Generic helper for filesystems that do not wish to allow leases to be set.
+ * All arguments are ignored and it just returns -EINVAL.
+ */
+int
+simple_nosetlease(struct file *filp, long arg, struct file_lock **flp)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL(simple_nosetlease);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 524dd80d1898..8c4048ecdad1 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -891,17 +891,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 }
 EXPORT_SYMBOL_GPL(nfs_flock);
 
-/*
- * There is no protocol support for leases, so we have no way to implement
- * them correctly in the face of opens by other clients.
- */
-int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
-{
-	dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg);
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(nfs_setlease);
-
 const struct file_operations nfs_file_operations = {
 	.llseek		= nfs_file_llseek,
 	.read		= new_sync_read,
@@ -918,6 +907,6 @@ const struct file_operations nfs_file_operations = {
 	.splice_read	= nfs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.check_flags	= nfs_check_flags,
-	.setlease	= nfs_setlease,
+	.setlease	= simple_nosetlease,
 };
 EXPORT_SYMBOL_GPL(nfs_file_operations);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9056622d2230..94d922ebb5ac 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -346,7 +346,6 @@ int nfs_file_release(struct inode *, struct file *);
 int nfs_lock(struct file *, int, struct file_lock *);
 int nfs_flock(struct file *, int, struct file_lock *);
 int nfs_check_flags(int);
-int nfs_setlease(struct file *, long, struct file_lock **);
 
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index a816f0627a6c..3e987ad9ae25 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -131,5 +131,5 @@ const struct file_operations nfs4_file_operations = {
 	.splice_read	= nfs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.check_flags	= nfs_check_flags,
-	.setlease	= nfs_setlease,
+	.setlease	= simple_nosetlease,
 };
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 458f733c96bd..435e3d9ec5cf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2599,6 +2599,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping,
 			struct page *page, void *fsdata);
 extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
+extern int simple_nosetlease(struct file *, long, struct file_lock **);
 extern const struct dentry_operations simple_dentry_operations;
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
-- 
cgit v1.2.3


From e0b93eddfe17dcb7d644eb5d6ad02a86fc41a977 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Fri, 22 Aug 2014 11:27:32 -0400
Subject: security: make security_file_set_fowner, f_setown and __f_setown void
 return

security_file_set_fowner always returns 0, so make it f_setown and
__f_setown void return functions and fix up the error handling in the
callers.

Cc: linux-security-module@vger.kernel.org
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/net/tun.c           |  4 +---
 drivers/tty/tty_io.c        |  3 ++-
 fs/fcntl.c                  | 21 +++++++--------------
 fs/locks.c                  |  2 +-
 fs/notify/dnotify/dnotify.c |  8 +-------
 include/linux/fs.h          |  4 ++--
 include/linux/security.h    |  8 ++++----
 net/socket.c                |  3 ++-
 security/capability.c       |  4 ++--
 security/security.c         |  4 ++--
 security/selinux/hooks.c    |  4 +---
 security/smack/smack_lsm.c  |  3 +--
 12 files changed, 26 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index acaaf6784179..186ce541c657 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2152,9 +2152,7 @@ static int tun_chr_fasync(int fd, struct file *file, int on)
 		goto out;
 
 	if (on) {
-		ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
-		if (ret)
-			goto out;
+		__f_setown(file, task_pid(current), PIDTYPE_PID, 0);
 		tfile->flags |= TUN_FASYNC;
 	} else
 		tfile->flags &= ~TUN_FASYNC;
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 8fbad3410c75..aea3b66f7bf2 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2163,8 +2163,9 @@ static int __tty_fasync(int fd, struct file *filp, int on)
 		}
 		get_pid(pid);
 		spin_unlock_irqrestore(&tty->ctrl_lock, flags);
-		retval = __f_setown(filp, pid, type, 0);
+		__f_setown(filp, pid, type, 0);
 		put_pid(pid);
+		retval = 0;
 	}
 out:
 	return retval;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 22d1c3df61ac..99d440a4a6ba 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -98,26 +98,19 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 	write_unlock_irq(&filp->f_owner.lock);
 }
 
-int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
+void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 		int force)
 {
-	int err;
-
-	err = security_file_set_fowner(filp);
-	if (err)
-		return err;
-
+	security_file_set_fowner(filp);
 	f_modown(filp, pid, type, force);
-	return 0;
 }
 EXPORT_SYMBOL(__f_setown);
 
-int f_setown(struct file *filp, unsigned long arg, int force)
+void f_setown(struct file *filp, unsigned long arg, int force)
 {
 	enum pid_type type;
 	struct pid *pid;
 	int who = arg;
-	int result;
 	type = PIDTYPE_PID;
 	if (who < 0) {
 		type = PIDTYPE_PGID;
@@ -125,9 +118,8 @@ int f_setown(struct file *filp, unsigned long arg, int force)
 	}
 	rcu_read_lock();
 	pid = find_vpid(who);
-	result = __f_setown(filp, pid, type, force);
+	__f_setown(filp, pid, type, force);
 	rcu_read_unlock();
-	return result;
 }
 EXPORT_SYMBOL(f_setown);
 
@@ -181,7 +173,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
 	if (owner.pid && !pid)
 		ret = -ESRCH;
 	else
-		ret = __f_setown(filp, pid, type, 1);
+		 __f_setown(filp, pid, type, 1);
 	rcu_read_unlock();
 
 	return ret;
@@ -302,7 +294,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		force_successful_syscall_return();
 		break;
 	case F_SETOWN:
-		err = f_setown(filp, arg, 1);
+		f_setown(filp, arg, 1);
+		err = 0;
 		break;
 	case F_GETOWN_EX:
 		err = f_getown_ex(filp, arg);
diff --git a/fs/locks.c b/fs/locks.c
index 5200ffd2ba9b..f5f648e003dd 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1776,7 +1776,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
 	if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
 		new = NULL;
 
-	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+	__f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
 out_unlock:
 	spin_unlock(&inode->i_lock);
 	if (fl)
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index abc8cbcfe90e..caaaf9dfe353 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -346,13 +346,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 		goto out;
 	}
 
-	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-	if (error) {
-		/* if we added, we must shoot */
-		if (dn_mark == new_dn_mark)
-			destroy = 1;
-		goto out;
-	}
+	__f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
 
 	error = attach_dn(dn, dn_mark, id, fd, filp, mask);
 	/* !error means that we attached the dn to the dn_mark, so don't free it */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 435e3d9ec5cf..96528f73dda4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1139,8 +1139,8 @@ extern void fasync_free(struct fasync_struct *);
 /* can be called from interrupts */
 extern void kill_fasync(struct fasync_struct **, int, int);
 
-extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern int f_setown(struct file *filp, unsigned long arg, int force);
+extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
+extern void f_setown(struct file *filp, unsigned long arg, int force);
 extern void f_delown(struct file *filp);
 extern pid_t f_getown(struct file *filp);
 extern int send_sigurg(struct fown_struct *fown);
diff --git a/include/linux/security.h b/include/linux/security.h
index 623f90e5f38d..b10e7af95d3b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1559,7 +1559,7 @@ struct security_operations {
 	int (*file_lock) (struct file *file, unsigned int cmd);
 	int (*file_fcntl) (struct file *file, unsigned int cmd,
 			   unsigned long arg);
-	int (*file_set_fowner) (struct file *file);
+	void (*file_set_fowner) (struct file *file);
 	int (*file_send_sigiotask) (struct task_struct *tsk,
 				    struct fown_struct *fown, int sig);
 	int (*file_receive) (struct file *file);
@@ -1834,7 +1834,7 @@ int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			   unsigned long prot);
 int security_file_lock(struct file *file, unsigned int cmd);
 int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
-int security_file_set_fowner(struct file *file);
+void security_file_set_fowner(struct file *file);
 int security_file_send_sigiotask(struct task_struct *tsk,
 				 struct fown_struct *fown, int sig);
 int security_file_receive(struct file *file);
@@ -2312,9 +2312,9 @@ static inline int security_file_fcntl(struct file *file, unsigned int cmd,
 	return 0;
 }
 
-static inline int security_file_set_fowner(struct file *file)
+static inline void security_file_set_fowner(struct file *file)
 {
-	return 0;
+	return;
 }
 
 static inline int security_file_send_sigiotask(struct task_struct *tsk,
diff --git a/net/socket.c b/net/socket.c
index 95ee7d8682e7..769c9671847e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1069,7 +1069,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			err = -EFAULT;
 			if (get_user(pid, (int __user *)argp))
 				break;
-			err = f_setown(sock->file, pid, 1);
+			f_setown(sock->file, pid, 1);
+			err = 0;
 			break;
 		case FIOGETOWN:
 		case SIOCGPGRP:
diff --git a/security/capability.c b/security/capability.c
index a74fde6a7468..d68c57a62bcf 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -343,9 +343,9 @@ static int cap_file_fcntl(struct file *file, unsigned int cmd,
 	return 0;
 }
 
-static int cap_file_set_fowner(struct file *file)
+static void cap_file_set_fowner(struct file *file)
 {
-	return 0;
+	return;
 }
 
 static int cap_file_send_sigiotask(struct task_struct *tsk,
diff --git a/security/security.c b/security/security.c
index e41b1a8d7644..18b35c63fc0c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -775,9 +775,9 @@ int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 	return security_ops->file_fcntl(file, cmd, arg);
 }
 
-int security_file_set_fowner(struct file *file)
+void security_file_set_fowner(struct file *file)
 {
-	return security_ops->file_set_fowner(file);
+	security_ops->file_set_fowner(file);
 }
 
 int security_file_send_sigiotask(struct task_struct *tsk,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b0e940497e23..ada0d0bf3463 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3346,14 +3346,12 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd,
 	return err;
 }
 
-static int selinux_file_set_fowner(struct file *file)
+static void selinux_file_set_fowner(struct file *file)
 {
 	struct file_security_struct *fsec;
 
 	fsec = file->f_security;
 	fsec->fown_sid = current_sid();
-
-	return 0;
 }
 
 static int selinux_file_send_sigiotask(struct task_struct *tsk,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index e6ab307ce86e..69e5635d89e5 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1390,12 +1390,11 @@ static int smack_mmap_file(struct file *file,
  * Returns 0
  * Further research may be required on this one.
  */
-static int smack_file_set_fowner(struct file *file)
+static void smack_file_set_fowner(struct file *file)
 {
 	struct smack_known *skp = smk_of_current();
 
 	file->f_security = skp->smk_known;
-	return 0;
 }
 
 /**
-- 
cgit v1.2.3


From 2ae4c673e3cbd69bc2decf6d7f5961f3c7b9b38b Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 2 Sep 2014 15:43:52 -0700
Subject: f2fs: retain inconsistency information to initiate fsck.f2fs

This patch adds sbi->need_fsck to conduct fsck.f2fs later.
This flag can only be removed by fsck.f2fs.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c    | 3 +++
 fs/f2fs/f2fs.h          | 1 +
 fs/f2fs/super.c         | 1 +
 include/linux/f2fs_fs.h | 1 +
 4 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index cb5cb4ca1814..5af97d99106e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -882,6 +882,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	else
 		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 
+	if (sbi->need_fsck)
+		set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+
 	/* update SIT/NAT bitmap */
 	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
 	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 222ff5b4d1e1..210c62df08c3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -434,6 +434,7 @@ struct f2fs_sb_info {
 	struct buffer_head *raw_super_buf;	/* buffer head of raw sb */
 	struct f2fs_super_block *raw_super;	/* raw super block pointer */
 	int s_dirty;				/* dirty flag for checkpoint */
+	bool need_fsck;				/* need fsck.f2fs to fix */
 
 	/* for node-related operations */
 	struct f2fs_nm_info *nm_info;		/* node manager */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 41bdf511003d..a6923041f41a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -849,6 +849,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 		atomic_set(&sbi->nr_pages[i], 0);
 
 	sbi->dir_level = DEF_DIR_LEVEL;
+	sbi->need_fsck = false;
 }
 
 /*
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 08ed2b0a96e6..9ca1ff3d4662 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -85,6 +85,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_FSCK_FLAG		0x00000010
 #define CP_ERROR_FLAG		0x00000008
 #define CP_COMPACT_SUM_FLAG	0x00000004
 #define CP_ORPHAN_PRESENT_FLAG	0x00000002
-- 
cgit v1.2.3


From 87354059881ce9315181604dc17076c535f4d744 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 22 Jul 2014 20:41:42 -0400
Subject: ftrace: Add helper function ftrace_ops_get_func()

Add the helper function to what the mcount trampoline is to call
for a ftrace_ops function. This helper will be used by arch code
in the future to set up dynamic trampolines. But as this does the
same tests that are performed in choosing what function to call for
the default mcount trampoline, might as well use it to clean up
the existing code.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  2 ++
 kernel/trace/ftrace.c  | 47 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f0b0edbf55a9..ef37286547fc 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -56,6 +56,8 @@ struct ftrace_ops;
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
 			      struct ftrace_ops *op, struct pt_regs *regs);
 
+ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
+
 /*
  * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
  * set in the flags member.
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 17b606362ab4..dabf734f909c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -259,20 +259,12 @@ static void update_ftrace_function(void)
 	 * then have the mcount trampoline call the function directly.
 	 */
 	if (ftrace_ops_list == &ftrace_list_end ||
-	    (ftrace_ops_list->next == &ftrace_list_end &&
-	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) &&
-	     !FTRACE_FORCE_LIST_FUNC)) {
+	    (ftrace_ops_list->next == &ftrace_list_end)) {
+
 		/* Set the ftrace_ops that the arch callback uses */
 		set_function_trace_op = ftrace_ops_list;
-		/*
-		 * If the func handles its own recursion, call it directly.
-		 * Otherwise call the recursion protected function that
-		 * will call the ftrace ops function.
-		 */
-		if (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
-			func = ftrace_ops_list->func;
-		else
-			func = ftrace_ops_recurs_func;
+
+		func = ftrace_ops_get_func(ftrace_ops_list);
 	} else {
 		/* Just use the default ftrace_ops */
 		set_function_trace_op = &ftrace_list_end;
@@ -4856,6 +4848,37 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip,
 	trace_clear_recursion(bit);
 }
 
+/**
+ * ftrace_ops_get_func - get the function a trampoline should call
+ * @ops: the ops to get the function for
+ *
+ * Normally the mcount trampoline will call the ops->func, but there
+ * are times that it should not. For example, if the ops does not
+ * have its own recursion protection, then it should call the
+ * ftrace_ops_recurs_func() instead.
+ *
+ * Returns the function that the trampoline should call for @ops.
+ */
+ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
+{
+	/*
+	 * If this is a dynamic ops or we force list func,
+	 * then it needs to call the list anyway.
+	 */
+	if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC)
+		return ftrace_ops_list_func;
+
+	/*
+	 * If the func handles its own recursion, call it directly.
+	 * Otherwise call the recursion protected function that
+	 * will call the ftrace ops function.
+	 */
+	if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE))
+		return ftrace_ops_recurs_func;
+
+	return ops->func;
+}
+
 static void clear_ftrace_swapper(void)
 {
 	struct task_struct *p;
-- 
cgit v1.2.3


From 738cbe72adc5c8f2016c4c68aa5162631d4f27e1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 8 Sep 2014 08:04:47 +0200
Subject: net: bpf: consolidate JIT binary allocator

Introduced in commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks") and later on replicated in aa2d2c73c21f
("s390/bpf,jit: address randomize and write protect jit code") for
s390 architecture, write protection for BPF JIT images got added and
a random start address of the JIT code, so that it's not on a page
boundary anymore.

Since both use a very similar allocator for the BPF binary header,
we can consolidate this code into the BPF core as it's mostly JIT
independant anyway.

This will also allow for future archs that support DEBUG_SET_MODULE_RONX
to just reuse instead of reimplementing it.

JIT tested on x86_64 and s390x with BPF test suite.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/net/bpf_jit_comp.c | 45 ++++++++-------------------------------
 arch/x86/net/bpf_jit_comp.c  | 50 ++++++++++----------------------------------
 include/linux/filter.h       | 13 ++++++++++++
 kernel/bpf/core.c            | 39 ++++++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index f2833c5b218a..b734f975c22e 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -5,11 +5,9 @@
  *
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
-#include <linux/moduleloader.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/filter.h>
-#include <linux/random.h>
 #include <linux/init.h>
 #include <asm/cacheflush.h>
 #include <asm/facility.h>
@@ -148,6 +146,12 @@ struct bpf_jit {
 	ret;						\
 })
 
+static void bpf_jit_fill_hole(void *area, unsigned int size)
+{
+	/* Fill whole space with illegal instructions */
+	memset(area, 0, size);
+}
+
 static void bpf_jit_prologue(struct bpf_jit *jit)
 {
 	/* Save registers and create stack frame if necessary */
@@ -780,38 +784,6 @@ out:
 	return -1;
 }
 
-/*
- * Note: for security reasons, bpf code will follow a randomly
- *	 sized amount of illegal instructions.
- */
-struct bpf_binary_header {
-	unsigned int pages;
-	u8 image[];
-};
-
-static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
-						  u8 **image_ptr)
-{
-	struct bpf_binary_header *header;
-	unsigned int sz, hole;
-
-	/* Most BPF filters are really small, but if some of them fill a page,
-	 * allow at least 128 extra bytes for illegal instructions.
-	 */
-	sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE);
-	header = module_alloc(sz);
-	if (!header)
-		return NULL;
-	memset(header, 0, sz);
-	header->pages = sz / PAGE_SIZE;
-	hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header));
-	/* Insert random number of illegal instructions before BPF code
-	 * and make sure the first instruction starts at an even address.
-	 */
-	*image_ptr = &header->image[(prandom_u32() % hole) & -2];
-	return header;
-}
-
 void bpf_jit_compile(struct bpf_prog *fp)
 {
 	struct bpf_binary_header *header = NULL;
@@ -850,7 +822,8 @@ void bpf_jit_compile(struct bpf_prog *fp)
 			size = prg_len + lit_len;
 			if (size >= BPF_SIZE_MAX)
 				goto out;
-			header = bpf_alloc_binary(size, &jit.start);
+			header = bpf_jit_binary_alloc(size, &jit.start,
+						      2, bpf_jit_fill_hole);
 			if (!header)
 				goto out;
 			jit.prg = jit.mid = jit.start + prg_len;
@@ -884,7 +857,7 @@ void bpf_jit_free(struct bpf_prog *fp)
 		goto free_filter;
 
 	set_memory_rw(addr, header->pages);
-	module_free(NULL, header);
+	bpf_jit_binary_free(header);
 
 free_filter:
 	bpf_prog_unlock_free(fp);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 06f8c17f5484..9de0b5476b0c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -8,12 +8,10 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
-#include <linux/moduleloader.h>
-#include <asm/cacheflush.h>
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
-#include <linux/random.h>
+#include <asm/cacheflush.h>
 
 int bpf_jit_enable __read_mostly;
 
@@ -109,39 +107,6 @@ static inline void bpf_flush_icache(void *start, void *end)
 #define CHOOSE_LOAD_FUNC(K, func) \
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
-struct bpf_binary_header {
-	unsigned int	pages;
-	/* Note : for security reasons, bpf code will follow a randomly
-	 * sized amount of int3 instructions
-	 */
-	u8		image[];
-};
-
-static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
-						  u8 **image_ptr)
-{
-	unsigned int sz, hole;
-	struct bpf_binary_header *header;
-
-	/* Most of BPF filters are really small,
-	 * but if some of them fill a page, allow at least
-	 * 128 extra bytes to insert a random section of int3
-	 */
-	sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE);
-	header = module_alloc(sz);
-	if (!header)
-		return NULL;
-
-	memset(header, 0xcc, sz); /* fill whole space with int3 instructions */
-
-	header->pages = sz / PAGE_SIZE;
-	hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header));
-
-	/* insert a random number of int3 instructions before BPF code */
-	*image_ptr = &header->image[prandom_u32() % hole];
-	return header;
-}
-
 /* pick a register outside of BPF range for JIT internal work */
 #define AUX_REG (MAX_BPF_REG + 1)
 
@@ -206,6 +171,12 @@ static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
 	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
 }
 
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	/* fill whole space with int3 instructions */
+	memset(area, 0xcc, size);
+}
+
 struct jit_context {
 	unsigned int cleanup_addr; /* epilogue code offset */
 	bool seen_ld_abs;
@@ -959,7 +930,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 		if (proglen <= 0) {
 			image = NULL;
 			if (header)
-				module_free(NULL, header);
+				bpf_jit_binary_free(header);
 			goto out;
 		}
 		if (image) {
@@ -969,7 +940,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 			break;
 		}
 		if (proglen == oldproglen) {
-			header = bpf_alloc_binary(proglen, &image);
+			header = bpf_jit_binary_alloc(proglen, &image,
+						      1, jit_fill_hole);
 			if (!header)
 				goto out;
 		}
@@ -998,7 +970,7 @@ void bpf_jit_free(struct bpf_prog *fp)
 		goto free_filter;
 
 	set_memory_rw(addr, header->pages);
-	module_free(NULL, header);
+	bpf_jit_binary_free(header);
 
 free_filter:
 	bpf_prog_unlock_free(fp);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 8f82ef3f1cdd..868764fcffb8 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -289,6 +289,11 @@ struct sock_fprog_kern {
 	struct sock_filter	*filter;
 };
 
+struct bpf_binary_header {
+	unsigned int pages;
+	u8 image[];
+};
+
 struct bpf_work_struct {
 	struct bpf_prog *prog;
 	struct work_struct work;
@@ -358,6 +363,14 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags);
 void __bpf_prog_free(struct bpf_prog *fp);
 
+typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
+
+struct bpf_binary_header *
+bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
+		     unsigned int alignment,
+		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
+void bpf_jit_binary_free(struct bpf_binary_header *hdr);
+
 static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
 {
 	bpf_prog_unlock_ro(fp);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 2c2bfaacce66..8ee520f0ec70 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -20,9 +20,12 @@
  * Andi Kleen - Fix a few bad bugs and races.
  * Kris Katterjohn - Added many additional checks in bpf_check_classic()
  */
+
 #include <linux/filter.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
+#include <linux/random.h>
+#include <linux/moduleloader.h>
 #include <asm/unaligned.h>
 
 /* Registers */
@@ -125,6 +128,42 @@ void __bpf_prog_free(struct bpf_prog *fp)
 }
 EXPORT_SYMBOL_GPL(__bpf_prog_free);
 
+struct bpf_binary_header *
+bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
+		     unsigned int alignment,
+		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
+{
+	struct bpf_binary_header *hdr;
+	unsigned int size, hole, start;
+
+	/* Most of BPF filters are really small, but if some of them
+	 * fill a page, allow at least 128 extra bytes to insert a
+	 * random section of illegal instructions.
+	 */
+	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
+	hdr = module_alloc(size);
+	if (hdr == NULL)
+		return NULL;
+
+	/* Fill space with illegal/arch-dep instructions. */
+	bpf_fill_ill_insns(hdr, size);
+
+	hdr->pages = size / PAGE_SIZE;
+	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
+		     PAGE_SIZE - sizeof(*hdr));
+	start = (prandom_u32() % hole) & ~(alignment - 1);
+
+	/* Leave a random number of instructions before BPF code. */
+	*image_ptr = &hdr->image[start];
+
+	return hdr;
+}
+
+void bpf_jit_binary_free(struct bpf_binary_header *hdr)
+{
+	module_free(NULL, hdr);
+}
+
 /* Base function for offset calculation. Needs to go into .text section,
  * therefore keeping it non-static as well; will also be used by JITs
  * anyway later on, so do not let the compiler omit it.
-- 
cgit v1.2.3


From 286aad3c4014ca825c447e07e24f8929e6d266d2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 8 Sep 2014 08:04:49 +0200
Subject: net: bpf: be friendly to kmemcheck

Reported by Mikulas Patocka, kmemcheck currently barks out a
false positive since we don't have special kmemcheck annotation
for bitfields used in bpf_prog structure.

We currently have jited:1, len:31 and thus when accessing len
while CONFIG_KMEMCHECK enabled, kmemcheck throws a warning that
we're reading uninitialized memory.

As we don't need the whole bit universe for pages member, we
can just split it to u16 and use a bool flag for jited instead
of a bitfield.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/net/bpf_jit_32.c       | 2 +-
 arch/mips/net/bpf_jit.c         | 2 +-
 arch/powerpc/net/bpf_jit_comp.c | 2 +-
 arch/s390/net/bpf_jit_comp.c    | 2 +-
 arch/sparc/net/bpf_jit_comp.c   | 2 +-
 arch/x86/net/bpf_jit_comp.c     | 2 +-
 include/linux/filter.h          | 6 +++---
 net/core/filter.c               | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 2d1a5b93d91c..6b45f649eff0 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -933,7 +933,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	set_memory_ro((unsigned long)header, header->pages);
 	fp->bpf_func = (void *)ctx.target;
-	fp->jited = 1;
+	fp->jited = true;
 out:
 	kfree(ctx.offsets);
 	return;
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index cfa83cf2447d..0e97ccd29fe3 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1417,7 +1417,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
 
 	fp->bpf_func = (void *)ctx.target;
-	fp->jited = 1;
+	fp->jited = true;
 
 out:
 	kfree(ctx.offsets);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 40c53ff59124..cbae2dfd053c 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -686,7 +686,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 		((u64 *)image)[0] = (u64)code_base;
 		((u64 *)image)[1] = local_paca->kernel_toc;
 		fp->bpf_func = (void *)image;
-		fp->jited = 1;
+		fp->jited = true;
 	}
 out:
 	kfree(addrs);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index b734f975c22e..555f5c7e83ab 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -842,7 +842,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 	if (jit.start) {
 		set_memory_ro((unsigned long)header, header->pages);
 		fp->bpf_func = (void *) jit.start;
-		fp->jited = 1;
+		fp->jited = true;
 	}
 out:
 	kfree(addrs);
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index f7a736b645e8..b2ad9dc5425e 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -801,7 +801,7 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 	if (image) {
 		bpf_flush_icache(image, image + proglen);
 		fp->bpf_func = (void *)image;
-		fp->jited = 1;
+		fp->jited = true;
 	}
 out:
 	kfree(addrs);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 9de0b5476b0c..d56cd1f515bd 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -955,7 +955,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 		bpf_flush_icache(header, image + proglen);
 		set_memory_ro((unsigned long)header, header->pages);
 		prog->bpf_func = (void *)image;
-		prog->jited = 1;
+		prog->jited = true;
 	}
 out:
 	kfree(addrs);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 868764fcffb8..4b59edead908 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -300,9 +300,9 @@ struct bpf_work_struct {
 };
 
 struct bpf_prog {
-	u32			pages;		/* Number of allocated pages */
-	u32			jited:1,	/* Is our filter JIT'ed? */
-				len:31;		/* Number of filter blocks */
+	u16			pages;		/* Number of allocated pages */
+	bool			jited;		/* Is our filter JIT'ed? */
+	u32			len;		/* Number of filter blocks */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	struct bpf_work_struct	*work;		/* Deferred free work struct */
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
diff --git a/net/core/filter.c b/net/core/filter.c
index fa5b7d0f77ac..dfc716ffa44b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -972,7 +972,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
 	int err;
 
 	fp->bpf_func = NULL;
-	fp->jited = 0;
+	fp->jited = false;
 
 	err = bpf_check_classic(fp->insns, fp->len);
 	if (err) {
-- 
cgit v1.2.3


From a0c7b164ad115ec0556dc0904ee2218cbc5cedfa Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 9 Sep 2014 23:13:57 +0100
Subject: regulator: of: Provide simplified DT parsing method

Currently regulator drivers which support DT all repeat very similar code
to supply a list of known regulator identifiers to be matched with DT,
convert that to platform data which is then matched up with the regulators
as they are registered. This is both fiddly to get right and for devices
which can use the standard helpers to provide their operations is the main
source of code in the driver.

Since this code is essentially identical for most drivers we can factor it
out into the core, moving the identifiers in the match table into the
regulator descriptors and also allowing drivers to pass in the name of the
subnode to search. When a driver provides an of_match string for the
regulator the core will attempt to use that to obtain init_data, allowing
the driver to remove all explicit code for DT parsing and simply provide
data instead.

The current code leaks the phandles for the child nodes, this will be
addressed incrementally and makes no practical difference for FDT anyway
as the DT data structures are never freed.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/core.c         | 10 +++++---
 drivers/regulator/internal.h     |  4 ++++
 drivers/regulator/of_regulator.c | 51 ++++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/driver.h |  4 ++++
 4 files changed, 66 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a3c3785901f5..496002efd961 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3516,12 +3516,17 @@ regulator_register(const struct regulator_desc *regulator_desc,
 		return ERR_PTR(-EINVAL);
 	}
 
-	init_data = config->init_data;
-
 	rdev = kzalloc(sizeof(struct regulator_dev), GFP_KERNEL);
 	if (rdev == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	init_data = regulator_of_get_init_data(dev, regulator_desc,
+					       &rdev->dev.of_node);
+	if (!init_data) {
+		init_data = config->init_data;
+		rdev->dev.of_node = of_node_get(config->of_node);
+	}
+
 	mutex_lock(&regulator_list_mutex);
 
 	mutex_init(&rdev->mutex);
@@ -3548,7 +3553,6 @@ regulator_register(const struct regulator_desc *regulator_desc,
 
 	/* register with sysfs */
 	rdev->dev.class = &regulator_class;
-	rdev->dev.of_node = of_node_get(config->of_node);
 	rdev->dev.parent = dev;
 	dev_set_name(&rdev->dev, "regulator.%d",
 		     atomic_inc_return(&regulator_no) - 1);
diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h
index 84bbda10c396..a6043ad32ead 100644
--- a/drivers/regulator/internal.h
+++ b/drivers/regulator/internal.h
@@ -35,4 +35,8 @@ struct regulator {
 	struct dentry *debugfs;
 };
 
+struct regulator_init_data *regulator_of_get_init_data(struct device *dev,
+			         const struct regulator_desc *desc,
+				 struct device_node **node);
+
 #endif
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index ee5e67bc8d5b..7a51814abdc5 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -14,8 +14,11 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/regulator/machine.h>
+#include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
 
+#include "internal.h"
+
 static void of_get_regulation_constraints(struct device_node *np,
 					struct regulator_init_data **init_data)
 {
@@ -189,3 +192,51 @@ int of_regulator_match(struct device *dev, struct device_node *node,
 	return count;
 }
 EXPORT_SYMBOL_GPL(of_regulator_match);
+
+struct regulator_init_data *regulator_of_get_init_data(struct device *dev,
+					    const struct regulator_desc *desc,
+					    struct device_node **node)
+{
+	struct device_node *search, *child;
+	struct regulator_init_data *init_data = NULL;
+	const char *name;
+
+	if (!dev->of_node || !desc->of_match)
+		return NULL;
+
+	if (desc->regulators_node)
+		search = of_get_child_by_name(dev->of_node,
+					      desc->regulators_node);
+	else
+		search = dev->of_node;
+
+	if (!search) {
+		dev_err(dev, "Failed to find regulator container node\n");
+		return NULL;
+	}
+
+	for_each_child_of_node(search, child) {
+		name = of_get_property(child, "regulator-compatible", NULL);
+		if (!name)
+			name = child->name;
+
+		if (strcmp(desc->of_match, name))
+			continue;
+
+		init_data = of_get_regulator_init_data(dev, child);
+		if (!init_data) {
+			dev_err(dev,
+				"failed to parse DT for regulator %s\n",
+				child->name);
+			break;
+		}
+
+		of_node_get(child);
+		*node = child;
+		break;
+	}
+
+	of_node_put(search);
+
+	return init_data;
+}
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index bbe03a1924c0..c35f5f97a147 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -203,6 +203,8 @@ enum regulator_type {
  *
  * @name: Identifying name for the regulator.
  * @supply_name: Identifying the regulator supply
+ * @of_match: Name used to identify regulator in DT.
+ * @regulators_node: Name of node containing regulator definitions in DT.
  * @id: Numerical identifier for the regulator.
  * @ops: Regulator operations table.
  * @irq: Interrupt number for the regulator.
@@ -242,6 +244,8 @@ enum regulator_type {
 struct regulator_desc {
 	const char *name;
 	const char *supply_name;
+	const char *of_match;
+	const char *regulators_node;
 	int id;
 	bool continuous_voltage_range;
 	unsigned n_voltages;
-- 
cgit v1.2.3


From e1effa0144a1ddf5b456c388ffaf784f3c5163fd Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 5 Aug 2014 17:19:38 -0400
Subject: ftrace: Annotate the ops operation on update

Add three new flags for ftrace_ops:

  FTRACE_OPS_FL_ADDING
  FTRACE_OPS_FL_REMOVING
  FTRACE_OPS_FL_MODIFYING

These will be set for the ftrace_ops when they are first added
to the function tracing, being removed from function tracing
or just having their functions changed from function tracing,
respectively.

This will be needed to remove the tramp_hash, which can grow quite
big. The tramp_hash is used to note what functions a ftrace_ops
is using a trampoline for. Denoting which ftrace_ops is being
modified, will allow us to use the ftrace_ops hashes themselves,
which are much smaller as they have a global flag to denote if
a ftrace_ops is tracing all functions, as well as a notrace hash
if the ftrace_ops is tracing all but a few. The tramp_hash just
creates a hash item for every function, which can go into the 10s
of thousands if all functions are using the ftrace_ops trampoline.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  6 ++++++
 kernel/trace/ftrace.c  | 45 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 45 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ef37286547fc..d9216f6385d9 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -91,6 +91,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
  * INITIALIZED - The ftrace_ops has already been initialized (first use time
  *            register_ftrace_function() is called, it will initialized the ops)
  * DELETED - The ops are being deleted, do not let them be registered again.
+ * ADDING  - The ops is in the process of being added.
+ * REMOVING - The ops is in the process of being removed.
+ * MODIFYING - The ops is in the process of changing its filter functions.
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= 1 << 0,
@@ -102,6 +105,9 @@ enum {
 	FTRACE_OPS_FL_STUB			= 1 << 6,
 	FTRACE_OPS_FL_INITIALIZED		= 1 << 7,
 	FTRACE_OPS_FL_DELETED			= 1 << 8,
+	FTRACE_OPS_FL_ADDING			= 1 << 9,
+	FTRACE_OPS_FL_REMOVING			= 1 << 10,
+	FTRACE_OPS_FL_MODIFYING			= 1 << 11,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 858ac16f8492..e43c793093e5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1057,6 +1057,12 @@ static struct pid * const ftrace_swapper_pid = &init_struct_pid;
 
 static struct ftrace_ops *removed_ops;
 
+/*
+ * Set when doing a global update, like enabling all recs or disabling them.
+ * It is not set when just updating a single ftrace_ops.
+ */
+static bool update_all_ops;
+
 #ifndef CONFIG_FTRACE_MCOUNT_RECORD
 # error Dynamic ftrace depends on MCOUNT_RECORD
 #endif
@@ -2366,6 +2372,13 @@ static void ftrace_run_update_code(int command)
 	FTRACE_WARN_ON(ret);
 }
 
+static void ftrace_run_modify_code(struct ftrace_ops *ops, int command)
+{
+	ops->flags |= FTRACE_OPS_FL_MODIFYING;
+	ftrace_run_update_code(command);
+	ops->flags &= ~FTRACE_OPS_FL_MODIFYING;
+}
+
 static ftrace_func_t saved_ftrace_func;
 static int ftrace_start_up;
 
@@ -2387,6 +2400,13 @@ static void ftrace_startup_enable(int command)
 	ftrace_run_update_code(command);
 }
 
+static void ftrace_startup_all(int command)
+{
+	update_all_ops = true;
+	ftrace_startup_enable(command);
+	update_all_ops = false;
+}
+
 static int ftrace_startup(struct ftrace_ops *ops, int command)
 {
 	int ret;
@@ -2401,12 +2421,22 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
 	ftrace_start_up++;
 	command |= FTRACE_UPDATE_CALLS;
 
-	ops->flags |= FTRACE_OPS_FL_ENABLED;
+	/*
+	 * Note that ftrace probes uses this to start up
+	 * and modify functions it will probe. But we still
+	 * set the ADDING flag for modification, as probes
+	 * do not have trampolines. If they add them in the
+	 * future, then the probes will need to distinguish
+	 * between adding and updating probes.
+	 */
+	ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING;
 
 	ftrace_hash_rec_enable(ops, 1);
 
 	ftrace_startup_enable(command);
 
+	ops->flags &= ~FTRACE_OPS_FL_ADDING;
+
 	return 0;
 }
 
@@ -2456,11 +2486,12 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	 * If the ops uses a trampoline, then it needs to be
 	 * tested first on update.
 	 */
+	ops->flags |= FTRACE_OPS_FL_REMOVING;
 	removed_ops = ops;
 
 	ftrace_run_update_code(command);
 
-	removed_ops = NULL;
+	ops->flags &= ~FTRACE_OPS_FL_REMOVING;
 
 	/*
 	 * Dynamic ops may be freed, we must make sure that all
@@ -3373,7 +3404,7 @@ static void __enable_ftrace_function_probe(void)
 	if (ftrace_probe_registered) {
 		/* still need to update the function call sites */
 		if (ftrace_enabled)
-			ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+			ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS);
 		return;
 	}
 
@@ -3792,7 +3823,7 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
 static void ftrace_ops_update_code(struct ftrace_ops *ops)
 {
 	if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled)
-		ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+		ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS);
 }
 
 static int
@@ -4717,6 +4748,7 @@ core_initcall(ftrace_nodyn_init);
 
 static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
 static inline void ftrace_startup_enable(int command) { }
+static inline void ftrace_startup_all(int command) { }
 /* Keep as macros so we do not need to define the commands */
 # define ftrace_startup(ops, command)					\
 	({								\
@@ -5016,7 +5048,8 @@ static int ftrace_pid_add(int p)
 	set_ftrace_pid_task(pid);
 
 	ftrace_update_pid_func();
-	ftrace_startup_enable(0);
+
+	ftrace_startup_all(0);
 
 	mutex_unlock(&ftrace_lock);
 	return 0;
@@ -5045,7 +5078,7 @@ static void ftrace_pid_reset(void)
 	}
 
 	ftrace_update_pid_func();
-	ftrace_startup_enable(0);
+	ftrace_startup_all(0);
 
 	mutex_unlock(&ftrace_lock);
 }
-- 
cgit v1.2.3


From fef5aeeee9e3717e7aea991a7ae9ff6a7a2d4c85 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 24 Jul 2014 12:25:47 -0400
Subject: ftrace: Replace tramp_hash with old_*_hash to save space

Allowing function callbacks to declare their own trampolines requires
that each ftrace_ops that has a trampoline must have some sort of
accounting that keeps track of which ops has a trampoline attached
to a record.

The easy way to solve this was to add a "tramp_hash" that created a
hash entry for every function that a ops uses with a trampoline.
But since we can have literally tens of thousands of functions being
traced, that means we need tens of thousands of descriptors to map
the ops to the function in the hash. This is quite expensive and
can cause enabling and disabling the function graph tracer to take
some time to start and stop. It can take up to several seconds to
disable or enable all functions in the function graph tracer for this
reason.

The better approach albeit more complex, is to keep track of how ops
are being enabled and disabled, and use that along with the counting
of the number of ops attached to records, to determive what ops has
a trampoline attached to a record at enabling and disabling of
tracing.

To do this, the tramp_hash has been replaced with an old_filter_hash
and old_notrace_hash, which get the copy of the ops filter_hash and
notrace_hash respectively. The old hashes is kept until the ops has
been modified or removed and the old hashes are used with the logic
of the accounting to determine the ops that have the trampoline of
a record. The reason this has less of a footprint is due to the trick
that an "empty" hash in the filter_hash means "all functions" and
an empty hash in the notrace hash means "no functions" in the hash.

This is much more efficienct, doesn't have the delay, and takes up
much less memory, as we do not need to map all the functions but
just figure out which functions are mapped at the time it is
enabled or disabled.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |   2 +-
 kernel/trace/ftrace.c  | 239 +++++++++++++++++--------------------------------
 2 files changed, 85 insertions(+), 156 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d9216f6385d9..662697babd48 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -140,7 +140,7 @@ struct ftrace_ops {
 	int				nr_trampolines;
 	struct ftrace_ops_hash		local_hash;
 	struct ftrace_ops_hash		*func_hash;
-	struct ftrace_hash		*tramp_hash;
+	struct ftrace_ops_hash		old_hash;
 	unsigned long			trampoline;
 #endif
 };
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e43c793093e5..d325a1e76554 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1373,6 +1373,21 @@ update:
 	return 0;
 }
 
+static bool hash_contains_ip(unsigned long ip,
+			     struct ftrace_ops_hash *hash)
+{
+	/*
+	 * The function record is a match if it exists in the filter
+	 * hash and not in the notrace hash. Note, an emty hash is
+	 * considered a match for the filter hash, but an empty
+	 * notrace hash is considered not in the notrace hash.
+	 */
+	return (ftrace_hash_empty(hash->filter_hash) ||
+		ftrace_lookup_ip(hash->filter_hash, ip)) &&
+		(ftrace_hash_empty(hash->notrace_hash) ||
+		 !ftrace_lookup_ip(hash->notrace_hash, ip));
+}
+
 /*
  * Test the hashes for this ops to see if we want to call
  * the ops->func or not.
@@ -1388,8 +1403,7 @@ update:
 static int
 ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 {
-	struct ftrace_hash *filter_hash;
-	struct ftrace_hash *notrace_hash;
+	struct ftrace_ops_hash hash;
 	int ret;
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
@@ -1402,13 +1416,10 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 		return 0;
 #endif
 
-	filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash);
-	notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash);
+	hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash);
+	hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash);
 
-	if ((ftrace_hash_empty(filter_hash) ||
-	     ftrace_lookup_ip(filter_hash, ip)) &&
-	    (ftrace_hash_empty(notrace_hash) ||
-	     !ftrace_lookup_ip(notrace_hash, ip)))
+	if (hash_contains_ip(ip, &hash))
 		ret = 1;
 	else
 		ret = 0;
@@ -1520,46 +1531,6 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec)
 	return  keep_regs;
 }
 
-static void ftrace_remove_tramp(struct ftrace_ops *ops,
-				struct dyn_ftrace *rec)
-{
-	/* If TRAMP is not set, no ops should have a trampoline for this */
-	if (!(rec->flags & FTRACE_FL_TRAMP))
-		return;
-
-	rec->flags &= ~FTRACE_FL_TRAMP;
-
-	if ((!ftrace_hash_empty(ops->func_hash->filter_hash) &&
-	     !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) ||
-	    ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip))
-		return;
-	/*
-	 * The tramp_hash entry will be removed at time
-	 * of update.
-	 */
-	ops->nr_trampolines--;
-}
-
-static void ftrace_clear_tramps(struct dyn_ftrace *rec, struct ftrace_ops *ops)
-{
-	struct ftrace_ops *op;
-
-	/* If TRAMP is not set, no ops should have a trampoline for this */
-	if (!(rec->flags & FTRACE_FL_TRAMP))
-		return;
-
-	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		/*
-		 * This function is called to clear other tramps
-		 * not the one that is being updated.
-		 */
-		if (op == ops)
-			continue;
-		if (op->nr_trampolines)
-			ftrace_remove_tramp(op, rec);
-	} while_for_each_ftrace_op(op);
-}
-
 static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 				     int filter_hash,
 				     bool inc)
@@ -1648,18 +1619,16 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			 * function, and the ops has a trampoline registered
 			 * for it, then we can call it directly.
 			 */
-			if (ftrace_rec_count(rec) == 1 && ops->trampoline) {
+			if (ftrace_rec_count(rec) == 1 && ops->trampoline)
 				rec->flags |= FTRACE_FL_TRAMP;
-				ops->nr_trampolines++;
-			} else {
+			else
 				/*
 				 * If we are adding another function callback
 				 * to this function, and the previous had a
 				 * custom trampoline in use, then we need to go
 				 * back to the default trampoline.
 				 */
-				ftrace_clear_tramps(rec, ops);
-			}
+				rec->flags &= ~FTRACE_FL_TRAMP;
 
 			/*
 			 * If any ops wants regs saved for this function
@@ -1672,9 +1641,6 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 				return;
 			rec->flags--;
 
-			if (ops->trampoline && !ftrace_rec_count(rec))
-				ftrace_remove_tramp(ops, rec);
-
 			/*
 			 * If the rec had REGS enabled and the ops that is
 			 * being removed had REGS set, then see if there is
@@ -1688,6 +1654,17 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 					rec->flags &= ~FTRACE_FL_REGS;
 			}
 
+			/*
+			 * If the rec had TRAMP enabled, then it needs to
+			 * be cleared. As TRAMP can only be enabled iff
+			 * there is only a single ops attached to it.
+			 * In otherwords, always disable it on decrementing.
+			 * In the future, we may set it if rec count is
+			 * decremented to one, and the ops that is left
+			 * has a trampoline.
+			 */
+			rec->flags &= ~FTRACE_FL_TRAMP;
+
 			/*
 			 * flags will be cleared in ftrace_check_record()
 			 * if rec count is zero.
@@ -1910,15 +1887,14 @@ static struct ftrace_ops *
 ftrace_find_tramp_ops_any(struct dyn_ftrace *rec)
 {
 	struct ftrace_ops *op;
+	unsigned long ip = rec->ip;
 
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
 
 		if (!op->trampoline)
 			continue;
 
-		if (ftrace_lookup_ip(op->func_hash->filter_hash, rec->ip) &&
-		    (ftrace_hash_empty(op->func_hash->notrace_hash) ||
-		     !ftrace_lookup_ip(op->func_hash->notrace_hash, rec->ip)))
+		if (hash_contains_ip(ip, op->func_hash))
 			return op;
 	} while_for_each_ftrace_op(op);
 
@@ -1929,18 +1905,51 @@ static struct ftrace_ops *
 ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec)
 {
 	struct ftrace_ops *op;
+	unsigned long ip = rec->ip;
 
-	/* Removed ops need to be tested first */
-	if (removed_ops && removed_ops->tramp_hash) {
-		if (ftrace_lookup_ip(removed_ops->tramp_hash, rec->ip))
+	/*
+	 * Need to check removed ops first.
+	 * If they are being removed, and this rec has a tramp,
+	 * and this rec is in the ops list, then it would be the
+	 * one with the tramp.
+	 */
+	if (removed_ops) {
+		if (hash_contains_ip(ip, &removed_ops->old_hash))
 			return removed_ops;
 	}
 
+	/*
+	 * Need to find the current trampoline for a rec.
+	 * Now, a trampoline is only attached to a rec if there
+	 * was a single 'ops' attached to it. But this can be called
+	 * when we are adding another op to the rec or removing the
+	 * current one. Thus, if the op is being added, we can
+	 * ignore it because it hasn't attached itself to the rec
+	 * yet. That means we just need to find the op that has a
+	 * trampoline and is not beeing added.
+	 */
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		if (!op->tramp_hash)
+
+		if (!op->trampoline)
+			continue;
+
+		/*
+		 * If the ops is being added, it hasn't gotten to
+		 * the point to be removed from this tree yet.
+		 */
+		if (op->flags & FTRACE_OPS_FL_ADDING)
 			continue;
 
-		if (ftrace_lookup_ip(op->tramp_hash, rec->ip))
+		/*
+		 * If the ops is not being added and has a trampoline,
+		 * then it must be the one that we want!
+		 */
+		if (hash_contains_ip(ip, op->func_hash))
+			return op;
+
+		/* If the ops is being modified, it may be in the old hash. */
+		if ((op->flags & FTRACE_OPS_FL_MODIFYING) &&
+		    hash_contains_ip(ip, &op->old_hash))
 			return op;
 
 	} while_for_each_ftrace_op(op);
@@ -1952,10 +1961,11 @@ static struct ftrace_ops *
 ftrace_find_tramp_ops_new(struct dyn_ftrace *rec)
 {
 	struct ftrace_ops *op;
+	unsigned long ip = rec->ip;
 
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
 		/* pass rec in as regs to have non-NULL val */
-		if (ftrace_ops_test(op, rec->ip, rec))
+		if (hash_contains_ip(ip, op->func_hash))
 			return op;
 	} while_for_each_ftrace_op(op);
 
@@ -2262,92 +2272,6 @@ void __weak arch_ftrace_update_code(int command)
 	ftrace_run_stop_machine(command);
 }
 
-static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops)
-{
-	struct ftrace_page *pg;
-	struct dyn_ftrace *rec;
-	int size, bits;
-	int ret;
-
-	size = ops->nr_trampolines;
-	bits = 0;
-	/*
-	 * Make the hash size about 1/2 the # found
-	 */
-	for (size /= 2; size; size >>= 1)
-		bits++;
-
-	ops->tramp_hash = alloc_ftrace_hash(bits);
-	/*
-	 * TODO: a failed allocation is going to screw up
-	 * the accounting of what needs to be modified
-	 * and not. For now, we kill ftrace if we fail
-	 * to allocate here. But there are ways around this,
-	 * but that will take a little more work.
-	 */
-	if (!ops->tramp_hash)
-		return -ENOMEM;
-
-	do_for_each_ftrace_rec(pg, rec) {
-		if (ftrace_rec_count(rec) == 1 &&
-		    ftrace_ops_test(ops, rec->ip, rec)) {
-
-			/*
-			 * If another ops adds to a rec, the rec will
-			 * lose its trampoline and never get it back
-			 * until all ops are off of it.
-			 */
-			if (!(rec->flags & FTRACE_FL_TRAMP))
-				continue;
-
-			/* This record had better have a trampoline */
-			if (FTRACE_WARN_ON(!(rec->flags & FTRACE_FL_TRAMP_EN)))
-				return -1;
-
-			ret = add_hash_entry(ops->tramp_hash, rec->ip);
-			if (ret < 0)
-				return ret;
-		}
-	} while_for_each_ftrace_rec();
-
-	/* The number of recs in the hash must match nr_trampolines */
-	if (FTRACE_WARN_ON(ops->tramp_hash->count != ops->nr_trampolines))
-		pr_warn("count=%ld trampolines=%d\n",
-			ops->tramp_hash->count,
-			ops->nr_trampolines);
-
-	return 0;
-}
-
-static int ftrace_save_tramp_hashes(void)
-{
-	struct ftrace_ops *op;
-	int ret;
-
-	/*
-	 * Now that any trampoline is being used, we need to save the
-	 * hashes for the ops that have them. This allows the mapping
-	 * back from the record to the ops that has the trampoline to
-	 * know what code is being replaced. Modifying code must always
-	 * verify what it is changing.
-	 */
-	do_for_each_ftrace_op(op, ftrace_ops_list) {
-
-		/* The tramp_hash is recreated each time. */
-		free_ftrace_hash(op->tramp_hash);
-		op->tramp_hash = NULL;
-
-		if (op->nr_trampolines) {
-			ret = ftrace_save_ops_tramp_hash(op);
-			if (ret)
-				return ret;
-		}
-
-	} while_for_each_ftrace_op(op);
-
-	return 0;
-}
-
 static void ftrace_run_update_code(int command)
 {
 	int ret;
@@ -2367,9 +2291,6 @@ static void ftrace_run_update_code(int command)
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
-
-	ret = ftrace_save_tramp_hashes();
-	FTRACE_WARN_ON(ret);
 }
 
 static void ftrace_run_modify_code(struct ftrace_ops *ops, int command)
@@ -2489,8 +2410,16 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	ops->flags |= FTRACE_OPS_FL_REMOVING;
 	removed_ops = ops;
 
+	/* The trampoline logic checks the old hashes */
+	ops->old_hash.filter_hash = ops->func_hash->filter_hash;
+	ops->old_hash.notrace_hash = ops->func_hash->notrace_hash;
+
 	ftrace_run_update_code(command);
 
+	ops->old_hash.filter_hash = NULL;
+	ops->old_hash.notrace_hash = NULL;
+
+	removed_ops = NULL;
 	ops->flags &= ~FTRACE_OPS_FL_REMOVING;
 
 	/*
@@ -3017,7 +2946,7 @@ static int t_show(struct seq_file *m, void *v)
 			struct ftrace_ops *ops;
 
 			ops = ftrace_find_tramp_ops_any(rec);
-			if (ops && ops->trampoline)
+			if (ops)
 				seq_printf(m, "\ttramp: %pS",
 					   (void *)ops->trampoline);
 			else
-- 
cgit v1.2.3


From 3a630178fd5f30c285fd7016c5340a176b625913 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 7 Aug 2014 10:52:04 -0700
Subject: tracing: generate RCU warnings even when tracepoints are disabled

Dave Jones reported seeing a bug from one of my TLB tracepoints:

	http://lkml.kernel.org/r/20140806181801.GA4605@redhat.com

I've been running these patches for months and never saw this.
But, a big chunk of my testing, especially with all the debugging
enabled, was in a vm where intel_idle doesn't work.  On the
systems where I was using intel_idle, I never had lockdep enabled
and this tracepoint on at the same time.

This patch ensures that whenever we have lockdep available, we do
_some_ RCU activity at the site of the tracepoint, despite
whether the tracepoint's condition matches or even if the
tracepoint itself is completely disabled.  This is a bit of a
hack, but it is pretty self-contained.

I confirmed that with this patch plus lockdep I get the same
splat as Dave Jones did, but without enabling the tracepoint
explicitly.

Link: http://lkml.kernel.org/p/20140807175204.C257CAC5@viggo.jf.intel.com

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Dave Jones <davej@redhat.com>,
Cc: paulmck@linux.vnet.ibm.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index b1293f15f592..e08e21e5f601 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -157,6 +157,12 @@ extern void syscall_unregfunc(void);
  * Make sure the alignment of the structure in the __tracepoints section will
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
+ *
+ * When lockdep is enabled, we make sure to always do the RCU portions of
+ * the tracepoint code, regardless of whether tracing is on or we match the
+ * condition.  This lets us find RCU issues triggered with tracepoints even
+ * when this tracepoint is off.  This code has no purpose other than poking
+ * RCU a bit.
  */
 #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
 	extern struct tracepoint __tracepoint_##name;			\
@@ -167,6 +173,11 @@ extern void syscall_unregfunc(void);
 				TP_PROTO(data_proto),			\
 				TP_ARGS(data_args),			\
 				TP_CONDITION(cond),,);			\
+		if (IS_ENABLED(CONFIG_LOCKDEP)) {			\
+			rcu_read_lock_sched_notrace();			\
+			rcu_dereference_sched(__tracepoint_##name.funcs);\
+			rcu_read_unlock_sched_notrace();		\
+		}							\
 	}								\
 	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
 		PARAMS(cond), PARAMS(data_proto), PARAMS(data_args))	\
-- 
cgit v1.2.3


From b440bde74f043c8ec31081cb59c9a53ade954701 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 10 Sep 2014 13:45:01 -0600
Subject: PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device

Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold),
normally generates a hot-remove event that unbinds the driver.

Some drivers expect to remain bound to a device even while they power it
off and back on again.  This can be dangerous, because if the device is
removed or replaced while it is powered off, the driver doesn't know that
anything changed.  But some drivers accept that risk.

Add pci_ignore_hotplug() for use by drivers that know their device cannot
be removed.  Using pci_ignore_hotplug() tells the PCI core that hot-plug
events for the device should be ignored.

The radeon and nouveau drivers use this to switch between a low-power,
integrated GPU and a higher-power, higher-performance discrete GPU.  They
power off the unused GPU, but they want to remain bound to it.

This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau:
Fix VGA switcheroo problem related to hotplug") but extends it to work with
both acpiphp and pciehp.

This fixes a problem where systems with dual GPUs using the radeon drivers
become unusable, freezing every few seconds (see bugzillas below).  The
resume of the radeon device may also fail, e.g.,

This fixes problems on dual GPU systems where the radeon driver becomes
unusable because of problems while suspending the device, as in bug 79701:

    [drm] radeon: finishing device.
    radeon 0000:01:00.0: Userspace still has active objects !
    radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free
    ...
    WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]()
    trying to unbind memory from uninitialized GART !

or while resuming it, as in bug 77261:

    radeon 0000:01:00.0: ring 0 stalled for more than 10158msec
    radeon 0000:01:00.0: GPU lockup ...
    radeon 0000:01:00.0: GPU pci config reset
    pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1)
    radeon 0000:01:00.0: GPU reset succeeded, trying to resume
    *ERROR* radeon: dpm resume failed
    radeon 0000:01:00.0: Wait for MC idle timedout !

Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261
Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701
Reported-by: Shawn Starr <shawn.starr@rogers.com>
Reported-by: Jose P. <lbdkmjdf@sharklasers.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Rajat Jain <rajatxjain@gmail.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Dave Airlie <airlied@redhat.com>
CC: stable@vger.kernel.org	# v3.15+
---
 drivers/gpu/drm/nouveau/nouveau_drm.c |  1 +
 drivers/gpu/drm/radeon/radeon_drv.c   |  1 +
 drivers/pci/hotplug/acpiphp_glue.c    | 16 ++++++----------
 drivers/pci/hotplug/pciehp_hpc.c      | 12 ++++++++++++
 include/linux/pci.h                   |  6 ++++++
 5 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 250a5e88c751..9c3af96a7153 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -627,6 +627,7 @@ int nouveau_pmops_suspend(struct device *dev)
 
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
+	pci_ignore_hotplug(pdev);
 	pci_set_power_state(pdev, PCI_D3hot);
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 8df888908833..abbd87adfd75 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -440,6 +440,7 @@ static int radeon_pmops_runtime_suspend(struct device *dev)
 	ret = radeon_suspend_kms(drm_dev, false, false);
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
+	pci_ignore_hotplug(pdev);
 	pci_set_power_state(pdev, PCI_D3cold);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
 
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 70741c8c46a0..6cd5160fc057 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -560,19 +560,15 @@ static void disable_slot(struct acpiphp_slot *slot)
 	slot->flags &= (~SLOT_ENABLED);
 }
 
-static bool acpiphp_no_hotplug(struct acpi_device *adev)
-{
-	return adev && adev->flags.no_hotplug;
-}
-
 static bool slot_no_hotplug(struct acpiphp_slot *slot)
 {
-	struct acpiphp_func *func;
+	struct pci_bus *bus = slot->bus;
+	struct pci_dev *dev;
 
-	list_for_each_entry(func, &slot->funcs, sibling)
-		if (acpiphp_no_hotplug(func_to_acpi_device(func)))
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (PCI_SLOT(dev->devfn) == slot->device && dev->ignore_hotplug)
 			return true;
-
+	}
 	return false;
 }
 
@@ -645,7 +641,7 @@ static void trim_stale_devices(struct pci_dev *dev)
 
 		status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
 		alive = (ACPI_SUCCESS(status) && device_status_valid(sta))
-			|| acpiphp_no_hotplug(adev);
+			|| dev->ignore_hotplug;
 	}
 	if (!alive)
 		alive = pci_device_is_present(dev);
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 9da84b8b27d8..5e01ae39ec46 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -506,6 +506,8 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 {
 	struct controller *ctrl = (struct controller *)dev_id;
 	struct pci_dev *pdev = ctrl_dev(ctrl);
+	struct pci_bus *subordinate = pdev->subordinate;
+	struct pci_dev *dev;
 	struct slot *slot = ctrl->slot;
 	u16 detected, intr_loc;
 
@@ -539,6 +541,16 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 		wake_up(&ctrl->queue);
 	}
 
+	if (subordinate) {
+		list_for_each_entry(dev, &subordinate->devices, bus_list) {
+			if (dev->ignore_hotplug) {
+				ctrl_dbg(ctrl, "ignoring hotplug event %#06x (%s requested no hotplug)\n",
+					 intr_loc, pci_name(dev));
+				return IRQ_HANDLED;
+			}
+		}
+	}
+
 	if (!(intr_loc & ~PCI_EXP_SLTSTA_CC))
 		return IRQ_HANDLED;
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 61978a460841..96453f9bc8ba 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -303,6 +303,7 @@ struct pci_dev {
 						   D3cold, not set for devices
 						   powered on/off by the
 						   corresponding bridge */
+	unsigned int	ignore_hotplug:1;	/* Ignore hotplug events */
 	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
 	unsigned int	d3cold_delay;	/* D3cold->D0 transition time in ms */
 
@@ -1021,6 +1022,11 @@ bool pci_dev_run_wake(struct pci_dev *dev);
 bool pci_check_pme_status(struct pci_dev *dev);
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 
+static inline void pci_ignore_hotplug(struct pci_dev *dev)
+{
+	dev->ignore_hotplug = 1;
+}
+
 static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
 				  bool enable)
 {
-- 
cgit v1.2.3


From 378520b837cf4da769600b83690d8e825f16a611 Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Thu, 7 Aug 2014 10:15:02 +0800
Subject: nfs41: add a helper function to set layoutcommit after commit

Track lwb in nfs_commit_data so that we can use it to setup
layoutcommit in commit_done callback.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c           | 29 +++++++++++++++++++++++++++++
 fs/nfs/pnfs.h           |  1 +
 fs/nfs/write.c          | 15 +++++++++++++++
 include/linux/nfs_xdr.h |  1 +
 4 files changed, 46 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3851debf8a2..a6586cdee211 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1797,6 +1797,35 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
 }
 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
+void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data)
+{
+	struct inode *inode = data->inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	bool mark_as_dirty = false;
+
+	spin_lock(&inode->i_lock);
+	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
+		mark_as_dirty = true;
+		dprintk("%s: Set layoutcommit for inode %lu ",
+			__func__, inode->i_ino);
+	}
+	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) {
+		/* references matched in nfs4_layoutcommit_release */
+		pnfs_get_lseg(data->lseg);
+	}
+	if (data->lwb > nfsi->layout->plh_lwb)
+		nfsi->layout->plh_lwb = data->lwb;
+	spin_unlock(&inode->i_lock);
+	dprintk("%s: lseg %p end_pos %llu\n",
+		__func__, data->lseg, nfsi->layout->plh_lwb);
+
+	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
+	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
+	if (mark_as_dirty)
+		mark_inode_dirty_sync(inode);
+}
+EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit);
+
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
 {
 	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index aca3dff5dae6..79c63114ce77 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
 void pnfs_set_layoutcommit(struct nfs_pgio_header *);
+void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 175d5d073ccf..3c5638f381cd 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1538,6 +1538,18 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
 }
 EXPORT_SYMBOL_GPL(nfs_initiate_commit);
 
+static loff_t nfs_get_lwb(struct list_head *head)
+{
+	loff_t lwb = 0;
+	struct nfs_page *req;
+
+	list_for_each_entry(req, head, wb_list)
+		if (lwb < (req_offset(req) + req->wb_bytes))
+			lwb = req_offset(req) + req->wb_bytes;
+
+	return lwb;
+}
+
 /*
  * Set up the argument/result storage required for the RPC call.
  */
@@ -1557,6 +1569,9 @@ void nfs_init_commit(struct nfs_commit_data *data,
 	data->inode	  = inode;
 	data->cred	  = first->wb_context->cred;
 	data->lseg	  = lseg; /* reference transferred */
+	/* only set lwb for pnfs commit */
+	if (lseg)
+		data->lwb = nfs_get_lwb(&data->pages);
 	data->mds_ops     = &nfs_commit_ops;
 	data->completion_ops = cinfo->completion_ops;
 	data->dreq	  = cinfo->dreq;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0040629894df..e563b2c976ef 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1328,6 +1328,7 @@ struct nfs_commit_data {
 	struct pnfs_layout_segment *lseg;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 	int			ds_commit_index;
+	loff_t			lwb;
 	const struct rpc_call_ops *mds_ops;
 	const struct nfs_commit_completion_ops *completion_ops;
 	int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
-- 
cgit v1.2.3


From 5f919c9f10c1cf821ee5f414683214a361a1b98c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 21 Aug 2014 11:09:25 -0500
Subject: pnfs: allow splicing pre-encoded pages into the layoutcommit args

Currently there is no XDR buffer space allocated for the per-layout driver
layoutcommit payload, which leads to server buffer overflows in the
blocklayout driver even under simple workloads.  As we can't do per-layout
sizes for XDR operations we'll have to splice a previously encoded list
of pages into the XDR stream, similar to how we handle ACL buffers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4xdr.c        | 18 +++++++++++++-----
 fs/nfs/pnfs.c           | 15 +++++++++++++++
 fs/nfs/pnfs.h           |  4 ++--
 include/linux/nfs_xdr.h |  3 +++
 4 files changed, 33 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e13b59d8d9aa..f2cd957adb90 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -395,7 +395,10 @@ static int nfs4_stat_to_errno(int);
 				2 /* last byte written */ + \
 				1 /* nt_timechanged (false) */ + \
 				1 /* layoutupdate4 layout type */ + \
-				1 /* NULL filelayout layoutupdate4 payload */)
+				1 /* layoutupdate4 opaqueue len */)
+				  /* the actual content of layoutupdate4 should
+				     be allocated by drivers and spliced in
+				     using xdr_write_pages */
 #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
 #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
 				encode_stateid_maxsz + \
@@ -1990,7 +1993,7 @@ encode_layoutget(struct xdr_stream *xdr,
 static int
 encode_layoutcommit(struct xdr_stream *xdr,
 		    struct inode *inode,
-		    const struct nfs4_layoutcommit_args *args,
+		    struct nfs4_layoutcommit_args *args,
 		    struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -2011,11 +2014,16 @@ encode_layoutcommit(struct xdr_stream *xdr,
 	*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
 	*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
 
-	if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
+	if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) {
 		NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
 			NFS_I(inode)->layout, xdr, args);
-	else
-		encode_uint32(xdr, 0); /* no layout-type payload */
+	} else {
+		encode_uint32(xdr, args->layoutupdate_len);
+		if (args->layoutupdate_pages) {
+			xdr_write_pages(xdr, args->layoutupdate_pages, 0,
+					args->layoutupdate_len);
+		}
+	}
 
 	return 0;
 }
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 57b5728e0b8e..8827ab130ed3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1854,6 +1854,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
 int
 pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 {
+	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 	struct nfs4_layoutcommit_data *data;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t end_pos;
@@ -1904,6 +1905,20 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 	data->args.lastbytewritten = end_pos - 1;
 	data->res.server = NFS_SERVER(inode);
 
+	if (ld->prepare_layoutcommit) {
+		status = ld->prepare_layoutcommit(&data->args);
+		if (status) {
+			spin_lock(&inode->i_lock);
+			if (end_pos < nfsi->layout->plh_lwb)
+				nfsi->layout->plh_lwb = end_pos;
+			spin_unlock(&inode->i_lock);
+			put_rpccred(data->cred);
+			set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
+			goto clear_layoutcommitting;
+		}
+	}
+
+
 	status = nfs4_proc_layoutcommit(data, sync);
 out:
 	if (status)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1dd8a5e96c9f..8835b5a320cc 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -128,8 +128,8 @@ struct pnfs_layoutdriver_type {
 				     const struct nfs4_layoutreturn_args *args);
 
 	void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data);
-
-	void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
+	int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args);
+	void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
 				     struct xdr_stream *xdr,
 				     const struct nfs4_layoutcommit_args *args);
 };
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e563b2c976ef..f4092c6b90fb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -279,6 +279,9 @@ struct nfs4_layoutcommit_args {
 	__u64 lastbytewritten;
 	struct inode *inode;
 	const u32 *bitmask;
+	size_t layoutupdate_len;
+	struct page *layoutupdate_page;
+	struct page **layoutupdate_pages;
 };
 
 struct nfs4_layoutcommit_res {
-- 
cgit v1.2.3


From b954d83421d51d822c42e5ab7b65069b25ad3005 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 10 Sep 2014 15:01:02 +0200
Subject: net: bpf: only build bpf_jit_binary_{alloc, free}() when jit selected

Since BPF JIT depends on the availability of module_alloc() and
module_free() helpers (HAVE_BPF_JIT and MODULES), we better build
that code only in case we have BPF_JIT in our config enabled, just
like with other JIT code. Fixes builds for arm/marzen_defconfig
and sh/rsk7269_defconfig.

====================
kernel/built-in.o: In function `bpf_jit_binary_alloc':
/home/cwang/linux/kernel/bpf/core.c:144: undefined reference to `module_alloc'
kernel/built-in.o: In function `bpf_jit_binary_free':
/home/cwang/linux/kernel/bpf/core.c:164: undefined reference to `module_free'
make: *** [vmlinux] Error 1
====================

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Fixes: 738cbe72adc5 ("net: bpf: consolidate JIT binary allocator")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 78 +++++++++++++++++++++++++-------------------------
 kernel/bpf/core.c      |  2 ++
 2 files changed, 41 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4b59edead908..1a0bc6d134d7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -4,12 +4,18 @@
 #ifndef __LINUX_FILTER_H__
 #define __LINUX_FILTER_H__
 
+#include <stdarg.h>
+
 #include <linux/atomic.h>
 #include <linux/compat.h>
 #include <linux/skbuff.h>
+#include <linux/linkage.h>
+#include <linux/printk.h>
 #include <linux/workqueue.h>
-#include <uapi/linux/filter.h>
+
 #include <asm/cacheflush.h>
+
+#include <uapi/linux/filter.h>
 #include <uapi/linux/bpf.h>
 
 struct sk_buff;
@@ -363,14 +369,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags);
 void __bpf_prog_free(struct bpf_prog *fp);
 
-typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
-
-struct bpf_binary_header *
-bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
-		     unsigned int alignment,
-		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
-void bpf_jit_binary_free(struct bpf_binary_header *hdr);
-
 static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
 {
 	bpf_prog_unlock_ro(fp);
@@ -393,6 +391,38 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 void bpf_int_jit_compile(struct bpf_prog *fp);
 
+#ifdef CONFIG_BPF_JIT
+typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
+
+struct bpf_binary_header *
+bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
+		     unsigned int alignment,
+		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
+void bpf_jit_binary_free(struct bpf_binary_header *hdr);
+
+void bpf_jit_compile(struct bpf_prog *fp);
+void bpf_jit_free(struct bpf_prog *fp);
+
+static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
+				u32 pass, void *image)
+{
+	pr_err("flen=%u proglen=%u pass=%u image=%pK\n",
+	       flen, proglen, pass, image);
+	if (image)
+		print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
+			       16, 1, image, proglen, false);
+}
+#else
+static inline void bpf_jit_compile(struct bpf_prog *fp)
+{
+}
+
+static inline void bpf_jit_free(struct bpf_prog *fp)
+{
+	bpf_prog_unlock_free(fp);
+}
+#endif /* CONFIG_BPF_JIT */
+
 #define BPF_ANC		BIT(15)
 
 static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
@@ -440,36 +470,6 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
 	return bpf_internal_load_pointer_neg_helper(skb, k, size);
 }
 
-#ifdef CONFIG_BPF_JIT
-#include <stdarg.h>
-#include <linux/linkage.h>
-#include <linux/printk.h>
-
-void bpf_jit_compile(struct bpf_prog *fp);
-void bpf_jit_free(struct bpf_prog *fp);
-
-static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
-				u32 pass, void *image)
-{
-	pr_err("flen=%u proglen=%u pass=%u image=%pK\n",
-	       flen, proglen, pass, image);
-	if (image)
-		print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
-			       16, 1, image, proglen, false);
-}
-#else
-#include <linux/slab.h>
-
-static inline void bpf_jit_compile(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_jit_free(struct bpf_prog *fp)
-{
-	bpf_prog_unlock_free(fp);
-}
-#endif /* CONFIG_BPF_JIT */
-
 static inline int bpf_tell_extensions(void)
 {
 	return SKF_AD_MAX;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8ee520f0ec70..8b7002488251 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -128,6 +128,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
 }
 EXPORT_SYMBOL_GPL(__bpf_prog_free);
 
+#ifdef CONFIG_BPF_JIT
 struct bpf_binary_header *
 bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     unsigned int alignment,
@@ -163,6 +164,7 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
 {
 	module_free(NULL, hdr);
 }
+#endif /* CONFIG_BPF_JIT */
 
 /* Base function for offset calculation. Needs to go into .text section,
  * therefore keeping it non-static as well; will also be used by JITs
-- 
cgit v1.2.3


From 6314b6796e3c070d4c8086b08dfd453a0aeac4cf Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 4 Sep 2014 23:37:49 -0700
Subject: clk: Don't hold prepare_lock across debugfs creation

Rob Clark reports a lockdep splat that involves the prepare_lock
chained with the mmap semaphore.

======================================================
[ INFO: possible circular locking dependency detected ]
3.17.0-rc1-00050-g07a489b #802 Tainted: G        W
-------------------------------------------------------
Xorg.bin/5413 is trying to acquire lock:
 (prepare_lock){+.+.+.}, at: [<c0781280>] clk_prepare_lock+0x88/0xfc

but task is already holding lock:
 (qcom_iommu_lock){+.+...}, at: [<c079f664>] qcom_iommu_unmap+0x1c/0x1f0

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #4 (qcom_iommu_lock){+.+...}:
       [<c079f860>] qcom_iommu_map+0x28/0x450
       [<c079eb50>] iommu_map+0xc8/0x12c
       [<c056c1fc>] msm_iommu_map+0xb4/0x130
       [<c05697bc>] msm_gem_get_iova_locked+0x9c/0xe8
       [<c0569854>] msm_gem_get_iova+0x4c/0x64
       [<c0562208>] mdp4_kms_init+0x4c4/0x6c0
       [<c056881c>] msm_load+0x2ac/0x34c
       [<c0545724>] drm_dev_register+0xac/0x108
       [<c0547510>] drm_platform_init+0x50/0xf0
       [<c0578a60>] try_to_bring_up_master.part.3+0xc8/0x108
       [<c0578b48>] component_master_add_with_match+0xa8/0x104
       [<c0568294>] msm_pdev_probe+0x64/0x70
       [<c057e704>] platform_drv_probe+0x2c/0x60
       [<c057cff8>] driver_probe_device+0x108/0x234
       [<c057b65c>] bus_for_each_drv+0x64/0x98
       [<c057cec0>] device_attach+0x78/0x8c
       [<c057c590>] bus_probe_device+0x88/0xac
       [<c057c9b8>] deferred_probe_work_func+0x68/0x9c
       [<c0259db4>] process_one_work+0x1a0/0x40c
       [<c025a710>] worker_thread+0x44/0x4d8
       [<c025ec54>] kthread+0xd8/0xec
       [<c020e9a8>] ret_from_fork+0x14/0x2c

-> #3 (&dev->struct_mutex){+.+.+.}:
       [<c0541188>] drm_gem_mmap+0x38/0xd0
       [<c05695b8>] msm_gem_mmap+0xc/0x5c
       [<c02f0b6c>] mmap_region+0x35c/0x6c8
       [<c02f11ec>] do_mmap_pgoff+0x314/0x398
       [<c02de1e0>] vm_mmap_pgoff+0x84/0xb4
       [<c02ef83c>] SyS_mmap_pgoff+0x94/0xbc
       [<c020e8e0>] ret_fast_syscall+0x0/0x48

-> #2 (&mm->mmap_sem){++++++}:
       [<c0321138>] filldir64+0x68/0x180
       [<c0333fe0>] dcache_readdir+0x188/0x22c
       [<c0320ed0>] iterate_dir+0x9c/0x11c
       [<c03213b0>] SyS_getdents64+0x78/0xe8
       [<c020e8e0>] ret_fast_syscall+0x0/0x48

-> #1 (&sb->s_type->i_mutex_key#3){+.+.+.}:
       [<c03fc544>] __create_file+0x58/0x1dc
       [<c03fc70c>] debugfs_create_dir+0x1c/0x24
       [<c0781c7c>] clk_debug_create_subtree+0x20/0x170
       [<c0be2af8>] clk_debug_init+0xec/0x14c
       [<c0208c70>] do_one_initcall+0x8c/0x1c8
       [<c0b9cce4>] kernel_init_freeable+0x13c/0x1dc
       [<c0877bc4>] kernel_init+0x8/0xe8
       [<c020e9a8>] ret_from_fork+0x14/0x2c

-> #0 (prepare_lock){+.+.+.}:
       [<c087c408>] mutex_lock_nested+0x70/0x3e8
       [<c0781280>] clk_prepare_lock+0x88/0xfc
       [<c0782c50>] clk_prepare+0xc/0x24
       [<c079f474>] __enable_clocks.isra.4+0x18/0xa4
       [<c079f614>] __flush_iotlb_va+0xe0/0x114
       [<c079f6f4>] qcom_iommu_unmap+0xac/0x1f0
       [<c079ea3c>] iommu_unmap+0x9c/0xe8
       [<c056c2fc>] msm_iommu_unmap+0x64/0x84
       [<c0569da4>] msm_gem_free_object+0x11c/0x338
       [<c05413ec>] drm_gem_object_handle_unreference_unlocked+0xfc/0x130
       [<c0541604>] drm_gem_object_release_handle+0x50/0x68
       [<c0447a98>] idr_for_each+0xa8/0xdc
       [<c0541c10>] drm_gem_release+0x1c/0x28
       [<c0540b3c>] drm_release+0x370/0x428
       [<c031105c>] __fput+0x98/0x1e8
       [<c025d73c>] task_work_run+0xb0/0xfc
       [<c02477ec>] do_exit+0x2ec/0x948
       [<c0247ec0>] do_group_exit+0x4c/0xb8
       [<c025180c>] get_signal+0x28c/0x6ac
       [<c0211204>] do_signal+0xc4/0x3e4
       [<c02116cc>] do_work_pending+0xb4/0xc4
       [<c020e938>] work_pending+0xc/0x20

other info that might help us debug this:

Chain exists of:
  prepare_lock --> &dev->struct_mutex --> qcom_iommu_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(qcom_iommu_lock);
                               lock(&dev->struct_mutex);
                               lock(qcom_iommu_lock);
  lock(prepare_lock);

 *** DEADLOCK ***

3 locks held by Xorg.bin/5413:
 #0:  (drm_global_mutex){+.+.+.}, at: [<c0540800>] drm_release+0x34/0x428
 #1:  (&dev->struct_mutex){+.+.+.}, at: [<c05413bc>] drm_gem_object_handle_unreference_unlocked+0xcc/0x130
 #2:  (qcom_iommu_lock){+.+...}, at: [<c079f664>] qcom_iommu_unmap+0x1c/0x1f0

stack backtrace:
CPU: 1 PID: 5413 Comm: Xorg.bin Tainted: G        W      3.17.0-rc1-00050-g07a489b #802
[<c0216290>] (unwind_backtrace) from [<c0211d8c>] (show_stack+0x10/0x14)
[<c0211d8c>] (show_stack) from [<c087a078>] (dump_stack+0x98/0xb8)
[<c087a078>] (dump_stack) from [<c027f024>] (print_circular_bug+0x218/0x340)
[<c027f024>] (print_circular_bug) from [<c0283e08>] (__lock_acquire+0x1d24/0x20b8)
[<c0283e08>] (__lock_acquire) from [<c0284774>] (lock_acquire+0x9c/0xbc)
[<c0284774>] (lock_acquire) from [<c087c408>] (mutex_lock_nested+0x70/0x3e8)
[<c087c408>] (mutex_lock_nested) from [<c0781280>] (clk_prepare_lock+0x88/0xfc)
[<c0781280>] (clk_prepare_lock) from [<c0782c50>] (clk_prepare+0xc/0x24)
[<c0782c50>] (clk_prepare) from [<c079f474>] (__enable_clocks.isra.4+0x18/0xa4)
[<c079f474>] (__enable_clocks.isra.4) from [<c079f614>] (__flush_iotlb_va+0xe0/0x114)
[<c079f614>] (__flush_iotlb_va) from [<c079f6f4>] (qcom_iommu_unmap+0xac/0x1f0)
[<c079f6f4>] (qcom_iommu_unmap) from [<c079ea3c>] (iommu_unmap+0x9c/0xe8)
[<c079ea3c>] (iommu_unmap) from [<c056c2fc>] (msm_iommu_unmap+0x64/0x84)
[<c056c2fc>] (msm_iommu_unmap) from [<c0569da4>] (msm_gem_free_object+0x11c/0x338)
[<c0569da4>] (msm_gem_free_object) from [<c05413ec>] (drm_gem_object_handle_unreference_unlocked+0xfc/0x130)
[<c05413ec>] (drm_gem_object_handle_unreference_unlocked) from [<c0541604>] (drm_gem_object_release_handle+0x50/0x68)
[<c0541604>] (drm_gem_object_release_handle) from [<c0447a98>] (idr_for_each+0xa8/0xdc)
[<c0447a98>] (idr_for_each) from [<c0541c10>] (drm_gem_release+0x1c/0x28)
[<c0541c10>] (drm_gem_release) from [<c0540b3c>] (drm_release+0x370/0x428)
[<c0540b3c>] (drm_release) from [<c031105c>] (__fput+0x98/0x1e8)
[<c031105c>] (__fput) from [<c025d73c>] (task_work_run+0xb0/0xfc)
[<c025d73c>] (task_work_run) from [<c02477ec>] (do_exit+0x2ec/0x948)
[<c02477ec>] (do_exit) from [<c0247ec0>] (do_group_exit+0x4c/0xb8)
[<c0247ec0>] (do_group_exit) from [<c025180c>] (get_signal+0x28c/0x6ac)
[<c025180c>] (get_signal) from [<c0211204>] (do_signal+0xc4/0x3e4)
[<c0211204>] (do_signal) from [<c02116cc>] (do_work_pending+0xb4/0xc4)
[<c02116cc>] (do_work_pending) from [<c020e938>] (work_pending+0xc/0x20)

We can break this chain if we don't hold the prepare_lock while
creating debugfs directories. We only hold the prepare_lock right
now because we're traversing the clock tree recursively and we
don't want the hierarchy to change during the traversal.
Replacing this traversal with a simple linked list walk allows us
to only grab a list lock instead of the prepare_lock, thus
breaking the lock chain.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk.c           | 73 +++++++++++++++++----------------------------
 include/linux/clk-private.h |  1 +
 2 files changed, 28 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index b76fa69b44cb..8ca28189e4e9 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -100,6 +100,8 @@ static void clk_enable_unlock(unsigned long flags)
 
 static struct dentry *rootdir;
 static int inited = 0;
+static DEFINE_MUTEX(clk_debug_lock);
+static HLIST_HEAD(clk_debug_list);
 
 static struct hlist_head *all_lists[] = {
 	&clk_root_list,
@@ -300,28 +302,6 @@ out:
 	return ret;
 }
 
-/* caller must hold prepare_lock */
-static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry)
-{
-	struct clk *child;
-	int ret = -EINVAL;;
-
-	if (!clk || !pdentry)
-		goto out;
-
-	ret = clk_debug_create_one(clk, pdentry);
-
-	if (ret)
-		goto out;
-
-	hlist_for_each_entry(child, &clk->children, child_node)
-		clk_debug_create_subtree(child, pdentry);
-
-	ret = 0;
-out:
-	return ret;
-}
-
 /**
  * clk_debug_register - add a clk node to the debugfs clk tree
  * @clk: the clk being added to the debugfs clk tree
@@ -329,20 +309,21 @@ out:
  * Dynamically adds a clk to the debugfs clk tree if debugfs has been
  * initialized.  Otherwise it bails out early since the debugfs clk tree
  * will be created lazily by clk_debug_init as part of a late_initcall.
- *
- * Caller must hold prepare_lock.  Only clk_init calls this function (so
- * far) so this is taken care.
  */
 static int clk_debug_register(struct clk *clk)
 {
 	int ret = 0;
 
+	mutex_lock(&clk_debug_lock);
+	hlist_add_head(&clk->debug_node, &clk_debug_list);
+
 	if (!inited)
-		goto out;
+		goto unlock;
 
-	ret = clk_debug_create_subtree(clk, rootdir);
+	ret = clk_debug_create_one(clk, rootdir);
+unlock:
+	mutex_unlock(&clk_debug_lock);
 
-out:
 	return ret;
 }
 
@@ -353,12 +334,18 @@ out:
  * Dynamically removes a clk and all it's children clk nodes from the
  * debugfs clk tree if clk->dentry points to debugfs created by
  * clk_debug_register in __clk_init.
- *
- * Caller must hold prepare_lock.
  */
 static void clk_debug_unregister(struct clk *clk)
 {
+	mutex_lock(&clk_debug_lock);
+	if (!clk->dentry)
+		goto out;
+
+	hlist_del_init(&clk->debug_node);
 	debugfs_remove_recursive(clk->dentry);
+	clk->dentry = NULL;
+out:
+	mutex_unlock(&clk_debug_lock);
 }
 
 struct dentry *clk_debugfs_add_file(struct clk *clk, char *name, umode_t mode,
@@ -415,17 +402,12 @@ static int __init clk_debug_init(void)
 	if (!d)
 		return -ENOMEM;
 
-	clk_prepare_lock();
-
-	hlist_for_each_entry(clk, &clk_root_list, child_node)
-		clk_debug_create_subtree(clk, rootdir);
-
-	hlist_for_each_entry(clk, &clk_orphan_list, child_node)
-		clk_debug_create_subtree(clk, rootdir);
+	mutex_lock(&clk_debug_lock);
+	hlist_for_each_entry(clk, &clk_debug_list, debug_node)
+		clk_debug_create_one(clk, rootdir);
 
 	inited = 1;
-
-	clk_prepare_unlock();
+	mutex_unlock(&clk_debug_lock);
 
 	return 0;
 }
@@ -2087,14 +2069,16 @@ void clk_unregister(struct clk *clk)
 {
 	unsigned long flags;
 
-       if (!clk || WARN_ON_ONCE(IS_ERR(clk)))
-               return;
+	if (!clk || WARN_ON_ONCE(IS_ERR(clk)))
+		return;
+
+	clk_debug_unregister(clk);
 
 	clk_prepare_lock();
 
 	if (clk->ops == &clk_nodrv_ops) {
 		pr_err("%s: unregistered clock: %s\n", __func__, clk->name);
-		goto out;
+		return;
 	}
 	/*
 	 * Assign empty clock ops for consumers that might still hold
@@ -2113,16 +2097,13 @@ void clk_unregister(struct clk *clk)
 			clk_set_parent(child, NULL);
 	}
 
-	clk_debug_unregister(clk);
-
 	hlist_del_init(&clk->child_node);
 
 	if (clk->prepare_count)
 		pr_warn("%s: unregistering prepared clock: %s\n",
 					__func__, clk->name);
-
 	kref_put(&clk->ref, __clk_release);
-out:
+
 	clk_prepare_unlock();
 }
 EXPORT_SYMBOL_GPL(clk_unregister);
diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index efbf70b9fd84..4ed34105c371 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -48,6 +48,7 @@ struct clk {
 	unsigned long		accuracy;
 	struct hlist_head	children;
 	struct hlist_node	child_node;
+	struct hlist_node	debug_node;
 	unsigned int		notifier_count;
 #ifdef CONFIG_DEBUG_FS
 	struct dentry		*dentry;
-- 
cgit v1.2.3


From 09e05c3f78e9e82bda5958eb95bbf719f7a0ed6b Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Wed, 10 Sep 2014 16:41:56 +0300
Subject: net/mlx4: Set vlan stripping policy by the right command

Changing the vlan stripping policy of the QP isn't supported by older
firmware versions for the INIT2RTR command. Nevertheless, we've used it.

Fix that by doing this policy change using INIT2RTR only if the firmware
supports it, otherwise, we call UPDATE_QP command to do the task.

Fixes: 7677fc9 ('net/mlx4: Strengthen VLAN tags/priorities enforcement in VST mode')
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c                     |  2 +-
 drivers/net/ethernet/mellanox/mlx4/qp.c               | 12 ++++++++++--
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 18 ++++++++++++++++--
 include/linux/mlx4/device.h                           |  1 +
 include/linux/mlx4/qp.h                               | 12 ++++++++++--
 5 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index af8256353c7d..162b82c1dde4 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1680,7 +1680,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
 			goto unlock;
 
 		update_params.smac_index = new_smac_index;
-		if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC,
+		if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
 				   &update_params)) {
 			release_mac = new_smac;
 			goto unlock;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 0dc31d85fc3b..2301365c79c7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -390,13 +390,14 @@ err_icm:
 EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
 
 #define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC
-int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp,
+int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		   enum mlx4_update_qp_attr attr,
 		   struct mlx4_update_qp_params *params)
 {
 	struct mlx4_cmd_mailbox *mailbox;
 	struct mlx4_update_qp_context *cmd;
 	u64 pri_addr_path_mask = 0;
+	u64 qp_mask = 0;
 	int err = 0;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -413,9 +414,16 @@ int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp,
 		cmd->qp_context.pri_path.grh_mylmc = params->smac_index;
 	}
 
+	if (attr & MLX4_UPDATE_QP_VSD) {
+		qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD;
+		if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE)
+			cmd->qp_context.param3 |= cpu_to_be32(MLX4_STRIP_VLAN);
+	}
+
 	cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask);
+	cmd->qp_mask = cpu_to_be64(qp_mask);
 
-	err = mlx4_cmd(dev, mailbox->dma, qp->qpn & 0xffffff, 0,
+	err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0,
 		       MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
 		       MLX4_CMD_NATIVE);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 2fe61b6a8e37..5d2498dcf536 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -702,11 +702,13 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 	struct mlx4_qp_context	*qpc = inbox->buf + 8;
 	struct mlx4_vport_oper_state *vp_oper;
 	struct mlx4_priv *priv;
+	u32 qp_type;
 	int port;
 
 	port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1;
 	priv = mlx4_priv(dev);
 	vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+	qp_type	= (be32_to_cpu(qpc->flags) >> 16) & 0xff;
 
 	if (MLX4_VGT != vp_oper->state.default_vlan) {
 		/* the reserved QPs (special, proxy, tunnel)
@@ -715,8 +717,20 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 		if (mlx4_is_qp_reserved(dev, qpn))
 			return 0;
 
-		/* force strip vlan by clear vsd */
-		qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN);
+		/* force strip vlan by clear vsd, MLX QP refers to Raw Ethernet */
+		if (qp_type == MLX4_QP_ST_UD ||
+		    (qp_type == MLX4_QP_ST_MLX && mlx4_is_eth(dev, port))) {
+			if (dev->caps.bmme_flags & MLX4_BMME_FLAG_VSD_INIT2RTR) {
+				*(__be32 *)inbox->buf =
+					cpu_to_be32(be32_to_cpu(*(__be32 *)inbox->buf) |
+					MLX4_QP_OPTPAR_VLAN_STRIPPING);
+				qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN);
+			} else {
+				struct mlx4_update_qp_params params = {.flags = 0};
+
+				mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, &params);
+			}
+		}
 
 		if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE &&
 		    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 511c6e0d21a9..a5b7d7cfcedf 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -209,6 +209,7 @@ enum {
 	MLX4_BMME_FLAG_TYPE_2_WIN	= 1 <<  9,
 	MLX4_BMME_FLAG_RESERVED_LKEY	= 1 << 10,
 	MLX4_BMME_FLAG_FAST_REG_WR	= 1 << 11,
+	MLX4_BMME_FLAG_VSD_INIT2RTR	= 1 << 28,
 };
 
 enum mlx4_event {
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 7040dc98ff8b..5f4e36cf0091 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -56,7 +56,8 @@ enum mlx4_qp_optpar {
 	MLX4_QP_OPTPAR_RNR_RETRY		= 1 << 13,
 	MLX4_QP_OPTPAR_ACK_TIMEOUT		= 1 << 14,
 	MLX4_QP_OPTPAR_SCHED_QUEUE		= 1 << 16,
-	MLX4_QP_OPTPAR_COUNTER_INDEX		= 1 << 20
+	MLX4_QP_OPTPAR_COUNTER_INDEX		= 1 << 20,
+	MLX4_QP_OPTPAR_VLAN_STRIPPING		= 1 << 21,
 };
 
 enum mlx4_qp_state {
@@ -423,13 +424,20 @@ struct mlx4_wqe_inline_seg {
 
 enum mlx4_update_qp_attr {
 	MLX4_UPDATE_QP_SMAC		= 1 << 0,
+	MLX4_UPDATE_QP_VSD		= 1 << 2,
+	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 2) - 1
+};
+
+enum mlx4_update_qp_params_flags {
+	MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE		= 1 << 0,
 };
 
 struct mlx4_update_qp_params {
 	u8	smac_index;
+	u32	flags;
 };
 
-int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp,
+int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		   enum mlx4_update_qp_attr attr,
 		   struct mlx4_update_qp_params *params);
 int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-- 
cgit v1.2.3


From 184c3fc3f52fb75800deb76deffb70907d1f76ea Mon Sep 17 00:00:00 2001
From: Mark Rustad <mark.d.rustad@intel.com>
Date: Thu, 11 Sep 2014 09:47:23 +0930
Subject: moduleparam: Resolve missing-field-initializer warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolve a missing-field-initializer warning, that is produced
by every reference to module_param_call, by using designated
initialization for the first field. That is enough to silence
the complaint.

The message is only seen when doing a W=2 build. I happened to be using gcc
4.8.3, but I think most versions would produce the warning when it is
enabled. It can either be silenced by using even a single designated
initializer as I did here, or providing values for all of the fields. Because
of the number of references to the macro, this change silences many warnings
in W=2 builds.

One instance of the full warning message looks like this:

/home/share/git/nn-mdr/include/linux/moduleparam.h:198:16: warning: missing
initializer for field ‘free’ of ‘struct kernel_param_ops’
[-Wmissing-field-initializers]
  static struct kernel_param_ops __param_ops_##name =  \
		  ^
/home/share/git/nn-mdr/fs/fuse/inode.c:35:1: note: in expansion of macro
‘module_param_call’
 module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
 ^
/home/share/git/nn-mdr/include/linux/moduleparam.h:56:9: note: ‘free’
declared here
  void (*free)(void *arg);

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 593501996574..b43f4752304e 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -224,7 +224,7 @@ struct kparam_array
 /* Obsolete - use module_param_cb() */
 #define module_param_call(name, set, get, arg, perm)			\
 	static struct kernel_param_ops __param_ops_##name =		\
-		{ 0, (void *)set, (void *)get };			\
+		{ .flags = 0, (void *)set, (void *)get };		\
 	__module_param_call(MODULE_PARAM_PREFIX,			\
 			    name, &__param_ops_##name, arg,		\
 			    (perm) + sizeof(__check_old_set_param(set))*0, -1, 0)
-- 
cgit v1.2.3


From 3d598f47e804a77208c6bb0a454123018e2f2281 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 19 Aug 2014 20:29:12 +0300
Subject: dmaengine: dw: move dw_dmac.h to where it belongs to

There is a common storage for platform data related structures and definitions
inside kernel source tree. The patch moves file from include/linux to
include/linux/platform_data and renames it acoordingly. The users are also
updated.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
[For the arch/avr32/.* and .*sound/atmel.*]
Acked-by: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 MAINTAINERS                                     |   2 +-
 arch/avr32/mach-at32ap/at32ap700x.c             |   2 +-
 arch/avr32/mach-at32ap/include/mach/atmel-mci.h |   2 +-
 drivers/dma/dw/internal.h                       |   2 +-
 drivers/dma/dw/regs.h                           |   6 +-
 include/linux/dw_dmac.h                         | 111 ------------------------
 include/linux/platform_data/dma-dw.h            | 111 ++++++++++++++++++++++++
 include/sound/atmel-abdac.h                     |   2 +-
 include/sound/atmel-ac97c.h                     |   2 +-
 sound/atmel/abdac.c                             |   2 +-
 sound/atmel/ac97c.c                             |   2 +-
 11 files changed, 122 insertions(+), 122 deletions(-)
 delete mode 100644 include/linux/dw_dmac.h
 create mode 100644 include/linux/platform_data/dma-dw.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index aefa94841ff3..a10072963889 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7875,7 +7875,7 @@ SYNOPSYS DESIGNWARE DMAC DRIVER
 M:	Viresh Kumar <viresh.linux@gmail.com>
 M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 S:	Maintained
-F:	include/linux/dw_dmac.h
+F:	include/linux/platform_data/dma-dw.h
 F:	drivers/dma/dw/
 
 SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index db85b5ec3351..2a1aa712c6e7 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -7,7 +7,7 @@
  */
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/dw_dmac.h>
+#include <linux/platform_data/dma-dw.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
diff --git a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h
index 4bba58561d5c..11d7f4b28dc8 100644
--- a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h
+++ b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h
@@ -1,7 +1,7 @@
 #ifndef __MACH_ATMEL_MCI_H
 #define __MACH_ATMEL_MCI_H
 
-#include <linux/dw_dmac.h>
+#include <linux/platform_data/dma-dw.h>
 
 /**
  * struct mci_dma_data - DMA data for MCI interface
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index 32667f9e0dda..43cc1dfad5c9 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -12,7 +12,7 @@
 #define _DW_DMAC_INTERNAL_H
 
 #include <linux/device.h>
-#include <linux/dw_dmac.h>
+#include <linux/platform_data/dma-dw.h>
 
 #include "regs.h"
 
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index bb98d3e91e8b..af02439155e9 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -11,7 +11,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
-#include <linux/dw_dmac.h>
+#include <linux/platform_data/dma-dw.h>
 
 #define DW_DMA_MAX_NR_CHANNELS	8
 #define DW_DMA_MAX_NR_REQUESTS	16
@@ -161,7 +161,7 @@ struct dw_dma_regs {
 #define DWC_CTLH_DONE		0x00001000
 #define DWC_CTLH_BLOCK_TS_MASK	0x00000fff
 
-/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */
+/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/platform_data/dma-dw.h> */
 #define DWC_CFGL_CH_PRIOR_MASK	(0x7 << 5)	/* priority mask */
 #define DWC_CFGL_CH_PRIOR(x)	((x) << 5)	/* priority */
 #define DWC_CFGL_CH_SUSP	(1 << 8)	/* pause xfer */
@@ -172,7 +172,7 @@ struct dw_dma_regs {
 #define DWC_CFGL_RELOAD_SAR	(1 << 30)
 #define DWC_CFGL_RELOAD_DAR	(1 << 31)
 
-/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */
+/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/platform_data/dma-dw.h> */
 #define DWC_CFGH_DS_UPD_EN	(1 << 5)
 #define DWC_CFGH_SS_UPD_EN	(1 << 6)
 
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
deleted file mode 100644
index 68b4024184de..000000000000
--- a/include/linux/dw_dmac.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Driver for the Synopsys DesignWare DMA Controller
- *
- * Copyright (C) 2007 Atmel Corporation
- * Copyright (C) 2010-2011 ST Microelectronics
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef DW_DMAC_H
-#define DW_DMAC_H
-
-#include <linux/dmaengine.h>
-
-/**
- * struct dw_dma_slave - Controller-specific information about a slave
- *
- * @dma_dev: required DMA master device. Depricated.
- * @bus_id: name of this device channel, not just a device name since
- *          devices may have more than one channel e.g. "foo_tx"
- * @cfg_hi: Platform-specific initializer for the CFG_HI register
- * @cfg_lo: Platform-specific initializer for the CFG_LO register
- * @src_master: src master for transfers on allocated channel.
- * @dst_master: dest master for transfers on allocated channel.
- */
-struct dw_dma_slave {
-	struct device		*dma_dev;
-	u32			cfg_hi;
-	u32			cfg_lo;
-	u8			src_master;
-	u8			dst_master;
-};
-
-/**
- * struct dw_dma_platform_data - Controller configuration parameters
- * @nr_channels: Number of channels supported by hardware (max 8)
- * @is_private: The device channels should be marked as private and not for
- *	by the general purpose DMA channel allocator.
- * @chan_allocation_order: Allocate channels starting from 0 or 7
- * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
- * @block_size: Maximum block size supported by the controller
- * @nr_masters: Number of AHB masters supported by the controller
- * @data_width: Maximum data width supported by hardware per AHB master
- *		(0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
- */
-struct dw_dma_platform_data {
-	unsigned int	nr_channels;
-	bool		is_private;
-#define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
-#define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
-	unsigned char	chan_allocation_order;
-#define CHAN_PRIORITY_ASCENDING		0	/* chan0 highest */
-#define CHAN_PRIORITY_DESCENDING	1	/* chan7 highest */
-	unsigned char	chan_priority;
-	unsigned short	block_size;
-	unsigned char	nr_masters;
-	unsigned char	data_width[4];
-};
-
-/* bursts size */
-enum dw_dma_msize {
-	DW_DMA_MSIZE_1,
-	DW_DMA_MSIZE_4,
-	DW_DMA_MSIZE_8,
-	DW_DMA_MSIZE_16,
-	DW_DMA_MSIZE_32,
-	DW_DMA_MSIZE_64,
-	DW_DMA_MSIZE_128,
-	DW_DMA_MSIZE_256,
-};
-
-/* Platform-configurable bits in CFG_HI */
-#define DWC_CFGH_FCMODE		(1 << 0)
-#define DWC_CFGH_FIFO_MODE	(1 << 1)
-#define DWC_CFGH_PROTCTL(x)	((x) << 2)
-#define DWC_CFGH_SRC_PER(x)	((x) << 7)
-#define DWC_CFGH_DST_PER(x)	((x) << 11)
-
-/* Platform-configurable bits in CFG_LO */
-#define DWC_CFGL_LOCK_CH_XFER	(0 << 12)	/* scope of LOCK_CH */
-#define DWC_CFGL_LOCK_CH_BLOCK	(1 << 12)
-#define DWC_CFGL_LOCK_CH_XACT	(2 << 12)
-#define DWC_CFGL_LOCK_BUS_XFER	(0 << 14)	/* scope of LOCK_BUS */
-#define DWC_CFGL_LOCK_BUS_BLOCK	(1 << 14)
-#define DWC_CFGL_LOCK_BUS_XACT	(2 << 14)
-#define DWC_CFGL_LOCK_CH	(1 << 15)	/* channel lockout */
-#define DWC_CFGL_LOCK_BUS	(1 << 16)	/* busmaster lockout */
-#define DWC_CFGL_HS_DST_POL	(1 << 18)	/* dst handshake active low */
-#define DWC_CFGL_HS_SRC_POL	(1 << 19)	/* src handshake active low */
-
-/* DMA API extensions */
-struct dw_cyclic_desc {
-	struct dw_desc	**desc;
-	unsigned long	periods;
-	void		(*period_callback)(void *param);
-	void		*period_callback_param;
-};
-
-struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
-		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_transfer_direction direction);
-void dw_dma_cyclic_free(struct dma_chan *chan);
-int dw_dma_cyclic_start(struct dma_chan *chan);
-void dw_dma_cyclic_stop(struct dma_chan *chan);
-
-dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan);
-
-dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan);
-
-#endif /* DW_DMAC_H */
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
new file mode 100644
index 000000000000..68b4024184de
--- /dev/null
+++ b/include/linux/platform_data/dma-dw.h
@@ -0,0 +1,111 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef DW_DMAC_H
+#define DW_DMAC_H
+
+#include <linux/dmaengine.h>
+
+/**
+ * struct dw_dma_slave - Controller-specific information about a slave
+ *
+ * @dma_dev: required DMA master device. Depricated.
+ * @bus_id: name of this device channel, not just a device name since
+ *          devices may have more than one channel e.g. "foo_tx"
+ * @cfg_hi: Platform-specific initializer for the CFG_HI register
+ * @cfg_lo: Platform-specific initializer for the CFG_LO register
+ * @src_master: src master for transfers on allocated channel.
+ * @dst_master: dest master for transfers on allocated channel.
+ */
+struct dw_dma_slave {
+	struct device		*dma_dev;
+	u32			cfg_hi;
+	u32			cfg_lo;
+	u8			src_master;
+	u8			dst_master;
+};
+
+/**
+ * struct dw_dma_platform_data - Controller configuration parameters
+ * @nr_channels: Number of channels supported by hardware (max 8)
+ * @is_private: The device channels should be marked as private and not for
+ *	by the general purpose DMA channel allocator.
+ * @chan_allocation_order: Allocate channels starting from 0 or 7
+ * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
+ * @block_size: Maximum block size supported by the controller
+ * @nr_masters: Number of AHB masters supported by the controller
+ * @data_width: Maximum data width supported by hardware per AHB master
+ *		(0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
+ */
+struct dw_dma_platform_data {
+	unsigned int	nr_channels;
+	bool		is_private;
+#define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
+#define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
+	unsigned char	chan_allocation_order;
+#define CHAN_PRIORITY_ASCENDING		0	/* chan0 highest */
+#define CHAN_PRIORITY_DESCENDING	1	/* chan7 highest */
+	unsigned char	chan_priority;
+	unsigned short	block_size;
+	unsigned char	nr_masters;
+	unsigned char	data_width[4];
+};
+
+/* bursts size */
+enum dw_dma_msize {
+	DW_DMA_MSIZE_1,
+	DW_DMA_MSIZE_4,
+	DW_DMA_MSIZE_8,
+	DW_DMA_MSIZE_16,
+	DW_DMA_MSIZE_32,
+	DW_DMA_MSIZE_64,
+	DW_DMA_MSIZE_128,
+	DW_DMA_MSIZE_256,
+};
+
+/* Platform-configurable bits in CFG_HI */
+#define DWC_CFGH_FCMODE		(1 << 0)
+#define DWC_CFGH_FIFO_MODE	(1 << 1)
+#define DWC_CFGH_PROTCTL(x)	((x) << 2)
+#define DWC_CFGH_SRC_PER(x)	((x) << 7)
+#define DWC_CFGH_DST_PER(x)	((x) << 11)
+
+/* Platform-configurable bits in CFG_LO */
+#define DWC_CFGL_LOCK_CH_XFER	(0 << 12)	/* scope of LOCK_CH */
+#define DWC_CFGL_LOCK_CH_BLOCK	(1 << 12)
+#define DWC_CFGL_LOCK_CH_XACT	(2 << 12)
+#define DWC_CFGL_LOCK_BUS_XFER	(0 << 14)	/* scope of LOCK_BUS */
+#define DWC_CFGL_LOCK_BUS_BLOCK	(1 << 14)
+#define DWC_CFGL_LOCK_BUS_XACT	(2 << 14)
+#define DWC_CFGL_LOCK_CH	(1 << 15)	/* channel lockout */
+#define DWC_CFGL_LOCK_BUS	(1 << 16)	/* busmaster lockout */
+#define DWC_CFGL_HS_DST_POL	(1 << 18)	/* dst handshake active low */
+#define DWC_CFGL_HS_SRC_POL	(1 << 19)	/* src handshake active low */
+
+/* DMA API extensions */
+struct dw_cyclic_desc {
+	struct dw_desc	**desc;
+	unsigned long	periods;
+	void		(*period_callback)(void *param);
+	void		*period_callback_param;
+};
+
+struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
+		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
+		enum dma_transfer_direction direction);
+void dw_dma_cyclic_free(struct dma_chan *chan);
+int dw_dma_cyclic_start(struct dma_chan *chan);
+void dw_dma_cyclic_stop(struct dma_chan *chan);
+
+dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan);
+
+dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan);
+
+#endif /* DW_DMAC_H */
diff --git a/include/sound/atmel-abdac.h b/include/sound/atmel-abdac.h
index edff6a8ba1b5..a8f735d677fa 100644
--- a/include/sound/atmel-abdac.h
+++ b/include/sound/atmel-abdac.h
@@ -10,7 +10,7 @@
 #ifndef __INCLUDE_SOUND_ATMEL_ABDAC_H
 #define __INCLUDE_SOUND_ATMEL_ABDAC_H
 
-#include <linux/dw_dmac.h>
+#include <linux/platform_data/dma-dw.h>
 
 /**
  * struct atmel_abdac_pdata - board specific ABDAC configuration
diff --git a/include/sound/atmel-ac97c.h b/include/sound/atmel-ac97c.h
index 00e6c289a936..f2a1cdc37661 100644
--- a/include/sound/atmel-ac97c.h
+++ b/include/sound/atmel-ac97c.h
@@ -10,7 +10,7 @@
 #ifndef __INCLUDE_SOUND_ATMEL_AC97C_H
 #define __INCLUDE_SOUND_ATMEL_AC97C_H
 
-#include <linux/dw_dmac.h>
+#include <linux/platform_data/dma-dw.h>
 
 #define AC97C_CAPTURE	0x01
 #define AC97C_PLAYBACK	0x02
diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c
index edf2ca72d518..154a7c44e38d 100644
--- a/sound/atmel/abdac.c
+++ b/sound/atmel/abdac.c
@@ -9,7 +9,7 @@
  */
 #include <linux/clk.h>
 #include <linux/bitmap.h>
-#include <linux/dw_dmac.h>
+#include <linux/platform_data/dma-dw.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/init.h>
diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c
index a04d23174dc2..1dfb35afef8f 100644
--- a/sound/atmel/ac97c.c
+++ b/sound/atmel/ac97c.c
@@ -31,7 +31,7 @@
 #include <sound/atmel-ac97c.h>
 #include <sound/memalloc.h>
 
-#include <linux/dw_dmac.h>
+#include <linux/platform_data/dma-dw.h>
 
 #include <mach/cpu.h>
 
-- 
cgit v1.2.3


From 7e1e2f27c5508518e58e5cbb11e26cbb815f4c56 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 19 Aug 2014 20:29:14 +0300
Subject: dmaengine: dw: convert dw_dma_slave to use explicit HS interfaces

Instead of exposing the possibility to set DMA registers CFG_HI and CFG_LO
strict user to provide handshake interfaces explicitly.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/avr32/mach-at32ap/at32ap700x.c  | 15 +++++----------
 drivers/dma/dw/core.c                |  4 ++--
 include/linux/platform_data/dma-dw.h | 10 ++++------
 3 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index ec7be287a97e..37b75602adf6 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1356,10 +1356,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 		goto fail;
 
 	slave->sdata.dma_dev = &dw_dmac0_device.dev;
-	slave->sdata.cfg_hi = (DWC_CFGH_SRC_PER(0)
-				| DWC_CFGH_DST_PER(1));
-	slave->sdata.cfg_lo &= ~(DWC_CFGL_HS_DST_POL
-				| DWC_CFGL_HS_SRC_POL);
+	slave->sdata.src_id = 0;
+	slave->sdata.dst_id = 1;
 	slave->sdata.src_master = 1;
 	slave->sdata.dst_master = 0;
 
@@ -2054,8 +2052,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data,
 	/* Check if DMA slave interface for capture should be configured. */
 	if (flags & AC97C_CAPTURE) {
 		rx_dws->dma_dev = &dw_dmac0_device.dev;
-		rx_dws->cfg_hi = DWC_CFGH_SRC_PER(3);
-		rx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL);
+		rx_dws->src_id = 3;
 		rx_dws->src_master = 0;
 		rx_dws->dst_master = 1;
 	}
@@ -2063,8 +2060,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data,
 	/* Check if DMA slave interface for playback should be configured. */
 	if (flags & AC97C_PLAYBACK) {
 		tx_dws->dma_dev = &dw_dmac0_device.dev;
-		tx_dws->cfg_hi = DWC_CFGH_DST_PER(4);
-		tx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL);
+		tx_dws->dst_id = 4;
 		tx_dws->src_master = 0;
 		tx_dws->dst_master = 1;
 	}
@@ -2136,8 +2132,7 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data)
 	dws = &data->dws;
 
 	dws->dma_dev = &dw_dmac0_device.dev;
-	dws->cfg_hi = DWC_CFGH_DST_PER(2);
-	dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL);
+	dws->dst_id = 2;
 	dws->src_master = 0;
 	dws->dst_master = 1;
 
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 1af731b83b3f..0a9c052d437c 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -155,8 +155,8 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 		 */
 		BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
 
-		cfghi = dws->cfg_hi;
-		cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK;
+		cfghi |= DWC_CFGH_DST_PER(dws->dst_id);
+		cfghi |= DWC_CFGH_SRC_PER(dws->src_id);
 	} else {
 		if (dwc->direction == DMA_MEM_TO_DEV)
 			cfghi = DWC_CFGH_DST_PER(dwc->request_line);
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 68b4024184de..bc411a1bf8e7 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -17,17 +17,15 @@
  * struct dw_dma_slave - Controller-specific information about a slave
  *
  * @dma_dev: required DMA master device. Depricated.
- * @bus_id: name of this device channel, not just a device name since
- *          devices may have more than one channel e.g. "foo_tx"
- * @cfg_hi: Platform-specific initializer for the CFG_HI register
- * @cfg_lo: Platform-specific initializer for the CFG_LO register
+ * @src_id:	src request line
+ * @dst_id:	dst request line
  * @src_master: src master for transfers on allocated channel.
  * @dst_master: dest master for transfers on allocated channel.
  */
 struct dw_dma_slave {
 	struct device		*dma_dev;
-	u32			cfg_hi;
-	u32			cfg_lo;
+	u8			src_id;
+	u8			dst_id;
 	u8			src_master;
 	u8			dst_master;
 };
-- 
cgit v1.2.3


From 960d01acf62747d6518694f92be5b06f67473833 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 9 Sep 2014 22:55:35 +0300
Subject: cfg80211: add WMM traffic stream API

Add nl80211 and driver API to validate, add and delete traffic
streams with appropriate settings.

The API calls for userspace doing the action frame handshake
with the peer, and then allows only to set up the parameters
in the driver. To avoid setting up a session only to tear it
down again, the validate API is provided, but the real usage
later can still fail so userspace must be prepared for that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |   4 ++
 include/net/cfg80211.h       |  23 ++++++++-
 include/uapi/linux/nl80211.h |  28 +++++++++++
 net/wireless/nl80211.c       | 109 +++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      |  31 ++++++++++++
 net/wireless/trace.h         |  45 ++++++++++++++++++
 6 files changed, 239 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 61a09f0644aa..b1be39c76931 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -166,8 +166,12 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2)
 
 #define IEEE80211_MAX_MESH_ID_LEN	32
 
+#define IEEE80211_FIRST_TSPEC_TSID	8
 #define IEEE80211_NUM_TIDS		16
 
+/* number of user priorities 802.11 uses */
+#define IEEE80211_NUM_UPS		8
+
 #define IEEE80211_QOS_CTL_LEN		2
 /* 1d tag mask */
 #define IEEE80211_QOS_CTL_TAG1D_MASK		0x0007
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c2c710c14a50..a13beab123dc 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2318,6 +2318,17 @@ struct cfg80211_qos_map {
  * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the
  *	given interface This is used e.g. for dynamic HT 20/40 MHz channel width
  *	changes during the lifetime of the BSS.
+ *
+ * @add_tx_ts: validate (if admitted_time is 0) or add a TX TS to the device
+ *	with the given parameters; action frame exchange has been handled by
+ *	userspace so this just has to modify the TX path to take the TS into
+ *	account.
+ *	If the admitted time is 0 just validate the parameters to make sure
+ *	the session can be created at all; it is valid to just always return
+ *	success for that but that may result in inefficient behaviour (handshake
+ *	with the peer followed by immediate teardown when the addition is later
+ *	rejected)
+ * @del_tx_ts: remove an existing TX TS
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2558,6 +2569,12 @@ struct cfg80211_ops {
 
 	int	(*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev,
 				    struct cfg80211_chan_def *chandef);
+
+	int	(*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev,
+			     u8 tsid, const u8 *peer, u8 user_prio,
+			     u16 admitted_time);
+	int	(*del_tx_ts)(struct wiphy *wiphy, struct net_device *dev,
+			     u8 tsid, const u8 *peer);
 };
 
 /*
@@ -2604,9 +2621,13 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels.
  * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in
  *	beaconing mode (AP, IBSS, Mesh, ...).
+ * @WIPHY_FLAG_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM
+ *	TSPEC sessions (TID aka TSID 0-7) with the NL80211_CMD_ADD_TX_TS
+ *	command. Standard IEEE 802.11 TSPEC setup is not yet supported, it
+ *	needs to be able to handle Block-Ack agreements and other things.
  */
 enum wiphy_flags {
-	/* use hole at 0 */
+	WIPHY_FLAG_SUPPORTS_WMM_ADMISSION	= BIT(0),
 	/* use hole at 1 */
 	/* use hole at 2 */
 	WIPHY_FLAG_NETNS_OK			= BIT(3),
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 29c4399e08b9..e5b8caf5e466 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -722,6 +722,22 @@
  *	QoS mapping is relevant for IP packets, it is only valid during an
  *	association. This is cleared on disassociation and AP restart.
  *
+ * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given
+ *	%NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO
+ *	and %NL80211_ATTR_ADMITTED_TIME parameters.
+ *	Note that the action frame handshake with the AP shall be handled by
+ *	userspace via the normal management RX/TX framework, this only sets
+ *	up the TX TS in the driver/device.
+ *	If the admitted time attribute is not added then the request just checks
+ *	if a subsequent setup could be successful, the intent is to use this to
+ *	avoid setting up a session with the AP when local restrictions would
+ *	make that impossible. However, the subsequent "real" setup may still
+ *	fail even if the check was successful.
+ * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID
+ *	and %NL80211_ATTR_MAC parameters. It isn't necessary to call this
+ *	before removing a station entry entirely, or before disassociating
+ *	or similar, cleanup will happen in the driver/device in this case.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -893,6 +909,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_QOS_MAP,
 
+	NL80211_CMD_ADD_TX_TS,
+	NL80211_CMD_DEL_TX_TS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1611,6 +1630,11 @@ enum nl80211_commands {
  *	drivers to indicate dynack capability. Dynack is automatically disabled
  *	setting valid value for coverage class.
  *
+ * @NL80211_ATTR_TSID: a TSID value (u8 attribute)
+ * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute)
+ * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds
+ *	(per second) (u16 attribute)
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1957,6 +1981,10 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_WIPHY_DYN_ACK,
 
+	NL80211_ATTR_TSID,
+	NL80211_ATTR_USER_PRIO,
+	NL80211_ATTR_ADMITTED_TIME,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e9fbd4f4ddb0..ab7ee4893e40 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -391,6 +391,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
 	[NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG },
+	[NL80211_ATTR_TSID] = { .type = NLA_U8 },
+	[NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 },
+	[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
 };
 
 /* policy for the key attributes */
@@ -1510,6 +1513,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 			if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
 				CMD(channel_switch, CHANNEL_SWITCH);
 			CMD(set_qos_map, SET_QOS_MAP);
+			if (rdev->wiphy.flags &
+					WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)
+				CMD(add_tx_ts, ADD_TX_TS);
 		}
 		/* add into the if now */
 #undef CMD
@@ -9390,6 +9396,93 @@ static int nl80211_set_qos_map(struct sk_buff *skb,
 	return ret;
 }
 
+static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	const u8 *peer;
+	u8 tsid, up;
+	u16 admitted_time = 0;
+	int err;
+
+	if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION))
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] ||
+	    !info->attrs[NL80211_ATTR_USER_PRIO])
+		return -EINVAL;
+
+	tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
+	if (tsid >= IEEE80211_NUM_TIDS)
+		return -EINVAL;
+
+	up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]);
+	if (up >= IEEE80211_NUM_UPS)
+		return -EINVAL;
+
+	/* WMM uses TIDs 0-7 even for TSPEC */
+	if (tsid < IEEE80211_FIRST_TSPEC_TSID) {
+		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION))
+			return -EINVAL;
+	} else {
+		/* TODO: handle 802.11 TSPEC/admission control
+		 * need more attributes for that (e.g. BA session requirement)
+		 */
+		return -EINVAL;
+	}
+
+	peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (info->attrs[NL80211_ATTR_ADMITTED_TIME]) {
+		admitted_time =
+			nla_get_u16(info->attrs[NL80211_ATTR_ADMITTED_TIME]);
+		if (!admitted_time)
+			return -EINVAL;
+	}
+
+	wdev_lock(wdev);
+	switch (wdev->iftype) {
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_P2P_CLIENT:
+		if (wdev->current_bss)
+			break;
+		err = -ENOTCONN;
+		goto out;
+	default:
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time);
+
+ out:
+	wdev_unlock(wdev);
+	return err;
+}
+
+static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	const u8 *peer;
+	u8 tsid;
+	int err;
+
+	if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
+	peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	wdev_lock(wdev);
+	err = rdev_del_tx_ts(rdev, dev, tsid, peer);
+	wdev_unlock(wdev);
+
+	return err;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -10147,6 +10240,22 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_ADD_TX_TS,
+		.doit = nl80211_add_tx_ts,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_TX_TS,
+		.doit = nl80211_del_tx_ts,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 /* notification functions */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 56c2240c30ce..f6d457d6a558 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -915,4 +915,35 @@ rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev,
 	return ret;
 }
 
+static inline int
+rdev_add_tx_ts(struct cfg80211_registered_device *rdev,
+	       struct net_device *dev, u8 tsid, const u8 *peer,
+	       u8 user_prio, u16 admitted_time)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_add_tx_ts(&rdev->wiphy, dev, tsid, peer,
+			     user_prio, admitted_time);
+	if (rdev->ops->add_tx_ts)
+		ret = rdev->ops->add_tx_ts(&rdev->wiphy, dev, tsid, peer,
+					   user_prio, admitted_time);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+
+	return ret;
+}
+
+static inline int
+rdev_del_tx_ts(struct cfg80211_registered_device *rdev,
+	       struct net_device *dev, u8 tsid, const u8 *peer)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_del_tx_ts(&rdev->wiphy, dev, tsid, peer);
+	if (rdev->ops->del_tx_ts)
+		ret = rdev->ops->del_tx_ts(&rdev->wiphy, dev, tsid, peer);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 0c524cd76c83..625a6e6d1168 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1896,6 +1896,51 @@ TRACE_EVENT(rdev_set_ap_chanwidth,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
 );
 
+TRACE_EVENT(rdev_add_tx_ts,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time),
+	TP_ARGS(wiphy, netdev, tsid, peer, user_prio, admitted_time),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(peer)
+		__field(u8, tsid)
+		__field(u8, user_prio)
+		__field(u16, admitted_time)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(peer, peer);
+		__entry->tsid = tsid;
+		__entry->user_prio = user_prio;
+		__entry->admitted_time = admitted_time;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d, UP %d, time %d",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
+		  __entry->tsid, __entry->user_prio, __entry->admitted_time)
+);
+
+TRACE_EVENT(rdev_del_tx_ts,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 u8 tsid, const u8 *peer),
+	TP_ARGS(wiphy, netdev, tsid, peer),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(peer)
+		__field(u8, tsid)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(peer, peer);
+		__entry->tsid = tsid;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tsid)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
-- 
cgit v1.2.3


From 047133066e6c2549403fe5a2d619f47ba4212ef5 Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <j.anaszewski@samsung.com>
Date: Thu, 7 Aug 2014 05:10:22 -0700
Subject: leds: Reorder include directives

Reorder include directives so that they are arranged
in alphabetical order.

Signed-off-by: Jacek Anaszewski <j.anaszewski@samsung.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/led-class.c | 13 +++++++------
 drivers/leds/led-core.c  |  3 ++-
 include/linux/leds.h     |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index aa29198fca3e..4bd4572efe6e 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -9,16 +9,17 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
 #include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/device.h>
 #include <linux/timer.h>
-#include <linux/err.h>
-#include <linux/ctype.h>
-#include <linux/leds.h>
 #include "leds.h"
 
 static struct class *leds_class;
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 71b40d3bf776..50b579ad948e 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -12,10 +12,11 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/leds.h>
 #include <linux/list.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/rwsem.h>
-#include <linux/leds.h>
 #include "leds.h"
 
 DECLARE_RWSEM(leds_list_lock);
diff --git a/include/linux/leds.h b/include/linux/leds.h
index e43686472197..4be2d7623d9e 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -13,8 +13,8 @@
 #define __LINUX_LEDS_H_INCLUDED
 
 #include <linux/list.h>
-#include <linux/spinlock.h>
 #include <linux/rwsem.h>
+#include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 
-- 
cgit v1.2.3


From d8082827d8a214343b761f2c4554d2a7d1573d63 Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <j.anaszewski@samsung.com>
Date: Thu, 7 Aug 2014 05:10:23 -0700
Subject: leds: make brightness type consistent across whole subsystem

Documentations states that brightness units type is enum led_brightness
and this is the type used by the led API functions. Adjust the type
of brightness variables in the struct led_classdev accordingly.

Signed-off-by: Jacek Anaszewski <j.anaszewski@samsung.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 include/linux/leds.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 4be2d7623d9e..f2e1cbc25705 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -31,8 +31,8 @@ enum led_brightness {
 
 struct led_classdev {
 	const char		*name;
-	int			 brightness;
-	int			 max_brightness;
+	enum led_brightness	 brightness;
+	enum led_brightness	 max_brightness;
 	int			 flags;
 
 	/* Lower 16 bits reflect status */
-- 
cgit v1.2.3


From a3b3ca753cdc92c7d5f57404afed3115b3b79cc6 Mon Sep 17 00:00:00 2001
From: Jaewon Kim <jaewon02.kim@samsung.com>
Date: Thu, 11 Sep 2014 23:15:01 -0700
Subject: Input: add haptic driver on max77693

This driver to supports the haptic controller on MAX77693 Multifunction
device with PMIC, CHARGER, LED, MUIC, HAPTIC.

This driver supports external pwm and LRA (Linear Resonant Actuator) motor.
User can control the haptic device via force feedback framework.

Signed-off-by: Jaewon Kim <jaewon02.kim@samsung.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/Kconfig           |  11 ++
 drivers/input/misc/Makefile          |   1 +
 drivers/input/misc/max77693-haptic.c | 357 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/max77693-private.h |   9 +
 4 files changed, 378 insertions(+)
 create mode 100644 drivers/input/misc/max77693-haptic.c

(limited to 'include/linux')

diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 36382e5b7703..23297ab6163f 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -144,6 +144,17 @@ config INPUT_M68K_BEEP
 	tristate "M68k Beeper support"
 	depends on M68K
 
+config INPUT_MAX77693_HAPTIC
+	tristate "MAXIM MAX77693 haptic controller support"
+	depends on MFD_MAX77693 && PWM
+	select INPUT_FF_MEMLESS
+	help
+	  This option enables support for the haptic controller on
+	  MAXIM MAX77693 chip.
+
+	  To compile this driver as module, choose M here: the
+	  module will be called max77693-haptic.
+
 config INPUT_MAX8925_ONKEY
 	tristate "MAX8925 ONKEY support"
 	depends on MFD_MAX8925
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index e8b84d2c845f..19c760361f80 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_INPUT_IXP4XX_BEEPER)	+= ixp4xx-beeper.o
 obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)	+= keyspan_remote.o
 obj-$(CONFIG_INPUT_KXTJ9)		+= kxtj9.o
 obj-$(CONFIG_INPUT_M68K_BEEP)		+= m68kspkr.o
+obj-$(CONFIG_INPUT_MAX77693_HAPTIC)	+= max77693-haptic.o
 obj-$(CONFIG_INPUT_MAX8925_ONKEY)	+= max8925_onkey.o
 obj-$(CONFIG_INPUT_MAX8997_HAPTIC)	+= max8997_haptic.o
 obj-$(CONFIG_INPUT_MC13783_PWRBUTTON)	+= mc13783-pwrbutton.o
diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c
new file mode 100644
index 000000000000..d605db4d2f39
--- /dev/null
+++ b/drivers/input/misc/max77693-haptic.c
@@ -0,0 +1,357 @@
+/*
+ * MAXIM MAX77693 Haptic device driver
+ *
+ * Copyright (C) 2014 Samsung Electronics
+ * Jaewon Kim <jaewon02.kim@samsung.com>
+ *
+ * This program is not provided / owned by Maxim Integrated Products.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/regulator/consumer.h>
+#include <linux/mfd/max77693.h>
+#include <linux/mfd/max77693-private.h>
+
+#define MAX_MAGNITUDE_SHIFT	16
+
+enum max77693_haptic_motor_type {
+	MAX77693_HAPTIC_ERM = 0,
+	MAX77693_HAPTIC_LRA,
+};
+
+enum max77693_haptic_pulse_mode {
+	MAX77693_HAPTIC_EXTERNAL_MODE = 0,
+	MAX77693_HAPTIC_INTERNAL_MODE,
+};
+
+enum max77693_haptic_pwm_divisor {
+	MAX77693_HAPTIC_PWM_DIVISOR_32 = 0,
+	MAX77693_HAPTIC_PWM_DIVISOR_64,
+	MAX77693_HAPTIC_PWM_DIVISOR_128,
+	MAX77693_HAPTIC_PWM_DIVISOR_256,
+};
+
+struct max77693_haptic {
+	struct regmap *regmap_pmic;
+	struct regmap *regmap_haptic;
+	struct device *dev;
+	struct input_dev *input_dev;
+	struct pwm_device *pwm_dev;
+	struct regulator *motor_reg;
+
+	bool enabled;
+	bool suspend_state;
+	unsigned int magnitude;
+	unsigned int pwm_duty;
+	enum max77693_haptic_motor_type type;
+	enum max77693_haptic_pulse_mode mode;
+	enum max77693_haptic_pwm_divisor pwm_divisor;
+
+	struct work_struct work;
+};
+
+static int max77693_haptic_set_duty_cycle(struct max77693_haptic *haptic)
+{
+	int delta = (haptic->pwm_dev->period + haptic->pwm_duty) / 2;
+	int error;
+
+	error = pwm_config(haptic->pwm_dev, delta, haptic->pwm_dev->period);
+	if (error) {
+		dev_err(haptic->dev, "failed to configure pwm: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int max77693_haptic_configure(struct max77693_haptic *haptic,
+				     bool enable)
+{
+	unsigned int value;
+	int error;
+
+	value = ((haptic->type << MAX77693_CONFIG2_MODE) |
+		(enable << MAX77693_CONFIG2_MEN) |
+		(haptic->mode << MAX77693_CONFIG2_HTYP) |
+		(haptic->pwm_divisor));
+
+	error = regmap_write(haptic->regmap_haptic,
+			     MAX77693_HAPTIC_REG_CONFIG2, value);
+	if (error) {
+		dev_err(haptic->dev,
+			"failed to update haptic config: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int max77693_haptic_lowsys(struct max77693_haptic *haptic, bool enable)
+{
+	int error;
+
+	error = regmap_update_bits(haptic->regmap_pmic,
+				   MAX77693_PMIC_REG_LSCNFG,
+				   MAX77693_PMIC_LOW_SYS_MASK,
+				   enable << MAX77693_PMIC_LOW_SYS_SHIFT);
+	if (error) {
+		dev_err(haptic->dev, "cannot update pmic regmap: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static void max77693_haptic_enable(struct max77693_haptic *haptic)
+{
+	int error;
+
+	if (haptic->enabled)
+		return;
+
+	error = pwm_enable(haptic->pwm_dev);
+	if (error) {
+		dev_err(haptic->dev,
+			"failed to enable haptic pwm device: %d\n", error);
+		return;
+	}
+
+	error = max77693_haptic_lowsys(haptic, true);
+	if (error)
+		goto err_enable_lowsys;
+
+	error = max77693_haptic_configure(haptic, true);
+	if (error)
+		goto err_enable_config;
+
+	haptic->enabled = true;
+
+	return;
+
+err_enable_config:
+	max77693_haptic_lowsys(haptic, false);
+err_enable_lowsys:
+	pwm_disable(haptic->pwm_dev);
+}
+
+static void max77693_haptic_disable(struct max77693_haptic *haptic)
+{
+	int error;
+
+	if (haptic->enabled)
+		return;
+
+	error = max77693_haptic_configure(haptic, false);
+	if (error)
+		return;
+
+	error = max77693_haptic_lowsys(haptic, false);
+	if (error)
+		goto err_disable_lowsys;
+
+	pwm_disable(haptic->pwm_dev);
+	haptic->enabled = false;
+
+	return;
+
+err_disable_lowsys:
+	max77693_haptic_configure(haptic, true);
+}
+
+static void max77693_haptic_play_work(struct work_struct *work)
+{
+	struct max77693_haptic *haptic =
+			container_of(work, struct max77693_haptic, work);
+	int error;
+
+	error = max77693_haptic_set_duty_cycle(haptic);
+	if (error) {
+		dev_err(haptic->dev, "failed to set duty cycle: %d\n", error);
+		return;
+	}
+
+	if (haptic->magnitude)
+		max77693_haptic_enable(haptic);
+	else
+		max77693_haptic_disable(haptic);
+}
+
+static int max77693_haptic_play_effect(struct input_dev *dev, void *data,
+				       struct ff_effect *effect)
+{
+	struct max77693_haptic *haptic = input_get_drvdata(dev);
+	uint64_t period_mag_multi;
+
+	haptic->magnitude = effect->u.rumble.strong_magnitude;
+	if (!haptic->magnitude)
+		haptic->magnitude = effect->u.rumble.weak_magnitude;
+
+	/*
+	 * The magnitude comes from force-feedback interface.
+	 * The formula to convert magnitude to pwm_duty as follows:
+	 * - pwm_duty = (magnitude * pwm_period) / MAX_MAGNITUDE(0xFFFF)
+	 */
+	period_mag_multi = (int64_t)(haptic->pwm_dev->period *
+						haptic->magnitude);
+	haptic->pwm_duty = (unsigned int)(period_mag_multi >>
+						MAX_MAGNITUDE_SHIFT);
+
+	schedule_work(&haptic->work);
+
+	return 0;
+}
+
+static int max77693_haptic_open(struct input_dev *dev)
+{
+	struct max77693_haptic *haptic = input_get_drvdata(dev);
+	int error;
+
+	error = regulator_enable(haptic->motor_reg);
+	if (error) {
+		dev_err(haptic->dev,
+			"failed to enable regulator: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static void max77693_haptic_close(struct input_dev *dev)
+{
+	struct max77693_haptic *haptic = input_get_drvdata(dev);
+	int error;
+
+	cancel_work_sync(&haptic->work);
+	max77693_haptic_disable(haptic);
+
+	error = regulator_disable(haptic->motor_reg);
+	if (error)
+		dev_err(haptic->dev,
+			"failed to disable regulator: %d\n", error);
+}
+
+static int max77693_haptic_probe(struct platform_device *pdev)
+{
+	struct max77693_dev *max77693 = dev_get_drvdata(pdev->dev.parent);
+	struct max77693_haptic *haptic;
+	int error;
+
+	haptic = devm_kzalloc(&pdev->dev, sizeof(*haptic), GFP_KERNEL);
+	if (!haptic)
+		return -ENOMEM;
+
+	haptic->regmap_pmic = max77693->regmap;
+	haptic->regmap_haptic = max77693->regmap_haptic;
+	haptic->dev = &pdev->dev;
+	haptic->type = MAX77693_HAPTIC_LRA;
+	haptic->mode = MAX77693_HAPTIC_EXTERNAL_MODE;
+	haptic->pwm_divisor = MAX77693_HAPTIC_PWM_DIVISOR_128;
+	haptic->suspend_state = false;
+
+	INIT_WORK(&haptic->work, max77693_haptic_play_work);
+
+	/* Get pwm and regulatot for haptic device */
+	haptic->pwm_dev = devm_pwm_get(&pdev->dev, NULL);
+	if (IS_ERR(haptic->pwm_dev)) {
+		dev_err(&pdev->dev, "failed to get pwm device\n");
+		return PTR_ERR(haptic->pwm_dev);
+	}
+
+	haptic->motor_reg = devm_regulator_get(&pdev->dev, "haptic");
+	if (IS_ERR(haptic->motor_reg)) {
+		dev_err(&pdev->dev, "failed to get regulator\n");
+		return PTR_ERR(haptic->motor_reg);
+	}
+
+	/* Initialize input device for haptic device */
+	haptic->input_dev = devm_input_allocate_device(&pdev->dev);
+	if (!haptic->input_dev) {
+		dev_err(&pdev->dev, "failed to allocate input device\n");
+		return -ENOMEM;
+	}
+
+	haptic->input_dev->name = "max77693-haptic";
+	haptic->input_dev->id.version = 1;
+	haptic->input_dev->dev.parent = &pdev->dev;
+	haptic->input_dev->open = max77693_haptic_open;
+	haptic->input_dev->close = max77693_haptic_close;
+	input_set_drvdata(haptic->input_dev, haptic);
+	input_set_capability(haptic->input_dev, EV_FF, FF_RUMBLE);
+
+	error = input_ff_create_memless(haptic->input_dev, NULL,
+				max77693_haptic_play_effect);
+	if (error) {
+		dev_err(&pdev->dev, "failed to create force-feedback\n");
+		return error;
+	}
+
+	error = input_register_device(haptic->input_dev);
+	if (error) {
+		dev_err(&pdev->dev, "failed to register input device\n");
+		return error;
+	}
+
+	platform_set_drvdata(pdev, haptic);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int max77693_haptic_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct max77693_haptic *haptic = platform_get_drvdata(pdev);
+
+	if (haptic->enabled) {
+		max77693_haptic_disable(haptic);
+		haptic->suspend_state = true;
+	}
+
+	return 0;
+}
+
+static int max77693_haptic_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct max77693_haptic *haptic = platform_get_drvdata(pdev);
+
+	if (haptic->suspend_state) {
+		max77693_haptic_enable(haptic);
+		haptic->suspend_state = false;
+	}
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(max77693_haptic_pm_ops,
+			 max77693_haptic_suspend, max77693_haptic_resume);
+
+static struct platform_driver max77693_haptic_driver = {
+	.driver		= {
+		.name	= "max77693-haptic",
+		.owner	= THIS_MODULE,
+		.pm	= &max77693_haptic_pm_ops,
+	},
+	.probe		= max77693_haptic_probe,
+};
+module_platform_driver(max77693_haptic_driver);
+
+MODULE_AUTHOR("Jaewon Kim <jaewon02.kim@samsung.com>");
+MODULE_DESCRIPTION("MAXIM MAX77693 Haptic driver");
+MODULE_ALIAS("platform:max77693-haptic");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index c466ff3e16b8..d0e578fd7053 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -251,6 +251,15 @@ enum max77693_haptic_reg {
 	MAX77693_HAPTIC_REG_END,
 };
 
+/* max77693-pmic LSCNFG configuraton register */
+#define MAX77693_PMIC_LOW_SYS_MASK      0x80
+#define MAX77693_PMIC_LOW_SYS_SHIFT     7
+
+/* max77693-haptic configuration register */
+#define MAX77693_CONFIG2_MODE           7
+#define MAX77693_CONFIG2_MEN            6
+#define MAX77693_CONFIG2_HTYP           5
+
 enum max77693_irq_source {
 	LED_INT = 0,
 	TOPSYS_INT,
-- 
cgit v1.2.3


From 974a70bdecea5296db1b643e4046ef208e99c592 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Fri, 12 Sep 2014 09:32:41 +0800
Subject: usb: gadget: udc-core: add utility for bus reset

The udc driver can notify the udc core that bus reset occurs by
calling this utility, the core will notify gadget driver this
information and update gadget state accordingly.

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/udc/udc-core.c | 17 +++++++++++++++++
 include/linux/usb/gadget.h        |  6 ++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index b0d98172bc07..ba661c6da54a 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -123,6 +123,23 @@ EXPORT_SYMBOL_GPL(usb_gadget_set_state);
 
 /* ------------------------------------------------------------------------- */
 
+/**
+ * usb_gadget_udc_reset - notifies the udc core that bus reset occurs
+ * @gadget: The gadget which bus reset occurs
+ * @driver: The gadget driver we want to notify
+ *
+ * If the udc driver has bus reset handler, it needs to call this when the bus
+ * reset occurs, it notifies the gadget driver that the bus reset occurs as
+ * well as updates gadget state.
+ */
+void usb_gadget_udc_reset(struct usb_gadget *gadget,
+		struct usb_gadget_driver *driver)
+{
+	driver->reset(gadget);
+	usb_gadget_set_state(gadget, USB_STATE_DEFAULT);
+}
+EXPORT_SYMBOL_GPL(usb_gadget_udc_reset);
+
 /**
  * usb_gadget_udc_start - tells usb device controller to start up
  * @gadget: The gadget we want to get started
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 598a6e9b2850..d18811433324 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -1017,6 +1017,12 @@ extern void usb_gadget_set_state(struct usb_gadget *gadget,
 
 /*-------------------------------------------------------------------------*/
 
+/* utility to tell udc core that the bus reset occurs */
+extern void usb_gadget_udc_reset(struct usb_gadget *gadget,
+		struct usb_gadget_driver *driver);
+
+/*-------------------------------------------------------------------------*/
+
 /* utility wrapping a simple endpoint selection policy */
 
 extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *,
-- 
cgit v1.2.3


From d4b18c3e00b8d18fbd316abe9639b91ad416e1f3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 Sep 2014 17:36:31 -0700
Subject: pnfs: remove GETDEVICELIST implementation

The current GETDEVICELIST implementation is buggy in that it doesn't handle
cursors correctly, and in that it returns an error if the server returns
NFSERR_NOTSUPP.  Given that there is no actual need for GETDEVICELIST,
it has various issues and might get removed for NFSv4.2 stop using it in
the blocklayout driver, and thus the Linux NFS client as whole.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/blocklayout/blocklayout.c |   2 +-
 fs/nfs/nfs4proc.c                |  48 ---------------
 fs/nfs/nfs4xdr.c                 | 130 ---------------------------------------
 fs/nfs/pnfs.h                    |   5 --
 fs/nfs/pnfs_dev.c                |  30 ---------
 include/linux/nfs_xdr.h          |  11 ----
 6 files changed, 1 insertion(+), 225 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 25ba9e0e6fff..3e1f1afc6db4 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -532,7 +532,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
 		return -EINVAL;
 	}
 
-	return nfs4_deviceid_getdevicelist(server, fh);
+	return 0;
 }
 
 static bool
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 47fa67a3a8cf..0b711227cfa5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7808,54 +7808,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
 	return status;
 }
 
-/*
- * Retrieve the list of Data Server devices from the MDS.
- */
-static int _nfs4_getdevicelist(struct nfs_server *server,
-				    const struct nfs_fh *fh,
-				    struct pnfs_devicelist *devlist)
-{
-	struct nfs4_getdevicelist_args args = {
-		.fh = fh,
-		.layoutclass = server->pnfs_curr_ld->id,
-	};
-	struct nfs4_getdevicelist_res res = {
-		.devlist = devlist,
-	};
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST],
-		.rpc_argp = &args,
-		.rpc_resp = &res,
-	};
-	int status;
-
-	dprintk("--> %s\n", __func__);
-	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
-				&res.seq_res, 0);
-	dprintk("<-- %s status=%d\n", __func__, status);
-	return status;
-}
-
-int nfs4_proc_getdevicelist(struct nfs_server *server,
-			    const struct nfs_fh *fh,
-			    struct pnfs_devicelist *devlist)
-{
-	struct nfs4_exception exception = { };
-	int err;
-
-	do {
-		err = nfs4_handle_exception(server,
-				_nfs4_getdevicelist(server, fh, devlist),
-				&exception);
-	} while (exception.retry);
-
-	dprintk("%s: err=%d, num_devs=%u\n", __func__,
-		err, devlist->num_devs);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist);
-
 static int
 _nfs4_proc_getdeviceinfo(struct nfs_server *server,
 		struct pnfs_device *pdev,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f2cd957adb90..b8165eab0a32 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -362,17 +362,6 @@ static int nfs4_stat_to_errno(int);
 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
 #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
 #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
-#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \
-				encode_verifier_maxsz)
-#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \
-				2 /* nfs_cookie4 gdlr_cookie */ + \
-				decode_verifier_maxsz \
-				  /* verifier4 gdlr_verifier */ + \
-				1 /* gdlr_deviceid_list count */ + \
-				XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \
-					    NFS4_DEVICEID4_SIZE) \
-				  /* gdlr_deviceid_list */ + \
-				1 /* bool gdlr_eof */)
 #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
 				XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
 #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
@@ -812,14 +801,6 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
 					 decode_sequence_maxsz + \
 					 decode_reclaim_complete_maxsz)
-#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \
-				encode_sequence_maxsz + \
-				encode_putfh_maxsz + \
-				encode_getdevicelist_maxsz)
-#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \
-				decode_sequence_maxsz + \
-				decode_putfh_maxsz + \
-				decode_getdevicelist_maxsz)
 #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz +    \
 				encode_sequence_maxsz +\
 				encode_getdeviceinfo_maxsz)
@@ -1929,24 +1910,6 @@ static void encode_sequence(struct xdr_stream *xdr,
 }
 
 #ifdef CONFIG_NFS_V4_1
-static void
-encode_getdevicelist(struct xdr_stream *xdr,
-		     const struct nfs4_getdevicelist_args *args,
-		     struct compound_hdr *hdr)
-{
-	__be32 *p;
-	nfs4_verifier dummy = {
-		.data = "dummmmmy",
-	};
-
-	encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr);
-	p = reserve_space(xdr, 16);
-	*p++ = cpu_to_be32(args->layoutclass);
-	*p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
-	xdr_encode_hyper(p, 0ULL);                          /* cookie */
-	encode_nfs4_verifier(xdr, &dummy);
-}
-
 static void
 encode_getdeviceinfo(struct xdr_stream *xdr,
 		     const struct nfs4_getdeviceinfo_args *args,
@@ -2900,24 +2863,6 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
-/*
- * Encode GETDEVICELIST request
- */
-static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req,
-				       struct xdr_stream *xdr,
-				       struct nfs4_getdevicelist_args *args)
-{
-	struct compound_hdr hdr = {
-		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
-	};
-
-	encode_compound_hdr(xdr, req, &hdr);
-	encode_sequence(xdr, &args->seq_args, &hdr);
-	encode_putfh(xdr, args->fh, &hdr);
-	encode_getdevicelist(xdr, args, &hdr);
-	encode_nops(&hdr);
-}
-
 /*
  * Encode GETDEVICEINFO request
  */
@@ -5773,54 +5718,6 @@ out_overflow:
 }
 
 #if defined(CONFIG_NFS_V4_1)
-/*
- * TODO: Need to handle case when EOF != true;
- */
-static int decode_getdevicelist(struct xdr_stream *xdr,
-				struct pnfs_devicelist *res)
-{
-	__be32 *p;
-	int status, i;
-	nfs4_verifier verftemp;
-
-	status = decode_op_hdr(xdr, OP_GETDEVICELIST);
-	if (status)
-		return status;
-
-	p = xdr_inline_decode(xdr, 8 + 8 + 4);
-	if (unlikely(!p))
-		goto out_overflow;
-
-	/* TODO: Skip cookie for now */
-	p += 2;
-
-	/* Read verifier */
-	p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE);
-
-	res->num_devs = be32_to_cpup(p);
-
-	dprintk("%s: num_dev %d\n", __func__, res->num_devs);
-
-	if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) {
-		printk(KERN_ERR "NFS: %s too many result dev_num %u\n",
-				__func__, res->num_devs);
-		return -EIO;
-	}
-
-	p = xdr_inline_decode(xdr,
-			      res->num_devs * NFS4_DEVICEID4_SIZE + 4);
-	if (unlikely(!p))
-		goto out_overflow;
-	for (i = 0; i < res->num_devs; i++)
-		p = xdr_decode_opaque_fixed(p, res->dev_id[i].data,
-					    NFS4_DEVICEID4_SIZE);
-	res->eof = be32_to_cpup(p);
-	return 0;
-out_overflow:
-	print_overflow_msg(__func__, xdr);
-	return -EIO;
-}
-
 static int decode_getdeviceinfo(struct xdr_stream *xdr,
 				struct pnfs_device *pdev)
 {
@@ -7104,32 +7001,6 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
 	return status;
 }
 
-/*
- * Decode GETDEVICELIST response
- */
-static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp,
-				      struct xdr_stream *xdr,
-				      struct nfs4_getdevicelist_res *res)
-{
-	struct compound_hdr hdr;
-	int status;
-
-	dprintk("encoding getdevicelist!\n");
-
-	status = decode_compound_hdr(xdr, &hdr);
-	if (status != 0)
-		goto out;
-	status = decode_sequence(xdr, &res->seq_res, rqstp);
-	if (status != 0)
-		goto out;
-	status = decode_putfh(xdr);
-	if (status != 0)
-		goto out;
-	status = decode_getdevicelist(xdr, res->devlist);
-out:
-	return status;
-}
-
 /*
  * Decode GETDEVINFO response
  */
@@ -7498,7 +7369,6 @@ struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(SECINFO_NO_NAME,	enc_secinfo_no_name,	dec_secinfo_no_name),
 	PROC(TEST_STATEID,	enc_test_stateid,	dec_test_stateid),
 	PROC(FREE_STATEID,	enc_free_stateid,	dec_free_stateid),
-	PROC(GETDEVICELIST,	enc_getdevicelist,	dec_getdevicelist),
 	PROC(BIND_CONN_TO_SESSION,
 			enc_bind_conn_to_session, dec_bind_conn_to_session),
 	PROC(DESTROY_CLIENTID,	enc_destroy_clientid,	dec_destroy_clientid),
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d84fe29fc88b..b10f12fc6818 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -180,9 +180,6 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
 extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
 
 /* nfs4proc.c */
-extern int nfs4_proc_getdevicelist(struct nfs_server *server,
-				   const struct nfs_fh *fh,
-				   struct pnfs_devicelist *devlist);
 extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
 				   struct pnfs_device *dev,
 				   struct rpc_cred *cred);
@@ -277,8 +274,6 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
 void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
 bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
 void nfs4_deviceid_purge_client(const struct nfs_client *);
-int nfs4_deviceid_getdevicelist(struct nfs_server *server,
-		const struct nfs_fh *fh);
 
 static inline struct nfs4_deviceid_node *
 nfs4_get_deviceid(struct nfs4_deviceid_node *d)
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 82c2836fdb38..aa2ec0015183 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -358,33 +358,3 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
 	}
 	rcu_read_unlock();
 }
-
-int
-nfs4_deviceid_getdevicelist(struct nfs_server *server,
-		const struct nfs_fh *fh)
-{
-	struct pnfs_devicelist *dlist;
-	struct nfs4_deviceid_node *d;
-	int error = 0, i;
-
-	dlist = kzalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
-	if (!dlist)
-		return -ENOMEM;
-
-	while (!dlist->eof) {
-		error = nfs4_proc_getdevicelist(server, fh, dlist);
-		if (error)
-			break;
-
-		for (i = 0; i < dlist->num_devs; i++) {
-			d = nfs4_find_get_deviceid(server, &dlist->dev_id[i],
-					NULL, GFP_NOFS);
-			if (d)
-				nfs4_put_deviceid_node(d);
-		}
-	}
-
-	kfree(dlist);
-	return error;
-}
-EXPORT_SYMBOL_GPL(nfs4_deviceid_getdevicelist);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f4092c6b90fb..7ae249ccb78d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -252,17 +252,6 @@ struct nfs4_layoutget {
 	gfp_t gfp_flags;
 };
 
-struct nfs4_getdevicelist_args {
-	struct nfs4_sequence_args seq_args;
-	const struct nfs_fh *fh;
-	u32 layoutclass;
-};
-
-struct nfs4_getdevicelist_res {
-	struct nfs4_sequence_res seq_res;
-	struct pnfs_devicelist *devlist;
-};
-
 struct nfs4_getdeviceinfo_args {
 	struct nfs4_sequence_args seq_args;
 	struct pnfs_device *pdev;
-- 
cgit v1.2.3


From f418c64b71590bac8fdebd0969a1eeaffaf036d2 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Wed, 3 Sep 2014 12:19:07 -0400
Subject: NFS: Unconditionally enable commit code

The goal is to create a generic NFS module with code that does not
depend on what versions of NFS are enabled.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c        | 14 ------------
 fs/nfs/write.c         | 61 --------------------------------------------------
 include/linux/nfs_fs.h |  8 -------
 3 files changed, 83 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 65ef6e00deee..dda4b8667c02 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -178,7 +178,6 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
 	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
 }
 
-#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
 /*
  * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
  * @dreq - direct request possibly spanning multiple servers
@@ -197,7 +196,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
 	WARN_ON_ONCE(verfp->committed < 0);
 	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
 }
-#endif
 
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
@@ -576,7 +574,6 @@ out:
 	return result;
 }
 
-#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 {
 	struct nfs_pageio_descriptor desc;
@@ -700,17 +697,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 	schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
 }
 
-#else
-static void nfs_direct_write_schedule_work(struct work_struct *work)
-{
-}
-
-static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
-{
-	nfs_direct_complete(dreq, true);
-}
-#endif
-
 static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 128d97f01d1c..6786873a2901 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -720,7 +720,6 @@ nfs_mark_request_dirty(struct nfs_page *req)
 	__set_page_dirty_nobuffers(req->wb_page);
 }
 
-#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
 /*
  * nfs_page_search_commits_for_head_request_locked
  *
@@ -870,43 +869,6 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr)
 	return hdr->verf.committed != NFS_FILE_SYNC;
 }
 
-#else
-static struct nfs_page *
-nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
-						struct page *page)
-{
-	return NULL;
-}
-
-static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
-				      struct inode *inode)
-{
-}
-
-void nfs_init_cinfo(struct nfs_commit_info *cinfo,
-		    struct inode *inode,
-		    struct nfs_direct_req *dreq)
-{
-}
-
-void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
-			struct nfs_commit_info *cinfo)
-{
-}
-
-static void
-nfs_clear_request_commit(struct nfs_page *req)
-{
-}
-
-int nfs_write_need_commit(struct nfs_pgio_header *hdr)
-{
-	return 0;
-}
-
-#endif
-
 static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_commit_info cinfo;
@@ -942,7 +904,6 @@ out:
 	hdr->release(hdr);
 }
 
-#if  IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
 unsigned long
 nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
@@ -999,19 +960,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
 	return ret;
 }
 
-#else
-unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
-{
-	return 0;
-}
-
-int nfs_scan_commit(struct inode *inode, struct list_head *dst,
-		    struct nfs_commit_info *cinfo)
-{
-	return 0;
-}
-#endif
-
 /*
  * Search for an existing write request, and attempt to update
  * it to reflect a new dirty region on a given page.
@@ -1404,7 +1352,6 @@ static int nfs_writeback_done(struct rpc_task *task,
 		return status;
 	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
 
-#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
 	if (hdr->res.verf->committed < hdr->args.stable &&
 	    task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
@@ -1426,7 +1373,6 @@ static int nfs_writeback_done(struct rpc_task *task,
 			complain = jiffies + 300 * HZ;
 		}
 	}
-#endif
 
 	/* Deal with the suid/sgid bit corner case */
 	if (nfs_should_remove_suid(inode))
@@ -1479,7 +1425,6 @@ static void nfs_writeback_result(struct rpc_task *task,
 }
 
 
-#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
 static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
 {
 	int ret;
@@ -1803,12 +1748,6 @@ out_mark_dirty:
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 	return ret;
 }
-#else
-static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
-{
-	return 0;
-}
-#endif
 
 int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 5180a7ededec..fd334d4b0d41 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -529,17 +529,9 @@ extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
 extern int nfs_wb_all(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
-#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
 extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_commit_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_commit_data *data);
-#else
-static inline int
-nfs_commit_inode(struct inode *inode, int how)
-{
-	return 0;
-}
-#endif
 
 static inline int
 nfs_have_writebacks(struct inode *inode)
-- 
cgit v1.2.3


From cb8c20fa53ec28602793ee43ddc7e8883be62e69 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Wed, 3 Sep 2014 12:19:10 -0400
Subject: NFS: Move NFS v3 acl functions to nfs3_fs.h

This code is internal to the v3 module, so other parts of the client
shouldn't have any knowledge of it.

nfs3_getxattr(), nfs3_setxattr(), and nfs3_removexattr() no longer exist
anywhere so I remove the declarations while I'm here.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs3_fs.h       | 19 +++++++++++++++++++
 fs/nfs/nfs3super.c     |  1 +
 include/linux/nfs_fs.h | 33 ---------------------------------
 3 files changed, 20 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index 6599e0dcd69f..333ae4068506 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -6,6 +6,25 @@
 #ifndef __LINUX_FS_NFS_NFS3_FS_H
 #define __LINUX_FS_NFS_NFS3_FS_H
 
+/*
+ * nfs3acl.c
+ */
+#ifdef CONFIG_NFS_V3_ACL
+extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type);
+extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+		struct posix_acl *dfacl);
+extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
+extern const struct xattr_handler *nfs3_xattr_handlers[];
+#else
+static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+		struct posix_acl *dfacl)
+{
+	return 0;
+}
+#define nfs3_listxattr NULL
+#endif /* CONFIG_NFS_V3_ACL */
+
 /* nfs3client.c */
 struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *);
 struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *,
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c
index d6a98949af19..6af29c2da352 100644
--- a/fs/nfs/nfs3super.c
+++ b/fs/nfs/nfs3super.c
@@ -4,6 +4,7 @@
 #include <linux/module.h>
 #include <linux/nfs_fs.h>
 #include "internal.h"
+#include "nfs3_fs.h"
 #include "nfs.h"
 
 static struct nfs_subversion nfs_v3 = {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fd334d4b0d41..28d649054d5f 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -442,22 +442,6 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file)
 	return NULL;
 }
 
-/*
- * linux/fs/nfs/xattr.c
- */
-#ifdef CONFIG_NFS_V3_ACL
-extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
-extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t);
-extern int nfs3_setxattr(struct dentry *, const char *,
-			const void *, size_t, int);
-extern int nfs3_removexattr (struct dentry *, const char *name);
-#else
-# define nfs3_listxattr NULL
-# define nfs3_getxattr NULL
-# define nfs3_setxattr NULL
-# define nfs3_removexattr NULL
-#endif
-
 /*
  * linux/fs/nfs/direct.c
  */
@@ -548,23 +532,6 @@ extern int  nfs_readpages(struct file *, struct address_space *,
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 			       struct page *);
 
-/*
- * linux/fs/nfs3proc.c
- */
-#ifdef CONFIG_NFS_V3_ACL
-extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type);
-extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
-		struct posix_acl *dfacl);
-extern const struct xattr_handler *nfs3_xattr_handlers[];
-#else
-static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
-		struct posix_acl *dfacl)
-{
-	return 0;
-}
-#endif /* CONFIG_NFS_V3_ACL */
-
 /*
  * inline functions
  */
-- 
cgit v1.2.3


From e09c2c295468476a239d13324ce9042ec4de05eb Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 13 Sep 2014 04:14:30 +0900
Subject: workqueue: apply __WQ_ORDERED to create_singlethread_workqueue()

create_singlethread_workqueue() is a compat interface for single
threaded workqueue which maps to ordered workqueue w/ rescuer in the
current implementation.  create_singlethread_workqueue() currently
implemented by invoking alloc_workqueue() w/ appropriate parameters.

8719dceae2f9 ("workqueue: reject adjusting max_active or applying
attrs to ordered workqueues") introduced __WQ_ORDERED to protect
ordered workqueues against dynamic attribute changes which can break
ordering guarantees but forgot to apply it to
create_singlethread_workqueue().  This in itself is okay as nobody
currently uses dynamic attribute change on workqueues created with
create_singlethread_workqueue().

However, 4c16bd327c ("workqueue: implement NUMA affinity for unbound
workqueues") broke singlethreaded guarantee for ordered workqueues
through allocating a separate pool_workqueue on each NUMA node by
default.  A later change 8a2b75384444 ("workqueue: fix ordered
workqueues in NUMA setups") fixed it by allocating only one global
pool_workqueue if __WQ_ORDERED is set.

Combined, the __WQ_ORDERED omission in create_singlethread_workqueue()
became critical breaking its single threadedness and ordering
guarantee.

Let's make create_singlethread_workqueue() wrap
alloc_ordered_workqueue() instead so that it inherits __WQ_ORDERED and
can implicitly track future ordered_workqueue changes.

v2: I missed that __WQ_ORDERED now protects against pwq splitting
    across NUMA nodes and incorrectly described the patch as a
    nice-to-have fix to protect against future dynamic attribute
    usages.  Oleg pointed out that this is actually a critical
    breakage due to 8a2b75384444 ("workqueue: fix ordered workqueues
    in NUMA setups").

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Mike Anderson <mike.anderson@us.ibm.com>
Cc: Oleg Nesterov <onestero@redhat.com>
Cc: Gustavo Luiz Duarte <gduarte@redhat.com>
Cc: Tomas Henzl <thenzl@redhat.com>
Cc: stable@vger.kernel.org
Fixes: 4c16bd327c ("workqueue: implement NUMA affinity for unbound workqueues")
---
 include/linux/workqueue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a0cc2e95ed1b..b996e6cde6bb 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -419,7 +419,7 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
 			1, (name))
 #define create_singlethread_workqueue(name)				\
-	alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, (name))
+	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
-- 
cgit v1.2.3


From d78c9300c51d6ceed9f6d078d4e9366f259de28c Mon Sep 17 00:00:00 2001
From: Andrew Hunter <ahh@google.com>
Date: Thu, 4 Sep 2014 14:17:16 -0700
Subject: jiffies: Fix timeval conversion to jiffies

timeval_to_jiffies tried to round a timeval up to an integral number
of jiffies, but the logic for doing so was incorrect: intervals
corresponding to exactly N jiffies would become N+1. This manifested
itself particularly repeatedly stopping/starting an itimer:

setitimer(ITIMER_PROF, &val, NULL);
setitimer(ITIMER_PROF, NULL, &val);

would add a full tick to val, _even if it was exactly representable in
terms of jiffies_ (say, the result of a previous rounding.)  Doing
this repeatedly would cause unbounded growth in val.  So fix the math.

Here's what was wrong with the conversion: we essentially computed
(eliding seconds)

jiffies = usec  * (NSEC_PER_USEC/TICK_NSEC)

by using scaling arithmetic, which took the best approximation of
NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC =
x/(2^USEC_JIFFIE_SC), and computed:

jiffies = (usec * x) >> USEC_JIFFIE_SC

and rounded this calculation up in the intermediate form (since we
can't necessarily exactly represent TICK_NSEC in usec.) But the
scaling arithmetic is a (very slight) *over*approximation of the true
value; that is, instead of dividing by (1 usec/ 1 jiffie), we
effectively divided by (1 usec/1 jiffie)-epsilon (rounding
down). This would normally be fine, but we want to round timeouts up,
and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this
would be fine if our division was exact, but dividing this by the
slightly smaller factor was equivalent to adding just _over_ 1 to the
final result (instead of just _under_ 1, as desired.)

In particular, with HZ=1000, we consistently computed that 10000 usec
was 11 jiffies; the same was true for any exact multiple of
TICK_NSEC.

We could possibly still round in the intermediate form, adding
something less than 2^USEC_JIFFIE_SC - 1, but easier still is to
convert usec->nsec, round in nanoseconds, and then convert using
time*spec*_to_jiffies.  This adds one constant multiplication, and is
not observably slower in microbenchmarks on recent x86 hardware.

Tested: the following program:

int main() {
  struct itimerval zero = {{0, 0}, {0, 0}};
  /* Initially set to 10 ms. */
  struct itimerval initial = zero;
  initial.it_interval.tv_usec = 10000;
  setitimer(ITIMER_PROF, &initial, NULL);
  /* Save and restore several times. */
  for (size_t i = 0; i < 10; ++i) {
    struct itimerval prev;
    setitimer(ITIMER_PROF, &zero, &prev);
    /* on old kernels, this goes up by TICK_USEC every iteration */
    printf("previous value: %ld %ld %ld %ld\n",
           prev.it_interval.tv_sec, prev.it_interval.tv_usec,
           prev.it_value.tv_sec, prev.it_value.tv_usec);
    setitimer(ITIMER_PROF, &prev, NULL);
  }
    return 0;
}

Cc: stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Paul Turner <pjt@google.com>
Reported-by: Aaron Jacobs <jacobsa@google.com>
Signed-off-by: Andrew Hunter <ahh@google.com>
[jstultz: Tweaked to apply to 3.17-rc]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/jiffies.h | 12 -----------
 kernel/time/time.c      | 56 +++++++++++++++++++++++++++----------------------
 2 files changed, 31 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 1f44466c1e9d..c367cbdf73ab 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -258,23 +258,11 @@ extern unsigned long preset_lpj;
 #define SEC_JIFFIE_SC (32 - SHIFT_HZ)
 #endif
 #define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 29)
-#define USEC_JIFFIE_SC (SEC_JIFFIE_SC + 19)
 #define SEC_CONVERSION ((unsigned long)((((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) +\
                                 TICK_NSEC -1) / (u64)TICK_NSEC))
 
 #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\
                                         TICK_NSEC -1) / (u64)TICK_NSEC))
-#define USEC_CONVERSION  \
-                    ((unsigned long)((((u64)NSEC_PER_USEC << USEC_JIFFIE_SC) +\
-                                        TICK_NSEC -1) / (u64)TICK_NSEC))
-/*
- * USEC_ROUND is used in the timeval to jiffie conversion.  See there
- * for more details.  It is the scaled resolution rounding value.  Note
- * that it is a 64-bit value.  Since, when it is applied, we are already
- * in jiffies (albit scaled), it is nothing but the bits we will shift
- * off.
- */
-#define USEC_ROUND (u64)(((u64)1 << USEC_JIFFIE_SC) - 1)
 /*
  * The maximum jiffie value is (MAX_INT >> 1).  Here we translate that
  * into seconds.  The 64-bit case will overflow if we are not careful,
diff --git a/kernel/time/time.c b/kernel/time/time.c
index f0294ba14634..a9ae20fb0b11 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -559,17 +559,20 @@ EXPORT_SYMBOL(usecs_to_jiffies);
  * that a remainder subtract here would not do the right thing as the
  * resolution values don't fall on second boundries.  I.e. the line:
  * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
+ * Note that due to the small error in the multiplier here, this
+ * rounding is incorrect for sufficiently large values of tv_nsec, but
+ * well formed timespecs should have tv_nsec < NSEC_PER_SEC, so we're
+ * OK.
  *
  * Rather, we just shift the bits off the right.
  *
  * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
  * value to a scaled second value.
  */
-unsigned long
-timespec_to_jiffies(const struct timespec *value)
+static unsigned long
+__timespec_to_jiffies(unsigned long sec, long nsec)
 {
-	unsigned long sec = value->tv_sec;
-	long nsec = value->tv_nsec + TICK_NSEC - 1;
+	nsec = nsec + TICK_NSEC - 1;
 
 	if (sec >= MAX_SEC_IN_JIFFIES){
 		sec = MAX_SEC_IN_JIFFIES;
@@ -580,6 +583,13 @@ timespec_to_jiffies(const struct timespec *value)
 		 (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
 
 }
+
+unsigned long
+timespec_to_jiffies(const struct timespec *value)
+{
+	return __timespec_to_jiffies(value->tv_sec, value->tv_nsec);
+}
+
 EXPORT_SYMBOL(timespec_to_jiffies);
 
 void
@@ -596,31 +606,27 @@ jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
 }
 EXPORT_SYMBOL(jiffies_to_timespec);
 
-/* Same for "timeval"
- *
- * Well, almost.  The problem here is that the real system resolution is
- * in nanoseconds and the value being converted is in micro seconds.
- * Also for some machines (those that use HZ = 1024, in-particular),
- * there is a LARGE error in the tick size in microseconds.
-
- * The solution we use is to do the rounding AFTER we convert the
- * microsecond part.  Thus the USEC_ROUND, the bits to be shifted off.
- * Instruction wise, this should cost only an additional add with carry
- * instruction above the way it was done above.
+/*
+ * We could use a similar algorithm to timespec_to_jiffies (with a
+ * different multiplier for usec instead of nsec). But this has a
+ * problem with rounding: we can't exactly add TICK_NSEC - 1 to the
+ * usec value, since it's not necessarily integral.
+ *
+ * We could instead round in the intermediate scaled representation
+ * (i.e. in units of 1/2^(large scale) jiffies) but that's also
+ * perilous: the scaling introduces a small positive error, which
+ * combined with a division-rounding-upward (i.e. adding 2^(scale) - 1
+ * units to the intermediate before shifting) leads to accidental
+ * overflow and overestimates.
+ *
+ * At the cost of one additional multiplication by a constant, just
+ * use the timespec implementation.
  */
 unsigned long
 timeval_to_jiffies(const struct timeval *value)
 {
-	unsigned long sec = value->tv_sec;
-	long usec = value->tv_usec;
-
-	if (sec >= MAX_SEC_IN_JIFFIES){
-		sec = MAX_SEC_IN_JIFFIES;
-		usec = 0;
-	}
-	return (((u64)sec * SEC_CONVERSION) +
-		(((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
-		 (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
+	return __timespec_to_jiffies(value->tv_sec,
+				     value->tv_usec * NSEC_PER_USEC);
 }
 EXPORT_SYMBOL(timeval_to_jiffies);
 
-- 
cgit v1.2.3


From 3ef7de5304edf60d0b8674dd7cdacc104e15a93c Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <j.anaszewski@samsung.com>
Date: Wed, 20 Aug 2014 06:41:55 -0700
Subject: leds: Improve and export led_update_brightness

led_update_brightness helper function used to be exploited only locally
in the led-class.c module, where its result was being passed to the
brightness_show sysfs callback. With the introduction of v4l2-flash
subdevice the same functionality becomes required for reading current
brightness from a LED device. This patch adds checking of return value
of the brightness_get callback and moves the led_update_brightness()
function to the LED subsystem public API.

Signed-off-by: Jacek Anaszewski <j.anaszewski@samsung.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/led-class.c |  6 ------
 drivers/leds/led-core.c  | 16 ++++++++++++++++
 include/linux/leds.h     | 10 ++++++++++
 3 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 71666a40b79a..7440c58b8e6f 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -24,12 +24,6 @@
 
 static struct class *leds_class;
 
-static void led_update_brightness(struct led_classdev *led_cdev)
-{
-	if (led_cdev->brightness_get)
-		led_cdev->brightness = led_cdev->brightness_get(led_cdev);
-}
-
 static ssize_t brightness_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 50b579ad948e..aaa8eba9099f 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -127,3 +127,19 @@ void led_set_brightness(struct led_classdev *led_cdev,
 	__led_set_brightness(led_cdev, brightness);
 }
 EXPORT_SYMBOL(led_set_brightness);
+
+int led_update_brightness(struct led_classdev *led_cdev)
+{
+	int ret = 0;
+
+	if (led_cdev->brightness_get) {
+		ret = led_cdev->brightness_get(led_cdev);
+		if (ret >= 0) {
+			led_cdev->brightness = ret;
+			return 0;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(led_update_brightness);
diff --git a/include/linux/leds.h b/include/linux/leds.h
index f2e1cbc25705..a57611d0c94e 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -140,6 +140,16 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev,
  */
 extern void led_set_brightness(struct led_classdev *led_cdev,
 			       enum led_brightness brightness);
+/**
+ * led_update_brightness - update LED brightness
+ * @led_cdev: the LED to query
+ *
+ * Get an LED's current brightness and update led_cdev->brightness
+ * member with the obtained value.
+ *
+ * Returns: 0 on success or negative error value on failure
+ */
+extern int led_update_brightness(struct led_classdev *led_cdev);
 
 /*
  * LED Triggers
-- 
cgit v1.2.3


From fbfa398b84a5fc6e085dedba5ec3e94f21815d05 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 28 Aug 2014 12:21:44 -0600
Subject: PCI: Remove unused pci_configure_slot()

All pci_configure_slot() uses have been removed, so remove the definition
as well.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
---
 drivers/pci/probe.c         | 28 ----------------------------
 include/linux/pci_hotplug.h |  2 --
 2 files changed, 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e9482867555b..4b3b29bc7a82 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1358,34 +1358,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
 	 */
 }
 
-void pci_configure_slot(struct pci_dev *dev)
-{
-	struct pci_dev *cdev;
-	struct hotplug_params hpp;
-	int ret;
-
-	if (!(dev->hdr_type == PCI_HEADER_TYPE_NORMAL ||
-			(dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
-			(dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)))
-		return;
-
-	pcie_bus_configure_settings(dev->bus);
-
-	memset(&hpp, 0, sizeof(hpp));
-	ret = pci_get_hp_params(dev, &hpp);
-
-	program_hpp_type2(dev, hpp.t2);
-	program_hpp_type1(dev, hpp.t1);
-	program_hpp_type0(dev, hpp.t0);
-
-	if (dev->subordinate) {
-		list_for_each_entry(cdev, &dev->subordinate->devices,
-				    bus_list)
-			pci_configure_slot(cdev);
-	}
-}
-EXPORT_SYMBOL_GPL(pci_configure_slot);
-
 static void pci_configure_device(struct pci_dev *dev)
 {
 	struct hotplug_params hpp;
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 5f2e559af6b0..2706ee9a4327 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -187,6 +187,4 @@ static inline int pci_get_hp_params(struct pci_dev *dev,
 	return -ENODEV;
 }
 #endif
-
-void pci_configure_slot(struct pci_dev *dev);
 #endif
-- 
cgit v1.2.3


From 46e5da40aec256155cfedee96dd21a75da941f2c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 12 Sep 2014 20:04:52 -0700
Subject: net: qdisc: use rcu prefix and silence sparse warnings

Add __rcu notation to qdisc handling by doing this we can make
smatch output more legible. And anyways some of the cases should
be using rcu_dereference() see qdisc_all_tx_empty(),
qdisc_tx_chainging(), and so on.

Also *wake_queue() API is commonly called from driver timer routines
without rcu lock or rtnl lock. So I added rcu_read_lock() blocks
around netif_wake_subqueue and netif_tx_wake_queue.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 29 ++++-----------------------
 include/net/sch_generic.h | 21 +++++++++++++------
 net/core/dev.c            | 51 +++++++++++++++++++++++++++++++++++++++++++++--
 net/sched/sch_generic.c   |  4 ++--
 net/sched/sch_mqprio.c    |  6 ++++--
 net/sched/sch_teql.c      | 13 +++++++-----
 6 files changed, 82 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ba72f6baae1a..ae721f53739e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -543,7 +543,7 @@ struct netdev_queue {
  * read mostly part
  */
 	struct net_device	*dev;
-	struct Qdisc		*qdisc;
+	struct Qdisc __rcu	*qdisc;
 	struct Qdisc		*qdisc_sleeping;
 #ifdef CONFIG_SYSFS
 	struct kobject		kobj;
@@ -2356,12 +2356,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
 void __netif_schedule(struct Qdisc *q);
-
-static inline void netif_schedule_queue(struct netdev_queue *txq)
-{
-	if (!(txq->state & QUEUE_STATE_ANY_XOFF))
-		__netif_schedule(txq->qdisc);
-}
+void netif_schedule_queue(struct netdev_queue *txq);
 
 static inline void netif_tx_schedule_all(struct net_device *dev)
 {
@@ -2397,11 +2392,7 @@ static inline void netif_tx_start_all_queues(struct net_device *dev)
 	}
 }
 
-static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
-{
-	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
-		__netif_schedule(dev_queue->qdisc);
-}
+void netif_tx_wake_queue(struct netdev_queue *dev_queue);
 
 /**
  *	netif_wake_queue - restart transmit
@@ -2673,19 +2664,7 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev,
 	return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb));
 }
 
-/**
- *	netif_wake_subqueue - allow sending packets on subqueue
- *	@dev: network device
- *	@queue_index: sub queue index
- *
- * Resume individual transmit queue of a device with multiple transmit queues.
- */
-static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
-{
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
-	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
-		__netif_schedule(txq->qdisc);
-}
+void netif_wake_subqueue(struct net_device *dev, u16 queue_index);
 
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a3cfb8ebeb53..56838ab29b42 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -259,7 +259,9 @@ static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
 
 static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
 {
-	return qdisc->dev_queue->qdisc;
+	struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc);
+
+	return q;
 }
 
 static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
@@ -384,7 +386,7 @@ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 	struct Qdisc *qdisc;
 
 	for (; i < dev->num_tx_queues; i++) {
-		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
 		if (qdisc) {
 			spin_lock_bh(qdisc_lock(qdisc));
 			qdisc_reset(qdisc);
@@ -402,13 +404,18 @@ static inline void qdisc_reset_all_tx(struct net_device *dev)
 static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 {
 	unsigned int i;
+
+	rcu_read_lock();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		const struct Qdisc *q = txq->qdisc;
+		const struct Qdisc *q = rcu_dereference(txq->qdisc);
 
-		if (q->q.qlen)
+		if (q->q.qlen) {
+			rcu_read_unlock();
 			return false;
+		}
 	}
+	rcu_read_unlock();
 	return true;
 }
 
@@ -416,9 +423,10 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 static inline bool qdisc_tx_changing(const struct net_device *dev)
 {
 	unsigned int i;
+
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		if (txq->qdisc != txq->qdisc_sleeping)
+		if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping)
 			return true;
 	}
 	return false;
@@ -428,9 +436,10 @@ static inline bool qdisc_tx_changing(const struct net_device *dev)
 static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 {
 	unsigned int i;
+
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		if (txq->qdisc != &noop_qdisc)
+		if (rcu_access_pointer(txq->qdisc) != &noop_qdisc)
 			return false;
 	}
 	return true;
diff --git a/net/core/dev.c b/net/core/dev.c
index 3c6a967e5830..b3d6dbc0c696 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2177,6 +2177,53 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
 	return (struct dev_kfree_skb_cb *)skb->cb;
 }
 
+void netif_schedule_queue(struct netdev_queue *txq)
+{
+	rcu_read_lock();
+	if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
+		struct Qdisc *q = rcu_dereference(txq->qdisc);
+
+		__netif_schedule(q);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(netif_schedule_queue);
+
+/**
+ *	netif_wake_subqueue - allow sending packets on subqueue
+ *	@dev: network device
+ *	@queue_index: sub queue index
+ *
+ * Resume individual transmit queue of a device with multiple transmit queues.
+ */
+void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
+
+	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
+		struct Qdisc *q;
+
+		rcu_read_lock();
+		q = rcu_dereference(txq->qdisc);
+		__netif_schedule(q);
+		rcu_read_unlock();
+	}
+}
+EXPORT_SYMBOL(netif_wake_subqueue);
+
+void netif_tx_wake_queue(struct netdev_queue *dev_queue)
+{
+	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
+		struct Qdisc *q;
+
+		rcu_read_lock();
+		q = rcu_dereference(dev_queue->qdisc);
+		__netif_schedule(q);
+		rcu_read_unlock();
+	}
+}
+EXPORT_SYMBOL(netif_tx_wake_queue);
+
 void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
 {
 	unsigned long flags;
@@ -3432,7 +3479,7 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-	q = rxq->qdisc;
+	q = rcu_dereference(rxq->qdisc);
 	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
@@ -3449,7 +3496,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 {
 	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
 
-	if (!rxq || rxq->qdisc == &noop_qdisc)
+	if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
 		goto out;
 
 	if (*pt_prev) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 19696ebe9ebc..346ef85617d3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -783,7 +783,7 @@ static void dev_deactivate_queue(struct net_device *dev,
 	struct Qdisc *qdisc_default = _qdisc_default;
 	struct Qdisc *qdisc;
 
-	qdisc = dev_queue->qdisc;
+	qdisc = rtnl_dereference(dev_queue->qdisc);
 	if (qdisc) {
 		spin_lock_bh(qdisc_lock(qdisc));
 
@@ -876,7 +876,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
 {
 	struct Qdisc *qdisc = _qdisc;
 
-	dev_queue->qdisc = qdisc;
+	rcu_assign_pointer(dev_queue->qdisc, qdisc);
 	dev_queue->qdisc_sleeping = qdisc;
 }
 
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6749e2f540d0..37e7d25d21f1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -231,7 +231,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	memset(&sch->qstats, 0, sizeof(sch->qstats));
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
-		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
 		spin_lock_bh(qdisc_lock(qdisc));
 		sch->q.qlen		+= qdisc->q.qlen;
 		sch->bstats.bytes	+= qdisc->bstats.bytes;
@@ -340,7 +340,9 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 		spin_unlock_bh(d->lock);
 
 		for (i = tc.offset; i < tc.offset + tc.count; i++) {
-			qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+			struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+
+			qdisc = rtnl_dereference(q->qdisc);
 			spin_lock_bh(qdisc_lock(qdisc));
 			bstats.bytes      += qdisc->bstats.bytes;
 			bstats.packets    += qdisc->bstats.packets;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index aaa8d03ed054..5cd291bd00e4 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -96,11 +96,14 @@ teql_dequeue(struct Qdisc *sch)
 	struct teql_sched_data *dat = qdisc_priv(sch);
 	struct netdev_queue *dat_queue;
 	struct sk_buff *skb;
+	struct Qdisc *q;
 
 	skb = __skb_dequeue(&dat->q);
 	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
+	q = rcu_dereference_bh(dat_queue->qdisc);
+
 	if (skb == NULL) {
-		struct net_device *m = qdisc_dev(dat_queue->qdisc);
+		struct net_device *m = qdisc_dev(q);
 		if (m) {
 			dat->m->slaves = sch;
 			netif_wake_queue(m);
@@ -108,7 +111,7 @@ teql_dequeue(struct Qdisc *sch)
 	} else {
 		qdisc_bstats_update(sch, skb);
 	}
-	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
+	sch->q.qlen = dat->q.qlen + q->q.qlen;
 	return skb;
 }
 
@@ -157,9 +160,9 @@ teql_destroy(struct Qdisc *sch)
 						txq = netdev_get_tx_queue(master->dev, 0);
 						master->slaves = NULL;
 
-						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
+						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
 						spin_lock_bh(root_lock);
-						qdisc_reset(txq->qdisc);
+						qdisc_reset(rtnl_dereference(txq->qdisc));
 						spin_unlock_bh(root_lock);
 					}
 				}
@@ -266,7 +269,7 @@ static inline int teql_resolve(struct sk_buff *skb,
 	struct dst_entry *dst = skb_dst(skb);
 	int res;
 
-	if (txq->qdisc == &noop_qdisc)
+	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
 		return -ENODEV;
 
 	if (!dev->header_ops || !dst)
-- 
cgit v1.2.3


From 331b72922c5f58d48fd5500acadc91777cc31970 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 12 Sep 2014 20:08:20 -0700
Subject: net: sched: RCU cls_tcindex

Make cls_tcindex RCU safe.

This patch addds a new RCU routine rcu_dereference_bh_rtnl() to check
caller either holds the rcu read lock or RTNL. This is needed to
handle the case where tcindex_lookup() is being called in both cases.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  10 ++
 net/sched/cls_tcindex.c   | 248 ++++++++++++++++++++++++++++------------------
 2 files changed, 164 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 167bae7bdfa4..6cacbce1a06c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -46,6 +46,16 @@ static inline int lockdep_rtnl_is_held(void)
 #define rcu_dereference_rtnl(p)					\
 	rcu_dereference_check(p, lockdep_rtnl_is_held())
 
+/**
+ * rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking
+ * @p: The pointer to read, prior to dereference
+ *
+ * Do an rcu_dereference_bh(p), but check caller either holds rcu_read_lock_bh()
+ * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference_bh()
+ */
+#define rcu_dereference_bh_rtnl(p)				\
+	rcu_dereference_bh_check(p, lockdep_rtnl_is_held())
+
 /**
  * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
  * @p: The pointer to read, prior to dereferencing
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 3e9f76413b3b..a9f4279fbd69 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -32,19 +32,21 @@ struct tcindex_filter_result {
 struct tcindex_filter {
 	u16 key;
 	struct tcindex_filter_result result;
-	struct tcindex_filter *next;
+	struct tcindex_filter __rcu *next;
+	struct rcu_head rcu;
 };
 
 
 struct tcindex_data {
 	struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
-	struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
-				      NULL if unused */
+	struct tcindex_filter __rcu **h; /* imperfect hash; */
+	struct tcf_proto *tp;
 	u16 mask;		/* AND key with mask */
-	int shift;		/* shift ANDed key to the right */
-	int hash;		/* hash table size; 0 if undefined */
-	int alloc_hash;		/* allocated size */
-	int fall_through;	/* 0: only classify if explicit match */
+	u32 shift;		/* shift ANDed key to the right */
+	u32 hash;		/* hash table size; 0 if undefined */
+	u32 alloc_hash;		/* allocated size */
+	u32 fall_through;	/* 0: only classify if explicit match */
+	struct rcu_head rcu;
 };
 
 static inline int
@@ -56,13 +58,18 @@ tcindex_filter_is_set(struct tcindex_filter_result *r)
 static struct tcindex_filter_result *
 tcindex_lookup(struct tcindex_data *p, u16 key)
 {
-	struct tcindex_filter *f;
+	if (p->perfect) {
+		struct tcindex_filter_result *f = p->perfect + key;
+
+		return tcindex_filter_is_set(f) ? f : NULL;
+	} else if (p->h) {
+		struct tcindex_filter __rcu **fp;
+		struct tcindex_filter *f;
 
-	if (p->perfect)
-		return tcindex_filter_is_set(p->perfect + key) ?
-			p->perfect + key : NULL;
-	else if (p->h) {
-		for (f = p->h[key % p->hash]; f; f = f->next)
+		fp = &p->h[key % p->hash];
+		for (f = rcu_dereference_bh_rtnl(*fp);
+		     f;
+		     fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
 			if (f->key == key)
 				return &f->result;
 	}
@@ -74,7 +81,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key)
 static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			    struct tcf_result *res)
 {
-	struct tcindex_data *p = tp->root;
+	struct tcindex_data *p = rcu_dereference(tp->root);
 	struct tcindex_filter_result *f;
 	int key = (skb->tc_index & p->mask) >> p->shift;
 
@@ -99,7 +106,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
 static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
 {
-	struct tcindex_data *p = tp->root;
+	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r;
 
 	pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
@@ -129,49 +136,59 @@ static int tcindex_init(struct tcf_proto *tp)
 	p->hash = DEFAULT_HASH_SIZE;
 	p->fall_through = 1;
 
-	tp->root = p;
+	rcu_assign_pointer(tp->root, p);
 	return 0;
 }
 
-
 static int
-__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
+tcindex_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct tcindex_data *p = tp->root;
+	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+	struct tcindex_filter __rcu **walk;
 	struct tcindex_filter *f = NULL;
 
-	pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f);
+	pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
 	if (p->perfect) {
 		if (!r->res.class)
 			return -ENOENT;
 	} else {
 		int i;
-		struct tcindex_filter **walk = NULL;
 
-		for (i = 0; i < p->hash; i++)
-			for (walk = p->h+i; *walk; walk = &(*walk)->next)
-				if (&(*walk)->result == r)
+		for (i = 0; i < p->hash; i++) {
+			walk = p->h + i;
+			for (f = rtnl_dereference(*walk); f;
+			     walk = &f->next, f = rtnl_dereference(*walk)) {
+				if (&f->result == r)
 					goto found;
+			}
+		}
 		return -ENOENT;
 
 found:
-		f = *walk;
-		if (lock)
-			tcf_tree_lock(tp);
-		*walk = f->next;
-		if (lock)
-			tcf_tree_unlock(tp);
+		rcu_assign_pointer(*walk, rtnl_dereference(f->next));
 	}
 	tcf_unbind_filter(tp, &r->res);
 	tcf_exts_destroy(tp, &r->exts);
-	kfree(f);
+	if (f)
+		kfree_rcu(f, rcu);
 	return 0;
 }
 
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static int tcindex_destroy_element(struct tcf_proto *tp,
+				   unsigned long arg,
+				   struct tcf_walker *walker)
 {
-	return __tcindex_delete(tp, arg, 1);
+	return tcindex_delete(tp, arg);
+}
+
+static void __tcindex_destroy(struct rcu_head *head)
+{
+	struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+
+	kfree(p->perfect);
+	kfree(p->h);
+	kfree(p);
 }
 
 static inline int
@@ -194,6 +211,14 @@ static void tcindex_filter_result_init(struct tcindex_filter_result *r)
 	tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 }
 
+static void __tcindex_partial_destroy(struct rcu_head *head)
+{
+	struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+
+	kfree(p->perfect);
+	kfree(p);
+}
+
 static int
 tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		  u32 handle, struct tcindex_data *p,
@@ -203,7 +228,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	int err, balloc = 0;
 	struct tcindex_filter_result new_filter_result, *old_r = r;
 	struct tcindex_filter_result cr;
-	struct tcindex_data cp;
+	struct tcindex_data *cp, *oldp;
 	struct tcindex_filter *f = NULL; /* make gcc behave */
 	struct tcf_exts e;
 
@@ -212,84 +237,118 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	if (err < 0)
 		return err;
 
-	memcpy(&cp, p, sizeof(cp));
-	tcindex_filter_result_init(&new_filter_result);
+	/* tcindex_data attributes must look atomic to classifier/lookup so
+	 * allocate new tcindex data and RCU assign it onto root. Keeping
+	 * perfect hash and hash pointers from old data.
+	 */
+	cp = kzalloc(sizeof(cp), GFP_KERNEL);
+	if (!cp)
+		return -ENOMEM;
+
+	cp->mask = p->mask;
+	cp->shift = p->shift;
+	cp->hash = p->hash;
+	cp->alloc_hash = p->alloc_hash;
+	cp->fall_through = p->fall_through;
+	cp->tp = tp;
+
+	if (p->perfect) {
+		cp->perfect = kmemdup(p->perfect,
+				      sizeof(*r) * cp->hash, GFP_KERNEL);
+		if (!cp->perfect)
+			goto errout;
+	}
+	cp->h = p->h;
+
+	memset(&new_filter_result, 0, sizeof(new_filter_result));
+	tcf_exts_init(&new_filter_result.exts,
+		      TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 
 	tcindex_filter_result_init(&cr);
 	if (old_r)
 		cr.res = r->res;
 
 	if (tb[TCA_TCINDEX_HASH])
-		cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+		cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
 
 	if (tb[TCA_TCINDEX_MASK])
-		cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+		cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
 
 	if (tb[TCA_TCINDEX_SHIFT])
-		cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+		cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
 
 	err = -EBUSY;
+
 	/* Hash already allocated, make sure that we still meet the
 	 * requirements for the allocated hash.
 	 */
-	if (cp.perfect) {
-		if (!valid_perfect_hash(&cp) ||
-		    cp.hash > cp.alloc_hash)
+	if (cp->perfect) {
+		if (!valid_perfect_hash(cp) ||
+		    cp->hash > cp->alloc_hash)
 			goto errout;
-	} else if (cp.h && cp.hash != cp.alloc_hash)
+	} else if (cp->h && cp->hash != cp->alloc_hash) {
 		goto errout;
+	}
 
 	err = -EINVAL;
 	if (tb[TCA_TCINDEX_FALL_THROUGH])
-		cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
+		cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
 
-	if (!cp.hash) {
+	if (!cp->hash) {
 		/* Hash not specified, use perfect hash if the upper limit
 		 * of the hashing index is below the threshold.
 		 */
-		if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
-			cp.hash = (cp.mask >> cp.shift) + 1;
+		if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+			cp->hash = (cp->mask >> cp->shift) + 1;
 		else
-			cp.hash = DEFAULT_HASH_SIZE;
+			cp->hash = DEFAULT_HASH_SIZE;
 	}
 
-	if (!cp.perfect && !cp.h)
-		cp.alloc_hash = cp.hash;
+	if (!cp->perfect && cp->h)
+		cp->alloc_hash = cp->hash;
 
 	/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
 	 * but then, we'd fail handles that may become valid after some future
 	 * mask change. While this is extremely unlikely to ever matter,
 	 * the check below is safer (and also more backwards-compatible).
 	 */
-	if (cp.perfect || valid_perfect_hash(&cp))
-		if (handle >= cp.alloc_hash)
+	if (cp->perfect || valid_perfect_hash(cp))
+		if (handle >= cp->alloc_hash)
 			goto errout;
 
 
 	err = -ENOMEM;
-	if (!cp.perfect && !cp.h) {
-		if (valid_perfect_hash(&cp)) {
+	if (!cp->perfect && !cp->h) {
+		if (valid_perfect_hash(cp)) {
 			int i;
 
-			cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
-			if (!cp.perfect)
+			cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
+			if (!cp->perfect)
 				goto errout;
-			for (i = 0; i < cp.hash; i++)
-				tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT,
+			for (i = 0; i < cp->hash; i++)
+				tcf_exts_init(&cp->perfect[i].exts,
+					      TCA_TCINDEX_ACT,
 					      TCA_TCINDEX_POLICE);
 			balloc = 1;
 		} else {
-			cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
-			if (!cp.h)
+			struct tcindex_filter __rcu **hash;
+
+			hash = kcalloc(cp->hash,
+				       sizeof(struct tcindex_filter *),
+				       GFP_KERNEL);
+
+			if (!hash)
 				goto errout;
+
+			cp->h = hash;
 			balloc = 2;
 		}
 	}
 
-	if (cp.perfect)
-		r = cp.perfect + handle;
+	if (cp->perfect)
+		r = cp->perfect + handle;
 	else
-		r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
+		r = tcindex_lookup(cp, handle) ? : &new_filter_result;
 
 	if (r == &new_filter_result) {
 		f = kzalloc(sizeof(*f), GFP_KERNEL);
@@ -307,33 +366,41 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	else
 		tcf_exts_change(tp, &cr.exts, &e);
 
-	tcf_tree_lock(tp);
 	if (old_r && old_r != r)
 		tcindex_filter_result_init(old_r);
 
-	memcpy(p, &cp, sizeof(cp));
+	oldp = p;
 	r->res = cr.res;
+	rcu_assign_pointer(tp->root, cp);
 
 	if (r == &new_filter_result) {
-		struct tcindex_filter **fp;
+		struct tcindex_filter *nfp;
+		struct tcindex_filter __rcu **fp;
 
 		f->key = handle;
 		f->result = new_filter_result;
 		f->next = NULL;
-		for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next)
-			/* nothing */;
-		*fp = f;
+
+		fp = p->h + (handle % p->hash);
+		for (nfp = rtnl_dereference(*fp);
+		     nfp;
+		     fp = &nfp->next, nfp = rtnl_dereference(*fp))
+				; /* nothing */
+
+		rcu_assign_pointer(*fp, f);
 	}
-	tcf_tree_unlock(tp);
 
+	if (oldp)
+		call_rcu(&oldp->rcu, __tcindex_partial_destroy);
 	return 0;
 
 errout_alloc:
 	if (balloc == 1)
-		kfree(cp.perfect);
+		kfree(cp->perfect);
 	else if (balloc == 2)
-		kfree(cp.h);
+		kfree(cp->h);
 errout:
+	kfree(cp);
 	tcf_exts_destroy(tp, &e);
 	return err;
 }
@@ -345,7 +412,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
 {
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
-	struct tcindex_data *p = tp->root;
+	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
 	int err;
 
@@ -364,10 +431,9 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
 				 tca[TCA_RATE], ovr);
 }
 
-
 static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 {
-	struct tcindex_data *p = tp->root;
+	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter *f, *next;
 	int i;
 
@@ -390,8 +456,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 	if (!p->h)
 		return;
 	for (i = 0; i < p->hash; i++) {
-		for (f = p->h[i]; f; f = next) {
-			next = f->next;
+		for (f = rtnl_dereference(p->h[i]); f; f = next) {
+			next = rtnl_dereference(f->next);
 			if (walker->count >= walker->skip) {
 				if (walker->fn(tp, (unsigned long) &f->result,
 				    walker) < 0) {
@@ -404,17 +470,9 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 	}
 }
 
-
-static int tcindex_destroy_element(struct tcf_proto *tp,
-    unsigned long arg, struct tcf_walker *walker)
-{
-	return __tcindex_delete(tp, arg, 0);
-}
-
-
 static void tcindex_destroy(struct tcf_proto *tp)
 {
-	struct tcindex_data *p = tp->root;
+	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcf_walker walker;
 
 	pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
@@ -422,17 +480,16 @@ static void tcindex_destroy(struct tcf_proto *tp)
 	walker.skip = 0;
 	walker.fn = tcindex_destroy_element;
 	tcindex_walk(tp, &walker);
-	kfree(p->perfect);
-	kfree(p->h);
-	kfree(p);
-	tp->root = NULL;
+
+	RCU_INIT_POINTER(tp->root, NULL);
+	call_rcu(&p->rcu, __tcindex_destroy);
 }
 
 
 static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct tcindex_data *p = tp->root;
+	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
@@ -455,15 +512,18 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 		nla_nest_end(skb, nest);
 	} else {
 		if (p->perfect) {
-			t->tcm_handle = r-p->perfect;
+			t->tcm_handle = r - p->perfect;
 		} else {
 			struct tcindex_filter *f;
+			struct tcindex_filter __rcu **fp;
 			int i;
 
 			t->tcm_handle = 0;
 			for (i = 0; !t->tcm_handle && i < p->hash; i++) {
-				for (f = p->h[i]; !t->tcm_handle && f;
-				     f = f->next) {
+				fp = &p->h[i];
+				for (f = rtnl_dereference(*fp);
+				     !t->tcm_handle && f;
+				     fp = &f->next, f = rtnl_dereference(*fp)) {
 					if (&f->result == r)
 						t->tcm_handle = f->key;
 				}
-- 
cgit v1.2.3


From a80e49e2cc3145af014a8ae44f575829cc236192 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 16 Aug 2014 17:47:18 +0200
Subject: nohz: Move nohz full init call to tick init

This way we unbloat a bit main.c and more importantly we initialize
nohz full after init_IRQ(). This dependency will be needed in further
patches because nohz full needs irq work to raise its own IRQ.
Information about the support for this ability on ARM64 is obtained on
init_IRQ() which initialize the pointer to __smp_call_function.

Since tick_init() is called right after init_IRQ(), this is a good place
to call tick_nohz_init() and prepare for that dependency.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/tick.h        | 2 --
 init/main.c                 | 1 -
 kernel/time/tick-common.c   | 1 +
 kernel/time/tick-internal.h | 7 +++++++
 4 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9a82c7dc3fdd..595ee86f5e0d 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -181,14 +181,12 @@ static inline bool tick_nohz_full_cpu(int cpu)
 	return cpumask_test_cpu(cpu, tick_nohz_full_mask);
 }
 
-extern void tick_nohz_init(void);
 extern void __tick_nohz_full_check(void);
 extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_cpu(int cpu);
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
-static inline void tick_nohz_init(void) { }
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void __tick_nohz_full_check(void) { }
diff --git a/init/main.c b/init/main.c
index bb1aed928f21..8af2f1abfe38 100644
--- a/init/main.c
+++ b/init/main.c
@@ -577,7 +577,6 @@ asmlinkage __visible void __init start_kernel(void)
 		local_irq_disable();
 	idr_init_cache();
 	rcu_init();
-	tick_nohz_init();
 	context_tracking_init();
 	radix_tree_init();
 	/* init some links before init_ISA_irqs() */
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 0a0608edeb26..052b4b53c3d6 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -400,4 +400,5 @@ void tick_resume(void)
 void __init tick_init(void)
 {
 	tick_broadcast_init();
+	tick_nohz_init();
 }
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index c19c1d84b6f3..366aeb4f2c66 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -99,6 +99,13 @@ static inline int tick_broadcast_oneshot_active(void) { return 0; }
 static inline bool tick_broadcast_oneshot_available(void) { return false; }
 #endif /* !TICK_ONESHOT */
 
+/* NO_HZ_FULL internal */
+#ifdef CONFIG_NO_HZ_FULL
+extern void tick_nohz_init(void);
+# else
+static inline void tick_nohz_init(void) { }
+#endif
+
 /*
  * Broadcasting support
  */
-- 
cgit v1.2.3


From c5c38ef3d70377dc504a6a3f611a3ec814bc757b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 6 Sep 2014 15:43:02 +0200
Subject: irq_work: Introduce arch_irq_work_has_interrupt()

The nohz full code needs irq work to trigger its own interrupt so that
the subsystem can work even when the tick is stopped.

Lets introduce arch_irq_work_has_interrupt() that archs can override to
tell about their support for this ability.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/alpha/include/asm/Kbuild      |  1 +
 arch/arc/include/asm/Kbuild        |  1 +
 arch/arm/include/asm/Kbuild        |  1 +
 arch/arm64/include/asm/Kbuild      |  3 ++-
 arch/avr32/include/asm/Kbuild      |  1 +
 arch/blackfin/include/asm/Kbuild   |  1 +
 arch/c6x/include/asm/Kbuild        |  1 +
 arch/cris/include/asm/Kbuild       |  1 +
 arch/frv/include/asm/Kbuild        |  1 +
 arch/hexagon/include/asm/Kbuild    |  1 +
 arch/ia64/include/asm/Kbuild       |  1 +
 arch/m32r/include/asm/Kbuild       |  1 +
 arch/m68k/include/asm/Kbuild       |  1 +
 arch/metag/include/asm/Kbuild      |  1 +
 arch/microblaze/include/asm/Kbuild |  1 +
 arch/mips/include/asm/Kbuild       |  1 +
 arch/mn10300/include/asm/Kbuild    |  1 +
 arch/openrisc/include/asm/Kbuild   |  1 +
 arch/parisc/include/asm/Kbuild     |  1 +
 arch/powerpc/include/asm/Kbuild    |  1 +
 arch/s390/include/asm/Kbuild       |  1 +
 arch/score/include/asm/Kbuild      |  1 +
 arch/sh/include/asm/Kbuild         |  1 +
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/tile/include/asm/Kbuild       |  1 +
 arch/um/include/asm/Kbuild         |  1 +
 arch/unicore32/include/asm/Kbuild  |  1 +
 arch/x86/include/asm/Kbuild        |  1 +
 arch/xtensa/include/asm/Kbuild     |  1 +
 include/asm-generic/irq_work.h     | 10 ++++++++++
 include/linux/irq_work.h           |  2 ++
 31 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 include/asm-generic/irq_work.h

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index e858aa0ad8af..a52cbf178c3a 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index e76fd79f32b0..b8fffc1a2ac2 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += local.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 70cd84eb7fda..202905e7ea0c 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += hash.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 0b3fcf86e6ba..d617789b1ebd 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -9,8 +9,8 @@ generic-y += current.h
 generic-y += delay.h
 generic-y += div64.h
 generic-y += dma.h
-generic-y += emergency-restart.h
 generic-y += early_ioremap.h
+generic-y += emergency-restart.h
 generic-y += errno.h
 generic-y += ftrace.h
 generic-y += hash.h
@@ -19,6 +19,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 00a0f3ccd6eb..2a71b1cb9848 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += exec.h
 generic-y += futex.h
 generic-y += hash.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 0d93b9a79ca9..46ed6bb9c679 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 8dbdce8421b0..e77e0c1dbe75 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 31742dfadff9..802b94c4ca86 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -8,6 +8,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += kvm_para.h
 generic-y += linkage.h
 generic-y += mcs_spinlock.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 5b73921b6e9d..3caf05cabfc5 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 0e69796b58c7..5f234a5a2320 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += ioctls.h
 generic-y += iomap.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index e8317d2d6c8d..747320be9d0e 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -2,6 +2,7 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index accc10a3dc78..e02448b0648b 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += module.h
 generic-y += preempt.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index c67c94a2d672..dbaf9f3065e8 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index c29ead89a317..7b8111c8f937 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 27a3acda6c19..448143b8cabd 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += cputime.h
 generic-y += device.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 335e5290ec75..57012ef1f51e 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += cputime.h
 generic-y += current.h
 generic-y += emergency-restart.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mutex.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index ecbd6676bd33..77eb1a68d13b 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 480af0d9c2f5..89b61d7dc790 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -31,6 +31,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index ecf25e6678ad..ffb024b8423f 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -10,6 +10,7 @@ generic-y += exec.h
 generic-y += hash.h
 generic-y += hw_irq.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kvm_para.h
 generic-y += local.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 7f23f162ce9c..31e8f59aff38 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -1,6 +1,7 @@
 
 generic-y += clkdev.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += rwsem.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index b3fea0722ff1..773f86676588 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -2,6 +2,7 @@
 
 generic-y += clkdev.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index aad209199f7e..1909d2a5b82f 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index c19e47dacb31..5a6c9acff0d2 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -12,6 +12,7 @@ generic-y += hash.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index cdd1b447bb6c..f5f94ce1692c 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -8,6 +8,7 @@ generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += hash.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 0aa5675e7025..e6462b8a6284 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -17,6 +17,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 7bd64aa2e94a..244b12c8cb39 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -14,6 +14,7 @@ generic-y += hash.h
 generic-y += hw_irq.h
 generic-y += io.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += mcs_spinlock.h
 generic-y += mutex.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 1e5fb872a4aa..5a2bb53faa42 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 3bf000fab0ae..8fa909cc6553 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -7,5 +7,6 @@ genhdr-y += unistd_x32.h
 generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += early_ioremap.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += scatterlist.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index c3d20ba6eb86..105d38922c44 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -12,6 +12,7 @@ generic-y += hardirq.h
 generic-y += hash.h
 generic-y += ioctl.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
diff --git a/include/asm-generic/irq_work.h b/include/asm-generic/irq_work.h
new file mode 100644
index 000000000000..a44f452c6590
--- /dev/null
+++ b/include/asm-generic/irq_work.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_IRQ_WORK_H
+#define __ASM_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return false;
+}
+
+#endif /* __ASM_IRQ_WORK_H */
+
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index bf9422c3aefe..6b47b2ede405 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -42,6 +42,8 @@ void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
 #ifdef CONFIG_IRQ_WORK
+#include <asm/irq_work.h>
+
 bool irq_work_needs_cpu(void);
 #else
 static inline bool irq_work_needs_cpu(void) { return false; }
-- 
cgit v1.2.3


From 76a33061b9323b7fdb220ae5fa116c10833ec22e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 16 Aug 2014 18:37:19 +0200
Subject: irq_work: Force raised irq work to run on irq work interrupt

The nohz full kick, which restarts the tick when any resource depend
on it, can't be executed anywhere given the operation it does on timers.
If it is called from the scheduler or timers code, chances are that
we run into a deadlock.

This is why we run the nohz full kick from an irq work. That way we make
sure that the kick runs on a virgin context.

However if that's the case when irq work runs in its own dedicated
self-ipi, things are different for the big bunch of archs that don't
support the self triggered way. In order to support them, irq works are
also handled by the timer interrupt as fallback.

Now when irq works run on the timer interrupt, the context isn't blank.
More precisely, they can run in the context of the hrtimer that runs the
tick. But the nohz kick cancels and restarts this hrtimer and cancelling
an hrtimer from itself isn't allowed. This is why we run in an endless
loop:

	Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 2
	CPU: 2 PID: 7538 Comm: kworker/u8:8 Not tainted 3.16.0+ #34
	Workqueue: btrfs-endio-write normal_work_helper [btrfs]
	 ffff880244c06c88 000000001b486fe1 ffff880244c06bf0 ffffffff8a7f1e37
	 ffffffff8ac52a18 ffff880244c06c78 ffffffff8a7ef928 0000000000000010
	 ffff880244c06c88 ffff880244c06c20 000000001b486fe1 0000000000000000
	Call Trace:
	 <NMI[<ffffffff8a7f1e37>] dump_stack+0x4e/0x7a
	 [<ffffffff8a7ef928>] panic+0xd4/0x207
	 [<ffffffff8a1450e8>] watchdog_overflow_callback+0x118/0x120
	 [<ffffffff8a186b0e>] __perf_event_overflow+0xae/0x350
	 [<ffffffff8a184f80>] ? perf_event_task_disable+0xa0/0xa0
	 [<ffffffff8a01a4cf>] ? x86_perf_event_set_period+0xbf/0x150
	 [<ffffffff8a187934>] perf_event_overflow+0x14/0x20
	 [<ffffffff8a020386>] intel_pmu_handle_irq+0x206/0x410
	 [<ffffffff8a01937b>] perf_event_nmi_handler+0x2b/0x50
	 [<ffffffff8a007b72>] nmi_handle+0xd2/0x390
	 [<ffffffff8a007aa5>] ? nmi_handle+0x5/0x390
	 [<ffffffff8a0cb7f8>] ? match_held_lock+0x8/0x1b0
	 [<ffffffff8a008062>] default_do_nmi+0x72/0x1c0
	 [<ffffffff8a008268>] do_nmi+0xb8/0x100
	 [<ffffffff8a7ff66a>] end_repeat_nmi+0x1e/0x2e
	 [<ffffffff8a0cb7f8>] ? match_held_lock+0x8/0x1b0
	 [<ffffffff8a0cb7f8>] ? match_held_lock+0x8/0x1b0
	 [<ffffffff8a0cb7f8>] ? match_held_lock+0x8/0x1b0
	 <<EOE><IRQ[<ffffffff8a0ccd2f>] lock_acquired+0xaf/0x450
	 [<ffffffff8a0f74c5>] ? lock_hrtimer_base.isra.20+0x25/0x50
	 [<ffffffff8a7fc678>] _raw_spin_lock_irqsave+0x78/0x90
	 [<ffffffff8a0f74c5>] ? lock_hrtimer_base.isra.20+0x25/0x50
	 [<ffffffff8a0f74c5>] lock_hrtimer_base.isra.20+0x25/0x50
	 [<ffffffff8a0f7723>] hrtimer_try_to_cancel+0x33/0x1e0
	 [<ffffffff8a0f78ea>] hrtimer_cancel+0x1a/0x30
	 [<ffffffff8a109237>] tick_nohz_restart+0x17/0x90
	 [<ffffffff8a10a213>] __tick_nohz_full_check+0xc3/0x100
	 [<ffffffff8a10a25e>] nohz_full_kick_work_func+0xe/0x10
	 [<ffffffff8a17c884>] irq_work_run_list+0x44/0x70
	 [<ffffffff8a17c8da>] irq_work_run+0x2a/0x50
	 [<ffffffff8a0f700b>] update_process_times+0x5b/0x70
	 [<ffffffff8a109005>] tick_sched_handle.isra.21+0x25/0x60
	 [<ffffffff8a109b81>] tick_sched_timer+0x41/0x60
	 [<ffffffff8a0f7aa2>] __run_hrtimer+0x72/0x470
	 [<ffffffff8a109b40>] ? tick_sched_do_timer+0xb0/0xb0
	 [<ffffffff8a0f8707>] hrtimer_interrupt+0x117/0x270
	 [<ffffffff8a034357>] local_apic_timer_interrupt+0x37/0x60
	 [<ffffffff8a80010f>] smp_apic_timer_interrupt+0x3f/0x50
	 [<ffffffff8a7fe52f>] apic_timer_interrupt+0x6f/0x80

To fix this we force non-lazy irq works to run on irq work self-IPIs
when available. That ability of the arch to trigger irq work self IPIs
is available with arch_irq_work_has_interrupt().

Reported-by: Catalin Iacob <iacobcatalin@gmail.com>
Reported-by: Dave Jones <davej@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/irq_work.h |  1 +
 kernel/irq_work.c        | 15 +++++++++++++--
 kernel/time/timer.c      |  2 +-
 3 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 6b47b2ede405..bf3fe719c7ce 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -39,6 +39,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu);
 #endif
 
 void irq_work_run(void);
+void irq_work_tick(void);
 void irq_work_sync(struct irq_work *work);
 
 #ifdef CONFIG_IRQ_WORK
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index e6bcbe756663..385b85aded19 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -115,8 +115,10 @@ bool irq_work_needs_cpu(void)
 
 	raised = &__get_cpu_var(raised_list);
 	lazy = &__get_cpu_var(lazy_list);
-	if (llist_empty(raised) && llist_empty(lazy))
-		return false;
+
+	if (llist_empty(raised) || arch_irq_work_has_interrupt())
+		if (llist_empty(lazy))
+			return false;
 
 	/* All work should have been flushed before going offline */
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
@@ -171,6 +173,15 @@ void irq_work_run(void)
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
 
+void irq_work_tick(void)
+{
+	struct llist_head *raised = &__get_cpu_var(raised_list);
+
+	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
+		irq_work_run_list(raised);
+	irq_work_run_list(&__get_cpu_var(lazy_list));
+}
+
 /*
  * Synchronize against the irq_work @entry, ensures the entry is not
  * currently in use.
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index aca5dfe2fa3d..9bbb8344ed3b 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1385,7 +1385,7 @@ void update_process_times(int user_tick)
 	rcu_check_callbacks(cpu, user_tick);
 #ifdef CONFIG_IRQ_WORK
 	if (in_irq())
-		irq_work_run();
+		irq_work_tick();
 #endif
 	scheduler_tick();
 	run_posix_cpu_timers(p);
-- 
cgit v1.2.3


From 23d0db76ffa13ffb95229946e4648568c3c29db5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 13 Sep 2014 11:24:03 -0700
Subject: Make hash_64() use a 64-bit multiply when appropriate

The hash_64() function historically does the multiply by the
GOLDEN_RATIO_PRIME_64 number with explicit shifts and adds, because
unlike the 32-bit case, gcc seems unable to turn the constant multiply
into the more appropriate shift and adds when required.

However, that means that we generate those shifts and adds even when the
architecture has a fast multiplier, and could just do it better in
hardware.

Use the now-cleaned-up CONFIG_ARCH_HAS_FAST_MULTIPLIER (together with
"is it a 64-bit architecture") to decide whether to use an integer
multiply or the explicit sequence of shift/add instructions.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hash.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hash.h b/include/linux/hash.h
index bd1754c7ecef..d0494c399392 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -37,6 +37,9 @@ static __always_inline u64 hash_64(u64 val, unsigned int bits)
 {
 	u64 hash = val;
 
+#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
+	hash = hash * GOLDEN_RATIO_PRIME_64;
+#else
 	/*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
 	u64 n = hash;
 	n <<= 18;
@@ -51,6 +54,7 @@ static __always_inline u64 hash_64(u64 val, unsigned int bits)
 	hash += n;
 	n <<= 2;
 	hash += n;
+#endif
 
 	/* High bits are more random, so use them. */
 	return hash >> (64 - bits);
-- 
cgit v1.2.3


From 6c555490e0ce885a9caf0a045db69382a3ccbc9c Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 11 Sep 2014 15:35:09 -0700
Subject: ipv6: drop useless rcu_read_lock() in anycast

These code is now protected by rtnl lock, rcu read lock
is useless now.

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 net/core/dev.c            | 14 ++++++++------
 net/ipv6/anycast.c        | 18 ++++++------------
 3 files changed, 16 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ae721f53739e..ee38b948d9a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2083,8 +2083,8 @@ void __dev_remove_pack(struct packet_type *pt);
 void dev_add_offload(struct packet_offload *po);
 void dev_remove_offload(struct packet_offload *po);
 
-struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
-					unsigned short mask);
+struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
+				      unsigned short mask);
 struct net_device *dev_get_by_name(struct net *net, const char *name);
 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
 struct net_device *__dev_get_by_name(struct net *net, const char *name);
diff --git a/net/core/dev.c b/net/core/dev.c
index b3d6dbc0c696..e916ba8caccf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -897,23 +897,25 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
 /**
- *	dev_get_by_flags_rcu - find any device with given flags
+ *	__dev_get_by_flags - find any device with given flags
  *	@net: the applicable net namespace
  *	@if_flags: IFF_* values
  *	@mask: bitmask of bits in if_flags to check
  *
  *	Search for any interface with the given flags. Returns NULL if a device
  *	is not found or a pointer to the device. Must be called inside
- *	rcu_read_lock(), and result refcount is unchanged.
+ *	rtnl_lock(), and result refcount is unchanged.
  */
 
-struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
-				    unsigned short mask)
+struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
+				      unsigned short mask)
 {
 	struct net_device *dev, *ret;
 
+	ASSERT_RTNL();
+
 	ret = NULL;
-	for_each_netdev_rcu(net, dev) {
+	for_each_netdev(net, dev) {
 		if (((dev->flags ^ if_flags) & mask) == 0) {
 			ret = dev;
 			break;
@@ -921,7 +923,7 @@ struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags
 	}
 	return ret;
 }
-EXPORT_SYMBOL(dev_get_by_flags_rcu);
+EXPORT_SYMBOL(__dev_get_by_flags);
 
 /**
  *	dev_valid_name - check if name is okay for network device
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index ff2de7d9d8e6..3b0429bc6b5a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -78,7 +78,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	pac->acl_addr = *addr;
 
 	rtnl_lock();
-	rcu_read_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
 
@@ -91,11 +90,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			goto error;
 		} else {
 			/* router, no matching interface: just pick one */
-			dev = dev_get_by_flags_rcu(net, IFF_UP,
-						   IFF_UP | IFF_LOOPBACK);
+			dev = __dev_get_by_flags(net, IFF_UP,
+						 IFF_UP | IFF_LOOPBACK);
 		}
 	} else
-		dev = dev_get_by_index_rcu(net, ifindex);
+		dev = __dev_get_by_index(net, ifindex);
 
 	if (dev == NULL) {
 		err = -ENODEV;
@@ -137,7 +136,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	}
 
 error:
-	rcu_read_unlock();
 	rtnl_unlock();
 	if (pac)
 		sock_kfree_s(sk, pac, sizeof(*pac));
@@ -174,11 +172,9 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	spin_unlock_bh(&ipv6_sk_ac_lock);
 
 	rtnl_lock();
-	rcu_read_lock();
-	dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+	dev = __dev_get_by_index(net, pac->acl_ifindex);
 	if (dev)
 		ipv6_dev_ac_dec(dev, &pac->acl_addr);
-	rcu_read_unlock();
 	rtnl_unlock();
 
 	sock_kfree_s(sk, pac, sizeof(*pac));
@@ -203,12 +199,11 @@ void ipv6_sock_ac_close(struct sock *sk)
 
 	prev_index = 0;
 	rtnl_lock();
-	rcu_read_lock();
 	while (pac) {
 		struct ipv6_ac_socklist *next = pac->acl_next;
 
 		if (pac->acl_ifindex != prev_index) {
-			dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+			dev = __dev_get_by_index(net, pac->acl_ifindex);
 			prev_index = pac->acl_ifindex;
 		}
 		if (dev)
@@ -216,7 +211,6 @@ void ipv6_sock_ac_close(struct sock *sk)
 		sock_kfree_s(sk, pac, sizeof(*pac));
 		pac = next;
 	}
-	rcu_read_unlock();
 	rtnl_unlock();
 }
 
@@ -341,7 +335,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	return 0;
 }
 
-/* called with rcu_read_lock() */
+/* called with rtnl_lock() */
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
 {
 	struct inet6_dev *idev = __in6_dev_get(dev);
-- 
cgit v1.2.3


From 233577a22089facf5271ab5e845b2262047c971f Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Fri, 12 Sep 2014 14:04:43 +0200
Subject: net: filter: constify detection of pkt_type_offset

Currently we have 2 pkt_type_offset functions doing the same thing and
spread across the architecture files. Remove those and replace them
with a PKT_TYPE_OFFSET macro helper which gets the constant value from a
zero sized sk_buff member right in front of the bitfield with offsetof.
This new offset marker does not change size of struct sk_buff.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/net/bpf_jit.c      | 27 +--------------------------
 arch/s390/net/bpf_jit_comp.c | 35 +----------------------------------
 include/linux/skbuff.h       | 10 ++++++++++
 net/core/filter.c            | 31 ++-----------------------------
 4 files changed, 14 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 0e97ccd29fe3..7edc08398c4a 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
 	return (u64)err << 32 | ntohl(ret);
 }
 
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX	(7 << 5)
-#else
-#define PKT_TYPE_MAX	7
-#endif
-static int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	u8 *ct = (u8 *)&skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
-	return -1;
-}
-
 static int build_body(struct jit_ctx *ctx)
 {
 	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
@@ -1332,11 +1311,7 @@ jmp_cmp:
 		case BPF_ANC | SKF_AD_PKTTYPE:
 			ctx->flags |= SEEN_SKB;
 
-			off = pkt_type_offset();
-
-			if (off < 0)
-				return -1;
-			emit_load_byte(r_tmp, r_skb, off, ctx);
+			emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
 			/* Keep only the last 3 bits */
 			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
 #ifdef __BIG_ENDIAN_BITFIELD
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 555f5c7e83ab..c52ac77408ca 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -227,37 +227,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 	EMIT2(0x07fe);
 }
 
-/* Helper to find the offset of pkt_type in sk_buff
- * Make sure its still a 3bit field starting at the MSBs within a byte.
- */
-#define PKT_TYPE_MAX 0xe0
-static int pkt_type_offset;
-
-static int __init bpf_pkt_type_offset_init(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	char *ct = (char *)&skb_probe;
-	int off;
-
-	pkt_type_offset = -1;
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (!ct[off])
-			continue;
-		if (ct[off] == PKT_TYPE_MAX)
-			pkt_type_offset = off;
-		else {
-			/* Found non matching bit pattern, fix needed. */
-			WARN_ON_ONCE(1);
-			pkt_type_offset = -1;
-			return -1;
-		}
-	}
-	return 0;
-}
-device_initcall(bpf_pkt_type_offset_init);
-
 /*
  * make sure we dont leak kernel information to user
  */
@@ -757,12 +726,10 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		}
 		break;
 	case BPF_ANC | SKF_AD_PKTTYPE:
-		if (pkt_type_offset < 0)
-			goto out;
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* ic %r5,<d(pkt_type_offset)>(%r2) */
-		EMIT4_DISP(0x43502000, pkt_type_offset);
+		EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET());
 		/* srl %r5,5 */
 		EMIT4_DISP(0x88500000, 5);
 		break;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 07c9fdd0c126..756e3d057e84 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -548,6 +548,16 @@ struct sk_buff {
 				ip_summed:2,
 				nohdr:1,
 				nfctinfo:3;
+
+/* if you move pkt_type around you also must adapt those constants */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX	(7 << 5)
+#else
+#define PKT_TYPE_MAX	7
+#endif
+#define PKT_TYPE_OFFSET()	offsetof(struct sk_buff, __pkt_type_offset)
+
+	__u8			__pkt_type_offset[0];
 	__u8			pkt_type:3,
 				fclone:2,
 				ipvs_property:1,
diff --git a/net/core/filter.c b/net/core/filter.c
index dfc716ffa44b..601f28de7311 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -87,30 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sk_filter);
 
-/* Helper to find the offset of pkt_type in sk_buff structure. We want
- * to make sure its still a 3bit field starting at a byte boundary;
- * taken from arch/x86/net/bpf_jit_comp.c.
- */
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX	(7 << 5)
-#else
-#define PKT_TYPE_MAX	7
-#endif
-static unsigned int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = { .pkt_type = ~0, };
-	u8 *ct = (u8 *) &skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-
-	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
-	return -1;
-}
-
 static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
@@ -190,11 +166,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
-		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
-				    pkt_type_offset());
-		if (insn->off < 0)
-			return false;
-		insn++;
+		*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
+				      PKT_TYPE_OFFSET());
 		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
 #ifdef __BIG_ENDIAN_BITFIELD
 		insn++;
-- 
cgit v1.2.3


From 3fc8867740b4a0bf56f372c6f5ddd14970962fb1 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Sep 2014 23:12:46 -0700
Subject: netdevice: Support DSA tagging when DSA is built as a module

This change corrects an error seen when DSA tagging is built as a module.
Without this change it is not possible to get XDSA tagged frames as the
test for tagging is stripped by the #ifdef check.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ee38b948d9a0..f9e81d10a3b9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1789,7 +1789,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
 
 static inline bool netdev_uses_dsa(struct net_device *dev)
 {
-#ifdef CONFIG_NET_DSA
+#if IS_ENABLED(CONFIG_NET_DSA)
 	if (dev->dsa_ptr != NULL)
 		return dsa_uses_tagged_protocol(dev->dsa_ptr);
 #endif
-- 
cgit v1.2.3


From 9226b5b440f2b4fbb3b797f3cb74a9a627220660 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 14 Sep 2014 17:28:32 -0700
Subject: vfs: avoid non-forwarding large load after small store in path lookup

The performance regression that Josef Bacik reported in the pathname
lookup (see commit 99d263d4c5b2 "vfs: fix bad hashing of dentries") made
me look at performance stability of the dcache code, just to verify that
the problem was actually fixed.  That turned up a few other problems in
this area.

There are a few cases where we exit RCU lookup mode and go to the slow
serializing case when we shouldn't, Al has fixed those and they'll come
in with the next VFS pull.

But my performance verification also shows that link_path_walk() turns
out to have a very unfortunate 32-bit store of the length and hash of
the name we look up, followed by a 64-bit read of the combined hash_len
field.  That screws up the processor store to load forwarding, causing
an unnecessary hickup in this critical routine.

It's caused by the ugly calling convention for the "hash_name()"
function, and easily fixed by just making hash_name() fill in the whole
'struct qstr' rather than passing it a pointer to just the hash value.

With that, the profile for this function looks much smoother.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c             | 19 ++++++++++---------
 include/linux/dcache.h |  1 +
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 229235862e50..2be5120b81b3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1669,13 +1669,14 @@ EXPORT_SYMBOL(full_name_hash);
 
 /*
  * Calculate the length and hash of the path component, and
- * return the length of the component;
+ * fill in the qstr. return the "len" as the result.
  */
-static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+static inline unsigned long hash_name(const char *name, struct qstr *res)
 {
 	unsigned long a, b, adata, bdata, mask, hash, len;
 	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 
+	res->name = name;
 	hash = a = 0;
 	len = -sizeof(unsigned long);
 	do {
@@ -1691,9 +1692,10 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
 	mask = create_zero_mask(adata | bdata);
 
 	hash += a & zero_bytemask(mask);
-	*hashp = fold_hash(hash);
+	len += find_zero(mask);
+	res->hash_len = hashlen_create(fold_hash(hash), len);
 
-	return len + find_zero(mask);
+	return len;
 }
 
 #else
@@ -1711,18 +1713,19 @@ EXPORT_SYMBOL(full_name_hash);
  * We know there's a real path component here of at least
  * one character.
  */
-static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+static inline long hash_name(const char *name, struct qstr *res)
 {
 	unsigned long hash = init_name_hash();
 	unsigned long len = 0, c;
 
+	res->name = name;
 	c = (unsigned char)*name;
 	do {
 		len++;
 		hash = partial_name_hash(c, hash);
 		c = (unsigned char)name[len];
 	} while (c && c != '/');
-	*hashp = end_name_hash(hash);
+	res->hash_len = hashlen_create(end_name_hash(hash), len);
 	return len;
 }
 
@@ -1756,9 +1759,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
  		if (err)
 			break;
 
-		len = hash_name(name, &this.hash);
-		this.name = name;
-		this.len = len;
+		len = hash_name(name, &this);
 
 		type = LAST_NORM;
 		if (name[0] == '.') switch (len) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e4ae2ad48d07..75a227cc7ce2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -55,6 +55,7 @@ struct qstr {
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
 #define hashlen_hash(hashlen) ((u32) (hashlen))
 #define hashlen_len(hashlen)  ((u32)((hashlen) >> 32))
+#define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash))
 
 struct dentry_stat_t {
 	long nr_dentry;
-- 
cgit v1.2.3


From 1d46fea7d091f9dc2d4fd3fcb9f0117ca288f9a5 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 27 Aug 2014 00:42:56 +0200
Subject: drm/rcar-du: Use struct videomode in platform data

In preparation for DT support where panel timings will be described by a
DRM-agnostic video mode, replace the struct drm_mode_modeinfo instance
in the panel platform data with a struct videomode.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
---
 arch/arm/mach-shmobile/board-koelsch-reference.c | 19 +++++++++----------
 arch/arm/mach-shmobile/board-koelsch.c           | 19 +++++++++----------
 arch/arm/mach-shmobile/board-lager-reference.c   | 19 +++++++++----------
 arch/arm/mach-shmobile/board-lager.c             | 19 +++++++++----------
 arch/arm/mach-shmobile/board-marzen.c            | 19 +++++++++----------
 drivers/gpu/drm/rcar-du/Kconfig                  |  1 +
 drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c        | 15 +++------------
 include/linux/platform_data/rcar-du.h            |  4 ++--
 8 files changed, 51 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-shmobile/board-koelsch-reference.c b/arch/arm/mach-shmobile/board-koelsch-reference.c
index 3ff88c138896..364e69bf85d4 100644
--- a/arch/arm/mach-shmobile/board-koelsch-reference.c
+++ b/arch/arm/mach-shmobile/board-koelsch-reference.c
@@ -41,16 +41,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = {
 			.width_mm = 210,
 			.height_mm = 158,
 			.mode = {
-				.clock = 65000,
-				.hdisplay = 1024,
-				.hsync_start = 1048,
-				.hsync_end = 1184,
-				.htotal = 1344,
-				.vdisplay = 768,
-				.vsync_start = 771,
-				.vsync_end = 777,
-				.vtotal = 806,
-				.flags = 0,
+				.pixelclock = 65000000,
+				.hactive = 1024,
+				.hfront_porch = 20,
+				.hback_porch = 160,
+				.hsync_len = 136,
+				.vactive = 768,
+				.vfront_porch = 3,
+				.vback_porch = 29,
+				.vsync_len = 6,
 			},
 		},
 	},
diff --git a/arch/arm/mach-shmobile/board-koelsch.c b/arch/arm/mach-shmobile/board-koelsch.c
index b7d5bc7659cd..ad10ddb6a321 100644
--- a/arch/arm/mach-shmobile/board-koelsch.c
+++ b/arch/arm/mach-shmobile/board-koelsch.c
@@ -63,16 +63,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = {
 			.width_mm = 210,
 			.height_mm = 158,
 			.mode = {
-				.clock = 65000,
-				.hdisplay = 1024,
-				.hsync_start = 1048,
-				.hsync_end = 1184,
-				.htotal = 1344,
-				.vdisplay = 768,
-				.vsync_start = 771,
-				.vsync_end = 777,
-				.vtotal = 806,
-				.flags = 0,
+				.pixelclock = 65000000,
+				.hactive = 1024,
+				.hfront_porch = 20,
+				.hback_porch = 160,
+				.hsync_len = 136,
+				.vactive = 768,
+				.vfront_porch = 3,
+				.vback_porch = 29,
+				.vsync_len = 6,
 			},
 		},
 	},
diff --git a/arch/arm/mach-shmobile/board-lager-reference.c b/arch/arm/mach-shmobile/board-lager-reference.c
index 41c808e56005..12a53a1c3d02 100644
--- a/arch/arm/mach-shmobile/board-lager-reference.c
+++ b/arch/arm/mach-shmobile/board-lager-reference.c
@@ -43,16 +43,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = {
 			.width_mm = 210,
 			.height_mm = 158,
 			.mode = {
-				.clock = 65000,
-				.hdisplay = 1024,
-				.hsync_start = 1048,
-				.hsync_end = 1184,
-				.htotal = 1344,
-				.vdisplay = 768,
-				.vsync_start = 771,
-				.vsync_end = 777,
-				.vtotal = 806,
-				.flags = 0,
+				.pixelclock = 65000000,
+				.hactive = 1024,
+				.hfront_porch = 20,
+				.hback_porch = 160,
+				.hsync_len = 136,
+				.vactive = 768,
+				.vfront_porch = 3,
+				.vback_porch = 29,
+				.vsync_len = 6,
 			},
 		},
 	},
diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c
index e1d8215da0b0..80576c2ee668 100644
--- a/arch/arm/mach-shmobile/board-lager.c
+++ b/arch/arm/mach-shmobile/board-lager.c
@@ -99,16 +99,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = {
 			.width_mm = 210,
 			.height_mm = 158,
 			.mode = {
-				.clock = 65000,
-				.hdisplay = 1024,
-				.hsync_start = 1048,
-				.hsync_end = 1184,
-				.htotal = 1344,
-				.vdisplay = 768,
-				.vsync_start = 771,
-				.vsync_end = 777,
-				.vtotal = 806,
-				.flags = 0,
+				.pixelclock = 65000000,
+				.hactive = 1024,
+				.hfront_porch = 20,
+				.hback_porch = 160,
+				.hsync_len = 136,
+				.vactive = 768,
+				.vfront_porch = 3,
+				.vback_porch = 29,
+				.vsync_len = 6,
 			},
 		},
 	},
diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c
index e5cf4201e769..ce33d7825c49 100644
--- a/arch/arm/mach-shmobile/board-marzen.c
+++ b/arch/arm/mach-shmobile/board-marzen.c
@@ -192,16 +192,15 @@ static struct rcar_du_encoder_data du_encoders[] = {
 			.width_mm = 210,
 			.height_mm = 158,
 			.mode = {
-				.clock = 65000,
-				.hdisplay = 1024,
-				.hsync_start = 1048,
-				.hsync_end = 1184,
-				.htotal = 1344,
-				.vdisplay = 768,
-				.vsync_start = 771,
-				.vsync_end = 777,
-				.vtotal = 806,
-				.flags = 0,
+				.pixelclock = 65000000,
+				.hactive = 1024,
+				.hfront_porch = 20,
+				.hback_porch = 160,
+				.hsync_len = 136,
+				.vactive = 768,
+				.vfront_porch = 3,
+				.vback_porch = 29,
+				.vsync_len = 6,
 			},
 		},
 	},
diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
index 2e3d7b5b0ad7..c96f6089f8bf 100644
--- a/drivers/gpu/drm/rcar-du/Kconfig
+++ b/drivers/gpu/drm/rcar-du/Kconfig
@@ -6,6 +6,7 @@ config DRM_RCAR_DU
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_FB_HELPER
+	select VIDEOMODE_HELPERS
 	help
 	  Choose this option if you have an R-Car chipset.
 	  If M is selected the module will be called rcar-du-drm.
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
index cfcf6e74ad0a..d29544121658 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
@@ -40,18 +40,9 @@ static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector)
 		return 0;
 
 	mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
-	mode->clock = lvdscon->panel->mode.clock;
-	mode->hdisplay = lvdscon->panel->mode.hdisplay;
-	mode->hsync_start = lvdscon->panel->mode.hsync_start;
-	mode->hsync_end = lvdscon->panel->mode.hsync_end;
-	mode->htotal = lvdscon->panel->mode.htotal;
-	mode->vdisplay = lvdscon->panel->mode.vdisplay;
-	mode->vsync_start = lvdscon->panel->mode.vsync_start;
-	mode->vsync_end = lvdscon->panel->mode.vsync_end;
-	mode->vtotal = lvdscon->panel->mode.vtotal;
-	mode->flags = lvdscon->panel->mode.flags;
-
-	drm_mode_set_name(mode);
+
+	drm_display_mode_from_videomode(&lvdscon->panel->mode, mode);
+
 	drm_mode_probed_add(connector, mode);
 
 	return 1;
diff --git a/include/linux/platform_data/rcar-du.h b/include/linux/platform_data/rcar-du.h
index 1a2e9901a22e..a5f045e1d8fe 100644
--- a/include/linux/platform_data/rcar-du.h
+++ b/include/linux/platform_data/rcar-du.h
@@ -14,7 +14,7 @@
 #ifndef __RCAR_DU_H__
 #define __RCAR_DU_H__
 
-#include <drm/drm_mode.h>
+#include <video/videomode.h>
 
 enum rcar_du_output {
 	RCAR_DU_OUTPUT_DPAD0,
@@ -35,7 +35,7 @@ enum rcar_du_encoder_type {
 struct rcar_du_panel_data {
 	unsigned int width_mm;		/* Panel width in mm */
 	unsigned int height_mm;		/* Panel height in mm */
-	struct drm_mode_modeinfo mode;
+	struct videomode mode;
 };
 
 struct rcar_du_connector_lvds_data {
-- 
cgit v1.2.3


From 0e9871e3f79fd17c691b50a9669220c54ff084a2 Mon Sep 17 00:00:00 2001
From: Anton Danilov <littlesmilingcloud@gmail.com>
Date: Thu, 28 Aug 2014 10:11:27 +0400
Subject: netfilter: ipset: Add skbinfo extension kernel support in the ipset
 core.

Skbinfo extension provides mapping of metainformation with lookup in the ipset tables.
This patch defines the flags, the constants, the functions and the structures
for the data type independent support of the extension.
Note the firewall mark stores in the kernel structures as two 32bit values,
but transfered through netlink as one 64bit value.

Signed-off-by: Anton Danilov <littlesmilingcloud@gmail.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      | 56 ++++++++++++++++++++++++++++-
 include/uapi/linux/netfilter/ipset/ip_set.h | 12 +++++++
 net/netfilter/ipset/ip_set_core.c           | 27 +++++++++++++-
 3 files changed, 93 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 96afc29184be..b97aac5142ed 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -57,6 +57,8 @@ enum ip_set_extension {
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
 	IPSET_EXT_BIT_COMMENT = 2,
 	IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
+	IPSET_EXT_BIT_SKBINFO = 3,
+	IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO),
 	/* Mark set with an extension which needs to call destroy */
 	IPSET_EXT_BIT_DESTROY = 7,
 	IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
@@ -65,12 +67,14 @@ enum ip_set_extension {
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 #define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_SKBINFO(s)	((s)->extensions & IPSET_EXT_SKBINFO)
 #define SET_WITH_FORCEADD(s)	((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
 
 /* Extension id, in size order */
 enum ip_set_ext_id {
 	IPSET_EXT_ID_COUNTER = 0,
 	IPSET_EXT_ID_TIMEOUT,
+	IPSET_EXT_ID_SKBINFO,
 	IPSET_EXT_ID_COMMENT,
 	IPSET_EXT_ID_MAX,
 };
@@ -92,6 +96,10 @@ struct ip_set_ext {
 	u64 packets;
 	u64 bytes;
 	u32 timeout;
+	u32 skbmark;
+	u32 skbmarkmask;
+	u32 skbprio;
+	u16 skbqueue;
 	char *comment;
 };
 
@@ -104,6 +112,13 @@ struct ip_set_comment {
 	char *str;
 };
 
+struct ip_set_skbinfo {
+	u32 skbmark;
+	u32 skbmarkmask;
+	u32 skbprio;
+	u16 skbqueue;
+};
+
 struct ip_set;
 
 #define ext_timeout(e, s)	\
@@ -112,7 +127,8 @@ struct ip_set;
 (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
 #define ext_comment(e, s)	\
 (struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
-
+#define ext_skbinfo(e, s)	\
+(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
@@ -256,6 +272,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
 		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
 	if (SET_WITH_COMMENT(set))
 		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+	if (SET_WITH_SKBINFO(set))
+		cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
 	if (SET_WITH_FORCEADD(set))
 		cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
 
@@ -304,6 +322,39 @@ ip_set_update_counter(struct ip_set_counter *counter,
 	}
 }
 
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+		      const struct ip_set_ext *ext,
+		      struct ip_set_ext *mext, u32 flags)
+{
+		mext->skbmark = skbinfo->skbmark;
+		mext->skbmarkmask = skbinfo->skbmarkmask;
+		mext->skbprio = skbinfo->skbprio;
+		mext->skbqueue = skbinfo->skbqueue;
+}
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
+{
+	return nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+			     cpu_to_be64((u64)skbinfo->skbmark << 32 |
+					 skbinfo->skbmarkmask)) ||
+	       nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+			     cpu_to_be32(skbinfo->skbprio)) ||
+	       nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+			     cpu_to_be16(skbinfo->skbqueue));
+
+}
+
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+		    const struct ip_set_ext *ext)
+{
+	skbinfo->skbmark = ext->skbmark;
+	skbinfo->skbmarkmask = ext->skbmarkmask;
+	skbinfo->skbprio = ext->skbprio;
+	skbinfo->skbqueue = ext->skbqueue;
+}
+
 static inline bool
 ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
 {
@@ -497,6 +548,9 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
 	if (SET_WITH_COMMENT(set) &&
 	    ip_set_put_comment(skb, ext_comment(e, set)))
 		return -EMSGSIZE;
+	if (SET_WITH_SKBINFO(set) &&
+	    ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+		return -EMSGSIZE;
 	return 0;
 }
 
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 78c2f2e79920..ca03119111a2 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -115,6 +115,9 @@ enum {
 	IPSET_ATTR_BYTES,
 	IPSET_ATTR_PACKETS,
 	IPSET_ATTR_COMMENT,
+	IPSET_ATTR_SKBMARK,
+	IPSET_ATTR_SKBPRIO,
+	IPSET_ATTR_SKBQUEUE,
 	__IPSET_ATTR_ADT_MAX,
 };
 #define IPSET_ATTR_ADT_MAX	(__IPSET_ATTR_ADT_MAX - 1)
@@ -147,6 +150,7 @@ enum ipset_errno {
 	IPSET_ERR_COUNTER,
 	IPSET_ERR_COMMENT,
 	IPSET_ERR_INVALID_MARKMASK,
+	IPSET_ERR_SKBINFO,
 
 	/* Type specific error codes */
 	IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -170,6 +174,12 @@ enum ipset_cmd_flags {
 	IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
 	IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
 	IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
+	IPSET_FLAG_BIT_MAP_SKBMARK = 8,
+	IPSET_FLAG_MAP_SKBMARK = (1 << IPSET_FLAG_BIT_MAP_SKBMARK),
+	IPSET_FLAG_BIT_MAP_SKBPRIO = 9,
+	IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
+	IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
+	IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
 	IPSET_FLAG_CMD_MAX = 15,
 };
 
@@ -187,6 +197,8 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
 	IPSET_FLAG_BIT_WITH_FORCEADD = 5,
 	IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
+	IPSET_FLAG_BIT_WITH_SKBINFO = 6,
+	IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO),
 	IPSET_FLAG_CADT_MAX	= 15,
 };
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 4ca4e5ca6f57..26c795e6b57f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -337,6 +337,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
 		.len	= sizeof(unsigned long),
 		.align	= __alignof__(unsigned long),
 	},
+	[IPSET_EXT_ID_SKBINFO] = {
+		.type	= IPSET_EXT_SKBINFO,
+		.flag	= IPSET_FLAG_WITH_SKBINFO,
+		.len	= sizeof(struct ip_set_skbinfo),
+		.align	= __alignof__(struct ip_set_skbinfo),
+	},
 	[IPSET_EXT_ID_COMMENT] = {
 		.type	 = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
 		.flag	 = IPSET_FLAG_WITH_COMMENT,
@@ -382,6 +388,7 @@ int
 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 		      struct ip_set_ext *ext)
 {
+	u64 fullmark;
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!(set->extensions & IPSET_EXT_TIMEOUT))
 			return -IPSET_ERR_TIMEOUT;
@@ -402,7 +409,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 			return -IPSET_ERR_COMMENT;
 		ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
 	}
-
+	if (tb[IPSET_ATTR_SKBMARK]) {
+		if (!(set->extensions & IPSET_EXT_SKBINFO))
+			return -IPSET_ERR_SKBINFO;
+		fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
+		ext->skbmark = fullmark >> 32;
+		ext->skbmarkmask = fullmark & 0xffffffff;
+	}
+	if (tb[IPSET_ATTR_SKBPRIO]) {
+		if (!(set->extensions & IPSET_EXT_SKBINFO))
+			return -IPSET_ERR_SKBINFO;
+		ext->skbprio = be32_to_cpu(nla_get_be32(
+					    tb[IPSET_ATTR_SKBPRIO]));
+	}
+	if (tb[IPSET_ATTR_SKBQUEUE]) {
+		if (!(set->extensions & IPSET_EXT_SKBINFO))
+			return -IPSET_ERR_SKBINFO;
+		ext->skbqueue = be16_to_cpu(nla_get_be16(
+					    tb[IPSET_ATTR_SKBQUEUE]));
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
-- 
cgit v1.2.3


From aef96193fe7b2791c4a3b19fe75426b929769471 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 15 Sep 2014 17:30:54 +0200
Subject: netfilter: ipset: send nonzero skbinfo extensions only

Do not send zero valued skbinfo extensions to userspace at listing.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index b97aac5142ed..f1606fa6132d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -335,13 +335,17 @@ ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
 static inline bool
 ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
 {
-	return nla_put_net64(skb, IPSET_ATTR_SKBMARK,
-			     cpu_to_be64((u64)skbinfo->skbmark << 32 |
-					 skbinfo->skbmarkmask)) ||
-	       nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
-			     cpu_to_be32(skbinfo->skbprio)) ||
-	       nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
-			     cpu_to_be16(skbinfo->skbqueue));
+	/* Send nonzero parameters only */
+	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+		nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
+					  skbinfo->skbmarkmask))) ||
+	       (skbinfo->skbprio &&
+	        nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+			      cpu_to_be32(skbinfo->skbprio))) ||
+	       (skbinfo->skbqueue &&
+	        nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+			     cpu_to_be16(skbinfo->skbqueue)));
 
 }
 
-- 
cgit v1.2.3


From 5075314e4e4b559cc37675ad8a721a89bccd6284 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 15 Sep 2014 13:00:19 -0400
Subject: dsa: Split ops up, and avoid assigning tag_protocol and receive
 separately

This change addresses several issues.

First, it was possible to set tag_protocol without setting the ops pointer.
To correct that I have reordered things so that rcv is now populated before
we set tag_protocol.

Second, it didn't make much sense to keep setting the device ops each time a
new slave was registered.  So by moving the receive portion out into root
switch initialization that issue should be addressed.

Third, I wanted to avoid sending tags if the rcv pointer was not registered
so I changed the tag check to verify if the rcv function pointer is set on
the root tree.  If it is then we start sending DSA tagged frames.

Finally I split the device ops pointer in the structures into two spots.  I
placed the rcv function pointer in the root switch since this makes it
easiest to access from there, and I placed the xmit function pointer in the
slave for the same reason.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 -------
 include/net/dsa.h         | 10 ++++++----
 net/dsa/dsa.c             | 32 ++++++++++++++++++++++++++++----
 net/dsa/dsa_priv.h        | 11 ++++++++++-
 net/dsa/slave.c           | 37 +++++++++++++++----------------------
 net/dsa/tag_brcm.c        |  1 -
 net/dsa/tag_dsa.c         |  1 -
 net/dsa/tag_edsa.c        |  1 -
 net/dsa/tag_trailer.c     |  1 -
 9 files changed, 59 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f9e81d10a3b9..28d4378615e5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1928,13 +1928,6 @@ struct udp_offload {
 	struct offload_callbacks callbacks;
 };
 
-struct dsa_device_ops {
-	netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
-	int (*rcv)(struct sk_buff *skb, struct net_device *dev,
-		   struct packet_type *pt, struct net_device *orig_dev);
-};
-
-
 /* often modified stats are per cpu, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
 	u64     rx_packets;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 8a8a5d976f97..a55c4e6a4f0f 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -77,7 +77,7 @@ struct dsa_platform_data {
 	struct dsa_chip_data	*chip;
 };
 
-struct dsa_device_ops;
+struct packet_type;
 
 struct dsa_switch_tree {
 	/*
@@ -91,7 +91,10 @@ struct dsa_switch_tree {
 	 * protocol to use.
 	 */
 	struct net_device	*master_netdev;
-	const struct dsa_device_ops	*ops;
+	int			(*rcv)(struct sk_buff *skb,
+				       struct net_device *dev,
+				       struct packet_type *pt,
+				       struct net_device *orig_dev);
 	enum dsa_tag_protocol	tag_protocol;
 
 	/*
@@ -218,7 +221,6 @@ static inline void *ds_to_priv(struct dsa_switch *ds)
 
 static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
 {
-	return dst->tag_protocol != DSA_TAG_PROTO_NONE;
+	return dst->rcv != NULL;
 }
-
 #endif
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 61f145c44555..1df0a7cf1e9e 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/list.h>
-#include <linux/netdevice.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -154,9 +153,34 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 	 * tagging protocol to the preferred tagging format of this
 	 * switch.
 	 */
-	if (ds->dst->cpu_switch == index)
-		ds->dst->tag_protocol = drv->tag_protocol;
+	if (dst->cpu_switch == index) {
+		switch (drv->tag_protocol) {
+#ifdef CONFIG_NET_DSA_TAG_DSA
+		case DSA_TAG_PROTO_DSA:
+			dst->rcv = dsa_netdev_ops.rcv;
+			break;
+#endif
+#ifdef CONFIG_NET_DSA_TAG_EDSA
+		case DSA_TAG_PROTO_EDSA:
+			dst->rcv = edsa_netdev_ops.rcv;
+			break;
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+		case DSA_TAG_PROTO_TRAILER:
+			dst->rcv = trailer_netdev_ops.rcv;
+			break;
+#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+		case DSA_TAG_PROTO_BRCM:
+			dst->rcv = brcm_netdev_ops.rcv;
+			break;
+#endif
+		default:
+			break;
+		}
 
+		dst->tag_protocol = drv->tag_protocol;
+	}
 
 	/*
 	 * Do basic register setup.
@@ -626,7 +650,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 		return 0;
 	}
 
-	return dst->ops->rcv(skb, dev, pt, orig_dev);
+	return dst->rcv(skb, dev, pt, orig_dev);
 }
 
 static struct packet_type dsa_pack_type __read_mostly = {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 98afed4d92ba..f90899e8ab5a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -12,7 +12,13 @@
 #define __DSA_PRIV_H
 
 #include <linux/phy.h>
-#include <net/dsa.h>
+#include <linux/netdevice.h>
+
+struct dsa_device_ops {
+	netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
+	int (*rcv)(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev);
+};
 
 struct dsa_slave_priv {
 	/*
@@ -20,6 +26,8 @@ struct dsa_slave_priv {
 	 * switch port.
 	 */
 	struct net_device	*dev;
+	netdev_tx_t		(*xmit)(struct sk_buff *skb,
+					struct net_device *dev);
 
 	/*
 	 * Which switch this port is a part of, and the port index
@@ -43,6 +51,7 @@ struct dsa_slave_priv {
 extern char dsa_driver_version[];
 
 /* slave.c */
+extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
 struct net_device *dsa_slave_create(struct dsa_switch *ds,
 				    struct device *parent,
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 809eeb13eb12..e38a331111c0 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -9,7 +9,6 @@
  */
 
 #include <linux/list.h>
-#include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/phy.h>
 #include <linux/of_net.h>
@@ -176,9 +175,8 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch_tree *dst = p->parent->dst;
 
-	return dst->ops->xmit(skb, dev);
+	return p->xmit(skb, dev);
 }
 
 static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff *skb,
@@ -325,11 +323,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 };
 
-static const struct dsa_device_ops notag_netdev_ops = {
-	.xmit	= dsa_slave_notag_xmit,
-	.rcv	= NULL,
-};
-
 static void dsa_slave_adjust_link(struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -435,41 +428,41 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	slave_dev->tx_queue_len = 0;
 	slave_dev->netdev_ops = &dsa_slave_netdev_ops;
 
+	SET_NETDEV_DEV(slave_dev, parent);
+	slave_dev->dev.of_node = ds->pd->port_dn[port];
+	slave_dev->vlan_features = master->vlan_features;
+
+	p = netdev_priv(slave_dev);
+	p->dev = slave_dev;
+	p->parent = ds;
+	p->port = port;
+
 	switch (ds->dst->tag_protocol) {
 #ifdef CONFIG_NET_DSA_TAG_DSA
 	case DSA_TAG_PROTO_DSA:
-		ds->dst->ops = &dsa_netdev_ops;
+		p->xmit = dsa_netdev_ops.xmit;
 		break;
 #endif
 #ifdef CONFIG_NET_DSA_TAG_EDSA
 	case DSA_TAG_PROTO_EDSA:
-		ds->dst->ops = &edsa_netdev_ops;
+		p->xmit = edsa_netdev_ops.xmit;
 		break;
 #endif
 #ifdef CONFIG_NET_DSA_TAG_TRAILER
 	case DSA_TAG_PROTO_TRAILER:
-		ds->dst->ops = &trailer_netdev_ops;
+		p->xmit = trailer_netdev_ops.xmit;
 		break;
 #endif
 #ifdef CONFIG_NET_DSA_TAG_BRCM
 	case DSA_TAG_PROTO_BRCM:
-		ds->dst->ops = &brcm_netdev_ops;
+		p->xmit = brcm_netdev_ops.xmit;
 		break;
 #endif
 	default:
-		ds->dst->ops = &notag_netdev_ops;
+		p->xmit	= dsa_slave_notag_xmit;
 		break;
 	}
 
-	SET_NETDEV_DEV(slave_dev, parent);
-	slave_dev->dev.of_node = ds->pd->port_dn[port];
-	slave_dev->vlan_features = master->vlan_features;
-
-	p = netdev_priv(slave_dev);
-	p->dev = slave_dev;
-	p->parent = ds;
-	p->port = port;
-
 	p->old_pause = -1;
 	p->old_link = -1;
 	p->old_duplex = -1;
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 8fbc21c0de78..83d3572cdb20 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -11,7 +11,6 @@
 
 #include <linux/etherdevice.h>
 #include <linux/list.h>
-#include <linux/netdevice.h>
 #include <linux/slab.h>
 #include "dsa_priv.h"
 
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index d7dbc5bda5c0..ce90c8bdc658 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -10,7 +10,6 @@
 
 #include <linux/etherdevice.h>
 #include <linux/list.h>
-#include <linux/netdevice.h>
 #include <linux/slab.h>
 #include "dsa_priv.h"
 
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 6b30abe89183..94fcce778679 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -10,7 +10,6 @@
 
 #include <linux/etherdevice.h>
 #include <linux/list.h>
-#include <linux/netdevice.h>
 #include <linux/slab.h>
 #include "dsa_priv.h"
 
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 5fe9444842c5..115fdca34077 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -10,7 +10,6 @@
 
 #include <linux/etherdevice.h>
 #include <linux/list.h>
-#include <linux/netdevice.h>
 #include <linux/slab.h>
 #include "dsa_priv.h"
 
-- 
cgit v1.2.3


From 2e151c70dfb0075ff83bec305c52a9da1ba49089 Mon Sep 17 00:00:00 2001
From: Peter Neubauer <pneubauer@bluerwhite.org>
Date: Fri, 12 Sep 2014 13:06:13 +0200
Subject: x86: HPET force enable for e6xx based systems

As the Soekris net6501 and other e6xx based systems do not have
any ACPI implementation, HPET won't get enabled.
This patch enables HPET on such platforms.

[    0.430149] pci 0000:00:01.0: Force enabled HPET at 0xfed00000
[    0.644838] HPET: 3 timers in total, 0 timers will be used for per-cpu timer

Original patch by Peter Neubauer (http://www.mail-archive.com/soekris-tech@lists.soekris.com/msg06462.html)
slightly modified by Conrad Kostecki <ck@conrad-kostecki.de> and massaged
accoring to Thomas Gleixners <tglx@linutronix.de> by me.

Suggested-by: Conrad Kostecki <ck@conrad-kostecki.de>
Signed-off-by: Eric Sesterhenn <eric.sesterhenn@lsexperts.de>
Cc: Peter Neubauer <pneubauer@bluerwhite.org>
Link: http://lkml.kernel.org/r/5412D3A5.2030909@lsexperts.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/quirks.c | 18 ++++++++++++++++++
 include/linux/pci_ids.h  |  1 +
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index ff898bbf579d..176a0f99d4da 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -497,6 +497,24 @@ void force_hpet_resume(void)
 	}
 }
 
+/*
+ * According to the datasheet e6xx systems have the HPET hardwired to
+ * 0xfed00000
+ */
+static void e6xx_force_enable_hpet(struct pci_dev *dev)
+{
+	if (hpet_address || force_hpet_address)
+		return;
+
+	force_hpet_address = 0xFED00000;
+	force_hpet_resume_type = NONE_FORCE_HPET_RESUME;
+	dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at "
+		"0x%lx\n", force_hpet_address);
+	return;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU,
+			 e6xx_force_enable_hpet);
+
 /*
  * HPET MSI on some boards (ATI SB700/SB800) has side effect on
  * floppy DMA. Disable HPET MSI on such platforms.
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6ed0bb73a864..aa0d39073e9c 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2860,6 +2860,7 @@
 #define PCI_DEVICE_ID_INTEL_82372FB_1	0x7601
 #define PCI_DEVICE_ID_INTEL_SCH_LPC	0x8119
 #define PCI_DEVICE_ID_INTEL_SCH_IDE	0x811a
+#define PCI_DEVICE_ID_INTEL_E6XX_CU	0x8183
 #define PCI_DEVICE_ID_INTEL_ITC_LPC	0x8186
 #define PCI_DEVICE_ID_INTEL_82454GX	0x84c4
 #define PCI_DEVICE_ID_INTEL_82450GX	0x84c5
-- 
cgit v1.2.3


From 3c5f8853469d3e549799808b9bf639b5d32751f0 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 4 Sep 2014 17:31:25 +0530
Subject: power-supply: Forward declare structs together

power_supply.h requires to forward declare few structures. One of them is done
at the top of the file and other one just before it is used. Declare them
together for better readability.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power_supply.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index de59a28b1b5b..3ed049673022 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -18,8 +18,6 @@
 #include <linux/spinlock.h>
 #include <linux/notifier.h>
 
-struct device;
-
 /*
  * All voltages, currents, charges, energies, time and temperatures in uV,
  * µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise
@@ -175,6 +173,7 @@ union power_supply_propval {
 	const char *strval;
 };
 
+struct device;
 struct device_node;
 
 struct power_supply {
-- 
cgit v1.2.3


From 53d91c5ce0cb8945b55e8bb54e551cabc51eb28d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 16 Sep 2014 17:36:01 +0100
Subject: Provide a binary to hex conversion function

Provide a function to convert a buffer of binary data into an unterminated
ascii hex string representation of that data.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
---
 include/linux/kernel.h |  1 +
 lib/hexdump.c          | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4c52907a6d8b..89a0b8e5a952 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -500,6 +500,7 @@ static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
 
 extern int hex_to_bin(char ch);
 extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
+extern char *bin2hex(char *dst, const void *src, size_t count);
 
 int mac_pton(const char *s, u8 *mac);
 
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 8499c810909a..270773b91923 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -58,6 +58,22 @@ int hex2bin(u8 *dst, const char *src, size_t count)
 }
 EXPORT_SYMBOL(hex2bin);
 
+/**
+ * bin2hex - convert binary data to an ascii hexadecimal string
+ * @dst: ascii hexadecimal result
+ * @src: binary data
+ * @count: binary data length
+ */
+char *bin2hex(char *dst, const void *src, size_t count)
+{
+	const unsigned char *_src = src;
+
+	while (count--)
+		dst = hex_byte_pack(dst, *_src++);
+	return dst;
+}
+EXPORT_SYMBOL(bin2hex);
+
 /**
  * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
  * @buf: data blob to dump
-- 
cgit v1.2.3


From 462919591a1791e76042dc5c1e0148715df59beb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 16 Sep 2014 17:36:02 +0100
Subject: KEYS: Preparse match data

Preparse the match data.  This provides several advantages:

 (1) The preparser can reject invalid criteria up front.

 (2) The preparser can convert the criteria to binary data if necessary (the
     asymmetric key type really wants to do binary comparison of the key IDs).

 (3) The preparser can set the type of search to be performed.  This means
     that it's not then a one-off setting in the key type.

 (4) The preparser can set an appropriate comparator function.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
---
 crypto/asymmetric_keys/asymmetric_type.c | 31 +++++++++++++++++++-
 include/keys/user-type.h                 |  4 ++-
 include/linux/key-type.h                 | 31 ++++++++++++++++++--
 net/dns_resolver/dns_key.c               |  5 ++--
 security/keys/internal.h                 |  8 ++----
 security/keys/keyring.c                  | 49 +++++++++++++++++++-------------
 security/keys/proc.c                     |  8 +++---
 security/keys/process_keys.c             | 13 +++++----
 security/keys/request_key.c              | 21 ++++++++++----
 security/keys/request_key_auth.c         |  6 ++--
 security/keys/user_defined.c             |  4 +--
 11 files changed, 129 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index eb8cd46961a5..f666b4e8d256 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -59,9 +59,11 @@ EXPORT_SYMBOL_GPL(asymmetric_keyid_match);
  *	"id:<id>"	- request a key matching the ID
  *	"<subtype>:<id>" - request a key of a subtype
  */
-static int asymmetric_key_match(const struct key *key, const void *description)
+static int asymmetric_key_match(const struct key *key,
+				const struct key_match_data *match_data)
 {
 	const struct asymmetric_key_subtype *subtype = asymmetric_key_subtype(key);
+	const char *description = match_data->raw_data;
 	const char *spec = description;
 	const char *id;
 	ptrdiff_t speclen;
@@ -93,6 +95,31 @@ static int asymmetric_key_match(const struct key *key, const void *description)
 	return 0;
 }
 
+/*
+ * Preparse the match criterion.  If we don't set lookup_type and cmp,
+ * the default will be an exact match on the key description.
+ *
+ * There are some specifiers for matching key IDs rather than by the key
+ * description:
+ *
+ *	"id:<id>" - request a key by any available ID
+ *
+ * These have to be searched by iteration rather than by direct lookup because
+ * the key is hashed according to its description.
+ */
+static int asymmetric_key_match_preparse(struct key_match_data *match_data)
+{
+	match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE;
+	return 0;
+}
+
+/*
+ * Free the preparsed the match criterion.
+ */
+static void asymmetric_key_match_free(struct key_match_data *match_data)
+{
+}
+
 /*
  * Describe the asymmetric key
  */
@@ -196,7 +223,9 @@ struct key_type key_type_asymmetric = {
 	.preparse	= asymmetric_key_preparse,
 	.free_preparse	= asymmetric_key_free_preparse,
 	.instantiate	= generic_key_instantiate,
+	.match_preparse	= asymmetric_key_match_preparse,
 	.match		= asymmetric_key_match,
+	.match_free	= asymmetric_key_match_free,
 	.destroy	= asymmetric_key_destroy,
 	.describe	= asymmetric_key_describe,
 	.def_lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE,
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
index 3ab1873a4bfa..66d92af30e7c 100644
--- a/include/keys/user-type.h
+++ b/include/keys/user-type.h
@@ -36,11 +36,13 @@ extern struct key_type key_type_user;
 extern struct key_type key_type_logon;
 
 struct key_preparsed_payload;
+struct key_match_data;
 
 extern int user_preparse(struct key_preparsed_payload *prep);
 extern void user_free_preparse(struct key_preparsed_payload *prep);
 extern int user_update(struct key *key, struct key_preparsed_payload *prep);
-extern int user_match(const struct key *key, const void *criterion);
+extern int user_match(const struct key *key,
+		      const struct key_match_data *match_data);
 extern void user_revoke(struct key *key);
 extern void user_destroy(struct key *key);
 extern void user_describe(const struct key *user, struct seq_file *m);
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 44792ee649de..8aba688a451a 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -52,6 +52,22 @@ struct key_preparsed_payload {
 typedef int (*request_key_actor_t)(struct key_construction *key,
 				   const char *op, void *aux);
 
+/*
+ * Preparsed matching criterion.
+ */
+struct key_match_data {
+	/* Comparison function, defaults to type->match, but can be replaced by
+	 * type->match_preparse(). */
+	int (*cmp)(const struct key *key,
+		   const struct key_match_data *match_data);
+
+	const void	*raw_data;	/* Raw match data */
+	void		*preparsed;	/* For ->match_preparse() to stash stuff */
+	unsigned	lookup_type;	/* Type of lookup for this search. */
+#define KEYRING_SEARCH_LOOKUP_DIRECT	0x0000	/* Direct lookup by description. */
+#define KEYRING_SEARCH_LOOKUP_ITERATE	0x0001	/* Iterative search. */
+};
+
 /*
  * kernel managed key type definition
  */
@@ -67,8 +83,6 @@ struct key_type {
 
 	/* Default key search algorithm. */
 	unsigned def_lookup_type;
-#define KEYRING_SEARCH_LOOKUP_DIRECT	0x0000	/* Direct lookup by description. */
-#define KEYRING_SEARCH_LOOKUP_ITERATE	0x0001	/* Iterative search. */
 
 	/* vet a description */
 	int (*vet_description)(const char *description);
@@ -96,8 +110,19 @@ struct key_type {
 	 */
 	int (*update)(struct key *key, struct key_preparsed_payload *prep);
 
+	/* Preparse the data supplied to ->match() (optional).  The
+	 * data to be preparsed can be found in match_data->raw_data.
+	 * The lookup type can also be set by this function.
+	 */
+	int (*match_preparse)(struct key_match_data *match_data);
+
 	/* match a key against a description */
-	int (*match)(const struct key *key, const void *desc);
+	int (*match)(const struct key *key,
+		     const struct key_match_data *match_data);
+
+	/* Free preparsed match data (optional).  This should be supplied it
+	 * ->match_preparse() is supplied. */
+	void (*match_free)(struct key_match_data *match_data);
 
 	/* clear some of the data from a key on revokation (optional)
 	 * - the key's semaphore will be write-locked by the caller
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index f380b2c58178..92df6e508ae7 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -177,10 +177,11 @@ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep)
  * should end with a period).  The domain name is case-independent.
  */
 static int
-dns_resolver_match(const struct key *key, const void *description)
+dns_resolver_match(const struct key *key,
+		   const struct key_match_data *match_data)
 {
 	int slen, dlen, ret = 0;
-	const char *src = key->description, *dsp = description;
+	const char *src = key->description, *dsp = match_data->raw_data;
 
 	kenter("%s,%s", src, dsp);
 
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 5f20da01fd8d..805e60b0b87e 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -107,13 +107,10 @@ extern int iterate_over_keyring(const struct key *keyring,
 				int (*func)(const struct key *key, void *data),
 				void *data);
 
-typedef int (*key_match_func_t)(const struct key *, const void *);
-
 struct keyring_search_context {
 	struct keyring_index_key index_key;
 	const struct cred	*cred;
-	key_match_func_t	match;
-	const void		*match_data;
+	struct key_match_data	match_data;
 	unsigned		flags;
 #define KEYRING_SEARCH_LOOKUP_TYPE	0x0001	/* [as type->def_lookup_type] */
 #define KEYRING_SEARCH_NO_STATE_CHECK	0x0002	/* Skip state checks */
@@ -152,7 +149,8 @@ extern struct key *request_key_and_link(struct key_type *type,
 					struct key *dest_keyring,
 					unsigned long flags);
 
-extern int lookup_user_key_possessed(const struct key *key, const void *target);
+extern int lookup_user_key_possessed(const struct key *key,
+				     const struct key_match_data *match_data);
 extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
 				 key_perm_t perm);
 #define KEY_LOOKUP_CREATE	0x01
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 8314a7d2104d..10f0a5f2d362 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -545,7 +545,7 @@ static int keyring_search_iterator(const void *object, void *iterator_data)
 	}
 
 	/* keys that don't match */
-	if (!ctx->match(key, ctx->match_data)) {
+	if (!ctx->match_data.cmp(key, &ctx->match_data)) {
 		kleave(" = 0 [!match]");
 		return 0;
 	}
@@ -585,8 +585,7 @@ skipped:
  */
 static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
 {
-	if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) ==
-	    KEYRING_SEARCH_LOOKUP_DIRECT) {
+	if (ctx->match_data.lookup_type == KEYRING_SEARCH_LOOKUP_DIRECT) {
 		const void *object;
 
 		object = assoc_array_find(&keyring->keys,
@@ -627,7 +626,7 @@ static bool search_nested_keyrings(struct key *keyring,
 	/* Check to see if this top-level keyring is what we are looking for
 	 * and whether it is valid or not.
 	 */
-	if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE ||
+	if (ctx->match_data.lookup_type == KEYRING_SEARCH_LOOKUP_ITERATE ||
 	    keyring_compare_object(keyring, &ctx->index_key)) {
 		ctx->skipped_ret = 2;
 		ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK;
@@ -885,16 +884,28 @@ key_ref_t keyring_search(key_ref_t keyring,
 		.index_key.type		= type,
 		.index_key.description	= description,
 		.cred			= current_cred(),
-		.match			= type->match,
-		.match_data		= description,
-		.flags			= (type->def_lookup_type |
-					   KEYRING_SEARCH_DO_STATE_CHECK),
+		.match_data.cmp		= type->match,
+		.match_data.raw_data	= description,
+		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
+		.flags			= KEYRING_SEARCH_DO_STATE_CHECK,
 	};
+	key_ref_t key;
+	int ret;
 
-	if (!ctx.match)
+	if (!ctx.match_data.cmp)
 		return ERR_PTR(-ENOKEY);
 
-	return keyring_search_aux(keyring, &ctx);
+	if (type->match_preparse) {
+		ret = type->match_preparse(&ctx.match_data);
+		if (ret < 0)
+			return ERR_PTR(ret);
+	}
+
+	key = keyring_search_aux(keyring, &ctx);
+
+	if (type->match_free)
+		type->match_free(&ctx.match_data);
+	return key;
 }
 EXPORT_SYMBOL(keyring_search);
 
@@ -1014,7 +1025,7 @@ static int keyring_detect_cycle_iterator(const void *object,
 
 	/* We might get a keyring with matching index-key that is nonetheless a
 	 * different keyring. */
-	if (key != ctx->match_data)
+	if (key != ctx->match_data.raw_data)
 		return 0;
 
 	ctx->result = ERR_PTR(-EDEADLK);
@@ -1031,14 +1042,14 @@ static int keyring_detect_cycle_iterator(const void *object,
 static int keyring_detect_cycle(struct key *A, struct key *B)
 {
 	struct keyring_search_context ctx = {
-		.index_key	= A->index_key,
-		.match_data	= A,
-		.iterator	= keyring_detect_cycle_iterator,
-		.flags		= (KEYRING_SEARCH_LOOKUP_DIRECT |
-				   KEYRING_SEARCH_NO_STATE_CHECK |
-				   KEYRING_SEARCH_NO_UPDATE_TIME |
-				   KEYRING_SEARCH_NO_CHECK_PERM |
-				   KEYRING_SEARCH_DETECT_TOO_DEEP),
+		.index_key		= A->index_key,
+		.match_data.raw_data	= A,
+		.match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
+		.iterator		= keyring_detect_cycle_iterator,
+		.flags			= (KEYRING_SEARCH_NO_STATE_CHECK |
+					   KEYRING_SEARCH_NO_UPDATE_TIME |
+					   KEYRING_SEARCH_NO_CHECK_PERM |
+					   KEYRING_SEARCH_DETECT_TOO_DEEP),
 	};
 
 	rcu_read_lock();
diff --git a/security/keys/proc.c b/security/keys/proc.c
index d3f6f2fd21db..972eeb336b81 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -194,10 +194,10 @@ static int proc_keys_show(struct seq_file *m, void *v)
 		.index_key.type		= key->type,
 		.index_key.description	= key->description,
 		.cred			= current_cred(),
-		.match			= lookup_user_key_possessed,
-		.match_data		= key,
-		.flags			= (KEYRING_SEARCH_NO_STATE_CHECK |
-					   KEYRING_SEARCH_LOOKUP_DIRECT),
+		.match_data.cmp		= lookup_user_key_possessed,
+		.match_data.raw_data	= key,
+		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
+		.flags			= KEYRING_SEARCH_NO_STATE_CHECK,
 	};
 
 	key_ref = make_key_ref(key, 0);
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 0cf8a130a267..08bd533d014f 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -489,9 +489,10 @@ found:
 /*
  * See if the key we're looking at is the target key.
  */
-int lookup_user_key_possessed(const struct key *key, const void *target)
+int lookup_user_key_possessed(const struct key *key,
+			      const struct key_match_data *match_data)
 {
-	return key == target;
+	return key == match_data->raw_data;
 }
 
 /*
@@ -516,9 +517,9 @@ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
 			  key_perm_t perm)
 {
 	struct keyring_search_context ctx = {
-		.match	= lookup_user_key_possessed,
-		.flags	= (KEYRING_SEARCH_NO_STATE_CHECK |
-			   KEYRING_SEARCH_LOOKUP_DIRECT),
+		.match_data.cmp		= lookup_user_key_possessed,
+		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
+		.flags			= KEYRING_SEARCH_NO_STATE_CHECK,
 	};
 	struct request_key_auth *rka;
 	struct key *key;
@@ -673,7 +674,7 @@ try_again:
 		ctx.index_key.type		= key->type;
 		ctx.index_key.description	= key->description;
 		ctx.index_key.desc_len		= strlen(key->description);
-		ctx.match_data			= key;
+		ctx.match_data.raw_data		= key;
 		kdebug("check possessed");
 		skey_ref = search_process_keyrings(&ctx);
 		kdebug("possessed=%p", skey_ref);
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 381411941cc1..408523e5e2e2 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -531,9 +531,9 @@ struct key *request_key_and_link(struct key_type *type,
 		.index_key.type		= type,
 		.index_key.description	= description,
 		.cred			= current_cred(),
-		.match			= type->match,
-		.match_data		= description,
-		.flags			= KEYRING_SEARCH_LOOKUP_DIRECT,
+		.match_data.cmp		= type->match,
+		.match_data.raw_data	= description,
+		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	};
 	struct key *key;
 	key_ref_t key_ref;
@@ -543,6 +543,14 @@ struct key *request_key_and_link(struct key_type *type,
 	       ctx.index_key.type->name, ctx.index_key.description,
 	       callout_info, callout_len, aux, dest_keyring, flags);
 
+	if (type->match_preparse) {
+		ret = type->match_preparse(&ctx.match_data);
+		if (ret < 0) {
+			key = ERR_PTR(ret);
+			goto error;
+		}
+	}
+
 	/* search all the process keyrings for a key */
 	key_ref = search_process_keyrings(&ctx);
 
@@ -555,7 +563,7 @@ struct key *request_key_and_link(struct key_type *type,
 			if (ret < 0) {
 				key_put(key);
 				key = ERR_PTR(ret);
-				goto error;
+				goto error_free;
 			}
 		}
 	} else if (PTR_ERR(key_ref) != -EAGAIN) {
@@ -565,12 +573,15 @@ struct key *request_key_and_link(struct key_type *type,
 		 * should consult userspace if we can */
 		key = ERR_PTR(-ENOKEY);
 		if (!callout_info)
-			goto error;
+			goto error_free;
 
 		key = construct_key_and_link(&ctx, callout_info, callout_len,
 					     aux, dest_keyring, flags);
 	}
 
+error_free:
+	if (type->match_free)
+		type->match_free(&ctx.match_data);
 error:
 	kleave(" = %p", key);
 	return key;
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 739e7455d388..9ae02819cc06 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -246,9 +246,9 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id)
 		.index_key.type		= &key_type_request_key_auth,
 		.index_key.description	= description,
 		.cred			= current_cred(),
-		.match			= user_match,
-		.match_data		= description,
-		.flags			= KEYRING_SEARCH_LOOKUP_DIRECT,
+		.match_data.cmp		= user_match,
+		.match_data.raw_data	= description,
+		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	};
 	struct key *authkey;
 	key_ref_t authkey_ref;
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index eee340011f2b..ec8a56063b02 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -141,9 +141,9 @@ EXPORT_SYMBOL_GPL(user_update);
 /*
  * match users on their name
  */
-int user_match(const struct key *key, const void *description)
+int user_match(const struct key *key, const struct key_match_data *match_data)
 {
-	return strcmp(key->description, description) == 0;
+	return strcmp(key->description, match_data->raw_data) == 0;
 }
 
 EXPORT_SYMBOL_GPL(user_match);
-- 
cgit v1.2.3


From 614d8c39014c185aa0f7254f0a470cc33fc1b284 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 16 Sep 2014 17:36:04 +0100
Subject: KEYS: Remove key_type::def_lookup_type

Remove key_type::def_lookup_type as it's no longer used.  The information now
defaults to KEYRING_SEARCH_LOOKUP_DIRECT but may be overridden by
type->match_preparse().

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
---
 crypto/asymmetric_keys/asymmetric_type.c |  1 -
 crypto/asymmetric_keys/pkcs7_key_type.c  |  1 -
 include/linux/key-type.h                 |  3 ---
 security/keys/big_key.c                  |  1 -
 security/keys/internal.h                 | 11 +++++------
 security/keys/user_defined.c             |  2 --
 6 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index f666b4e8d256..9d78ad7754d9 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -228,7 +228,6 @@ struct key_type key_type_asymmetric = {
 	.match_free	= asymmetric_key_match_free,
 	.destroy	= asymmetric_key_destroy,
 	.describe	= asymmetric_key_describe,
-	.def_lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE,
 };
 EXPORT_SYMBOL_GPL(key_type_asymmetric);
 
diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c
index 3de5fb011de0..d1faa1df1dec 100644
--- a/crypto/asymmetric_keys/pkcs7_key_type.c
+++ b/crypto/asymmetric_keys/pkcs7_key_type.c
@@ -72,7 +72,6 @@ error:
  */
 static struct key_type key_type_pkcs7 = {
 	.name			= "pkcs7_test",
-	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	.preparse		= pkcs7_preparse,
 	.free_preparse		= user_free_preparse,
 	.instantiate		= generic_key_instantiate,
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 8aba688a451a..bf93ea609273 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -81,9 +81,6 @@ struct key_type {
 	 */
 	size_t def_datalen;
 
-	/* Default key search algorithm. */
-	unsigned def_lookup_type;
-
 	/* vet a description */
 	int (*vet_description)(const char *description);
 
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index c2f91a0cf889..4045c13a761a 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -33,7 +33,6 @@ MODULE_LICENSE("GPL");
  */
 struct key_type key_type_big_key = {
 	.name			= "big_key",
-	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	.preparse		= big_key_preparse,
 	.free_preparse		= big_key_free_preparse,
 	.instantiate		= generic_key_instantiate,
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 805e60b0b87e..b47cc532be1e 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -112,12 +112,11 @@ struct keyring_search_context {
 	const struct cred	*cred;
 	struct key_match_data	match_data;
 	unsigned		flags;
-#define KEYRING_SEARCH_LOOKUP_TYPE	0x0001	/* [as type->def_lookup_type] */
-#define KEYRING_SEARCH_NO_STATE_CHECK	0x0002	/* Skip state checks */
-#define KEYRING_SEARCH_DO_STATE_CHECK	0x0004	/* Override NO_STATE_CHECK */
-#define KEYRING_SEARCH_NO_UPDATE_TIME	0x0008	/* Don't update times */
-#define KEYRING_SEARCH_NO_CHECK_PERM	0x0010	/* Don't check permissions */
-#define KEYRING_SEARCH_DETECT_TOO_DEEP	0x0020	/* Give an error on excessive depth */
+#define KEYRING_SEARCH_NO_STATE_CHECK	0x0001	/* Skip state checks */
+#define KEYRING_SEARCH_DO_STATE_CHECK	0x0002	/* Override NO_STATE_CHECK */
+#define KEYRING_SEARCH_NO_UPDATE_TIME	0x0004	/* Don't update times */
+#define KEYRING_SEARCH_NO_CHECK_PERM	0x0008	/* Don't check permissions */
+#define KEYRING_SEARCH_DETECT_TOO_DEEP	0x0010	/* Give an error on excessive depth */
 
 	int (*iterator)(const void *object, void *iterator_data);
 
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index ec8a56063b02..cd7e726e8646 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -26,7 +26,6 @@ static int logon_vet_description(const char *desc);
  */
 struct key_type key_type_user = {
 	.name			= "user",
-	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	.preparse		= user_preparse,
 	.free_preparse		= user_free_preparse,
 	.instantiate		= generic_key_instantiate,
@@ -48,7 +47,6 @@ EXPORT_SYMBOL_GPL(key_type_user);
  */
 struct key_type key_type_logon = {
 	.name			= "logon",
-	.def_lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	.preparse		= user_preparse,
 	.free_preparse		= user_free_preparse,
 	.instantiate		= generic_key_instantiate,
-- 
cgit v1.2.3


From c06cfb08b88dfbe13be44a69ae2fdc3a7c902d81 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 16 Sep 2014 17:36:06 +0100
Subject: KEYS: Remove key_type::match in favour of overriding default by
 match_preparse

A previous patch added a ->match_preparse() method to the key type.  This is
allowed to override the function called by the iteration algorithm.
Therefore, we can just set a default that simply checks for an exact match of
the key description with the original criterion data and allow match_preparse
to override it as needed.

The key_type::match op is then redundant and can be removed, as can the
user_match() function.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
---
 crypto/asymmetric_keys/asymmetric_type.c |  6 +++---
 crypto/asymmetric_keys/pkcs7_key_type.c  |  1 -
 fs/cifs/cifs_spnego.c                    |  1 -
 fs/cifs/cifsacl.c                        |  1 -
 fs/nfs/idmap.c                           |  2 --
 include/keys/user-type.h                 |  3 ---
 include/linux/key-type.h                 |  4 ----
 net/ceph/crypto.c                        |  1 -
 net/dns_resolver/dns_key.c               | 17 +++++++++++++----
 net/rxrpc/ar-key.c                       |  2 --
 security/keys/big_key.c                  |  1 -
 security/keys/encrypted-keys/encrypted.c |  1 -
 security/keys/internal.h                 |  2 ++
 security/keys/key.c                      |  2 +-
 security/keys/keyring.c                  | 15 ++++++++++-----
 security/keys/request_key.c              |  2 +-
 security/keys/request_key_auth.c         |  2 +-
 security/keys/trusted.c                  |  1 -
 security/keys/user_defined.c             | 12 ------------
 19 files changed, 31 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index 9d78ad7754d9..7c0498968975 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -59,8 +59,8 @@ EXPORT_SYMBOL_GPL(asymmetric_keyid_match);
  *	"id:<id>"	- request a key matching the ID
  *	"<subtype>:<id>" - request a key of a subtype
  */
-static int asymmetric_key_match(const struct key *key,
-				const struct key_match_data *match_data)
+static int asymmetric_key_cmp(const struct key *key,
+			      const struct key_match_data *match_data)
 {
 	const struct asymmetric_key_subtype *subtype = asymmetric_key_subtype(key);
 	const char *description = match_data->raw_data;
@@ -110,6 +110,7 @@ static int asymmetric_key_match(const struct key *key,
 static int asymmetric_key_match_preparse(struct key_match_data *match_data)
 {
 	match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE;
+	match_data->cmp = asymmetric_key_cmp;
 	return 0;
 }
 
@@ -224,7 +225,6 @@ struct key_type key_type_asymmetric = {
 	.free_preparse	= asymmetric_key_free_preparse,
 	.instantiate	= generic_key_instantiate,
 	.match_preparse	= asymmetric_key_match_preparse,
-	.match		= asymmetric_key_match,
 	.match_free	= asymmetric_key_match_free,
 	.destroy	= asymmetric_key_destroy,
 	.describe	= asymmetric_key_describe,
diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c
index d1faa1df1dec..751f8fd7335d 100644
--- a/crypto/asymmetric_keys/pkcs7_key_type.c
+++ b/crypto/asymmetric_keys/pkcs7_key_type.c
@@ -75,7 +75,6 @@ static struct key_type key_type_pkcs7 = {
 	.preparse		= pkcs7_preparse,
 	.free_preparse		= user_free_preparse,
 	.instantiate		= generic_key_instantiate,
-	.match			= user_match,
 	.revoke			= user_revoke,
 	.destroy		= user_destroy,
 	.describe		= user_describe,
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index a3e932547617..f4cf200b3c76 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -62,7 +62,6 @@ cifs_spnego_key_destroy(struct key *key)
 struct key_type cifs_spnego_key_type = {
 	.name		= "cifs.spnego",
 	.instantiate	= cifs_spnego_key_instantiate,
-	.match		= user_match,
 	.destroy	= cifs_spnego_key_destroy,
 	.describe	= user_describe,
 };
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 7ff866dbb89e..6d00c419cbae 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -84,7 +84,6 @@ static struct key_type cifs_idmap_key_type = {
 	.instantiate = cifs_idmap_key_instantiate,
 	.destroy     = cifs_idmap_key_destroy,
 	.describe    = user_describe,
-	.match       = user_match,
 };
 
 static char *
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 7dd55b745c4d..2f5db844c172 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -177,7 +177,6 @@ static struct key_type key_type_id_resolver = {
 	.preparse	= user_preparse,
 	.free_preparse	= user_free_preparse,
 	.instantiate	= generic_key_instantiate,
-	.match		= user_match,
 	.revoke		= user_revoke,
 	.destroy	= user_destroy,
 	.describe	= user_describe,
@@ -401,7 +400,6 @@ static struct key_type key_type_id_resolver_legacy = {
 	.preparse	= user_preparse,
 	.free_preparse	= user_free_preparse,
 	.instantiate	= generic_key_instantiate,
-	.match		= user_match,
 	.revoke		= user_revoke,
 	.destroy	= user_destroy,
 	.describe	= user_describe,
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
index 66d92af30e7c..cebefb069c44 100644
--- a/include/keys/user-type.h
+++ b/include/keys/user-type.h
@@ -36,13 +36,10 @@ extern struct key_type key_type_user;
 extern struct key_type key_type_logon;
 
 struct key_preparsed_payload;
-struct key_match_data;
 
 extern int user_preparse(struct key_preparsed_payload *prep);
 extern void user_free_preparse(struct key_preparsed_payload *prep);
 extern int user_update(struct key *key, struct key_preparsed_payload *prep);
-extern int user_match(const struct key *key,
-		      const struct key_match_data *match_data);
 extern void user_revoke(struct key *key);
 extern void user_destroy(struct key *key);
 extern void user_describe(const struct key *user, struct seq_file *m);
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index bf93ea609273..c14816bd3b44 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -113,10 +113,6 @@ struct key_type {
 	 */
 	int (*match_preparse)(struct key_match_data *match_data);
 
-	/* match a key against a description */
-	int (*match)(const struct key *key,
-		     const struct key_match_data *match_data);
-
 	/* Free preparsed match data (optional).  This should be supplied it
 	 * ->match_preparse() is supplied. */
 	void (*match_free)(struct key_match_data *match_data);
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index ffeba8f9dda9..62fc5e7a9acf 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -476,7 +476,6 @@ struct key_type key_type_ceph = {
 	.preparse	= ceph_key_preparse,
 	.free_preparse	= ceph_key_free_preparse,
 	.instantiate	= generic_key_instantiate,
-	.match		= user_match,
 	.destroy	= ceph_key_destroy,
 };
 
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 92df6e508ae7..a07b9ba7e0b7 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -176,9 +176,8 @@ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep)
  * The domain name may be a simple name or an absolute domain name (which
  * should end with a period).  The domain name is case-independent.
  */
-static int
-dns_resolver_match(const struct key *key,
-		   const struct key_match_data *match_data)
+static int dns_resolver_cmp(const struct key *key,
+			    const struct key_match_data *match_data)
 {
 	int slen, dlen, ret = 0;
 	const char *src = key->description, *dsp = match_data->raw_data;
@@ -209,6 +208,16 @@ no_match:
 	return ret;
 }
 
+/*
+ * Preparse the match criterion.
+ */
+static int dns_resolver_match_preparse(struct key_match_data *match_data)
+{
+	match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE;
+	match_data->cmp = dns_resolver_cmp;
+	return 0;
+}
+
 /*
  * Describe a DNS key
  */
@@ -243,7 +252,7 @@ struct key_type key_type_dns_resolver = {
 	.preparse	= dns_resolver_preparse,
 	.free_preparse	= dns_resolver_free_preparse,
 	.instantiate	= generic_key_instantiate,
-	.match		= dns_resolver_match,
+	.match_preparse	= dns_resolver_match_preparse,
 	.revoke		= user_revoke,
 	.destroy	= user_destroy,
 	.describe	= dns_resolver_describe,
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 3907add75932..10c6cb694b43 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -44,7 +44,6 @@ struct key_type key_type_rxrpc = {
 	.preparse	= rxrpc_preparse,
 	.free_preparse	= rxrpc_free_preparse,
 	.instantiate	= generic_key_instantiate,
-	.match		= user_match,
 	.destroy	= rxrpc_destroy,
 	.describe	= rxrpc_describe,
 	.read		= rxrpc_read,
@@ -61,7 +60,6 @@ struct key_type key_type_rxrpc_s = {
 	.preparse	= rxrpc_preparse_s,
 	.free_preparse	= rxrpc_free_preparse_s,
 	.instantiate	= generic_key_instantiate,
-	.match		= user_match,
 	.destroy	= rxrpc_destroy_s,
 	.describe	= rxrpc_describe,
 };
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 4045c13a761a..b6adb94f6d52 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -36,7 +36,6 @@ struct key_type key_type_big_key = {
 	.preparse		= big_key_preparse,
 	.free_preparse		= big_key_free_preparse,
 	.instantiate		= generic_key_instantiate,
-	.match			= user_match,
 	.revoke			= big_key_revoke,
 	.destroy		= big_key_destroy,
 	.describe		= big_key_describe,
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 5fe443d120af..db9675db1026 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -970,7 +970,6 @@ struct key_type key_type_encrypted = {
 	.name = "encrypted",
 	.instantiate = encrypted_instantiate,
 	.update = encrypted_update,
-	.match = user_match,
 	.destroy = encrypted_destroy,
 	.describe = user_describe,
 	.read = encrypted_read,
diff --git a/security/keys/internal.h b/security/keys/internal.h
index b47cc532be1e..e66a16cb63e1 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -127,6 +127,8 @@ struct keyring_search_context {
 	struct timespec		now;
 };
 
+extern int key_default_cmp(const struct key *key,
+			   const struct key_match_data *match_data);
 extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 				    struct keyring_search_context *ctx);
 
diff --git a/security/keys/key.c b/security/keys/key.c
index b90a68c4e2c4..8c0092ca0443 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -799,7 +799,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	}
 
 	key_ref = ERR_PTR(-EINVAL);
-	if (!index_key.type->match || !index_key.type->instantiate ||
+	if (!index_key.type->instantiate ||
 	    (!index_key.description && !index_key.type->preparse))
 		goto error_put_type;
 
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 10f0a5f2d362..253c9a0eb092 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -89,7 +89,6 @@ struct key_type key_type_keyring = {
 	.preparse	= keyring_preparse,
 	.free_preparse	= keyring_free_preparse,
 	.instantiate	= keyring_instantiate,
-	.match		= user_match,
 	.revoke		= keyring_revoke,
 	.destroy	= keyring_destroy,
 	.describe	= keyring_describe,
@@ -511,6 +510,15 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
 }
 EXPORT_SYMBOL(keyring_alloc);
 
+/*
+ * By default, we keys found by getting an exact match on their descriptions.
+ */
+int key_default_cmp(const struct key *key,
+		    const struct key_match_data *match_data)
+{
+	return strcmp(key->description, match_data->raw_data) == 0;
+}
+
 /*
  * Iteration function to consider each key found.
  */
@@ -884,7 +892,7 @@ key_ref_t keyring_search(key_ref_t keyring,
 		.index_key.type		= type,
 		.index_key.description	= description,
 		.cred			= current_cred(),
-		.match_data.cmp		= type->match,
+		.match_data.cmp		= key_default_cmp,
 		.match_data.raw_data	= description,
 		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 		.flags			= KEYRING_SEARCH_DO_STATE_CHECK,
@@ -892,9 +900,6 @@ key_ref_t keyring_search(key_ref_t keyring,
 	key_ref_t key;
 	int ret;
 
-	if (!ctx.match_data.cmp)
-		return ERR_PTR(-ENOKEY);
-
 	if (type->match_preparse) {
 		ret = type->match_preparse(&ctx.match_data);
 		if (ret < 0)
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 408523e5e2e2..dc6ed32b7844 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -531,7 +531,7 @@ struct key *request_key_and_link(struct key_type *type,
 		.index_key.type		= type,
 		.index_key.description	= description,
 		.cred			= current_cred(),
-		.match_data.cmp		= type->match,
+		.match_data.cmp		= key_default_cmp,
 		.match_data.raw_data	= description,
 		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	};
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 9ae02819cc06..6639e2cb8853 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -246,7 +246,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id)
 		.index_key.type		= &key_type_request_key_auth,
 		.index_key.description	= description,
 		.cred			= current_cred(),
-		.match_data.cmp		= user_match,
+		.match_data.cmp		= key_default_cmp,
 		.match_data.raw_data	= description,
 		.match_data.lookup_type	= KEYRING_SEARCH_LOOKUP_DIRECT,
 	};
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 6b804aa4529a..c0594cb07ada 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -1096,7 +1096,6 @@ struct key_type key_type_trusted = {
 	.name = "trusted",
 	.instantiate = trusted_instantiate,
 	.update = trusted_update,
-	.match = user_match,
 	.destroy = trusted_destroy,
 	.describe = user_describe,
 	.read = trusted_read,
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index cd7e726e8646..36b47bbd3d8c 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -30,7 +30,6 @@ struct key_type key_type_user = {
 	.free_preparse		= user_free_preparse,
 	.instantiate		= generic_key_instantiate,
 	.update			= user_update,
-	.match			= user_match,
 	.revoke			= user_revoke,
 	.destroy		= user_destroy,
 	.describe		= user_describe,
@@ -51,7 +50,6 @@ struct key_type key_type_logon = {
 	.free_preparse		= user_free_preparse,
 	.instantiate		= generic_key_instantiate,
 	.update			= user_update,
-	.match			= user_match,
 	.revoke			= user_revoke,
 	.destroy		= user_destroy,
 	.describe		= user_describe,
@@ -136,16 +134,6 @@ error:
 
 EXPORT_SYMBOL_GPL(user_update);
 
-/*
- * match users on their name
- */
-int user_match(const struct key *key, const struct key_match_data *match_data)
-{
-	return strcmp(key->description, match_data->raw_data) == 0;
-}
-
-EXPORT_SYMBOL_GPL(user_match);
-
 /*
  * dispose of the links from a revoked keyring
  * - called with the key sem write-locked
-- 
cgit v1.2.3


From 0c903ab64feb0fe83eac9f67a06e2f5b9508de16 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 16 Sep 2014 17:36:08 +0100
Subject: KEYS: Make the key matching functions return bool

Make the key matching functions pointed to by key_match_data::cmp return bool
rather than int.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
---
 crypto/asymmetric_keys/asymmetric_type.c |  4 ++--
 include/linux/key-type.h                 | 10 ++++++----
 net/dns_resolver/dns_key.c               |  4 ++--
 security/keys/internal.h                 |  8 ++++----
 security/keys/keyring.c                  |  4 ++--
 security/keys/process_keys.c             |  4 ++--
 6 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index 7c0498968975..7755f918e8d9 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -59,8 +59,8 @@ EXPORT_SYMBOL_GPL(asymmetric_keyid_match);
  *	"id:<id>"	- request a key matching the ID
  *	"<subtype>:<id>" - request a key of a subtype
  */
-static int asymmetric_key_cmp(const struct key *key,
-			      const struct key_match_data *match_data)
+static bool asymmetric_key_cmp(const struct key *key,
+			       const struct key_match_data *match_data)
 {
 	const struct asymmetric_key_subtype *subtype = asymmetric_key_subtype(key);
 	const char *description = match_data->raw_data;
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index c14816bd3b44..ff9f1d394235 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -56,10 +56,12 @@ typedef int (*request_key_actor_t)(struct key_construction *key,
  * Preparsed matching criterion.
  */
 struct key_match_data {
-	/* Comparison function, defaults to type->match, but can be replaced by
-	 * type->match_preparse(). */
-	int (*cmp)(const struct key *key,
-		   const struct key_match_data *match_data);
+	/* Comparison function, defaults to exact description match, but can be
+	 * overridden by type->match_preparse().  Should return true if a match
+	 * is found and false if not.
+	 */
+	bool (*cmp)(const struct key *key,
+		    const struct key_match_data *match_data);
 
 	const void	*raw_data;	/* Raw match data */
 	void		*preparsed;	/* For ->match_preparse() to stash stuff */
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index a07b9ba7e0b7..31cd4fd75486 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -176,8 +176,8 @@ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep)
  * The domain name may be a simple name or an absolute domain name (which
  * should end with a period).  The domain name is case-independent.
  */
-static int dns_resolver_cmp(const struct key *key,
-			    const struct key_match_data *match_data)
+static bool dns_resolver_cmp(const struct key *key,
+			     const struct key_match_data *match_data)
 {
 	int slen, dlen, ret = 0;
 	const char *src = key->description, *dsp = match_data->raw_data;
diff --git a/security/keys/internal.h b/security/keys/internal.h
index e66a16cb63e1..b8960c4959a5 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -127,8 +127,8 @@ struct keyring_search_context {
 	struct timespec		now;
 };
 
-extern int key_default_cmp(const struct key *key,
-			   const struct key_match_data *match_data);
+extern bool key_default_cmp(const struct key *key,
+			    const struct key_match_data *match_data);
 extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 				    struct keyring_search_context *ctx);
 
@@ -150,8 +150,8 @@ extern struct key *request_key_and_link(struct key_type *type,
 					struct key *dest_keyring,
 					unsigned long flags);
 
-extern int lookup_user_key_possessed(const struct key *key,
-				     const struct key_match_data *match_data);
+extern bool lookup_user_key_possessed(const struct key *key,
+				      const struct key_match_data *match_data);
 extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
 				 key_perm_t perm);
 #define KEY_LOOKUP_CREATE	0x01
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 253c9a0eb092..8177010174f7 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -513,8 +513,8 @@ EXPORT_SYMBOL(keyring_alloc);
 /*
  * By default, we keys found by getting an exact match on their descriptions.
  */
-int key_default_cmp(const struct key *key,
-		    const struct key_match_data *match_data)
+bool key_default_cmp(const struct key *key,
+		     const struct key_match_data *match_data)
 {
 	return strcmp(key->description, match_data->raw_data) == 0;
 }
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 08bd533d014f..bd536cb221e2 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -489,8 +489,8 @@ found:
 /*
  * See if the key we're looking at is the target key.
  */
-int lookup_user_key_possessed(const struct key *key,
-			      const struct key_match_data *match_data)
+bool lookup_user_key_possessed(const struct key *key,
+			       const struct key_match_data *match_data)
 {
 	return key == match_data->raw_data;
 }
-- 
cgit v1.2.3


From f4579fc57cf4244057b713b1f73f4dc9f0b11e97 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 25 Jul 2014 11:21:47 -0700
Subject: rcu: Fix attempt to avoid unsolicited offloading of callbacks

Commit b58cc46c5f6b (rcu: Don't offload callbacks unless specifically
requested) failed to adjust the callback lists of the CPUs that are
known to be no-CBs CPUs only because they are also nohz_full= CPUs.
This failure can result in callbacks that are posted during early boot
getting stranded on nxtlist for CPUs whose no-CBs property becomes
apparent late, and there can also be spurious warnings about offline
CPUs posting callbacks.

This commit fixes these problems by adding an early-boot rcu_init_nohz()
that properly initializes the no-CBs CPUs.

Note that kernels built with CONFIG_RCU_NOCB_CPU_ALL=y or with
CONFIG_RCU_NOCB_CPU=n do not exhibit this bug.  Neither do kernels
booted without the nohz_full= boot parameter.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
Tested-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/rcupdate.h |  8 +++++
 init/Kconfig             |  4 +--
 init/main.c              |  1 +
 kernel/rcu/tree_plugin.h | 92 ++++++++++++++++++++++++++++++++----------------
 4 files changed, 72 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d231aa17b1d7..cc7bed1c90dc 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -269,6 +269,14 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 					 struct task_struct *next) { }
 #endif /* CONFIG_RCU_USER_QS */
 
+#ifdef CONFIG_RCU_NOCB_CPU
+void rcu_init_nohz(void);
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+static inline void rcu_init_nohz(void)
+{
+}
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+
 /**
  * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
  * @a: Code that RCU needs to pay attention to.
diff --git a/init/Kconfig b/init/Kconfig
index e84c6423a2e5..64ee4d967786 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -737,7 +737,7 @@ choice
 
 config RCU_NOCB_CPU_NONE
 	bool "No build_forced no-CBs CPUs"
-	depends on RCU_NOCB_CPU && !NO_HZ_FULL_ALL
+	depends on RCU_NOCB_CPU
 	help
 	  This option does not force any of the CPUs to be no-CBs CPUs.
 	  Only CPUs designated by the rcu_nocbs= boot parameter will be
@@ -751,7 +751,7 @@ config RCU_NOCB_CPU_NONE
 
 config RCU_NOCB_CPU_ZERO
 	bool "CPU 0 is a build_forced no-CBs CPU"
-	depends on RCU_NOCB_CPU && !NO_HZ_FULL_ALL
+	depends on RCU_NOCB_CPU
 	help
 	  This option forces CPU 0 to be a no-CBs CPU, so that its RCU
 	  callbacks are invoked by a per-CPU kthread whose name begins
diff --git a/init/main.c b/init/main.c
index bb1aed928f21..e3c4cdd94d5b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -578,6 +578,7 @@ asmlinkage __visible void __init start_kernel(void)
 	idr_init_cache();
 	rcu_init();
 	tick_nohz_init();
+	rcu_init_nohz();
 	context_tracking_init();
 	radix_tree_init();
 	/* init some links before init_ISA_irqs() */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a7997e272564..06d077ccf8d5 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -85,33 +85,6 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
 	if (nr_cpu_ids != NR_CPUS)
 		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
-#ifdef CONFIG_RCU_NOCB_CPU
-#ifndef CONFIG_RCU_NOCB_CPU_NONE
-	if (!have_rcu_nocb_mask) {
-		zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
-		have_rcu_nocb_mask = true;
-	}
-#ifdef CONFIG_RCU_NOCB_CPU_ZERO
-	pr_info("\tOffload RCU callbacks from CPU 0\n");
-	cpumask_set_cpu(0, rcu_nocb_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
-#ifdef CONFIG_RCU_NOCB_CPU_ALL
-	pr_info("\tOffload RCU callbacks from all CPUs\n");
-	cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
-	if (have_rcu_nocb_mask) {
-		if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
-			pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
-			cpumask_and(rcu_nocb_mask, cpu_possible_mask,
-				    rcu_nocb_mask);
-		}
-		cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
-		pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
-		if (rcu_nocb_poll)
-			pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
-	}
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 }
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
@@ -2451,6 +2424,67 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
 	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty"));
 }
 
+void __init rcu_init_nohz(void)
+{
+	int cpu;
+	bool need_rcu_nocb_mask = true;
+	struct rcu_state *rsp;
+
+#ifdef CONFIG_RCU_NOCB_CPU_NONE
+	need_rcu_nocb_mask = false;
+#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
+
+#if defined(CONFIG_NO_HZ_FULL)
+	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
+		need_rcu_nocb_mask = true;
+#endif /* #if defined(CONFIG_NO_HZ_FULL) */
+
+	if (!have_rcu_nocb_mask && need_rcu_nocb_mask) {
+		zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
+		have_rcu_nocb_mask = true;
+	}
+	if (!have_rcu_nocb_mask)
+		return;
+
+#ifdef CONFIG_RCU_NOCB_CPU_ZERO
+	pr_info("\tOffload RCU callbacks from CPU 0\n");
+	cpumask_set_cpu(0, rcu_nocb_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
+#ifdef CONFIG_RCU_NOCB_CPU_ALL
+	pr_info("\tOffload RCU callbacks from all CPUs\n");
+	cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
+#if defined(CONFIG_NO_HZ_FULL)
+	if (tick_nohz_full_running)
+		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
+#endif /* #if defined(CONFIG_NO_HZ_FULL) */
+
+	if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
+		pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
+		cpumask_and(rcu_nocb_mask, cpu_possible_mask,
+			    rcu_nocb_mask);
+	}
+	cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
+	pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
+	if (rcu_nocb_poll)
+		pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
+
+	for_each_rcu_flavor(rsp) {
+		for_each_cpu(cpu, rcu_nocb_mask) {
+			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+
+			/*
+			 * If there are early callbacks, they will need
+			 * to be moved to the nocb lists.
+			 */
+			WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] !=
+				     &rdp->nxtlist &&
+				     rdp->nxttail[RCU_NEXT_TAIL] != NULL);
+			init_nocb_callback_list(rdp);
+		}
+	}
+}
+
 /* Initialize per-rcu_data variables for no-CBs CPUs. */
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
@@ -2479,10 +2513,6 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 
 	if (rcu_nocb_mask == NULL)
 		return;
-#if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL)
-	if (tick_nohz_full_running)
-		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
-#endif /* #if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL) */
 	if (ls == -1) {
 		ls = int_sqrt(nr_cpu_ids);
 		rcu_nocb_leader_stride = ls;
-- 
cgit v1.2.3


From d36a7a0d5e8b5bff1671723d733eb61621b0cee4 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Thu, 11 Sep 2014 20:40:21 -0700
Subject: torture: Address race in module cleanup

When performing module cleanups by calling torture_cleanup() the
'torture_type' string in nullified However, callers are not necessarily
done, and might still need to reference the variable. This impacts
both rcutorture and locktorture, causing printing things like:

[   94.226618] (null)-torture: Stopping lock_torture_writer task
[   94.226624] (null)-torture: Stopping lock_torture_stats task

Thus delay this operation until the very end of the cleanup process.
The consequence (which shouldn't matter for this kid of program) is,
of course, that we delay the window between rmmod and modprobing,
for instance in module_torture_begin().

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/torture.h      |  3 ++-
 kernel/locking/locktorture.c |  3 ++-
 kernel/rcu/rcutorture.c      |  3 ++-
 kernel/torture.c             | 16 +++++++++++++---
 4 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index fec46f8c08eb..7759fc3c622d 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -77,7 +77,8 @@ int torture_stutter_init(int s);
 /* Initialization and cleanup. */
 bool torture_init_begin(char *ttype, bool v, int *runnable);
 void torture_init_end(void);
-bool torture_cleanup(void);
+bool torture_cleanup_begin(void);
+void torture_cleanup_end(void);
 bool torture_must_stop(void);
 bool torture_must_stop_irq(void);
 void torture_kthread_stopping(char *title);
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index de703a769c1d..988267cc92c1 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -361,7 +361,7 @@ static void lock_torture_cleanup(void)
 {
 	int i;
 
-	if (torture_cleanup())
+	if (torture_cleanup_begin())
 		return;
 
 	if (writer_tasks) {
@@ -384,6 +384,7 @@ static void lock_torture_cleanup(void)
 	else
 		lock_torture_print_module_parms(cur_ops,
 						"End of test: SUCCESS");
+	torture_cleanup_end();
 }
 
 static int __init lock_torture_init(void)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 6d1509500d2b..04c4b5afb759 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1514,7 +1514,7 @@ rcu_torture_cleanup(void)
 	int i;
 
 	rcutorture_record_test_transition();
-	if (torture_cleanup()) {
+	if (torture_cleanup_begin()) {
 		if (cur_ops->cb_barrier != NULL)
 			cur_ops->cb_barrier();
 		return;
@@ -1566,6 +1566,7 @@ rcu_torture_cleanup(void)
 					       "End of test: RCU_HOTPLUG");
 	else
 		rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
+	torture_cleanup_end();
 }
 
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
diff --git a/kernel/torture.c b/kernel/torture.c
index ede8b25ec1ae..dd70993c266c 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -633,8 +633,13 @@ EXPORT_SYMBOL_GPL(torture_init_end);
  *
  * This must be called before the caller starts shutting down its own
  * kthreads.
+ *
+ * Both torture_cleanup_begin() and torture_cleanup_end() must be paired,
+ * in order to correctly perform the cleanup. They are separated because
+ * threads can still need to reference the torture_type type, thus nullify
+ * only after completing all other relevant calls.
  */
-bool torture_cleanup(void)
+bool torture_cleanup_begin(void)
 {
 	mutex_lock(&fullstop_mutex);
 	if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
@@ -649,12 +654,17 @@ bool torture_cleanup(void)
 	torture_shuffle_cleanup();
 	torture_stutter_cleanup();
 	torture_onoff_cleanup();
+	return false;
+}
+EXPORT_SYMBOL_GPL(torture_cleanup_begin);
+
+void torture_cleanup_end(void)
+{
 	mutex_lock(&fullstop_mutex);
 	torture_type = NULL;
 	mutex_unlock(&fullstop_mutex);
-	return false;
 }
-EXPORT_SYMBOL_GPL(torture_cleanup);
+EXPORT_SYMBOL_GPL(torture_cleanup_end);
 
 /*
  * Is it time for the current torture test to stop?
-- 
cgit v1.2.3


From 59da22a02032cf1a069ec431f93d403b321ff6b4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 12 Sep 2014 10:36:15 -0700
Subject: rcutorture: Rename rcutorture_runnable parameter

This commit changes rcutorture_runnable to torture_runnable, which is
consistent with the names of the other parameters and is a bit shorter
as well.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/kernel-parameters.txt                             | 2 +-
 include/linux/rcupdate.h                                        | 3 ---
 kernel/rcu/rcutorture.c                                         | 8 ++++----
 kernel/sysctl.c                                                 | 9 ---------
 tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh | 2 +-
 5 files changed, 6 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e1147bc62633..7aba744afcde 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2938,7 +2938,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Set time (s) between CPU-hotplug operations, or
 			zero to disable CPU-hotplug testing.
 
-	rcutorture.rcutorture_runnable= [BOOT]
+	rcutorture.torture_runnable= [BOOT]
 			Start rcutorture running at boot time.
 
 	rcutorture.shuffle_interval= [KNL]
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5cafd60c1ee4..a4a819ffb2d1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -47,9 +47,6 @@
 #include <asm/barrier.h>
 
 extern int rcu_expedited; /* for sysctl */
-#ifdef CONFIG_RCU_TORTURE_TEST
-extern int rcutorture_runnable; /* for sysctl */
-#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
 enum rcutorture_type {
 	RCU_FLAVOR,
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 04c4b5afb759..240fa9094f83 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -168,9 +168,9 @@ static int rcu_torture_writer_state;
 #else
 #define RCUTORTURE_RUNNABLE_INIT 0
 #endif
-int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
-module_param(rcutorture_runnable, int, 0444);
-MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
+static int torture_runnable = RCUTORTURE_RUNNABLE_INIT;
+module_param(torture_runnable, int, 0444);
+MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
 
 #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
 #define rcu_can_boost() 1
@@ -1636,7 +1636,7 @@ rcu_torture_init(void)
 		RCUTORTURE_TASKS_OPS
 	};
 
-	if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
+	if (!torture_init_begin(torture_type, verbose, &torture_runnable))
 		return -EBUSY;
 
 	/* Process args and tell the world that the torturer is on the job. */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75875a741b5e..ab456664609d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1055,15 +1055,6 @@ static struct ctl_table kern_table[] = {
 		.child		= key_sysctls,
 	},
 #endif
-#ifdef CONFIG_RCU_TORTURE_TEST
-	{
-		.procname       = "rcutorture_runnable",
-		.data           = &rcutorture_runnable,
-		.maxlen         = sizeof(int),
-		.mode           = 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
 #ifdef CONFIG_PERF_EVENTS
 	/*
 	 * User-space scripts rely on the existence of this file
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index 8977d8d31b19..ffb85ed786fa 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -51,7 +51,7 @@ per_version_boot_params () {
 		`rcutorture_param_n_barrier_cbs "$1"` \
 		rcutorture.stat_interval=15 \
 		rcutorture.shutdown_secs=$3 \
-		rcutorture.rcutorture_runnable=1 \
+		rcutorture.torture_runnable=1 \
 		rcutorture.test_no_idle_hz=1 \
 		rcutorture.verbose=1
 }
-- 
cgit v1.2.3


From ce6eacb07e287c0c3c8b5c316a9a7f9cecf69e95 Mon Sep 17 00:00:00 2001
From: Bruno Prémont <bonbons@linux-vserver.org>
Date: Sun, 24 Aug 2014 23:13:15 +0200
Subject: vgaarb: Drop obsolete #ifndef
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 20cde694027e ("x86, ia64: Move EFI_FB vga_default_device()
initialization to pci_vga_fixup()") moved boot video device detection from
efifb to x86 and ia64 pci/fixup.c.

Remove the left-over #ifndef check that will always match since the
corresponding arch-specific define is gone with above patch.

Signed-off-by: Bruno Prémont <bonbons@linux-vserver.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Matthew Garrett <matthew.garrett@nebula.com>
---
 drivers/gpu/vga/vgaarb.c | 8 --------
 include/linux/vgaarb.h   | 2 --
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 24ac52e6cd41..77711623b973 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -113,10 +113,8 @@ both:
 	return 1;
 }
 
-#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
 /* this is only used a cookie - it should not be dereferenced */
 static struct pci_dev *vga_default;
-#endif
 
 static void vga_arb_device_card_gone(struct pci_dev *pdev);
 
@@ -132,7 +130,6 @@ static struct vga_device *vgadev_find(struct pci_dev *pdev)
 }
 
 /* Returns the default VGA device (vgacon's babe) */
-#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
 struct pci_dev *vga_default_device(void)
 {
 	return vga_default;
@@ -148,7 +145,6 @@ void vga_set_default_device(struct pci_dev *pdev)
 	pci_dev_put(vga_default);
 	vga_default = pci_dev_get(pdev);
 }
-#endif
 
 static inline void vga_irq_set_state(struct vga_device *vgadev, bool state)
 {
@@ -584,14 +580,12 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
 	/* Deal with VGA default device. Use first enabled one
 	 * by default if arch doesn't have it's own hook
 	 */
-#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
 	if (vga_default == NULL &&
 	    ((vgadev->owns & VGA_RSRC_LEGACY_MASK) == VGA_RSRC_LEGACY_MASK)) {
 		pr_info("vgaarb: setting as boot device: PCI:%s\n",
 			pci_name(pdev));
 		vga_set_default_device(pdev);
 	}
-#endif
 
 	vga_arbiter_check_bridge_sharing(vgadev);
 
@@ -625,10 +619,8 @@ static bool vga_arbiter_del_pci_device(struct pci_dev *pdev)
 		goto bail;
 	}
 
-#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
 	if (vga_default == pdev)
 		vga_set_default_device(NULL);
-#endif
 
 	if (vgadev->decodes & (VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM))
 		vga_decode_count--;
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index 2c02f3a8d2ba..c37bd4d06739 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -182,7 +182,6 @@ extern void vga_put(struct pci_dev *pdev, unsigned int rsrc);
  *     vga_get()...
  */
 
-#ifndef __ARCH_HAS_VGA_DEFAULT_DEVICE
 #ifdef CONFIG_VGA_ARB
 extern struct pci_dev *vga_default_device(void);
 extern void vga_set_default_device(struct pci_dev *pdev);
@@ -190,7 +189,6 @@ extern void vga_set_default_device(struct pci_dev *pdev);
 static inline struct pci_dev *vga_default_device(void) { return NULL; };
 static inline void vga_set_default_device(struct pci_dev *pdev) { };
 #endif
-#endif
 
 /**
  *     vga_conflicts
-- 
cgit v1.2.3


From eaacabc0d9b637c82788c66955b4ba0efebd5500 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Mon, 15 Sep 2014 16:15:01 -0500
Subject: irqchip: add irq-omap-intc.h header

OMAP INTC irqchip driver will be moved under
drivers/irqchip/ soon but we still have a dependency
with mach-omap2 when it comes to idle functions.

In order to make it easy to share those function
prototypes with OMAP PM code, we introduce this new
header.

To avoid modifying several board-files and some of
the PM-related code, we just include the new header
from common.h which was already included by all
users of IRQ-related PM code.

Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/common.h          | 10 +---------
 include/linux/irqchip/irq-omap-intc.h | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 9 deletions(-)
 create mode 100644 include/linux/irqchip/irq-omap-intc.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 180009343adb..377eea849e7b 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -32,6 +32,7 @@
 #include <linux/i2c/twl.h>
 #include <linux/i2c-omap.h>
 #include <linux/reboot.h>
+#include <linux/irqchip/irq-omap-intc.h>
 
 #include <asm/proc-fns.h>
 
@@ -210,15 +211,6 @@ extern struct device *omap2_get_iva_device(void);
 extern struct device *omap2_get_l3_device(void);
 extern struct device *omap4_get_dsp_device(void);
 
-void omap2_init_irq(void);
-void omap3_init_irq(void);
-void ti81xx_init_irq(void);
-extern int omap_irq_pending(void);
-void omap_intc_save_context(void);
-void omap_intc_restore_context(void);
-void omap3_intc_suspend(void);
-void omap3_intc_prepare_idle(void);
-void omap3_intc_resume_idle(void);
 void omap_gic_of_init(void);
 
 #ifdef CONFIG_CACHE_L2X0
diff --git a/include/linux/irqchip/irq-omap-intc.h b/include/linux/irqchip/irq-omap-intc.h
new file mode 100644
index 000000000000..e06b370cfc0d
--- /dev/null
+++ b/include/linux/irqchip/irq-omap-intc.h
@@ -0,0 +1,32 @@
+/**
+ * irq-omap-intc.h - INTC Idle Functions
+ *
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Author: Felipe Balbi <balbi@ti.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2  of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __INCLUDE_LINUX_IRQCHIP_IRQ_OMAP_INTC_H
+#define __INCLUDE_LINUX_IRQCHIP_IRQ_OMAP_INTC_H
+
+void omap2_init_irq(void);
+void omap3_init_irq(void);
+void ti81xx_init_irq(void);
+
+int omap_irq_pending(void);
+void omap_intc_save_context(void);
+void omap_intc_restore_context(void);
+void omap3_intc_suspend(void);
+void omap3_intc_prepare_idle(void);
+void omap3_intc_resume_idle(void);
+
+#endif /* __INCLUDE_LINUX_IRQCHIP_IRQ_OMAP_INTC_H */
-- 
cgit v1.2.3


From ce0529843a505d09f5809a7db6288d2f038f64c4 Mon Sep 17 00:00:00 2001
From: Ethan Zhao <ethan.zhao@oracle.com>
Date: Tue, 9 Sep 2014 10:21:25 +0800
Subject: PCI: Add device flag helper functions

Add helper functions to hide direct device flag operations:

    void pci_set_dev_assigned(struct pci_dev *dev);
    void pci_clear_dev_assigned(struct pci_dev *dev);
    bool pci_is_dev_assigned(struct pci_dev *dev);

Signed-off-by: Ethan Zhao <ethan.zhao@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 61978a460841..92c131efec1c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1839,4 +1839,17 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
  */
 struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
 
+/* helper functions for operation of device flag */
+static inline void pci_set_dev_assigned(struct pci_dev *pdev)
+{
+	pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+}
+static inline void pci_clear_dev_assigned(struct pci_dev *pdev)
+{
+	pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+}
+static inline bool pci_is_dev_assigned(struct pci_dev *pdev)
+{
+	return (pdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) == PCI_DEV_FLAGS_ASSIGNED;
+}
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From e0d1b6b77ced59d852d38fcf9a8a0a1c40c84cee Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 5 Aug 2014 14:08:55 +0200
Subject: PCI/AER: Make <linux/aer.h> standalone includable

The header file references u16 and u32 types, but they are not defined in
the header nor does the header pull in the necessary includes for them.
This causes build breakage when the file is included without any of the
dependencies being satisfied from somewhere else.

Fix this by including linux/types.h (for u16 and u32).

[bhelgaas: removed pci_dev declaration (already added by 5ccb8225abf2)]
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/aer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/aer.h b/include/linux/aer.h
index c826d1c28f9c..4fef65e57023 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -7,6 +7,8 @@
 #ifndef _AER_H_
 #define _AER_H_
 
+#include <linux/types.h>
+
 #define AER_NONFATAL			0
 #define AER_FATAL			1
 #define AER_CORRECTABLE			2
-- 
cgit v1.2.3


From 63ddc0b8fe5ebbac88e2ac84b489470bf3a22965 Mon Sep 17 00:00:00 2001
From: Megan Kamiya <megan.a.kamiya@intel.com>
Date: Fri, 5 Sep 2014 20:19:10 -0700
Subject: PCI: Parenthesize PCI_DEVID and PCI_VPD_LRDT_ID parameters

Add parentheses around parameters in PCI_DEVID and PCI_VPD_LRDT_ID macros
to prevent possible expansion errors as described by the CERT Secure Coding
Standard: PRE01-C: Use parentheses within macros around parameter names

Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Megan Kamiya <megan.a.kamiya@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 61978a460841..cb744f3925a4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -45,7 +45,7 @@
  * In the interest of not exposing interfaces to user-space unnecessarily,
  * the following kernel-only defines are being added here.
  */
-#define PCI_DEVID(bus, devfn)  ((((u16)bus) << 8) | devfn)
+#define PCI_DEVID(bus, devfn)  ((((u16)(bus)) << 8) | (devfn))
 /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */
 #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
 
@@ -1701,7 +1701,7 @@ bool pci_acs_path_enabled(struct pci_dev *start,
 			  struct pci_dev *end, u16 acs_flags);
 
 #define PCI_VPD_LRDT			0x80	/* Large Resource Data Type */
-#define PCI_VPD_LRDT_ID(x)		(x | PCI_VPD_LRDT)
+#define PCI_VPD_LRDT_ID(x)		((x) | PCI_VPD_LRDT)
 
 /* Large Resource Data Type Tag Item Names */
 #define PCI_VPD_LTIN_ID_STRING		0x02	/* Identifier String */
-- 
cgit v1.2.3


From fef775caa705255358cdf7bbaf9bbc2fd1111761 Mon Sep 17 00:00:00 2001
From: Ezequiel García <ezequiel@vanguardiasur.com.ar>
Date: Thu, 11 Sep 2014 12:02:08 -0300
Subject: nand: omap2: Add support for flash-based bad block table

This commit adds a new platform-data boolean property that enables use
of a flash-based bad block table. This can also be enabled by setting
the 'nand-on-flash-bbt' devicetree property.

If the flash BBT is not enabled, the driver falls back to use OOB
bad block markers only, as before. If the flash BBT is enabled the
kernel will keep track of bad blocks using a BBT, in addition to
the OOB markers.

As explained by Brian Norris the reasons for using a BBT are:

""
The primary reason would be that NAND datasheets specify it these days.
A better argument is that nobody guarantees that you can write a
bad block marker to a worn out block; you may just get program failures.

This has been acknowledged by several developers over the last several
years.

Additionally, you get a boot-time performance improvement if you only
have to read a few pages, instead of a page or two from every block on
the flash.
""

Signed-off-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Acked-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 arch/arm/mach-omap2/gpmc.c                   | 2 ++
 drivers/mtd/nand/omap2.c                     | 6 +++++-
 include/linux/platform_data/mtd-nand-omap2.h | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 2f97228f188a..b55a225387cd 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -1440,6 +1440,8 @@ static int gpmc_probe_nand_child(struct platform_device *pdev,
 				break;
 			}
 
+	gpmc_nand_data->flash_bbt = of_get_nand_on_flash_bbt(child);
+
 	val = of_get_nand_bus_width(child);
 	if (val == 16)
 		gpmc_nand_data->devsize = NAND_BUSWIDTH_16;
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 5967b385141b..e1a9b310c159 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1663,7 +1663,6 @@ static int omap_nand_probe(struct platform_device *pdev)
 	mtd->owner		= THIS_MODULE;
 	nand_chip		= &info->nand;
 	nand_chip->ecc.priv	= NULL;
-	nand_chip->options	|= NAND_SKIP_BBTSCAN;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	nand_chip->IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res);
@@ -1692,6 +1691,11 @@ static int omap_nand_probe(struct platform_device *pdev)
 		nand_chip->chip_delay = 50;
 	}
 
+	if (pdata->flash_bbt)
+		nand_chip->bbt_options |= NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB;
+	else
+		nand_chip->options |= NAND_SKIP_BBTSCAN;
+
 	/* scan NAND device connected to chip controller */
 	nand_chip->options |= pdata->devsize & NAND_BUSWIDTH_16;
 	if (nand_scan_ident(mtd, 1, NULL)) {
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index 16ec262dfcc8..090bbab0130a 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -71,6 +71,7 @@ struct omap_nand_platform_data {
 	struct mtd_partition	*parts;
 	int			nr_parts;
 	bool			dev_ready;
+	bool			flash_bbt;
 	enum nand_io		xfer_type;
 	int			devsize;
 	enum omap_ecc           ecc_opt;
-- 
cgit v1.2.3


From d60eacb07053142bfb9b41582074a89a790a9d46 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:33 +0100
Subject: KVM: device: add simple registration mechanism for kvm_device_ops

kvm_ioctl_create_device currently has knowledge of all the device types
and their associated ops. This is fairly inflexible when adding support
for new in-kernel device emulations, so move what we currently have out
into a table, which can support dynamic registration of ops by new
drivers for virtual hardware.

Cc: Alex Williamson <Alex.Williamson@redhat.com>
Cc: Alex Graf <agraf@suse.de>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 include/uapi/linux/kvm.h | 22 +++++++++++-----
 virt/kvm/kvm_main.c      | 65 ++++++++++++++++++++++++++++--------------------
 3 files changed, 55 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e098dce179df..b6e954742951 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1064,6 +1064,7 @@ struct kvm_device_ops {
 void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0695a1e3e332..60768822b140 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -943,15 +943,25 @@ struct kvm_device_attr {
 	__u64	addr;		/* userspace address of attr data */
 };
 
-#define KVM_DEV_TYPE_FSL_MPIC_20	1
-#define KVM_DEV_TYPE_FSL_MPIC_42	2
-#define KVM_DEV_TYPE_XICS		3
-#define KVM_DEV_TYPE_VFIO		4
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
-#define KVM_DEV_TYPE_ARM_VGIC_V2	5
-#define KVM_DEV_TYPE_FLIC		6
+
+enum kvm_device_type {
+	KVM_DEV_TYPE_FSL_MPIC_20	= 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20	KVM_DEV_TYPE_FSL_MPIC_20
+	KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42	KVM_DEV_TYPE_FSL_MPIC_42
+	KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS		KVM_DEV_TYPE_XICS
+	KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO		KVM_DEV_TYPE_VFIO
+	KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2	KVM_DEV_TYPE_ARM_VGIC_V2
+	KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
+	KVM_DEV_TYPE_MAX,
+};
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c338599804e0..686d783387a0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2272,44 +2272,55 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
 	return filp->private_data;
 }
 
-static int kvm_ioctl_create_device(struct kvm *kvm,
-				   struct kvm_create_device *cd)
-{
-	struct kvm_device_ops *ops = NULL;
-	struct kvm_device *dev;
-	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
-	int ret;
-
-	switch (cd->type) {
+static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_MPIC
-	case KVM_DEV_TYPE_FSL_MPIC_20:
-	case KVM_DEV_TYPE_FSL_MPIC_42:
-		ops = &kvm_mpic_ops;
-		break;
+	[KVM_DEV_TYPE_FSL_MPIC_20]	= &kvm_mpic_ops,
+	[KVM_DEV_TYPE_FSL_MPIC_42]	= &kvm_mpic_ops,
 #endif
+
 #ifdef CONFIG_KVM_XICS
-	case KVM_DEV_TYPE_XICS:
-		ops = &kvm_xics_ops;
-		break;
+	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
 #endif
+
 #ifdef CONFIG_KVM_VFIO
-	case KVM_DEV_TYPE_VFIO:
-		ops = &kvm_vfio_ops;
-		break;
+	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
+
 #ifdef CONFIG_KVM_ARM_VGIC
-	case KVM_DEV_TYPE_ARM_VGIC_V2:
-		ops = &kvm_arm_vgic_v2_ops;
-		break;
+	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
 #endif
+
 #ifdef CONFIG_S390
-	case KVM_DEV_TYPE_FLIC:
-		ops = &kvm_flic_ops;
-		break;
+	[KVM_DEV_TYPE_FLIC]		= &kvm_flic_ops,
 #endif
-	default:
+};
+
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+{
+	if (type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENOSPC;
+
+	if (kvm_device_ops_table[type] != NULL)
+		return -EEXIST;
+
+	kvm_device_ops_table[type] = ops;
+	return 0;
+}
+
+static int kvm_ioctl_create_device(struct kvm *kvm,
+				   struct kvm_create_device *cd)
+{
+	struct kvm_device_ops *ops = NULL;
+	struct kvm_device *dev;
+	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+	int ret;
+
+	if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENODEV;
+
+	ops = kvm_device_ops_table[cd->type];
+	if (ops == NULL)
 		return -ENODEV;
-	}
 
 	if (test)
 		return 0;
-- 
cgit v1.2.3


From c06a841bf36340e9e917ce60d11a6425ac85d0bd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:34 +0100
Subject: KVM: ARM: vgic: register kvm_device_ops dynamically

Now that we have a dynamic means to register kvm_device_ops, use that
for the ARM VGIC, instead of relying on the static table.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |   1 -
 virt/kvm/arm/vgic.c      | 157 ++++++++++++++++++++++++-----------------------
 virt/kvm/kvm_main.c      |   4 --
 3 files changed, 79 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b6e954742951..601d321f96e7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1069,7 +1069,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
-extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_flic_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba793b17f..3ee3ce06bbec 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1522,83 +1522,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void vgic_init_maintenance_interrupt(void *info)
-{
-	enable_percpu_irq(vgic->maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
-			   unsigned long action, void *cpu)
-{
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		vgic_init_maintenance_interrupt(NULL);
-		break;
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic->maint_irq);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
-	.notifier_call = vgic_cpu_notify,
-};
-
-static const struct of_device_id vgic_ids[] = {
-	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
-	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
-	{},
-};
-
-int kvm_vgic_hyp_init(void)
-{
-	const struct of_device_id *matched_id;
-	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
-			  const struct vgic_params **);
-	struct device_node *vgic_node;
-	int ret;
-
-	vgic_node = of_find_matching_node_and_match(NULL,
-						    vgic_ids, &matched_id);
-	if (!vgic_node) {
-		kvm_err("error: no compatible GIC node found\n");
-		return -ENODEV;
-	}
-
-	vgic_probe = matched_id->data;
-	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
-	if (ret)
-		return ret;
-
-	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
-				 "vgic", kvm_get_running_vcpus());
-	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
-		return ret;
-	}
-
-	ret = __register_cpu_notifier(&vgic_cpu_nb);
-	if (ret) {
-		kvm_err("Cannot register vgic CPU notifier\n");
-		goto out_free_irq;
-	}
-
-	/* Callback into for arch code for setup */
-	vgic_arch_setup(vgic);
-
-	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
-	return 0;
-
-out_free_irq:
-	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
-	return ret;
-}
-
 /**
  * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
@@ -2062,7 +1985,7 @@ static int vgic_create(struct kvm_device *dev, u32 type)
 	return kvm_vgic_create(dev->kvm);
 }
 
-struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
 	.name = "kvm-arm-vgic",
 	.create = vgic_create,
 	.destroy = vgic_destroy,
@@ -2070,3 +1993,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
 	.get_attr = vgic_get_attr,
 	.has_attr = vgic_has_attr,
 };
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+	enable_percpu_irq(vgic->maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+			   unsigned long action, void *cpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		vgic_init_maintenance_interrupt(NULL);
+		break;
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		disable_percpu_irq(vgic->maint_irq);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+	.notifier_call = vgic_cpu_notify,
+};
+
+static const struct of_device_id vgic_ids[] = {
+	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+	{},
+};
+
+int kvm_vgic_hyp_init(void)
+{
+	const struct of_device_id *matched_id;
+	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+			  const struct vgic_params **);
+	struct device_node *vgic_node;
+	int ret;
+
+	vgic_node = of_find_matching_node_and_match(NULL,
+						    vgic_ids, &matched_id);
+	if (!vgic_node) {
+		kvm_err("error: no compatible GIC node found\n");
+		return -ENODEV;
+	}
+
+	vgic_probe = matched_id->data;
+	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+	if (ret)
+		return ret;
+
+	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
+				 "vgic", kvm_get_running_vcpus());
+	if (ret) {
+		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+		return ret;
+	}
+
+	ret = __register_cpu_notifier(&vgic_cpu_nb);
+	if (ret) {
+		kvm_err("Cannot register vgic CPU notifier\n");
+		goto out_free_irq;
+	}
+
+	/* Callback into for arch code for setup */
+	vgic_arch_setup(vgic);
+
+	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+	return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+				       KVM_DEV_TYPE_ARM_VGIC_V2);
+
+out_free_irq:
+	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
+	return ret;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 686d783387a0..68d96f5dbfe2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2286,10 +2286,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
 
-#ifdef CONFIG_KVM_ARM_VGIC
-	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
-#endif
-
 #ifdef CONFIG_S390
 	[KVM_DEV_TYPE_FLIC]		= &kvm_flic_ops,
 #endif
-- 
cgit v1.2.3


From 84877d93336de21a6251db00b841468a83c65906 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Tue, 2 Sep 2014 10:27:35 +0100
Subject: KVM: s390: register flic ops dynamically

Using the new kvm_register_device_ops() interface makes us get rid of
an #ifdef in common code.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/s390/kvm/kvm-s390.c | 3 ++-
 arch/s390/kvm/kvm-s390.h | 1 +
 include/linux/kvm_host.h | 1 -
 virt/kvm/kvm_main.c      | 4 ----
 4 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b95d4a481b0c..56a411c0245a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -122,7 +122,8 @@ void kvm_arch_hardware_unsetup(void)
 
 int kvm_arch_init(void *opaque)
 {
-	return 0;
+	/* Register floating interrupt controller interface. */
+	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 }
 
 /* Section: device related */
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index b1a77669137b..244d02303182 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -227,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
 void kvm_s390_destroy_adapters(struct kvm *kvm);
 int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
+extern struct kvm_device_ops kvm_flic_ops;
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 601d321f96e7..7ef088bec715 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1069,7 +1069,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
-extern struct kvm_device_ops kvm_flic_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 68d96f5dbfe2..f4e792fedb4f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2285,10 +2285,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_VFIO
 	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
-
-#ifdef CONFIG_S390
-	[KVM_DEV_TYPE_FLIC]		= &kvm_flic_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
-- 
cgit v1.2.3


From 80ce1639727e9d38729c34f162378508c307ca25 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:36 +0100
Subject: KVM: VFIO: register kvm_device_ops dynamically

Now that we have a dynamic means to register kvm_device_ops, use that
for the VFIO kvm device, instead of relying on the static table.

This is achieved by a module_init call to register the ops with KVM.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Alex Williamson <Alex.Williamson@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 -
 virt/kvm/kvm_main.c      |  4 ----
 virt/kvm/vfio.c          | 22 +++++++++++++++-------
 3 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7ef088bec715..d44a2d640551 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1068,7 +1068,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
-extern struct kvm_device_ops kvm_vfio_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f4e792fedb4f..db57363cc287 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2281,10 +2281,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_XICS
 	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
 #endif
-
-#ifdef CONFIG_KVM_VFIO
-	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ba1a93f935c7..bb11b36ee8a2 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -246,6 +246,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
 }
 
+static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+
+static struct kvm_device_ops kvm_vfio_ops = {
+	.name = "kvm-vfio",
+	.create = kvm_vfio_create,
+	.destroy = kvm_vfio_destroy,
+	.set_attr = kvm_vfio_set_attr,
+	.has_attr = kvm_vfio_has_attr,
+};
+
 static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 {
 	struct kvm_device *tmp;
@@ -268,10 +278,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 	return 0;
 }
 
-struct kvm_device_ops kvm_vfio_ops = {
-	.name = "kvm-vfio",
-	.create = kvm_vfio_create,
-	.destroy = kvm_vfio_destroy,
-	.set_attr = kvm_vfio_set_attr,
-	.has_attr = kvm_vfio_has_attr,
-};
+static int __init kvm_vfio_ops_init(void)
+{
+	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
+}
+module_init(kvm_vfio_ops_init);
-- 
cgit v1.2.3


From f7790029655f79cdcee4fa7c7884e0c2795ebebe Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Fri, 12 Sep 2014 16:40:20 -0400
Subject: lockd: move lockd's grace period handling into its own module

Currently, all of the grace period handling is part of lockd. Eventually
though we'd like to be able to build v4-only servers, at which point
we'll need to put all of this elsewhere.

Move the code itself into fs/nfs_common and have it build a grace.ko
module. Then, rejigger the Kconfig options so that both nfsd and lockd
enable it automatically.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 fs/Kconfig              |   6 ++-
 fs/lockd/Makefile       |   2 +-
 fs/lockd/grace.c        |  65 ----------------------------
 fs/lockd/netns.h        |   1 -
 fs/lockd/svc.c          |   2 +-
 fs/nfs_common/Makefile  |   3 +-
 fs/nfs_common/grace.c   | 113 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/Kconfig         |   1 +
 include/linux/proc_fs.h |   2 +
 9 files changed, 125 insertions(+), 70 deletions(-)
 delete mode 100644 fs/lockd/grace.c
 create mode 100644 fs/nfs_common/grace.c

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index 312393f32948..db5dc1598716 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -233,9 +233,13 @@ if NETWORK_FILESYSTEMS
 source "fs/nfs/Kconfig"
 source "fs/nfsd/Kconfig"
 
+config GRACE_PERIOD
+	tristate
+
 config LOCKD
 	tristate
 	depends on FILE_LOCKING
+	select GRACE_PERIOD
 
 config LOCKD_V4
 	bool
@@ -249,7 +253,7 @@ config NFS_ACL_SUPPORT
 
 config NFS_COMMON
 	bool
-	depends on NFSD || NFS_FS
+	depends on NFSD || NFS_FS || LOCKD
 	default y
 
 source "net/sunrpc/Kconfig"
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index ca58d64374ca..6a0b351ce30e 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -5,6 +5,6 @@
 obj-$(CONFIG_LOCKD) += lockd.o
 
 lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
-	        svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o
+	        svcshare.o svcproc.o svcsubs.o mon.o xdr.o
 lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
 lockd-objs		      := $(lockd-objs-y)
diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c
deleted file mode 100644
index 6d1ee7204c88..000000000000
--- a/fs/lockd/grace.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Common code for control of lockd and nfsv4 grace periods.
- */
-
-#include <linux/module.h>
-#include <linux/lockd/bind.h>
-#include <net/net_namespace.h>
-
-#include "netns.h"
-
-static DEFINE_SPINLOCK(grace_lock);
-
-/**
- * locks_start_grace
- * @lm: who this grace period is for
- *
- * A grace period is a period during which locks should not be given
- * out.  Currently grace periods are only enforced by the two lock
- * managers (lockd and nfsd), using the locks_in_grace() function to
- * check when they are in a grace period.
- *
- * This function is called to start a grace period.
- */
-void locks_start_grace(struct net *net, struct lock_manager *lm)
-{
-	struct lockd_net *ln = net_generic(net, lockd_net_id);
-
-	spin_lock(&grace_lock);
-	list_add(&lm->list, &ln->grace_list);
-	spin_unlock(&grace_lock);
-}
-EXPORT_SYMBOL_GPL(locks_start_grace);
-
-/**
- * locks_end_grace
- * @lm: who this grace period is for
- *
- * Call this function to state that the given lock manager is ready to
- * resume regular locking.  The grace period will not end until all lock
- * managers that called locks_start_grace() also call locks_end_grace().
- * Note that callers count on it being safe to call this more than once,
- * and the second call should be a no-op.
- */
-void locks_end_grace(struct lock_manager *lm)
-{
-	spin_lock(&grace_lock);
-	list_del_init(&lm->list);
-	spin_unlock(&grace_lock);
-}
-EXPORT_SYMBOL_GPL(locks_end_grace);
-
-/**
- * locks_in_grace
- *
- * Lock managers call this function to determine when it is OK for them
- * to answer ordinary lock requests, and when they should accept only
- * lock reclaims.
- */
-int locks_in_grace(struct net *net)
-{
-	struct lockd_net *ln = net_generic(net, lockd_net_id);
-
-	return !list_empty(&ln->grace_list);
-}
-EXPORT_SYMBOL_GPL(locks_in_grace);
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 5010b55628b4..097bfa3adb1c 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -11,7 +11,6 @@ struct lockd_net {
 
 	struct delayed_work grace_period_end;
 	struct lock_manager lockd_manager;
-	struct list_head grace_list;
 
 	spinlock_t nsm_clnt_lock;
 	unsigned int nsm_users;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 09857b48d0c3..266b67972305 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -586,7 +586,7 @@ static int lockd_init_net(struct net *net)
 	struct lockd_net *ln = net_generic(net, lockd_net_id);
 
 	INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
-	INIT_LIST_HEAD(&ln->grace_list);
+	INIT_LIST_HEAD(&ln->lockd_manager.list);
 	spin_lock_init(&ln->nsm_clnt_lock);
 	return 0;
 }
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile
index f689ed82af3a..d153ca3ea577 100644
--- a/fs/nfs_common/Makefile
+++ b/fs/nfs_common/Makefile
@@ -3,5 +3,6 @@
 #
 
 obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
-
 nfs_acl-objs := nfsacl.o
+
+obj-$(CONFIG_GRACE_PERIOD) += grace.o
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
new file mode 100644
index 000000000000..ae6e58ea4de5
--- /dev/null
+++ b/fs/nfs_common/grace.c
@@ -0,0 +1,113 @@
+/*
+ * Common code for control of lockd and nfsv4 grace periods.
+ *
+ * Transplanted from lockd code
+ */
+
+#include <linux/module.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <linux/fs.h>
+
+static int grace_net_id;
+static DEFINE_SPINLOCK(grace_lock);
+
+/**
+ * locks_start_grace
+ * @net: net namespace that this lock manager belongs to
+ * @lm: who this grace period is for
+ *
+ * A grace period is a period during which locks should not be given
+ * out.  Currently grace periods are only enforced by the two lock
+ * managers (lockd and nfsd), using the locks_in_grace() function to
+ * check when they are in a grace period.
+ *
+ * This function is called to start a grace period.
+ */
+void
+locks_start_grace(struct net *net, struct lock_manager *lm)
+{
+	struct list_head *grace_list = net_generic(net, grace_net_id);
+
+	spin_lock(&grace_lock);
+	list_add(&lm->list, grace_list);
+	spin_unlock(&grace_lock);
+}
+EXPORT_SYMBOL_GPL(locks_start_grace);
+
+/**
+ * locks_end_grace
+ * @net: net namespace that this lock manager belongs to
+ * @lm: who this grace period is for
+ *
+ * Call this function to state that the given lock manager is ready to
+ * resume regular locking.  The grace period will not end until all lock
+ * managers that called locks_start_grace() also call locks_end_grace().
+ * Note that callers count on it being safe to call this more than once,
+ * and the second call should be a no-op.
+ */
+void
+locks_end_grace(struct lock_manager *lm)
+{
+	spin_lock(&grace_lock);
+	list_del_init(&lm->list);
+	spin_unlock(&grace_lock);
+}
+EXPORT_SYMBOL_GPL(locks_end_grace);
+
+/**
+ * locks_in_grace
+ *
+ * Lock managers call this function to determine when it is OK for them
+ * to answer ordinary lock requests, and when they should accept only
+ * lock reclaims.
+ */
+int
+locks_in_grace(struct net *net)
+{
+	struct list_head *grace_list = net_generic(net, grace_net_id);
+
+	return !list_empty(grace_list);
+}
+EXPORT_SYMBOL_GPL(locks_in_grace);
+
+static int __net_init
+grace_init_net(struct net *net)
+{
+	struct list_head *grace_list = net_generic(net, grace_net_id);
+
+	INIT_LIST_HEAD(grace_list);
+	return 0;
+}
+
+static void __net_exit
+grace_exit_net(struct net *net)
+{
+	struct list_head *grace_list = net_generic(net, grace_net_id);
+
+	BUG_ON(!list_empty(grace_list));
+}
+
+static struct pernet_operations grace_net_ops = {
+	.init = grace_init_net,
+	.exit = grace_exit_net,
+	.id   = &grace_net_id,
+	.size = sizeof(struct list_head),
+};
+
+static int __init
+init_grace(void)
+{
+	return register_pernet_subsys(&grace_net_ops);
+}
+
+static void __exit
+exit_grace(void)
+{
+	unregister_pernet_subsys(&grace_net_ops);
+}
+
+MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>");
+MODULE_LICENSE("GPL");
+module_init(init_grace)
+module_exit(exit_grace)
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index f3586b645d7d..73395156bdb4 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -71,6 +71,7 @@ config NFSD_V4
 	select FS_POSIX_ACL
 	select SUNRPC_GSS
 	select CRYPTO
+	select GRACE_PERIOD
 	help
 	  This option enables support in your system's NFS server for
 	  version 4 of the NFS protocol (RFC 3530).
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 9d117f61d976..b97bf2ef996e 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -74,6 +74,8 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p
 
 #endif /* CONFIG_PROC_FS */
 
+struct net;
+
 static inline struct proc_dir_entry *proc_net_mkdir(
 	struct net *net, const char *name, struct proc_dir_entry *parent)
 {
-- 
cgit v1.2.3


From 6213daab2547fdc0d02a86abf3ac209ac6881ae3 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Wed, 17 Sep 2014 18:18:09 +0800
Subject: cgroup: remove some useless forward declarations

Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 1 -
 kernel/cgroup.c        | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b5223c570eba..f7898e0bce1e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -27,7 +27,6 @@
 
 struct cgroup_root;
 struct cgroup_subsys;
-struct inode;
 struct cgroup;
 
 extern int cgroup_init_early(void);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ebd4476c57de..619aae399a3a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -185,7 +185,6 @@ static int need_forkexit_callback __read_mostly;
 static struct cftype cgroup_dfl_base_files[];
 static struct cftype cgroup_legacy_base_files[];
 
-static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
@@ -195,7 +194,6 @@ static void css_release(struct percpu_ref *ref);
 static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
-static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
 
 /* IDR wrappers which synchronize using cgroup_idr_lock */
 static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
-- 
cgit v1.2.3


From 50849db32a9f529235a84bcc84a6b8e631b1d0ec Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 18 Sep 2014 00:58:12 -0400
Subject: jbd2: simplify calling convention around
 __jbd2_journal_clean_checkpoint_list

__jbd2_journal_clean_checkpoint_list() returns number of buffers it
freed but noone was using the value so just stop doing that. This
also allows for simplifying the calling convention for
journal_clean_once_cp_list().

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c | 56 ++++++++++++++++++++++------------------------------
 include/linux/jbd2.h |  2 +-
 2 files changed, 25 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 3ab4c5ee12ce..988b32ed4c87 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -421,16 +421,15 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
  * release them.
  *
  * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
+ * Returns 1 if we freed the transaction, 0 otherwise.
  */
-
-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+static int journal_clean_one_cp_list(struct journal_head *jh)
 {
 	struct journal_head *last_jh;
 	struct journal_head *next_jh = jh;
-	int ret, freed = 0;
+	int ret;
+	int freed = 0;
 
-	*released = 0;
 	if (!jh)
 		return 0;
 
@@ -441,11 +440,9 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
 		ret = __try_to_free_cp_buf(jh);
 		if (!ret)
 			return freed;
-		freed++;
-		if (ret == 2) {
-			*released = 1;
-			return freed;
-		}
+		if (ret == 2)
+			return 1;
+		freed = 1;
 		/*
 		 * This function only frees up some memory
 		 * if possible so we dont have an obligation
@@ -465,53 +462,48 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
  * Find all the written-back checkpoint buffers in the journal and release them.
  *
  * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
  */
-
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
 	int ret;
-	int freed = 0;
-	int released;
 
 	transaction = journal->j_checkpoint_transactions;
 	if (!transaction)
-		goto out;
+		return;
 
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		ret = journal_clean_one_cp_list(transaction->
-				t_checkpoint_list, &released);
+		ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
 		/*
 		 * This function only frees up some memory if possible so we
 		 * dont have an obligation to finish processing. Bail out if
 		 * preemption requested:
 		 */
-		if (need_resched()) {
-			freed += ret;
-			goto out;
-		}
-		if (released) {
-			freed += ret;
+		if (need_resched())
+			return;
+		if (ret)
 			continue;
-		}
 		/*
 		 * It is essential that we are as careful as in the case of
 		 * t_checkpoint_list with removing the buffer from the list as
 		 * we can possibly see not yet submitted buffers on io_list
 		 */
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list, &released);
-		freed += ret;
-		if (need_resched() || !ret)
-			goto out;
+		ret = journal_clean_one_cp_list(transaction->
+				t_checkpoint_io_list);
+		if (need_resched())
+			return;
+		/*
+		 * Stop scanning if we couldn't free the transaction. This
+		 * avoids pointless scanning of transactions which still
+		 * weren't checkpointed.
+		 */
+		if (!ret)
+			return;
 	} while (transaction != last_transaction);
-out:
-	return freed;
 }
 
 /*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0dae71e9971c..704b9a599b26 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1042,7 +1042,7 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 extern void jbd2_journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
-- 
cgit v1.2.3


From b729bf34535ed413667b397a2f59cfa81266facf Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 19 Aug 2014 20:29:19 +0300
Subject: spi/pxa2xx: Don't use slave_id of dma_slave_config

That field has been deprecated in favour of getting the necessary
information from ACPI/DT.

However, we still need to deal systems that are PCI only (no ACPI to back
up). In order to support such systems, we allow the DMA filter function and
its corresponding parameter via pxa2xx_spi_master platform data. Then when
the pxa2xx_spi_dma_setup() doesn't find the channel via ACPI, it falls back
to use the given filter function.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/spi/spi-pxa2xx-dma.c   | 15 ++--------
 drivers/spi/spi-pxa2xx-pci.c   | 64 +++++++++++++++++++++++++++++++-----------
 drivers/spi/spi-pxa2xx.c       |  2 --
 include/linux/spi/pxa2xx_spi.h |  9 +++---
 4 files changed, 54 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index c41ff148a2b4..62a9297e96ac 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -157,7 +157,6 @@ static struct dma_async_tx_descriptor *
 pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 			   enum dma_transfer_direction dir)
 {
-	struct pxa2xx_spi_master *pdata = drv_data->master_info;
 	struct chip_data *chip = drv_data->cur_chip;
 	enum dma_slave_buswidth width;
 	struct dma_slave_config cfg;
@@ -184,7 +183,6 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 		cfg.dst_addr = drv_data->ssdr_physical;
 		cfg.dst_addr_width = width;
 		cfg.dst_maxburst = chip->dma_burst_size;
-		cfg.slave_id = pdata->tx_slave_id;
 
 		sgt = &drv_data->tx_sgt;
 		nents = drv_data->tx_nents;
@@ -193,7 +191,6 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 		cfg.src_addr = drv_data->ssdr_physical;
 		cfg.src_addr_width = width;
 		cfg.src_maxburst = chip->dma_burst_size;
-		cfg.slave_id = pdata->rx_slave_id;
 
 		sgt = &drv_data->rx_sgt;
 		nents = drv_data->rx_nents;
@@ -210,14 +207,6 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 }
 
-static bool pxa2xx_spi_dma_filter(struct dma_chan *chan, void *param)
-{
-	const struct pxa2xx_spi_master *pdata = param;
-
-	return chan->chan_id == pdata->tx_chan_id ||
-	       chan->chan_id == pdata->rx_chan_id;
-}
-
 bool pxa2xx_spi_dma_is_possible(size_t len)
 {
 	return len <= MAX_DMA_LEN;
@@ -321,12 +310,12 @@ int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
 		return -ENOMEM;
 
 	drv_data->tx_chan = dma_request_slave_channel_compat(mask,
-				pxa2xx_spi_dma_filter, pdata, dev, "tx");
+				pdata->dma_filter, pdata->tx_param, dev, "tx");
 	if (!drv_data->tx_chan)
 		return -ENODEV;
 
 	drv_data->rx_chan = dma_request_slave_channel_compat(mask,
-				pxa2xx_spi_dma_filter, pdata, dev, "rx");
+				pdata->dma_filter, pdata->rx_param, dev, "rx");
 	if (!drv_data->rx_chan) {
 		dma_release_channel(drv_data->tx_chan);
 		drv_data->tx_chan = NULL;
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 20ebbc764693..0424b67c983e 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -10,6 +10,9 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 
+#include <linux/dmaengine.h>
+#include <linux/platform_data/dma-dw.h>
+
 enum {
 	PORT_CE4100,
 	PORT_BYT,
@@ -19,33 +22,41 @@ struct pxa_spi_info {
 	enum pxa_ssp_type type;
 	int port_id;
 	int num_chipselect;
-	int tx_slave_id;
-	int tx_chan_id;
-	int rx_slave_id;
-	int rx_chan_id;
 	unsigned long max_clk_rate;
+
+	/* DMA channel request parameters */
+	void *tx_param;
+	void *rx_param;
 };
 
+static struct dw_dma_slave byt_tx_param = { .dst_id = 0 };
+static struct dw_dma_slave byt_rx_param = { .src_id = 1 };
+
+static bool lpss_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct dw_dma_slave *dws = param;
+
+	if (dws->dma_dev != chan->device->dev)
+		return false;
+
+	chan->private = dws;
+	return true;
+}
+
 static struct pxa_spi_info spi_info_configs[] = {
 	[PORT_CE4100] = {
 		.type = PXA25x_SSP,
 		.port_id =  -1,
 		.num_chipselect = -1,
-		.tx_slave_id = -1,
-		.tx_chan_id = -1,
-		.rx_slave_id = -1,
-		.rx_chan_id = -1,
 		.max_clk_rate = 3686400,
 	},
 	[PORT_BYT] = {
 		.type = LPSS_SSP,
 		.port_id = 0,
 		.num_chipselect = 1,
-		.tx_slave_id = 0,
-		.tx_chan_id = 0,
-		.rx_slave_id = 1,
-		.rx_chan_id = 1,
 		.max_clk_rate = 50000000,
+		.tx_param = &byt_tx_param,
+		.rx_param = &byt_rx_param,
 	},
 };
 
@@ -59,6 +70,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	struct ssp_device *ssp;
 	struct pxa_spi_info *c;
 	char buf[40];
+	struct pci_dev *dma_dev;
 
 	ret = pcim_enable_device(dev);
 	if (ret)
@@ -73,11 +85,29 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	memset(&spi_pdata, 0, sizeof(spi_pdata));
 	spi_pdata.num_chipselect = (c->num_chipselect > 0) ?
 					c->num_chipselect : dev->devfn;
-	spi_pdata.tx_slave_id = c->tx_slave_id;
-	spi_pdata.tx_chan_id = c->tx_chan_id;
-	spi_pdata.rx_slave_id = c->rx_slave_id;
-	spi_pdata.rx_chan_id = c->rx_chan_id;
-	spi_pdata.enable_dma = c->rx_slave_id >= 0 && c->tx_slave_id >= 0;
+
+	dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+
+	if (c->tx_param) {
+		struct dw_dma_slave *slave = c->tx_param;
+
+		slave->dma_dev = &dma_dev->dev;
+		slave->src_master = 1;
+		slave->dst_master = 0;
+	}
+
+	if (c->rx_param) {
+		struct dw_dma_slave *slave = c->rx_param;
+
+		slave->dma_dev = &dma_dev->dev;
+		slave->src_master = 1;
+		slave->dst_master = 0;
+	}
+
+	spi_pdata.dma_filter = lpss_dma_filter;
+	spi_pdata.tx_param = c->tx_param;
+	spi_pdata.rx_param = c->rx_param;
+	spi_pdata.enable_dma = c->rx_param && c->tx_param;
 
 	ssp = &spi_pdata.ssp;
 	ssp->phys_base = pci_resource_start(dev, 0);
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index fe792106bdc5..256c0abbddd2 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1062,8 +1062,6 @@ pxa2xx_spi_acpi_get_pdata(struct platform_device *pdev)
 
 	pdata->num_chipselect = 1;
 	pdata->enable_dma = true;
-	pdata->tx_chan_id = -1;
-	pdata->rx_chan_id = -1;
 
 	return pdata;
 }
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 82d5111cd0c2..d5a316550177 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -23,6 +23,8 @@
 #define PXA2XX_CS_ASSERT (0x01)
 #define PXA2XX_CS_DEASSERT (0x02)
 
+struct dma_chan;
+
 /* device.platform_data for SSP controller devices */
 struct pxa2xx_spi_master {
 	u32 clock_enable;
@@ -30,10 +32,9 @@ struct pxa2xx_spi_master {
 	u8 enable_dma;
 
 	/* DMA engine specific config */
-	int rx_chan_id;
-	int tx_chan_id;
-	int rx_slave_id;
-	int tx_slave_id;
+	bool (*dma_filter)(struct dma_chan *chan, void *param);
+	void *tx_param;
+	void *rx_param;
 
 	/* For non-PXA arches */
 	struct ssp_device ssp;
-- 
cgit v1.2.3


From 175655bd79b815f2b2035f3b44117c60be90e1d4 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 16 Sep 2014 17:36:28 -0700
Subject: ARM: OMAP: Remove unused pieces of legacy DMA API

We're moving to the dmaengine API, so let's remove the unused
pieces of the omap legacy DMA code to make sure we don't get
any new users for these:

omap_set_dma_color_mode
omap_set_dma_src_index
omap_set_dma_dest_index
omap_dma_unlink_lch
omap_clear_dma
omap_dma_running
omap_dma_set_prio_lch
omap_set_dma_dst_endian_type
omap_set_dma_src_endian_type
omap_get_dma_index
omap_dma_disable_irq
omap_request_dma_chain
omap_free_dma_chain
omap_dma_chain_a_transfer
omap_start_dma_chain_transfers
omap_stop_dma_chain_transfers
omap_get_dma_chain_index
omap_get_dma_chain_dst_pos
omap_get_dma_chain_src_pos
omap_modify_dma_chain_params
omap_dma_chain_status

Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/plat-omap/dma.c | 737 +----------------------------------------------
 include/linux/omap-dma.h |  37 ---
 2 files changed, 6 insertions(+), 768 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index c2baa8ede543..24770e5a5081 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -64,7 +64,9 @@ enum { DMA_CHAIN_STARTED, DMA_CHAIN_NOTSTARTED };
 
 static struct omap_system_dma_plat_info *p;
 static struct omap_dma_dev_attr *d;
-
+static void omap_clear_dma(int lch);
+static int omap_dma_set_prio_lch(int lch, unsigned char read_prio,
+				 unsigned char write_prio);
 static int enable_1510_mode;
 static u32 errata;
 
@@ -284,66 +286,6 @@ void omap_set_dma_transfer_params(int lch, int data_type, int elem_count,
 }
 EXPORT_SYMBOL(omap_set_dma_transfer_params);
 
-void omap_set_dma_color_mode(int lch, enum omap_dma_color_mode mode, u32 color)
-{
-	BUG_ON(omap_dma_in_1510_mode());
-
-	if (dma_omap1()) {
-		u16 w;
-
-		w = p->dma_read(CCR2, lch);
-		w &= ~0x03;
-
-		switch (mode) {
-		case OMAP_DMA_CONSTANT_FILL:
-			w |= 0x01;
-			break;
-		case OMAP_DMA_TRANSPARENT_COPY:
-			w |= 0x02;
-			break;
-		case OMAP_DMA_COLOR_DIS:
-			break;
-		default:
-			BUG();
-		}
-		p->dma_write(w, CCR2, lch);
-
-		w = p->dma_read(LCH_CTRL, lch);
-		w &= ~0x0f;
-		/* Default is channel type 2D */
-		if (mode) {
-			p->dma_write(color, COLOR, lch);
-			w |= 1;		/* Channel type G */
-		}
-		p->dma_write(w, LCH_CTRL, lch);
-	}
-
-	if (dma_omap2plus()) {
-		u32 val;
-
-		val = p->dma_read(CCR, lch);
-		val &= ~((1 << 17) | (1 << 16));
-
-		switch (mode) {
-		case OMAP_DMA_CONSTANT_FILL:
-			val |= 1 << 16;
-			break;
-		case OMAP_DMA_TRANSPARENT_COPY:
-			val |= 1 << 17;
-			break;
-		case OMAP_DMA_COLOR_DIS:
-			break;
-		default:
-			BUG();
-		}
-		p->dma_write(val, CCR, lch);
-
-		color &= 0xffffff;
-		p->dma_write(color, COLOR, lch);
-	}
-}
-EXPORT_SYMBOL(omap_set_dma_color_mode);
-
 void omap_set_dma_write_mode(int lch, enum omap_dma_write_mode mode)
 {
 	if (dma_omap2plus()) {
@@ -417,16 +359,6 @@ void omap_set_dma_params(int lch, struct omap_dma_channel_params *params)
 }
 EXPORT_SYMBOL(omap_set_dma_params);
 
-void omap_set_dma_src_index(int lch, int eidx, int fidx)
-{
-	if (dma_omap2plus())
-		return;
-
-	p->dma_write(eidx, CSEI, lch);
-	p->dma_write(fidx, CSFI, lch);
-}
-EXPORT_SYMBOL(omap_set_dma_src_index);
-
 void omap_set_dma_src_data_pack(int lch, int enable)
 {
 	u32 l;
@@ -510,16 +442,6 @@ void omap_set_dma_dest_params(int lch, int dest_port, int dest_amode,
 }
 EXPORT_SYMBOL(omap_set_dma_dest_params);
 
-void omap_set_dma_dest_index(int lch, int eidx, int fidx)
-{
-	if (dma_omap2plus())
-		return;
-
-	p->dma_write(eidx, CDEI, lch);
-	p->dma_write(fidx, CDFI, lch);
-}
-EXPORT_SYMBOL(omap_set_dma_dest_index);
-
 void omap_set_dma_dest_data_pack(int lch, int enable)
 {
 	u32 l;
@@ -843,7 +765,7 @@ EXPORT_SYMBOL(omap_dma_set_global_params);
  * Both of the above can be set with one of the following values :
  * 	DMA_CH_PRIO_HIGH/DMA_CH_PRIO_LOW
  */
-int
+static int
 omap_dma_set_prio_lch(int lch, unsigned char read_prio,
 		      unsigned char write_prio)
 {
@@ -864,13 +786,13 @@ omap_dma_set_prio_lch(int lch, unsigned char read_prio,
 
 	return 0;
 }
-EXPORT_SYMBOL(omap_dma_set_prio_lch);
+
 
 /*
  * Clears any DMA state so the DMA engine is ready to restart with new buffers
  * through omap_start_dma(). Any buffers in flight are discarded.
  */
-void omap_clear_dma(int lch)
+static void omap_clear_dma(int lch)
 {
 	unsigned long flags;
 
@@ -878,7 +800,6 @@ void omap_clear_dma(int lch)
 	p->clear_dma(lch);
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL(omap_clear_dma);
 
 void omap_start_dma(int lch)
 {
@@ -1167,652 +1088,6 @@ void omap_dma_link_lch(int lch_head, int lch_queue)
 }
 EXPORT_SYMBOL(omap_dma_link_lch);
 
-/*
- * Once the DMA queue is stopped, we can destroy it.
- */
-void omap_dma_unlink_lch(int lch_head, int lch_queue)
-{
-	if (omap_dma_in_1510_mode()) {
-		if (lch_head == lch_queue) {
-			p->dma_write(p->dma_read(CCR, lch_head) & ~(3 << 8),
-								CCR, lch_head);
-			return;
-		}
-		printk(KERN_ERR "DMA linking is not supported in 1510 mode\n");
-		BUG();
-		return;
-	}
-
-	if (dma_chan[lch_head].next_lch != lch_queue ||
-	    dma_chan[lch_head].next_lch == -1) {
-		pr_err("omap_dma: trying to unlink non linked channels\n");
-		dump_stack();
-	}
-
-	if ((dma_chan[lch_head].flags & OMAP_DMA_ACTIVE) ||
-	    (dma_chan[lch_queue].flags & OMAP_DMA_ACTIVE)) {
-		pr_err("omap_dma: You need to stop the DMA channels before unlinking\n");
-		dump_stack();
-	}
-
-	dma_chan[lch_head].next_lch = -1;
-}
-EXPORT_SYMBOL(omap_dma_unlink_lch);
-
-#ifndef CONFIG_ARCH_OMAP1
-/* Create chain of DMA channesls */
-static void create_dma_lch_chain(int lch_head, int lch_queue)
-{
-	u32 l;
-
-	/* Check if this is the first link in chain */
-	if (dma_chan[lch_head].next_linked_ch == -1) {
-		dma_chan[lch_head].next_linked_ch = lch_queue;
-		dma_chan[lch_head].prev_linked_ch = lch_queue;
-		dma_chan[lch_queue].next_linked_ch = lch_head;
-		dma_chan[lch_queue].prev_linked_ch = lch_head;
-	}
-
-	/* a link exists, link the new channel in circular chain */
-	else {
-		dma_chan[lch_queue].next_linked_ch =
-					dma_chan[lch_head].next_linked_ch;
-		dma_chan[lch_queue].prev_linked_ch = lch_head;
-		dma_chan[lch_head].next_linked_ch = lch_queue;
-		dma_chan[dma_chan[lch_queue].next_linked_ch].prev_linked_ch =
-					lch_queue;
-	}
-
-	l = p->dma_read(CLNK_CTRL, lch_head);
-	l &= ~(0x1f);
-	l |= lch_queue;
-	p->dma_write(l, CLNK_CTRL, lch_head);
-
-	l = p->dma_read(CLNK_CTRL, lch_queue);
-	l &= ~(0x1f);
-	l |= (dma_chan[lch_queue].next_linked_ch);
-	p->dma_write(l, CLNK_CTRL, lch_queue);
-}
-
-/**
- * @brief omap_request_dma_chain : Request a chain of DMA channels
- *
- * @param dev_id - Device id using the dma channel
- * @param dev_name - Device name
- * @param callback - Call back function
- * @chain_id -
- * @no_of_chans - Number of channels requested
- * @chain_mode - Dynamic or static chaining : OMAP_DMA_STATIC_CHAIN
- * 					      OMAP_DMA_DYNAMIC_CHAIN
- * @params - Channel parameters
- *
- * @return - Success : 0
- * 	     Failure: -EINVAL/-ENOMEM
- */
-int omap_request_dma_chain(int dev_id, const char *dev_name,
-			   void (*callback) (int lch, u16 ch_status,
-					     void *data),
-			   int *chain_id, int no_of_chans, int chain_mode,
-			   struct omap_dma_channel_params params)
-{
-	int *channels;
-	int i, err;
-
-	/* Is the chain mode valid ? */
-	if (chain_mode != OMAP_DMA_STATIC_CHAIN
-			&& chain_mode != OMAP_DMA_DYNAMIC_CHAIN) {
-		printk(KERN_ERR "Invalid chain mode requested\n");
-		return -EINVAL;
-	}
-
-	if (unlikely((no_of_chans < 1
-			|| no_of_chans > dma_lch_count))) {
-		printk(KERN_ERR "Invalid Number of channels requested\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * Allocate a queue to maintain the status of the channels
-	 * in the chain
-	 */
-	channels = kmalloc(sizeof(*channels) * no_of_chans, GFP_KERNEL);
-	if (channels == NULL) {
-		printk(KERN_ERR "omap_dma: No memory for channel queue\n");
-		return -ENOMEM;
-	}
-
-	/* request and reserve DMA channels for the chain */
-	for (i = 0; i < no_of_chans; i++) {
-		err = omap_request_dma(dev_id, dev_name,
-					callback, NULL, &channels[i]);
-		if (err < 0) {
-			int j;
-			for (j = 0; j < i; j++)
-				omap_free_dma(channels[j]);
-			kfree(channels);
-			printk(KERN_ERR "omap_dma: Request failed %d\n", err);
-			return err;
-		}
-		dma_chan[channels[i]].prev_linked_ch = -1;
-		dma_chan[channels[i]].state = DMA_CH_NOTSTARTED;
-
-		/*
-		 * Allowing client drivers to set common parameters now,
-		 * so that later only relevant (src_start, dest_start
-		 * and element count) can be set
-		 */
-		omap_set_dma_params(channels[i], &params);
-	}
-
-	*chain_id = channels[0];
-	dma_linked_lch[*chain_id].linked_dmach_q = channels;
-	dma_linked_lch[*chain_id].chain_mode = chain_mode;
-	dma_linked_lch[*chain_id].chain_state = DMA_CHAIN_NOTSTARTED;
-	dma_linked_lch[*chain_id].no_of_lchs_linked = no_of_chans;
-
-	for (i = 0; i < no_of_chans; i++)
-		dma_chan[channels[i]].chain_id = *chain_id;
-
-	/* Reset the Queue pointers */
-	OMAP_DMA_CHAIN_QINIT(*chain_id);
-
-	/* Set up the chain */
-	if (no_of_chans == 1)
-		create_dma_lch_chain(channels[0], channels[0]);
-	else {
-		for (i = 0; i < (no_of_chans - 1); i++)
-			create_dma_lch_chain(channels[i], channels[i + 1]);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(omap_request_dma_chain);
-
-/**
- * @brief omap_modify_dma_chain_param : Modify the chain's params - Modify the
- * params after setting it. Dont do this while dma is running!!
- *
- * @param chain_id - Chained logical channel id.
- * @param params
- *
- * @return - Success : 0
- * 	     Failure : -EINVAL
- */
-int omap_modify_dma_chain_params(int chain_id,
-				struct omap_dma_channel_params params)
-{
-	int *channels;
-	u32 i;
-
-	/* Check for input params */
-	if (unlikely((chain_id < 0
-			|| chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	/* Check if the chain exists */
-	if (dma_linked_lch[chain_id].linked_dmach_q == NULL) {
-		printk(KERN_ERR "Chain doesn't exists\n");
-		return -EINVAL;
-	}
-	channels = dma_linked_lch[chain_id].linked_dmach_q;
-
-	for (i = 0; i < dma_linked_lch[chain_id].no_of_lchs_linked; i++) {
-		/*
-		 * Allowing client drivers to set common parameters now,
-		 * so that later only relevant (src_start, dest_start
-		 * and element count) can be set
-		 */
-		omap_set_dma_params(channels[i], &params);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(omap_modify_dma_chain_params);
-
-/**
- * @brief omap_free_dma_chain - Free all the logical channels in a chain.
- *
- * @param chain_id
- *
- * @return - Success : 0
- * 	     Failure : -EINVAL
- */
-int omap_free_dma_chain(int chain_id)
-{
-	int *channels;
-	u32 i;
-
-	/* Check for input params */
-	if (unlikely((chain_id < 0 || chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	/* Check if the chain exists */
-	if (dma_linked_lch[chain_id].linked_dmach_q == NULL) {
-		printk(KERN_ERR "Chain doesn't exists\n");
-		return -EINVAL;
-	}
-
-	channels = dma_linked_lch[chain_id].linked_dmach_q;
-	for (i = 0; i < dma_linked_lch[chain_id].no_of_lchs_linked; i++) {
-		dma_chan[channels[i]].next_linked_ch = -1;
-		dma_chan[channels[i]].prev_linked_ch = -1;
-		dma_chan[channels[i]].chain_id = -1;
-		dma_chan[channels[i]].state = DMA_CH_NOTSTARTED;
-		omap_free_dma(channels[i]);
-	}
-
-	kfree(channels);
-
-	dma_linked_lch[chain_id].linked_dmach_q = NULL;
-	dma_linked_lch[chain_id].chain_mode = -1;
-	dma_linked_lch[chain_id].chain_state = -1;
-
-	return (0);
-}
-EXPORT_SYMBOL(omap_free_dma_chain);
-
-/**
- * @brief omap_dma_chain_status - Check if the chain is in
- * active / inactive state.
- * @param chain_id
- *
- * @return - Success : OMAP_DMA_CHAIN_ACTIVE/OMAP_DMA_CHAIN_INACTIVE
- * 	     Failure : -EINVAL
- */
-int omap_dma_chain_status(int chain_id)
-{
-	/* Check for input params */
-	if (unlikely((chain_id < 0 || chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	/* Check if the chain exists */
-	if (dma_linked_lch[chain_id].linked_dmach_q == NULL) {
-		printk(KERN_ERR "Chain doesn't exists\n");
-		return -EINVAL;
-	}
-	pr_debug("CHAINID=%d, qcnt=%d\n", chain_id,
-			dma_linked_lch[chain_id].q_count);
-
-	if (OMAP_DMA_CHAIN_QEMPTY(chain_id))
-		return OMAP_DMA_CHAIN_INACTIVE;
-
-	return OMAP_DMA_CHAIN_ACTIVE;
-}
-EXPORT_SYMBOL(omap_dma_chain_status);
-
-/**
- * @brief omap_dma_chain_a_transfer - Get a free channel from a chain,
- * set the params and start the transfer.
- *
- * @param chain_id
- * @param src_start - buffer start address
- * @param dest_start - Dest address
- * @param elem_count
- * @param frame_count
- * @param callbk_data - channel callback parameter data.
- *
- * @return  - Success : 0
- * 	      Failure: -EINVAL/-EBUSY
- */
-int omap_dma_chain_a_transfer(int chain_id, int src_start, int dest_start,
-			int elem_count, int frame_count, void *callbk_data)
-{
-	int *channels;
-	u32 l, lch;
-	int start_dma = 0;
-
-	/*
-	 * if buffer size is less than 1 then there is
-	 * no use of starting the chain
-	 */
-	if (elem_count < 1) {
-		printk(KERN_ERR "Invalid buffer size\n");
-		return -EINVAL;
-	}
-
-	/* Check for input params */
-	if (unlikely((chain_id < 0
-			|| chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	/* Check if the chain exists */
-	if (dma_linked_lch[chain_id].linked_dmach_q == NULL) {
-		printk(KERN_ERR "Chain doesn't exist\n");
-		return -EINVAL;
-	}
-
-	/* Check if all the channels in chain are in use */
-	if (OMAP_DMA_CHAIN_QFULL(chain_id))
-		return -EBUSY;
-
-	/* Frame count may be negative in case of indexed transfers */
-	channels = dma_linked_lch[chain_id].linked_dmach_q;
-
-	/* Get a free channel */
-	lch = channels[dma_linked_lch[chain_id].q_tail];
-
-	/* Store the callback data */
-	dma_chan[lch].data = callbk_data;
-
-	/* Increment the q_tail */
-	OMAP_DMA_CHAIN_INCQTAIL(chain_id);
-
-	/* Set the params to the free channel */
-	if (src_start != 0)
-		p->dma_write(src_start, CSSA, lch);
-	if (dest_start != 0)
-		p->dma_write(dest_start, CDSA, lch);
-
-	/* Write the buffer size */
-	p->dma_write(elem_count, CEN, lch);
-	p->dma_write(frame_count, CFN, lch);
-
-	/*
-	 * If the chain is dynamically linked,
-	 * then we may have to start the chain if its not active
-	 */
-	if (dma_linked_lch[chain_id].chain_mode == OMAP_DMA_DYNAMIC_CHAIN) {
-
-		/*
-		 * In Dynamic chain, if the chain is not started,
-		 * queue the channel
-		 */
-		if (dma_linked_lch[chain_id].chain_state ==
-						DMA_CHAIN_NOTSTARTED) {
-			/* Enable the link in previous channel */
-			if (dma_chan[dma_chan[lch].prev_linked_ch].state ==
-								DMA_CH_QUEUED)
-				enable_lnk(dma_chan[lch].prev_linked_ch);
-			dma_chan[lch].state = DMA_CH_QUEUED;
-		}
-
-		/*
-		 * Chain is already started, make sure its active,
-		 * if not then start the chain
-		 */
-		else {
-			start_dma = 1;
-
-			if (dma_chan[dma_chan[lch].prev_linked_ch].state ==
-							DMA_CH_STARTED) {
-				enable_lnk(dma_chan[lch].prev_linked_ch);
-				dma_chan[lch].state = DMA_CH_QUEUED;
-				start_dma = 0;
-				if (0 == ((1 << 7) & p->dma_read(
-					CCR, dma_chan[lch].prev_linked_ch))) {
-					disable_lnk(dma_chan[lch].
-						    prev_linked_ch);
-					pr_debug("\n prev ch is stopped\n");
-					start_dma = 1;
-				}
-			}
-
-			else if (dma_chan[dma_chan[lch].prev_linked_ch].state
-							== DMA_CH_QUEUED) {
-				enable_lnk(dma_chan[lch].prev_linked_ch);
-				dma_chan[lch].state = DMA_CH_QUEUED;
-				start_dma = 0;
-			}
-			omap_enable_channel_irq(lch);
-
-			l = p->dma_read(CCR, lch);
-
-			if ((0 == (l & (1 << 24))))
-				l &= ~(1 << 25);
-			else
-				l |= (1 << 25);
-			if (start_dma == 1) {
-				if (0 == (l & (1 << 7))) {
-					l |= (1 << 7);
-					dma_chan[lch].state = DMA_CH_STARTED;
-					pr_debug("starting %d\n", lch);
-					p->dma_write(l, CCR, lch);
-				} else
-					start_dma = 0;
-			} else {
-				if (0 == (l & (1 << 7)))
-					p->dma_write(l, CCR, lch);
-			}
-			dma_chan[lch].flags |= OMAP_DMA_ACTIVE;
-		}
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(omap_dma_chain_a_transfer);
-
-/**
- * @brief omap_start_dma_chain_transfers - Start the chain
- *
- * @param chain_id
- *
- * @return - Success : 0
- * 	     Failure : -EINVAL/-EBUSY
- */
-int omap_start_dma_chain_transfers(int chain_id)
-{
-	int *channels;
-	u32 l, i;
-
-	if (unlikely((chain_id < 0 || chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	channels = dma_linked_lch[chain_id].linked_dmach_q;
-
-	if (dma_linked_lch[channels[0]].chain_state == DMA_CHAIN_STARTED) {
-		printk(KERN_ERR "Chain is already started\n");
-		return -EBUSY;
-	}
-
-	if (dma_linked_lch[chain_id].chain_mode == OMAP_DMA_STATIC_CHAIN) {
-		for (i = 0; i < dma_linked_lch[chain_id].no_of_lchs_linked;
-									i++) {
-			enable_lnk(channels[i]);
-			omap_enable_channel_irq(channels[i]);
-		}
-	} else {
-		omap_enable_channel_irq(channels[0]);
-	}
-
-	l = p->dma_read(CCR, channels[0]);
-	l |= (1 << 7);
-	dma_linked_lch[chain_id].chain_state = DMA_CHAIN_STARTED;
-	dma_chan[channels[0]].state = DMA_CH_STARTED;
-
-	if ((0 == (l & (1 << 24))))
-		l &= ~(1 << 25);
-	else
-		l |= (1 << 25);
-	p->dma_write(l, CCR, channels[0]);
-
-	dma_chan[channels[0]].flags |= OMAP_DMA_ACTIVE;
-
-	return 0;
-}
-EXPORT_SYMBOL(omap_start_dma_chain_transfers);
-
-/**
- * @brief omap_stop_dma_chain_transfers - Stop the dma transfer of a chain.
- *
- * @param chain_id
- *
- * @return - Success : 0
- * 	     Failure : EINVAL
- */
-int omap_stop_dma_chain_transfers(int chain_id)
-{
-	int *channels;
-	u32 l, i;
-	u32 sys_cf = 0;
-
-	/* Check for input params */
-	if (unlikely((chain_id < 0 || chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	/* Check if the chain exists */
-	if (dma_linked_lch[chain_id].linked_dmach_q == NULL) {
-		printk(KERN_ERR "Chain doesn't exists\n");
-		return -EINVAL;
-	}
-	channels = dma_linked_lch[chain_id].linked_dmach_q;
-
-	if (IS_DMA_ERRATA(DMA_ERRATA_i88)) {
-		sys_cf = p->dma_read(OCP_SYSCONFIG, 0);
-		l = sys_cf;
-		/* Middle mode reg set no Standby */
-		l &= ~((1 << 12)|(1 << 13));
-		p->dma_write(l, OCP_SYSCONFIG, 0);
-	}
-
-	for (i = 0; i < dma_linked_lch[chain_id].no_of_lchs_linked; i++) {
-
-		/* Stop the Channel transmission */
-		l = p->dma_read(CCR, channels[i]);
-		l &= ~(1 << 7);
-		p->dma_write(l, CCR, channels[i]);
-
-		/* Disable the link in all the channels */
-		disable_lnk(channels[i]);
-		dma_chan[channels[i]].state = DMA_CH_NOTSTARTED;
-
-	}
-	dma_linked_lch[chain_id].chain_state = DMA_CHAIN_NOTSTARTED;
-
-	/* Reset the Queue pointers */
-	OMAP_DMA_CHAIN_QINIT(chain_id);
-
-	if (IS_DMA_ERRATA(DMA_ERRATA_i88))
-		p->dma_write(sys_cf, OCP_SYSCONFIG, 0);
-
-	return 0;
-}
-EXPORT_SYMBOL(omap_stop_dma_chain_transfers);
-
-/* Get the index of the ongoing DMA in chain */
-/**
- * @brief omap_get_dma_chain_index - Get the element and frame index
- * of the ongoing DMA in chain
- *
- * @param chain_id
- * @param ei - Element index
- * @param fi - Frame index
- *
- * @return - Success : 0
- * 	     Failure : -EINVAL
- */
-int omap_get_dma_chain_index(int chain_id, int *ei, int *fi)
-{
-	int lch;
-	int *channels;
-
-	/* Check for input params */
-	if (unlikely((chain_id < 0 || chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	/* Check if the chain exists */
-	if (dma_linked_lch[chain_id].linked_dmach_q == NULL) {
-		printk(KERN_ERR "Chain doesn't exists\n");
-		return -EINVAL;
-	}
-	if ((!ei) || (!fi))
-		return -EINVAL;
-
-	channels = dma_linked_lch[chain_id].linked_dmach_q;
-
-	/* Get the current channel */
-	lch = channels[dma_linked_lch[chain_id].q_head];
-
-	*ei = p->dma_read(CCEN, lch);
-	*fi = p->dma_read(CCFN, lch);
-
-	return 0;
-}
-EXPORT_SYMBOL(omap_get_dma_chain_index);
-
-/**
- * @brief omap_get_dma_chain_dst_pos - Get the destination position of the
- * ongoing DMA in chain
- *
- * @param chain_id
- *
- * @return - Success : Destination position
- * 	     Failure : -EINVAL
- */
-int omap_get_dma_chain_dst_pos(int chain_id)
-{
-	int lch;
-	int *channels;
-
-	/* Check for input params */
-	if (unlikely((chain_id < 0 || chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	/* Check if the chain exists */
-	if (dma_linked_lch[chain_id].linked_dmach_q == NULL) {
-		printk(KERN_ERR "Chain doesn't exists\n");
-		return -EINVAL;
-	}
-
-	channels = dma_linked_lch[chain_id].linked_dmach_q;
-
-	/* Get the current channel */
-	lch = channels[dma_linked_lch[chain_id].q_head];
-
-	return p->dma_read(CDAC, lch);
-}
-EXPORT_SYMBOL(omap_get_dma_chain_dst_pos);
-
-/**
- * @brief omap_get_dma_chain_src_pos - Get the source position
- * of the ongoing DMA in chain
- * @param chain_id
- *
- * @return - Success : Destination position
- * 	     Failure : -EINVAL
- */
-int omap_get_dma_chain_src_pos(int chain_id)
-{
-	int lch;
-	int *channels;
-
-	/* Check for input params */
-	if (unlikely((chain_id < 0 || chain_id >= dma_lch_count))) {
-		printk(KERN_ERR "Invalid chain id\n");
-		return -EINVAL;
-	}
-
-	/* Check if the chain exists */
-	if (dma_linked_lch[chain_id].linked_dmach_q == NULL) {
-		printk(KERN_ERR "Chain doesn't exists\n");
-		return -EINVAL;
-	}
-
-	channels = dma_linked_lch[chain_id].linked_dmach_q;
-
-	/* Get the current channel */
-	lch = channels[dma_linked_lch[chain_id].q_head];
-
-	return p->dma_read(CSAC, lch);
-}
-EXPORT_SYMBOL(omap_get_dma_chain_src_pos);
-#endif	/* ifndef CONFIG_ARCH_OMAP1 */
-
 /*----------------------------------------------------------------------------*/
 
 #ifdef CONFIG_ARCH_OMAP1
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 6f06f8bc612c..e5a70132a240 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -306,15 +306,12 @@ extern void omap_set_dma_transfer_params(int lch, int data_type,
 					 int elem_count, int frame_count,
 					 int sync_mode,
 					 int dma_trigger, int src_or_dst_synch);
-extern void omap_set_dma_color_mode(int lch, enum omap_dma_color_mode mode,
-				    u32 color);
 extern void omap_set_dma_write_mode(int lch, enum omap_dma_write_mode mode);
 extern void omap_set_dma_channel_mode(int lch, enum omap_dma_channel_mode mode);
 
 extern void omap_set_dma_src_params(int lch, int src_port, int src_amode,
 				    unsigned long src_start,
 				    int src_ei, int src_fi);
-extern void omap_set_dma_src_index(int lch, int eidx, int fidx);
 extern void omap_set_dma_src_data_pack(int lch, int enable);
 extern void omap_set_dma_src_burst_mode(int lch,
 					enum omap_dma_burst_mode burst_mode);
@@ -322,7 +319,6 @@ extern void omap_set_dma_src_burst_mode(int lch,
 extern void omap_set_dma_dest_params(int lch, int dest_port, int dest_amode,
 				     unsigned long dest_start,
 				     int dst_ei, int dst_fi);
-extern void omap_set_dma_dest_index(int lch, int eidx, int fidx);
 extern void omap_set_dma_dest_data_pack(int lch, int enable);
 extern void omap_set_dma_dest_burst_mode(int lch,
 					 enum omap_dma_burst_mode burst_mode);
@@ -331,52 +327,19 @@ extern void omap_set_dma_params(int lch,
 				struct omap_dma_channel_params *params);
 
 extern void omap_dma_link_lch(int lch_head, int lch_queue);
-extern void omap_dma_unlink_lch(int lch_head, int lch_queue);
 
 extern int omap_set_dma_callback(int lch,
 			void (*callback)(int lch, u16 ch_status, void *data),
 			void *data);
 extern dma_addr_t omap_get_dma_src_pos(int lch);
 extern dma_addr_t omap_get_dma_dst_pos(int lch);
-extern void omap_clear_dma(int lch);
 extern int omap_get_dma_active_status(int lch);
 extern int omap_dma_running(void);
 extern void omap_dma_set_global_params(int arb_rate, int max_fifo_depth,
 				       int tparams);
-extern int omap_dma_set_prio_lch(int lch, unsigned char read_prio,
-				 unsigned char write_prio);
-extern void omap_set_dma_dst_endian_type(int lch, enum end_type etype);
-extern void omap_set_dma_src_endian_type(int lch, enum end_type etype);
-extern int omap_get_dma_index(int lch, int *ei, int *fi);
-
 void omap_dma_global_context_save(void);
 void omap_dma_global_context_restore(void);
 
-extern void omap_dma_disable_irq(int lch);
-
-/* Chaining APIs */
-#ifndef CONFIG_ARCH_OMAP1
-extern int omap_request_dma_chain(int dev_id, const char *dev_name,
-				  void (*callback) (int lch, u16 ch_status,
-						    void *data),
-				  int *chain_id, int no_of_chans,
-				  int chain_mode,
-				  struct omap_dma_channel_params params);
-extern int omap_free_dma_chain(int chain_id);
-extern int omap_dma_chain_a_transfer(int chain_id, int src_start,
-				     int dest_start, int elem_count,
-				     int frame_count, void *callbk_data);
-extern int omap_start_dma_chain_transfers(int chain_id);
-extern int omap_stop_dma_chain_transfers(int chain_id);
-extern int omap_get_dma_chain_index(int chain_id, int *ei, int *fi);
-extern int omap_get_dma_chain_dst_pos(int chain_id);
-extern int omap_get_dma_chain_src_pos(int chain_id);
-
-extern int omap_modify_dma_chain_params(int chain_id,
-					struct omap_dma_channel_params params);
-extern int omap_dma_chain_status(int chain_id);
-#endif
-
 #if defined(CONFIG_ARCH_OMAP1) && IS_ENABLED(CONFIG_FB_OMAP)
 #include <mach/lcd_dma.h>
 #else
-- 
cgit v1.2.3


From 971ff49355387fef41d1327434d8939721a4eb35 Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Thu, 18 Sep 2014 16:06:19 +0800
Subject: cgroup: use a per-cgroup work for release agent

Instead of using a global work to schedule release agent on removable
cgroups, we change to use a per-cgroup work to do this, which makes
the code much simpler.

v2: use a dedicated work instead of reusing css->destroy_work. (Tejun)

Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h |  10 ++---
 kernel/cgroup.c        | 108 +++++++++++++++----------------------------------
 2 files changed, 36 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f7898e0bce1e..51958d0fb88f 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -233,13 +233,6 @@ struct cgroup {
 	 */
 	struct list_head e_csets[CGROUP_SUBSYS_COUNT];
 
-	/*
-	 * Linked list running through all cgroups that can
-	 * potentially be reaped by the release agent. Protected by
-	 * release_list_lock
-	 */
-	struct list_head release_list;
-
 	/*
 	 * list of pidlists, up to two for each namespace (one for procs, one
 	 * for tasks); created on demand.
@@ -249,6 +242,9 @@ struct cgroup {
 
 	/* used to wait for offlining of csses */
 	wait_queue_head_t offline_waitq;
+
+	/* used to schedule release agent */
+	struct work_struct release_agent_work;
 };
 
 #define MAX_CGROUP_ROOT_NAMELEN 64
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4ddc75588983..db19a4884a7f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -392,12 +392,7 @@ static int notify_on_release(const struct cgroup *cgrp)
 			;						\
 		else
 
-/* the list of cgroups eligible for automatic release. Protected by
- * release_list_lock */
-static LIST_HEAD(release_list);
-static DEFINE_RAW_SPINLOCK(release_list_lock);
 static void cgroup_release_agent(struct work_struct *work);
-static DECLARE_WORK(release_agent_work, cgroup_release_agent);
 static void check_for_release(struct cgroup *cgrp);
 
 /*
@@ -1577,7 +1572,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->self.sibling);
 	INIT_LIST_HEAD(&cgrp->self.children);
 	INIT_LIST_HEAD(&cgrp->cset_links);
-	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
 	cgrp->self.cgroup = cgrp;
@@ -1587,6 +1581,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
 
 	init_waitqueue_head(&cgrp->offline_waitq);
+	INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
 }
 
 static void init_cgroup_root(struct cgroup_root *root,
@@ -4342,6 +4337,7 @@ static void css_free_work_fn(struct work_struct *work)
 		/* cgroup free path */
 		atomic_dec(&cgrp->root->nr_cgrps);
 		cgroup_pidlist_destroy_all(cgrp);
+		cancel_work_sync(&cgrp->release_agent_work);
 
 		if (cgroup_parent(cgrp)) {
 			/*
@@ -4804,12 +4800,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);
 
-	/* CSS_ONLINE is clear, remove from ->release_list for the last time */
-	raw_spin_lock(&release_list_lock);
-	if (!list_empty(&cgrp->release_list))
-		list_del_init(&cgrp->release_list);
-	raw_spin_unlock(&release_list_lock);
-
 	/*
 	 * Remove @cgrp directory along with the base files.  @cgrp has an
 	 * extra ref on its kn.
@@ -5271,25 +5261,9 @@ void cgroup_exit(struct task_struct *tsk)
 
 static void check_for_release(struct cgroup *cgrp)
 {
-	if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) &&
-	    !css_has_online_children(&cgrp->self)) {
-		/*
-		 * Control Group is currently removeable. If it's not
-		 * already queued for a userspace notification, queue
-		 * it now
-		 */
-		int need_schedule_work = 0;
-
-		raw_spin_lock(&release_list_lock);
-		if (!cgroup_is_dead(cgrp) &&
-		    list_empty(&cgrp->release_list)) {
-			list_add(&cgrp->release_list, &release_list);
-			need_schedule_work = 1;
-		}
-		raw_spin_unlock(&release_list_lock);
-		if (need_schedule_work)
-			schedule_work(&release_agent_work);
-	}
+	if (cgroup_is_releasable(cgrp) && !cgroup_has_tasks(cgrp) &&
+	    !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
+		schedule_work(&cgrp->release_agent_work);
 }
 
 /*
@@ -5317,52 +5291,36 @@ static void check_for_release(struct cgroup *cgrp)
  */
 static void cgroup_release_agent(struct work_struct *work)
 {
-	BUG_ON(work != &release_agent_work);
+	struct cgroup *cgrp =
+		container_of(work, struct cgroup, release_agent_work);
+	char *pathbuf = NULL, *agentbuf = NULL, *path;
+	char *argv[3], *envp[3];
+
 	mutex_lock(&cgroup_mutex);
-	raw_spin_lock(&release_list_lock);
-	while (!list_empty(&release_list)) {
-		char *argv[3], *envp[3];
-		int i;
-		char *pathbuf = NULL, *agentbuf = NULL, *path;
-		struct cgroup *cgrp = list_entry(release_list.next,
-						    struct cgroup,
-						    release_list);
-		list_del_init(&cgrp->release_list);
-		raw_spin_unlock(&release_list_lock);
-		pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
-		if (!pathbuf)
-			goto continue_free;
-		path = cgroup_path(cgrp, pathbuf, PATH_MAX);
-		if (!path)
-			goto continue_free;
-		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
-		if (!agentbuf)
-			goto continue_free;
-
-		i = 0;
-		argv[i++] = agentbuf;
-		argv[i++] = path;
-		argv[i] = NULL;
-
-		i = 0;
-		/* minimal command environment */
-		envp[i++] = "HOME=/";
-		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-		envp[i] = NULL;
-
-		/* Drop the lock while we invoke the usermode helper,
-		 * since the exec could involve hitting disk and hence
-		 * be a slow process */
-		mutex_unlock(&cgroup_mutex);
-		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-		mutex_lock(&cgroup_mutex);
- continue_free:
-		kfree(pathbuf);
-		kfree(agentbuf);
-		raw_spin_lock(&release_list_lock);
-	}
-	raw_spin_unlock(&release_list_lock);
+
+	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+	agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+	if (!pathbuf || !agentbuf)
+		goto out;
+
+	path = cgroup_path(cgrp, pathbuf, PATH_MAX);
+	if (!path)
+		goto out;
+
+	argv[0] = agentbuf;
+	argv[1] = path;
+	argv[2] = NULL;
+
+	/* minimal command environment */
+	envp[0] = "HOME=/";
+	envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+	envp[2] = NULL;
+
 	mutex_unlock(&cgroup_mutex);
+	call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+out:
+	kfree(agentbuf);
+	kfree(pathbuf);
 }
 
 static int __init cgroup_disable(char *str)
-- 
cgit v1.2.3


From 006f4ac49742b5f70ef7e39176fd42a500144ccc Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Thu, 18 Sep 2014 16:03:15 +0800
Subject: cgroup: simplify proc_cgroup_show()

Use the ONE macro instead of REG, and we can simplify proc_cgroup_show().

Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/proc/base.c         | 19 ++-----------------
 include/linux/cgroup.h |  3 ++-
 kernel/cgroup.c        | 18 +++---------------
 3 files changed, 7 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index baf852b648ad..6b96892015ec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -376,21 +376,6 @@ static const struct file_operations proc_lstats_operations = {
 
 #endif
 
-#ifdef CONFIG_CGROUPS
-static int cgroup_open(struct inode *inode, struct file *file)
-{
-	struct pid *pid = PROC_I(inode)->pid;
-	return single_open(file, proc_cgroup_show, pid);
-}
-
-static const struct file_operations proc_cgroup_operations = {
-	.open		= cgroup_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif
-
 #ifdef CONFIG_PROC_PID_CPUSET
 
 static int cpuset_open(struct inode *inode, struct file *file)
@@ -2576,7 +2561,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("cpuset",     S_IRUGO, proc_cpuset_operations),
 #endif
 #ifdef CONFIG_CGROUPS
-	REG("cgroup",  S_IRUGO, proc_cgroup_operations),
+	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
 #endif
 	ONE("oom_score",  S_IRUGO, proc_oom_score),
 	REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
@@ -2922,7 +2907,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("cpuset",    S_IRUGO, proc_cpuset_operations),
 #endif
 #ifdef CONFIG_CGROUPS
-	REG("cgroup",  S_IRUGO, proc_cgroup_operations),
+	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
 #endif
 	ONE("oom_score", S_IRUGO, proc_oom_score),
 	REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 51958d0fb88f..77a1d37b742b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -37,7 +37,8 @@ extern void cgroup_exit(struct task_struct *p);
 extern int cgroupstats_build(struct cgroupstats *stats,
 				struct dentry *dentry);
 
-extern int proc_cgroup_show(struct seq_file *, void *);
+extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+			    struct pid *pid, struct task_struct *tsk);
 
 /* define the enumeration of all cgroup subsystems */
 #define SUBSYS(_x) _x ## _cgrp_id,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index db19a4884a7f..df7733b48d2e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5030,12 +5030,9 @@ core_initcall(cgroup_wq_init);
  *  - Print task's cgroup paths into seq_file, one line for each hierarchy
  *  - Used for /proc/<pid>/cgroup.
  */
-
-/* TODO: Use a proper seq_file iterator */
-int proc_cgroup_show(struct seq_file *m, void *v)
+int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+		     struct pid *pid, struct task_struct *tsk)
 {
-	struct pid *pid;
-	struct task_struct *tsk;
 	char *buf, *path;
 	int retval;
 	struct cgroup_root *root;
@@ -5045,14 +5042,6 @@ int proc_cgroup_show(struct seq_file *m, void *v)
 	if (!buf)
 		goto out;
 
-	retval = -ESRCH;
-	pid = m->private;
-	tsk = get_pid_task(pid, PIDTYPE_PID);
-	if (!tsk)
-		goto out_free;
-
-	retval = 0;
-
 	mutex_lock(&cgroup_mutex);
 	down_read(&css_set_rwsem);
 
@@ -5082,11 +5071,10 @@ int proc_cgroup_show(struct seq_file *m, void *v)
 		seq_putc(m, '\n');
 	}
 
+	retval = 0;
 out_unlock:
 	up_read(&css_set_rwsem);
 	mutex_unlock(&cgroup_mutex);
-	put_task_struct(tsk);
-out_free:
 	kfree(buf);
 out:
 	return retval;
-- 
cgit v1.2.3


From 52de4779f201758ddcf37360f09a16895756e708 Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Thu, 18 Sep 2014 16:03:36 +0800
Subject: cpuset: simplify proc_cpuset_show()

Use the ONE macro instead of REG, and we can simplify proc_cpuset_show().

Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/proc/base.c         | 20 ++------------------
 include/linux/cpuset.h |  3 ++-
 kernel/cpuset.c        | 15 +++------------
 3 files changed, 7 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6b96892015ec..4e8aa35fc3eb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -376,22 +376,6 @@ static const struct file_operations proc_lstats_operations = {
 
 #endif
 
-#ifdef CONFIG_PROC_PID_CPUSET
-
-static int cpuset_open(struct inode *inode, struct file *file)
-{
-	struct pid *pid = PROC_I(inode)->pid;
-	return single_open(file, proc_cpuset_show, pid);
-}
-
-static const struct file_operations proc_cpuset_operations = {
-	.open		= cpuset_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif
-
 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
 			  struct pid *pid, struct task_struct *task)
 {
@@ -2558,7 +2542,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("latency",  S_IRUGO, proc_lstats_operations),
 #endif
 #ifdef CONFIG_PROC_PID_CPUSET
-	REG("cpuset",     S_IRUGO, proc_cpuset_operations),
+	ONE("cpuset",     S_IRUGO, proc_cpuset_show),
 #endif
 #ifdef CONFIG_CGROUPS
 	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
@@ -2904,7 +2888,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("latency",  S_IRUGO, proc_lstats_operations),
 #endif
 #ifdef CONFIG_PROC_PID_CPUSET
-	REG("cpuset",    S_IRUGO, proc_cpuset_operations),
+	ONE("cpuset",    S_IRUGO, proc_cpuset_show),
 #endif
 #ifdef CONFIG_CGROUPS
 	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index ade2390ffe92..0d4e0675b318 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -86,7 +86,8 @@ extern void __cpuset_memory_pressure_bump(void);
 
 extern void cpuset_task_status_allowed(struct seq_file *m,
 					struct task_struct *task);
-extern int proc_cpuset_show(struct seq_file *, void *);
+extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
+			    struct pid *pid, struct task_struct *tsk);
 
 extern int cpuset_mem_spread_node(void);
 extern int cpuset_slab_spread_node(void);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 22874d7cf2c0..a37f4ed24867 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2729,10 +2729,9 @@ void __cpuset_memory_pressure_bump(void)
  *    and we take cpuset_mutex, keeping cpuset_attach() from changing it
  *    anyway.
  */
-int proc_cpuset_show(struct seq_file *m, void *unused_v)
+int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
+		     struct pid *pid, struct task_struct *tsk)
 {
-	struct pid *pid;
-	struct task_struct *tsk;
 	char *buf, *p;
 	struct cgroup_subsys_state *css;
 	int retval;
@@ -2742,24 +2741,16 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
 	if (!buf)
 		goto out;
 
-	retval = -ESRCH;
-	pid = m->private;
-	tsk = get_pid_task(pid, PIDTYPE_PID);
-	if (!tsk)
-		goto out_free;
-
 	retval = -ENAMETOOLONG;
 	rcu_read_lock();
 	css = task_css(tsk, cpuset_cgrp_id);
 	p = cgroup_path(css->cgroup, buf, PATH_MAX);
 	rcu_read_unlock();
 	if (!p)
-		goto out_put_task;
+		goto out_free;
 	seq_puts(m, p);
 	seq_putc(m, '\n');
 	retval = 0;
-out_put_task:
-	put_task_struct(tsk);
 out_free:
 	kfree(buf);
 out:
-- 
cgit v1.2.3


From 69803d4f487fc60ce740f1fe1f0d2092d97277b6 Mon Sep 17 00:00:00 2001
From: Grégory Soutadé <gsoutade@neotion.com>
Date: Mon, 15 Sep 2014 17:47:09 +0200
Subject: mmc: Replace "enhanced_area_en" attribute by
 "partition_setting_completed"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace ext_csd "enhanced_area_en" attribute by
 "partition_setting_completed". It was used whether or
 not enhanced user area is defined and without checks of
 EXT_CSD_PARTITION_SETTING_COMPLETED bit.

Signed-off-by: Grégory Soutadé <gsoutade@neotion.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc.c   | 13 ++++++++-----
 include/linux/mmc/card.h |  2 +-
 include/linux/mmc/mmc.h  |  2 ++
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 5b4b969c18bf..eb78fcd01e52 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -315,7 +315,6 @@ static void mmc_manage_enhanced_area(struct mmc_card *card, u8 *ext_csd)
 		hc_wp_grp_sz =
 			ext_csd[EXT_CSD_HC_WP_GRP_SIZE];
 
-		card->ext_csd.enhanced_area_en = 1;
 		/*
 		 * calculate the enhanced data area offset, in bytes
 		 */
@@ -356,13 +355,11 @@ static void mmc_manage_gp_partitions(struct mmc_card *card, u8 *ext_csd)
 	 */
 	if (ext_csd[EXT_CSD_PARTITION_SUPPORT] &
 	    EXT_CSD_PART_SUPPORT_PART_EN) {
-		if (card->ext_csd.enhanced_area_en != 1) {
+		if (card->ext_csd.partition_setting_completed) {
 			hc_erase_grp_sz =
 				ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE];
 			hc_wp_grp_sz =
 				ext_csd[EXT_CSD_HC_WP_GRP_SIZE];
-
-			card->ext_csd.enhanced_area_en = 1;
 		}
 
 		for (idx = 0; idx < MMC_NUM_GP_PARTITION; idx++) {
@@ -489,6 +486,12 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		ext_csd[EXT_CSD_TRIM_MULT];
 	card->ext_csd.raw_partition_support = ext_csd[EXT_CSD_PARTITION_SUPPORT];
 	if (card->ext_csd.rev >= 4) {
+		if (ext_csd[EXT_CSD_PARTITION_SETTING_COMPLETED] &
+		    EXT_CSD_PART_SETTING_COMPLETED)
+			card->ext_csd.partition_setting_completed = 1;
+		else
+			card->ext_csd.partition_setting_completed = 0;
+
 		mmc_manage_enhanced_area(card, ext_csd);
 
 		mmc_manage_gp_partitions(card, ext_csd);
@@ -1348,7 +1351,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 * If enhanced_area_en is TRUE, host needs to enable ERASE_GRP_DEF
 	 * bit.  This bit will be lost every time after a reset or power off.
 	 */
-	if (card->ext_csd.enhanced_area_en ||
+	if (card->ext_csd.partition_setting_completed ||
 	    (card->ext_csd.rev >= 3 && (host->caps2 & MMC_CAP2_HC_ERASE_SZ))) {
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 				 EXT_CSD_ERASE_GROUP_DEF, 1,
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 0ea795f5feb9..b0692d28f8e6 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -75,7 +75,7 @@ struct mmc_ext_csd {
 	unsigned int		sec_trim_mult;	/* Secure trim multiplier  */
 	unsigned int		sec_erase_mult;	/* Secure erase multiplier */
 	unsigned int		trim_timeout;		/* In milliseconds */
-	bool			enhanced_area_en;	/* enable bit */
+	bool			partition_setting_completed;	/* enable bit */
 	unsigned long long	enhanced_area_offset;	/* Units: Byte */
 	unsigned int		enhanced_area_size;	/* Units: KB */
 	unsigned int		cache_size;		/* Units: KB */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 64ec963ed347..78753bc90a87 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -281,6 +281,7 @@ struct _mmc_csd {
 #define EXT_CSD_EXP_EVENTS_CTRL		56	/* R/W, 2 bytes */
 #define EXT_CSD_DATA_SECTOR_SIZE	61	/* R */
 #define EXT_CSD_GP_SIZE_MULT		143	/* R/W */
+#define EXT_CSD_PARTITION_SETTING_COMPLETED 155	/* R/W */
 #define EXT_CSD_PARTITION_ATTRIBUTE	156	/* R/W */
 #define EXT_CSD_PARTITION_SUPPORT	160	/* RO */
 #define EXT_CSD_HPI_MGMT		161	/* R/W */
@@ -349,6 +350,7 @@ struct _mmc_csd {
 #define EXT_CSD_PART_CONFIG_ACC_RPMB	(0x3)
 #define EXT_CSD_PART_CONFIG_ACC_GP0	(0x4)
 
+#define EXT_CSD_PART_SETTING_COMPLETED	(0x1)
 #define EXT_CSD_PART_SUPPORT_PART_EN	(0x1)
 
 #define EXT_CSD_CMD_SET_NORMAL		(1<<0)
-- 
cgit v1.2.3


From 766a53d059d1500c9755c8af017bd411bd8f1b20 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 12 Sep 2014 17:51:29 -0400
Subject: vgaswitcheroo: add vga_switcheroo_fini_domain_pm_ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drivers should call this on unload to unregister pmops.

Bug:
https://bugzilla.kernel.org/show_bug.cgi?id=84431

Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/vga/vga_switcheroo.c | 6 ++++++
 include/linux/vga_switcheroo.h   | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 6866448083b2..37ac7b5dbd06 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -660,6 +660,12 @@ int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *
 }
 EXPORT_SYMBOL(vga_switcheroo_init_domain_pm_ops);
 
+void vga_switcheroo_fini_domain_pm_ops(struct device *dev)
+{
+	dev->pm_domain = NULL;
+}
+EXPORT_SYMBOL(vga_switcheroo_fini_domain_pm_ops);
+
 static int vga_switcheroo_runtime_resume_hdmi_audio(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 502073a53dd3..b483abd34493 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -64,6 +64,7 @@ int vga_switcheroo_get_client_state(struct pci_dev *dev);
 void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic);
 
 int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain);
+void vga_switcheroo_fini_domain_pm_ops(struct device *dev);
 int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain);
 #else
 
@@ -82,6 +83,7 @@ static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return
 static inline void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic) {}
 
 static inline int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain) { return -EINVAL; }
+static inline void vga_switcheroo_fini_domain_pm_ops(struct device *dev) {}
 static inline int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain) { return -EINVAL; }
 
 #endif
-- 
cgit v1.2.3


From dd56af42bd829c6e770ed69812bd65a04eaeb1e4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 25 Aug 2014 20:25:06 -0700
Subject: rcu: Eliminate deadlock between CPU hotplug and expedited grace
 periods

Currently, the expedited grace-period primitives do get_online_cpus().
This greatly simplifies their implementation, but means that calls
to them holding locks that are acquired by CPU-hotplug notifiers (to
say nothing of calls to these primitives from CPU-hotplug notifiers)
can deadlock.  But this is starting to become inconvenient, as can be
seen here: https://lkml.org/lkml/2014/8/5/754.  The problem in this
case is that some developers need to acquire a mutex from a CPU-hotplug
notifier, but also need to hold it across a synchronize_rcu_expedited().
As noted above, this currently results in deadlock.

This commit avoids the deadlock and retains the simplicity by creating
a try_get_online_cpus(), which returns false if the get_online_cpus()
reference count could not immediately be incremented.  If a call to
try_get_online_cpus() returns true, the expedited primitives operate as
before.  If a call returns false, the expedited primitives fall back to
normal grace-period operations.  This falling back of course results in
increased grace-period latency, but only during times when CPU hotplug
operations are actually in flight.  The effect should therefore be
negligible during normal operation.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Tested-by: Lan Tianyu <tianyu.lan@intel.com>
---
 include/linux/cpu.h      |  2 ++
 include/linux/lockdep.h  |  1 +
 kernel/cpu.c             | 16 +++++++++++++++-
 kernel/rcu/tree.c        | 19 ++++++++++++-------
 kernel/rcu/tree_plugin.h | 11 +++++------
 5 files changed, 35 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 95978ad7fcdd..b2d9a43012b2 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -213,6 +213,7 @@ extern struct bus_type cpu_subsys;
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
 extern void get_online_cpus(void);
+extern bool try_get_online_cpus(void);
 extern void put_online_cpus(void);
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
@@ -230,6 +231,7 @@ int cpu_down(unsigned int cpu);
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()	do { } while (0)
+#define try_get_online_cpus()	true
 #define put_online_cpus()	do { } while (0)
 #define cpu_hotplug_disable()	do { } while (0)
 #define cpu_hotplug_enable()	do { } while (0)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 008388f920d7..4f86465cc317 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -505,6 +505,7 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 
 #define lock_map_acquire(l)			lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
 #define lock_map_acquire_read(l)		lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
+#define lock_map_acquire_tryread(l)		lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
 #define lock_map_release(l)			lock_release(l, 1, _THIS_IP_)
 
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 81e2a388a0f6..356450f09c1f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -79,6 +79,8 @@ static struct {
 
 /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
 #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
+#define cpuhp_lock_acquire_tryread() \
+				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
 #define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
 #define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
 
@@ -91,10 +93,22 @@ void get_online_cpus(void)
 	mutex_lock(&cpu_hotplug.lock);
 	cpu_hotplug.refcount++;
 	mutex_unlock(&cpu_hotplug.lock);
-
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
+bool try_get_online_cpus(void)
+{
+	if (cpu_hotplug.active_writer == current)
+		return true;
+	if (!mutex_trylock(&cpu_hotplug.lock))
+		return false;
+	cpuhp_lock_acquire_tryread();
+	cpu_hotplug.refcount++;
+	mutex_unlock(&cpu_hotplug.lock);
+	return true;
+}
+EXPORT_SYMBOL_GPL(try_get_online_cpus);
+
 void put_online_cpus(void)
 {
 	if (cpu_hotplug.active_writer == current)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d7a3b13bc94c..133e47223095 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2940,11 +2940,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  * restructure your code to batch your updates, and then use a single
  * synchronize_sched() instead.
  *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier.  Failing to observe
- * these restriction will result in deadlock.
- *
  * This implementation can be thought of as an application of ticket
  * locking to RCU, with sync_sched_expedited_started and
  * sync_sched_expedited_done taking on the roles of the halves
@@ -2994,7 +2989,12 @@ void synchronize_sched_expedited(void)
 	 */
 	snap = atomic_long_inc_return(&rsp->expedited_start);
 	firstsnap = snap;
-	get_online_cpus();
+	if (!try_get_online_cpus()) {
+		/* CPU hotplug operation in flight, fall back to normal GP. */
+		wait_rcu_gp(call_rcu_sched);
+		atomic_long_inc(&rsp->expedited_normal);
+		return;
+	}
 	WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
 
 	/*
@@ -3041,7 +3041,12 @@ void synchronize_sched_expedited(void)
 		 * and they started after our first try, so their grace
 		 * period works for us.
 		 */
-		get_online_cpus();
+		if (!try_get_online_cpus()) {
+			/* CPU hotplug operation in flight, use normal GP. */
+			wait_rcu_gp(call_rcu_sched);
+			atomic_long_inc(&rsp->expedited_normal);
+			return;
+		}
 		snap = atomic_long_read(&rsp->expedited_start);
 		smp_mb(); /* ensure read is before try_stop_cpus(). */
 	}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e2c5910546f6..387dd4599344 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -793,11 +793,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
  * In fact, if you are using synchronize_rcu_expedited() in a loop,
  * please restructure your code to batch your updates, and then Use a
  * single synchronize_rcu() instead.
- *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier.  Failing to observe
- * these restriction will result in deadlock.
  */
 void synchronize_rcu_expedited(void)
 {
@@ -819,7 +814,11 @@ void synchronize_rcu_expedited(void)
 	 * being boosted.  This simplifies the process of moving tasks
 	 * from leaf to root rcu_node structures.
 	 */
-	get_online_cpus();
+	if (!try_get_online_cpus()) {
+		/* CPU-hotplug operation in flight, fall back to normal GP. */
+		wait_rcu_gp(call_rcu);
+		return;
+	}
 
 	/*
 	 * Acquire lock, falling back to synchronize_rcu() if too many
-- 
cgit v1.2.3


From f6be8af1c95de4a46e325e728900a70ceadb52cf Mon Sep 17 00:00:00 2001
From: Chuansheng Liu <chuansheng.liu@intel.com>
Date: Thu, 4 Sep 2014 15:17:53 +0800
Subject: sched: Add new API wake_up_if_idle() to wake up the idle cpu

Implementing one new API wake_up_if_idle(), which is used to
wake up the idle CPU.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Chuansheng Liu <chuansheng.liu@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: daniel.lezcano@linaro.org
Cc: rjw@rjwysocki.net
Cc: linux-pm@vger.kernel.org
Cc: changcheng.liu@intel.com
Cc: xiaoming.wang@intel.com
Cc: souvik.k.chakravarty@intel.com
Cc: chuansheng.liu@intel.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1409815075-4180-1-git-send-email-chuansheng.liu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 +
 kernel/sched/core.c   | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dd9eb4807389..82ff3d6efb19 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1024,6 +1024,7 @@ struct sched_domain_topology_level {
 extern struct sched_domain_topology_level *sched_domain_topology;
 
 extern void set_sched_topology(struct sched_domain_topology_level *tl);
+extern void wake_up_if_idle(int cpu);
 
 #ifdef CONFIG_SCHED_DEBUG
 # define SD_INIT_NAME(type)		.name = #type
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78e5c839df13..f7c6ed2fd69d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1634,6 +1634,25 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu)
 	}
 }
 
+void wake_up_if_idle(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	if (!is_idle_task(rq->curr))
+		return;
+
+	if (set_nr_if_polling(rq->idle)) {
+		trace_sched_wake_idle_without_ipi(cpu);
+	} else {
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		if (is_idle_task(rq->curr))
+			smp_send_reschedule(cpu);
+		/* Else cpu is not in idle, do nothing here */
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	}
+}
+
 bool cpus_share_cache(int this_cpu, int that_cpu)
 {
 	return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
-- 
cgit v1.2.3


From c6f4459fc3ba532e896cb678e29b45cb985f82bf Mon Sep 17 00:00:00 2001
From: Chuansheng Liu <chuansheng.liu@intel.com>
Date: Thu, 4 Sep 2014 15:17:54 +0800
Subject: smp: Add new wake_up_all_idle_cpus() function

Currently kick_all_cpus_sync() can break non-polling idle cpus
thru IPI interrupts.

But sometimes we need to break the polling idle cpus immediately
to reselect the suitable c-state, also for non-idle cpus, we need
to do nothing if we try to wake up them.

Here adding one new function wake_up_all_idle_cpus() to let all cpus
out of idle based on function wake_up_if_idle().

Signed-off-by: Chuansheng Liu <chuansheng.liu@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: daniel.lezcano@linaro.org
Cc: rjw@rjwysocki.net
Cc: linux-pm@vger.kernel.org
Cc: changcheng.liu@intel.com
Cc: xiaoming.wang@intel.com
Cc: souvik.k.chakravarty@intel.com
Cc: luto@amacapital.net
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@fb.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1409815075-4180-2-git-send-email-chuansheng.liu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/smp.h |  2 ++
 kernel/smp.c        | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 34347f26be9b..93dff5fff524 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -100,6 +100,7 @@ int smp_call_function_any(const struct cpumask *mask,
 			  smp_call_func_t func, void *info, int wait);
 
 void kick_all_cpus_sync(void);
+void wake_up_all_idle_cpus(void);
 
 /*
  * Generic and arch helpers
@@ -148,6 +149,7 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
 }
 
 static inline void kick_all_cpus_sync(void) {  }
+static inline void wake_up_all_idle_cpus(void) {  }
 
 #endif /* !SMP */
 
diff --git a/kernel/smp.c b/kernel/smp.c
index aff8aa14f547..9e0d0b289118 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -13,6 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
+#include <linux/sched.h>
 
 #include "smpboot.h"
 
@@ -699,3 +700,24 @@ void kick_all_cpus_sync(void)
 	smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
+
+/**
+ * wake_up_all_idle_cpus - break all cpus out of idle
+ * wake_up_all_idle_cpus try to break all cpus which is in idle state even
+ * including idle polling cpus, for non-idle cpus, we will do nothing
+ * for them.
+ */
+void wake_up_all_idle_cpus(void)
+{
+	int cpu;
+
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;
+
+		wake_up_if_idle(cpu);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
-- 
cgit v1.2.3


From ef8ac06359ddf95431cf6bb04ad2b36fff562328 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Fri, 12 Sep 2014 09:12:14 -0400
Subject: seqlock: Add irqsave variant of read_seqbegin_or_lock()

There are cases where read_seqbegin_or_lock() needs to block irqs,
because the seqlock in question nests inside a lock that is also
be taken from irq context.

Add read_seqbegin_or_lock_irqsave() and done_seqretry_irqrestore(), which
are almost identical to read_seqbegin_or_lock() and done_seqretry().

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: prarit@redhat.com
Cc: oleg@redhat.com
Cc: sgruszka@redhat.com
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Link: http://lkml.kernel.org/r/1410527535-9814-2-git-send-email-riel@redhat.com
[ Improved the readability of the code a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/seqlock.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index cc359636cfa3..f5df8f687b4d 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -456,4 +456,23 @@ read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 
+static inline unsigned long
+read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
+{
+	unsigned long flags = 0;
+
+	if (!(*seq & 1))	/* Even */
+		*seq = read_seqbegin(lock);
+	else			/* Odd */
+		read_seqlock_excl_irqsave(lock, flags);
+
+	return flags;
+}
+
+static inline void
+done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
+{
+	if (seq & 1)
+		read_sequnlock_excl_irqrestore(lock, flags);
+}
 #endif /* __LINUX_SEQLOCK_H */
-- 
cgit v1.2.3


From d4311ff1a8da48d609db9500f121c15580dfeeb7 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Fri, 12 Sep 2014 14:16:17 +0100
Subject: init/main.c: Give init_task a canary

Tasks get their end of stack set to STACK_END_MAGIC with the
aim to catch stack overruns. Currently this feature does not
apply to init_task. This patch removes this restriction.

Note that a similar patch was posted by Prarit Bhargava
some time ago but was never merged:

  http://marc.info/?l=linux-kernel&m=127144305403241&w=2

Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: aneesh.kumar@linux.vnet.ibm.com
Cc: dzickus@redhat.com
Cc: bmr@redhat.com
Cc: jcastillo@redhat.com
Cc: jgh@redhat.com
Cc: minchan@kernel.org
Cc: tglx@linutronix.de
Cc: hannes@cmpxchg.org
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Daeseok Youn <daeseok.youn@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Fabian Frederick <fabf@skynet.be>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michael Opdenacker <michael.opdenacker@free-electrons.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1410527779-8133-2-git-send-email-atomlin@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/mm/fault.c    |  3 +--
 arch/x86/mm/fault.c        |  3 +--
 include/linux/sched.h      |  2 ++
 init/main.c                |  1 +
 kernel/fork.c              | 12 +++++++++---
 kernel/trace/trace_stack.c |  4 +---
 6 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51ab9e7e6c39..35d0760c3fa4 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -30,7 +30,6 @@
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
-#include <linux/magic.h>
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
 
@@ -538,7 +537,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 		regs->nip);
 
 	stackend = end_of_stack(current);
-	if (current != &init_task && *stackend != STACK_END_MAGIC)
+	if (*stackend != STACK_END_MAGIC)
 		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
 
 	die("Kernel access of bad area", regs, sig);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..bc23a7043c65 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -3,7 +3,6 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/magic.h>		/* STACK_END_MAGIC		*/
 #include <linux/sched.h>		/* test_thread_flag(), ...	*/
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
 #include <linux/module.h>		/* search_exception_table	*/
@@ -710,7 +709,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	show_fault_oops(regs, error_code, address);
 
 	stackend = end_of_stack(tsk);
-	if (tsk != &init_task && *stackend != STACK_END_MAGIC)
+	if (*stackend != STACK_END_MAGIC)
 		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
 
 	tsk->thread.cr2		= address;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 82ff3d6efb19..118dca7d5a28 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -57,6 +57,7 @@ struct sched_param {
 #include <linux/llist.h>
 #include <linux/uidgid.h>
 #include <linux/gfp.h>
+#include <linux/magic.h>
 
 #include <asm/processor.h>
 
@@ -2638,6 +2639,7 @@ static inline unsigned long stack_not_used(struct task_struct *p)
 	return (unsigned long)n - (unsigned long)end_of_stack(p);
 }
 #endif
+extern void set_task_stack_end_magic(struct task_struct *tsk);
 
 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available
diff --git a/init/main.c b/init/main.c
index bb1aed928f21..5fc3fc7bd475 100644
--- a/init/main.c
+++ b/init/main.c
@@ -508,6 +508,7 @@ asmlinkage __visible void __init start_kernel(void)
 	 * lockdep hash:
 	 */
 	lockdep_init();
+	set_task_stack_end_magic(&init_task);
 	smp_setup_processor_id();
 	debug_objects_early_init();
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 9387ae8ab048..ad64248c4b18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -294,11 +294,18 @@ int __weak arch_dup_task_struct(struct task_struct *dst,
 	return 0;
 }
 
+void set_task_stack_end_magic(struct task_struct *tsk)
+{
+	unsigned long *stackend;
+
+	stackend = end_of_stack(tsk);
+	*stackend = STACK_END_MAGIC;	/* for overflow detection */
+}
+
 static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
 	struct task_struct *tsk;
 	struct thread_info *ti;
-	unsigned long *stackend;
 	int node = tsk_fork_get_node(orig);
 	int err;
 
@@ -328,8 +335,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
-	stackend = end_of_stack(tsk);
-	*stackend = STACK_END_MAGIC;	/* for overflow detection */
+	set_task_stack_end_magic(tsk);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 	tsk->stack_canary = get_random_int();
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8a4e5cb66a4c..1636e41828c2 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -13,7 +13,6 @@
 #include <linux/sysctl.h>
 #include <linux/init.h>
 #include <linux/fs.h>
-#include <linux/magic.h>
 
 #include <asm/setup.h>
 
@@ -171,8 +170,7 @@ check_stack(unsigned long ip, unsigned long *stack)
 			i++;
 	}
 
-	if ((current != &init_task &&
-		*(end_of_stack(current)) != STACK_END_MAGIC)) {
+	if (*end_of_stack(current) != STACK_END_MAGIC) {
 		print_max_stack();
 		BUG();
 	}
-- 
cgit v1.2.3


From a70857e46dd13e87ae06bf0e64cb6a2d4f436265 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Fri, 12 Sep 2014 14:16:18 +0100
Subject: sched: Add helper for task stack page overrun checking

This facility is used in a few places so let's introduce
a helper function to improve code readability.

Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: aneesh.kumar@linux.vnet.ibm.com
Cc: dzickus@redhat.com
Cc: bmr@redhat.com
Cc: jcastillo@redhat.com
Cc: oleg@redhat.com
Cc: riel@redhat.com
Cc: prarit@redhat.com
Cc: jgh@redhat.com
Cc: minchan@kernel.org
Cc: mpe@ellerman.id.au
Cc: tglx@linutronix.de
Cc: hannes@cmpxchg.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1410527779-8133-3-git-send-email-atomlin@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/mm/fault.c    | 4 +---
 arch/x86/mm/fault.c        | 4 +---
 include/linux/sched.h      | 2 ++
 kernel/trace/trace_stack.c | 2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 35d0760c3fa4..99b2f2775658 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -507,7 +507,6 @@ bail:
 void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 {
 	const struct exception_table_entry *entry;
-	unsigned long *stackend;
 
 	/* Are we prepared to handle this fault?  */
 	if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -536,8 +535,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 	printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
 		regs->nip);
 
-	stackend = end_of_stack(current);
-	if (*stackend != STACK_END_MAGIC)
+	if (task_stack_end_corrupted(current))
 		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
 
 	die("Kernel access of bad area", regs, sig);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index bc23a7043c65..6240bc7ae741 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -648,7 +648,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	   unsigned long address, int signal, int si_code)
 {
 	struct task_struct *tsk = current;
-	unsigned long *stackend;
 	unsigned long flags;
 	int sig;
 
@@ -708,8 +707,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
 	show_fault_oops(regs, error_code, address);
 
-	stackend = end_of_stack(tsk);
-	if (*stackend != STACK_END_MAGIC)
+	if (task_stack_end_corrupted(tsk))
 		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
 
 	tsk->thread.cr2		= address;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 118dca7d5a28..18f52624eaa6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2617,6 +2617,8 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 }
 
 #endif
+#define task_stack_end_corrupted(task) \
+		(*(end_of_stack(task)) != STACK_END_MAGIC)
 
 static inline int object_is_on_stack(void *obj)
 {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 1636e41828c2..16eddb308c33 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -170,7 +170,7 @@ check_stack(unsigned long ip, unsigned long *stack)
 			i++;
 	}
 
-	if (*end_of_stack(current) != STACK_END_MAGIC) {
+	if (task_stack_end_corrupted(current)) {
 		print_max_stack();
 		BUG();
 	}
-- 
cgit v1.2.3


From f29374b146dd02f5f99742aedaddd6ef3512fc9c Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Fri, 19 Sep 2014 16:29:31 +0800
Subject: cgroup: remove redundant check in cgroup_ino()

After we implemented default unified hierarchy, cgrp->kn can never
be NULL.

Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 7 ++-----
 mm/memory-failure.c    | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 77a1d37b742b..818a81fe7ccc 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -532,13 +532,10 @@ static inline bool cgroup_has_tasks(struct cgroup *cgrp)
 	return !list_empty(&cgrp->cset_links);
 }
 
-/* returns ino associated with a cgroup, 0 indicates unmounted root */
+/* returns ino associated with a cgroup */
 static inline ino_t cgroup_ino(struct cgroup *cgrp)
 {
-	if (cgrp->kn)
-		return cgrp->kn->ino;
-	else
-		return 0;
+	return cgrp->kn->ino;
 }
 
 /* cft/css accessors for cftype->write() operation */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 44c6bd201d3a..8639f6b28746 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -148,7 +148,7 @@ static int hwpoison_filter_task(struct page *p)
 	ino = cgroup_ino(css->cgroup);
 	css_put(css);
 
-	if (!ino || ino != hwpoison_filter_memcg)
+	if (ino != hwpoison_filter_memcg)
 		return -EINVAL;
 
 	return 0;
-- 
cgit v1.2.3


From a25eb52e81a40e986179a790fbb5a1f02f482b7a Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Fri, 19 Sep 2014 16:51:00 +0800
Subject: cgroup: remove CGRP_RELEASABLE flag

We call put_css_set() after setting CGRP_RELEASABLE flag in
cgroup_task_migrate(), but in other places we call it without setting
the flag. I don't see the necessity of this flag.

Moreover once the flag is set, it will never be cleared, unless writing
to the notify_on_release control file, so it can be quite confusing
if we look at the output of debug.releasable.

  # mount -t cgroup -o debug xxx /cgroup
  # mkdir /cgroup/child
  # cat /cgroup/child/debug.releasable
  0   <-- shows 0 though the cgroup is empty
  # echo $$ > /cgroup/child/tasks
  # cat /cgroup/child/debug.releasable
  0
  # echo $$ > /cgroup/tasks && echo $$ > /cgroup/child/tasks
  # cat /proc/child/debug.releasable
  1   <-- shows 1 though the cgroup is not empty

This patch removes the flag, and now debug.releasable shows if the
cgroup is empty or not.

Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h |  5 -----
 kernel/cgroup.c        | 40 +++++++++++++---------------------------
 2 files changed, 13 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 818a81fe7ccc..1d5196889048 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -161,11 +161,6 @@ static inline void css_put(struct cgroup_subsys_state *css)
 
 /* bits in struct cgroup flags field */
 enum {
-	/*
-	 * Control Group has previously had a child cgroup or a task,
-	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
-	 */
-	CGRP_RELEASABLE,
 	/* Control Group requires release notifications to userspace */
 	CGRP_NOTIFY_ON_RELEASE,
 	/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index df7733b48d2e..16e3a4f5c9dc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -329,14 +329,6 @@ bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
 	return false;
 }
 
-static int cgroup_is_releasable(const struct cgroup *cgrp)
-{
-	const int bits =
-		(1 << CGRP_RELEASABLE) |
-		(1 << CGRP_NOTIFY_ON_RELEASE);
-	return (cgrp->flags & bits) == bits;
-}
-
 static int notify_on_release(const struct cgroup *cgrp)
 {
 	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -491,7 +483,7 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
 	return key;
 }
 
-static void put_css_set_locked(struct css_set *cset, bool taskexit)
+static void put_css_set_locked(struct css_set *cset)
 {
 	struct cgrp_cset_link *link, *tmp_link;
 	struct cgroup_subsys *ss;
@@ -517,11 +509,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
 		/* @cgrp can't go away while we're holding css_set_rwsem */
 		if (list_empty(&cgrp->cset_links)) {
 			cgroup_update_populated(cgrp, false);
-			if (notify_on_release(cgrp)) {
-				if (taskexit)
-					set_bit(CGRP_RELEASABLE, &cgrp->flags);
-				check_for_release(cgrp);
-			}
+			check_for_release(cgrp);
 		}
 
 		kfree(link);
@@ -530,7 +518,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
 	kfree_rcu(cset, rcu_head);
 }
 
-static void put_css_set(struct css_set *cset, bool taskexit)
+static void put_css_set(struct css_set *cset)
 {
 	/*
 	 * Ensure that the refcount doesn't hit zero while any readers
@@ -541,7 +529,7 @@ static void put_css_set(struct css_set *cset, bool taskexit)
 		return;
 
 	down_write(&css_set_rwsem);
-	put_css_set_locked(cset, taskexit);
+	put_css_set_locked(cset);
 	up_write(&css_set_rwsem);
 }
 
@@ -2037,8 +2025,7 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
 	 * task. As trading it for new_cset is protected by cgroup_mutex,
 	 * we're safe to drop it here; it will be freed under RCU.
 	 */
-	set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
-	put_css_set_locked(old_cset, false);
+	put_css_set_locked(old_cset);
 }
 
 /**
@@ -2059,7 +2046,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 		cset->mg_src_cgrp = NULL;
 		cset->mg_dst_cset = NULL;
 		list_del_init(&cset->mg_preload_node);
-		put_css_set_locked(cset, false);
+		put_css_set_locked(cset);
 	}
 	up_write(&css_set_rwsem);
 }
@@ -2153,8 +2140,8 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
 		if (src_cset == dst_cset) {
 			src_cset->mg_src_cgrp = NULL;
 			list_del_init(&src_cset->mg_preload_node);
-			put_css_set(src_cset, false);
-			put_css_set(dst_cset, false);
+			put_css_set(src_cset);
+			put_css_set(dst_cset);
 			continue;
 		}
 
@@ -2163,7 +2150,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
 		if (list_empty(&dst_cset->mg_preload_node))
 			list_add(&dst_cset->mg_preload_node, &csets);
 		else
-			put_css_set(dst_cset, false);
+			put_css_set(dst_cset);
 	}
 
 	list_splice_tail(&csets, preloaded_csets);
@@ -4159,7 +4146,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
 static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
 					  struct cftype *cft, u64 val)
 {
-	clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
 	if (val)
 		set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
 	else
@@ -4806,7 +4792,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 */
 	kernfs_remove(cgrp->kn);
 
-	set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags);
 	check_for_release(cgroup_parent(cgrp));
 
 	/* put the base reference */
@@ -5244,12 +5229,12 @@ void cgroup_exit(struct task_struct *tsk)
 	}
 
 	if (put_cset)
-		put_css_set(cset, true);
+		put_css_set(cset);
 }
 
 static void check_for_release(struct cgroup *cgrp)
 {
-	if (cgroup_is_releasable(cgrp) && !cgroup_has_tasks(cgrp) &&
+	if (notify_on_release(cgrp) && !cgroup_has_tasks(cgrp) &&
 	    !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
 		schedule_work(&cgrp->release_agent_work);
 }
@@ -5496,7 +5481,8 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 
 static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	return test_bit(CGRP_RELEASABLE, &css->cgroup->flags);
+	return (!cgroup_has_tasks(css->cgroup) &&
+		!css_has_online_children(&css->cgroup->self));
 }
 
 static struct cftype debug_files[] =  {
-- 
cgit v1.2.3


From 2e4e44107176d552f8bb1bb76053e850e3809841 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 17 Sep 2014 04:49:49 -0700
Subject: net: add alloc_skb_with_frags() helper

Extract from sock_alloc_send_pskb() code building skb with frags,
so that we can reuse this in other contexts.

Intent is to use it from tcp_send_rcvq(), tcp_collapse(), ...

We also want to replace some skb_linearize() calls to a more reliable
strategy in pathological cases where we need to reduce number of frags.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  6 ++++
 net/core/skbuff.c      | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/sock.c        | 78 ++++++++++----------------------------------------
 3 files changed, 99 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 756e3d057e84..f1bfa3781c75 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -769,6 +769,12 @@ static inline struct sk_buff *alloc_skb(unsigned int size,
 	return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
 }
 
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+				     unsigned long data_len,
+				     int max_page_order,
+				     int *errcode,
+				     gfp_t gfp_mask);
+
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 29f7f0121491..06a8feb10099 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4102,3 +4102,81 @@ err_free:
 	return NULL;
 }
 EXPORT_SYMBOL(skb_vlan_untag);
+
+/**
+ * alloc_skb_with_frags - allocate skb with page frags
+ *
+ * header_len: size of linear part
+ * data_len: needed length in frags
+ * max_page_order: max page order desired.
+ * errcode: pointer to error code if any
+ * gfp_mask: allocation mask
+ *
+ * This can be used to allocate a paged skb, given a maximal order for frags.
+ */
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+				     unsigned long data_len,
+				     int max_page_order,
+				     int *errcode,
+				     gfp_t gfp_mask)
+{
+	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	unsigned long chunk;
+	struct sk_buff *skb;
+	struct page *page;
+	gfp_t gfp_head;
+	int i;
+
+	*errcode = -EMSGSIZE;
+	/* Note this test could be relaxed, if we succeed to allocate
+	 * high order pages...
+	 */
+	if (npages > MAX_SKB_FRAGS)
+		return NULL;
+
+	gfp_head = gfp_mask;
+	if (gfp_head & __GFP_WAIT)
+		gfp_head |= __GFP_REPEAT;
+
+	*errcode = -ENOBUFS;
+	skb = alloc_skb(header_len, gfp_head);
+	if (!skb)
+		return NULL;
+
+	skb->truesize += npages << PAGE_SHIFT;
+
+	for (i = 0; npages > 0; i++) {
+		int order = max_page_order;
+
+		while (order) {
+			if (npages >= 1 << order) {
+				page = alloc_pages(gfp_mask |
+						   __GFP_COMP |
+						   __GFP_NOWARN |
+						   __GFP_NORETRY,
+						   order);
+				if (page)
+					goto fill_page;
+				/* Do not retry other high order allocations */
+				order = 1;
+				max_page_order = 0;
+			}
+			order--;
+		}
+		page = alloc_page(gfp_mask);
+		if (!page)
+			goto failure;
+fill_page:
+		chunk = min_t(unsigned long, data_len,
+			      PAGE_SIZE << order);
+		skb_fill_page_desc(skb, i, page, 0, chunk);
+		data_len -= chunk;
+		npages -= 1 << order;
+	}
+	return skb;
+
+failure:
+	kfree_skb(skb);
+	return NULL;
+}
+EXPORT_SYMBOL(alloc_skb_with_frags);
diff --git a/net/core/sock.c b/net/core/sock.c
index 6f436b5e4961..de887c45c63b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1762,21 +1762,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 				     unsigned long data_len, int noblock,
 				     int *errcode, int max_page_order)
 {
-	struct sk_buff *skb = NULL;
-	unsigned long chunk;
-	gfp_t gfp_mask;
+	struct sk_buff *skb;
 	long timeo;
 	int err;
-	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-	struct page *page;
-	int i;
-
-	err = -EMSGSIZE;
-	if (npages > MAX_SKB_FRAGS)
-		goto failure;
 
 	timeo = sock_sndtimeo(sk, noblock);
-	while (!skb) {
+	for (;;) {
 		err = sock_error(sk);
 		if (err != 0)
 			goto failure;
@@ -1785,66 +1776,27 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			goto failure;
 
-		if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
-			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-			err = -EAGAIN;
-			if (!timeo)
-				goto failure;
-			if (signal_pending(current))
-				goto interrupted;
-			timeo = sock_wait_for_wmem(sk, timeo);
-			continue;
-		}
-
-		err = -ENOBUFS;
-		gfp_mask = sk->sk_allocation;
-		if (gfp_mask & __GFP_WAIT)
-			gfp_mask |= __GFP_REPEAT;
+		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+			break;
 
-		skb = alloc_skb(header_len, gfp_mask);
-		if (!skb)
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		err = -EAGAIN;
+		if (!timeo)
 			goto failure;
-
-		skb->truesize += data_len;
-
-		for (i = 0; npages > 0; i++) {
-			int order = max_page_order;
-
-			while (order) {
-				if (npages >= 1 << order) {
-					page = alloc_pages(sk->sk_allocation |
-							   __GFP_COMP |
-							   __GFP_NOWARN |
-							   __GFP_NORETRY,
-							   order);
-					if (page)
-						goto fill_page;
-					/* Do not retry other high order allocations */
-					order = 1;
-					max_page_order = 0;
-				}
-				order--;
-			}
-			page = alloc_page(sk->sk_allocation);
-			if (!page)
-				goto failure;
-fill_page:
-			chunk = min_t(unsigned long, data_len,
-				      PAGE_SIZE << order);
-			skb_fill_page_desc(skb, i, page, 0, chunk);
-			data_len -= chunk;
-			npages -= 1 << order;
-		}
+		if (signal_pending(current))
+			goto interrupted;
+		timeo = sock_wait_for_wmem(sk, timeo);
 	}
-
-	skb_set_owner_w(skb, sk);
+	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
+				   errcode, sk->sk_allocation);
+	if (skb)
+		skb_set_owner_w(skb, sk);
 	return skb;
 
 interrupted:
 	err = sock_intr_errno(timeo);
 failure:
-	kfree_skb(skb);
 	*errcode = err;
 	return NULL;
 }
-- 
cgit v1.2.3


From bb7d93496f7ac203f7c3e9678000d1c83eb4e0ba Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 19 Sep 2014 13:07:50 -0700
Subject: net: phy: broadcom: add helper for PHY revision and patch level

The Broadcom BCM7xxx internal PHYs do not contain any useful revision
information in the low 4-bits of their MII_PHYSID2 (MII register 3)
which could allow us to properly identify them.

As a result, we need the actual hardware block integrating these PHYs:
GENET or the SF2 switch to tell us what revision they are built with. To
assist with that, add two helper macros for fetching the the PHY
revision and patch level from the struct phy_device::dev_flags.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 5bd35cc0d471..e4ee9c6679e8 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -41,6 +41,8 @@
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00008000
 /* Broadcom BCM7xxx specific workarounds */
 #define PHY_BRCM_100MBPS_WAR		0x00010000
+#define PHY_BRCM_7XXX_REV(x)		(((x) >> 8) & 0xff)
+#define PHY_BRCM_7XXX_PATCH(x)		((x) & 0xff)
 #define PHY_BCM_FLAGS_VALID		0x80000000
 
 /* Broadcom BCM54XX register definitions, common to most Broadcom PHYs */
-- 
cgit v1.2.3


From 80780a54ecded1647e661ababde13554a149f7f3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 19 Sep 2014 13:07:52 -0700
Subject: net: bcmgenet: remove PHY_BRCM_100MBPS_WAR

Now that we have removed the need for the PHY_BRCM_100MBPS_WAR flag, we
can remove it from the GENET driver and the broadcom shared header file.
The PHY driver checks the PHY supported bitmask instead.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmmii.c | 10 ----------
 include/linux/brcmphy.h                      |  1 -
 2 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index c88f7ae99636..71f2ef7d33a9 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -296,7 +296,6 @@ static int bcmgenet_mii_probe(struct net_device *dev)
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	struct device_node *dn = priv->pdev->dev.of_node;
 	struct phy_device *phydev;
-	unsigned int phy_flags;
 	int ret;
 
 	if (priv->phydev) {
@@ -338,15 +337,6 @@ static int bcmgenet_mii_probe(struct net_device *dev)
 		return ret;
 	}
 
-	phy_flags = PHY_BRCM_100MBPS_WAR;
-
-	/* workarounds are only needed for 100Mpbs PHYs, and
-	 * never on GENET V1 hardware
-	 */
-	if ((phydev->supported & PHY_GBIT_FEATURES) || GENET_IS_V1(priv))
-		phy_flags = 0;
-
-	phydev->dev_flags |= phy_flags;
 	phydev->advertising = phydev->supported;
 
 	/* The internal PHY has its link interrupts routed to the
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index e4ee9c6679e8..3f626fe48c9a 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -40,7 +40,6 @@
 #define PHY_BRCM_CLEAR_RGMII_MODE	0x00004000
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00008000
 /* Broadcom BCM7xxx specific workarounds */
-#define PHY_BRCM_100MBPS_WAR		0x00010000
 #define PHY_BRCM_7XXX_REV(x)		(((x) >> 8) & 0xff)
 #define PHY_BRCM_7XXX_PATCH(x)		((x) & 0xff)
 #define PHY_BCM_FLAGS_VALID		0x80000000
-- 
cgit v1.2.3


From afe93325bc02a5b2dea0cd7d78225de692265e6e Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 17 Sep 2014 12:25:57 -0700
Subject: fou: Add GRO support

Implement fou_gro_receive and fou_gro_complete, and populate these
in the correponsing udp_offloads for the socket. Added ipproto to
udp_offloads and pass this from UDP to the fou GRO routine in proto
field of napi_gro_cb structure.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +-
 net/ipv4/fou.c            | 89 +++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/udp_offload.c    |  5 ++-
 3 files changed, 95 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28d4378615e5..4354b4307e37 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1874,7 +1874,7 @@ struct napi_gro_cb {
 	/* jiffies when first packet was created/queued */
 	unsigned long age;
 
-	/* Used in ipv6_gro_receive() */
+	/* Used in ipv6_gro_receive() and foo-over-udp */
 	u16	proto;
 
 	/* Used in udp_gro_receive */
@@ -1925,6 +1925,7 @@ struct packet_offload {
 
 struct udp_offload {
 	__be16			 port;
+	u8			 ipproto;
 	struct offload_callbacks callbacks;
 };
 
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index d44f97b79418..dced89fbe480 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <net/genetlink.h>
 #include <net/ip.h>
+#include <net/protocol.h>
 #include <net/udp.h>
 #include <net/udp_tunnel.h>
 #include <net/xfrm.h>
@@ -21,6 +22,7 @@ struct fou {
 	struct socket *sock;
 	u8 protocol;
 	u16 port;
+	struct udp_offload udp_offloads;
 	struct list_head list;
 };
 
@@ -62,6 +64,69 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
 					  sizeof(struct udphdr));
 }
 
+static struct sk_buff **fou_gro_receive(struct sk_buff **head,
+					struct sk_buff *skb,
+					const struct net_offload **offloads)
+{
+	const struct net_offload *ops;
+	struct sk_buff **pp = NULL;
+	u8 proto = NAPI_GRO_CB(skb)->proto;
+
+	rcu_read_lock();
+	ops = rcu_dereference(offloads[proto]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out_unlock;
+
+	pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return pp;
+}
+
+static int fou_gro_complete(struct sk_buff *skb, int nhoff,
+			    const struct net_offload **offloads)
+{
+	const struct net_offload *ops;
+	u8 proto = NAPI_GRO_CB(skb)->proto;
+	int err = -ENOSYS;
+
+	rcu_read_lock();
+	ops = rcu_dereference(offloads[proto]);
+	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+		goto out_unlock;
+
+	err = ops->callbacks.gro_complete(skb, nhoff);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return err;
+}
+
+static struct sk_buff **fou4_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	return fou_gro_receive(head, skb, inet_offloads);
+}
+
+static int fou4_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	return fou_gro_complete(skb, nhoff, inet_offloads);
+}
+
+static struct sk_buff **fou6_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	return fou_gro_receive(head, skb, inet6_offloads);
+}
+
+static int fou6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	return fou_gro_complete(skb, nhoff, inet6_offloads);
+}
+
 static int fou_add_to_port_list(struct fou *fou)
 {
 	struct fou *fout;
@@ -134,6 +199,29 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
 
 	sk->sk_allocation = GFP_ATOMIC;
 
+	switch (cfg->udp_config.family) {
+	case AF_INET:
+		fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive;
+		fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete;
+		break;
+	case AF_INET6:
+		fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive;
+		fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete;
+		break;
+	default:
+		err = -EPFNOSUPPORT;
+		goto error;
+	}
+
+	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+	fou->udp_offloads.ipproto = cfg->protocol;
+
+	if (cfg->udp_config.family == AF_INET) {
+		err = udp_add_offload(&fou->udp_offloads);
+		if (err)
+			goto error;
+	}
+
 	err = fou_add_to_port_list(fou);
 	if (err)
 		goto error;
@@ -160,6 +248,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
 	spin_lock(&fou_lock);
 	list_for_each_entry(fou, &fou_list, list) {
 		if (fou->port == port) {
+			udp_del_offload(&fou->udp_offloads);
 			fou_release(fou);
 			err = 0;
 			break;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index adab393b2fe5..d7c43f764c71 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -276,6 +276,7 @@ unflush:
 
 	skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
 	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+	NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
 	pp = uo_priv->offload->callbacks.gro_receive(head, skb);
 
 out_unlock:
@@ -329,8 +330,10 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff)
 			break;
 	}
 
-	if (uo_priv != NULL)
+	if (uo_priv != NULL) {
+		NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
 		err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
+	}
 
 	rcu_read_unlock();
 	return err;
-- 
cgit v1.2.3


From 77507aa249aecd06fa25ad058b64481e46887a01 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 18 Sep 2014 11:50:59 +0300
Subject: net/mlx4_core: Enable CQE/EQE stride support

This feature is intended for archs having cache line larger then 64B.

Since our CQE/EQEs are generally 64B in those systems, HW will write
twice to the same cache line consecutively, causing pipe locks due to
he hazard prevention mechanism. For elements in a cyclic buffer, writes
are consecutive, so entries smaller than a cache line should be
avoided, especially if they are written at a high rate.

Reduce consecutive writes to same cache line in CQs/EQs, by allowing the
driver to increase the distance between entries so that each will reside
in a different cache line. Until the introduction of this feature, there
were two types of CQE/EQE:

1. 32B stride and context in the [0-31] segment
2. 64B stride and context in the [32-63] segment

This feature introduces two additional types:

3. 128B stride and context in the [0-31] segment (128B cache line)
4. 256B stride and context in the [0-31] segment (256B cache line)

Modify the mlx4_core driver to query the device for the CQE/EQE cache
line stride capability and to enable that capability when the host
cache line size is larger than 64 bytes (supported cache lines are
128B and 256B).

The mlx4 IB driver and libmlx4 need not be aware of this change. The PF
context behaviour is changed to require this change in VF drivers
running on such archs.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   | 38 ++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/fw.h   |  2 +
 drivers/net/ethernet/mellanox/mlx4/main.c | 61 ++++++++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  3 ++
 include/linux/mlx4/device.h               | 11 ++++--
 5 files changed, 108 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 494753e44ae3..13b2e4a51ef4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -137,7 +137,9 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[8] = "Dynamic QP updates support",
 		[9] = "Device managed flow steering IPoIB support",
 		[10] = "TCP/IP offloads/flow-steering for VXLAN support",
-		[11] = "MAD DEMUX (Secure-Host) support"
+		[11] = "MAD DEMUX (Secure-Host) support",
+		[12] = "Large cache line (>64B) CQE stride support",
+		[13] = "Large cache line (>64B) EQE stride support"
 	};
 	int i;
 
@@ -557,6 +559,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET	0x74
 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET	0x76
 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET	0x77
+#define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE	0x7a
 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET	0x80
 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET	0x82
 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET	0x84
@@ -733,6 +736,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev_cap->max_rq_sg = field;
 	MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
 	dev_cap->max_rq_desc_sz = size;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+	if (field & (1 << 6))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+	if (field & (1 << 7))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
 
 	MLX4_GET(dev_cap->bmme_flags, outbox,
 		 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
@@ -1376,6 +1384,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define	 INIT_HCA_CQC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x30)
 #define	 INIT_HCA_LOG_CQ_OFFSET		 (INIT_HCA_QPC_OFFSET + 0x37)
 #define	 INIT_HCA_EQE_CQE_OFFSETS	 (INIT_HCA_QPC_OFFSET + 0x38)
+#define	 INIT_HCA_EQE_CQE_STRIDE_OFFSET  (INIT_HCA_QPC_OFFSET + 0x3b)
 #define	 INIT_HCA_ALTC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x40)
 #define	 INIT_HCA_AUXC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x50)
 #define	 INIT_HCA_EQC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x60)
@@ -1452,11 +1461,25 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
 		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
 		dev->caps.cqe_size   = 64;
-		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
 	} else {
 		dev->caps.cqe_size   = 32;
 	}
 
+	/* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+	if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) &&
+	    (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) {
+		dev->caps.eqe_size = cache_line_size();
+		dev->caps.cqe_size = cache_line_size();
+		dev->caps.eqe_factor = 0;
+		MLX4_PUT(inbox, (u8)((ilog2(dev->caps.eqe_size) - 5) << 4 |
+				      (ilog2(dev->caps.eqe_size) - 5)),
+			 INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+
+		/* User still need to know to support CQE > 32B */
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+	}
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
@@ -1616,6 +1639,17 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	if (byte_field & 0x40) /* 64-bytes cqe enabled */
 		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
 
+	/* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+	if (byte_field) {
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+		param->cqe_size = 1 << ((byte_field &
+					 MLX4_CQE_SIZE_MASK_STRIDE) + 5);
+		param->eqe_size = 1 << (((byte_field &
+					  MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5);
+	}
+
 	/* TPT attributes */
 
 	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 1fce03ebe5c4..9b835aecac96 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -178,6 +178,8 @@ struct mlx4_init_hca_param {
 	u8  uar_page_sz; /* log pg sz in 4k chunks */
 	u8  steering_mode; /* for QUERY_HCA */
 	u64 dev_cap_enabled;
+	u16 cqe_size; /* For use only when CQE stride feature enabled */
+	u16 eqe_size; /* For use only when EQE stride feature enabled */
 };
 
 struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 7e2d5d57c598..1f10023af1db 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -104,7 +104,8 @@ module_param(enable_64b_cqe_eqe, bool, 0444);
 MODULE_PARM_DESC(enable_64b_cqe_eqe,
 		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
 
-#define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE
+#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
+					 MLX4_FUNC_CAP_EQE_CQE_STRIDE)
 
 static char mlx4_version[] =
 	DRV_NAME ": Mellanox ConnectX core driver v"
@@ -196,6 +197,40 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
 		dev->caps.port_mask[i] = dev->caps.port_type[i];
 }
 
+static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
+{
+	struct mlx4_caps *dev_cap = &dev->caps;
+
+	/* FW not supporting or cancelled by user */
+	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
+	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
+		return;
+
+	/* Must have 64B CQE_EQE enabled by FW to use bigger stride
+	 * When FW has NCSI it may decide not to report 64B CQE/EQEs
+	 */
+	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
+	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
+		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+		return;
+	}
+
+	if (cache_line_size() == 128 || cache_line_size() == 256) {
+		mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
+		/* Changing the real data inside CQE size to 32B */
+		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+
+		if (mlx4_is_master(dev))
+			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
+	} else {
+		mlx4_dbg(dev, "Disabling CQE stride cacheLine unsupported\n");
+		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+	}
+}
+
 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
 	int err;
@@ -390,6 +425,14 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
 			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
 		}
+
+		if (dev_cap->flags2 &
+		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
+		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
+			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
+			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+		}
 	}
 
 	if ((dev->caps.flags &
@@ -397,6 +440,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	    mlx4_is_master(dev))
 		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
 
+	if (!mlx4_is_slave(dev))
+		mlx4_enable_cqe_eqe_stride(dev);
+
 	return 0;
 }
 
@@ -724,11 +770,22 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 
 	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
 		dev->caps.cqe_size   = 64;
-		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
 	} else {
 		dev->caps.cqe_size   = 32;
 	}
 
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
+		dev->caps.eqe_size = hca_param.eqe_size;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
+		dev->caps.cqe_size = hca_param.cqe_size;
+		/* User still need to know when CQE > 32B */
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+	}
+
 	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
 	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index b508c7887ef8..de10dbb2e6ed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -285,6 +285,9 @@ struct mlx4_icm_table {
 #define MLX4_MPT_STATUS_SW		0xF0
 #define MLX4_MPT_STATUS_HW		0x00
 
+#define MLX4_CQE_SIZE_MASK_STRIDE	0x3
+#define MLX4_EQE_SIZE_MASK_STRIDE	0x30
+
 /*
  * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
  */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1befd8df9cfc..7bcefe749a39 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -185,19 +185,24 @@ enum {
 	MLX4_DEV_CAP_FLAG2_DMFS_IPOIB		= 1LL <<  9,
 	MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS	= 1LL <<  10,
 	MLX4_DEV_CAP_FLAG2_MAD_DEMUX		= 1LL <<  11,
+	MLX4_DEV_CAP_FLAG2_CQE_STRIDE		= 1LL <<  12,
+	MLX4_DEV_CAP_FLAG2_EQE_STRIDE		= 1LL <<  13
 };
 
 enum {
 	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
-	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1
+	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1,
+	MLX4_DEV_CAP_CQE_STRIDE_ENABLED	= 1LL << 2,
+	MLX4_DEV_CAP_EQE_STRIDE_ENABLED	= 1LL << 3
 };
 
 enum {
-	MLX4_USER_DEV_CAP_64B_CQE	= 1L << 0
+	MLX4_USER_DEV_CAP_LARGE_CQE	= 1L << 0
 };
 
 enum {
-	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0
+	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0,
+	MLX4_FUNC_CAP_EQE_CQE_STRIDE	= 1L << 1
 };
 
 
-- 
cgit v1.2.3


From e625305b390790717cf2cccf61efb81299647028 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 20 Sep 2014 01:27:25 -0400
Subject: percpu-refcount: make percpu_ref based on longs instead of ints

percpu_ref is currently based on ints and the number of refs it can
cover is (1 << 31).  This makes it impossible to use a percpu_ref to
count memory objects or pages on 64bit machines as it may overflow.
This forces those users to somehow aggregate the references before
contributing to the percpu_ref which is often cumbersome and sometimes
challenging to get the same level of performance as using the
percpu_ref directly.

While using ints for the percpu counters makes them pack tighter on
64bit machines, the possible gain from using ints instead of longs is
extremely small compared to the overall gain from per-cpu operation.
This patch makes percpu_ref based on longs so that it can be used to
directly count memory objects or pages.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Kent Overstreet <kmo@daterainc.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/percpu-refcount.h | 24 ++++++++++++------------
 lib/percpu-refcount.c           | 37 +++++++++++++++++++------------------
 2 files changed, 31 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index ee8325122dbd..5df6784bd9d2 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -55,7 +55,7 @@ struct percpu_ref;
 typedef void (percpu_ref_func_t)(struct percpu_ref *);
 
 struct percpu_ref {
-	atomic_t		count;
+	atomic_long_t		count;
 	/*
 	 * The low bit of the pointer indicates whether the ref is in percpu
 	 * mode; if set, then get/put will manipulate the atomic_t.
@@ -97,7 +97,7 @@ static inline void percpu_ref_kill(struct percpu_ref *ref)
  * branches as it can't assume that @ref->pcpu_count is not NULL.
  */
 static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
-				    unsigned __percpu **pcpu_countp)
+				    unsigned long __percpu **pcpu_countp)
 {
 	unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr);
 
@@ -107,7 +107,7 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
 	if (unlikely(pcpu_ptr & PCPU_REF_DEAD))
 		return false;
 
-	*pcpu_countp = (unsigned __percpu *)pcpu_ptr;
+	*pcpu_countp = (unsigned long __percpu *)pcpu_ptr;
 	return true;
 }
 
@@ -119,14 +119,14 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
   */
 static inline void percpu_ref_get(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count;
+	unsigned long __percpu *pcpu_count;
 
 	rcu_read_lock_sched();
 
 	if (__pcpu_ref_alive(ref, &pcpu_count))
 		this_cpu_inc(*pcpu_count);
 	else
-		atomic_inc(&ref->count);
+		atomic_long_inc(&ref->count);
 
 	rcu_read_unlock_sched();
 }
@@ -142,7 +142,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
  */
 static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count;
+	unsigned long __percpu *pcpu_count;
 	int ret = false;
 
 	rcu_read_lock_sched();
@@ -151,7 +151,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 		this_cpu_inc(*pcpu_count);
 		ret = true;
 	} else {
-		ret = atomic_inc_not_zero(&ref->count);
+		ret = atomic_long_inc_not_zero(&ref->count);
 	}
 
 	rcu_read_unlock_sched();
@@ -175,7 +175,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
  */
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count;
+	unsigned long __percpu *pcpu_count;
 	int ret = false;
 
 	rcu_read_lock_sched();
@@ -199,13 +199,13 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
  */
 static inline void percpu_ref_put(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count;
+	unsigned long __percpu *pcpu_count;
 
 	rcu_read_lock_sched();
 
 	if (__pcpu_ref_alive(ref, &pcpu_count))
 		this_cpu_dec(*pcpu_count);
-	else if (unlikely(atomic_dec_and_test(&ref->count)))
+	else if (unlikely(atomic_long_dec_and_test(&ref->count)))
 		ref->release(ref);
 
 	rcu_read_unlock_sched();
@@ -219,11 +219,11 @@ static inline void percpu_ref_put(struct percpu_ref *ref)
  */
 static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count;
+	unsigned long __percpu *pcpu_count;
 
 	if (__pcpu_ref_alive(ref, &pcpu_count))
 		return false;
-	return !atomic_read(&ref->count);
+	return !atomic_long_read(&ref->count);
 }
 
 #endif
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 70d28c91f35a..559ee0b20318 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -25,15 +25,15 @@
  * works.
  *
  * Converting to non percpu mode is done with some RCUish stuff in
- * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t
- * can't hit 0 before we've added up all the percpu refs.
+ * percpu_ref_kill. Additionally, we need a bias value so that the
+ * atomic_long_t can't hit 0 before we've added up all the percpu refs.
  */
 
-#define PCPU_COUNT_BIAS		(1U << 31)
+#define PCPU_COUNT_BIAS		(1LU << (BITS_PER_LONG - 1))
 
-static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref)
+static unsigned long __percpu *pcpu_count_ptr(struct percpu_ref *ref)
 {
-	return (unsigned __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
+	return (unsigned long __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
 }
 
 /**
@@ -43,7 +43,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref)
  * @gfp: allocation mask to use
  *
  * Initializes the refcount in single atomic counter mode with a refcount of 1;
- * analagous to atomic_set(ref, 1).
+ * analagous to atomic_long_set(ref, 1).
  *
  * Note that @release must not sleep - it may potentially be called from RCU
  * callback context by percpu_ref_kill().
@@ -51,9 +51,9 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref)
 int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
 		    gfp_t gfp)
 {
-	atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
 
-	ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned, gfp);
+	ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned long, gfp);
 	if (!ref->pcpu_count_ptr)
 		return -ENOMEM;
 
@@ -75,13 +75,13 @@ EXPORT_SYMBOL_GPL(percpu_ref_init);
  */
 void percpu_ref_reinit(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
+	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
 	int cpu;
 
 	BUG_ON(!pcpu_count);
 	WARN_ON(!percpu_ref_is_zero(ref));
 
-	atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
 
 	/*
 	 * Restore per-cpu operation.  smp_store_release() is paired with
@@ -109,7 +109,7 @@ EXPORT_SYMBOL_GPL(percpu_ref_reinit);
  */
 void percpu_ref_exit(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
+	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
 
 	if (pcpu_count) {
 		free_percpu(pcpu_count);
@@ -121,14 +121,15 @@ EXPORT_SYMBOL_GPL(percpu_ref_exit);
 static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 {
 	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
-	unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
-	unsigned count = 0;
+	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
+	unsigned long count = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu)
 		count += *per_cpu_ptr(pcpu_count, cpu);
 
-	pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count);
+	pr_debug("global %ld pcpu %ld",
+		 atomic_long_read(&ref->count), (long)count);
 
 	/*
 	 * It's crucial that we sum the percpu counters _before_ adding the sum
@@ -143,11 +144,11 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 	 * time is equivalent and saves us atomic operations:
 	 */
 
-	atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count);
+	atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count);
 
-	WARN_ONCE(atomic_read(&ref->count) <= 0,
-		  "percpu ref (%pf) <= 0 (%i) after killed",
-		  ref->release, atomic_read(&ref->count));
+	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
+		  "percpu ref (%pf) <= 0 (%ld) after killed",
+		  ref->release, atomic_long_read(&ref->count));
 
 	/* @ref is viewed as dead on all CPUs, send out kill confirmation */
 	if (ref->confirm_kill)
-- 
cgit v1.2.3


From 6a40281ab5c1ed8ba2253857118a5d400a2d084b Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <cebbert.lkml@gmail.com>
Date: Sat, 20 Sep 2014 10:17:51 -0500
Subject: sched: Fix end_of_stack() and location of stack canary for
 architectures using CONFIG_STACK_GROWSUP

Aaron Tomlin recently posted patches [1] to enable checking the
stack canary on every task switch. Looking at the canary code, I
realized that every arch (except ia64, which adds some space for
register spill above the stack) shares a definition of
end_of_stack() that makes it the first long after the
threadinfo.

For stacks that grow down, this low address is correct because
the stack starts at the end of the thread area and grows toward
lower addresses. However, for stacks that grow up, toward higher
addresses, this is wrong. (The stack actually grows away from
the canary.) On these archs end_of_stack() should return the
address of the last long, at the highest possible address for the stack.

[1] http://lkml.org/lkml/2014/9/12/293

Signed-off-by: Chuck Ebbert <cebbert.lkml@gmail.com>
Link: http://lkml.kernel.org/r/20140920101751.6c5166b6@as
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: James Hogan <james.hogan@imgtec.com> [metag]
Acked-by: James Hogan <james.hogan@imgtec.com>
Acked-by: Aaron Tomlin <atomlin@redhat.com>
---
 include/linux/sched.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..1f07040d28e3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2608,9 +2608,22 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
 	task_thread_info(p)->task = p;
 }
 
+/*
+ * Return the address of the last usable long on the stack.
+ *
+ * When the stack grows down, this is just above the thread
+ * info struct. Going any lower will corrupt the threadinfo.
+ *
+ * When the stack grows up, this is the highest address.
+ * Beyond that position, we corrupt data on the next page.
+ */
 static inline unsigned long *end_of_stack(struct task_struct *p)
 {
+#ifdef CONFIG_STACK_GROWSUP
+	return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
+#else
 	return (unsigned long *)(task_thread_info(p) + 1);
+#endif
 }
 
 #endif
-- 
cgit v1.2.3


From ca2a07e45d1d3d31a0a85d2f63d81a897c610040 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 31 Jul 2014 16:32:46 +0900
Subject: extcon: sm5502: Move sm5502.h header file to extcon directory

This patch move sm5502.h header file from 'include/linux/extcon' to
'driver/extcon' because sm5502.h is used for driver/extcon/extcon-sm5502.c.
and remove duplicate license description.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-sm5502.c |   8 +-
 drivers/extcon/extcon-sm5502.h | 282 ++++++++++++++++++++++++++++++++++++++++
 include/linux/extcon/sm5502.h  | 287 -----------------------------------------
 3 files changed, 284 insertions(+), 293 deletions(-)
 create mode 100644 drivers/extcon/extcon-sm5502.h
 delete mode 100644 include/linux/extcon/sm5502.h

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-sm5502.c b/drivers/extcon/extcon-sm5502.c
index a1ba9242e9cb..f94d66aae9e3 100644
--- a/drivers/extcon/extcon-sm5502.c
+++ b/drivers/extcon/extcon-sm5502.c
@@ -8,11 +8,6 @@
  * under  the terms of  the GNU General  Public License as published by the
  * Free Software Foundation;  either version 2 of the  License, or (at your
  * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/err.h>
@@ -26,7 +21,8 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/extcon.h>
-#include <linux/extcon/sm5502.h>
+
+#include "extcon-sm5502.h"
 
 #define	DELAY_MS_DEFAULT		17000	/* unit: millisecond */
 
diff --git a/drivers/extcon/extcon-sm5502.h b/drivers/extcon/extcon-sm5502.h
new file mode 100644
index 000000000000..974b53222f56
--- /dev/null
+++ b/drivers/extcon/extcon-sm5502.h
@@ -0,0 +1,282 @@
+/*
+ * sm5502.h
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __LINUX_EXTCON_SM5502_H
+#define __LINUX_EXTCON_SM5502_H
+
+enum sm5502_types {
+	TYPE_SM5502,
+};
+
+/* SM5502 registers */
+enum sm5502_reg {
+	SM5502_REG_DEVICE_ID = 0x01,
+	SM5502_REG_CONTROL,
+	SM5502_REG_INT1,
+	SM5502_REG_INT2,
+	SM5502_REG_INTMASK1,
+	SM5502_REG_INTMASK2,
+	SM5502_REG_ADC,
+	SM5502_REG_TIMING_SET1,
+	SM5502_REG_TIMING_SET2,
+	SM5502_REG_DEV_TYPE1,
+	SM5502_REG_DEV_TYPE2,
+	SM5502_REG_BUTTON1,
+	SM5502_REG_BUTTON2,
+	SM5502_REG_CAR_KIT_STATUS,
+	SM5502_REG_RSVD1,
+	SM5502_REG_RSVD2,
+	SM5502_REG_RSVD3,
+	SM5502_REG_RSVD4,
+	SM5502_REG_MANUAL_SW1,
+	SM5502_REG_MANUAL_SW2,
+	SM5502_REG_DEV_TYPE3,
+	SM5502_REG_RSVD5,
+	SM5502_REG_RSVD6,
+	SM5502_REG_RSVD7,
+	SM5502_REG_RSVD8,
+	SM5502_REG_RSVD9,
+	SM5502_REG_RESET,
+	SM5502_REG_RSVD10,
+	SM5502_REG_RESERVED_ID1,
+	SM5502_REG_RSVD11,
+	SM5502_REG_RSVD12,
+	SM5502_REG_RESERVED_ID2,
+	SM5502_REG_RSVD13,
+	SM5502_REG_OCP,
+	SM5502_REG_RSVD14,
+	SM5502_REG_RSVD15,
+	SM5502_REG_RSVD16,
+	SM5502_REG_RSVD17,
+	SM5502_REG_RSVD18,
+	SM5502_REG_RSVD19,
+	SM5502_REG_RSVD20,
+	SM5502_REG_RSVD21,
+	SM5502_REG_RSVD22,
+	SM5502_REG_RSVD23,
+	SM5502_REG_RSVD24,
+	SM5502_REG_RSVD25,
+	SM5502_REG_RSVD26,
+	SM5502_REG_RSVD27,
+	SM5502_REG_RSVD28,
+	SM5502_REG_RSVD29,
+	SM5502_REG_RSVD30,
+	SM5502_REG_RSVD31,
+	SM5502_REG_RSVD32,
+	SM5502_REG_RSVD33,
+	SM5502_REG_RSVD34,
+	SM5502_REG_RSVD35,
+	SM5502_REG_RSVD36,
+	SM5502_REG_RESERVED_ID3,
+
+	SM5502_REG_END,
+};
+
+/* Define SM5502 MASK/SHIFT constant */
+#define SM5502_REG_DEVICE_ID_VENDOR_SHIFT	0
+#define SM5502_REG_DEVICE_ID_VERSION_SHIFT	3
+#define SM5502_REG_DEVICE_ID_VENDOR_MASK	(0x3 << SM5502_REG_DEVICE_ID_VENDOR_SHIFT)
+#define SM5502_REG_DEVICE_ID_VERSION_MASK	(0x1f << SM5502_REG_DEVICE_ID_VERSION_SHIFT)
+
+#define SM5502_REG_CONTROL_MASK_INT_SHIFT	0
+#define SM5502_REG_CONTROL_WAIT_SHIFT		1
+#define SM5502_REG_CONTROL_MANUAL_SW_SHIFT	2
+#define SM5502_REG_CONTROL_RAW_DATA_SHIFT	3
+#define SM5502_REG_CONTROL_SW_OPEN_SHIFT	4
+#define SM5502_REG_CONTROL_MASK_INT_MASK	(0x1 << SM5502_REG_CONTROL_MASK_INT_SHIFT)
+#define SM5502_REG_CONTROL_WAIT_MASK		(0x1 << SM5502_REG_CONTROL_WAIT_SHIFT)
+#define SM5502_REG_CONTROL_MANUAL_SW_MASK	(0x1 << SM5502_REG_CONTROL_MANUAL_SW_SHIFT)
+#define SM5502_REG_CONTROL_RAW_DATA_MASK	(0x1 << SM5502_REG_CONTROL_RAW_DATA_SHIFT)
+#define SM5502_REG_CONTROL_SW_OPEN_MASK		(0x1 << SM5502_REG_CONTROL_SW_OPEN_SHIFT)
+
+#define SM5502_REG_INTM1_ATTACH_SHIFT		0
+#define SM5502_REG_INTM1_DETACH_SHIFT		1
+#define SM5502_REG_INTM1_KP_SHIFT		2
+#define SM5502_REG_INTM1_LKP_SHIFT		3
+#define SM5502_REG_INTM1_LKR_SHIFT		4
+#define SM5502_REG_INTM1_OVP_EVENT_SHIFT	5
+#define SM5502_REG_INTM1_OCP_EVENT_SHIFT	6
+#define SM5502_REG_INTM1_OVP_OCP_DIS_SHIFT	7
+#define SM5502_REG_INTM1_ATTACH_MASK		(0x1 << SM5502_REG_INTM1_ATTACH_SHIFT)
+#define SM5502_REG_INTM1_DETACH_MASK		(0x1 << SM5502_REG_INTM1_DETACH_SHIFT)
+#define SM5502_REG_INTM1_KP_MASK		(0x1 << SM5502_REG_INTM1_KP_SHIFT)
+#define SM5502_REG_INTM1_LKP_MASK		(0x1 << SM5502_REG_INTM1_LKP_SHIFT)
+#define SM5502_REG_INTM1_LKR_MASK		(0x1 << SM5502_REG_INTM1_LKR_SHIFT)
+#define SM5502_REG_INTM1_OVP_EVENT_MASK		(0x1 << SM5502_REG_INTM1_OVP_EVENT_SHIFT)
+#define SM5502_REG_INTM1_OCP_EVENT_MASK		(0x1 << SM5502_REG_INTM1_OCP_EVENT_SHIFT)
+#define SM5502_REG_INTM1_OVP_OCP_DIS_MASK	(0x1 << SM5502_REG_INTM1_OVP_OCP_DIS_SHIFT)
+
+#define SM5502_REG_INTM2_VBUS_DET_SHIFT		0
+#define SM5502_REG_INTM2_REV_ACCE_SHIFT		1
+#define SM5502_REG_INTM2_ADC_CHG_SHIFT		2
+#define SM5502_REG_INTM2_STUCK_KEY_SHIFT	3
+#define SM5502_REG_INTM2_STUCK_KEY_RCV_SHIFT	4
+#define SM5502_REG_INTM2_MHL_SHIFT		5
+#define SM5502_REG_INTM2_VBUS_DET_MASK		(0x1 << SM5502_REG_INTM2_VBUS_DET_SHIFT)
+#define SM5502_REG_INTM2_REV_ACCE_MASK		(0x1 << SM5502_REG_INTM2_REV_ACCE_SHIFT)
+#define SM5502_REG_INTM2_ADC_CHG_MASK		(0x1 << SM5502_REG_INTM2_ADC_CHG_SHIFT)
+#define SM5502_REG_INTM2_STUCK_KEY_MASK		(0x1 << SM5502_REG_INTM2_STUCK_KEY_SHIFT)
+#define SM5502_REG_INTM2_STUCK_KEY_RCV_MASK	(0x1 << SM5502_REG_INTM2_STUCK_KEY_RCV_SHIFT)
+#define SM5502_REG_INTM2_MHL_MASK		(0x1 << SM5502_REG_INTM2_MHL_SHIFT)
+
+#define SM5502_REG_ADC_SHIFT			0
+#define SM5502_REG_ADC_MASK			(0x1f << SM5502_REG_ADC_SHIFT)
+
+#define SM5502_REG_TIMING_SET1_KEY_PRESS_SHIFT	4
+#define SM5502_REG_TIMING_SET1_KEY_PRESS_MASK	(0xf << SM5502_REG_TIMING_SET1_KEY_PRESS_SHIFT)
+#define TIMING_KEY_PRESS_100MS			0x0
+#define TIMING_KEY_PRESS_200MS			0x1
+#define TIMING_KEY_PRESS_300MS			0x2
+#define TIMING_KEY_PRESS_400MS			0x3
+#define TIMING_KEY_PRESS_500MS			0x4
+#define TIMING_KEY_PRESS_600MS			0x5
+#define TIMING_KEY_PRESS_700MS			0x6
+#define TIMING_KEY_PRESS_800MS			0x7
+#define TIMING_KEY_PRESS_900MS			0x8
+#define TIMING_KEY_PRESS_1000MS			0x9
+#define SM5502_REG_TIMING_SET1_ADC_DET_SHIFT	0
+#define SM5502_REG_TIMING_SET1_ADC_DET_MASK	(0xf << SM5502_REG_TIMING_SET1_ADC_DET_SHIFT)
+#define TIMING_ADC_DET_50MS			0x0
+#define TIMING_ADC_DET_100MS			0x1
+#define TIMING_ADC_DET_150MS			0x2
+#define TIMING_ADC_DET_200MS			0x3
+#define TIMING_ADC_DET_300MS			0x4
+#define TIMING_ADC_DET_400MS			0x5
+#define TIMING_ADC_DET_500MS			0x6
+#define TIMING_ADC_DET_600MS			0x7
+#define TIMING_ADC_DET_700MS			0x8
+#define TIMING_ADC_DET_800MS			0x9
+#define TIMING_ADC_DET_900MS			0xA
+#define TIMING_ADC_DET_1000MS			0xB
+
+#define SM5502_REG_TIMING_SET2_SW_WAIT_SHIFT	4
+#define SM5502_REG_TIMING_SET2_SW_WAIT_MASK	(0xf << SM5502_REG_TIMING_SET2_SW_WAIT_SHIFT)
+#define TIMING_SW_WAIT_10MS			0x0
+#define TIMING_SW_WAIT_30MS			0x1
+#define TIMING_SW_WAIT_50MS			0x2
+#define TIMING_SW_WAIT_70MS			0x3
+#define TIMING_SW_WAIT_90MS			0x4
+#define TIMING_SW_WAIT_110MS			0x5
+#define TIMING_SW_WAIT_130MS			0x6
+#define TIMING_SW_WAIT_150MS			0x7
+#define TIMING_SW_WAIT_170MS			0x8
+#define TIMING_SW_WAIT_190MS			0x9
+#define TIMING_SW_WAIT_210MS			0xA
+#define SM5502_REG_TIMING_SET2_LONG_KEY_SHIFT	0
+#define SM5502_REG_TIMING_SET2_LONG_KEY_MASK	(0xf << SM5502_REG_TIMING_SET2_LONG_KEY_SHIFT)
+#define TIMING_LONG_KEY_300MS			0x0
+#define TIMING_LONG_KEY_400MS			0x1
+#define TIMING_LONG_KEY_500MS			0x2
+#define TIMING_LONG_KEY_600MS			0x3
+#define TIMING_LONG_KEY_700MS			0x4
+#define TIMING_LONG_KEY_800MS			0x5
+#define TIMING_LONG_KEY_900MS			0x6
+#define TIMING_LONG_KEY_1000MS			0x7
+#define TIMING_LONG_KEY_1100MS			0x8
+#define TIMING_LONG_KEY_1200MS			0x9
+#define TIMING_LONG_KEY_1300MS			0xA
+#define TIMING_LONG_KEY_1400MS			0xB
+#define TIMING_LONG_KEY_1500MS			0xC
+
+#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_SHIFT		0
+#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE2_SHIFT		1
+#define SM5502_REG_DEV_TYPE1_USB_SDP_SHIFT		2
+#define SM5502_REG_DEV_TYPE1_UART_SHIFT			3
+#define SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_SHIFT	4
+#define SM5502_REG_DEV_TYPE1_USB_CHG_SHIFT		5
+#define SM5502_REG_DEV_TYPE1_DEDICATED_CHG_SHIFT	6
+#define SM5502_REG_DEV_TYPE1_USB_OTG_SHIFT		7
+#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_MASK		(0x1 << SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_SHIFT)
+#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1__MASK		(0x1 << SM5502_REG_DEV_TYPE1_AUDIO_TYPE2_SHIFT)
+#define SM5502_REG_DEV_TYPE1_USB_SDP_MASK		(0x1 << SM5502_REG_DEV_TYPE1_USB_SDP_SHIFT)
+#define SM5502_REG_DEV_TYPE1_UART_MASK			(0x1 << SM5502_REG_DEV_TYPE1_UART_SHIFT)
+#define SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_MASK	(0x1 << SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_SHIFT)
+#define SM5502_REG_DEV_TYPE1_USB_CHG_MASK		(0x1 << SM5502_REG_DEV_TYPE1_USB_CHG_SHIFT)
+#define SM5502_REG_DEV_TYPE1_DEDICATED_CHG_MASK		(0x1 << SM5502_REG_DEV_TYPE1_DEDICATED_CHG_SHIFT)
+#define SM5502_REG_DEV_TYPE1_USB_OTG_MASK		(0x1 << SM5502_REG_DEV_TYPE1_USB_OTG_SHIFT)
+
+#define SM5502_REG_DEV_TYPE2_JIG_USB_ON_SHIFT		0
+#define SM5502_REG_DEV_TYPE2_JIG_USB_OFF_SHIFT		1
+#define SM5502_REG_DEV_TYPE2_JIG_UART_ON_SHIFT		2
+#define SM5502_REG_DEV_TYPE2_JIG_UART_OFF_SHIFT		3
+#define SM5502_REG_DEV_TYPE2_PPD_SHIFT			4
+#define SM5502_REG_DEV_TYPE2_TTY_SHIFT			5
+#define SM5502_REG_DEV_TYPE2_AV_CABLE_SHIFT		6
+#define SM5502_REG_DEV_TYPE2_JIG_USB_ON_MASK		(0x1 << SM5502_REG_DEV_TYPE2_JIG_USB_ON_SHIFT)
+#define SM5502_REG_DEV_TYPE2_JIG_USB_OFF_MASK		(0x1 << SM5502_REG_DEV_TYPE2_JIG_USB_OFF_SHIFT)
+#define SM5502_REG_DEV_TYPE2_JIG_UART_ON_MASK		(0x1 << SM5502_REG_DEV_TYPE2_JIG_UART_ON_SHIFT)
+#define SM5502_REG_DEV_TYPE2_JIG_UART_OFF_MASK		(0x1 << SM5502_REG_DEV_TYPE2_JIG_UART_OFF_SHIFT)
+#define SM5502_REG_DEV_TYPE2_PPD_MASK			(0x1 << SM5502_REG_DEV_TYPE2_PPD_SHIFT)
+#define SM5502_REG_DEV_TYPE2_TTY_MASK			(0x1 << SM5502_REG_DEV_TYPE2_TTY_SHIFT)
+#define SM5502_REG_DEV_TYPE2_AV_CABLE_MASK		(0x1 << SM5502_REG_DEV_TYPE2_AV_CABLE_SHIFT)
+
+#define SM5502_REG_MANUAL_SW1_VBUSIN_SHIFT	0
+#define SM5502_REG_MANUAL_SW1_DP_SHIFT		2
+#define SM5502_REG_MANUAL_SW1_DM_SHIFT		5
+#define SM5502_REG_MANUAL_SW1_VBUSIN_MASK	(0x3 << SM5502_REG_MANUAL_SW1_VBUSIN_SHIFT)
+#define SM5502_REG_MANUAL_SW1_DP_MASK		(0x7 << SM5502_REG_MANUAL_SW1_DP_SHIFT)
+#define SM5502_REG_MANUAL_SW1_DM_MASK		(0x7 << SM5502_REG_MANUAL_SW1_DM_SHIFT)
+#define VBUSIN_SWITCH_OPEN			0x0
+#define VBUSIN_SWITCH_VBUSOUT			0x1
+#define VBUSIN_SWITCH_MIC			0x2
+#define VBUSIN_SWITCH_VBUSOUT_WITH_USB		0x3
+#define DM_DP_CON_SWITCH_OPEN			0x0
+#define DM_DP_CON_SWITCH_USB			0x1
+#define DM_DP_CON_SWITCH_AUDIO			0x2
+#define DM_DP_CON_SWITCH_UART			0x3
+#define DM_DP_SWITCH_OPEN			((DM_DP_CON_SWITCH_OPEN <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
+						| (DM_DP_CON_SWITCH_OPEN <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
+#define DM_DP_SWITCH_USB			((DM_DP_CON_SWITCH_USB <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
+						| (DM_DP_CON_SWITCH_USB <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
+#define DM_DP_SWITCH_AUDIO			((DM_DP_CON_SWITCH_AUDIO <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
+						| (DM_DP_CON_SWITCH_AUDIO <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
+#define DM_DP_SWITCH_UART			((DM_DP_CON_SWITCH_UART <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
+						| (DM_DP_CON_SWITCH_UART <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
+
+/* SM5502 Interrupts */
+enum sm5502_irq {
+	/* INT1 */
+	SM5502_IRQ_INT1_ATTACH,
+	SM5502_IRQ_INT1_DETACH,
+	SM5502_IRQ_INT1_KP,
+	SM5502_IRQ_INT1_LKP,
+	SM5502_IRQ_INT1_LKR,
+	SM5502_IRQ_INT1_OVP_EVENT,
+	SM5502_IRQ_INT1_OCP_EVENT,
+	SM5502_IRQ_INT1_OVP_OCP_DIS,
+
+	/* INT2 */
+	SM5502_IRQ_INT2_VBUS_DET,
+	SM5502_IRQ_INT2_REV_ACCE,
+	SM5502_IRQ_INT2_ADC_CHG,
+	SM5502_IRQ_INT2_STUCK_KEY,
+	SM5502_IRQ_INT2_STUCK_KEY_RCV,
+	SM5502_IRQ_INT2_MHL,
+
+	SM5502_IRQ_NUM,
+};
+
+#define SM5502_IRQ_INT1_ATTACH_MASK		BIT(0)
+#define SM5502_IRQ_INT1_DETACH_MASK		BIT(1)
+#define SM5502_IRQ_INT1_KP_MASK			BIT(2)
+#define SM5502_IRQ_INT1_LKP_MASK		BIT(3)
+#define SM5502_IRQ_INT1_LKR_MASK		BIT(4)
+#define SM5502_IRQ_INT1_OVP_EVENT_MASK		BIT(5)
+#define SM5502_IRQ_INT1_OCP_EVENT_MASK		BIT(6)
+#define SM5502_IRQ_INT1_OVP_OCP_DIS_MASK	BIT(7)
+#define SM5502_IRQ_INT2_VBUS_DET_MASK		BIT(0)
+#define SM5502_IRQ_INT2_REV_ACCE_MASK		BIT(1)
+#define SM5502_IRQ_INT2_ADC_CHG_MASK		BIT(2)
+#define SM5502_IRQ_INT2_STUCK_KEY_MASK		BIT(3)
+#define SM5502_IRQ_INT2_STUCK_KEY_RCV_MASK	BIT(4)
+#define SM5502_IRQ_INT2_MHL_MASK		BIT(5)
+
+#endif /*  __LINUX_EXTCON_SM5502_H */
diff --git a/include/linux/extcon/sm5502.h b/include/linux/extcon/sm5502.h
deleted file mode 100644
index 030526bf8d79..000000000000
--- a/include/linux/extcon/sm5502.h
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * sm5502.h
- *
- * Copyright (c) 2014 Samsung Electronics Co., Ltd
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __LINUX_EXTCON_SM5502_H
-#define __LINUX_EXTCON_SM5502_H
-
-enum sm5502_types {
-	TYPE_SM5502,
-};
-
-/* SM5502 registers */
-enum sm5502_reg {
-	SM5502_REG_DEVICE_ID = 0x01,
-	SM5502_REG_CONTROL,
-	SM5502_REG_INT1,
-	SM5502_REG_INT2,
-	SM5502_REG_INTMASK1,
-	SM5502_REG_INTMASK2,
-	SM5502_REG_ADC,
-	SM5502_REG_TIMING_SET1,
-	SM5502_REG_TIMING_SET2,
-	SM5502_REG_DEV_TYPE1,
-	SM5502_REG_DEV_TYPE2,
-	SM5502_REG_BUTTON1,
-	SM5502_REG_BUTTON2,
-	SM5502_REG_CAR_KIT_STATUS,
-	SM5502_REG_RSVD1,
-	SM5502_REG_RSVD2,
-	SM5502_REG_RSVD3,
-	SM5502_REG_RSVD4,
-	SM5502_REG_MANUAL_SW1,
-	SM5502_REG_MANUAL_SW2,
-	SM5502_REG_DEV_TYPE3,
-	SM5502_REG_RSVD5,
-	SM5502_REG_RSVD6,
-	SM5502_REG_RSVD7,
-	SM5502_REG_RSVD8,
-	SM5502_REG_RSVD9,
-	SM5502_REG_RESET,
-	SM5502_REG_RSVD10,
-	SM5502_REG_RESERVED_ID1,
-	SM5502_REG_RSVD11,
-	SM5502_REG_RSVD12,
-	SM5502_REG_RESERVED_ID2,
-	SM5502_REG_RSVD13,
-	SM5502_REG_OCP,
-	SM5502_REG_RSVD14,
-	SM5502_REG_RSVD15,
-	SM5502_REG_RSVD16,
-	SM5502_REG_RSVD17,
-	SM5502_REG_RSVD18,
-	SM5502_REG_RSVD19,
-	SM5502_REG_RSVD20,
-	SM5502_REG_RSVD21,
-	SM5502_REG_RSVD22,
-	SM5502_REG_RSVD23,
-	SM5502_REG_RSVD24,
-	SM5502_REG_RSVD25,
-	SM5502_REG_RSVD26,
-	SM5502_REG_RSVD27,
-	SM5502_REG_RSVD28,
-	SM5502_REG_RSVD29,
-	SM5502_REG_RSVD30,
-	SM5502_REG_RSVD31,
-	SM5502_REG_RSVD32,
-	SM5502_REG_RSVD33,
-	SM5502_REG_RSVD34,
-	SM5502_REG_RSVD35,
-	SM5502_REG_RSVD36,
-	SM5502_REG_RESERVED_ID3,
-
-	SM5502_REG_END,
-};
-
-/* Define SM5502 MASK/SHIFT constant */
-#define SM5502_REG_DEVICE_ID_VENDOR_SHIFT	0
-#define SM5502_REG_DEVICE_ID_VERSION_SHIFT	3
-#define SM5502_REG_DEVICE_ID_VENDOR_MASK	(0x3 << SM5502_REG_DEVICE_ID_VENDOR_SHIFT)
-#define SM5502_REG_DEVICE_ID_VERSION_MASK	(0x1f << SM5502_REG_DEVICE_ID_VERSION_SHIFT)
-
-#define SM5502_REG_CONTROL_MASK_INT_SHIFT	0
-#define SM5502_REG_CONTROL_WAIT_SHIFT		1
-#define SM5502_REG_CONTROL_MANUAL_SW_SHIFT	2
-#define SM5502_REG_CONTROL_RAW_DATA_SHIFT	3
-#define SM5502_REG_CONTROL_SW_OPEN_SHIFT	4
-#define SM5502_REG_CONTROL_MASK_INT_MASK	(0x1 << SM5502_REG_CONTROL_MASK_INT_SHIFT)
-#define SM5502_REG_CONTROL_WAIT_MASK		(0x1 << SM5502_REG_CONTROL_WAIT_SHIFT)
-#define SM5502_REG_CONTROL_MANUAL_SW_MASK	(0x1 << SM5502_REG_CONTROL_MANUAL_SW_SHIFT)
-#define SM5502_REG_CONTROL_RAW_DATA_MASK	(0x1 << SM5502_REG_CONTROL_RAW_DATA_SHIFT)
-#define SM5502_REG_CONTROL_SW_OPEN_MASK		(0x1 << SM5502_REG_CONTROL_SW_OPEN_SHIFT)
-
-#define SM5502_REG_INTM1_ATTACH_SHIFT		0
-#define SM5502_REG_INTM1_DETACH_SHIFT		1
-#define SM5502_REG_INTM1_KP_SHIFT		2
-#define SM5502_REG_INTM1_LKP_SHIFT		3
-#define SM5502_REG_INTM1_LKR_SHIFT		4
-#define SM5502_REG_INTM1_OVP_EVENT_SHIFT	5
-#define SM5502_REG_INTM1_OCP_EVENT_SHIFT	6
-#define SM5502_REG_INTM1_OVP_OCP_DIS_SHIFT	7
-#define SM5502_REG_INTM1_ATTACH_MASK		(0x1 << SM5502_REG_INTM1_ATTACH_SHIFT)
-#define SM5502_REG_INTM1_DETACH_MASK		(0x1 << SM5502_REG_INTM1_DETACH_SHIFT)
-#define SM5502_REG_INTM1_KP_MASK		(0x1 << SM5502_REG_INTM1_KP_SHIFT)
-#define SM5502_REG_INTM1_LKP_MASK		(0x1 << SM5502_REG_INTM1_LKP_SHIFT)
-#define SM5502_REG_INTM1_LKR_MASK		(0x1 << SM5502_REG_INTM1_LKR_SHIFT)
-#define SM5502_REG_INTM1_OVP_EVENT_MASK		(0x1 << SM5502_REG_INTM1_OVP_EVENT_SHIFT)
-#define SM5502_REG_INTM1_OCP_EVENT_MASK		(0x1 << SM5502_REG_INTM1_OCP_EVENT_SHIFT)
-#define SM5502_REG_INTM1_OVP_OCP_DIS_MASK	(0x1 << SM5502_REG_INTM1_OVP_OCP_DIS_SHIFT)
-
-#define SM5502_REG_INTM2_VBUS_DET_SHIFT		0
-#define SM5502_REG_INTM2_REV_ACCE_SHIFT		1
-#define SM5502_REG_INTM2_ADC_CHG_SHIFT		2
-#define SM5502_REG_INTM2_STUCK_KEY_SHIFT	3
-#define SM5502_REG_INTM2_STUCK_KEY_RCV_SHIFT	4
-#define SM5502_REG_INTM2_MHL_SHIFT		5
-#define SM5502_REG_INTM2_VBUS_DET_MASK		(0x1 << SM5502_REG_INTM2_VBUS_DET_SHIFT)
-#define SM5502_REG_INTM2_REV_ACCE_MASK		(0x1 << SM5502_REG_INTM2_REV_ACCE_SHIFT)
-#define SM5502_REG_INTM2_ADC_CHG_MASK		(0x1 << SM5502_REG_INTM2_ADC_CHG_SHIFT)
-#define SM5502_REG_INTM2_STUCK_KEY_MASK		(0x1 << SM5502_REG_INTM2_STUCK_KEY_SHIFT)
-#define SM5502_REG_INTM2_STUCK_KEY_RCV_MASK	(0x1 << SM5502_REG_INTM2_STUCK_KEY_RCV_SHIFT)
-#define SM5502_REG_INTM2_MHL_MASK		(0x1 << SM5502_REG_INTM2_MHL_SHIFT)
-
-#define SM5502_REG_ADC_SHIFT			0
-#define SM5502_REG_ADC_MASK			(0x1f << SM5502_REG_ADC_SHIFT)
-
-#define SM5502_REG_TIMING_SET1_KEY_PRESS_SHIFT	4
-#define SM5502_REG_TIMING_SET1_KEY_PRESS_MASK	(0xf << SM5502_REG_TIMING_SET1_KEY_PRESS_SHIFT)
-#define TIMING_KEY_PRESS_100MS			0x0
-#define TIMING_KEY_PRESS_200MS			0x1
-#define TIMING_KEY_PRESS_300MS			0x2
-#define TIMING_KEY_PRESS_400MS			0x3
-#define TIMING_KEY_PRESS_500MS			0x4
-#define TIMING_KEY_PRESS_600MS			0x5
-#define TIMING_KEY_PRESS_700MS			0x6
-#define TIMING_KEY_PRESS_800MS			0x7
-#define TIMING_KEY_PRESS_900MS			0x8
-#define TIMING_KEY_PRESS_1000MS			0x9
-#define SM5502_REG_TIMING_SET1_ADC_DET_SHIFT	0
-#define SM5502_REG_TIMING_SET1_ADC_DET_MASK	(0xf << SM5502_REG_TIMING_SET1_ADC_DET_SHIFT)
-#define TIMING_ADC_DET_50MS			0x0
-#define TIMING_ADC_DET_100MS			0x1
-#define TIMING_ADC_DET_150MS			0x2
-#define TIMING_ADC_DET_200MS			0x3
-#define TIMING_ADC_DET_300MS			0x4
-#define TIMING_ADC_DET_400MS			0x5
-#define TIMING_ADC_DET_500MS			0x6
-#define TIMING_ADC_DET_600MS			0x7
-#define TIMING_ADC_DET_700MS			0x8
-#define TIMING_ADC_DET_800MS			0x9
-#define TIMING_ADC_DET_900MS			0xA
-#define TIMING_ADC_DET_1000MS			0xB
-
-#define SM5502_REG_TIMING_SET2_SW_WAIT_SHIFT	4
-#define SM5502_REG_TIMING_SET2_SW_WAIT_MASK	(0xf << SM5502_REG_TIMING_SET2_SW_WAIT_SHIFT)
-#define TIMING_SW_WAIT_10MS			0x0
-#define TIMING_SW_WAIT_30MS			0x1
-#define TIMING_SW_WAIT_50MS			0x2
-#define TIMING_SW_WAIT_70MS			0x3
-#define TIMING_SW_WAIT_90MS			0x4
-#define TIMING_SW_WAIT_110MS			0x5
-#define TIMING_SW_WAIT_130MS			0x6
-#define TIMING_SW_WAIT_150MS			0x7
-#define TIMING_SW_WAIT_170MS			0x8
-#define TIMING_SW_WAIT_190MS			0x9
-#define TIMING_SW_WAIT_210MS			0xA
-#define SM5502_REG_TIMING_SET2_LONG_KEY_SHIFT	0
-#define SM5502_REG_TIMING_SET2_LONG_KEY_MASK	(0xf << SM5502_REG_TIMING_SET2_LONG_KEY_SHIFT)
-#define TIMING_LONG_KEY_300MS			0x0
-#define TIMING_LONG_KEY_400MS			0x1
-#define TIMING_LONG_KEY_500MS			0x2
-#define TIMING_LONG_KEY_600MS			0x3
-#define TIMING_LONG_KEY_700MS			0x4
-#define TIMING_LONG_KEY_800MS			0x5
-#define TIMING_LONG_KEY_900MS			0x6
-#define TIMING_LONG_KEY_1000MS			0x7
-#define TIMING_LONG_KEY_1100MS			0x8
-#define TIMING_LONG_KEY_1200MS			0x9
-#define TIMING_LONG_KEY_1300MS			0xA
-#define TIMING_LONG_KEY_1400MS			0xB
-#define TIMING_LONG_KEY_1500MS			0xC
-
-#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_SHIFT		0
-#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE2_SHIFT		1
-#define SM5502_REG_DEV_TYPE1_USB_SDP_SHIFT		2
-#define SM5502_REG_DEV_TYPE1_UART_SHIFT			3
-#define SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_SHIFT	4
-#define SM5502_REG_DEV_TYPE1_USB_CHG_SHIFT		5
-#define SM5502_REG_DEV_TYPE1_DEDICATED_CHG_SHIFT	6
-#define SM5502_REG_DEV_TYPE1_USB_OTG_SHIFT		7
-#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_MASK		(0x1 << SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_SHIFT)
-#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1__MASK		(0x1 << SM5502_REG_DEV_TYPE1_AUDIO_TYPE2_SHIFT)
-#define SM5502_REG_DEV_TYPE1_USB_SDP_MASK		(0x1 << SM5502_REG_DEV_TYPE1_USB_SDP_SHIFT)
-#define SM5502_REG_DEV_TYPE1_UART_MASK			(0x1 << SM5502_REG_DEV_TYPE1_UART_SHIFT)
-#define SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_MASK	(0x1 << SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_SHIFT)
-#define SM5502_REG_DEV_TYPE1_USB_CHG_MASK		(0x1 << SM5502_REG_DEV_TYPE1_USB_CHG_SHIFT)
-#define SM5502_REG_DEV_TYPE1_DEDICATED_CHG_MASK		(0x1 << SM5502_REG_DEV_TYPE1_DEDICATED_CHG_SHIFT)
-#define SM5502_REG_DEV_TYPE1_USB_OTG_MASK		(0x1 << SM5502_REG_DEV_TYPE1_USB_OTG_SHIFT)
-
-#define SM5502_REG_DEV_TYPE2_JIG_USB_ON_SHIFT		0
-#define SM5502_REG_DEV_TYPE2_JIG_USB_OFF_SHIFT		1
-#define SM5502_REG_DEV_TYPE2_JIG_UART_ON_SHIFT		2
-#define SM5502_REG_DEV_TYPE2_JIG_UART_OFF_SHIFT		3
-#define SM5502_REG_DEV_TYPE2_PPD_SHIFT			4
-#define SM5502_REG_DEV_TYPE2_TTY_SHIFT			5
-#define SM5502_REG_DEV_TYPE2_AV_CABLE_SHIFT		6
-#define SM5502_REG_DEV_TYPE2_JIG_USB_ON_MASK		(0x1 << SM5502_REG_DEV_TYPE2_JIG_USB_ON_SHIFT)
-#define SM5502_REG_DEV_TYPE2_JIG_USB_OFF_MASK		(0x1 << SM5502_REG_DEV_TYPE2_JIG_USB_OFF_SHIFT)
-#define SM5502_REG_DEV_TYPE2_JIG_UART_ON_MASK		(0x1 << SM5502_REG_DEV_TYPE2_JIG_UART_ON_SHIFT)
-#define SM5502_REG_DEV_TYPE2_JIG_UART_OFF_MASK		(0x1 << SM5502_REG_DEV_TYPE2_JIG_UART_OFF_SHIFT)
-#define SM5502_REG_DEV_TYPE2_PPD_MASK			(0x1 << SM5502_REG_DEV_TYPE2_PPD_SHIFT)
-#define SM5502_REG_DEV_TYPE2_TTY_MASK			(0x1 << SM5502_REG_DEV_TYPE2_TTY_SHIFT)
-#define SM5502_REG_DEV_TYPE2_AV_CABLE_MASK		(0x1 << SM5502_REG_DEV_TYPE2_AV_CABLE_SHIFT)
-
-#define SM5502_REG_MANUAL_SW1_VBUSIN_SHIFT	0
-#define SM5502_REG_MANUAL_SW1_DP_SHIFT		2
-#define SM5502_REG_MANUAL_SW1_DM_SHIFT		5
-#define SM5502_REG_MANUAL_SW1_VBUSIN_MASK	(0x3 << SM5502_REG_MANUAL_SW1_VBUSIN_SHIFT)
-#define SM5502_REG_MANUAL_SW1_DP_MASK		(0x7 << SM5502_REG_MANUAL_SW1_DP_SHIFT)
-#define SM5502_REG_MANUAL_SW1_DM_MASK		(0x7 << SM5502_REG_MANUAL_SW1_DM_SHIFT)
-#define VBUSIN_SWITCH_OPEN			0x0
-#define VBUSIN_SWITCH_VBUSOUT			0x1
-#define VBUSIN_SWITCH_MIC			0x2
-#define VBUSIN_SWITCH_VBUSOUT_WITH_USB		0x3
-#define DM_DP_CON_SWITCH_OPEN			0x0
-#define DM_DP_CON_SWITCH_USB			0x1
-#define DM_DP_CON_SWITCH_AUDIO			0x2
-#define DM_DP_CON_SWITCH_UART			0x3
-#define DM_DP_SWITCH_OPEN			((DM_DP_CON_SWITCH_OPEN <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
-						| (DM_DP_CON_SWITCH_OPEN <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
-#define DM_DP_SWITCH_USB			((DM_DP_CON_SWITCH_USB <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
-						| (DM_DP_CON_SWITCH_USB <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
-#define DM_DP_SWITCH_AUDIO			((DM_DP_CON_SWITCH_AUDIO <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
-						| (DM_DP_CON_SWITCH_AUDIO <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
-#define DM_DP_SWITCH_UART			((DM_DP_CON_SWITCH_UART <<SM5502_REG_MANUAL_SW1_DP_SHIFT) \
-						| (DM_DP_CON_SWITCH_UART <<SM5502_REG_MANUAL_SW1_DM_SHIFT))
-
-/* SM5502 Interrupts */
-enum sm5502_irq {
-	/* INT1 */
-	SM5502_IRQ_INT1_ATTACH,
-	SM5502_IRQ_INT1_DETACH,
-	SM5502_IRQ_INT1_KP,
-	SM5502_IRQ_INT1_LKP,
-	SM5502_IRQ_INT1_LKR,
-	SM5502_IRQ_INT1_OVP_EVENT,
-	SM5502_IRQ_INT1_OCP_EVENT,
-	SM5502_IRQ_INT1_OVP_OCP_DIS,
-
-	/* INT2 */
-	SM5502_IRQ_INT2_VBUS_DET,
-	SM5502_IRQ_INT2_REV_ACCE,
-	SM5502_IRQ_INT2_ADC_CHG,
-	SM5502_IRQ_INT2_STUCK_KEY,
-	SM5502_IRQ_INT2_STUCK_KEY_RCV,
-	SM5502_IRQ_INT2_MHL,
-
-	SM5502_IRQ_NUM,
-};
-
-#define SM5502_IRQ_INT1_ATTACH_MASK		BIT(0)
-#define SM5502_IRQ_INT1_DETACH_MASK		BIT(1)
-#define SM5502_IRQ_INT1_KP_MASK			BIT(2)
-#define SM5502_IRQ_INT1_LKP_MASK		BIT(3)
-#define SM5502_IRQ_INT1_LKR_MASK		BIT(4)
-#define SM5502_IRQ_INT1_OVP_EVENT_MASK		BIT(5)
-#define SM5502_IRQ_INT1_OCP_EVENT_MASK		BIT(6)
-#define SM5502_IRQ_INT1_OVP_OCP_DIS_MASK	BIT(7)
-#define SM5502_IRQ_INT2_VBUS_DET_MASK		BIT(0)
-#define SM5502_IRQ_INT2_REV_ACCE_MASK		BIT(1)
-#define SM5502_IRQ_INT2_ADC_CHG_MASK		BIT(2)
-#define SM5502_IRQ_INT2_STUCK_KEY_MASK		BIT(3)
-#define SM5502_IRQ_INT2_STUCK_KEY_RCV_MASK	BIT(4)
-#define SM5502_IRQ_INT2_MHL_MASK		BIT(5)
-
-#endif /*  __LINUX_EXTCON_SM5502_H */
-- 
cgit v1.2.3


From 62364357c184db52d556f868e493963fac2aea78 Mon Sep 17 00:00:00 2001
From: George Cherian <george.cherian@ti.com>
Date: Tue, 9 Sep 2014 09:44:34 +0530
Subject: extcon: gpio: Fix code cleanup

This patch fixes following minor cleanup:
 - Order the include files in alphabetical order.
 - Fix description of state_off in extcon_gpio.h
 - Add a descrition for check_on_resume in extcon_gpio.h

Signed-off-by: George Cherian <george.cherian@ti.com>
[Modify the name/description of patch to keep standary codiyg style by Chanwoo Choi]
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-gpio.c       | 10 +++++-----
 include/linux/extcon/extcon-gpio.h |  4 +++-
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c
index 5b7ec274cb63..72f19a37fd01 100644
--- a/drivers/extcon/extcon-gpio.c
+++ b/drivers/extcon/extcon-gpio.c
@@ -20,16 +20,16 @@
  *
 */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+#include <linux/extcon.h>
+#include <linux/extcon/extcon-gpio.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
-#include <linux/gpio.h>
-#include <linux/extcon.h>
-#include <linux/extcon/extcon-gpio.h>
 
 struct gpio_extcon_data {
 	struct extcon_dev *edev;
diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h
index 8900fdf511c6..0b17ad43fbfc 100644
--- a/include/linux/extcon/extcon-gpio.h
+++ b/include/linux/extcon/extcon-gpio.h
@@ -34,8 +34,10 @@
  * @irq_flags:		IRQ Flags (e.g., IRQF_TRIGGER_LOW).
  * @state_on:		print_state is overriden with state_on if attached.
  *			If NULL, default method of extcon class is used.
- * @state_off:		print_state is overriden with state_on if detached.
+ * @state_off:		print_state is overriden with state_off if detached.
  *			If NUll, default method of extcon class is used.
+ * @check_on_resume:	Boolean describing whether to check the state of gpio
+ *			while resuming from sleep.
  *
  * Note that in order for state_on or state_off to be valid, both state_on
  * and state_off should be not NULL. If at least one of them is NULL,
-- 
cgit v1.2.3


From bcc5fd49a0fda5abc22057f65b318788ccb5d2ad Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 15 Sep 2014 18:15:53 +0200
Subject: clk: at91: add a driver for the h32mx clock

Newer SoCs have two different AHB interconnect. The AHB 32 bits Matrix
interconnect (h32mx) has a clock that can be setup at the half of the h64mx
clock (which is mck). The h32mx clock can not exceed 90 MHz.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 .../devicetree/bindings/clock/at91-clock.txt       |  14 +++
 arch/arm/mach-at91/Kconfig                         |   3 +
 drivers/clk/at91/Makefile                          |   1 +
 drivers/clk/at91/clk-h32mx.c                       | 123 +++++++++++++++++++++
 drivers/clk/at91/pmc.c                             |   6 +
 drivers/clk/at91/pmc.h                             |   5 +
 include/linux/clk/at91_pmc.h                       |   1 +
 7 files changed, 153 insertions(+)
 create mode 100644 drivers/clk/at91/clk-h32mx.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/at91-clock.txt b/Documentation/devicetree/bindings/clock/at91-clock.txt
index b3d544ca522a..7a4d4926f44e 100644
--- a/Documentation/devicetree/bindings/clock/at91-clock.txt
+++ b/Documentation/devicetree/bindings/clock/at91-clock.txt
@@ -74,6 +74,9 @@ Required properties:
 	"atmel,at91sam9x5-clk-utmi":
 		at91 utmi clock
 
+	"atmel,sama5d4-clk-h32mx":
+		at91 h32mx clock
+
 Required properties for SCKC node:
 - reg : defines the IO memory reserved for the SCKC.
 - #size-cells : shall be 0 (reg is used to encode clk id).
@@ -447,3 +450,14 @@ For example:
 		#clock-cells = <0>;
 		clocks = <&main>;
 	};
+
+Required properties for 32 bits bus Matrix clock (h32mx clock):
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall be the master clock source phandle.
+
+For example:
+	h32ck: h32mxck {
+		#clock-cells = <0>;
+		compatible = "atmel,sama5d4-clk-h32mx";
+		clocks = <&mck>;
+	};
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 6cc6f7aebdae..321210cc3be7 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -42,6 +42,9 @@ config AT91_SAM9_TIME
 config HAVE_AT91_SMD
 	bool
 
+config HAVE_AT91_H32MX
+	bool
+
 config SOC_AT91SAM9
 	bool
 	select AT91_SAM9_TIME
diff --git a/drivers/clk/at91/Makefile b/drivers/clk/at91/Makefile
index 4998aee59267..89a48a7bd5df 100644
--- a/drivers/clk/at91/Makefile
+++ b/drivers/clk/at91/Makefile
@@ -9,3 +9,4 @@ obj-y += clk-system.o clk-peripheral.o clk-programmable.o
 obj-$(CONFIG_HAVE_AT91_UTMI)		+= clk-utmi.o
 obj-$(CONFIG_HAVE_AT91_USB_CLK)		+= clk-usb.o
 obj-$(CONFIG_HAVE_AT91_SMD)		+= clk-smd.o
+obj-$(CONFIG_HAVE_AT91_H32MX)		+= clk-h32mx.o
diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c
new file mode 100644
index 000000000000..152dcb3f7b5f
--- /dev/null
+++ b/drivers/clk/at91/clk-h32mx.c
@@ -0,0 +1,123 @@
+/*
+ * clk-h32mx.c
+ *
+ *  Copyright (C) 2014 Atmel
+ *
+ * Alexandre Belloni <alexandre.belloni@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/clk/at91_pmc.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#include "pmc.h"
+
+#define H32MX_MAX_FREQ	90000000
+
+struct clk_sama5d4_h32mx {
+	struct clk_hw hw;
+	struct at91_pmc *pmc;
+};
+
+#define to_clk_sama5d4_h32mx(hw) container_of(hw, struct clk_sama5d4_h32mx, hw)
+
+static unsigned long clk_sama5d4_h32mx_recalc_rate(struct clk_hw *hw,
+						 unsigned long parent_rate)
+{
+	struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
+
+	if (pmc_read(h32mxclk->pmc, AT91_PMC_MCKR) & AT91_PMC_H32MXDIV)
+		return parent_rate / 2;
+
+	if (parent_rate > H32MX_MAX_FREQ)
+		pr_warn("H32MX clock is too fast\n");
+	return parent_rate;
+}
+
+static long clk_sama5d4_h32mx_round_rate(struct clk_hw *hw, unsigned long rate,
+				       unsigned long *parent_rate)
+{
+	unsigned long div;
+
+	if (rate > *parent_rate)
+		return *parent_rate;
+	div = *parent_rate / 2;
+	if (rate < div)
+		return div;
+
+	if (rate - div < *parent_rate - rate)
+		return div;
+
+	return *parent_rate;
+}
+
+static int clk_sama5d4_h32mx_set_rate(struct clk_hw *hw, unsigned long rate,
+				    unsigned long parent_rate)
+{
+	struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
+	struct at91_pmc *pmc = h32mxclk->pmc;
+	u32 tmp;
+
+	if (parent_rate != rate && (parent_rate / 2) != rate)
+		return -EINVAL;
+
+	pmc_lock(pmc);
+	tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_H32MXDIV;
+	if ((parent_rate / 2) == rate)
+		tmp |= AT91_PMC_H32MXDIV;
+	pmc_write(pmc, AT91_PMC_MCKR, tmp);
+	pmc_unlock(pmc);
+
+	return 0;
+}
+
+static const struct clk_ops h32mx_ops = {
+	.recalc_rate = clk_sama5d4_h32mx_recalc_rate,
+	.round_rate = clk_sama5d4_h32mx_round_rate,
+	.set_rate = clk_sama5d4_h32mx_set_rate,
+};
+
+void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
+				     struct at91_pmc *pmc)
+{
+	struct clk_sama5d4_h32mx *h32mxclk;
+	struct clk_init_data init;
+	const char *parent_name;
+	struct clk *clk;
+
+	h32mxclk = kzalloc(sizeof(*h32mxclk), GFP_KERNEL);
+	if (!h32mxclk)
+		return;
+
+	parent_name = of_clk_get_parent_name(np, 0);
+
+	init.name = np->name;
+	init.ops = &h32mx_ops;
+	init.parent_names = parent_name ? &parent_name : NULL;
+	init.num_parents = parent_name ? 1 : 0;
+	init.flags = CLK_SET_RATE_GATE;
+
+	h32mxclk->hw.init = &init;
+	h32mxclk->pmc = pmc;
+
+	clk = clk_register(NULL, &h32mxclk->hw);
+	if (!clk)
+		return;
+
+	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+}
diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c
index 524196bb35a5..386999b4f8eb 100644
--- a/drivers/clk/at91/pmc.c
+++ b/drivers/clk/at91/pmc.c
@@ -336,6 +336,12 @@ static const struct of_device_id pmc_clk_ids[] __initconst = {
 		.compatible = "atmel,at91sam9x5-clk-smd",
 		.data = of_at91sam9x5_clk_smd_setup,
 	},
+#endif
+#if defined(CONFIG_HAVE_AT91_H32MX)
+	{
+		.compatible = "atmel,sama5d4-clk-h32mx",
+		.data = of_sama5d4_clk_h32mx_setup,
+	},
 #endif
 	{ /*sentinel*/ }
 };
diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h
index 6c7625976113..52d2041fa3f6 100644
--- a/drivers/clk/at91/pmc.h
+++ b/drivers/clk/at91/pmc.h
@@ -120,4 +120,9 @@ extern void __init of_at91sam9x5_clk_smd_setup(struct device_node *np,
 					       struct at91_pmc *pmc);
 #endif
 
+#if defined(CONFIG_HAVE_AT91_SMD)
+extern void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
+					      struct at91_pmc *pmc);
+#endif
+
 #endif /* __PMC_H_ */
diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index de4268d4987a..c8e3b3d1eded 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -125,6 +125,7 @@ extern void __iomem *at91_pmc_base;
 #define		AT91_PMC_PLLADIV2	(1 << 12)		/* PLLA divisor by 2 [some SAM9 only] */
 #define			AT91_PMC_PLLADIV2_OFF		(0 << 12)
 #define			AT91_PMC_PLLADIV2_ON		(1 << 12)
+#define		AT91_PMC_H32MXDIV	BIT(24)
 
 #define	AT91_PMC_USB		0x38			/* USB Clock Register [some SAM9 only] */
 #define		AT91_PMC_USBS		(0x1 <<  0)		/* USB OHCI Input clock selection */
-- 
cgit v1.2.3


From c3099a5294f2c7266234e8ea35cbffc20a41aa9a Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 19 Sep 2014 20:27:34 +0200
Subject: PM / Domains: Add a detach callback to the struct dev_pm_domain

The intent of this callback is to simplify detachment of devices from
their PM domains. Further patches will show the benefit.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 72c0fe098a27..1022ba1eb4de 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -619,6 +619,7 @@ extern int dev_pm_put_subsys_data(struct device *dev);
  */
 struct dev_pm_domain {
 	struct dev_pm_ops	ops;
+	void (*detach)(struct device *dev, bool power_off);
 };
 
 /*
-- 
cgit v1.2.3


From aa42240ab2544a8bcb2efb400193826f57f3175e Mon Sep 17 00:00:00 2001
From: Tomasz Figa <tomasz.figa@gmail.com>
Date: Fri, 19 Sep 2014 20:27:36 +0200
Subject: PM / Domains: Add generic OF-based PM domain look-up

This patch introduces generic code to perform PM domain look-up using
device tree and automatically bind devices to their PM domains.

Generic device tree bindings are introduced to specify PM domains of
devices in their device tree nodes.

Backwards compatibility with legacy Samsung-specific PM domain bindings
is provided, but for now the new code is not compiled when
CONFIG_ARCH_EXYNOS is selected to avoid collision with legacy code.
This will change as soon as the Exynos PM domain code gets converted to
use the generic framework in further patch.

This patch was originally submitted by Tomasz Figa when he was employed
by Samsung.

Link: http://marc.info/?l=linux-pm&m=139955349702152&w=2
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../devicetree/bindings/power/power_domain.txt     |  49 ++++
 drivers/base/power/domain.c                        | 289 +++++++++++++++++++++
 include/linux/pm_domain.h                          |  52 ++++
 kernel/power/Kconfig                               |   4 +
 4 files changed, 394 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/power/power_domain.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
new file mode 100644
index 000000000000..98c16672ab5f
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -0,0 +1,49 @@
+* Generic PM domains
+
+System on chip designs are often divided into multiple PM domains that can be
+used for power gating of selected IP blocks for power saving by reduced leakage
+current.
+
+This device tree binding can be used to bind PM domain consumer devices with
+their PM domains provided by PM domain providers. A PM domain provider can be
+represented by any node in the device tree and can provide one or more PM
+domains. A consumer node can refer to the provider by a phandle and a set of
+phandle arguments (so called PM domain specifiers) of length specified by the
+#power-domain-cells property in the PM domain provider node.
+
+==PM domain providers==
+
+Required properties:
+ - #power-domain-cells : Number of cells in a PM domain specifier;
+   Typically 0 for nodes representing a single PM domain and 1 for nodes
+   providing multiple PM domains (e.g. power controllers), but can be any value
+   as specified by device tree binding documentation of particular provider.
+
+Example:
+
+	power: power-controller@12340000 {
+		compatible = "foo,power-controller";
+		reg = <0x12340000 0x1000>;
+		#power-domain-cells = <1>;
+	};
+
+The node above defines a power controller that is a PM domain provider and
+expects one cell as its phandle argument.
+
+==PM domain consumers==
+
+Required properties:
+ - power-domains : A phandle and PM domain specifier as defined by bindings of
+                   the power controller specified by phandle.
+
+Example:
+
+	leaky-device@12350000 {
+		compatible = "foo,i-leak-current";
+		reg = <0x12350000 0x1000>;
+		power-domains = <&power 0>;
+	};
+
+The node above defines a typical PM domain consumer device, which is located
+inside a PM domain with index 0 of a power controller represented by a node
+with the label "power".
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e6a11ca3ce26..a3d41a883d83 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -8,6 +8,7 @@
 
 #include <linux/kernel.h>
 #include <linux/io.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_qos.h>
@@ -1933,3 +1934,291 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	list_add(&genpd->gpd_list_node, &gpd_list);
 	mutex_unlock(&gpd_list_lock);
 }
+
+#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
+/*
+ * Device Tree based PM domain providers.
+ *
+ * The code below implements generic device tree based PM domain providers that
+ * bind device tree nodes with generic PM domains registered in the system.
+ *
+ * Any driver that registers generic PM domains and needs to support binding of
+ * devices to these domains is supposed to register a PM domain provider, which
+ * maps a PM domain specifier retrieved from the device tree to a PM domain.
+ *
+ * Two simple mapping functions have been provided for convenience:
+ *  - __of_genpd_xlate_simple() for 1:1 device tree node to PM domain mapping.
+ *  - __of_genpd_xlate_onecell() for mapping of multiple PM domains per node by
+ *    index.
+ */
+
+/**
+ * struct of_genpd_provider - PM domain provider registration structure
+ * @link: Entry in global list of PM domain providers
+ * @node: Pointer to device tree node of PM domain provider
+ * @xlate: Provider-specific xlate callback mapping a set of specifier cells
+ *         into a PM domain.
+ * @data: context pointer to be passed into @xlate callback
+ */
+struct of_genpd_provider {
+	struct list_head link;
+	struct device_node *node;
+	genpd_xlate_t xlate;
+	void *data;
+};
+
+/* List of registered PM domain providers. */
+static LIST_HEAD(of_genpd_providers);
+/* Mutex to protect the list above. */
+static DEFINE_MUTEX(of_genpd_mutex);
+
+/**
+ * __of_genpd_xlate_simple() - Xlate function for direct node-domain mapping
+ * @genpdspec: OF phandle args to map into a PM domain
+ * @data: xlate function private data - pointer to struct generic_pm_domain
+ *
+ * This is a generic xlate function that can be used to model PM domains that
+ * have their own device tree nodes. The private data of xlate function needs
+ * to be a valid pointer to struct generic_pm_domain.
+ */
+struct generic_pm_domain *__of_genpd_xlate_simple(
+					struct of_phandle_args *genpdspec,
+					void *data)
+{
+	if (genpdspec->args_count != 0)
+		return ERR_PTR(-EINVAL);
+	return data;
+}
+EXPORT_SYMBOL_GPL(__of_genpd_xlate_simple);
+
+/**
+ * __of_genpd_xlate_onecell() - Xlate function using a single index.
+ * @genpdspec: OF phandle args to map into a PM domain
+ * @data: xlate function private data - pointer to struct genpd_onecell_data
+ *
+ * This is a generic xlate function that can be used to model simple PM domain
+ * controllers that have one device tree node and provide multiple PM domains.
+ * A single cell is used as an index into an array of PM domains specified in
+ * the genpd_onecell_data struct when registering the provider.
+ */
+struct generic_pm_domain *__of_genpd_xlate_onecell(
+					struct of_phandle_args *genpdspec,
+					void *data)
+{
+	struct genpd_onecell_data *genpd_data = data;
+	unsigned int idx = genpdspec->args[0];
+
+	if (genpdspec->args_count != 1)
+		return ERR_PTR(-EINVAL);
+
+	if (idx >= genpd_data->num_domains) {
+		pr_err("%s: invalid domain index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!genpd_data->domains[idx])
+		return ERR_PTR(-ENOENT);
+
+	return genpd_data->domains[idx];
+}
+EXPORT_SYMBOL_GPL(__of_genpd_xlate_onecell);
+
+/**
+ * __of_genpd_add_provider() - Register a PM domain provider for a node
+ * @np: Device node pointer associated with the PM domain provider.
+ * @xlate: Callback for decoding PM domain from phandle arguments.
+ * @data: Context pointer for @xlate callback.
+ */
+int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
+			void *data)
+{
+	struct of_genpd_provider *cp;
+
+	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
+	if (!cp)
+		return -ENOMEM;
+
+	cp->node = of_node_get(np);
+	cp->data = data;
+	cp->xlate = xlate;
+
+	mutex_lock(&of_genpd_mutex);
+	list_add(&cp->link, &of_genpd_providers);
+	mutex_unlock(&of_genpd_mutex);
+	pr_debug("Added domain provider from %s\n", np->full_name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__of_genpd_add_provider);
+
+/**
+ * of_genpd_del_provider() - Remove a previously registered PM domain provider
+ * @np: Device node pointer associated with the PM domain provider
+ */
+void of_genpd_del_provider(struct device_node *np)
+{
+	struct of_genpd_provider *cp;
+
+	mutex_lock(&of_genpd_mutex);
+	list_for_each_entry(cp, &of_genpd_providers, link) {
+		if (cp->node == np) {
+			list_del(&cp->link);
+			of_node_put(cp->node);
+			kfree(cp);
+			break;
+		}
+	}
+	mutex_unlock(&of_genpd_mutex);
+}
+EXPORT_SYMBOL_GPL(of_genpd_del_provider);
+
+/**
+ * of_genpd_get_from_provider() - Look-up PM domain
+ * @genpdspec: OF phandle args to use for look-up
+ *
+ * Looks for a PM domain provider under the node specified by @genpdspec and if
+ * found, uses xlate function of the provider to map phandle args to a PM
+ * domain.
+ *
+ * Returns a valid pointer to struct generic_pm_domain on success or ERR_PTR()
+ * on failure.
+ */
+static struct generic_pm_domain *of_genpd_get_from_provider(
+					struct of_phandle_args *genpdspec)
+{
+	struct generic_pm_domain *genpd = ERR_PTR(-ENOENT);
+	struct of_genpd_provider *provider;
+
+	mutex_lock(&of_genpd_mutex);
+
+	/* Check if we have such a provider in our array */
+	list_for_each_entry(provider, &of_genpd_providers, link) {
+		if (provider->node == genpdspec->np)
+			genpd = provider->xlate(genpdspec, provider->data);
+		if (!IS_ERR(genpd))
+			break;
+	}
+
+	mutex_unlock(&of_genpd_mutex);
+
+	return genpd;
+}
+
+/**
+ * genpd_dev_pm_detach - Detach a device from its PM domain.
+ * @dev: Device to attach.
+ * @power_off: Currently not used
+ *
+ * Try to locate a corresponding generic PM domain, which the device was
+ * attached to previously. If such is found, the device is detached from it.
+ */
+static void genpd_dev_pm_detach(struct device *dev, bool power_off)
+{
+	struct generic_pm_domain *pd = NULL, *gpd;
+	int ret = 0;
+
+	if (!dev->pm_domain)
+		return;
+
+	mutex_lock(&gpd_list_lock);
+	list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
+		if (&gpd->domain == dev->pm_domain) {
+			pd = gpd;
+			break;
+		}
+	}
+	mutex_unlock(&gpd_list_lock);
+
+	if (!pd)
+		return;
+
+	dev_dbg(dev, "removing from PM domain %s\n", pd->name);
+
+	while (1) {
+		ret = pm_genpd_remove_device(pd, dev);
+		if (ret != -EAGAIN)
+			break;
+		cond_resched();
+	}
+
+	if (ret < 0) {
+		dev_err(dev, "failed to remove from PM domain %s: %d",
+			pd->name, ret);
+		return;
+	}
+
+	/* Check if PM domain can be powered off after removing this device. */
+	genpd_queue_power_off_work(pd);
+}
+
+/**
+ * genpd_dev_pm_attach - Attach a device to its PM domain using DT.
+ * @dev: Device to attach.
+ *
+ * Parse device's OF node to find a PM domain specifier. If such is found,
+ * attaches the device to retrieved pm_domain ops.
+ *
+ * Both generic and legacy Samsung-specific DT bindings are supported to keep
+ * backwards compatibility with existing DTBs.
+ *
+ * Returns 0 on successfully attached PM domain or negative error code.
+ */
+int genpd_dev_pm_attach(struct device *dev)
+{
+	struct of_phandle_args pd_args;
+	struct generic_pm_domain *pd;
+	int ret;
+
+	if (!dev->of_node)
+		return -ENODEV;
+
+	if (dev->pm_domain)
+		return -EEXIST;
+
+	ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
+					"#power-domain-cells", 0, &pd_args);
+	if (ret < 0) {
+		if (ret != -ENOENT)
+			return ret;
+
+		/*
+		 * Try legacy Samsung-specific bindings
+		 * (for backwards compatibility of DT ABI)
+		 */
+		pd_args.args_count = 0;
+		pd_args.np = of_parse_phandle(dev->of_node,
+						"samsung,power-domain", 0);
+		if (!pd_args.np)
+			return -ENOENT;
+	}
+
+	pd = of_genpd_get_from_provider(&pd_args);
+	if (IS_ERR(pd)) {
+		dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
+			__func__, PTR_ERR(pd));
+		of_node_put(dev->of_node);
+		return PTR_ERR(pd);
+	}
+
+	dev_dbg(dev, "adding to PM domain %s\n", pd->name);
+
+	while (1) {
+		ret = pm_genpd_add_device(pd, dev);
+		if (ret != -EAGAIN)
+			break;
+		cond_resched();
+	}
+
+	if (ret < 0) {
+		dev_err(dev, "failed to add to PM domain %s: %d",
+			pd->name, ret);
+		of_node_put(dev->of_node);
+		return ret;
+	}
+
+	dev->pm_domain->detach = genpd_dev_pm_detach;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
+#endif
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index aa03586c94a2..292079d8da6b 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -264,4 +264,56 @@ static inline void pm_genpd_syscore_poweroff(struct device *dev) {}
 static inline void pm_genpd_syscore_poweron(struct device *dev) {}
 #endif
 
+/* OF PM domain providers */
+struct of_device_id;
+
+struct genpd_onecell_data {
+	struct generic_pm_domain **domains;
+	unsigned int num_domains;
+};
+
+typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args,
+						void *data);
+
+#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
+int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
+			void *data);
+void of_genpd_del_provider(struct device_node *np);
+
+struct generic_pm_domain *__of_genpd_xlate_simple(
+					struct of_phandle_args *genpdspec,
+					void *data);
+struct generic_pm_domain *__of_genpd_xlate_onecell(
+					struct of_phandle_args *genpdspec,
+					void *data);
+
+int genpd_dev_pm_attach(struct device *dev);
+#else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
+static inline int __of_genpd_add_provider(struct device_node *np,
+					genpd_xlate_t xlate, void *data)
+{
+	return 0;
+}
+static inline void of_genpd_del_provider(struct device_node *np) {}
+
+#define __of_genpd_xlate_simple		NULL
+#define __of_genpd_xlate_onecell	NULL
+
+static inline int genpd_dev_pm_attach(struct device *dev)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
+
+static inline int of_genpd_add_provider_simple(struct device_node *np,
+					struct generic_pm_domain *genpd)
+{
+	return __of_genpd_add_provider(np, __of_genpd_xlate_simple, genpd);
+}
+static inline int of_genpd_add_provider_onecell(struct device_node *np,
+					struct genpd_onecell_data *data)
+{
+	return __of_genpd_add_provider(np, __of_genpd_xlate_onecell, data);
+}
+
 #endif /* _LINUX_PM_DOMAIN_H */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index e4e4121fa327..897619b11fb2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -302,6 +302,10 @@ config PM_GENERIC_DOMAINS_RUNTIME
 	def_bool y
 	depends on PM_RUNTIME && PM_GENERIC_DOMAINS
 
+config PM_GENERIC_DOMAINS_OF
+	def_bool y
+	depends on PM_GENERIC_DOMAINS && OF && !ARCH_EXYNOS
+
 config CPU_PM
 	bool
 	depends on SUSPEND || CPU_IDLE
-- 
cgit v1.2.3


From 46420dd73b800f87a19af13af5883855cf38cb08 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 19 Sep 2014 20:27:37 +0200
Subject: PM / Domains: Add APIs to attach/detach a PM domain for a device

To maintain scalability let's add common methods to attach and detach
a PM domain for a device, dev_pm_domain_attach|detach().

Typically dev_pm_domain_attach() shall be invoked from subsystem level
code at the probe phase to try to attach a device to its PM domain.
The reversed actions may be done a the remove phase and then by
invoking dev_pm_domain_detach().

When attachment succeeds, the attach function should assign its
corresponding detach function to a new ->detach() callback added in the
struct dev_pm_domain.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/common.c | 52 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm.h          | 11 ++++++++++
 2 files changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index df2e5eeaeb05..b0f138806bbc 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -11,6 +11,8 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/pm_clock.h>
+#include <linux/acpi.h>
+#include <linux/pm_domain.h>
 
 /**
  * dev_pm_get_subsys_data - Create or refcount power.subsys_data for device.
@@ -82,3 +84,53 @@ int dev_pm_put_subsys_data(struct device *dev)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data);
+
+/**
+ * dev_pm_domain_attach - Attach a device to its PM domain.
+ * @dev: Device to attach.
+ * @power_on: Used to indicate whether we should power on the device.
+ *
+ * The @dev may only be attached to a single PM domain. By iterating through
+ * the available alternatives we try to find a valid PM domain for the device.
+ * As attachment succeeds, the ->detach() callback in the struct dev_pm_domain
+ * should be assigned by the corresponding attach function.
+ *
+ * This function should typically be invoked from subsystem level code during
+ * the probe phase. Especially for those that holds devices which requires
+ * power management through PM domains.
+ *
+ * Callers must ensure proper synchronization of this function with power
+ * management callbacks.
+ *
+ * Returns 0 on successfully attached PM domain or negative error code.
+ */
+int dev_pm_domain_attach(struct device *dev, bool power_on)
+{
+	int ret;
+
+	ret = acpi_dev_pm_attach(dev, power_on);
+	if (ret)
+		ret = genpd_dev_pm_attach(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_attach);
+
+/**
+ * dev_pm_domain_detach - Detach a device from its PM domain.
+ * @dev: Device to attach.
+ * @power_off: Used to indicate whether we should power off the device.
+ *
+ * This functions will reverse the actions from dev_pm_domain_attach() and thus
+ * try to detach the @dev from its PM domain. Typically it should be invoked
+ * from subsystem level code during the remove phase.
+ *
+ * Callers must ensure proper synchronization of this function with power
+ * management callbacks.
+ */
+void dev_pm_domain_detach(struct device *dev, bool power_off)
+{
+	if (dev->pm_domain && dev->pm_domain->detach)
+		dev->pm_domain->detach(dev, power_off);
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_detach);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 1022ba1eb4de..c4cbf485a5d6 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -622,6 +622,17 @@ struct dev_pm_domain {
 	void (*detach)(struct device *dev, bool power_off);
 };
 
+#ifdef CONFIG_PM
+extern int dev_pm_domain_attach(struct device *dev, bool power_on);
+extern void dev_pm_domain_detach(struct device *dev, bool power_off);
+#else
+static inline int dev_pm_domain_attach(struct device *dev, bool power_on)
+{
+	return -ENODEV;
+}
+static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
+#endif
+
 /*
  * The PM_EVENT_ messages are also used by drivers implementing the legacy
  * suspend framework, based on the ->suspend() and ->resume() callbacks common
-- 
cgit v1.2.3


From 91d66cd27f5fd8a3bca4621a3505c9067925478d Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 19 Sep 2014 20:27:44 +0200
Subject: ACPI / PM: Convert acpi_dev_pm_detach() into a static function

The ->detach() callback for the PM domain has now been fully adopted,
thus there no users left of the acpi_dev_pm_detach() API. This allow us
to convert it into a static function.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c | 69 ++++++++++++++++++++++++------------------------
 include/linux/acpi.h     |  2 --
 2 files changed, 34 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index eec5ed5be949..bea6896be122 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1040,6 +1040,40 @@ static struct dev_pm_domain acpi_general_pm_domain = {
 	},
 };
 
+/**
+ * acpi_dev_pm_detach - Remove ACPI power management from the device.
+ * @dev: Device to take care of.
+ * @power_off: Whether or not to try to remove power from the device.
+ *
+ * Remove the device from the general ACPI PM domain and remove its wakeup
+ * notifier.  If @power_off is set, additionally remove power from the device if
+ * possible.
+ *
+ * Callers must ensure proper synchronization of this function with power
+ * management callbacks.
+ */
+static void acpi_dev_pm_detach(struct device *dev, bool power_off)
+{
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	if (adev && dev->pm_domain == &acpi_general_pm_domain) {
+		dev->pm_domain = NULL;
+		acpi_remove_pm_notifier(adev);
+		if (power_off) {
+			/*
+			 * If the device's PM QoS resume latency limit or flags
+			 * have been exposed to user space, they have to be
+			 * hidden at this point, so that they don't affect the
+			 * choice of the low-power state to put the device into.
+			 */
+			dev_pm_qos_hide_latency_limit(dev);
+			dev_pm_qos_hide_flags(dev);
+			acpi_device_wakeup(adev, ACPI_STATE_S0, false);
+			acpi_dev_pm_low_power(dev, adev, ACPI_STATE_S0);
+		}
+	}
+}
+
 /**
  * acpi_dev_pm_attach - Prepare device for ACPI power management.
  * @dev: Device to prepare.
@@ -1077,39 +1111,4 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_pm_attach);
-
-/**
- * acpi_dev_pm_detach - Remove ACPI power management from the device.
- * @dev: Device to take care of.
- * @power_off: Whether or not to try to remove power from the device.
- *
- * Remove the device from the general ACPI PM domain and remove its wakeup
- * notifier.  If @power_off is set, additionally remove power from the device if
- * possible.
- *
- * Callers must ensure proper synchronization of this function with power
- * management callbacks.
- */
-void acpi_dev_pm_detach(struct device *dev, bool power_off)
-{
-	struct acpi_device *adev = ACPI_COMPANION(dev);
-
-	if (adev && dev->pm_domain == &acpi_general_pm_domain) {
-		dev->pm_domain = NULL;
-		acpi_remove_pm_notifier(adev);
-		if (power_off) {
-			/*
-			 * If the device's PM QoS resume latency limit or flags
-			 * have been exposed to user space, they have to be
-			 * hidden at this point, so that they don't affect the
-			 * choice of the low-power state to put the device into.
-			 */
-			dev_pm_qos_hide_latency_limit(dev);
-			dev_pm_qos_hide_flags(dev);
-			acpi_device_wakeup(adev, ACPI_STATE_S0, false);
-			acpi_dev_pm_low_power(dev, adev, ACPI_STATE_S0);
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(acpi_dev_pm_detach);
 #endif /* CONFIG_PM */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 807cbc46d73e..b7926bb9b444 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -587,7 +587,6 @@ static inline int acpi_subsys_freeze(struct device *dev) { return 0; }
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM)
 struct acpi_device *acpi_dev_pm_get_node(struct device *dev);
 int acpi_dev_pm_attach(struct device *dev, bool power_on);
-void acpi_dev_pm_detach(struct device *dev, bool power_off);
 #else
 static inline struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
 {
@@ -597,7 +596,6 @@ static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
 	return -ENODEV;
 }
-static inline void acpi_dev_pm_detach(struct device *dev, bool power_off) {}
 #endif
 
 #ifdef CONFIG_ACPI
-- 
cgit v1.2.3


From bf57229745f849e500ba69ff91e35bc8160a7373 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 13 Sep 2014 16:40:08 -0700
Subject: blk-mq: remove REQ_END

Pass an explicit parameter for the last request in a batch to ->queue_rq
instead of using a request flag.  Besides being a cleaner and non-stateful
interface this is also required for the next patch, which fixes the blk-mq
I/O submission code to not start a time too early.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c                    | 22 +++++-----------------
 drivers/block/mtip32xx/mtip32xx.c |  3 ++-
 drivers/block/null_blk.c          |  3 ++-
 drivers/block/virtio_blk.c        |  4 ++--
 drivers/scsi/scsi_lib.c           |  3 ++-
 include/linux/blk-mq.h            |  2 +-
 include/linux/blk_types.h         |  2 --
 7 files changed, 14 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e743d28620b2..32b4797f4186 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -384,7 +384,7 @@ void blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
-static void blk_mq_start_request(struct request *rq, bool last)
+static void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
@@ -421,16 +421,6 @@ static void blk_mq_start_request(struct request *rq, bool last)
 		 */
 		rq->nr_phys_segments++;
 	}
-
-	/*
-	 * Flag the last request in the series so that drivers know when IO
-	 * should be kicked off, if they don't do it on a per-request basis.
-	 *
-	 * Note: the flag isn't the only condition drivers should do kick off.
-	 * If drive is busy, the last request might not have the bit set.
-	 */
-	if (last)
-		rq->cmd_flags |= REQ_END;
 }
 
 static void __blk_mq_requeue_request(struct request *rq)
@@ -440,8 +430,6 @@ static void __blk_mq_requeue_request(struct request *rq)
 	trace_block_rq_requeue(q, rq);
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
 
-	rq->cmd_flags &= ~REQ_END;
-
 	if (q->dma_drain_size && blk_rq_bytes(rq))
 		rq->nr_phys_segments--;
 }
@@ -755,9 +743,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 		rq = list_first_entry(&rq_list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 
-		blk_mq_start_request(rq, list_empty(&rq_list));
+		blk_mq_start_request(rq);
 
-		ret = q->mq_ops->queue_rq(hctx, rq);
+		ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
 		switch (ret) {
 		case BLK_MQ_RQ_QUEUE_OK:
 			queued++;
@@ -1198,14 +1186,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		int ret;
 
 		blk_mq_bio_to_request(rq, bio);
-		blk_mq_start_request(rq, true);
+		blk_mq_start_request(rq);
 
 		/*
 		 * For OK queue, we are done. For error, kill it. Any other
 		 * error (busy), just add it to our list as we previously
 		 * would have done
 		 */
-		ret = q->mq_ops->queue_rq(data.hctx, rq);
+		ret = q->mq_ops->queue_rq(data.hctx, rq, true);
 		if (ret == BLK_MQ_RQ_QUEUE_OK)
 			goto done;
 		else {
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 5c8e7fe07745..0e2084f37c67 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3775,7 +3775,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
+		bool last)
 {
 	int ret;
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 00d469c7f9f7..c5b7315c2c13 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -313,7 +313,8 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
+		bool last)
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0a581400de0f..13756e016797 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -164,14 +164,14 @@ static void virtblk_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
+		bool last)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
 	unsigned int num;
 	int qid = hctx->queue_num;
-	const bool last = (req->cmd_flags & REQ_END) != 0;
 	int err;
 	bool notify = false;
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1f2bae475cb7..f1df41168391 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1855,7 +1855,8 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
 	blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
+static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
+		bool last)
 {
 	struct request_queue *q = req->q;
 	struct scsi_device *sdev = q->queuedata;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a1e31f274fcd..9c4e306a9217 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -77,7 +77,7 @@ struct blk_mq_tag_set {
 	struct list_head	tag_list;
 };
 
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
+typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool);
 typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 66c2167f04a9..bb7d66460e7a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -188,7 +188,6 @@ enum rq_flag_bits {
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_KERNEL, 		/* direct IO to kernel pages */
 	__REQ_PM,		/* runtime pm request */
-	__REQ_END,		/* last of chain of requests */
 	__REQ_HASHED,		/* on IO scheduler merge hash */
 	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
 	__REQ_NR_BITS,		/* stops here */
@@ -242,7 +241,6 @@ enum rq_flag_bits {
 #define REQ_SECURE		(1ULL << __REQ_SECURE)
 #define REQ_KERNEL		(1ULL << __REQ_KERNEL)
 #define REQ_PM			(1ULL << __REQ_PM)
-#define REQ_END			(1ULL << __REQ_END)
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
 #define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
 
-- 
cgit v1.2.3


From e2490073cd7c3d6f6ef6e029a208edd4d38efac4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 13 Sep 2014 16:40:09 -0700
Subject: blk-mq: call blk_mq_start_request from ->queue_rq

When we call blk_mq_start_request from the core blk-mq code before calling into
->queue_rq there is a racy window where the timeout handler can hit before we've
fully set up the driver specific part of the command.

Move the call to blk_mq_start_request into the driver so the driver can start
the request only once it is fully set up.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c                    | 13 ++++++-------
 drivers/block/mtip32xx/mtip32xx.c |  2 ++
 drivers/block/null_blk.c          |  2 ++
 drivers/block/virtio_blk.c        |  2 ++
 drivers/scsi/scsi_lib.c           |  1 +
 include/linux/blk-mq.h            |  1 +
 6 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 32b4797f4186..141f2e06803a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -384,7 +384,7 @@ void blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
-static void blk_mq_start_request(struct request *rq)
+void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
@@ -422,16 +422,18 @@ static void blk_mq_start_request(struct request *rq)
 		rq->nr_phys_segments++;
 	}
 }
+EXPORT_SYMBOL(blk_mq_start_request);
 
 static void __blk_mq_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
 	trace_block_rq_requeue(q, rq);
-	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
 
-	if (q->dma_drain_size && blk_rq_bytes(rq))
-		rq->nr_phys_segments--;
+	if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
+		if (q->dma_drain_size && blk_rq_bytes(rq))
+			rq->nr_phys_segments--;
+	}
 }
 
 void blk_mq_requeue_request(struct request *rq)
@@ -743,8 +745,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 		rq = list_first_entry(&rq_list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 
-		blk_mq_start_request(rq);
-
 		ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
 		switch (ret) {
 		case BLK_MQ_RQ_QUEUE_OK:
@@ -1186,7 +1186,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		int ret;
 
 		blk_mq_bio_to_request(rq, bio);
-		blk_mq_start_request(rq);
 
 		/*
 		 * For OK queue, we are done. For error, kill it. Any other
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 0e2084f37c67..4042440a0470 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3783,6 +3783,8 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
+	blk_mq_start_request(rq);
+
 	ret = mtip_submit_request(hctx, rq);
 	if (likely(!ret))
 		return BLK_MQ_RQ_QUEUE_OK;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index c5b7315c2c13..332ce20d45da 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -321,6 +321,8 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	cmd->rq = rq;
 	cmd->nq = hctx->driver_data;
 
+	blk_mq_start_request(rq);
+
 	null_handle_cmd(cmd);
 	return BLK_MQ_RQ_QUEUE_OK;
 }
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 13756e016797..83816bf6882f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -205,6 +205,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
 		}
 	}
 
+	blk_mq_start_request(req);
+
 	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(vbr->req) == WRITE)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index f1df41168391..2dcd9078de48 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1890,6 +1890,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
 	scsi_init_cmd_errh(cmd);
 	cmd->scsi_done = scsi_mq_done;
 
+	blk_mq_start_request(req);
 	reason = scsi_dispatch_cmd(cmd);
 	if (reason) {
 		scsi_set_blocked(cmd, reason);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 9c4e306a9217..878b6f71da48 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -159,6 +159,7 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
 
+void blk_mq_start_request(struct request *rq);
 void blk_mq_end_io(struct request *rq, int error);
 void __blk_mq_end_io(struct request *rq, int error);
 
-- 
cgit v1.2.3


From c8a446ad695ada43a885ec12b38411dbd190a11b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 13 Sep 2014 16:40:10 -0700
Subject: blk-mq: rename blk_mq_end_io to blk_mq_end_request

Now that we've changed the driver API on the submission side use the
opportunity to fix up the name on the completion side to fit into the
general scheme.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c                 |  4 ++--
 block/blk-mq.c                    | 16 ++++++++--------
 drivers/block/mtip32xx/mtip32xx.c |  4 ++--
 drivers/block/null_blk.c          |  2 +-
 drivers/block/virtio_blk.c        |  2 +-
 drivers/scsi/scsi_lib.c           |  4 ++--
 include/linux/blk-mq.h            |  4 ++--
 7 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3cb5e9e7108a..698e6926388c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -202,7 +202,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 		list_del_init(&rq->flush.list);
 		blk_flush_restore_request(rq);
 		if (q->mq_ops)
-			blk_mq_end_io(rq, error);
+			blk_mq_end_request(rq, error);
 		else
 			__blk_end_request_all(rq, error);
 		break;
@@ -378,7 +378,7 @@ void blk_insert_flush(struct request *rq)
 	 */
 	if (!policy) {
 		if (q->mq_ops)
-			blk_mq_end_io(rq, 0);
+			blk_mq_end_request(rq, 0);
 		else
 			__blk_end_bidi_request(rq, 0, 0, 0);
 		return;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 141f2e06803a..1713686f5c2f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -300,7 +300,7 @@ void blk_mq_clone_flush_request(struct request *flush_rq,
 		hctx->cmd_size);
 }
 
-inline void __blk_mq_end_io(struct request *rq, int error)
+inline void __blk_mq_end_request(struct request *rq, int error)
 {
 	blk_account_io_done(rq);
 
@@ -312,15 +312,15 @@ inline void __blk_mq_end_io(struct request *rq, int error)
 		blk_mq_free_request(rq);
 	}
 }
-EXPORT_SYMBOL(__blk_mq_end_io);
+EXPORT_SYMBOL(__blk_mq_end_request);
 
-void blk_mq_end_io(struct request *rq, int error)
+void blk_mq_end_request(struct request *rq, int error)
 {
 	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
 		BUG();
-	__blk_mq_end_io(rq, error);
+	__blk_mq_end_request(rq, error);
 }
-EXPORT_SYMBOL(blk_mq_end_io);
+EXPORT_SYMBOL(blk_mq_end_request);
 
 static void __blk_mq_complete_request_remote(void *data)
 {
@@ -360,7 +360,7 @@ void __blk_mq_complete_request(struct request *rq)
 	struct request_queue *q = rq->q;
 
 	if (!q->softirq_done_fn)
-		blk_mq_end_io(rq, rq->errors);
+		blk_mq_end_request(rq, rq->errors);
 	else
 		blk_mq_ipi_complete_request(rq);
 }
@@ -758,7 +758,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 			pr_err("blk-mq: bad return on queue: %d\n", ret);
 		case BLK_MQ_RQ_QUEUE_ERROR:
 			rq->errors = -EIO;
-			blk_mq_end_io(rq, rq->errors);
+			blk_mq_end_request(rq, rq->errors);
 			break;
 		}
 
@@ -1200,7 +1200,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 			if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
 				rq->errors = -EIO;
-				blk_mq_end_io(rq, rq->errors);
+				blk_mq_end_request(rq, rq->errors);
 				goto done;
 			}
 		}
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 4042440a0470..6b7e8d0fba99 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -247,7 +247,7 @@ static void mtip_async_complete(struct mtip_port *port,
 	if (unlikely(cmd->unaligned))
 		up(&port->cmd_slot_unal);
 
-	blk_mq_end_io(rq, status ? -EIO : 0);
+	blk_mq_end_request(rq, status ? -EIO : 0);
 }
 
 /*
@@ -3739,7 +3739,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 		int err;
 
 		err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
-		blk_mq_end_io(rq, err);
+		blk_mq_end_request(rq, err);
 		return 0;
 	}
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 332ce20d45da..ac50a2931044 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -177,7 +177,7 @@ static void end_cmd(struct nullb_cmd *cmd)
 {
 	switch (queue_mode)  {
 	case NULL_Q_MQ:
-		blk_mq_end_io(cmd->rq, 0);
+		blk_mq_end_request(cmd->rq, 0);
 		return;
 	case NULL_Q_RQ:
 		INIT_LIST_HEAD(&cmd->rq->queuelist);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 83816bf6882f..f751fc392ba9 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -135,7 +135,7 @@ static inline void virtblk_request_done(struct request *req)
 		req->errors = (error != 0);
 	}
 
-	blk_mq_end_io(req, error);
+	blk_mq_end_request(req, error);
 }
 
 static void virtblk_done(struct virtqueue *vq)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2dcd9078de48..73ce7d27f5c8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -713,7 +713,7 @@ static bool scsi_end_request(struct request *req, int error,
 
 	if (req->mq_ctx) {
 		/*
-		 * In the MQ case the command gets freed by __blk_mq_end_io,
+		 * In the MQ case the command gets freed by __blk_mq_end_request,
 		 * so we have to do all cleanup that depends on it earlier.
 		 *
 		 * We also can't kick the queues from irq context, so we
@@ -721,7 +721,7 @@ static bool scsi_end_request(struct request *req, int error,
 		 */
 		scsi_mq_uninit_cmd(cmd);
 
-		__blk_mq_end_io(req, error);
+		__blk_mq_end_request(req, error);
 
 		if (scsi_target(sdev)->single_lun ||
 		    !list_empty(&sdev->host->starved_list))
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 878b6f71da48..cb217c16990d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -160,8 +160,8 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_ind
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
 
 void blk_mq_start_request(struct request *rq);
-void blk_mq_end_io(struct request *rq, int error);
-void __blk_mq_end_io(struct request *rq, int error);
+void blk_mq_end_request(struct request *rq, int error);
+void __blk_mq_end_request(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
-- 
cgit v1.2.3


From 81481eb423c295c5480a3fab9bb961cf286c91e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 13 Sep 2014 16:40:11 -0700
Subject: blk-mq: fix and simplify tag iteration for the timeout handler

Don't do a kmalloc from timer to handle timeouts, chances are we could be
under heavy load or similar and thus just miss out on the timeouts.
Fortunately it is very easy to just iterate over all in use tags, and doing
this properly actually cleans up the blk_mq_busy_iter API as well, and
prepares us for the next patch by passing a reserved argument to the
iterator.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-tag.c      | 44 +++++++++++--------------
 block/blk-mq.c          | 85 +++++++++++++++----------------------------------
 include/linux/blk-mq.h  |  6 +++-
 include/scsi/scsi_tcq.h |  2 +-
 4 files changed, 49 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index c1b92426c95e..b08788086414 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -392,45 +392,37 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 		__blk_mq_put_reserved_tag(tags, tag);
 }
 
-static void bt_for_each_free(struct blk_mq_bitmap_tags *bt,
-			     unsigned long *free_map, unsigned int off)
+static void bt_for_each(struct blk_mq_hw_ctx *hctx,
+		struct blk_mq_bitmap_tags *bt, unsigned int off,
+		busy_iter_fn *fn, void *data, bool reserved)
 {
-	int i;
+	struct request *rq;
+	int bit, i;
 
 	for (i = 0; i < bt->map_nr; i++) {
 		struct blk_align_bitmap *bm = &bt->map[i];
-		int bit = 0;
-
-		do {
-			bit = find_next_zero_bit(&bm->word, bm->depth, bit);
-			if (bit >= bm->depth)
-				break;
 
-			__set_bit(bit + off, free_map);
-			bit++;
-		} while (1);
+		for (bit = find_first_bit(&bm->word, bm->depth);
+		     bit < bm->depth;
+		     bit = find_next_bit(&bm->word, bm->depth, bit + 1)) {
+		     	rq = blk_mq_tag_to_rq(hctx->tags, off + bit);
+			if (rq->q == hctx->queue)
+				fn(hctx, rq, data, reserved);
+		}
 
 		off += (1 << bt->bits_per_word);
 	}
 }
 
-void blk_mq_tag_busy_iter(struct blk_mq_tags *tags,
-			  void (*fn)(void *, unsigned long *), void *data)
+void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
+		void *priv)
 {
-	unsigned long *tag_map;
-	size_t map_size;
-
-	map_size = ALIGN(tags->nr_tags, BITS_PER_LONG) / BITS_PER_LONG;
-	tag_map = kzalloc(map_size * sizeof(unsigned long), GFP_ATOMIC);
-	if (!tag_map)
-		return;
+	struct blk_mq_tags *tags = hctx->tags;
 
-	bt_for_each_free(&tags->bitmap_tags, tag_map, tags->nr_reserved_tags);
 	if (tags->nr_reserved_tags)
-		bt_for_each_free(&tags->breserved_tags, tag_map, 0);
-
-	fn(data, tag_map);
-	kfree(tag_map);
+		bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true);
+	bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv,
+			false);
 }
 EXPORT_SYMBOL(blk_mq_tag_busy_iter);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1713686f5c2f..3baebcaf36db 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -525,58 +525,6 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
-struct blk_mq_timeout_data {
-	struct blk_mq_hw_ctx *hctx;
-	unsigned long *next;
-	unsigned int *next_set;
-};
-
-static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
-{
-	struct blk_mq_timeout_data *data = __data;
-	struct blk_mq_hw_ctx *hctx = data->hctx;
-	unsigned int tag;
-
-	 /* It may not be in flight yet (this is where
-	 * the REQ_ATOMIC_STARTED flag comes in). The requests are
-	 * statically allocated, so we know it's always safe to access the
-	 * memory associated with a bit offset into ->rqs[].
-	 */
-	tag = 0;
-	do {
-		struct request *rq;
-
-		tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag);
-		if (tag >= hctx->tags->nr_tags)
-			break;
-
-		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
-		if (rq->q != hctx->queue)
-			continue;
-		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
-			continue;
-
-		blk_rq_check_expired(rq, data->next, data->next_set);
-	} while (1);
-}
-
-static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx,
-					unsigned long *next,
-					unsigned int *next_set)
-{
-	struct blk_mq_timeout_data data = {
-		.hctx		= hctx,
-		.next		= next,
-		.next_set	= next_set,
-	};
-
-	/*
-	 * Ask the tagging code to iterate busy requests, so we can
-	 * check them for timeout.
-	 */
-	blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data);
-}
-
 static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -598,13 +546,30 @@ static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq)
 
 	return q->mq_ops->timeout(rq);
 }
+		
+struct blk_mq_timeout_data {
+	unsigned long next;
+	unsigned int next_set;
+};
+
+static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
+		struct request *rq, void *priv, bool reserved)
+{
+	struct blk_mq_timeout_data *data = priv;
+
+	if (test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+		blk_rq_check_expired(rq, &data->next, &data->next_set);
+}
 
-static void blk_mq_rq_timer(unsigned long data)
+static void blk_mq_rq_timer(unsigned long priv)
 {
-	struct request_queue *q = (struct request_queue *) data;
+	struct request_queue *q = (struct request_queue *)priv;
+	struct blk_mq_timeout_data data = {
+		.next		= 0,
+		.next_set	= 0,
+	};
 	struct blk_mq_hw_ctx *hctx;
-	unsigned long next = 0;
-	int i, next_set = 0;
+	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		/*
@@ -614,12 +579,12 @@ static void blk_mq_rq_timer(unsigned long data)
 		if (!hctx->nr_ctx || !hctx->tags)
 			continue;
 
-		blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);
+		blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
 	}
 
-	if (next_set) {
-		next = blk_rq_timeout(round_jiffies_up(next));
-		mod_timer(&q->timeout, next);
+	if (data.next_set) {
+		data.next = blk_rq_timeout(round_jiffies_up(data.next));
+		mod_timer(&q->timeout, data.next);
 	} else {
 		queue_for_each_hw_ctx(q, hctx, i)
 			blk_mq_tag_idle(hctx);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index cb217c16990d..0eb0f642be4b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -86,6 +86,9 @@ typedef int (init_request_fn)(void *, struct request *, unsigned int,
 typedef void (exit_request_fn)(void *, struct request *, unsigned int,
 		unsigned int);
 
+typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
+		bool);
+
 struct blk_mq_ops {
 	/*
 	 * Queue request
@@ -174,7 +177,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
-void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
+void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
+		void *priv);
 
 /*
  * Driver command data is immediately after the request. So subtract request
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index cdcc90b07ecb..e64583560701 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -68,7 +68,7 @@ static inline void scsi_activate_tcq(struct scsi_device *sdev, int depth)
 		return;
 
 	if (!shost_use_blk_mq(sdev->host) &&
-	    blk_queue_tagged(sdev->request_queue))
+	    !blk_queue_tagged(sdev->request_queue))
 		blk_queue_init_tags(sdev->request_queue, depth,
 				    sdev->host->bqt);
 
-- 
cgit v1.2.3


From 0152fb6b57c4fae769ee75ea2ae670f4ff39fba9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 13 Sep 2014 16:40:13 -0700
Subject: blk-mq: pass a reserved argument to the timeout handler

Allow blk-mq to pass an argument to the timeout handler to indicate
if we're timing out a reserved or regular command.  For many drivers
those need to be handled different.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c          |  6 +++---
 drivers/scsi/scsi_lib.c | 10 +++++++++-
 include/linux/blk-mq.h  |  3 ++-
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 298d6e360661..d12f1983d493 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -530,7 +530,7 @@ struct blk_mq_timeout_data {
 	unsigned int next_set;
 };
 
-static void blk_mq_rq_timed_out(struct request *req)
+static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
 	struct blk_mq_ops *ops = req->q->mq_ops;
 	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
@@ -548,7 +548,7 @@ static void blk_mq_rq_timed_out(struct request *req)
 		return;
 
 	if (ops->timeout)
-		ret = ops->timeout(req);
+		ret = ops->timeout(req, reserved);
 
 	switch (ret) {
 	case BLK_EH_HANDLED:
@@ -576,7 +576,7 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 
 	if (time_after_eq(jiffies, rq->deadline)) {
 		if (!blk_mark_rq_complete(rq))
-			blk_mq_rq_timed_out(rq);
+			blk_mq_rq_timed_out(rq, reserved);
 	} else if (!data->next_set || time_after(data->next, rq->deadline)) {
 		data->next = rq->deadline;
 		data->next_set = 1;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 73ce7d27f5c8..86b1156edb82 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1932,6 +1932,14 @@ out:
 	return ret;
 }
 
+static enum blk_eh_timer_return scsi_timeout(struct request *req,
+		bool reserved)
+{
+	if (reserved)
+		return BLK_EH_RESET_TIMER;
+	return scsi_times_out(req);
+}
+
 static int scsi_init_request(void *data, struct request *rq,
 		unsigned int hctx_idx, unsigned int request_idx,
 		unsigned int numa_node)
@@ -2043,7 +2051,7 @@ static struct blk_mq_ops scsi_mq_ops = {
 	.map_queue	= blk_mq_map_queue,
 	.queue_rq	= scsi_queue_rq,
 	.complete	= scsi_softirq_done,
-	.timeout	= scsi_times_out,
+	.timeout	= scsi_timeout,
 	.init_request	= scsi_init_request,
 	.exit_request	= scsi_exit_request,
 };
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0eb0f642be4b..325349559fb0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -79,6 +79,7 @@ struct blk_mq_tag_set {
 
 typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool);
 typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
+typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
 typedef int (init_request_fn)(void *, struct request *, unsigned int,
@@ -103,7 +104,7 @@ struct blk_mq_ops {
 	/*
 	 * Called on request timeout
 	 */
-	rq_timed_out_fn		*timeout;
+	timeout_fn		*timeout;
 
 	softirq_done_fn		*complete;
 
-- 
cgit v1.2.3


From 57a94e24bc927f642f7f48ca1bf5476aa5be269d Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Mon, 22 Sep 2014 20:11:50 +0200
Subject: mtd: nand: support ONFI timing mode retrieval for non-ONFI NANDs

Add an onfi_timing_mode_default field to nand_chip and nand_flash_dev in
order to support NAND timings definition for non-ONFI NAND.

NAND that support better timings mode than the default one have to define
a new entry in the nand_ids table.

The default timing mode should be deduced from timings description from
the datasheet and the ONFI specification
(www.onfi.org/~/media/ONFI/specs/onfi_3_1_spec.pdf, chapter 4.15
"Timing Parameters").
You should choose the closest mode that fit the timings requirements of
your NAND chip.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/nand_base.c |  2 ++
 include/linux/mtd/nand.h     | 11 +++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 801cad1de9eb..5b5c62712814 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3594,6 +3594,8 @@ static bool find_full_id_nand(struct mtd_info *mtd, struct nand_chip *chip,
 		chip->options |= type->options;
 		chip->ecc_strength_ds = NAND_ECC_STRENGTH(type);
 		chip->ecc_step_ds = NAND_ECC_STEP(type);
+		chip->onfi_timing_mode_default =
+					type->onfi_timing_mode_default;
 
 		*busw = type->options & NAND_BUSWIDTH_16;
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 8acb307b6fde..e4d451e4600b 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -587,6 +587,11 @@ struct nand_buffers {
  * @ecc_step_ds:	[INTERN] ECC step required by the @ecc_strength_ds,
  *                      also from the datasheet. It is the recommended ECC step
  *			size, if known; if unknown, set to zero.
+ * @onfi_timing_mode_default: [INTERN] default ONFI timing mode. This field is
+ *			      either deduced from the datasheet if the NAND
+ *			      chip is not ONFI compliant or set to 0 if it is
+ *			      (an ONFI chip is always configured in mode 0
+ *			      after a NAND reset)
  * @numchips:		[INTERN] number of physical chips
  * @chipsize:		[INTERN] the size of one chip for multichip arrays
  * @pagemask:		[INTERN] page number mask = number of (pages / chip) - 1
@@ -671,6 +676,7 @@ struct nand_chip {
 	uint8_t bits_per_cell;
 	uint16_t ecc_strength_ds;
 	uint16_t ecc_step_ds;
+	int onfi_timing_mode_default;
 	int badblockpos;
 	int badblockbits;
 
@@ -773,6 +779,10 @@ struct nand_chip {
  *               @ecc_step_ds in nand_chip{}, also from the datasheet.
  *               For example, the "4bit ECC for each 512Byte" can be set with
  *               NAND_ECC_INFO(4, 512).
+ * @onfi_timing_mode_default: the default ONFI timing mode entered after a NAND
+ *			      reset. Should be deduced from timings described
+ *			      in the datasheet.
+ *
  */
 struct nand_flash_dev {
 	char *name;
@@ -793,6 +803,7 @@ struct nand_flash_dev {
 		uint16_t strength_ds;
 		uint16_t step_ds;
 	} ecc;
+	int onfi_timing_mode_default;
 };
 
 /**
-- 
cgit v1.2.3


From 93af53b8633c4cb474585158512182b21219d743 Mon Sep 17 00:00:00 2001
From: Ezequiel García <ezequiel@vanguardiasur.com.ar>
Date: Sat, 20 Sep 2014 17:53:12 +0100
Subject: nand: omap2: Remove horrible ifdefs to fix module probe

The current code abuses ifdefs to determine if the selected ECC scheme
is supported by the running kernel. As a result the code is hard to read,
and it also fails to load as a module.

This commit removes all the ifdefs and instead introduces a function
omap2_nand_ecc_check() to check if the ECC is supported by using
IS_ENABLED(CONFIG_xxx).

Since IS_ENABLED() is true when a config is =y or =m, this change fixes the
module so it can be loaded with no issues.

Acked-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/omap2.c          | 130 +++++++++++++++++++++-----------------
 include/linux/platform_data/elm.h |  16 +++++
 2 files changed, 87 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index e1a9b310c159..f97a4ffd489d 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -136,7 +136,6 @@
 
 #define BADBLOCK_MARKER_LENGTH		2
 
-#ifdef CONFIG_MTD_NAND_OMAP_BCH
 static u_char bch16_vector[] = {0xf5, 0x24, 0x1c, 0xd0, 0x61, 0xb3, 0xf1, 0x55,
 				0x2e, 0x2c, 0x86, 0xa3, 0xed, 0x36, 0x1b, 0x78,
 				0x48, 0x76, 0xa9, 0x3b, 0x97, 0xd1, 0x7a, 0x93,
@@ -144,7 +143,6 @@ static u_char bch16_vector[] = {0xf5, 0x24, 0x1c, 0xd0, 0x61, 0xb3, 0xf1, 0x55,
 static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc,
 	0xac, 0x6b, 0xff, 0x99, 0x7b};
 static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10};
-#endif
 
 /* oob info generated runtime depending on ecc algorithm and layout selected */
 static struct nand_ecclayout omap_oobinfo;
@@ -1292,7 +1290,6 @@ static int __maybe_unused omap_calculate_ecc_bch(struct mtd_info *mtd,
 	return 0;
 }
 
-#ifdef CONFIG_MTD_NAND_OMAP_BCH
 /**
  * erased_sector_bitflips - count bit flips
  * @data:	data sector buffer
@@ -1593,33 +1590,71 @@ static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
 /**
  * is_elm_present - checks for presence of ELM module by scanning DT nodes
  * @omap_nand_info: NAND device structure containing platform data
- * @bch_type: 0x0=BCH4, 0x1=BCH8, 0x2=BCH16
  */
-static int is_elm_present(struct omap_nand_info *info,
-			struct device_node *elm_node, enum bch_ecc bch_type)
+static bool is_elm_present(struct omap_nand_info *info,
+			   struct device_node *elm_node)
 {
 	struct platform_device *pdev;
-	struct nand_ecc_ctrl *ecc = &info->nand.ecc;
-	int err;
+
 	/* check whether elm-id is passed via DT */
 	if (!elm_node) {
 		pr_err("nand: error: ELM DT node not found\n");
-		return -ENODEV;
+		return false;
 	}
 	pdev = of_find_device_by_node(elm_node);
 	/* check whether ELM device is registered */
 	if (!pdev) {
 		pr_err("nand: error: ELM device not found\n");
-		return -ENODEV;
+		return false;
 	}
 	/* ELM module available, now configure it */
 	info->elm_dev = &pdev->dev;
-	err = elm_config(info->elm_dev, bch_type,
-		(info->mtd.writesize / ecc->size), ecc->size, ecc->bytes);
+	return true;
+}
 
-	return err;
+static bool omap2_nand_ecc_check(struct omap_nand_info *info,
+				 struct omap_nand_platform_data	*pdata)
+{
+	bool ecc_needs_bch, ecc_needs_omap_bch, ecc_needs_elm;
+
+	switch (info->ecc_opt) {
+	case OMAP_ECC_BCH4_CODE_HW_DETECTION_SW:
+	case OMAP_ECC_BCH8_CODE_HW_DETECTION_SW:
+		ecc_needs_omap_bch = false;
+		ecc_needs_bch = true;
+		ecc_needs_elm = false;
+		break;
+	case OMAP_ECC_BCH4_CODE_HW:
+	case OMAP_ECC_BCH8_CODE_HW:
+	case OMAP_ECC_BCH16_CODE_HW:
+		ecc_needs_omap_bch = true;
+		ecc_needs_bch = false;
+		ecc_needs_elm = true;
+		break;
+	default:
+		ecc_needs_omap_bch = false;
+		ecc_needs_bch = false;
+		ecc_needs_elm = false;
+		break;
+	}
+
+	if (ecc_needs_bch && !IS_ENABLED(CONFIG_MTD_NAND_ECC_BCH)) {
+		dev_err(&info->pdev->dev,
+			"CONFIG_MTD_NAND_ECC_BCH not enabled\n");
+		return false;
+	}
+	if (ecc_needs_omap_bch && !IS_ENABLED(CONFIG_MTD_NAND_OMAP_BCH)) {
+		dev_err(&info->pdev->dev,
+			"CONFIG_MTD_NAND_OMAP_BCH not enabled\n");
+		return false;
+	}
+	if (ecc_needs_elm && !is_elm_present(info, pdata->elm_of_node)) {
+		dev_err(&info->pdev->dev, "ELM not available\n");
+		return false;
+	}
+
+	return true;
 }
-#endif /* CONFIG_MTD_NAND_ECC_BCH */
 
 static int omap_nand_probe(struct platform_device *pdev)
 {
@@ -1797,6 +1832,11 @@ static int omap_nand_probe(struct platform_device *pdev)
 		goto return_error;
 	}
 
+	if (!omap2_nand_ecc_check(info, pdata)) {
+		err = -EINVAL;
+		goto return_error;
+	}
+
 	/* populate MTD interface based on ECC scheme */
 	ecclayout		= &omap_oobinfo;
 	switch (info->ecc_opt) {
@@ -1829,7 +1869,6 @@ static int omap_nand_probe(struct platform_device *pdev)
 		break;
 
 	case OMAP_ECC_BCH4_CODE_HW_DETECTION_SW:
-#ifdef CONFIG_MTD_NAND_ECC_BCH
 		pr_info("nand: using OMAP_ECC_BCH4_CODE_HW_DETECTION_SW\n");
 		nand_chip->ecc.mode		= NAND_ECC_HW;
 		nand_chip->ecc.size		= 512;
@@ -1861,14 +1900,8 @@ static int omap_nand_probe(struct platform_device *pdev)
 			err = -EINVAL;
 		}
 		break;
-#else
-		pr_err("nand: error: CONFIG_MTD_NAND_ECC_BCH not enabled\n");
-		err = -EINVAL;
-		goto return_error;
-#endif
 
 	case OMAP_ECC_BCH4_CODE_HW:
-#ifdef CONFIG_MTD_NAND_OMAP_BCH
 		pr_info("nand: using OMAP_ECC_BCH4_CODE_HW ECC scheme\n");
 		nand_chip->ecc.mode		= NAND_ECC_HW;
 		nand_chip->ecc.size		= 512;
@@ -1890,21 +1923,15 @@ static int omap_nand_probe(struct platform_device *pdev)
 		/* reserved marker already included in ecclayout->eccbytes */
 		ecclayout->oobfree->offset	=
 				ecclayout->eccpos[ecclayout->eccbytes - 1] + 1;
-		/* This ECC scheme requires ELM H/W block */
-		if (is_elm_present(info, pdata->elm_of_node, BCH4_ECC) < 0) {
-			pr_err("nand: error: could not initialize ELM\n");
-			err = -ENODEV;
+
+		err = elm_config(info->elm_dev, BCH4_ECC,
+				 info->mtd.writesize / nand_chip->ecc.size,
+				 nand_chip->ecc.size, nand_chip->ecc.bytes);
+		if (err < 0)
 			goto return_error;
-		}
 		break;
-#else
-		pr_err("nand: error: CONFIG_MTD_NAND_OMAP_BCH not enabled\n");
-		err = -EINVAL;
-		goto return_error;
-#endif
 
 	case OMAP_ECC_BCH8_CODE_HW_DETECTION_SW:
-#ifdef CONFIG_MTD_NAND_ECC_BCH
 		pr_info("nand: using OMAP_ECC_BCH8_CODE_HW_DETECTION_SW\n");
 		nand_chip->ecc.mode		= NAND_ECC_HW;
 		nand_chip->ecc.size		= 512;
@@ -1937,14 +1964,8 @@ static int omap_nand_probe(struct platform_device *pdev)
 			goto return_error;
 		}
 		break;
-#else
-		pr_err("nand: error: CONFIG_MTD_NAND_ECC_BCH not enabled\n");
-		err = -EINVAL;
-		goto return_error;
-#endif
 
 	case OMAP_ECC_BCH8_CODE_HW:
-#ifdef CONFIG_MTD_NAND_OMAP_BCH
 		pr_info("nand: using OMAP_ECC_BCH8_CODE_HW ECC scheme\n");
 		nand_chip->ecc.mode		= NAND_ECC_HW;
 		nand_chip->ecc.size		= 512;
@@ -1956,12 +1977,13 @@ static int omap_nand_probe(struct platform_device *pdev)
 		nand_chip->ecc.calculate	= omap_calculate_ecc_bch;
 		nand_chip->ecc.read_page	= omap_read_page_bch;
 		nand_chip->ecc.write_page	= omap_write_page_bch;
-		/* This ECC scheme requires ELM H/W block */
-		err = is_elm_present(info, pdata->elm_of_node, BCH8_ECC);
-		if (err < 0) {
-			pr_err("nand: error: could not initialize ELM\n");
+
+		err = elm_config(info->elm_dev, BCH8_ECC,
+				 info->mtd.writesize / nand_chip->ecc.size,
+				 nand_chip->ecc.size, nand_chip->ecc.bytes);
+		if (err < 0)
 			goto return_error;
-		}
+
 		/* define ECC layout */
 		ecclayout->eccbytes		= nand_chip->ecc.bytes *
 							(mtd->writesize /
@@ -1973,14 +1995,8 @@ static int omap_nand_probe(struct platform_device *pdev)
 		ecclayout->oobfree->offset	=
 				ecclayout->eccpos[ecclayout->eccbytes - 1] + 1;
 		break;
-#else
-		pr_err("nand: error: CONFIG_MTD_NAND_OMAP_BCH not enabled\n");
-		err = -EINVAL;
-		goto return_error;
-#endif
 
 	case OMAP_ECC_BCH16_CODE_HW:
-#ifdef CONFIG_MTD_NAND_OMAP_BCH
 		pr_info("using OMAP_ECC_BCH16_CODE_HW ECC scheme\n");
 		nand_chip->ecc.mode		= NAND_ECC_HW;
 		nand_chip->ecc.size		= 512;
@@ -1991,12 +2007,13 @@ static int omap_nand_probe(struct platform_device *pdev)
 		nand_chip->ecc.calculate	= omap_calculate_ecc_bch;
 		nand_chip->ecc.read_page	= omap_read_page_bch;
 		nand_chip->ecc.write_page	= omap_write_page_bch;
-		/* This ECC scheme requires ELM H/W block */
-		err = is_elm_present(info, pdata->elm_of_node, BCH16_ECC);
-		if (err < 0) {
-			pr_err("ELM is required for this ECC scheme\n");
+
+		err = elm_config(info->elm_dev, BCH16_ECC,
+				 info->mtd.writesize / nand_chip->ecc.size,
+				 nand_chip->ecc.size, nand_chip->ecc.bytes);
+		if (err < 0)
 			goto return_error;
-		}
+
 		/* define ECC layout */
 		ecclayout->eccbytes		= nand_chip->ecc.bytes *
 							(mtd->writesize /
@@ -2008,11 +2025,6 @@ static int omap_nand_probe(struct platform_device *pdev)
 		ecclayout->oobfree->offset	=
 				ecclayout->eccpos[ecclayout->eccbytes - 1] + 1;
 		break;
-#else
-		pr_err("nand: error: CONFIG_MTD_NAND_OMAP_BCH not enabled\n");
-		err = -EINVAL;
-		goto return_error;
-#endif
 	default:
 		pr_err("nand: error: invalid or unsupported ECC scheme\n");
 		err = -EINVAL;
diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h
index 780d1e97f620..b8686c00f15f 100644
--- a/include/linux/platform_data/elm.h
+++ b/include/linux/platform_data/elm.h
@@ -42,8 +42,24 @@ struct elm_errorvec {
 	int error_loc[16];
 };
 
+#if IS_ENABLED(CONFIG_MTD_NAND_OMAP_BCH)
 void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc,
 		struct elm_errorvec *err_vec);
 int elm_config(struct device *dev, enum bch_ecc bch_type,
 	int ecc_steps, int ecc_step_size, int ecc_syndrome_size);
+#else
+static inline void
+elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc,
+			  struct elm_errorvec *err_vec)
+{
+}
+
+static inline int elm_config(struct device *dev, enum bch_ecc bch_type,
+			     int ecc_steps, int ecc_step_size,
+			     int ecc_syndrome_size)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_MTD_NAND_ECC_BCH */
+
 #endif /* __ELM_H */
-- 
cgit v1.2.3


From bee3f304435a9c8c70b135083e23516872a17c98 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Mon, 22 Sep 2014 14:46:13 -0600
Subject: PCI: Remove unused pci_find_upstream_pcie_bridge()

pci_find_upstream_pcie_bridge() is unused, so remove it.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/search.c | 34 ----------------------------------
 include/linux/pci.h  | 11 -----------
 2 files changed, 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 827ad831f1dd..a81f413083e4 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -103,40 +103,6 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
 	return ret;
 }
 
-/*
- * find the upstream PCIe-to-PCI bridge of a PCI device
- * if the device is PCIE, return NULL
- * if the device isn't connected to a PCIe bridge (that is its parent is a
- * legacy PCI bridge and the bridge is directly connected to bus 0), return its
- * parent
- */
-struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
-{
-	struct pci_dev *tmp = NULL;
-
-	if (pci_is_pcie(pdev))
-		return NULL;
-	while (1) {
-		if (pci_is_root_bus(pdev->bus))
-			break;
-		pdev = pdev->bus->self;
-		/* a p2p bridge */
-		if (!pci_is_pcie(pdev)) {
-			tmp = pdev;
-			continue;
-		}
-		/* PCI device should connect to a PCIe bridge */
-		if (pci_pcie_type(pdev) != PCI_EXP_TYPE_PCI_BRIDGE) {
-			/* Busted hardware? */
-			WARN_ON_ONCE(1);
-			return NULL;
-		}
-		return pdev;
-	}
-
-	return tmp;
-}
-
 static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr)
 {
 	struct pci_bus *child;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 92c131efec1c..bf5a47c0cb42 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1828,17 +1828,6 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
 			   int (*fn)(struct pci_dev *pdev,
 				     u16 alias, void *data), void *data);
 
-/**
- * pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device
- * @pdev: the PCI device
- *
- * if the device is PCIE, return NULL
- * if the device isn't connected to a PCIe bridge (that is its parent is a
- * legacy PCI bridge and the bridge is directly connected to bus 0), return its
- * parent
- */
-struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
-
 /* helper functions for operation of device flag */
 static inline void pci_set_dev_assigned(struct pci_dev *pdev)
 {
-- 
cgit v1.2.3


From 5d8f4c9fdd67404c9f94683836e49ec8bded2287 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 17 Sep 2014 10:41:13 -0600
Subject: PCI: Remove unused pci_get_dma_source()

pci_get_dma_source() is unused, so remove it.  We now have
dma_alias_devfn() to describe this.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/quirks.c | 51 ---------------------------------------------------
 include/linux/pci.h  |  5 -----
 2 files changed, 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index da062d8ae36f..01c935cc38dc 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3514,57 +3514,6 @@ DECLARE_PCI_FIXUP_HEADER(0x1283, 0x8892, quirk_use_pcie_bridge_dma_alias);
 /* Intel 82801, https://bugzilla.kernel.org/show_bug.cgi?id=44881#c49 */
 DECLARE_PCI_FIXUP_HEADER(0x8086, 0x244e, quirk_use_pcie_bridge_dma_alias);
 
-static struct pci_dev *pci_func_0_dma_source(struct pci_dev *dev)
-{
-	if (!PCI_FUNC(dev->devfn))
-		return pci_dev_get(dev);
-
-	return pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
-}
-
-static const struct pci_dev_dma_source {
-	u16 vendor;
-	u16 device;
-	struct pci_dev *(*dma_source)(struct pci_dev *dev);
-} pci_dev_dma_source[] = {
-	/*
-	 * https://bugzilla.redhat.com/show_bug.cgi?id=605888
-	 *
-	 * Some Ricoh devices use the function 0 source ID for DMA on
-	 * other functions of a multifunction device.  The DMA devices
-	 * is therefore function 0, which will have implications of the
-	 * iommu grouping of these devices.
-	 */
-	{ PCI_VENDOR_ID_RICOH, 0xe822, pci_func_0_dma_source },
-	{ PCI_VENDOR_ID_RICOH, 0xe230, pci_func_0_dma_source },
-	{ PCI_VENDOR_ID_RICOH, 0xe832, pci_func_0_dma_source },
-	{ PCI_VENDOR_ID_RICOH, 0xe476, pci_func_0_dma_source },
-	{ 0 }
-};
-
-/*
- * IOMMUs with isolation capabilities need to be programmed with the
- * correct source ID of a device.  In most cases, the source ID matches
- * the device doing the DMA, but sometimes hardware is broken and will
- * tag the DMA as being sourced from a different device.  This function
- * allows that translation.  Note that the reference count of the
- * returned device is incremented on all paths.
- */
-struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
-{
-	const struct pci_dev_dma_source *i;
-
-	for (i = pci_dev_dma_source; i->dma_source; i++) {
-		if ((i->vendor == dev->vendor ||
-		     i->vendor == (u16)PCI_ANY_ID) &&
-		    (i->device == dev->device ||
-		     i->device == (u16)PCI_ANY_ID))
-			return i->dma_source(dev);
-	}
-
-	return pci_dev_get(dev);
-}
-
 /*
  * AMD has indicated that the devices below do not support peer-to-peer
  * in any system where they are found in the southbridge with an AMD
diff --git a/include/linux/pci.h b/include/linux/pci.h
index bf5a47c0cb42..d68d8e33c196 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1557,16 +1557,11 @@ enum pci_fixup_pass {
 
 #ifdef CONFIG_PCI_QUIRKS
 void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
-struct pci_dev *pci_get_dma_source(struct pci_dev *dev);
 int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
 void pci_dev_specific_enable_acs(struct pci_dev *dev);
 #else
 static inline void pci_fixup_device(enum pci_fixup_pass pass,
 				    struct pci_dev *dev) { }
-static inline struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
-{
-	return pci_dev_get(dev);
-}
 static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
 					       u16 acs_flags)
 {
-- 
cgit v1.2.3


From 48d11e067fc90ec9fc9c8f7ab982e5a83bf1887b Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Mon, 22 Sep 2014 12:26:10 -0700
Subject: mmc: Consolidate emmc tuning blocks

The same tuning block exists in the dw_mmc h.c and sdhci-msm.c
files. Move these into mmc.c so that they can be shared across
drivers.

Reported-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc.c       | 32 ++++++++++++++++++++++++++++++++
 drivers/mmc/host/dw_mmc.c    | 30 ------------------------------
 drivers/mmc/host/sdhci-msm.c | 38 ++++----------------------------------
 include/linux/mmc/mmc.h      |  5 +++++
 4 files changed, 41 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index ce11d89aa305..6390787fb32a 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1177,6 +1177,38 @@ bus_speed:
 	return err;
 }
 
+const u8 tuning_blk_pattern_4bit[MMC_TUNING_BLK_PATTERN_4BIT_SIZE] = {
+	0xff, 0x0f, 0xff, 0x00, 0xff, 0xcc, 0xc3, 0xcc,
+	0xc3, 0x3c, 0xcc, 0xff, 0xfe, 0xff, 0xfe, 0xef,
+	0xff, 0xdf, 0xff, 0xdd, 0xff, 0xfb, 0xff, 0xfb,
+	0xbf, 0xff, 0x7f, 0xff, 0x77, 0xf7, 0xbd, 0xef,
+	0xff, 0xf0, 0xff, 0xf0, 0x0f, 0xfc, 0xcc, 0x3c,
+	0xcc, 0x33, 0xcc, 0xcf, 0xff, 0xef, 0xff, 0xee,
+	0xff, 0xfd, 0xff, 0xfd, 0xdf, 0xff, 0xbf, 0xff,
+	0xbb, 0xff, 0xf7, 0xff, 0xf7, 0x7f, 0x7b, 0xde,
+};
+EXPORT_SYMBOL(tuning_blk_pattern_4bit);
+
+const u8 tuning_blk_pattern_8bit[MMC_TUNING_BLK_PATTERN_8BIT_SIZE] = {
+	0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+	0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
+	0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+	0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+	0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+	0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+	0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+	0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+	0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
+	0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
+	0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
+	0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+	0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+	0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+	0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+};
+EXPORT_SYMBOL(tuning_blk_pattern_8bit);
+
 /*
  * Execute tuning sequence to seek the proper bus operating
  * conditions for HS200 and HS400, which sends CMD21 to the device.
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 109fefeb9597..69f0cc68d5b2 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -82,36 +82,6 @@ struct idmac_desc {
 };
 #endif /* CONFIG_MMC_DW_IDMAC */
 
-static const u8 tuning_blk_pattern_4bit[] = {
-	0xff, 0x0f, 0xff, 0x00, 0xff, 0xcc, 0xc3, 0xcc,
-	0xc3, 0x3c, 0xcc, 0xff, 0xfe, 0xff, 0xfe, 0xef,
-	0xff, 0xdf, 0xff, 0xdd, 0xff, 0xfb, 0xff, 0xfb,
-	0xbf, 0xff, 0x7f, 0xff, 0x77, 0xf7, 0xbd, 0xef,
-	0xff, 0xf0, 0xff, 0xf0, 0x0f, 0xfc, 0xcc, 0x3c,
-	0xcc, 0x33, 0xcc, 0xcf, 0xff, 0xef, 0xff, 0xee,
-	0xff, 0xfd, 0xff, 0xfd, 0xdf, 0xff, 0xbf, 0xff,
-	0xbb, 0xff, 0xf7, 0xff, 0xf7, 0x7f, 0x7b, 0xde,
-};
-
-static const u8 tuning_blk_pattern_8bit[] = {
-	0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
-	0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
-	0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
-	0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
-	0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
-	0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
-	0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
-	0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
-	0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
-	0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
-	0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
-	0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
-	0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
-	0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
-	0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
-	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
-};
-
 static bool dw_mci_reset(struct dw_mci *host);
 
 #if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 19539c61353c..30804385af6d 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -46,36 +46,6 @@
 #define CMUX_SHIFT_PHASE_SHIFT	24
 #define CMUX_SHIFT_PHASE_MASK	(7 << CMUX_SHIFT_PHASE_SHIFT)
 
-static const u8 tuning_block_64[] = {
-	0xff, 0x0f, 0xff, 0x00, 0xff, 0xcc, 0xc3, 0xcc,
-	0xc3, 0x3c, 0xcc, 0xff, 0xfe, 0xff, 0xfe, 0xef,
-	0xff, 0xdf, 0xff, 0xdd, 0xff, 0xfb, 0xff, 0xfb,
-	0xbf, 0xff, 0x7f, 0xff, 0x77, 0xf7, 0xbd, 0xef,
-	0xff, 0xf0, 0xff, 0xf0, 0x0f, 0xfc, 0xcc, 0x3c,
-	0xcc, 0x33, 0xcc, 0xcf, 0xff, 0xef, 0xff, 0xee,
-	0xff, 0xfd, 0xff, 0xfd, 0xdf, 0xff, 0xbf, 0xff,
-	0xbb, 0xff, 0xf7, 0xff, 0xf7, 0x7f, 0x7b, 0xde,
-};
-
-static const u8 tuning_block_128[] = {
-	0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
-	0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
-	0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
-	0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
-	0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
-	0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
-	0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
-	0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
-	0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
-	0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
-	0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
-	0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
-	0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
-	0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
-	0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
-	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
-};
-
 struct sdhci_msm_host {
 	struct platform_device *pdev;
 	void __iomem *core_mem;	/* MSM SDCC mapped address */
@@ -370,8 +340,8 @@ static int sdhci_msm_execute_tuning(struct sdhci_host *host, u32 opcode)
 {
 	int tuning_seq_cnt = 3;
 	u8 phase, *data_buf, tuned_phases[16], tuned_phase_cnt = 0;
-	const u8 *tuning_block_pattern = tuning_block_64;
-	int size = sizeof(tuning_block_64);	/* Pattern size in bytes */
+	const u8 *tuning_block_pattern = tuning_blk_pattern_4bit;
+	int size = sizeof(tuning_blk_pattern_4bit);
 	int rc;
 	struct mmc_host *mmc = host->mmc;
 	struct mmc_ios ios = host->mmc->ios;
@@ -387,8 +357,8 @@ static int sdhci_msm_execute_tuning(struct sdhci_host *host, u32 opcode)
 
 	if ((opcode == MMC_SEND_TUNING_BLOCK_HS200) &&
 	    (mmc->ios.bus_width == MMC_BUS_WIDTH_8)) {
-		tuning_block_pattern = tuning_block_128;
-		size = sizeof(tuning_block_128);
+		tuning_block_pattern = tuning_blk_pattern_8bit;
+		size = sizeof(tuning_blk_pattern_8bit);
 	}
 
 	data_buf = kmalloc(size, GFP_KERNEL);
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 78753bc90a87..1cd00b3a75b9 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -53,6 +53,11 @@
 #define MMC_SEND_TUNING_BLOCK    19   /* adtc                    R1  */
 #define MMC_SEND_TUNING_BLOCK_HS200	21	/* adtc R1  */
 
+#define MMC_TUNING_BLK_PATTERN_4BIT_SIZE	 64
+#define MMC_TUNING_BLK_PATTERN_8BIT_SIZE	128
+extern const u8 tuning_blk_pattern_4bit[MMC_TUNING_BLK_PATTERN_4BIT_SIZE];
+extern const u8 tuning_blk_pattern_8bit[MMC_TUNING_BLK_PATTERN_8BIT_SIZE];
+
   /* class 3 */
 #define MMC_WRITE_DAT_UNTIL_STOP 20   /* adtc [31:0] data addr   R1  */
 
-- 
cgit v1.2.3


From d7bead1b8890c3e47a24db270fdb840ea728d8f0 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 22 Sep 2014 09:52:18 -0700
Subject: libata: change ata_<foo>_printk routines to return void

The return value is not used by callers of these functions nor
by uses of all macros so change the functions to return void.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-core.c | 35 +++++++++++++----------------------
 include/linux/libata.h    | 12 ++++++------
 2 files changed, 19 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 782d126cf191..f785ae50a0ea 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6772,32 +6772,28 @@ const struct ata_port_info ata_dummy_port_info = {
 /*
  * Utility print functions
  */
-int ata_port_printk(const struct ata_port *ap, const char *level,
-		    const char *fmt, ...)
+void ata_port_printk(const struct ata_port *ap, const char *level,
+		     const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	int r;
 
 	va_start(args, fmt);
 
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	r = printk("%sata%u: %pV", level, ap->print_id, &vaf);
+	printk("%sata%u: %pV", level, ap->print_id, &vaf);
 
 	va_end(args);
-
-	return r;
 }
 EXPORT_SYMBOL(ata_port_printk);
 
-int ata_link_printk(const struct ata_link *link, const char *level,
-		    const char *fmt, ...)
+void ata_link_printk(const struct ata_link *link, const char *level,
+		     const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	int r;
 
 	va_start(args, fmt);
 
@@ -6805,37 +6801,32 @@ int ata_link_printk(const struct ata_link *link, const char *level,
 	vaf.va = &args;
 
 	if (sata_pmp_attached(link->ap) || link->ap->slave_link)
-		r = printk("%sata%u.%02u: %pV",
-			   level, link->ap->print_id, link->pmp, &vaf);
+		printk("%sata%u.%02u: %pV",
+		       level, link->ap->print_id, link->pmp, &vaf);
 	else
-		r = printk("%sata%u: %pV",
-			   level, link->ap->print_id, &vaf);
+		printk("%sata%u: %pV",
+		       level, link->ap->print_id, &vaf);
 
 	va_end(args);
-
-	return r;
 }
 EXPORT_SYMBOL(ata_link_printk);
 
-int ata_dev_printk(const struct ata_device *dev, const char *level,
+void ata_dev_printk(const struct ata_device *dev, const char *level,
 		    const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	int r;
 
 	va_start(args, fmt);
 
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	r = printk("%sata%u.%02u: %pV",
-		   level, dev->link->ap->print_id, dev->link->pmp + dev->devno,
-		   &vaf);
+	printk("%sata%u.%02u: %pV",
+	       level, dev->link->ap->print_id, dev->link->pmp + dev->devno,
+	       &vaf);
 
 	va_end(args);
-
-	return r;
 }
 EXPORT_SYMBOL(ata_dev_printk);
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 92abb497ab14..bd5fefeaf548 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1404,14 +1404,14 @@ static inline int sata_srst_pmp(struct ata_link *link)
  * printk helpers
  */
 __printf(3, 4)
-int ata_port_printk(const struct ata_port *ap, const char *level,
-		    const char *fmt, ...);
+void ata_port_printk(const struct ata_port *ap, const char *level,
+		     const char *fmt, ...);
 __printf(3, 4)
-int ata_link_printk(const struct ata_link *link, const char *level,
-		    const char *fmt, ...);
+void ata_link_printk(const struct ata_link *link, const char *level,
+		     const char *fmt, ...);
 __printf(3, 4)
-int ata_dev_printk(const struct ata_device *dev, const char *level,
-		   const char *fmt, ...);
+void ata_dev_printk(const struct ata_device *dev, const char *level,
+		    const char *fmt, ...);
 
 #define ata_port_err(ap, fmt, ...)				\
 	ata_port_printk(ap, KERN_ERR, fmt, ##__VA_ARGS__)
-- 
cgit v1.2.3


From d9ff958bb34aabdce08d11b0db24123c093d87cd Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 20 Aug 2014 19:20:53 +0200
Subject: dmaengine: Mark the struct dma_slave_config direction field
 deprecated

The direction passed to the device_prep_slave_sg, device_prep_dma_cyclic
or device_prep_interleaved_dma (through struct dma_interleaved_template)
should be used instead.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 1f9e642c66ad..3d291f59acd8 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -307,7 +307,9 @@ enum dma_slave_buswidth {
  * struct dma_slave_config - dma slave channel runtime config
  * @direction: whether the data shall go in or out on this slave
  * channel, right now. DMA_MEM_TO_DEV and DMA_DEV_TO_MEM are
- * legal values.
+ * legal values. DEPRECATED, drivers should use the direction argument
+ * to the device_prep_slave_sg and device_prep_dma_cyclic functions or
+ * the dir field in the dma_interleaved_template structure.
  * @src_addr: this is the physical address where DMA slave data
  * should be read (RX), if the source is memory this argument is
  * ignored.
-- 
cgit v1.2.3


From e1db1706c86ee455f25eeaeadeda827e1e02310f Mon Sep 17 00:00:00 2001
From: abdoulaye berthe <berthe.ab@gmail.com>
Date: Sat, 5 Jul 2014 18:28:50 +0200
Subject: gpio: gpiolib: set gpiochip_remove retval to void

This avoids handling gpiochip remove error in device
remove handler.

Signed-off-by: Abdoulaye Berthe <berthe.ab@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 24 +++++++-----------------
 include/linux/gpio/driver.h |  2 +-
 2 files changed, 8 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a5831d6a9b91..bf1bb795f100 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -308,10 +308,9 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
  *
  * A gpio_chip with any GPIOs still requested may not be removed.
  */
-int gpiochip_remove(struct gpio_chip *chip)
+void gpiochip_remove(struct gpio_chip *chip)
 {
 	unsigned long	flags;
-	int		status = 0;
 	unsigned	id;
 
 	acpi_gpiochip_remove(chip);
@@ -323,24 +322,15 @@ int gpiochip_remove(struct gpio_chip *chip)
 	of_gpiochip_remove(chip);
 
 	for (id = 0; id < chip->ngpio; id++) {
-		if (test_bit(FLAG_REQUESTED, &chip->desc[id].flags)) {
-			status = -EBUSY;
-			break;
-		}
-	}
-	if (status == 0) {
-		for (id = 0; id < chip->ngpio; id++)
-			chip->desc[id].chip = NULL;
-
-		list_del(&chip->list);
+		if (test_bit(FLAG_REQUESTED, &chip->desc[id].flags))
+			dev_crit(chip->dev, "REMOVING GPIOCHIP WITH GPIOS STILL REQUESTED\n");
 	}
+	for (id = 0; id < chip->ngpio; id++)
+		chip->desc[id].chip = NULL;
 
+	list_del(&chip->list);
 	spin_unlock_irqrestore(&gpio_lock, flags);
-
-	if (status == 0)
-		gpiochip_unexport(chip);
-
-	return status;
+	gpiochip_unexport(chip);
 }
 EXPORT_SYMBOL_GPL(gpiochip_remove);
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index a2de58fffd19..560bf7fa614f 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -141,7 +141,7 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip,
 
 /* add/remove chips */
 extern int gpiochip_add(struct gpio_chip *chip);
-extern int gpiochip_remove(struct gpio_chip *chip);
+extern void gpiochip_remove(struct gpio_chip *chip);
 extern struct gpio_chip *gpiochip_find(void *data,
 			      int (*match)(struct gpio_chip *chip, void *data));
 
-- 
cgit v1.2.3


From 3d2613c4289ff22de3aa24d2d0a29e33937f023a Mon Sep 17 00:00:00 2001
From: Weike Chen <alvin.chen@intel.com>
Date: Wed, 17 Sep 2014 09:18:39 -0700
Subject: GPIO: gpio-dwapb: Enable platform driver binding to MFD driver

The Synopsys DesignWare APB GPIO driver only supports open firmware devices.
But, like Intel Quark X1000 SOC, which has a single PCI function exporting
a GPIO and an I2C controller, it is a Multifunction device. This patch is
to enable the current Synopsys DesignWare APB GPIO driver to support the
Multifunction device which exports the designware GPIO controller.

Reviewed-by: Hock Leong Kweh <hock.leong.kweh@intel.com>
Signed-off-by: Weike Chen <alvin.chen@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/Kconfig                     |   1 -
 drivers/gpio/gpio-dwapb.c                | 224 +++++++++++++++++++++++--------
 include/linux/platform_data/gpio-dwapb.h |  32 +++++
 3 files changed, 199 insertions(+), 58 deletions(-)
 create mode 100644 include/linux/platform_data/gpio-dwapb.h

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index ec398bee7c60..d9ffb6dfa54b 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -136,7 +136,6 @@ config GPIO_DWAPB
 	tristate "Synopsys DesignWare APB GPIO driver"
 	select GPIO_GENERIC
 	select GENERIC_IRQ_CHIP
-	depends on OF_GPIO
 	help
 	  Say Y or M here to build support for the Synopsys DesignWare APB
 	  GPIO block.
diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c
index d6618a6e2399..27466b5af5e8 100644
--- a/drivers/gpio/gpio-dwapb.c
+++ b/drivers/gpio/gpio-dwapb.c
@@ -21,6 +21,8 @@
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
+#include <linux/platform_data/gpio-dwapb.h>
+#include <linux/slab.h>
 
 #define GPIO_SWPORTA_DR		0x00
 #define GPIO_SWPORTA_DDR	0x04
@@ -84,11 +86,10 @@ static void dwapb_toggle_trigger(struct dwapb_gpio *gpio, unsigned int offs)
 	writel(v, gpio->regs + GPIO_INT_POLARITY);
 }
 
-static void dwapb_irq_handler(u32 irq, struct irq_desc *desc)
+static u32 dwapb_do_irq(struct dwapb_gpio *gpio)
 {
-	struct dwapb_gpio *gpio = irq_get_handler_data(irq);
-	struct irq_chip *chip = irq_desc_get_chip(desc);
 	u32 irq_status = readl_relaxed(gpio->regs + GPIO_INTSTATUS);
+	u32 ret = irq_status;
 
 	while (irq_status) {
 		int hwirq = fls(irq_status) - 1;
@@ -102,6 +103,16 @@ static void dwapb_irq_handler(u32 irq, struct irq_desc *desc)
 			dwapb_toggle_trigger(gpio, hwirq);
 	}
 
+	return ret;
+}
+
+static void dwapb_irq_handler(u32 irq, struct irq_desc *desc)
+{
+	struct dwapb_gpio *gpio = irq_get_handler_data(irq);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+
+	dwapb_do_irq(gpio);
+
 	if (chip->irq_eoi)
 		chip->irq_eoi(irq_desc_get_irq_data(desc));
 }
@@ -207,22 +218,26 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type)
 	return 0;
 }
 
+static irqreturn_t dwapb_irq_handler_mfd(int irq, void *dev_id)
+{
+	u32 worked;
+	struct dwapb_gpio *gpio = dev_id;
+
+	worked = dwapb_do_irq(gpio);
+
+	return worked ? IRQ_HANDLED : IRQ_NONE;
+}
+
 static void dwapb_configure_irqs(struct dwapb_gpio *gpio,
-				 struct dwapb_gpio_port *port)
+				 struct dwapb_gpio_port *port,
+				 struct dwapb_port_property *pp)
 {
 	struct gpio_chip *gc = &port->bgc.gc;
-	struct device_node *node =  gc->of_node;
-	struct irq_chip_generic	*irq_gc;
+	struct device_node *node = pp->node;
+	struct irq_chip_generic	*irq_gc = NULL;
 	unsigned int hwirq, ngpio = gc->ngpio;
 	struct irq_chip_type *ct;
-	int err, irq, i;
-
-	irq = irq_of_parse_and_map(node, 0);
-	if (!irq) {
-		dev_warn(gpio->dev, "no irq for bank %s\n",
-			port->bgc.gc.of_node->full_name);
-		return;
-	}
+	int err, i;
 
 	gpio->domain = irq_domain_add_linear(node, ngpio,
 					     &irq_generic_chip_ops, gpio);
@@ -269,8 +284,24 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio,
 	irq_gc->chip_types[1].type = IRQ_TYPE_EDGE_BOTH;
 	irq_gc->chip_types[1].handler = handle_edge_irq;
 
-	irq_set_chained_handler(irq, dwapb_irq_handler);
-	irq_set_handler_data(irq, gpio);
+	if (!pp->irq_shared) {
+		irq_set_chained_handler(pp->irq, dwapb_irq_handler);
+		irq_set_handler_data(pp->irq, gpio);
+	} else {
+		/*
+		 * Request a shared IRQ since where MFD would have devices
+		 * using the same irq pin
+		 */
+		err = devm_request_irq(gpio->dev, pp->irq,
+				       dwapb_irq_handler_mfd,
+				       IRQF_SHARED, "gpio-dwapb-mfd", gpio);
+		if (err) {
+			dev_err(gpio->dev, "error requesting IRQ\n");
+			irq_domain_remove(gpio->domain);
+			gpio->domain = NULL;
+			return;
+		}
+	}
 
 	for (hwirq = 0 ; hwirq < ngpio ; hwirq++)
 		irq_create_mapping(gpio->domain, hwirq);
@@ -296,57 +327,42 @@ static void dwapb_irq_teardown(struct dwapb_gpio *gpio)
 }
 
 static int dwapb_gpio_add_port(struct dwapb_gpio *gpio,
-			       struct device_node *port_np,
+			       struct dwapb_port_property *pp,
 			       unsigned int offs)
 {
 	struct dwapb_gpio_port *port;
-	u32 port_idx, ngpio;
 	void __iomem *dat, *set, *dirout;
 	int err;
 
-	if (of_property_read_u32(port_np, "reg", &port_idx) ||
-		port_idx >= DWAPB_MAX_PORTS) {
-		dev_err(gpio->dev, "missing/invalid port index for %s\n",
-			port_np->full_name);
-		return -EINVAL;
-	}
-
 	port = &gpio->ports[offs];
 	port->gpio = gpio;
 
-	if (of_property_read_u32(port_np, "snps,nr-gpios", &ngpio)) {
-		dev_info(gpio->dev, "failed to get number of gpios for %s\n",
-			 port_np->full_name);
-		ngpio = 32;
-	}
-
-	dat = gpio->regs + GPIO_EXT_PORTA + (port_idx * GPIO_EXT_PORT_SIZE);
-	set = gpio->regs + GPIO_SWPORTA_DR + (port_idx * GPIO_SWPORT_DR_SIZE);
+	dat = gpio->regs + GPIO_EXT_PORTA + (pp->idx * GPIO_EXT_PORT_SIZE);
+	set = gpio->regs + GPIO_SWPORTA_DR + (pp->idx * GPIO_SWPORT_DR_SIZE);
 	dirout = gpio->regs + GPIO_SWPORTA_DDR +
-		(port_idx * GPIO_SWPORT_DDR_SIZE);
+		(pp->idx * GPIO_SWPORT_DDR_SIZE);
 
 	err = bgpio_init(&port->bgc, gpio->dev, 4, dat, set, NULL, dirout,
 			 NULL, false);
 	if (err) {
 		dev_err(gpio->dev, "failed to init gpio chip for %s\n",
-			port_np->full_name);
+			pp->name);
 		return err;
 	}
 
-	port->bgc.gc.ngpio = ngpio;
-	port->bgc.gc.of_node = port_np;
+#ifdef CONFIG_OF_GPIO
+	port->bgc.gc.of_node = pp->node;
+#endif
+	port->bgc.gc.ngpio = pp->ngpio;
+	port->bgc.gc.base = pp->gpio_base;
 
-	/*
-	 * Only port A can provide interrupts in all configurations of the IP.
-	 */
-	if (port_idx == 0 &&
-	    of_property_read_bool(port_np, "interrupt-controller"))
-		dwapb_configure_irqs(gpio, port);
+	if (pp->irq)
+		dwapb_configure_irqs(gpio, port, pp);
 
 	err = gpiochip_add(&port->bgc.gc);
 	if (err)
 		dev_err(gpio->dev, "failed to register gpiochip for %s\n",
-			port_np->full_name);
+			pp->name);
 	else
 		port->is_registered = true;
 
@@ -362,25 +378,116 @@ static void dwapb_gpio_unregister(struct dwapb_gpio *gpio)
 			gpiochip_remove(&gpio->ports[m].bgc.gc);
 }
 
+static struct dwapb_platform_data *
+dwapb_gpio_get_pdata_of(struct device *dev)
+{
+	struct device_node *node, *port_np;
+	struct dwapb_platform_data *pdata;
+	struct dwapb_port_property *pp;
+	int nports;
+	int i;
+
+	node = dev->of_node;
+	if (!IS_ENABLED(CONFIG_OF_GPIO) || !node)
+		return ERR_PTR(-ENODEV);
+
+	nports = of_get_child_count(node);
+	if (nports == 0)
+		return ERR_PTR(-ENODEV);
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	pdata->properties = kcalloc(nports, sizeof(*pp), GFP_KERNEL);
+	if (!pdata->properties) {
+		kfree(pdata);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	pdata->nports = nports;
+
+	i = 0;
+	for_each_child_of_node(node, port_np) {
+		pp = &pdata->properties[i++];
+		pp->node = port_np;
+
+		if (of_property_read_u32(port_np, "reg", &pp->idx) ||
+		    pp->idx >= DWAPB_MAX_PORTS) {
+			dev_err(dev, "missing/invalid port index for %s\n",
+				port_np->full_name);
+			kfree(pdata->properties);
+			kfree(pdata);
+			return ERR_PTR(-EINVAL);
+		}
+
+		if (of_property_read_u32(port_np, "snps,nr-gpios",
+					 &pp->ngpio)) {
+			dev_info(dev, "failed to get number of gpios for %s\n",
+				 port_np->full_name);
+			pp->ngpio = 32;
+		}
+
+		/*
+		 * Only port A can provide interrupts in all configurations of
+		 * the IP.
+		 */
+		if (pp->idx == 0 &&
+		    of_property_read_bool(port_np, "interrupt-controller")) {
+			pp->irq = irq_of_parse_and_map(port_np, 0);
+			if (!pp->irq) {
+				dev_warn(dev, "no irq for bank %s\n",
+					 port_np->full_name);
+			}
+		}
+
+		pp->irq_shared	= false;
+		pp->gpio_base	= -1;
+		pp->name	= port_np->full_name;
+	}
+
+	return pdata;
+}
+
+static inline void dwapb_free_pdata_of(struct dwapb_platform_data *pdata)
+{
+	if (!IS_ENABLED(CONFIG_OF_GPIO) || !pdata)
+		return;
+
+	kfree(pdata->properties);
+	kfree(pdata);
+}
+
 static int dwapb_gpio_probe(struct platform_device *pdev)
 {
+	unsigned int i;
 	struct resource *res;
 	struct dwapb_gpio *gpio;
-	struct device_node *np;
 	int err;
-	unsigned int offs = 0;
+	struct device *dev = &pdev->dev;
+	struct dwapb_platform_data *pdata = dev_get_platdata(dev);
+	bool is_pdata_alloc = !pdata;
+
+	if (is_pdata_alloc) {
+		pdata = dwapb_gpio_get_pdata_of(dev);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+	}
 
-	gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL);
-	if (!gpio)
-		return -ENOMEM;
-	gpio->dev = &pdev->dev;
+	if (!pdata->nports) {
+		err = -ENODEV;
+		goto out_err;
+	}
 
-	gpio->nr_ports = of_get_child_count(pdev->dev.of_node);
-	if (!gpio->nr_ports) {
-		err = -EINVAL;
+	gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL);
+	if (!gpio) {
+		err = -ENOMEM;
 		goto out_err;
 	}
-	gpio->ports = devm_kzalloc(&pdev->dev, gpio->nr_ports *
+	gpio->dev = &pdev->dev;
+	gpio->nr_ports = pdata->nports;
+
+	gpio->ports = devm_kcalloc(&pdev->dev, gpio->nr_ports,
 				   sizeof(*gpio->ports), GFP_KERNEL);
 	if (!gpio->ports) {
 		err = -ENOMEM;
@@ -394,20 +501,23 @@ static int dwapb_gpio_probe(struct platform_device *pdev)
 		goto out_err;
 	}
 
-	for_each_child_of_node(pdev->dev.of_node, np) {
-		err = dwapb_gpio_add_port(gpio, np, offs++);
+	for (i = 0; i < gpio->nr_ports; i++) {
+		err = dwapb_gpio_add_port(gpio, &pdata->properties[i], i);
 		if (err)
 			goto out_unregister;
 	}
 	platform_set_drvdata(pdev, gpio);
 
-	return 0;
+	goto out_err;
 
 out_unregister:
 	dwapb_gpio_unregister(gpio);
 	dwapb_irq_teardown(gpio);
 
 out_err:
+	if (is_pdata_alloc)
+		dwapb_free_pdata_of(pdata);
+
 	return err;
 }
 
diff --git a/include/linux/platform_data/gpio-dwapb.h b/include/linux/platform_data/gpio-dwapb.h
new file mode 100644
index 000000000000..28702c849af1
--- /dev/null
+++ b/include/linux/platform_data/gpio-dwapb.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef GPIO_DW_APB_H
+#define GPIO_DW_APB_H
+
+struct dwapb_port_property {
+	struct device_node *node;
+	const char	*name;
+	unsigned int	idx;
+	unsigned int	ngpio;
+	unsigned int	gpio_base;
+	unsigned int	irq;
+	bool		irq_shared;
+};
+
+struct dwapb_platform_data {
+	struct dwapb_port_property *properties;
+	unsigned int nports;
+};
+
+#endif
-- 
cgit v1.2.3


From 55cf9cb63f0e5439f208d78ed944de9a8df65011 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Mon, 15 Sep 2014 18:01:10 +0800
Subject: f2fs: support large sector size

Block size in f2fs is 4096 bytes, so theoretically, f2fs can support 4096 bytes
sector device at maximum. But now f2fs only support 512 bytes size sector, so
block device such as zRAM which uses page cache as its block storage space will
not be mounted successfully as mismatch between sector size of zRAM and sector
size of f2fs supported.

In this patch we support large sector size in f2fs, so block device with sector
size of 512/1024/2048/4096 bytes can be supported in f2fs.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c          |  2 +-
 fs/f2fs/segment.c       |  4 ++--
 fs/f2fs/segment.h       | 10 +++++-----
 fs/f2fs/super.c         | 20 ++++++++++++++------
 include/linux/f2fs_fs.h |  5 +++--
 5 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index aaf22a912044..13ab72084913 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -85,7 +85,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 	bio = bio_alloc(GFP_NOIO, npages);
 
 	bio->bi_bdev = sbi->sb->s_bdev;
-	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
 	bio->bi_private = sbi;
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4ea53aab786d..24b768ae39c4 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -370,8 +370,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
 				block_t blkstart, block_t blklen)
 {
-	sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
-	sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
+	sector_t start = SECTOR_FROM_BLOCK(blkstart);
+	sector_t len = SECTOR_FROM_BLOCK(blklen);
 	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
 	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
 }
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 848ac519958f..032c0905a12b 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -89,10 +89,10 @@
 #define TOTAL_SECS(sbi)	(sbi->total_sections)
 #define TOTAL_BLKS(sbi)	(SM_I(sbi)->segment_count << sbi->log_blocks_per_seg)
 
-#define SECTOR_FROM_BLOCK(sbi, blk_addr)				\
-	(((sector_t)blk_addr) << (sbi)->log_sectors_per_block)
-#define SECTOR_TO_BLOCK(sbi, sectors)					\
-	(sectors >> (sbi)->log_sectors_per_block)
+#define SECTOR_FROM_BLOCK(blk_addr)					\
+	(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
+#define SECTOR_TO_BLOCK(sectors)					\
+	(sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
 #define MAX_BIO_BLOCKS(sbi)						\
 	((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES))
 
@@ -711,7 +711,7 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
 {
 	struct block_device *bdev = sbi->sb->s_bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
-	return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
+	return SECTOR_TO_BLOCK(queue_max_sectors(q));
 }
 
 /*
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ed4095e5e8a7..3dfa1b5eae2f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -788,14 +788,22 @@ static int sanity_check_raw_super(struct super_block *sb,
 		return 1;
 	}
 
-	if (le32_to_cpu(raw_super->log_sectorsize) !=
-					F2FS_LOG_SECTOR_SIZE) {
-		f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize");
+	/* Currently, support 512/1024/2048/4096 bytes sector size */
+	if (le32_to_cpu(raw_super->log_sectorsize) >
+				F2FS_MAX_LOG_SECTOR_SIZE ||
+		le32_to_cpu(raw_super->log_sectorsize) <
+				F2FS_MIN_LOG_SECTOR_SIZE) {
+		f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
+			le32_to_cpu(raw_super->log_sectorsize));
 		return 1;
 	}
-	if (le32_to_cpu(raw_super->log_sectors_per_block) !=
-					F2FS_LOG_SECTORS_PER_BLOCK) {
-		f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block");
+	if (le32_to_cpu(raw_super->log_sectors_per_block) +
+		le32_to_cpu(raw_super->log_sectorsize) !=
+			F2FS_MAX_LOG_SECTOR_SIZE) {
+		f2fs_msg(sb, KERN_INFO,
+			"Invalid log sectors per block(%u) log sectorsize(%u)",
+			le32_to_cpu(raw_super->log_sectors_per_block),
+			le32_to_cpu(raw_super->log_sectorsize));
 		return 1;
 	}
 	return 0;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 9ca1ff3d4662..860313a33a43 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -15,8 +15,9 @@
 #include <linux/types.h>
 
 #define F2FS_SUPER_OFFSET		1024	/* byte-size offset */
-#define F2FS_LOG_SECTOR_SIZE		9	/* 9 bits for 512 byte */
-#define F2FS_LOG_SECTORS_PER_BLOCK	3	/* 4KB: F2FS_BLKSIZE */
+#define F2FS_MIN_LOG_SECTOR_SIZE	9	/* 9 bits for 512 bytes */
+#define F2FS_MAX_LOG_SECTOR_SIZE	12	/* 12 bits for 4096 bytes */
+#define F2FS_LOG_SECTORS_PER_BLOCK	3	/* log number for sector/blk */
 #define F2FS_BLKSIZE			4096	/* support only 4KB block */
 #define F2FS_MAX_EXTENSION		64	/* # of extension entries */
 #define F2FS_BLK_ALIGN(x)	(((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
-- 
cgit v1.2.3


From 91397401bb5072f71e8ce8744ad0bdec3e875a91 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Mar 2014 13:29:28 -0400
Subject: ARCH: AUDIT: audit_syscall_entry() should not require the arch

We have a function where the arch can be queried, syscall_get_arch().
So rather than have every single piece of arch specific code use and/or
duplicate syscall_get_arch(), just have the audit code use the
syscall_get_arch() code.

Based-on-patch-by: Richard Briggs <rgb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
Cc: linux-alpha@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-ia64@vger.kernel.org
Cc: microblaze-uclinux@itee.uq.edu.au
Cc: linux-mips@linux-mips.org
Cc: linux@lists.openrisc.net
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: linux-sh@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: user-mode-linux-devel@lists.sourceforge.net
Cc: linux-xtensa@linux-xtensa.org
Cc: x86@kernel.org
---
 arch/alpha/kernel/ptrace.c      |  2 +-
 arch/arm/kernel/ptrace.c        |  4 ++--
 arch/ia64/kernel/ptrace.c       |  2 +-
 arch/microblaze/kernel/ptrace.c |  3 +--
 arch/mips/kernel/ptrace.c       |  4 +---
 arch/openrisc/kernel/ptrace.c   |  3 +--
 arch/parisc/kernel/ptrace.c     |  9 +++------
 arch/powerpc/kernel/ptrace.c    |  7 ++-----
 arch/s390/kernel/ptrace.c       |  4 +---
 arch/sh/kernel/ptrace_32.c      | 14 +-------------
 arch/sh/kernel/ptrace_64.c      | 17 +----------------
 arch/sparc/kernel/ptrace_64.c   |  9 ++-------
 arch/um/kernel/ptrace.c         |  3 +--
 arch/x86/kernel/ptrace.c        |  8 ++------
 arch/x86/um/asm/ptrace.h        |  4 ----
 arch/xtensa/kernel/ptrace.c     |  2 +-
 include/linux/audit.h           |  7 ++++---
 17 files changed, 25 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index 86d835157b54..d9ee81769899 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -321,7 +321,7 @@ asmlinkage unsigned long syscall_trace_enter(void)
 	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
 	    tracehook_report_syscall_entry(current_pt_regs()))
 		ret = -1UL;
-	audit_syscall_entry(AUDIT_ARCH_ALPHA, regs->r0, regs->r16, regs->r17, regs->r18, regs->r19);
+	audit_syscall_entry(regs->r0, regs->r16, regs->r17, regs->r18, regs->r19);
 	return ret ?: current_pt_regs()->r0;
 }
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 0c27ed6f3f23..6af95986fbf7 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -944,8 +944,8 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
 	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
 		trace_sys_enter(regs, scno);
 
-	audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, regs->ARM_r1,
-			    regs->ARM_r2, regs->ARM_r3);
+	audit_syscall_entry(scno, regs->ARM_r0, regs->ARM_r1, regs->ARM_r2,
+			    regs->ARM_r3);
 
 	return scno;
 }
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index b7a5fffe0924..6f54d511cc50 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1219,7 +1219,7 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 		ia64_sync_krbs();
 
 
-	audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3);
+	audit_syscall_entry(regs.r15, arg0, arg1, arg2, arg3);
 
 	return 0;
 }
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 39cf50841f6d..bb10637ce688 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -147,8 +147,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		 */
 		ret = -1L;
 
-	audit_syscall_entry(EM_MICROBLAZE, regs->r12, regs->r5, regs->r6,
-			    regs->r7, regs->r8);
+	audit_syscall_entry(regs->r12, regs->r5, regs->r6, regs->r7, regs->r8);
 
 	return ret ?: regs->r12;
 }
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index f639ccd5060c..d8a76f97a053 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -649,9 +649,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[2]);
 
-	audit_syscall_entry(syscall_get_arch(),
-			    syscall,
-			    regs->regs[4], regs->regs[5],
+	audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
 			    regs->regs[6], regs->regs[7]);
 	return syscall;
 }
diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c
index 71a2a0c34c65..4f59fa4e34e5 100644
--- a/arch/openrisc/kernel/ptrace.c
+++ b/arch/openrisc/kernel/ptrace.c
@@ -187,8 +187,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		 */
 		ret = -1L;
 
-	audit_syscall_entry(AUDIT_ARCH_OPENRISC, regs->gpr[11],
-			    regs->gpr[3], regs->gpr[4],
+	audit_syscall_entry(regs->gpr[11], regs->gpr[3], regs->gpr[4],
 			    regs->gpr[5], regs->gpr[6]);
 
 	return ret ? : regs->gpr[11];
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index e842ee233db4..74814577e4b8 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -276,14 +276,11 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 
 #ifdef CONFIG_64BIT
 	if (!is_compat_task())
-		audit_syscall_entry(AUDIT_ARCH_PARISC64,
-			regs->gr[20],
-			regs->gr[26], regs->gr[25],
-			regs->gr[24], regs->gr[23]);
+		audit_syscall_entry(regs->gr[20], regs->gr[26], regs->gr[25],
+				    regs->gr[24], regs->gr[23]);
 	else
 #endif
-		audit_syscall_entry(AUDIT_ARCH_PARISC,
-			regs->gr[20] & 0xffffffff,
+		audit_syscall_entry(regs->gr[20] & 0xffffffff,
 			regs->gr[26] & 0xffffffff,
 			regs->gr[25] & 0xffffffff,
 			regs->gr[24] & 0xffffffff,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 2e3d2bf536c5..524a943a33bb 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1788,14 +1788,11 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 
 #ifdef CONFIG_PPC64
 	if (!is_32bit_task())
-		audit_syscall_entry(AUDIT_ARCH_PPC64,
-				    regs->gpr[0],
-				    regs->gpr[3], regs->gpr[4],
+		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
 				    regs->gpr[5], regs->gpr[6]);
 	else
 #endif
-		audit_syscall_entry(AUDIT_ARCH_PPC,
-				    regs->gpr[0],
+		audit_syscall_entry(regs->gpr[0],
 				    regs->gpr[3] & 0xffffffff,
 				    regs->gpr[4] & 0xffffffff,
 				    regs->gpr[5] & 0xffffffff,
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 5dc7ad9e2fbf..910f253b22bc 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -828,9 +828,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->gprs[2]);
 
-	audit_syscall_entry(is_compat_task() ?
-				AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
-			    regs->gprs[2], regs->orig_gpr2,
+	audit_syscall_entry(regs->gprs[2], regs->orig_gpr2,
 			    regs->gprs[3], regs->gprs[4],
 			    regs->gprs[5]);
 out:
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 668c81631c08..c1a6b89bfe70 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -484,17 +484,6 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ret;
 }
 
-static inline int audit_arch(void)
-{
-	int arch = EM_SH;
-
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-	arch |= __AUDIT_ARCH_LE;
-#endif
-
-	return arch;
-}
-
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 {
 	long ret = 0;
@@ -513,8 +502,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[0]);
 
-	audit_syscall_entry(audit_arch(), regs->regs[3],
-			    regs->regs[4], regs->regs[5],
+	audit_syscall_entry(regs->regs[3], regs->regs[4], regs->regs[5],
 			    regs->regs[6], regs->regs[7]);
 
 	return ret ?: regs->regs[0];
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index af90339dadcd..5cea973a65b2 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -504,20 +504,6 @@ asmlinkage int sh64_ptrace(long request, long pid,
 	return sys_ptrace(request, pid, addr, data);
 }
 
-static inline int audit_arch(void)
-{
-	int arch = EM_SH;
-
-#ifdef CONFIG_64BIT
-	arch |= __AUDIT_ARCH_64BIT;
-#endif
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-	arch |= __AUDIT_ARCH_LE;
-#endif
-
-	return arch;
-}
-
 asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs)
 {
 	long long ret = 0;
@@ -536,8 +522,7 @@ asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[9]);
 
-	audit_syscall_entry(audit_arch(), regs->regs[1],
-			    regs->regs[2], regs->regs[3],
+	audit_syscall_entry(regs->regs[1], regs->regs[2], regs->regs[3],
 			    regs->regs[4], regs->regs[5]);
 
 	return ret ?: regs->regs[9];
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index c13c9f25d83a..9ddc4928a089 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -1076,13 +1076,8 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->u_regs[UREG_G1]);
 
-	audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
-			     AUDIT_ARCH_SPARC :
-			     AUDIT_ARCH_SPARC64),
-			    regs->u_regs[UREG_G1],
-			    regs->u_regs[UREG_I0],
-			    regs->u_regs[UREG_I1],
-			    regs->u_regs[UREG_I2],
+	audit_syscall_entry(regs->u_regs[UREG_G1], regs->u_regs[UREG_I0],
+			    regs->u_regs[UREG_I1], regs->u_regs[UREG_I2],
 			    regs->u_regs[UREG_I3]);
 
 	return ret;
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 694d551c8899..62435ef003d9 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -165,8 +165,7 @@ static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
  */
 void syscall_trace_enter(struct pt_regs *regs)
 {
-	audit_syscall_entry(HOST_AUDIT_ARCH,
-			    UPT_SYSCALL_NR(&regs->regs),
+	audit_syscall_entry(UPT_SYSCALL_NR(&regs->regs),
 			    UPT_SYSCALL_ARG1(&regs->regs),
 			    UPT_SYSCALL_ARG2(&regs->regs),
 			    UPT_SYSCALL_ARG3(&regs->regs),
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 678c0ada3b3c..eb1c87f0b03b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1488,15 +1488,11 @@ long syscall_trace_enter(struct pt_regs *regs)
 		trace_sys_enter(regs, regs->orig_ax);
 
 	if (IS_IA32)
-		audit_syscall_entry(AUDIT_ARCH_I386,
-				    regs->orig_ax,
-				    regs->bx, regs->cx,
+		audit_syscall_entry(regs->orig_ax, regs->bx, regs->cx,
 				    regs->dx, regs->si);
 #ifdef CONFIG_X86_64
 	else
-		audit_syscall_entry(AUDIT_ARCH_X86_64,
-				    regs->orig_ax,
-				    regs->di, regs->si,
+		audit_syscall_entry(regs->orig_ax, regs->di, regs->si,
 				    regs->dx, regs->r10);
 #endif
 
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index 54f8102ccde5..e59eef20647b 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -47,8 +47,6 @@ struct user_desc;
 
 #ifdef CONFIG_X86_32
 
-#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
-
 extern int ptrace_get_thread_area(struct task_struct *child, int idx,
                                   struct user_desc __user *user_desc);
 
@@ -57,8 +55,6 @@ extern int ptrace_set_thread_area(struct task_struct *child, int idx,
 
 #else
 
-#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
-
 #define PT_REGS_R8(r) UPT_R8(&(r)->regs)
 #define PT_REGS_R9(r) UPT_R9(&(r)->regs)
 #define PT_REGS_R10(r) UPT_R10(&(r)->regs)
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 562fac664751..4d54b481123b 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -342,7 +342,7 @@ void do_syscall_trace_enter(struct pt_regs *regs)
 		do_syscall_trace();
 
 #if 0
-	audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
+	audit_syscall_entry(...);
 #endif
 }
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 22cfddb75566..bb1c3ab611bf 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/audit.h>
+#include <asm/syscall.h>
 
 struct audit_sig_info {
 	uid_t		uid;
@@ -141,12 +142,12 @@ static inline void audit_free(struct task_struct *task)
 	if (unlikely(task->audit_context))
 		__audit_free(task);
 }
-static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
+static inline void audit_syscall_entry(int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
 {
 	if (unlikely(current->audit_context))
-		__audit_syscall_entry(arch, major, a0, a1, a2, a3);
+		__audit_syscall_entry(syscall_get_arch(), major, a0, a1, a2, a3);
 }
 static inline void audit_syscall_exit(void *pt_regs)
 {
@@ -322,7 +323,7 @@ static inline int audit_alloc(struct task_struct *task)
 }
 static inline void audit_free(struct task_struct *task)
 { }
-static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
+static inline void audit_syscall_entry(int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
 { }
-- 
cgit v1.2.3


From a9ebe0b98896b276a3a1664da5f40d3b7c92f316 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 22 Apr 2014 11:46:16 -0400
Subject: audit: fix build error when asm/syscall.h does not exist

avr32 does not have an asm/syscall.h file.  We need the
syscall_get_arch() definition from that file for all arch's which
support CONFIG_AUDITSYSCALL.  Obviously avr32 is not one of those
arch's.  Move the include inside the CONFIG_AUDITSYSCALL such that we
only do the include if we need the results.

When the syscall_get_arch() call is moved inside __audit_syscall_entry()
this include can be dropped entirely.  But that is going to require some
assembly changes on x86* in a patch that is not ready for the tree...

Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index bb1c3ab611bf..783157b289e8 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,7 +26,6 @@
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/audit.h>
-#include <asm/syscall.h>
 
 struct audit_sig_info {
 	uid_t		uid;
@@ -110,6 +109,8 @@ extern void audit_log_session_info(struct audit_buffer *ab);
 #endif
 
 #ifdef CONFIG_AUDITSYSCALL
+#include <asm/syscall.h> /* for syscall_get_arch() */
+
 /* These are defined in auditsc.c */
 				/* Public API */
 extern int  audit_alloc(struct task_struct *task);
-- 
cgit v1.2.3


From b4f0d3755c5e9cc86292d5fd78261903b4f23d4a Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 4 Mar 2014 10:38:06 -0500
Subject: audit: x86: drop arch from __audit_syscall_entry() interface

Since the arch is found locally in __audit_syscall_entry(), there is no need to
pass it in as a parameter.  Delete it from the parameter list.

x86* was the only arch to call __audit_syscall_entry() directly and did so from
assembly code.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-audit@redhat.com
Signed-off-by: Eric Paris <eparis@redhat.com>

---

As this patch relies on changes in the audit tree, I think it
appropriate to send it through my tree rather than the x86 tree.
---
 arch/x86/ia32/ia32entry.S  | 12 ++++++------
 arch/x86/kernel/entry_32.S | 11 +++++------
 arch/x86/kernel/entry_64.S | 11 +++++------
 include/linux/audit.h      |  5 ++---
 kernel/auditsc.c           |  6 ++----
 5 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 4299eb05023c..f5bdd2881815 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -186,12 +186,12 @@ sysexit_from_sys_call:
 
 #ifdef CONFIG_AUDITSYSCALL
 	.macro auditsys_entry_common
-	movl %esi,%r9d			/* 6th arg: 4th syscall arg */
-	movl %edx,%r8d			/* 5th arg: 3rd syscall arg */
-	/* (already in %ecx)		   4th arg: 2nd syscall arg */
-	movl %ebx,%edx			/* 3rd arg: 1st syscall arg */
-	movl %eax,%esi			/* 2nd arg: syscall number */
-	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
+	movl %esi,%r8d			/* 5th arg: 4th syscall arg */
+	movl %ecx,%r9d			/*swap with edx*/
+	movl %edx,%ecx			/* 4th arg: 3rd syscall arg */
+	movl %r9d,%edx			/* 3rd arg: 2nd syscall arg */
+	movl %ebx,%esi			/* 2nd arg: 1st syscall arg */
+	movl %eax,%edi			/* 1st arg: syscall number */
 	call __audit_syscall_entry
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 0d0c9d4ab6d5..f9e3fabc8716 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -449,12 +449,11 @@ sysenter_audit:
 	jnz syscall_trace_entry
 	addl $4,%esp
 	CFI_ADJUST_CFA_OFFSET -4
-	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
-	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
-	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
-	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
-	movl %eax,%edx			/* 2nd arg: syscall number */
-	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
+	movl %esi,4(%esp)		/* 5th arg: 4th syscall arg */
+	movl %edx,(%esp)		/* 4th arg: 3rd syscall arg */
+	/* %ecx already in %ecx		   3rd arg: 2nd syscall arg */
+	movl %ebx,%edx			/* 2nd arg: 1st syscall arg */
+	/* %eax already in %eax		   1st arg: syscall number */
 	call __audit_syscall_entry
 	pushl_cfi %ebx
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c844f0816ab8..5e8cb2ad9fb3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -488,12 +488,11 @@ badsys:
 	 * jump back to the normal fast path.
 	 */
 auditsys:
-	movq %r10,%r9			/* 6th arg: 4th syscall arg */
-	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
-	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
-	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
-	movq %rax,%rsi			/* 2nd arg: syscall number */
-	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
+	movq %r10,%r8			/* 5th arg: 4th syscall arg */
+	movq %rdx,%rcx			/* 4th arg: 3rd syscall arg */
+	movq %rsi,%rdx			/* 3rd arg: 2nd syscall arg */
+	movq %rdi,%rsi			/* 2nd arg: 1st syscall arg */
+	movq %rax,%rdi			/* 1st arg: syscall number */
 	call __audit_syscall_entry
 	LOAD_ARGS 0		/* reload call-clobbered registers */
 	jmp system_call_fastpath
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 783157b289e8..1ae00891aff9 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -115,8 +115,7 @@ extern void audit_log_session_info(struct audit_buffer *ab);
 				/* Public API */
 extern int  audit_alloc(struct task_struct *task);
 extern void __audit_free(struct task_struct *task);
-extern void __audit_syscall_entry(int arch,
-				  int major, unsigned long a0, unsigned long a1,
+extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
 				  unsigned long a2, unsigned long a3);
 extern void __audit_syscall_exit(int ret_success, long ret_value);
 extern struct filename *__audit_reusename(const __user char *uptr);
@@ -148,7 +147,7 @@ static inline void audit_syscall_entry(int major, unsigned long a0,
 				       unsigned long a3)
 {
 	if (unlikely(current->audit_context))
-		__audit_syscall_entry(syscall_get_arch(), major, a0, a1, a2, a3);
+		__audit_syscall_entry(major, a0, a1, a2, a3);
 }
 static inline void audit_syscall_exit(void *pt_regs)
 {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9f03ac205e1f..4e17443fd1ef 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1506,7 +1506,6 @@ void __audit_free(struct task_struct *tsk)
 
 /**
  * audit_syscall_entry - fill in an audit record at syscall entry
- * @arch: architecture type
  * @major: major syscall type (function)
  * @a1: additional syscall register 1
  * @a2: additional syscall register 2
@@ -1521,9 +1520,8 @@ void __audit_free(struct task_struct *tsk)
  * will only be written if another part of the kernel requests that it
  * be written).
  */
-void __audit_syscall_entry(int arch, int major,
-			 unsigned long a1, unsigned long a2,
-			 unsigned long a3, unsigned long a4)
+void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2,
+			   unsigned long a3, unsigned long a4)
 {
 	struct task_struct *tsk = current;
 	struct audit_context *context = tsk->audit_context;
-- 
cgit v1.2.3


From 219ca39427bf6c46c4e1473493e33bc00635e99b Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 26 Mar 2014 07:26:47 -0400
Subject: audit: use union for audit_field values since they are mutually
 exclusive

Since only one of val, uid, gid and lsm* are used at any given time, combine
them to reduce the size of the struct audit_field.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 include/linux/audit.h | 14 +++++++++-----
 kernel/auditfilter.c  | 29 ++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 1ae00891aff9..36dffeccebdb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -66,12 +66,16 @@ struct audit_krule {
 
 struct audit_field {
 	u32				type;
-	u32				val;
-	kuid_t				uid;
-	kgid_t				gid;
+	union {
+		u32			val;
+		kuid_t			uid;
+		kgid_t			gid;
+		struct {
+			char		*lsm_str;
+			void		*lsm_rule;
+		};
+	};
 	u32				op;
-	char				*lsm_str;
-	void				*lsm_rule;
 };
 
 extern int is_audit_feature_set(int which);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b65a138250b8..40ed9813d4b2 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -71,6 +71,24 @@ static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = {
 
 DEFINE_MUTEX(audit_filter_mutex);
 
+static void audit_free_lsm_field(struct audit_field *f)
+{
+	switch (f->type) {
+	case AUDIT_SUBJ_USER:
+	case AUDIT_SUBJ_ROLE:
+	case AUDIT_SUBJ_TYPE:
+	case AUDIT_SUBJ_SEN:
+	case AUDIT_SUBJ_CLR:
+	case AUDIT_OBJ_USER:
+	case AUDIT_OBJ_ROLE:
+	case AUDIT_OBJ_TYPE:
+	case AUDIT_OBJ_LEV_LOW:
+	case AUDIT_OBJ_LEV_HIGH:
+		kfree(f->lsm_str);
+		security_audit_rule_free(f->lsm_rule);
+	}
+}
+
 static inline void audit_free_rule(struct audit_entry *e)
 {
 	int i;
@@ -80,11 +98,8 @@ static inline void audit_free_rule(struct audit_entry *e)
 	if (erule->watch)
 		audit_put_watch(erule->watch);
 	if (erule->fields)
-		for (i = 0; i < erule->field_count; i++) {
-			struct audit_field *f = &erule->fields[i];
-			kfree(f->lsm_str);
-			security_audit_rule_free(f->lsm_rule);
-		}
+		for (i = 0; i < erule->field_count; i++)
+			audit_free_lsm_field(&erule->fields[i]);
 	kfree(erule->fields);
 	kfree(erule->filterkey);
 	kfree(e);
@@ -422,10 +437,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 
 		f->type = data->fields[i];
 		f->val = data->values[i];
-		f->uid = INVALID_UID;
-		f->gid = INVALID_GID;
-		f->lsm_str = NULL;
-		f->lsm_rule = NULL;
 
 		/* Support legacy tests for a valid loginuid */
 		if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
-- 
cgit v1.2.3


From 9a37110d20c95d1ebf6c04881177fe8f62831db2 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 10 Sep 2014 14:31:39 -0400
Subject: locking: Add WARN_ON_ONCE lock assertion

An interface may need to assert a lock invariant and not flood the
system logs; add a lockdep helper macro equivalent to
lockdep_assert_held() which only WARNs once.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/lockdep.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 008388f920d7..64c7425afbce 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -362,6 +362,10 @@ extern void lockdep_trace_alloc(gfp_t mask);
 		WARN_ON(debug_locks && !lockdep_is_held(l));	\
 	} while (0)
 
+#define lockdep_assert_held_once(l)	do {				\
+		WARN_ON_ONCE(debug_locks && !lockdep_is_held(l));	\
+	} while (0)
+
 #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
 
 #else /* !CONFIG_LOCKDEP */
@@ -412,6 +416,7 @@ struct lock_class_key { };
 #define lockdep_depth(tsk)	(0)
 
 #define lockdep_assert_held(l)			do { (void)(l); } while (0)
+#define lockdep_assert_held_once(l)		do { (void)(l); } while (0)
 
 #define lockdep_recursing(tsk)			(0)
 
-- 
cgit v1.2.3


From 2a159389bf5d962359349a76827b2f683276a1c7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 25 Aug 2014 17:51:26 +0200
Subject: USB: core: add device-qualifier quirk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add new quirk for devices that cannot handle requests for the
device_qualifier descriptor.

A USB-2.0 compliant device must respond to requests for the
device_qualifier descriptor (even if it's with a request error), but at
least one device is known to misbehave after such a request.

Suggested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c     | 3 +++
 include/linux/usb/quirks.h | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d481c99a20d7..c01b72467524 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4538,6 +4538,9 @@ check_highspeed (struct usb_hub *hub, struct usb_device *udev, int port1)
 	struct usb_qualifier_descriptor	*qual;
 	int				status;
 
+	if (udev->quirks & USB_QUIRK_DEVICE_QUALIFIER)
+		return;
+
 	qual = kmalloc (sizeof *qual, GFP_KERNEL);
 	if (qual == NULL)
 		return;
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 55a17b188daa..ffe565c94743 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -41,4 +41,7 @@
  */
 #define USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL	0x00000080
 
+/* device can't handle device_qualifier descriptor requests */
+#define USB_QUIRK_DEVICE_QUALIFIER	0x00000100
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From 299245a145b2ad4cfb4c5432eb1264299f55e7e0 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 10 Sep 2014 15:06:24 -0400
Subject: serial: core: Privatize modem status enable flags

The serial core uses the tty port flags, ASYNC_CTS_FLOW and
ASYNC_CD_CHECK, to track whether CTS and DCD changes should be
ignored or handled. However, the tty port flags are not safe for
atomic bit operations and no lock provides serialized updates.

Introduce the struct uart_port status field to track CTS and DCD
enable states, and serialize access with uart port lock. Substitute
uart_cts_enabled() helper for tty_port_cts_enabled().

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/mxs-auart.c   |  2 +-
 drivers/tty/serial/serial_core.c | 29 +++++++++++++++++------------
 include/linux/serial_core.h      | 12 ++++++++++++
 3 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index b5c329248c81..10c29334fe2f 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -408,7 +408,7 @@ static void mxs_auart_set_mctrl(struct uart_port *u, unsigned mctrl)
 
 	ctrl &= ~(AUART_CTRL2_RTSEN | AUART_CTRL2_RTS);
 	if (mctrl & TIOCM_RTS) {
-		if (tty_port_cts_enabled(&u->state->port))
+		if (uart_cts_enabled(u))
 			ctrl |= AUART_CTRL2_RTSEN;
 		else
 			ctrl |= AUART_CTRL2_RTS;
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index f764de32b658..dd21ed900635 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -59,6 +59,11 @@ static void uart_change_pm(struct uart_state *state,
 
 static void uart_port_shutdown(struct tty_port *port);
 
+static int uart_dcd_enabled(struct uart_port *uport)
+{
+	return uport->status & UPSTAT_DCD_ENABLE;
+}
+
 /*
  * This routine is used by the interrupt handler to schedule processing in
  * the software interrupt portion of the driver.
@@ -130,7 +135,6 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state,
 		int init_hw)
 {
 	struct uart_port *uport = state->uart_port;
-	struct tty_port *port = &state->port;
 	unsigned long page;
 	int retval = 0;
 
@@ -176,12 +180,12 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state,
 				uart_set_mctrl(uport, TIOCM_RTS | TIOCM_DTR);
 		}
 
-		if (tty_port_cts_enabled(port)) {
-			spin_lock_irq(&uport->lock);
+		spin_lock_irq(&uport->lock);
+		if (uart_cts_enabled(uport)) {
 			if (!(uport->ops->get_mctrl(uport) & TIOCM_CTS))
 				tty->hw_stopped = 1;
-			spin_unlock_irq(&uport->lock);
 		}
+		spin_unlock_irq(&uport->lock);
 	}
 
 	/*
@@ -435,7 +439,6 @@ EXPORT_SYMBOL(uart_get_divisor);
 static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
 					struct ktermios *old_termios)
 {
-	struct tty_port *port = &state->port;
 	struct uart_port *uport = state->uart_port;
 	struct ktermios *termios;
 
@@ -450,17 +453,19 @@ static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
 	uport->ops->set_termios(uport, termios, old_termios);
 
 	/*
-	 * Set flags based on termios cflag
+	 * Set modem status enables based on termios cflag
 	 */
+	spin_lock_irq(&uport->lock);
 	if (termios->c_cflag & CRTSCTS)
-		set_bit(ASYNCB_CTS_FLOW, &port->flags);
+		uport->status |= UPSTAT_CTS_ENABLE;
 	else
-		clear_bit(ASYNCB_CTS_FLOW, &port->flags);
+		uport->status &= ~UPSTAT_CTS_ENABLE;
 
 	if (termios->c_cflag & CLOCAL)
-		clear_bit(ASYNCB_CHECK_CD, &port->flags);
+		uport->status &= ~UPSTAT_DCD_ENABLE;
 	else
-		set_bit(ASYNCB_CHECK_CD, &port->flags);
+		uport->status |= UPSTAT_DCD_ENABLE;
+	spin_unlock_irq(&uport->lock);
 }
 
 static inline int __uart_put_char(struct uart_port *port,
@@ -2765,7 +2770,7 @@ void uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
 
 	uport->icount.dcd++;
 
-	if (port->flags & ASYNC_CHECK_CD) {
+	if (uart_dcd_enabled(uport)) {
 		if (status)
 			wake_up_interruptible(&port->open_wait);
 		else if (tty)
@@ -2790,7 +2795,7 @@ void uart_handle_cts_change(struct uart_port *uport, unsigned int status)
 
 	uport->icount.cts++;
 
-	if (tty_port_cts_enabled(port)) {
+	if (uart_cts_enabled(uport)) {
 		if (tty->hw_stopped) {
 			if (status) {
 				tty->hw_stopped = 0;
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 3bd7d55eebce..452c9cc9d717 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -112,6 +112,7 @@ struct uart_icount {
 };
 
 typedef unsigned int __bitwise__ upf_t;
+typedef unsigned int __bitwise__ upstat_t;
 
 struct uart_port {
 	spinlock_t		lock;			/* port lock */
@@ -190,6 +191,12 @@ struct uart_port {
 #define UPF_CHANGE_MASK		((__force upf_t) (0x17fff))
 #define UPF_USR_MASK		((__force upf_t) (UPF_SPD_MASK|UPF_LOW_LATENCY))
 
+	/* status must be updated while holding port lock */
+	upstat_t		status;
+
+#define UPSTAT_CTS_ENABLE	((__force upstat_t) (1 << 0))
+#define UPSTAT_DCD_ENABLE	((__force upstat_t) (1 << 1))
+
 	unsigned int		mctrl;			/* current modem ctrl settings */
 	unsigned int		timeout;		/* character-based timeout */
 	unsigned int		type;			/* port type */
@@ -355,6 +362,11 @@ static inline int uart_tx_stopped(struct uart_port *port)
 	return 0;
 }
 
+static inline bool uart_cts_enabled(struct uart_port *uport)
+{
+	return uport->status & UPSTAT_CTS_ENABLE;
+}
+
 /*
  * The following are helper functions for the low level drivers.
  */
-- 
cgit v1.2.3


From d01f4d181c92877ecc678adce248a30cb7077ff1 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 10 Sep 2014 15:06:26 -0400
Subject: serial: core: Privatize tty->hw_stopped

tty->hw_stopped is not used by the tty core and is thread-unsafe;
hw_stopped is a member of a bitfield whose fields are updated
non-atomically and no lock is suitable for serializing updates.

Replace serial core usage of tty->hw_stopped with uport->hw_stopped.
Use int storage which works around Alpha EV4/5 non-atomic byte storage,
since uart_port uses different locks to protect certain fields within the
structure.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/bfin_uart.c   | 14 +++++++-------
 drivers/tty/serial/serial_core.c | 28 +++++++++++++---------------
 include/linux/serial_core.h      |  3 ++-
 3 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/bfin_uart.c b/drivers/tty/serial/bfin_uart.c
index dec0fd725d80..fc9fbf3f24a7 100644
--- a/drivers/tty/serial/bfin_uart.c
+++ b/drivers/tty/serial/bfin_uart.c
@@ -108,22 +108,22 @@ static void bfin_serial_set_mctrl(struct uart_port *port, unsigned int mctrl)
 static irqreturn_t bfin_serial_mctrl_cts_int(int irq, void *dev_id)
 {
 	struct bfin_serial_port *uart = dev_id;
-	unsigned int status = bfin_serial_get_mctrl(&uart->port);
+	struct uart_port *uport = &uart->port;
+	unsigned int status = bfin_serial_get_mctrl(uport);
 #ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS
-	struct tty_struct *tty = uart->port.state->port.tty;
 
 	UART_CLEAR_SCTS(uart);
-	if (tty->hw_stopped) {
+	if (uport->hw_stopped) {
 		if (status) {
-			tty->hw_stopped = 0;
-			uart_write_wakeup(&uart->port);
+			uport->hw_stopped = 0;
+			uart_write_wakeup(uport);
 		}
 	} else {
 		if (!status)
-			tty->hw_stopped = 1;
+			uport->hw_stopped = 1;
 	}
 #endif
-	uart_handle_cts_change(&uart->port, status & TIOCM_CTS);
+	uart_handle_cts_change(uport, status & TIOCM_CTS);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index dd21ed900635..7d51e26627fb 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -95,7 +95,7 @@ static void __uart_start(struct tty_struct *tty)
 	struct uart_state *state = tty->driver_data;
 	struct uart_port *port = state->uart_port;
 
-	if (!tty->stopped && !tty->hw_stopped)
+	if (!uart_tx_stopped(port))
 		port->ops->start_tx(port);
 }
 
@@ -181,10 +181,11 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state,
 		}
 
 		spin_lock_irq(&uport->lock);
-		if (uart_cts_enabled(uport)) {
-			if (!(uport->ops->get_mctrl(uport) & TIOCM_CTS))
-				tty->hw_stopped = 1;
-		}
+		if (uart_cts_enabled(uport) &&
+		    !(uport->ops->get_mctrl(uport) & TIOCM_CTS))
+			uport->hw_stopped = 1;
+		else
+			uport->hw_stopped = 0;
 		spin_unlock_irq(&uport->lock);
 	}
 
@@ -949,7 +950,7 @@ static int uart_get_lsr_info(struct tty_struct *tty,
 	 */
 	if (uport->x_char ||
 	    ((uart_circ_chars_pending(&state->xmit) > 0) &&
-	     !tty->stopped && !tty->hw_stopped))
+	     !uart_tx_stopped(uport)))
 		result &= ~TIOCSER_TEMT;
 
 	return put_user(result, value);
@@ -1295,7 +1296,7 @@ static void uart_set_termios(struct tty_struct *tty,
 
 	/*
 	 * If the port is doing h/w assisted flow control, do nothing.
-	 * We assume that tty->hw_stopped has never been set.
+	 * We assume that port->hw_stopped has never been set.
 	 */
 	if (uport->flags & UPF_HARD_FLOW)
 		return;
@@ -1303,7 +1304,7 @@ static void uart_set_termios(struct tty_struct *tty,
 	/* Handle turning off CRTSCTS */
 	if ((old_termios->c_cflag & CRTSCTS) && !(cflag & CRTSCTS)) {
 		spin_lock_irqsave(&uport->lock, flags);
-		tty->hw_stopped = 0;
+		uport->hw_stopped = 0;
 		__uart_start(tty);
 		spin_unlock_irqrestore(&uport->lock, flags);
 	}
@@ -1311,7 +1312,7 @@ static void uart_set_termios(struct tty_struct *tty,
 	else if (!(old_termios->c_cflag & CRTSCTS) && (cflag & CRTSCTS)) {
 		spin_lock_irqsave(&uport->lock, flags);
 		if (!(uport->ops->get_mctrl(uport) & TIOCM_CTS)) {
-			tty->hw_stopped = 1;
+			uport->hw_stopped = 1;
 			uport->ops->stop_tx(uport);
 		}
 		spin_unlock_irqrestore(&uport->lock, flags);
@@ -2788,23 +2789,20 @@ EXPORT_SYMBOL_GPL(uart_handle_dcd_change);
  */
 void uart_handle_cts_change(struct uart_port *uport, unsigned int status)
 {
-	struct tty_port *port = &uport->state->port;
-	struct tty_struct *tty = port->tty;
-
 	lockdep_assert_held_once(&uport->lock);
 
 	uport->icount.cts++;
 
 	if (uart_cts_enabled(uport)) {
-		if (tty->hw_stopped) {
+		if (uport->hw_stopped) {
 			if (status) {
-				tty->hw_stopped = 0;
+				uport->hw_stopped = 0;
 				uport->ops->start_tx(uport);
 				uart_write_wakeup(uport);
 			}
 		} else {
 			if (!status) {
-				tty->hw_stopped = 1;
+				uport->hw_stopped = 1;
 				uport->ops->stop_tx(uport);
 			}
 		}
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 452c9cc9d717..204c452a7567 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -197,6 +197,7 @@ struct uart_port {
 #define UPSTAT_CTS_ENABLE	((__force upstat_t) (1 << 0))
 #define UPSTAT_DCD_ENABLE	((__force upstat_t) (1 << 1))
 
+	int			hw_stopped;		/* sw-assisted CTS flow state */
 	unsigned int		mctrl;			/* current modem ctrl settings */
 	unsigned int		timeout;		/* character-based timeout */
 	unsigned int		type;			/* port type */
@@ -357,7 +358,7 @@ int uart_resume_port(struct uart_driver *reg, struct uart_port *port);
 static inline int uart_tx_stopped(struct uart_port *port)
 {
 	struct tty_struct *tty = port->state->port.tty;
-	if(tty->stopped || tty->hw_stopped)
+	if (tty->stopped || port->hw_stopped)
 		return 1;
 	return 0;
 }
-- 
cgit v1.2.3


From d7a855bd6ab25d10d5e3b6aeb53d9c57fa17b808 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 10 Sep 2014 15:06:30 -0400
Subject: tty: Convert tty_struct bitfield to ints

The stopped, hw_stopped, flow_stopped and packet bits are smp-unsafe
and interrupt-unsafe. For example,

CPU 0                         | CPU 1
                              |
tty->flow_stopped = 1         | tty->hw_stopped = 0

One of these updates will be corrupted, as the bitwise operation
on the bitfield is non-atomic.

Ensure each flag has a separate memory location, so concurrent
updates do not corrupt orthogonal states. Because DEC Alpha EV4 and EV5
cpus (from 1995) perform RMW on smaller-than-machine-word storage,
"separate memory location" must be int instead of byte.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 84132942902a..4cfd4a82fc31 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -261,7 +261,10 @@ struct tty_struct {
 	unsigned long flags;
 	int count;
 	struct winsize winsize;		/* winsize_mutex */
-	unsigned char stopped:1, hw_stopped:1, flow_stopped:1, packet:1;
+	int stopped;
+	int flow_stopped;
+	int hw_stopped;
+	int packet;
 	unsigned char ctrl_status;	/* ctrl_lock */
 	unsigned int receive_room;	/* Bytes free for queue */
 	int flow_change;
-- 
cgit v1.2.3


From f9e053dcfc02b0ad29daec8524fb1afe09774976 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 10 Sep 2014 15:06:31 -0400
Subject: tty: Serialize tty flow control changes with flow_lock

Without serialization, the flow control state can become inverted
wrt. the actual hardware state. For example,

CPU 0                          | CPU 1
stop_tty()                     |
  lock ctrl_lock               |
  tty->stopped = 1             |
  unlock ctrl_lock             |
                               | start_tty()
                               |   lock ctrl_lock
                               |   tty->stopped = 0
                               |   unlock ctrl_lock
                               |   driver->start()
  driver->stop()               |

In this case, the flow control state now indicates the tty has
been started, but the actual hardware state has actually been stopped.

Introduce tty->flow_lock spinlock to serialize tty flow control changes.
Split out unlocked __start_tty()/__stop_tty() flavors for use by
ioctl(TCXONC) in follow-on patch.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_io.c       | 39 ++++++++++++++++++++++++++++-----------
 include/linux/tty.h        |  5 ++++-
 include/linux/tty_driver.h |  4 ++++
 3 files changed, 36 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index d4eb2a8b7047..b8ddfef6b5d8 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -919,18 +919,18 @@ void no_tty(void)
  *	but not always.
  *
  *	Locking:
- *		Uses the tty control lock internally
+ *		ctrl_lock
+ *		flow_lock
  */
 
-void stop_tty(struct tty_struct *tty)
+void __stop_tty(struct tty_struct *tty)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&tty->ctrl_lock, flags);
-	if (tty->stopped) {
-		spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+
+	if (tty->stopped)
 		return;
-	}
 	tty->stopped = 1;
+	spin_lock_irqsave(&tty->ctrl_lock, flags);
 	if (tty->link && tty->link->packet) {
 		tty->ctrl_status &= ~TIOCPKT_START;
 		tty->ctrl_status |= TIOCPKT_STOP;
@@ -941,6 +941,14 @@ void stop_tty(struct tty_struct *tty)
 		(tty->ops->stop)(tty);
 }
 
+void stop_tty(struct tty_struct *tty)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tty->flow_lock, flags);
+	__stop_tty(tty);
+	spin_unlock_irqrestore(&tty->flow_lock, flags);
+}
 EXPORT_SYMBOL(stop_tty);
 
 /**
@@ -954,17 +962,17 @@ EXPORT_SYMBOL(stop_tty);
  *
  *	Locking:
  *		ctrl_lock
+ *		flow_lock
  */
 
-void start_tty(struct tty_struct *tty)
+void __start_tty(struct tty_struct *tty)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&tty->ctrl_lock, flags);
-	if (!tty->stopped || tty->flow_stopped) {
-		spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+
+	if (!tty->stopped || tty->flow_stopped)
 		return;
-	}
 	tty->stopped = 0;
+	spin_lock_irqsave(&tty->ctrl_lock, flags);
 	if (tty->link && tty->link->packet) {
 		tty->ctrl_status &= ~TIOCPKT_STOP;
 		tty->ctrl_status |= TIOCPKT_START;
@@ -977,6 +985,14 @@ void start_tty(struct tty_struct *tty)
 	tty_wakeup(tty);
 }
 
+void start_tty(struct tty_struct *tty)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tty->flow_lock, flags);
+	__start_tty(tty);
+	spin_unlock_irqrestore(&tty->flow_lock, flags);
+}
 EXPORT_SYMBOL(start_tty);
 
 /* We limit tty time update visibility to every 8 seconds or so. */
@@ -3019,6 +3035,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx)
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
 	mutex_init(&tty->atomic_write_lock);
 	spin_lock_init(&tty->ctrl_lock);
+	spin_lock_init(&tty->flow_lock);
 	INIT_LIST_HEAD(&tty->tty_files);
 	INIT_WORK(&tty->SAK_work, do_SAK_work);
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 4cfd4a82fc31..fd4148d3a261 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -252,6 +252,7 @@ struct tty_struct {
 	struct rw_semaphore termios_rwsem;
 	struct mutex winsize_mutex;
 	spinlock_t ctrl_lock;
+	spinlock_t flow_lock;
 	/* Termios values are protected by the termios rwsem */
 	struct ktermios termios, termios_locked;
 	struct termiox *termiox;	/* May be NULL for unsupported */
@@ -261,7 +262,7 @@ struct tty_struct {
 	unsigned long flags;
 	int count;
 	struct winsize winsize;		/* winsize_mutex */
-	int stopped;
+	int stopped;			/* flow_lock */
 	int flow_stopped;
 	int hw_stopped;
 	int packet;
@@ -400,7 +401,9 @@ extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
 extern char *tty_name(struct tty_struct *tty, char *buf);
 extern void tty_wait_until_sent(struct tty_struct *tty, long timeout);
 extern int tty_check_change(struct tty_struct *tty);
+extern void __stop_tty(struct tty_struct *tty);
 extern void stop_tty(struct tty_struct *tty);
+extern void __start_tty(struct tty_struct *tty);
 extern void start_tty(struct tty_struct *tty);
 extern int tty_register_driver(struct tty_driver *driver);
 extern int tty_unregister_driver(struct tty_driver *driver);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index e48c608a8fa8..92e337c18839 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -152,6 +152,8 @@
  * 	This routine notifies the tty driver that it should stop
  * 	outputting characters to the tty device.  
  *
+ *	Called with ->flow_lock held. Serialized with start() method.
+ *
  *	Optional:
  *
  *	Note: Call stop_tty not this method.
@@ -161,6 +163,8 @@
  * 	This routine notifies the tty driver that it resume sending
  *	characters to the tty device.
  *
+ *	Called with ->flow_lock held. Serialized with stop() method.
+ *
  *	Optional:
  *
  *	Note: Call start_tty not this method.
-- 
cgit v1.2.3


From c545b66c6922b002b5fe224a6eaec58c913650b5 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 10 Sep 2014 15:06:33 -0400
Subject: tty: Serialize tcflow() with other tty flow control changes

Use newly-introduced tty->flow_lock to serialize updates to
tty->flow_stopped (via tcflow()) and with concurrent tty flow
control changes from other sources.

Merge the storage for ->stopped and ->flow_stopped, now that both
flags are serialized by ->flow_lock.

The padding bits are necessary to force the compiler to allocate the
type specified; otherwise, gcc will ignore the type specifier and
allocate the minimum number of bytes necessary to store the bitfield.
In turn, this would allow Alpha EV4 and EV5 cpus to corrupt adjacent
byte storage because those cpus use RMW to store byte and short data.

gcc versions < 4.7.2 will also corrupt storage adjacent to bitfields
smaller than unsigned long on ia64, ppc64, hppa64 and sparc64, thus
requiring more than unsigned int storage (which would otherwise be
sufficient to workaround the Alpha non-atomic byte/short storage problem).

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_ioctl.c | 8 ++++++--
 include/linux/tty.h     | 5 +++--
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index cd1285c3bfe9..dcf5c0af7de9 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -1177,16 +1177,20 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
 			return retval;
 		switch (arg) {
 		case TCOOFF:
+			spin_lock_irq(&tty->flow_lock);
 			if (!tty->flow_stopped) {
 				tty->flow_stopped = 1;
-				stop_tty(tty);
+				__stop_tty(tty);
 			}
+			spin_unlock_irq(&tty->flow_lock);
 			break;
 		case TCOON:
+			spin_lock_irq(&tty->flow_lock);
 			if (tty->flow_stopped) {
 				tty->flow_stopped = 0;
-				start_tty(tty);
+				__start_tty(tty);
 			}
+			spin_unlock_irq(&tty->flow_lock);
 			break;
 		case TCIOFF:
 			if (STOP_CHAR(tty) != __DISABLED_CHAR)
diff --git a/include/linux/tty.h b/include/linux/tty.h
index fd4148d3a261..d80b53642f2c 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -262,8 +262,9 @@ struct tty_struct {
 	unsigned long flags;
 	int count;
 	struct winsize winsize;		/* winsize_mutex */
-	int stopped;			/* flow_lock */
-	int flow_stopped;
+	unsigned long stopped:1,	/* flow_lock */
+		      flow_stopped:1,
+		      unused:62;
 	int hw_stopped;
 	int packet;
 	unsigned char ctrl_status;	/* ctrl_lock */
-- 
cgit v1.2.3


From 136d5258b2bc4ffae99cb69874a76624c26fbfad Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 10 Sep 2014 15:06:34 -0400
Subject: tty: Move and rename send_prio_char() as tty_send_xchar()

Relocate the file-scope function, send_prio_char(), as a global
helper tty_send_xchar(). Remove the global declarations for
tty_write_lock()/tty_write_unlock(), as these are file-scope only now.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_io.c    | 33 +++++++++++++++++++++++++++++++--
 drivers/tty/tty_ioctl.c | 33 ++-------------------------------
 include/linux/tty.h     |  3 +--
 3 files changed, 34 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index c249ff1d51ce..2f6f9b5e4891 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1023,14 +1023,14 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
 	return i;
 }
 
-void tty_write_unlock(struct tty_struct *tty)
+static void tty_write_unlock(struct tty_struct *tty)
 	__releases(&tty->atomic_write_lock)
 {
 	mutex_unlock(&tty->atomic_write_lock);
 	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
 }
 
-int tty_write_lock(struct tty_struct *tty, int ndelay)
+static int tty_write_lock(struct tty_struct *tty, int ndelay)
 	__acquires(&tty->atomic_write_lock)
 {
 	if (!mutex_trylock(&tty->atomic_write_lock)) {
@@ -1217,6 +1217,35 @@ ssize_t redirected_tty_write(struct file *file, const char __user *buf,
 	return tty_write(file, buf, count, ppos);
 }
 
+/**
+ *	tty_send_xchar	-	send priority character
+ *
+ *	Send a high priority character to the tty even if stopped
+ *
+ *	Locking: none for xchar method, write ordering for write method.
+ */
+
+int tty_send_xchar(struct tty_struct *tty, char ch)
+{
+	int	was_stopped = tty->stopped;
+
+	if (tty->ops->send_xchar) {
+		tty->ops->send_xchar(tty, ch);
+		return 0;
+	}
+
+	if (tty_write_lock(tty, 0) < 0)
+		return -ERESTARTSYS;
+
+	if (was_stopped)
+		start_tty(tty);
+	tty->ops->write(tty, &ch, 1);
+	if (was_stopped)
+		stop_tty(tty);
+	tty_write_unlock(tty);
+	return 0;
+}
+
 static char ptychar[] = "pqrstuvwxyzabcde";
 
 /**
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index dcf5c0af7de9..ad9120d1c0f1 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -911,35 +911,6 @@ static int set_ltchars(struct tty_struct *tty, struct ltchars __user *ltchars)
 }
 #endif
 
-/**
- *	send_prio_char		-	send priority character
- *
- *	Send a high priority character to the tty even if stopped
- *
- *	Locking: none for xchar method, write ordering for write method.
- */
-
-static int send_prio_char(struct tty_struct *tty, char ch)
-{
-	int	was_stopped = tty->stopped;
-
-	if (tty->ops->send_xchar) {
-		tty->ops->send_xchar(tty, ch);
-		return 0;
-	}
-
-	if (tty_write_lock(tty, 0) < 0)
-		return -ERESTARTSYS;
-
-	if (was_stopped)
-		start_tty(tty);
-	tty->ops->write(tty, &ch, 1);
-	if (was_stopped)
-		stop_tty(tty);
-	tty_write_unlock(tty);
-	return 0;
-}
-
 /**
  *	tty_change_softcar	-	carrier change ioctl helper
  *	@tty: tty to update
@@ -1194,11 +1165,11 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
 			break;
 		case TCIOFF:
 			if (STOP_CHAR(tty) != __DISABLED_CHAR)
-				return send_prio_char(tty, STOP_CHAR(tty));
+				return tty_send_xchar(tty, STOP_CHAR(tty));
 			break;
 		case TCION:
 			if (START_CHAR(tty) != __DISABLED_CHAR)
-				return send_prio_char(tty, START_CHAR(tty));
+				return tty_send_xchar(tty, START_CHAR(tty));
 			break;
 		default:
 			return -EINVAL;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d80b53642f2c..4c6b9fd3c750 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -418,6 +418,7 @@ extern void tty_unregister_device(struct tty_driver *driver, unsigned index);
 extern int tty_read_raw_data(struct tty_struct *tty, unsigned char *bufp,
 			     int buflen);
 extern void tty_write_message(struct tty_struct *tty, char *msg);
+extern int tty_send_xchar(struct tty_struct *tty, char ch);
 extern int tty_put_char(struct tty_struct *tty, unsigned char c);
 extern int tty_chars_in_buffer(struct tty_struct *tty);
 extern int tty_write_room(struct tty_struct *tty);
@@ -502,8 +503,6 @@ extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
 extern struct mutex tty_mutex;
 extern spinlock_t tty_files_lock;
 
-extern void tty_write_unlock(struct tty_struct *tty);
-extern int tty_write_lock(struct tty_struct *tty, int ndelay);
 #define tty_is_writelocked(tty)  (mutex_is_locked(&tty->atomic_write_lock))
 
 extern void tty_port_init(struct tty_port *port);
-- 
cgit v1.2.3


From 99416322dd16b810ba74098cc50ef2a844091d35 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 10 Sep 2014 15:06:36 -0400
Subject: tty: Workaround Alpha non-atomic byte storage in tty_struct

The Alpha EV4/EV5 cpus can corrupt adjacent byte and short data because
those cpus use RMW to store byte and short data. Thus, concurrent adjacent
byte stores could become corrupted, if serialized by a different lock.
tty_struct uses different locks to protect certain fields within the
structure, and thus is vulnerable to byte stores which are not atomic.

Merge the ->ctrl_status byte and packet mode bit, both protected by the
->ctrl_lock, into an unsigned long.

The padding bits are necessary to force the compiler to allocate the
type specified; otherwise, gcc will ignore the type specifier and
allocate the minimum number of bytes required to store the bitfield.
In turn, this would allow Alpha EV4/EV5 cpus to corrupt adjacent byte
or short storage (because those cpus use RMW to store byte and short data).

gcc versions < 4.7.2 will also corrupt storage adjacent to bitfields
smaller than unsigned long on ia64, ppc64, hppa64, and sparc64, thus
requiring more than unsigned int storage (which would otherwise be
sufficient to fix the Alpha non-atomic storage problem).

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 4c6b9fd3c750..546e94557895 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -266,8 +266,9 @@ struct tty_struct {
 		      flow_stopped:1,
 		      unused:62;
 	int hw_stopped;
-	int packet;
-	unsigned char ctrl_status;	/* ctrl_lock */
+	unsigned long ctrl_status:8,	/* ctrl_lock */
+		      packet:1,
+		      unused_ctrl:55;
 	unsigned int receive_room;	/* Bytes free for queue */
 	int flow_change;
 
-- 
cgit v1.2.3


From f2693b430b33d9554daa0f21fbcae57633f8d4f1 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Tue, 19 Aug 2014 09:51:52 +0800
Subject: usb: hcd: add TPL support flag

The targeted hosts (non-PC hosts) need to have TPL (Targeted Peripheral List)
for USB OTG & EH certification and other vendor specific requirements.

The platform who needs TPL feature should set this flag at usb host
controller driver.

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/hcd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 485cd5e2100c..a48c89e96988 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -144,6 +144,7 @@ struct usb_hcd {
 	unsigned		has_tt:1;	/* Integrated TT in root hub */
 	unsigned		amd_resume_bug:1; /* AMD remote wakeup quirk */
 	unsigned		can_do_streams:1; /* HC supports streams */
+	unsigned		tpl_support:1; /* OTG & EH TPL support */
 
 	unsigned int		irq;		/* irq allocated */
 	void __iomem		*regs;		/* device memory/io */
-- 
cgit v1.2.3


From 05f8b35a62efb8e70ebcd78e9c957324e9caddad Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Tue, 19 Aug 2014 09:51:55 +0800
Subject: usb: common: add API to get if the platform supports TPL

The TPL (Targeted Peripheral List) is used for targeted hosts
(non-PC hosts), and it can be used at USB OTG & EH certification
and some specific products which need white list.

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/common/usb-common.c | 15 +++++++++++++++
 include/linux/usb/of.h          |  5 +++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/common/usb-common.c b/drivers/usb/common/usb-common.c
index 6dfd30a863c7..b530fd403ffb 100644
--- a/drivers/usb/common/usb-common.c
+++ b/drivers/usb/common/usb-common.c
@@ -139,6 +139,21 @@ enum usb_device_speed of_usb_get_maximum_speed(struct device_node *np)
 }
 EXPORT_SYMBOL_GPL(of_usb_get_maximum_speed);
 
+/**
+ * of_usb_host_tpl_support - to get if Targeted Peripheral List is supported
+ * for given targeted hosts (non-PC hosts)
+ * @np: Pointer to the given device_node
+ *
+ * The function gets if the targeted hosts support TPL or not
+ */
+bool of_usb_host_tpl_support(struct device_node *np)
+{
+	if (of_find_property(np, "tpl-support", NULL))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(of_usb_host_tpl_support);
 #endif
 
 MODULE_LICENSE("GPL");
diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h
index 8c38aa26b3bb..cfe0528cdbb1 100644
--- a/include/linux/usb/of.h
+++ b/include/linux/usb/of.h
@@ -14,6 +14,7 @@
 #if IS_ENABLED(CONFIG_OF)
 enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np);
 enum usb_device_speed of_usb_get_maximum_speed(struct device_node *np);
+bool of_usb_host_tpl_support(struct device_node *np);
 #else
 static inline enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np)
 {
@@ -25,6 +26,10 @@ of_usb_get_maximum_speed(struct device_node *np)
 {
 	return USB_SPEED_UNKNOWN;
 }
+static inline bool of_usb_host_tpl_support(struct device_node *np)
+{
+	return false;
+}
 #endif
 
 #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_USB_SUPPORT)
-- 
cgit v1.2.3


From f6a9ff07832a9d30d457e976e6233b4351cd4cdf Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Tue, 19 Aug 2014 09:51:56 +0800
Subject: usb: chipidea: add TPL support for targeted hosts

For OTG and Embedded hosts, they may need TPL (Targeted Peripheral List)
for usb certification and other vender specific requirements, the
platform can tell chipidea core driver if it supports tpl through DT
or platform data.

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/core.c  | 4 ++++
 drivers/usb/chipidea/host.c  | 1 +
 include/linux/usb/chipidea.h | 1 +
 3 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 619d13e29995..41d45a16dd30 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -473,6 +473,10 @@ static int ci_get_platdata(struct device *dev,
 				PTR_ERR(platdata->reg_vbus));
 			return PTR_ERR(platdata->reg_vbus);
 		}
+		/* Get TPL support */
+		if (!platdata->tpl_support)
+			platdata->tpl_support =
+				of_usb_host_tpl_support(dev->of_node);
 	}
 
 	if (of_usb_get_maximum_speed(dev->of_node) == USB_SPEED_FULL)
diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c
index a93d950e9468..0d6b24cb2270 100644
--- a/drivers/usb/chipidea/host.c
+++ b/drivers/usb/chipidea/host.c
@@ -60,6 +60,7 @@ static int host_start(struct ci_hdrc *ci)
 
 	hcd->power_budget = ci->platdata->power_budget;
 	hcd->phy = ci->transceiver;
+	hcd->tpl_support = ci->platdata->tpl_support;
 
 	ehci = hcd_to_ehci(hcd);
 	ehci->caps = ci->hw_bank.cap;
diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index bbe779f640be..e14c09a45c5a 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -31,6 +31,7 @@ struct ci_hdrc_platform_data {
 #define CI_HDRC_CONTROLLER_STOPPED_EVENT	1
 	void	(*notify_event) (struct ci_hdrc *ci, unsigned event);
 	struct regulator	*reg_vbus;
+	bool			tpl_support;
 };
 
 /* Default offset of capability registers */
-- 
cgit v1.2.3


From 593078525c8b234a35a36ff551b8716464e86481 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 15 Sep 2014 16:04:12 +0200
Subject: uas: Add a quirk for rejecting ATA_12 and ATA_16 commands

And set this quirk for the Seagate Expansion Desk (0bc2:2312), as that one
seems to hang upon receiving an ATA_12 or ATA_16 command.

https://bugzilla.kernel.org/show_bug.cgi?id=79511
https://bbs.archlinux.org/viewtopic.php?id=183190

While at it also add missing documentation for the u value for usb-storage
quirks.

Cc: stable@vger.kernel.org # 3.16, 3.17
Signed-off-by: Hans de Goede <hdegoede@redhat.com>

--
Changes in v2: Add documentation for new t and u usb-storage.quirks flags
Changes in v3: Fix typo in documentation
Changes in v4: Also apply the quirk to (0bc2:3312)
Changes in v5: Rebased on 3.17-rc5, drop u documentation, already upstream
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt |  2 ++
 drivers/usb/storage/uas.c           | 13 +++++++++++++
 drivers/usb/storage/unusual_uas.h   | 23 +++++++++++++----------
 drivers/usb/storage/usb.c           |  6 +++++-
 include/linux/usb_usual.h           |  2 ++
 5 files changed, 35 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 10d51c2f10d7..dd4fe9880e4a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3541,6 +3541,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 					bogus residue values);
 				s = SINGLE_LUN (the device has only one
 					Logical Unit);
+				t = NO_ATA_1X (don't allow ATA(12) and ATA(16)
+					commands, uas only);
 				u = IGNORE_UAS (don't bind to the uas driver);
 				w = NO_WP_DETECT (don't test whether the
 					medium is write-protected).
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 05b2d8e077d9..8c7d4a239f4c 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -28,6 +28,7 @@
 #include <scsi/scsi_tcq.h>
 
 #include "uas-detect.h"
+#include "scsiglue.h"
 
 /*
  * The r00-r01c specs define this version of the SENSE IU data structure.
@@ -49,6 +50,7 @@ struct uas_dev_info {
 	struct usb_anchor cmd_urbs;
 	struct usb_anchor sense_urbs;
 	struct usb_anchor data_urbs;
+	unsigned long flags;
 	int qdepth, resetting;
 	struct response_iu response;
 	unsigned cmd_pipe, status_pipe, data_in_pipe, data_out_pipe;
@@ -714,6 +716,15 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd,
 
 	BUILD_BUG_ON(sizeof(struct uas_cmd_info) > sizeof(struct scsi_pointer));
 
+	if ((devinfo->flags & US_FL_NO_ATA_1X) &&
+			(cmnd->cmnd[0] == ATA_12 || cmnd->cmnd[0] == ATA_16)) {
+		memcpy(cmnd->sense_buffer, usb_stor_sense_invalidCDB,
+		       sizeof(usb_stor_sense_invalidCDB));
+		cmnd->result = SAM_STAT_CHECK_CONDITION;
+		cmnd->scsi_done(cmnd);
+		return 0;
+	}
+
 	spin_lock_irqsave(&devinfo->lock, flags);
 
 	if (devinfo->resetting) {
@@ -1080,6 +1091,8 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	devinfo->resetting = 0;
 	devinfo->running_task = 0;
 	devinfo->shutdown = 0;
+	devinfo->flags = id->driver_info;
+	usb_stor_adjust_quirks(udev, &devinfo->flags);
 	init_usb_anchor(&devinfo->cmd_urbs);
 	init_usb_anchor(&devinfo->sense_urbs);
 	init_usb_anchor(&devinfo->data_urbs);
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 7244444df8ee..3ff2dd4c78ca 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -40,13 +40,16 @@
  * and don't forget to CC: the USB development list <linux-usb@vger.kernel.org>
  */
 
-/*
- * This is an example entry for the US_FL_IGNORE_UAS flag. Once we have an
- * actual entry using US_FL_IGNORE_UAS this entry should be removed.
- *
- * UNUSUAL_DEV(  0xabcd, 0x1234, 0x0100, 0x0100,
- *		"Example",
- *		"Storage with broken UAS",
- *		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
- *		US_FL_IGNORE_UAS),
- */
+/* https://bugzilla.kernel.org/show_bug.cgi?id=79511 */
+UNUSUAL_DEV(0x0bc2, 0x2312, 0x0000, 0x9999,
+		"Seagate",
+		"Expansion Desk",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_NO_ATA_1X),
+
+/* https://bbs.archlinux.org/viewtopic.php?id=183190 */
+UNUSUAL_DEV(0x0bc2, 0x3312, 0x0000, 0x9999,
+		"Seagate",
+		"Expansion Desk",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_NO_ATA_1X),
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index cedb29252a92..b9d1b9357287 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -478,7 +478,8 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags)
 			US_FL_CAPACITY_OK | US_FL_IGNORE_RESIDUE |
 			US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT |
 			US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 |
-			US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE);
+			US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE |
+			US_FL_NO_ATA_1X);
 
 	p = quirks;
 	while (*p) {
@@ -543,6 +544,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags)
 		case 's':
 			f |= US_FL_SINGLE_LUN;
 			break;
+		case 't':
+			f |= US_FL_NO_ATA_1X;
+			break;
 		case 'u':
 			f |= US_FL_IGNORE_UAS;
 			break;
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 9b7de1b46437..d271f88f30ad 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -73,6 +73,8 @@
 		/* Device advertises UAS but it is broken */	\
 	US_FLAG(BROKEN_FUA,	0x01000000)			\
 		/* Cannot handle FUA in WRITE or READ CDBs */	\
+	US_FLAG(NO_ATA_1X,	0x02000000)			\
+		/* Cannot handle ATA_12 or ATA_16 CDBs */	\
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.3


From 734016b00b50a3c6a0e1fc1b7b217e783f5123a1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 16 Sep 2014 18:36:52 +0200
Subject: uas: Add no-report-opcodes quirk

Besides the ASM1051 (*) needing sdev->no_report_opcodes = 1, it turns out that
the JMicron JMS567 also needs it to work properly with uas (usb-storage always
sets it). Since some of the scsi devs were not to keen on the idea to
outrightly set sdev->no_report_opcodes = 1 for all uas devices, so add a quirk
for this, and set it for the JMS567.

*) Which has become a non-issue since we've completely blacklisted uas on
the ASM1051 for other reasons

Cc: stable@vger.kernel.org
Reported-and-tested-by: Claudio Bizzarri <claudio.bizzarri@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt | 2 ++
 drivers/usb/storage/uas.c           | 4 ++++
 drivers/usb/storage/unusual_uas.h   | 7 +++++++
 drivers/usb/storage/usb.c           | 5 ++++-
 include/linux/usb_usual.h           | 2 ++
 5 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dd4fe9880e4a..1edd5fdc629d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3522,6 +3522,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 					READ_DISC_INFO command);
 				e = NO_READ_CAPACITY_16 (don't use
 					READ_CAPACITY_16 command);
+				f = NO_REPORT_OPCODES (don't use report opcodes
+					command, uas only);
 				h = CAPACITY_HEURISTICS (decrease the
 					reported device capacity by one
 					sector if the number is odd);
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 8c7d4a239f4c..cad02ac88564 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -961,6 +961,10 @@ static int uas_slave_alloc(struct scsi_device *sdev)
 static int uas_slave_configure(struct scsi_device *sdev)
 {
 	struct uas_dev_info *devinfo = sdev->hostdata;
+
+	if (devinfo->flags & US_FL_NO_REPORT_OPCODES)
+		sdev->no_report_opcodes = 1;
+
 	scsi_set_tag_type(sdev, MSG_ORDERED_TAG);
 	scsi_activate_tcq(sdev, devinfo->qdepth - 2);
 	return 0;
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 3ff2dd4c78ca..3e6243719df8 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -53,3 +53,10 @@ UNUSUAL_DEV(0x0bc2, 0x3312, 0x0000, 0x9999,
 		"Expansion Desk",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NO_ATA_1X),
+
+/* Reported-by: Claudio Bizzarri <claudio.bizzarri@gmail.com> */
+UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
+		"JMicron",
+		"JMS567",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_NO_REPORT_OPCODES),
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index b9d1b9357287..f60e7d463636 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -479,7 +479,7 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags)
 			US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT |
 			US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 |
 			US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE |
-			US_FL_NO_ATA_1X);
+			US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES);
 
 	p = quirks;
 	while (*p) {
@@ -517,6 +517,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags)
 		case 'e':
 			f |= US_FL_NO_READ_CAPACITY_16;
 			break;
+		case 'f':
+			f |= US_FL_NO_REPORT_OPCODES;
+			break;
 		case 'h':
 			f |= US_FL_CAPACITY_HEURISTICS;
 			break;
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index d271f88f30ad..a7f2604c5f25 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -75,6 +75,8 @@
 		/* Cannot handle FUA in WRITE or READ CDBs */	\
 	US_FLAG(NO_ATA_1X,	0x02000000)			\
 		/* Cannot handle ATA_12 or ATA_16 CDBs */	\
+	US_FLAG(NO_REPORT_OPCODES,	0x04000000)		\
+		/* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */	\
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.3


From ddbe1fca0bcb87ca8c199ea873a456ca8a948567 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Fri, 19 Sep 2014 10:13:50 +0800
Subject: USB: Add device quirk for ASUS T100 Base Station keyboard

This full-speed USB device generates spurious remote wakeup event
as soon as USB_DEVICE_REMOTE_WAKEUP feature is set. As the result,
Linux can't enter system suspend and S0ix power saving modes once
this keyboard is used.

This patch tries to introduce USB_QUIRK_IGNORE_REMOTE_WAKEUP quirk.
With this quirk set, wakeup capability will be ignored during
device configure.

This patch could be back-ported to kernels as old as 2.6.39.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c     | 6 ++++--
 drivers/usb/core/quirks.c  | 4 ++++
 include/linux/usb/quirks.h | 3 +++
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index db1d587cbb95..d5419292f89f 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1984,8 +1984,10 @@ void usb_set_device_state(struct usb_device *udev,
 					|| new_state == USB_STATE_SUSPENDED)
 				;	/* No change to wakeup settings */
 			else if (new_state == USB_STATE_CONFIGURED)
-				wakeup = udev->actconfig->desc.bmAttributes
-					 & USB_CONFIG_ATT_WAKEUP;
+				wakeup = (udev->quirks &
+					USB_QUIRK_IGNORE_REMOTE_WAKEUP) ? 0 :
+					udev->actconfig->desc.bmAttributes &
+					USB_CONFIG_ATT_WAKEUP;
 			else
 				wakeup = 0;
 		}
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index a342a783d496..5ae883dc21f5 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -163,6 +163,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* USB3503 */
 	{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* ASUS Base Station(T100) */
+	{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+			USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
 	{ }  /* terminating entry must be last */
 };
 
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index ffe565c94743..a4abaeb3fb00 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -44,4 +44,7 @@
 /* device can't handle device_qualifier descriptor requests */
 #define USB_QUIRK_DEVICE_QUALIFIER	0x00000100
 
+/* device generates spurious wakeup, ignore remote wakeup capability */
+#define USB_QUIRK_IGNORE_REMOTE_WAKEUP	0x00000200
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From 5cb307c4c27a9f37ef0c8e6bedc8c53c9197f48f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 23 Sep 2014 22:23:55 -0700
Subject: USB: quirks.h: use BIT()

Use the BIT macro instead of "open coding" bit fields.  This makes it
easier to actually see that the bits are not conflicting/overlapping.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/quirks.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index a4abaeb3fb00..9948c874e3f1 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -8,27 +8,27 @@
 #define __LINUX_USB_QUIRKS_H
 
 /* string descriptors must not be fetched using a 255-byte read */
-#define USB_QUIRK_STRING_FETCH_255	0x00000001
+#define USB_QUIRK_STRING_FETCH_255		BIT(0)
 
 /* device can't resume correctly so reset it instead */
-#define USB_QUIRK_RESET_RESUME		0x00000002
+#define USB_QUIRK_RESET_RESUME			BIT(1)
 
 /* device can't handle Set-Interface requests */
-#define USB_QUIRK_NO_SET_INTF		0x00000004
+#define USB_QUIRK_NO_SET_INTF			BIT(2)
 
 /* device can't handle its Configuration or Interface strings */
-#define USB_QUIRK_CONFIG_INTF_STRINGS	0x00000008
+#define USB_QUIRK_CONFIG_INTF_STRINGS		BIT(3)
 
 /* device can't be reset(e.g morph devices), don't use reset */
-#define USB_QUIRK_RESET			0x00000010
+#define USB_QUIRK_RESET				BIT(4)
 
 /* device has more interface descriptions than the bNumInterfaces count,
    and can't handle talking to these interfaces */
-#define USB_QUIRK_HONOR_BNUMINTERFACES	0x00000020
+#define USB_QUIRK_HONOR_BNUMINTERFACES		BIT(5)
 
 /* device needs a pause during initialization, after we read the device
    descriptor */
-#define USB_QUIRK_DELAY_INIT		0x00000040
+#define USB_QUIRK_DELAY_INIT			BIT(6)
 
 /*
  * For high speed and super speed interupt endpoints, the USB 2.0 and
@@ -39,12 +39,12 @@
  * Devices with this quirk report their bInterval as the result of this
  * calculation instead of the exponent variable used in the calculation.
  */
-#define USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL	0x00000080
+#define USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL	BIT(7)
 
 /* device can't handle device_qualifier descriptor requests */
-#define USB_QUIRK_DEVICE_QUALIFIER	0x00000100
+#define USB_QUIRK_DEVICE_QUALIFIER		BIT(8)
 
 /* device generates spurious wakeup, ignore remote wakeup capability */
-#define USB_QUIRK_IGNORE_REMOTE_WAKEUP	0x00000200
+#define USB_QUIRK_IGNORE_REMOTE_WAKEUP		BIT(9)
 
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From 833c95456a70826d1384883b73fd23aff24d366f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 12 Sep 2014 09:01:56 +0200
Subject: device coredump: add new device coredump class

Many devices run firmware and/or complex hardware, and most of that
can have bugs. When it misbehaves, however, it is often much harder
to debug than software running on the host.

Introduce a "device coredump" mechanism to allow dumping internal
device/firmware state through a generalized mechanism. As devices
are different and information needed can vary accordingly, this
doesn't prescribe a file format - it just provides mechanism to
get data to be able to capture it in a generalized way (e.g. in
distributions.)

The dumped data will be readable in sysfs in the virtual device's
data file under /sys/class/devcoredump/devcd*/. Writing to it will
free the data and remove the device, as does a 5-minute timeout.

Note that generalized capturing of such data may result in privacy
issues, so users generally need to be involved. In order to allow
certain users/system integrators/... to disable the feature at all,
introduce a Kconfig option to override the drivers that would like
to have the feature.

For now, this provides two ways of dumping data:
 1) with a vmalloc'ed area, that is then given to the subsystem
    and freed after retrieval or timeout
 2) with a generalized reader/free function method

We could/should add more options, e.g. a list of pages, since the
vmalloc area is very limited on some architectures.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS                 |   7 ++
 drivers/base/Kconfig        |  21 ++++
 drivers/base/Makefile       |   1 +
 drivers/base/devcoredump.c  | 265 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/devcoredump.h |  35 ++++++
 5 files changed, 329 insertions(+)
 create mode 100644 drivers/base/devcoredump.c
 create mode 100644 include/linux/devcoredump.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 809ecd680d88..d810a6fa8276 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2859,6 +2859,13 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:	Maintained
 F:	drivers/usb/dwc3/
 
+DEVICE COREDUMP (DEV_COREDUMP)
+M:	Johannes Berg <johannes@sipsolutions.net>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/base/devcoredump.c
+F:	include/linux/devcoredump.h
+
 DEVICE FREQUENCY (DEVFREQ)
 M:	MyungJoo Ham <myungjoo.ham@samsung.com>
 M:	Kyungmin Park <kyungmin.park@samsung.com>
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 4e7f0ff83ae7..134f763d90fd 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -165,6 +165,27 @@ config FW_LOADER_USER_HELPER_FALLBACK
 
 	  If you are unsure about this, say N here.
 
+config WANT_DEV_COREDUMP
+	bool
+	help
+	  Drivers should "select" this option if they desire to use the
+	  device coredump mechanism.
+
+config DISABLE_DEV_COREDUMP
+	bool "Disable device coredump" if EXPERT
+	help
+	  Disable the device coredump mechanism despite drivers wanting to
+	  use it; this allows for more sensitive systems or systems that
+	  don't want to ever access the information to not have the code,
+	  nor keep any data.
+
+	  If unsure, say N.
+
+config DEV_COREDUMP
+	bool
+	default y if WANT_DEV_COREDUMP
+	depends on !DISABLE_DEV_COREDUMP
+
 config DEBUG_DRIVER
 	bool "Driver Core verbose debug messages"
 	depends on DEBUG_KERNEL
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 4aab26ec0292..6922cd6850a2 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o
 obj-$(CONFIG_REGMAP)	+= regmap/
 obj-$(CONFIG_SOC_BUS) += soc.o
 obj-$(CONFIG_PINCTRL) += pinctrl.o
+obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
 
diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
new file mode 100644
index 000000000000..96614b04544c
--- /dev/null
+++ b/drivers/base/devcoredump.c
@@ -0,0 +1,265 @@
+/*
+ * This file is provided under the GPLv2 license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Mobile Communications GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ *  Intel Linux Wireless <ilw@linux.intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/devcoredump.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/workqueue.h>
+
+/* if data isn't read by userspace after 5 minutes then delete it */
+#define DEVCD_TIMEOUT	(HZ * 60 * 5)
+
+struct devcd_entry {
+	struct device devcd_dev;
+	const void *data;
+	size_t datalen;
+	struct module *owner;
+	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+			const void *data, size_t datalen);
+	void (*free)(const void *data);
+	struct delayed_work del_wk;
+	struct device *failing_dev;
+};
+
+static struct devcd_entry *dev_to_devcd(struct device *dev)
+{
+	return container_of(dev, struct devcd_entry, devcd_dev);
+}
+
+static void devcd_dev_release(struct device *dev)
+{
+	struct devcd_entry *devcd = dev_to_devcd(dev);
+
+	devcd->free(devcd->data);
+	module_put(devcd->owner);
+
+	/*
+	 * this seems racy, but I don't see a notifier or such on
+	 * a struct device to know when it goes away?
+	 */
+	if (devcd->failing_dev->kobj.sd)
+		sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj,
+				  "devcoredump");
+
+	put_device(devcd->failing_dev);
+	kfree(devcd);
+}
+
+static void devcd_del(struct work_struct *wk)
+{
+	struct devcd_entry *devcd;
+
+	devcd = container_of(wk, struct devcd_entry, del_wk.work);
+
+	device_del(&devcd->devcd_dev);
+	put_device(&devcd->devcd_dev);
+}
+
+static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr,
+			       char *buffer, loff_t offset, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct devcd_entry *devcd = dev_to_devcd(dev);
+
+	return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
+}
+
+static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct devcd_entry *devcd = dev_to_devcd(dev);
+
+	mod_delayed_work(system_wq, &devcd->del_wk, 0);
+
+	return count;
+}
+
+static struct bin_attribute devcd_attr_data = {
+	.attr = { .name = "data", .mode = S_IRUSR | S_IWUSR, },
+	.size = 0,
+	.read = devcd_data_read,
+	.write = devcd_data_write,
+};
+
+static struct bin_attribute *devcd_dev_bin_attrs[] = {
+	&devcd_attr_data, NULL,
+};
+
+static const struct attribute_group devcd_dev_group = {
+	.bin_attrs = devcd_dev_bin_attrs,
+};
+
+static const struct attribute_group *devcd_dev_groups[] = {
+	&devcd_dev_group, NULL,
+};
+
+static struct class devcd_class = {
+	.name		= "devcoredump",
+	.owner		= THIS_MODULE,
+	.dev_release	= devcd_dev_release,
+	.dev_groups	= devcd_dev_groups,
+};
+
+static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,
+			   const void *data, size_t datalen)
+{
+	if (offset > datalen)
+		return -EINVAL;
+
+	if (offset + count > datalen)
+		count = datalen - offset;
+
+	if (count)
+		memcpy(buffer, ((u8 *)data) + offset, count);
+
+	return count;
+}
+
+/**
+ * dev_coredumpv - create device coredump with vmalloc data
+ * @dev: the struct device for the crashed device
+ * @data: vmalloc data containing the device coredump
+ * @datalen: length of the data
+ * @gfp: allocation flags
+ *
+ * This function takes ownership of the vmalloc'ed data and will free
+ * it when it is no longer used. See dev_coredumpm() for more information.
+ */
+void dev_coredumpv(struct device *dev, const void *data, size_t datalen,
+		   gfp_t gfp)
+{
+	dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, vfree);
+}
+EXPORT_SYMBOL_GPL(dev_coredumpv);
+
+static int devcd_match_failing(struct device *dev, const void *failing)
+{
+	struct devcd_entry *devcd = dev_to_devcd(dev);
+
+	return devcd->failing_dev == failing;
+}
+
+/**
+ * dev_coredumpm - create device coredump with read/free methods
+ * @dev: the struct device for the crashed device
+ * @owner: the module that contains the read/free functions, use %THIS_MODULE
+ * @data: data cookie for the @read/@free functions
+ * @datalen: length of the data
+ * @gfp: allocation flags
+ * @read: function to read from the given buffer
+ * @free: function to free the given buffer
+ *
+ * Creates a new device coredump for the given device. If a previous one hasn't
+ * been read yet, the new coredump is discarded. The data lifetime is determined
+ * by the device coredump framework and when it is no longer needed the @free
+ * function will be called to free the data.
+ */
+void dev_coredumpm(struct device *dev, struct module *owner,
+		   const void *data, size_t datalen, gfp_t gfp,
+		   ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+				   const void *data, size_t datalen),
+		   void (*free)(const void *data))
+{
+	static atomic_t devcd_count = ATOMIC_INIT(0);
+	struct devcd_entry *devcd;
+	struct device *existing;
+
+	existing = class_find_device(&devcd_class, NULL, dev,
+				     devcd_match_failing);
+	if (existing) {
+		put_device(existing);
+		goto free;
+	}
+
+	if (!try_module_get(owner))
+		goto free;
+
+	devcd = kzalloc(sizeof(*devcd), gfp);
+	if (!devcd)
+		goto put_module;
+
+	devcd->owner = owner;
+	devcd->data = data;
+	devcd->datalen = datalen;
+	devcd->read = read;
+	devcd->free = free;
+	devcd->failing_dev = get_device(dev);
+
+	device_initialize(&devcd->devcd_dev);
+
+	dev_set_name(&devcd->devcd_dev, "devcd%d",
+		     atomic_inc_return(&devcd_count));
+	devcd->devcd_dev.class = &devcd_class;
+
+	if (device_add(&devcd->devcd_dev))
+		goto put_device;
+
+	if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj,
+			      "failing_device"))
+		/* nothing - symlink will be missing */;
+
+	if (sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj,
+			      "devcoredump"))
+		/* nothing - symlink will be missing */;
+
+	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
+
+	return;
+ put_device:
+	put_device(&devcd->devcd_dev);
+ put_module:
+	module_put(owner);
+ free:
+	free(data);
+}
+EXPORT_SYMBOL_GPL(dev_coredumpm);
+
+static int __init devcoredump_init(void)
+{
+	return class_register(&devcd_class);
+}
+__initcall(devcoredump_init);
+
+static int devcd_free(struct device *dev, void *data)
+{
+	struct devcd_entry *devcd = dev_to_devcd(dev);
+
+	flush_delayed_work(&devcd->del_wk);
+	return 0;
+}
+
+static void __exit devcoredump_exit(void)
+{
+	class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
+	class_unregister(&devcd_class);
+}
+__exitcall(devcoredump_exit);
diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h
new file mode 100644
index 000000000000..c0a360e99f64
--- /dev/null
+++ b/include/linux/devcoredump.h
@@ -0,0 +1,35 @@
+#ifndef __DEVCOREDUMP_H
+#define __DEVCOREDUMP_H
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+
+#ifdef CONFIG_DEV_COREDUMP
+void dev_coredumpv(struct device *dev, const void *data, size_t datalen,
+		   gfp_t gfp);
+
+void dev_coredumpm(struct device *dev, struct module *owner,
+		   const void *data, size_t datalen, gfp_t gfp,
+		   ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+				   const void *data, size_t datalen),
+		   void (*free)(const void *data));
+#else
+static inline void dev_coredumpv(struct device *dev, const void *data,
+				 size_t datalen, gfp_t gfp)
+{
+	vfree(data);
+}
+
+static inline void
+dev_coredumpm(struct device *dev, struct module *owner,
+	      const void *data, size_t datalen, gfp_t gfp,
+	      ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+			      const void *data, size_t datalen),
+	      void (*free)(const void *data))
+{
+	free(data);
+}
+#endif /* CONFIG_DEV_COREDUMP */
+
+#endif /* __DEVCOREDUMP_H */
-- 
cgit v1.2.3


From 36c53b3cc3fac6952af68f43609b15ae050c9318 Mon Sep 17 00:00:00 2001
From: Federico Vaga <federico.vaga@cern.ch>
Date: Tue, 2 Sep 2014 17:31:40 +0200
Subject: ipack: save carrier owner to allow device to get it

There was not any kind of protection against carrier driver removal.
In this way, device driver can 'get' the carrier driver when it is
using it.

Signed-off-by: Federico Vaga <federico.vaga@cern.ch>
Acked-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ipack/carriers/tpci200.c |  3 ++-
 drivers/ipack/ipack.c            |  4 +++-
 include/linux/ipack.h            | 24 +++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ipack/carriers/tpci200.c b/drivers/ipack/carriers/tpci200.c
index de5e32151a1e..9b23843dcad4 100644
--- a/drivers/ipack/carriers/tpci200.c
+++ b/drivers/ipack/carriers/tpci200.c
@@ -572,7 +572,8 @@ static int tpci200_pci_probe(struct pci_dev *pdev,
 	/* Register the carrier in the industry pack bus driver */
 	tpci200->info->ipack_bus = ipack_bus_register(&pdev->dev,
 						      TPCI200_NB_SLOT,
-						      &tpci200_bus_ops);
+						      &tpci200_bus_ops,
+						      THIS_MODULE);
 	if (!tpci200->info->ipack_bus) {
 		dev_err(&pdev->dev,
 			"error registering the carrier on ipack driver\n");
diff --git a/drivers/ipack/ipack.c b/drivers/ipack/ipack.c
index d0016ba469ed..c0e7b624ce54 100644
--- a/drivers/ipack/ipack.c
+++ b/drivers/ipack/ipack.c
@@ -206,7 +206,8 @@ static struct bus_type ipack_bus_type = {
 };
 
 struct ipack_bus_device *ipack_bus_register(struct device *parent, int slots,
-					    const struct ipack_bus_ops *ops)
+					    const struct ipack_bus_ops *ops,
+					    struct module *owner)
 {
 	int bus_nr;
 	struct ipack_bus_device *bus;
@@ -225,6 +226,7 @@ struct ipack_bus_device *ipack_bus_register(struct device *parent, int slots,
 	bus->parent = parent;
 	bus->slots = slots;
 	bus->ops = ops;
+	bus->owner = owner;
 	return bus;
 }
 EXPORT_SYMBOL_GPL(ipack_bus_register);
diff --git a/include/linux/ipack.h b/include/linux/ipack.h
index 1888e06ddf64..8bddc3fbdddf 100644
--- a/include/linux/ipack.h
+++ b/include/linux/ipack.h
@@ -172,6 +172,7 @@ struct ipack_bus_ops {
  *	@ops: bus operations for the mezzanine drivers
  */
 struct ipack_bus_device {
+	struct module *owner;
 	struct device *parent;
 	int slots;
 	int bus_nr;
@@ -189,7 +190,8 @@ struct ipack_bus_device {
  * available bus device in ipack.
  */
 struct ipack_bus_device *ipack_bus_register(struct device *parent, int slots,
-					    const struct ipack_bus_ops *ops);
+					    const struct ipack_bus_ops *ops,
+					    struct module *owner);
 
 /**
  *	ipack_bus_unregister -- unregister an ipack bus
@@ -265,3 +267,23 @@ void ipack_put_device(struct ipack_device *dev);
 	 .format = (_format), \
 	 .vendor = (vend), \
 	 .device = (dev)
+
+/**
+ * ipack_get_carrier - it increase the carrier ref. counter of
+ *                     the carrier module
+ * @dev: mezzanine device which wants to get the carrier
+ */
+static inline int ipack_get_carrier(struct ipack_device *dev)
+{
+	return try_module_get(dev->bus->owner);
+}
+
+/**
+ * ipack_get_carrier - it decrease the carrier ref. counter of
+ *                     the carrier module
+ * @dev: mezzanine device which wants to get the carrier
+ */
+static inline void ipack_put_carrier(struct ipack_device *dev)
+{
+	module_put(dev->bus->owner);
+}
-- 
cgit v1.2.3


From ffdbb715fa0c53203b1ea2a6ecc54bdcc8951612 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 28 Aug 2014 14:14:09 +0100
Subject: misc: st_kim: Increase size of dev_name buffer to incorporate
 termination

Calling strncpy with a maximum size argument of 32 bytes on destination
array kim_gdata->dev_name of size 32 bytes might leave the destination
string unterminated.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ti_wilink_st.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h
index 932b76392248..884d6263e962 100644
--- a/include/linux/ti_wilink_st.h
+++ b/include/linux/ti_wilink_st.h
@@ -268,7 +268,7 @@ struct kim_data_s {
 	struct st_data_s *core_data;
 	struct chip_version version;
 	unsigned char ldisc_install;
-	unsigned char dev_name[UART_DEV_NAME_LEN];
+	unsigned char dev_name[UART_DEV_NAME_LEN + 1];
 	unsigned char flow_cntrl;
 	unsigned long baud_rate;
 };
-- 
cgit v1.2.3


From c9f21cb6388898bfe69886d001316dae7ecc9a4b Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri, 5 Sep 2014 10:31:09 -0500
Subject: crypto: ccp - Check for CCP before registering crypto algs

If the ccp is built as a built-in module, then ccp-crypto (whether
built as a module or a built-in module) will be able to load and
it will register its crypto algorithms.  If the system does not have
a CCP this will result in -ENODEV being returned whenever a command
is attempted to be queued by the registered crypto algorithms.

Add an API, ccp_present(), that checks for the presence of a CCP
on the system.  The ccp-crypto module can use this to determine if it
should register it's crypto alogorithms.

Cc: stable@vger.kernel.org
Reported-by: Scot Doyle <lkml14@scotdoyle.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Tested-by: Scot Doyle <lkml14@scotdoyle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-main.c |  4 ++++
 drivers/crypto/ccp/ccp-dev.c         | 14 ++++++++++++++
 include/linux/ccp.h                  | 12 ++++++++++++
 3 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 20dc848481e7..4d4e016d755b 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -367,6 +367,10 @@ static int ccp_crypto_init(void)
 {
 	int ret;
 
+	ret = ccp_present();
+	if (ret)
+		return ret;
+
 	spin_lock_init(&req_queue_lock);
 	INIT_LIST_HEAD(&req_queue.cmds);
 	req_queue.backlog = &req_queue.cmds;
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index a7d110652a74..c6e6171eb6d3 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -54,6 +54,20 @@ static inline void ccp_del_device(struct ccp_device *ccp)
 	ccp_dev = NULL;
 }
 
+/**
+ * ccp_present - check if a CCP device is present
+ *
+ * Returns zero if a CCP device is present, -ENODEV otherwise.
+ */
+int ccp_present(void)
+{
+	if (ccp_get_device())
+		return 0;
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(ccp_present);
+
 /**
  * ccp_enqueue_cmd - queue an operation for processing by the CCP
  *
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index ebcc9d146219..7f437036baa4 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -26,6 +26,13 @@ struct ccp_cmd;
 #if defined(CONFIG_CRYPTO_DEV_CCP_DD) || \
 	defined(CONFIG_CRYPTO_DEV_CCP_DD_MODULE)
 
+/**
+ * ccp_present - check if a CCP device is present
+ *
+ * Returns zero if a CCP device is present, -ENODEV otherwise.
+ */
+int ccp_present(void);
+
 /**
  * ccp_enqueue_cmd - queue an operation for processing by the CCP
  *
@@ -53,6 +60,11 @@ int ccp_enqueue_cmd(struct ccp_cmd *cmd);
 
 #else /* CONFIG_CRYPTO_DEV_CCP_DD is not enabled */
 
+static inline int ccp_present(void)
+{
+	return -ENODEV;
+}
+
 static inline int ccp_enqueue_cmd(struct ccp_cmd *cmd)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From bef59c5024be687ef4f228915af9961307a779ab Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 20 Aug 2014 15:26:35 +0200
Subject: devres: Improve devm_kasprintf()/kvasprintf() support

  - Add devm_kasprintf()/kvasprintf(), introduced by commit
    75f2a4ead5d5890ada9c2663a70fb58613c0d9f2 ("devres: Add
    devm_kasprintf and devm_kvasprintf API"), to
    Documentation/driver-model/devres.txt,
  - Improve kernel doc: the string is not an existing formatted string,
    but is formatted into the newly-allocated buffer,
  - Add a __printf() annotation to devm_kasprintf(), so the compiler
    will verify the format string argument types.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-model/devres.txt |  2 ++
 drivers/base/devres.c                 | 15 ++++++++-------
 include/linux/device.h                |  4 ++--
 3 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index ffa2d0e67a5d..40677443c0c5 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -293,12 +293,14 @@ MDIO
 MEM
   devm_free_pages()
   devm_get_free_pages()
+  devm_kasprintf()
   devm_kcalloc()
   devm_kfree()
   devm_kmalloc()
   devm_kmalloc_array()
   devm_kmemdup()
   devm_kstrdup()
+  devm_kvasprintf()
   devm_kzalloc()
 
 PCI
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 69d9b0c89a01..c8a53d1e019f 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -817,13 +817,13 @@ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp)
 EXPORT_SYMBOL_GPL(devm_kstrdup);
 
 /**
- * devm_kvasprintf - Allocate resource managed space
- *			for the formatted string.
+ * devm_kvasprintf - Allocate resource managed space and format a string
+ *		     into that.
  * @dev: Device to allocate memory for
  * @gfp: the GFP mask used in the devm_kmalloc() call when
  *       allocating memory
- * @fmt: the formatted string to duplicate
- * @ap: the list of tokens to be placed in the formatted string
+ * @fmt: The printf()-style format string
+ * @ap: Arguments for the format string
  * RETURNS:
  * Pointer to allocated string on success, NULL on failure.
  */
@@ -849,12 +849,13 @@ char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt,
 EXPORT_SYMBOL(devm_kvasprintf);
 
 /**
- * devm_kasprintf - Allocate resource managed space
- *		and copy an existing formatted string into that
+ * devm_kasprintf - Allocate resource managed space and format a string
+ *		    into that.
  * @dev: Device to allocate memory for
  * @gfp: the GFP mask used in the devm_kmalloc() call when
  *       allocating memory
- * @fmt: the string to duplicate
+ * @fmt: The printf()-style format string
+ * @...: Arguments for the format string
  * RETURNS:
  * Pointer to allocated string on success, NULL on failure.
  */
diff --git a/include/linux/device.h b/include/linux/device.h
index 43d183aeb25b..a608e237f0a8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -607,8 +607,8 @@ extern int devres_release_group(struct device *dev, void *id);
 extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp);
 extern char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt,
 			     va_list ap);
-extern char *devm_kasprintf(struct device *dev, gfp_t gfp,
-			    const char *fmt, ...);
+extern __printf(3, 4)
+char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...);
 static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp)
 {
 	return devm_kmalloc(dev, size, gfp | __GFP_ZERO);
-- 
cgit v1.2.3


From 295494af0695bc190e6b939df1036af898c2856f Mon Sep 17 00:00:00 2001
From: Octavian Purdila <octavian.purdila@intel.com>
Date: Fri, 19 Sep 2014 23:22:44 +0300
Subject: gpiolib: add irq_not_threaded flag to gpio_chip

Some GPIO chips (e.g. the DLN2 USB adapter) have blocking get/set
operation but do not need a threaded irq handler.

Signed-off-by: Octavian Purdila <octavian.purdila@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 2 +-
 include/linux/gpio/driver.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4acf8b2e9226..6fdae789ccc9 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -437,7 +437,7 @@ static int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
 	irq_set_lockdep_class(irq, &gpiochip_irq_lock_class);
 	irq_set_chip_and_handler(irq, chip->irqchip, chip->irq_handler);
 	/* Chips that can sleep need nested thread handlers */
-	if (chip->can_sleep)
+	if (chip->can_sleep && !chip->irq_not_threaded)
 		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 	set_irq_flags(irq, IRQF_VALID);
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 560bf7fa614f..719fab209158 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -56,6 +56,8 @@ struct seq_file;
  *	as the chip access may sleep when e.g. reading out the IRQ status
  *	registers.
  * @exported: flags if the gpiochip is exported for use from sysfs. Private.
+ * @irq_not_threaded: flag must be set if @can_sleep is set but the
+ *	IRQs don't need to be threaded
  *
  * A gpio_chip can help platforms abstract various sources of GPIOs so
  * they can all be accessed through a common programing interface.
@@ -101,6 +103,7 @@ struct gpio_chip {
 	struct gpio_desc	*desc;
 	const char		*const *names;
 	bool			can_sleep;
+	bool			irq_not_threaded;
 	bool			exported;
 
 #ifdef CONFIG_GPIOLIB_IRQCHIP
-- 
cgit v1.2.3


From 8af465db967bf25a4617416c0cbaaaa506d444f5 Mon Sep 17 00:00:00 2001
From: Roger Tseng <rogerable@realtek.com>
Date: Wed, 24 Sep 2014 17:07:13 +0800
Subject: mmc: core: Add new power_mode MMC_POWER_UNDEFINED

Add MMC_POWER_UNDEFINED for power_mode in struct mmc_ios and use it as
the initial value of host->ios.power_mode.

For hosts with MMC_CAP2_NO_PRESCAN_POWERUP, this makes the later
mmc_power_off() do real power-off things instead of NOP, and further
prevents state messed up in cards that was already initialized (eg. by
BIOS of UEFI driver).

Signed-off-by: Roger Tseng <rogerable@realtek.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/core.c  | 1 +
 include/linux/mmc/host.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index b1e209f479ad..cbb23215ad87 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2482,6 +2482,7 @@ void mmc_start_host(struct mmc_host *host)
 {
 	host->f_init = max(freqs[0], host->f_min);
 	host->rescan_disable = 0;
+	host->ios.power_mode = MMC_POWER_UNDEFINED;
 	if (host->caps2 & MMC_CAP2_NO_PRESCAN_POWERUP)
 		mmc_power_off(host);
 	else
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 797ae657dc3d..df0c15396bbf 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -42,6 +42,7 @@ struct mmc_ios {
 #define MMC_POWER_OFF		0
 #define MMC_POWER_UP		1
 #define MMC_POWER_ON		2
+#define MMC_POWER_UNDEFINED	3
 
 	unsigned char	bus_width;		/* data bus width */
 
-- 
cgit v1.2.3


From db4fa45ed3182d8206af241811dfc99369ffa849 Mon Sep 17 00:00:00 2001
From: Anders Berg <anders.berg@avagotech.com>
Date: Wed, 17 Sep 2014 08:46:58 +0200
Subject: spi: pl022: Add support for chip select extension

Add support for a extended PL022 which has an extra register for controlling up
to five chip select signals. This controller is found on the AXM5516 SoC.
Unfortunately the PrimeCell identification registers are identical to a
standard ARM PL022. To work around this, the peripheral ID must be overridden
in the device tree using the "arm,primecell-periphid" property with the value
0x000b6022.

Signed-off-by: Anders Berg <anders.berg@avagotech.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pl022.c  | 59 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/amba/bus.h |  5 ++++
 2 files changed, 63 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 7f13f3f7198b..d0741b2eabb4 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -82,6 +82,7 @@
 #define SSP_MIS(r)	(r + 0x01C)
 #define SSP_ICR(r)	(r + 0x020)
 #define SSP_DMACR(r)	(r + 0x024)
+#define SSP_CSR(r)	(r + 0x030) /* vendor extension */
 #define SSP_ITCR(r)	(r + 0x080)
 #define SSP_ITIP(r)	(r + 0x084)
 #define SSP_ITOP(r)	(r + 0x088)
@@ -197,6 +198,12 @@
 /* Transmit DMA Enable bit */
 #define SSP_DMACR_MASK_TXDMAE		(0x1UL << 1)
 
+/*
+ * SSP Chip Select Control Register - SSP_CSR
+ * (vendor extension)
+ */
+#define SSP_CSR_CSVALUE_MASK		(0x1FUL << 0)
+
 /*
  * SSP Integration Test control Register - SSP_ITCR
  */
@@ -313,6 +320,7 @@ enum ssp_writing {
  * @extended_cr: 32 bit wide control register 0 with extra
  * features and extra features in CR1 as found in the ST variants
  * @pl023: supports a subset of the ST extensions called "PL023"
+ * @internal_cs_ctrl: supports chip select control register
  */
 struct vendor_data {
 	int fifodepth;
@@ -321,6 +329,7 @@ struct vendor_data {
 	bool extended_cr;
 	bool pl023;
 	bool loopback;
+	bool internal_cs_ctrl;
 };
 
 /**
@@ -440,9 +449,32 @@ static void null_cs_control(u32 command)
 	pr_debug("pl022: dummy chip select control, CS=0x%x\n", command);
 }
 
+/**
+ * internal_cs_control - Control chip select signals via SSP_CSR.
+ * @pl022: SSP driver private data structure
+ * @command: select/delect the chip
+ *
+ * Used on controller with internal chip select control via SSP_CSR register
+ * (vendor extension). Each of the 5 LSB in the register controls one chip
+ * select signal.
+ */
+static void internal_cs_control(struct pl022 *pl022, u32 command)
+{
+	u32 tmp;
+
+	tmp = readw(SSP_CSR(pl022->virtbase));
+	if (command == SSP_CHIP_SELECT)
+		tmp &= ~BIT(pl022->cur_cs);
+	else
+		tmp |= BIT(pl022->cur_cs);
+	writew(tmp, SSP_CSR(pl022->virtbase));
+}
+
 static void pl022_cs_control(struct pl022 *pl022, u32 command)
 {
-	if (gpio_is_valid(pl022->cur_cs))
+	if (pl022->vendor->internal_cs_ctrl)
+		internal_cs_control(pl022, command);
+	else if (gpio_is_valid(pl022->cur_cs))
 		gpio_set_value(pl022->cur_cs, command);
 	else
 		pl022->cur_chip->cs_control(command);
@@ -2122,6 +2154,9 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id)
 	if (platform_info->num_chipselect && platform_info->chipselects) {
 		for (i = 0; i < num_cs; i++)
 			pl022->chipselects[i] = platform_info->chipselects[i];
+	} else if (pl022->vendor->internal_cs_ctrl) {
+		for (i = 0; i < num_cs; i++)
+			pl022->chipselects[i] = i;
 	} else if (IS_ENABLED(CONFIG_OF)) {
 		for (i = 0; i < num_cs; i++) {
 			int cs_gpio = of_get_named_gpio(np, "cs-gpios", i);
@@ -2352,6 +2387,7 @@ static struct vendor_data vendor_arm = {
 	.extended_cr = false,
 	.pl023 = false,
 	.loopback = true,
+	.internal_cs_ctrl = false,
 };
 
 static struct vendor_data vendor_st = {
@@ -2361,6 +2397,7 @@ static struct vendor_data vendor_st = {
 	.extended_cr = true,
 	.pl023 = false,
 	.loopback = true,
+	.internal_cs_ctrl = false,
 };
 
 static struct vendor_data vendor_st_pl023 = {
@@ -2370,6 +2407,17 @@ static struct vendor_data vendor_st_pl023 = {
 	.extended_cr = true,
 	.pl023 = true,
 	.loopback = false,
+	.internal_cs_ctrl = false,
+};
+
+static struct vendor_data vendor_lsi = {
+	.fifodepth = 8,
+	.max_bpw = 16,
+	.unidir = false,
+	.extended_cr = false,
+	.pl023 = false,
+	.loopback = true,
+	.internal_cs_ctrl = true,
 };
 
 static struct amba_id pl022_ids[] = {
@@ -2403,6 +2451,15 @@ static struct amba_id pl022_ids[] = {
 		.mask	= 0xffffffff,
 		.data	= &vendor_st_pl023,
 	},
+	{
+		/*
+		 * PL022 variant that has a chip select control register whih
+		 * allows control of 5 output signals nCS[0:4].
+		 */
+		.id	= 0x000b6022,
+		.mask	= 0x000fffff,
+		.data	= &vendor_lsi,
+	},
 	{ 0, 0 },
 };
 
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index fdd7e1b61f60..c324f5700d1a 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -44,10 +44,15 @@ struct amba_driver {
 	const struct amba_id	*id_table;
 };
 
+/*
+ * Constants for the designer field of the Peripheral ID register. When bit 7
+ * is set to '1', bits [6:0] should be the JEP106 manufacturer identity code.
+ */
 enum amba_vendor {
 	AMBA_VENDOR_ARM = 0x41,
 	AMBA_VENDOR_ST = 0x80,
 	AMBA_VENDOR_QCOM = 0x51,
+	AMBA_VENDOR_LSI = 0xb6,
 };
 
 extern struct bus_type amba_bustype;
-- 
cgit v1.2.3


From 234b239bea395316d7f78018c672f4a88b3cdf0d Mon Sep 17 00:00:00 2001
From: Andres Lagar-Cavilla <andreslc@google.com>
Date: Wed, 17 Sep 2014 10:51:48 -0700
Subject: kvm: Faults which trigger IO release the mmap_sem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When KVM handles a tdp fault it uses FOLL_NOWAIT. If the guest memory
has been swapped out or is behind a filemap, this will trigger async
readahead and return immediately. The rationale is that KVM will kick
back the guest with an "async page fault" and allow for some other
guest process to take over.

If async PFs are enabled the fault is retried asap from an async
workqueue. If not, it's retried immediately in the same code path. In
either case the retry will not relinquish the mmap semaphore and will
block on the IO. This is a bad thing, as other mmap semaphore users
now stall as a function of swap or filemap latency.

This patch ensures both the regular and async PF path re-enter the
fault allowing for the mmap semaphore to be relinquished in the case
of IO wait.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 11 +++++++++++
 include/linux/mm.h       |  1 +
 mm/gup.c                 |  4 ++++
 virt/kvm/async_pf.c      |  4 +---
 virt/kvm/kvm_main.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 63 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d44a2d640551..45aaeb3360c9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -198,6 +198,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
+/*
+ * Carry out a gup that requires IO. Allow the mm to relinquish the mmap
+ * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL
+ * controls whether we retry the gup one more time to completion in that case.
+ * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp
+ * handler.
+ */
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+			 unsigned long addr, bool write_fault,
+			 struct page **pagep);
+
 enum {
 	OUTSIDE_GUEST_MODE,
 	IN_GUEST_MODE,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8981cc882ed2..0f4196a0bc20 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 #define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 #define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
+#define FOLL_TRIED	0x800	/* a retry, previous pass started an IO */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/gup.c b/mm/gup.c
index 91d044b1600d..af7ea3e0826b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -281,6 +281,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
 	if (*flags & FOLL_NOWAIT)
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
+	if (*flags & FOLL_TRIED) {
+		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
+		fault_flags |= FAULT_FLAG_TRIED;
+	}
 
 	ret = handle_mm_fault(mm, vma, address, fault_flags);
 	if (ret & VM_FAULT_ERROR) {
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d6a3d0993d88..5ff7f7f2689a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
 
 	might_sleep();
 
-	down_read(&mm->mmap_sem);
-	get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
-	up_read(&mm->mmap_sem);
+	kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
 	kvm_async_page_present_sync(vcpu, apf);
 
 	spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 499db0977f3c..1c6e8476b244 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1122,6 +1122,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 	return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
 }
 
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+			 unsigned long addr, bool write_fault,
+			 struct page **pagep)
+{
+	int npages;
+	int locked = 1;
+	int flags = FOLL_TOUCH | FOLL_HWPOISON |
+		    (pagep ? FOLL_GET : 0) |
+		    (write_fault ? FOLL_WRITE : 0);
+
+	/*
+	 * If retrying the fault, we get here *not* having allowed the filemap
+	 * to wait on the page lock. We should now allow waiting on the IO with
+	 * the mmap semaphore released.
+	 */
+	down_read(&mm->mmap_sem);
+	npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+				  &locked);
+	if (!locked) {
+		VM_BUG_ON(npages != -EBUSY);
+
+		if (!pagep)
+			return 0;
+
+		/*
+		 * The previous call has now waited on the IO. Now we can
+		 * retry and complete. Pass TRIED to ensure we do not re
+		 * schedule async IO (see e.g. filemap_fault).
+		 */
+		down_read(&mm->mmap_sem);
+		npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+					  pagep, NULL, NULL);
+	}
+	up_read(&mm->mmap_sem);
+	return npages;
+}
+
 static inline int check_user_page_hwpoison(unsigned long addr)
 {
 	int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1184,9 +1221,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 		npages = get_user_page_nowait(current, current->mm,
 					      addr, write_fault, page);
 		up_read(&current->mm->mmap_sem);
-	} else
-		npages = get_user_pages_fast(addr, 1, write_fault,
-					     page);
+	} else {
+		/*
+		 * By now we have tried gup_fast, and possibly async_pf, and we
+		 * are certainly not atomic. Time to retry the gup, allowing
+		 * mmap semaphore to be relinquished in the case of IO.
+		 */
+		npages = kvm_get_user_page_io(current, current->mm, addr,
+					      write_fault, page);
+	}
 	if (npages != 1)
 		return npages;
 
-- 
cgit v1.2.3


From 57128468080a8b6ea452223036d3e417f748af55 Mon Sep 17 00:00:00 2001
From: Andres Lagar-Cavilla <andreslc@google.com>
Date: Mon, 22 Sep 2014 14:54:42 -0700
Subject: kvm: Fix page ageing bugs

1. We were calling clear_flush_young_notify in unmap_one, but we are
within an mmu notifier invalidate range scope. The spte exists no more
(due to range_start) and the accessed bit info has already been
propagated (due to kvm_pfn_set_accessed). Simply call
clear_flush_young.

2. We clear_flush_young on a primary MMU PMD, but this may be mapped
as a collection of PTEs by the secondary MMU (e.g. during log-dirty).
This required expanding the interface of the clear_flush_young mmu
notifier, so a lot of code has been trivially touched.

3. In the absence of shadow_accessed_mask (e.g. EPT A bit), we emulate
the access bit by blowing the spte. This requires proper synchronizing
with MMU notifier consumers, like every other removal of spte's does.

Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/include/asm/kvm_host.h     |  3 ++-
 arch/arm64/include/asm/kvm_host.h   |  3 ++-
 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/include/asm/kvm_ppc.h  |  2 +-
 arch/powerpc/kvm/book3s.c           |  4 ++--
 arch/powerpc/kvm/book3s.h           |  3 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  4 ++--
 arch/powerpc/kvm/book3s_pr.c        |  3 ++-
 arch/powerpc/kvm/e500_mmu_host.c    |  2 +-
 arch/x86/include/asm/kvm_host.h     |  2 +-
 arch/x86/kvm/mmu.c                  | 38 ++++++++++++++++++++++---------------
 drivers/iommu/amd_iommu_v2.c        |  6 ++++--
 include/linux/mmu_notifier.h        | 24 ++++++++++++++++-------
 mm/mmu_notifier.c                   |  5 +++--
 mm/rmap.c                           |  6 +++++-
 virt/kvm/kvm_main.c                 |  5 +++--
 16 files changed, 71 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 032a8538318a..8c3f7eb62b54 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -170,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+			      unsigned long end)
 {
 	return 0;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index be9970a59497..a3c671b3acc9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -180,7 +180,8 @@ int kvm_unmap_hva_range(struct kvm *kvm,
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+			      unsigned long end)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 604000882352..d329bc5543a2 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -56,7 +56,7 @@
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
 			       unsigned long start, unsigned long end);
-extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index fb86a2299d8a..d4a92d7cea6a 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -243,7 +243,7 @@ struct kvmppc_ops {
 	int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
 	int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
 			   unsigned long end);
-	int (*age_hva)(struct kvm *kvm, unsigned long hva);
+	int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
 	int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
 	void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
 	void (*mmu_destroy)(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index dd03f6b299ba..c16cfbfeb781 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -851,9 +851,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 	return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvm->arch.kvm_ops->age_hva(kvm, hva);
+	return kvm->arch.kvm_ops->age_hva(kvm, start, end);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 4bf956cf94d6..d2b3ec088b8c 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
 				  unsigned long end);
-extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
+			  unsigned long end);
 extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 72c20bb16d26..81460c5359c0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return ret;
 }
 
-int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	if (!kvm->arch.using_mmu_notifiers)
 		return 0;
-	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+	return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index faffb27badd9..852fcd8951c4 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
 	return 0;
 }
 
-static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start,
+			  unsigned long end)
 {
 	/* XXX could be more clever ;) */
 	return 0;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 08f14bb57897..b1f3f630315e 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -732,7 +732,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 	return 0;
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	/* XXX could be more clever ;) */
 	return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index eeeb573fcf6f..763d273cab1d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1035,7 +1035,7 @@ asmlinkage void kvm_spurious_fault(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 47d534066325..3201e93ebd07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1417,18 +1417,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	struct rmap_iterator uninitialized_var(iter);
 	int young = 0;
 
-	/*
-	 * In case of absence of EPT Access and Dirty Bits supports,
-	 * emulate the accessed bit for EPT, by checking if this page has
-	 * an EPT mapping, and clearing it if it does. On the next access,
-	 * a new EPT mapping will be established.
-	 * This has some overhead, but not as much as the cost of swapping
-	 * out actively used pages or breaking up actively used hugepages.
-	 */
-	if (!shadow_accessed_mask) {
-		young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data);
-		goto out;
-	}
+	BUG_ON(!shadow_accessed_mask);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -1440,7 +1429,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 				 (unsigned long *)sptep);
 		}
 	}
-out:
 	trace_kvm_age_page(gfn, level, slot, young);
 	return young;
 }
@@ -1489,9 +1477,29 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+	/*
+	 * In case of absence of EPT Access and Dirty Bits supports,
+	 * emulate the accessed bit for EPT, by checking if this page has
+	 * an EPT mapping, and clearing it if it does. On the next access,
+	 * a new EPT mapping will be established.
+	 * This has some overhead, but not as much as the cost of swapping
+	 * out actively used pages or breaking up actively used hugepages.
+	 */
+	if (!shadow_accessed_mask) {
+		/*
+		 * We are holding the kvm->mmu_lock, and we are blowing up
+		 * shadow PTEs. MMU notifier consumers need to be kept at bay.
+		 * This is correct as long as we don't decouple the mmu_lock
+		 * protected regions (like invalidate_range_start|end does).
+		 */
+		kvm->mmu_notifier_seq++;
+		return kvm_handle_hva_range(kvm, start, end, 0,
+					    kvm_unmap_rmapp);
+	}
+
+	return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 5f578e850fc5..90d734bbf467 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn,
 
 static int mn_clear_flush_young(struct mmu_notifier *mn,
 				struct mm_struct *mm,
-				unsigned long address)
+				unsigned long start,
+				unsigned long end)
 {
-	__mn_flush_page(mn, address);
+	for (; start < end; start += PAGE_SIZE)
+		__mn_flush_page(mn, start);
 
 	return 0;
 }
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 27288692241e..88787bb4b3b9 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -57,10 +57,13 @@ struct mmu_notifier_ops {
 	 * pte. This way the VM will provide proper aging to the
 	 * accesses to the page through the secondary MMUs and not
 	 * only to the ones through the Linux pte.
+	 * Start-end is necessary in case the secondary MMU is mapping the page
+	 * at a smaller granularity than the primary MMU.
 	 */
 	int (*clear_flush_young)(struct mmu_notifier *mn,
 				 struct mm_struct *mm,
-				 unsigned long address);
+				 unsigned long start,
+				 unsigned long end);
 
 	/*
 	 * test_young is called to check the young/accessed bitflag in
@@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long address);
+					  unsigned long start,
+					  unsigned long end);
 extern int __mmu_notifier_test_young(struct mm_struct *mm,
 				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
@@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
 }
 
 static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long address)
+					  unsigned long start,
+					  unsigned long end)
 {
 	if (mm_has_notifiers(mm))
-		return __mmu_notifier_clear_flush_young(mm, address);
+		return __mmu_notifier_clear_flush_young(mm, start, end);
 	return 0;
 }
 
@@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	unsigned long ___address = __address;				\
 	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
 	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
-						  ___address);		\
+						  ___address,		\
+						  ___address +		\
+							PAGE_SIZE);	\
 	__young;							\
 })
 
@@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	unsigned long ___address = __address;				\
 	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
 	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
-						  ___address);		\
+						  ___address,		\
+						  ___address +		\
+							PMD_SIZE);	\
 	__young;							\
 })
 
@@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
 }
 
 static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long address)
+					  unsigned long start,
+					  unsigned long end)
 {
 	return 0;
 }
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 950813b1eb36..2c8da9825fe3 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm)
  * existed or not.
  */
 int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					unsigned long address)
+					unsigned long start,
+					unsigned long end)
 {
 	struct mmu_notifier *mn;
 	int young = 0, id;
@@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->clear_flush_young)
-			young |= mn->ops->clear_flush_young(mn, mm, address);
+			young |= mn->ops->clear_flush_young(mn, mm, start, end);
 	}
 	srcu_read_unlock(&srcu, id);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 3e8491c504f8..bc74e0012809 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 			continue;	/* don't unmap */
 		}
 
-		if (ptep_clear_flush_young_notify(vma, address, pte))
+		/*
+		 * No need for _notify because we're within an
+		 * mmu_notifier_invalidate_range_ {start|end} scope.
+		 */
+		if (ptep_clear_flush_young(vma, address, pte))
 			continue;
 
 		/* Nuke the page table entry. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ff42b11d2b9c..0316314d48f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -369,7 +369,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      struct mm_struct *mm,
-					      unsigned long address)
+					      unsigned long start,
+					      unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
 	int young, idx;
@@ -377,7 +378,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 
-	young = kvm_age_hva(kvm, address);
+	young = kvm_age_hva(kvm, start, end);
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
 
-- 
cgit v1.2.3


From 445b8236959bfe624a5aa9bce89f44a3bec9b2b1 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Wed, 24 Sep 2014 15:57:55 +0800
Subject: kvm: Rename make_all_cpus_request() to kvm_make_all_cpus_request()
 and make it non-static

Different architectures need different requests, and in fact we
will use this function in architecture-specific code later. This
will be outside kvm_main.c, so make it non-static and rename it to
kvm_make_all_cpus_request().

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 10 +++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 45aaeb3360c9..12c591fc2571 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -586,6 +586,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 void kvm_make_scan_ioapic_request(struct kvm *kvm);
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0316314d48f4..5b8ca365932a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -153,7 +153,7 @@ static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
 	cpumask_var_t cpus;
@@ -190,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	long dirty_count = kvm->tlbs_dirty;
 
 	smp_mb();
-	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+	if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
@@ -198,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
-- 
cgit v1.2.3


From 4256f43f9fab91e1c17b5846a240cf4b66a768a8 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Wed, 24 Sep 2014 15:57:54 +0800
Subject: kvm: x86: Add request bit to reload APIC access page address

Currently, the APIC access page is pinned by KVM for the entire life
of the guest.  We want to make it migratable in order to make memory
hot-unplug available for machines that run KVM.

This patch prepares to handle this in generic code, through a new
request bit (that will be set by the MMU notifier) and a new hook
that is called whenever the request bit is processed.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 14 ++++++++++++++
 include/linux/kvm_host.h        |  1 +
 3 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 022c356e0fed..60f9d73c6282 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -736,6 +736,7 @@ struct kvm_x86_ops {
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
@@ -1044,6 +1045,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 142569e6f8f9..c1412f5d93db 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6026,6 +6026,18 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->tlb_flush(vcpu);
 }
 
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_x86_ops->set_apic_access_page_addr)
+		return;
+
+	vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm,
+			APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+	kvm_x86_ops->set_apic_access_page_addr(vcpu,
+			page_to_phys(vcpu->kvm->arch.apic_access_page));
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
+
 void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address)
 {
@@ -6091,6 +6103,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_deliver_pmi(vcpu);
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
+		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
+			kvm_vcpu_reload_apic_access_page(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 12c591fc2571..d594f9f34429 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
 #define KVM_REQ_ENABLE_IBS        23
 #define KVM_REQ_DISABLE_IBS       24
+#define KVM_REQ_APIC_PAGE_RELOAD  25
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
-- 
cgit v1.2.3


From 25cc24c200dcba21bd1f1a59a01741185062dc0e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Fri, 12 Sep 2014 08:53:53 +0200
Subject: mfd: max14577: Add defines for MAX77836 charger

Prepare for adding support for MAX77836 charger to the max14577 charger
driver by adding necessary new defines and prefixes to existing ones.

The MAX77836 uses slightly different values for ChgTyp field of STATUS2
register. On the MAX14577 value of 0x6 is reserved and 0x7 dead battery.
On the MAX77836 the opposite:
 - 0x6 means special charger,
 - 0x7 is reserved.
Regardless of these differences use one common enum
max14577_muic_charger_type.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mfd/max14577-private.h | 54 ++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index 499253604026..d6f321699b89 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -72,15 +72,33 @@ enum max14577_muic_reg {
 	MAX14577_MUIC_REG_END,
 };
 
+/*
+ * Combined charger types for max14577 and max77836.
+ *
+ * On max14577 three lower bits map to STATUS2/CHGTYP field.
+ * However the max77836 has different two last values of STATUS2/CHGTYP.
+ * To indicate the difference enum has two additional values for max77836.
+ * These values are just a register value bitwise OR with 0x8.
+ */
 enum max14577_muic_charger_type {
-	MAX14577_CHARGER_TYPE_NONE = 0,
-	MAX14577_CHARGER_TYPE_USB,
-	MAX14577_CHARGER_TYPE_DOWNSTREAM_PORT,
-	MAX14577_CHARGER_TYPE_DEDICATED_CHG,
-	MAX14577_CHARGER_TYPE_SPECIAL_500MA,
-	MAX14577_CHARGER_TYPE_SPECIAL_1A,
-	MAX14577_CHARGER_TYPE_RESERVED,
-	MAX14577_CHARGER_TYPE_DEAD_BATTERY = 7,
+	MAX14577_CHARGER_TYPE_NONE		= 0x0,
+	MAX14577_CHARGER_TYPE_USB		= 0x1,
+	MAX14577_CHARGER_TYPE_DOWNSTREAM_PORT	= 0x2,
+	MAX14577_CHARGER_TYPE_DEDICATED_CHG	= 0x3,
+	MAX14577_CHARGER_TYPE_SPECIAL_500MA	= 0x4,
+	/* Special 1A or 2A charger */
+	MAX14577_CHARGER_TYPE_SPECIAL_1A	= 0x5,
+	/* max14577: reserved, used on max77836 */
+	MAX14577_CHARGER_TYPE_RESERVED		= 0x6,
+	/* max14577: dead-battery charing with maximum current 100mA */
+	MAX14577_CHARGER_TYPE_DEAD_BATTERY	= 0x7,
+	/*
+	 * max77836: special charger (bias on D+/D-),
+	 * matches register value of 0x6
+	 */
+	MAX77836_CHARGER_TYPE_SPECIAL_BIAS	= 0xe,
+	/* max77836: reserved, register value 0x7 */
+	MAX77836_CHARGER_TYPE_RESERVED		= 0xf,
 };
 
 /* MAX14577 interrupts */
@@ -121,13 +139,15 @@ enum max14577_muic_charger_type {
 #define STATUS2_CHGTYP_SHIFT		0
 #define STATUS2_CHGDETRUN_SHIFT		3
 #define STATUS2_DCDTMR_SHIFT		4
-#define STATUS2_DBCHG_SHIFT		5
+#define MAX14577_STATUS2_DBCHG_SHIFT	5
+#define MAX77836_STATUS2_DXOVP_SHIFT	5
 #define STATUS2_VBVOLT_SHIFT		6
 #define MAX77836_STATUS2_VIDRM_SHIFT	7
 #define STATUS2_CHGTYP_MASK		(0x7 << STATUS2_CHGTYP_SHIFT)
 #define STATUS2_CHGDETRUN_MASK		BIT(STATUS2_CHGDETRUN_SHIFT)
 #define STATUS2_DCDTMR_MASK		BIT(STATUS2_DCDTMR_SHIFT)
-#define STATUS2_DBCHG_MASK		BIT(STATUS2_DBCHG_SHIFT)
+#define MAX14577_STATUS2_DBCHG_MASK	BIT(MAX14577_STATUS2_DBCHG_SHIFT)
+#define MAX77836_STATUS2_DXOVP_MASK	BIT(MAX77836_STATUS2_DXOVP_SHIFT)
 #define STATUS2_VBVOLT_MASK		BIT(STATUS2_VBVOLT_SHIFT)
 #define MAX77836_STATUS2_VIDRM_MASK	BIT(MAX77836_STATUS2_VIDRM_SHIFT)
 
@@ -177,9 +197,11 @@ enum max14577_muic_charger_type {
 #define CTRL3_JIGSET_SHIFT		0
 #define CTRL3_BOOTSET_SHIFT		2
 #define CTRL3_ADCDBSET_SHIFT		4
+#define CTRL3_WBTH_SHIFT		6
 #define CTRL3_JIGSET_MASK		(0x3 << CTRL3_JIGSET_SHIFT)
 #define CTRL3_BOOTSET_MASK		(0x3 << CTRL3_BOOTSET_SHIFT)
 #define CTRL3_ADCDBSET_MASK		(0x3 << CTRL3_ADCDBSET_SHIFT)
+#define CTRL3_WBTH_MASK			(0x3 << CTRL3_WBTH_SHIFT)
 
 /* Slave addr = 0x4A: Charger */
 enum max14577_charger_reg {
@@ -210,16 +232,20 @@ enum max14577_charger_reg {
 #define CDETCTRL1_CHGTYPMAN_SHIFT	1
 #define CDETCTRL1_DCDEN_SHIFT		2
 #define CDETCTRL1_DCD2SCT_SHIFT		3
-#define CDETCTRL1_DCHKTM_SHIFT		4
-#define CDETCTRL1_DBEXIT_SHIFT		5
+#define MAX14577_CDETCTRL1_DCHKTM_SHIFT	4
+#define MAX77836_CDETCTRL1_CDLY_SHIFT	4
+#define MAX14577_CDETCTRL1_DBEXIT_SHIFT	5
+#define MAX77836_CDETCTRL1_DCDCPL_SHIFT	5
 #define CDETCTRL1_DBIDLE_SHIFT		6
 #define CDETCTRL1_CDPDET_SHIFT		7
 #define CDETCTRL1_CHGDETEN_MASK		BIT(CDETCTRL1_CHGDETEN_SHIFT)
 #define CDETCTRL1_CHGTYPMAN_MASK	BIT(CDETCTRL1_CHGTYPMAN_SHIFT)
 #define CDETCTRL1_DCDEN_MASK		BIT(CDETCTRL1_DCDEN_SHIFT)
 #define CDETCTRL1_DCD2SCT_MASK		BIT(CDETCTRL1_DCD2SCT_SHIFT)
-#define CDETCTRL1_DCHKTM_MASK		BIT(CDETCTRL1_DCHKTM_SHIFT)
-#define CDETCTRL1_DBEXIT_MASK		BIT(CDETCTRL1_DBEXIT_SHIFT)
+#define MAX14577_CDETCTRL1_DCHKTM_MASK	BIT(MAX14577_CDETCTRL1_DCHKTM_SHIFT)
+#define MAX77836_CDETCTRL1_CDDLY_MASK	BIT(MAX77836_CDETCTRL1_CDDLY_SHIFT)
+#define MAX14577_CDETCTRL1_DBEXIT_MASK	BIT(MAX14577_CDETCTRL1_DBEXIT_SHIFT)
+#define MAX77836_CDETCTRL1_DCDCPL_MASK	BIT(MAX77836_CDETCTRL1_DCDCPL_SHIFT)
 #define CDETCTRL1_DBIDLE_MASK		BIT(CDETCTRL1_DBIDLE_SHIFT)
 #define CDETCTRL1_CDPDET_MASK		BIT(CDETCTRL1_CDPDET_SHIFT)
 
-- 
cgit v1.2.3


From 521e8bac67a71a6544274f39d5c61473e0e54ac0 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Wed, 17 Sep 2014 11:06:16 +0200
Subject: perf/x86/intel/uncore: Update support for client uncore IMC PMU

This patch restructures the memory controller (IMC) uncore PMU support
for client SNB/IVB/HSW processors. The main change is that it can now
cope with more than one PCI device ID per processor model. There are
many flavors of memory controllers for each processor. They have
different PCI device ID, yet they behave the same w.r.t. the memory
controller PMU that we are interested in.

The patch now supports two distinct memory controllers for IVB
processors: one for mobile, one for desktop.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140917090616.GA11281@quad
Cc: ak@linux.intel.com
Cc: kan.liang@intel.com
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 69 +++++++++++++++++------
 include/linux/pci_ids.h                           |  1 +
 2 files changed, 52 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index e0e934c8ee77..3001015b755c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -460,6 +460,10 @@ static const struct pci_device_id ivb_uncore_pci_ids[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
 	{ /* end: all zeroes */ },
 };
 
@@ -486,34 +490,63 @@ static struct pci_driver hsw_uncore_pci_driver = {
 	.id_table	= hsw_uncore_pci_ids,
 };
 
-int snb_uncore_pci_init(void)
+struct imc_uncore_pci_dev {
+	__u32 pci_id;
+	struct pci_driver *driver;
+};
+#define IMC_DEV(a, d) \
+	{ .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
+
+static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
+	IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
+	IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
+	IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
+	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
+	{  /* end marker */ }
+};
+
+
+#define for_each_imc_pci_id(x, t) \
+	for (x = (t); (x)->pci_id; x++)
+
+static struct pci_driver *imc_uncore_find_dev(void)
 {
-	int ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC);
-	if (ret)
-		return ret;
-	uncore_pci_uncores = snb_pci_uncores;
-	uncore_pci_driver = &snb_uncore_pci_driver;
-	return 0;
+	const struct imc_uncore_pci_dev *p;
+	int ret;
+
+	for_each_imc_pci_id(p, desktop_imc_pci_ids) {
+		ret = snb_pci2phy_map_init(p->pci_id);
+		if (ret == 0)
+			return p->driver;
+	}
+	return NULL;
 }
 
-int ivb_uncore_pci_init(void)
+static int imc_uncore_pci_init(void)
 {
-	int ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC);
-	if (ret)
-		return ret;
+	struct pci_driver *imc_drv = imc_uncore_find_dev();
+
+	if (!imc_drv)
+		return -ENODEV;
+
 	uncore_pci_uncores = snb_pci_uncores;
-	uncore_pci_driver = &ivb_uncore_pci_driver;
+	uncore_pci_driver = imc_drv;
+
 	return 0;
 }
 
+int snb_uncore_pci_init(void)
+{
+	return imc_uncore_pci_init();
+}
+
+int ivb_uncore_pci_init(void)
+{
+	return imc_uncore_pci_init();
+}
 int hsw_uncore_pci_init(void)
 {
-	int ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC);
-	if (ret)
-		return ret;
-	uncore_pci_uncores = snb_pci_uncores;
-	uncore_pci_driver = &hsw_uncore_pci_driver;
-	return 0;
+	return imc_uncore_pci_init();
 }
 
 /* end of Sandy Bridge uncore support */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6ed0bb73a864..3102b7e3b460 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2536,6 +2536,7 @@
 #define PCI_DEVICE_ID_INTEL_EESSC	0x0008
 #define PCI_DEVICE_ID_INTEL_SNB_IMC	0x0100
 #define PCI_DEVICE_ID_INTEL_IVB_IMC	0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC	0x0150
 #define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_PXHD_0	0x0320
 #define PCI_DEVICE_ID_INTEL_PXHD_1	0x0321
-- 
cgit v1.2.3


From 41f93af900a20d1a0a358b522b5129c89677e9dc Mon Sep 17 00:00:00 2001
From: Sandeep Nair <sandeep_n@ti.com>
Date: Fri, 28 Feb 2014 10:47:50 -0500
Subject: soc: ti: add Keystone Navigator QMSS driver

The QMSS (Queue Manager Sub System) found on Keystone SOCs is one of
the main hardware sub system which forms the backbone of the Keystone
Multi-core Navigator. QMSS consist of queue managers, packed-data structure
processors(PDSP), linking RAM, descriptor pools and infrastructure
Packet DMA.

The Queue Manager is a hardware module that is responsible for accelerating
management of the packet queues. Packets are queued/de-queued by writing or
reading descriptor address to a particular memory mapped location. The PDSPs
perform QMSS related functions like accumulation, QoS, or event management.
Linking RAM registers are used to link the descriptors which are stored in
descriptor RAM. Descriptor RAM is configurable as internal or external memory.

The QMSS driver manages the PDSP setups, linking RAM regions,
queue pool management (allocation, push, pop and notify) and descriptor
pool management. The specifics on the device tree bindings for
QMSS can be found in:
	Documentation/devicetree/bindings/soc/keystone-navigator-qmss.txt

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Kumar Gala <galak@codeaurora.org>
Cc: Olof Johansson <olof@lixom.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Sandeep Nair <sandeep_n@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
---
 drivers/Kconfig                  |    2 +
 drivers/soc/Kconfig              |    1 +
 drivers/soc/Makefile             |    1 +
 drivers/soc/ti/Kconfig           |   21 +
 drivers/soc/ti/Makefile          |    4 +
 drivers/soc/ti/knav_qmss.h       |  386 ++++++++
 drivers/soc/ti/knav_qmss_acc.c   |  591 +++++++++++++
 drivers/soc/ti/knav_qmss_queue.c | 1816 ++++++++++++++++++++++++++++++++++++++
 include/linux/soc/ti/knav_qmss.h |   90 ++
 9 files changed, 2912 insertions(+)
 create mode 100644 drivers/soc/ti/Kconfig
 create mode 100644 drivers/soc/ti/Makefile
 create mode 100644 drivers/soc/ti/knav_qmss.h
 create mode 100644 drivers/soc/ti/knav_qmss_acc.c
 create mode 100644 drivers/soc/ti/knav_qmss_queue.c
 create mode 100644 include/linux/soc/ti/knav_qmss.h

(limited to 'include/linux')

diff --git a/drivers/Kconfig b/drivers/Kconfig
index 622fa266b29e..1a693d3f9d51 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -148,6 +148,8 @@ source "drivers/remoteproc/Kconfig"
 
 source "drivers/rpmsg/Kconfig"
 
+source "drivers/soc/Kconfig"
+
 source "drivers/devfreq/Kconfig"
 
 source "drivers/extcon/Kconfig"
diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c8543855aa82..49e3f0cc71af 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -1,5 +1,6 @@
 menu "SOC (System On Chip) specific Drivers"
 
 source "drivers/soc/qcom/Kconfig"
+source "drivers/soc/ti/Kconfig"
 
 endmenu
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 3b1b95d932d1..0d6e35dfea8c 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-$(CONFIG_ARCH_TEGRA)	+= tegra/
+obj-$(CONFIG_SOC_TI)		+= ti/
diff --git a/drivers/soc/ti/Kconfig b/drivers/soc/ti/Kconfig
new file mode 100644
index 000000000000..f73896f762e8
--- /dev/null
+++ b/drivers/soc/ti/Kconfig
@@ -0,0 +1,21 @@
+#
+# TI SOC drivers
+#
+menuconfig SOC_TI
+	bool "TI SOC drivers support"
+
+if SOC_TI
+
+config KEYSTONE_NAVIGATOR_QMSS
+	tristate "Keystone Queue Manager Sub System"
+	depends on ARCH_KEYSTONE
+	help
+	  Say y here to support the Keystone multicore Navigator Queue
+	  Manager support. The Queue Manager is a hardware module that
+	  is responsible for accelerating management of the packet queues.
+	  Packets are queued/de-queued by writing/reading descriptor address
+	  to a particular memory mapped location in the Queue Manager module.
+
+	  If unsure, say N.
+
+endif # SOC_TI
diff --git a/drivers/soc/ti/Makefile b/drivers/soc/ti/Makefile
new file mode 100644
index 000000000000..bf85cacd5b85
--- /dev/null
+++ b/drivers/soc/ti/Makefile
@@ -0,0 +1,4 @@
+#
+# TI Keystone SOC drivers
+#
+obj-$(CONFIG_KEYSTONE_NAVIGATOR_QMSS)	+= knav_qmss_queue.o knav_qmss_acc.o
diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h
new file mode 100644
index 000000000000..bc9dcc8cc3ce
--- /dev/null
+++ b/drivers/soc/ti/knav_qmss.h
@@ -0,0 +1,386 @@
+/*
+ * Keystone Navigator QMSS driver internal header
+ *
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com
+ * Author:	Sandeep Nair <sandeep_n@ti.com>
+ *		Cyril Chemparathy <cyril@ti.com>
+ *		Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef __KNAV_QMSS_H__
+#define __KNAV_QMSS_H__
+
+#define THRESH_GTE	BIT(7)
+#define THRESH_LT	0
+
+#define PDSP_CTRL_PC_MASK	0xffff0000
+#define PDSP_CTRL_SOFT_RESET	BIT(0)
+#define PDSP_CTRL_ENABLE	BIT(1)
+#define PDSP_CTRL_RUNNING	BIT(15)
+
+#define ACC_MAX_CHANNEL		48
+#define ACC_DEFAULT_PERIOD	25 /* usecs */
+
+#define ACC_CHANNEL_INT_BASE		2
+
+#define ACC_LIST_ENTRY_TYPE		1
+#define ACC_LIST_ENTRY_WORDS		(1 << ACC_LIST_ENTRY_TYPE)
+#define ACC_LIST_ENTRY_QUEUE_IDX	0
+#define ACC_LIST_ENTRY_DESC_IDX	(ACC_LIST_ENTRY_WORDS - 1)
+
+#define ACC_CMD_DISABLE_CHANNEL	0x80
+#define ACC_CMD_ENABLE_CHANNEL	0x81
+#define ACC_CFG_MULTI_QUEUE		BIT(21)
+
+#define ACC_INTD_OFFSET_EOI		(0x0010)
+#define ACC_INTD_OFFSET_COUNT(ch)	(0x0300 + 4 * (ch))
+#define ACC_INTD_OFFSET_STATUS(ch)	(0x0200 + 4 * ((ch) / 32))
+
+#define RANGE_MAX_IRQS			64
+
+#define ACC_DESCS_MAX		SZ_1K
+#define ACC_DESCS_MASK		(ACC_DESCS_MAX - 1)
+#define DESC_SIZE_MASK		0xful
+#define DESC_PTR_MASK		(~DESC_SIZE_MASK)
+
+#define KNAV_NAME_SIZE			32
+
+enum knav_acc_result {
+	ACC_RET_IDLE,
+	ACC_RET_SUCCESS,
+	ACC_RET_INVALID_COMMAND,
+	ACC_RET_INVALID_CHANNEL,
+	ACC_RET_INACTIVE_CHANNEL,
+	ACC_RET_ACTIVE_CHANNEL,
+	ACC_RET_INVALID_QUEUE,
+	ACC_RET_INVALID_RET,
+};
+
+struct knav_reg_config {
+	u32		revision;
+	u32		__pad1;
+	u32		divert;
+	u32		link_ram_base0;
+	u32		link_ram_size0;
+	u32		link_ram_base1;
+	u32		__pad2[2];
+	u32		starvation[0];
+};
+
+struct knav_reg_region {
+	u32		base;
+	u32		start_index;
+	u32		size_count;
+	u32		__pad;
+};
+
+struct knav_reg_pdsp_regs {
+	u32		control;
+	u32		status;
+	u32		cycle_count;
+	u32		stall_count;
+};
+
+struct knav_reg_acc_command {
+	u32		command;
+	u32		queue_mask;
+	u32		list_phys;
+	u32		queue_num;
+	u32		timer_config;
+};
+
+struct knav_link_ram_block {
+	dma_addr_t	 phys;
+	void		*virt;
+	size_t		 size;
+};
+
+struct knav_acc_info {
+	u32			 pdsp_id;
+	u32			 start_channel;
+	u32			 list_entries;
+	u32			 pacing_mode;
+	u32			 timer_count;
+	int			 mem_size;
+	int			 list_size;
+	struct knav_pdsp_info	*pdsp;
+};
+
+struct knav_acc_channel {
+	u32			channel;
+	u32			list_index;
+	u32			open_mask;
+	u32			*list_cpu[2];
+	dma_addr_t		list_dma[2];
+	char			name[KNAV_NAME_SIZE];
+	atomic_t		retrigger_count;
+};
+
+struct knav_pdsp_info {
+	const char					*name;
+	struct knav_reg_pdsp_regs  __iomem		*regs;
+	union {
+		void __iomem				*command;
+		struct knav_reg_acc_command __iomem	*acc_command;
+		u32 __iomem				*qos_command;
+	};
+	void __iomem					*intd;
+	u32 __iomem					*iram;
+	const char					*firmware;
+	u32						id;
+	struct list_head				list;
+};
+
+struct knav_qmgr_info {
+	unsigned			start_queue;
+	unsigned			num_queues;
+	struct knav_reg_config __iomem	*reg_config;
+	struct knav_reg_region __iomem	*reg_region;
+	struct knav_reg_queue __iomem	*reg_push, *reg_pop, *reg_peek;
+	void __iomem			*reg_status;
+	struct list_head		list;
+};
+
+#define KNAV_NUM_LINKRAM	2
+
+/**
+ * struct knav_queue_stats:	queue statistics
+ * pushes:			number of push operations
+ * pops:			number of pop operations
+ * push_errors:			number of push errors
+ * pop_errors:			number of pop errors
+ * notifies:			notifier counts
+ */
+struct knav_queue_stats {
+	atomic_t	 pushes;
+	atomic_t	 pops;
+	atomic_t	 push_errors;
+	atomic_t	 pop_errors;
+	atomic_t	 notifies;
+};
+
+/**
+ * struct knav_reg_queue:	queue registers
+ * @entry_count:		valid entries in the queue
+ * @byte_count:			total byte count in thhe queue
+ * @packet_size:		packet size for the queue
+ * @ptr_size_thresh:		packet pointer size threshold
+ */
+struct knav_reg_queue {
+	u32		entry_count;
+	u32		byte_count;
+	u32		packet_size;
+	u32		ptr_size_thresh;
+};
+
+/**
+ * struct knav_region:		qmss region info
+ * @dma_start, dma_end:		start and end dma address
+ * @virt_start, virt_end:	start and end virtual address
+ * @desc_size:			descriptor size
+ * @used_desc:			consumed descriptors
+ * @id:				region number
+ * @num_desc:			total descriptors
+ * @link_index:			index of the first descriptor
+ * @name:			region name
+ * @list:			instance in the device's region list
+ * @pools:			list of descriptor pools in the region
+ */
+struct knav_region {
+	dma_addr_t		dma_start, dma_end;
+	void			*virt_start, *virt_end;
+	unsigned		desc_size;
+	unsigned		used_desc;
+	unsigned		id;
+	unsigned		num_desc;
+	unsigned		link_index;
+	const char		*name;
+	struct list_head	list;
+	struct list_head	pools;
+};
+
+/**
+ * struct knav_pool:		qmss pools
+ * @dev:			device pointer
+ * @region:			qmss region info
+ * @queue:			queue registers
+ * @kdev:			qmss device pointer
+ * @region_offset:		offset from the base
+ * @num_desc:			total descriptors
+ * @desc_size:			descriptor size
+ * @region_id:			region number
+ * @name:			pool name
+ * @list:			list head
+ * @region_inst:		instance in the region's pool list
+ */
+struct knav_pool {
+	struct device			*dev;
+	struct knav_region		*region;
+	struct knav_queue		*queue;
+	struct knav_device		*kdev;
+	int				region_offset;
+	int				num_desc;
+	int				desc_size;
+	int				region_id;
+	const char			*name;
+	struct list_head		list;
+	struct list_head		region_inst;
+};
+
+/**
+ * struct knav_queue_inst:		qmss queue instace properties
+ * @descs:				descriptor pointer
+ * @desc_head, desc_tail, desc_count:	descriptor counters
+ * @acc:				accumulator channel pointer
+ * @kdev:				qmss device pointer
+ * @range:				range info
+ * @qmgr:				queue manager info
+ * @id:					queue instace id
+ * @irq_num:				irq line number
+ * @notify_needed:			notifier needed based on queue type
+ * @num_notifiers:			total notifiers
+ * @handles:				list head
+ * @name:				queue instance name
+ * @irq_name:				irq line name
+ */
+struct knav_queue_inst {
+	u32				*descs;
+	atomic_t			desc_head, desc_tail, desc_count;
+	struct knav_acc_channel	*acc;
+	struct knav_device		*kdev;
+	struct knav_range_info		*range;
+	struct knav_qmgr_info		*qmgr;
+	u32				id;
+	int				irq_num;
+	int				notify_needed;
+	atomic_t			num_notifiers;
+	struct list_head		handles;
+	const char			*name;
+	const char			*irq_name;
+};
+
+/**
+ * struct knav_queue:			qmss queue properties
+ * @reg_push, reg_pop, reg_peek:	push, pop queue registers
+ * @inst:				qmss queue instace properties
+ * @notifier_fn:			notifier function
+ * @notifier_fn_arg:			notifier function argument
+ * @notifier_enabled:			notier enabled for a give queue
+ * @rcu:				rcu head
+ * @flags:				queue flags
+ * @list:				list head
+ */
+struct knav_queue {
+	struct knav_reg_queue __iomem	*reg_push, *reg_pop, *reg_peek;
+	struct knav_queue_inst		*inst;
+	struct knav_queue_stats	stats;
+	knav_queue_notify_fn		notifier_fn;
+	void				*notifier_fn_arg;
+	atomic_t			notifier_enabled;
+	struct rcu_head			rcu;
+	unsigned			flags;
+	struct list_head		list;
+};
+
+struct knav_device {
+	struct device				*dev;
+	unsigned				base_id;
+	unsigned				num_queues;
+	unsigned				num_queues_in_use;
+	unsigned				inst_shift;
+	struct knav_link_ram_block		link_rams[KNAV_NUM_LINKRAM];
+	void					*instances;
+	struct list_head			regions;
+	struct list_head			queue_ranges;
+	struct list_head			pools;
+	struct list_head			pdsps;
+	struct list_head			qmgrs;
+};
+
+struct knav_range_ops {
+	int	(*init_range)(struct knav_range_info *range);
+	int	(*free_range)(struct knav_range_info *range);
+	int	(*init_queue)(struct knav_range_info *range,
+			      struct knav_queue_inst *inst);
+	int	(*open_queue)(struct knav_range_info *range,
+			      struct knav_queue_inst *inst, unsigned flags);
+	int	(*close_queue)(struct knav_range_info *range,
+			       struct knav_queue_inst *inst);
+	int	(*set_notify)(struct knav_range_info *range,
+			      struct knav_queue_inst *inst, bool enabled);
+};
+
+struct knav_irq_info {
+	int	irq;
+	u32	cpu_map;
+};
+
+struct knav_range_info {
+	const char			*name;
+	struct knav_device		*kdev;
+	unsigned			queue_base;
+	unsigned			num_queues;
+	void				*queue_base_inst;
+	unsigned			flags;
+	struct list_head		list;
+	struct knav_range_ops		*ops;
+	struct knav_acc_info		acc_info;
+	struct knav_acc_channel	*acc;
+	unsigned			num_irqs;
+	struct knav_irq_info		irqs[RANGE_MAX_IRQS];
+};
+
+#define RANGE_RESERVED		BIT(0)
+#define RANGE_HAS_IRQ		BIT(1)
+#define RANGE_HAS_ACCUMULATOR	BIT(2)
+#define RANGE_MULTI_QUEUE	BIT(3)
+
+#define for_each_region(kdev, region)				\
+	list_for_each_entry(region, &kdev->regions, list)
+
+#define first_region(kdev)					\
+	list_first_entry(&kdev->regions, \
+			struct knav_region, list)
+
+#define for_each_queue_range(kdev, range)			\
+	list_for_each_entry(range, &kdev->queue_ranges, list)
+
+#define first_queue_range(kdev)					\
+	list_first_entry(&kdev->queue_ranges, \
+			struct knav_range_info, list)
+
+#define for_each_pool(kdev, pool)				\
+	list_for_each_entry(pool, &kdev->pools, list)
+
+#define for_each_pdsp(kdev, pdsp)				\
+	list_for_each_entry(pdsp, &kdev->pdsps, list)
+
+#define for_each_qmgr(kdev, qmgr)				\
+	list_for_each_entry(qmgr, &kdev->qmgrs, list)
+
+static inline struct knav_pdsp_info *
+knav_find_pdsp(struct knav_device *kdev, unsigned pdsp_id)
+{
+	struct knav_pdsp_info *pdsp;
+
+	for_each_pdsp(kdev, pdsp)
+		if (pdsp_id == pdsp->id)
+			return pdsp;
+	return NULL;
+}
+
+extern int knav_init_acc_range(struct knav_device *kdev,
+					struct device_node *node,
+					struct knav_range_info *range);
+extern void knav_queue_notify(struct knav_queue_inst *inst);
+
+#endif /* __KNAV_QMSS_H__ */
diff --git a/drivers/soc/ti/knav_qmss_acc.c b/drivers/soc/ti/knav_qmss_acc.c
new file mode 100644
index 000000000000..6fbfde6e748f
--- /dev/null
+++ b/drivers/soc/ti/knav_qmss_acc.c
@@ -0,0 +1,591 @@
+/*
+ * Keystone accumulator queue manager
+ *
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com
+ * Author:	Sandeep Nair <sandeep_n@ti.com>
+ *		Cyril Chemparathy <cyril@ti.com>
+ *		Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/soc/ti/knav_qmss.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/firmware.h>
+
+#include "knav_qmss.h"
+
+#define knav_range_offset_to_inst(kdev, range, q)	\
+	(range->queue_base_inst + (q << kdev->inst_shift))
+
+static void __knav_acc_notify(struct knav_range_info *range,
+				struct knav_acc_channel *acc)
+{
+	struct knav_device *kdev = range->kdev;
+	struct knav_queue_inst *inst;
+	int range_base, queue;
+
+	range_base = kdev->base_id + range->queue_base;
+
+	if (range->flags & RANGE_MULTI_QUEUE) {
+		for (queue = 0; queue < range->num_queues; queue++) {
+			inst = knav_range_offset_to_inst(kdev, range,
+								queue);
+			if (inst->notify_needed) {
+				inst->notify_needed = 0;
+				dev_dbg(kdev->dev, "acc-irq: notifying %d\n",
+					range_base + queue);
+				knav_queue_notify(inst);
+			}
+		}
+	} else {
+		queue = acc->channel - range->acc_info.start_channel;
+		inst = knav_range_offset_to_inst(kdev, range, queue);
+		dev_dbg(kdev->dev, "acc-irq: notifying %d\n",
+			range_base + queue);
+		knav_queue_notify(inst);
+	}
+}
+
+static int knav_acc_set_notify(struct knav_range_info *range,
+				struct knav_queue_inst *kq,
+				bool enabled)
+{
+	struct knav_pdsp_info *pdsp = range->acc_info.pdsp;
+	struct knav_device *kdev = range->kdev;
+	u32 mask, offset;
+
+	/*
+	 * when enabling, we need to re-trigger an interrupt if we
+	 * have descriptors pending
+	 */
+	if (!enabled || atomic_read(&kq->desc_count) <= 0)
+		return 0;
+
+	kq->notify_needed = 1;
+	atomic_inc(&kq->acc->retrigger_count);
+	mask = BIT(kq->acc->channel % 32);
+	offset = ACC_INTD_OFFSET_STATUS(kq->acc->channel);
+	dev_dbg(kdev->dev, "setup-notify: re-triggering irq for %s\n",
+		kq->acc->name);
+	writel_relaxed(mask, pdsp->intd + offset);
+	return 0;
+}
+
+static irqreturn_t knav_acc_int_handler(int irq, void *_instdata)
+{
+	struct knav_acc_channel *acc;
+	struct knav_queue_inst *kq = NULL;
+	struct knav_range_info *range;
+	struct knav_pdsp_info *pdsp;
+	struct knav_acc_info *info;
+	struct knav_device *kdev;
+
+	u32 *list, *list_cpu, val, idx, notifies;
+	int range_base, channel, queue = 0;
+	dma_addr_t list_dma;
+
+	range = _instdata;
+	info  = &range->acc_info;
+	kdev  = range->kdev;
+	pdsp  = range->acc_info.pdsp;
+	acc   = range->acc;
+
+	range_base = kdev->base_id + range->queue_base;
+	if ((range->flags & RANGE_MULTI_QUEUE) == 0) {
+		for (queue = 0; queue < range->num_irqs; queue++)
+			if (range->irqs[queue].irq == irq)
+				break;
+		kq = knav_range_offset_to_inst(kdev, range, queue);
+		acc += queue;
+	}
+
+	channel = acc->channel;
+	list_dma = acc->list_dma[acc->list_index];
+	list_cpu = acc->list_cpu[acc->list_index];
+	dev_dbg(kdev->dev, "acc-irq: channel %d, list %d, virt %p, phys %x\n",
+		channel, acc->list_index, list_cpu, list_dma);
+	if (atomic_read(&acc->retrigger_count)) {
+		atomic_dec(&acc->retrigger_count);
+		__knav_acc_notify(range, acc);
+		writel_relaxed(1, pdsp->intd + ACC_INTD_OFFSET_COUNT(channel));
+		/* ack the interrupt */
+		writel_relaxed(ACC_CHANNEL_INT_BASE + channel,
+			       pdsp->intd + ACC_INTD_OFFSET_EOI);
+
+		return IRQ_HANDLED;
+	}
+
+	notifies = readl_relaxed(pdsp->intd + ACC_INTD_OFFSET_COUNT(channel));
+	WARN_ON(!notifies);
+	dma_sync_single_for_cpu(kdev->dev, list_dma, info->list_size,
+				DMA_FROM_DEVICE);
+
+	for (list = list_cpu; list < list_cpu + (info->list_size / sizeof(u32));
+	     list += ACC_LIST_ENTRY_WORDS) {
+		if (ACC_LIST_ENTRY_WORDS == 1) {
+			dev_dbg(kdev->dev,
+				"acc-irq: list %d, entry @%p, %08x\n",
+				acc->list_index, list, list[0]);
+		} else if (ACC_LIST_ENTRY_WORDS == 2) {
+			dev_dbg(kdev->dev,
+				"acc-irq: list %d, entry @%p, %08x %08x\n",
+				acc->list_index, list, list[0], list[1]);
+		} else if (ACC_LIST_ENTRY_WORDS == 4) {
+			dev_dbg(kdev->dev,
+				"acc-irq: list %d, entry @%p, %08x %08x %08x %08x\n",
+				acc->list_index, list, list[0], list[1],
+				list[2], list[3]);
+		}
+
+		val = list[ACC_LIST_ENTRY_DESC_IDX];
+		if (!val)
+			break;
+
+		if (range->flags & RANGE_MULTI_QUEUE) {
+			queue = list[ACC_LIST_ENTRY_QUEUE_IDX] >> 16;
+			if (queue < range_base ||
+			    queue >= range_base + range->num_queues) {
+				dev_err(kdev->dev,
+					"bad queue %d, expecting %d-%d\n",
+					queue, range_base,
+					range_base + range->num_queues);
+				break;
+			}
+			queue -= range_base;
+			kq = knav_range_offset_to_inst(kdev, range,
+								queue);
+		}
+
+		if (atomic_inc_return(&kq->desc_count) >= ACC_DESCS_MAX) {
+			atomic_dec(&kq->desc_count);
+			dev_err(kdev->dev,
+				"acc-irq: queue %d full, entry dropped\n",
+				queue + range_base);
+			continue;
+		}
+
+		idx = atomic_inc_return(&kq->desc_tail) & ACC_DESCS_MASK;
+		kq->descs[idx] = val;
+		kq->notify_needed = 1;
+		dev_dbg(kdev->dev, "acc-irq: enqueue %08x at %d, queue %d\n",
+			val, idx, queue + range_base);
+	}
+
+	__knav_acc_notify(range, acc);
+	memset(list_cpu, 0, info->list_size);
+	dma_sync_single_for_device(kdev->dev, list_dma, info->list_size,
+				   DMA_TO_DEVICE);
+
+	/* flip to the other list */
+	acc->list_index ^= 1;
+
+	/* reset the interrupt counter */
+	writel_relaxed(1, pdsp->intd + ACC_INTD_OFFSET_COUNT(channel));
+
+	/* ack the interrupt */
+	writel_relaxed(ACC_CHANNEL_INT_BASE + channel,
+		       pdsp->intd + ACC_INTD_OFFSET_EOI);
+
+	return IRQ_HANDLED;
+}
+
+int knav_range_setup_acc_irq(struct knav_range_info *range,
+				int queue, bool enabled)
+{
+	struct knav_device *kdev = range->kdev;
+	struct knav_acc_channel *acc;
+	unsigned long cpu_map;
+	int ret = 0, irq;
+	u32 old, new;
+
+	if (range->flags & RANGE_MULTI_QUEUE) {
+		acc = range->acc;
+		irq = range->irqs[0].irq;
+		cpu_map = range->irqs[0].cpu_map;
+	} else {
+		acc = range->acc + queue;
+		irq = range->irqs[queue].irq;
+		cpu_map = range->irqs[queue].cpu_map;
+	}
+
+	old = acc->open_mask;
+	if (enabled)
+		new = old | BIT(queue);
+	else
+		new = old & ~BIT(queue);
+	acc->open_mask = new;
+
+	dev_dbg(kdev->dev,
+		"setup-acc-irq: open mask old %08x, new %08x, channel %s\n",
+		old, new, acc->name);
+
+	if (likely(new == old))
+		return 0;
+
+	if (new && !old) {
+		dev_dbg(kdev->dev,
+			"setup-acc-irq: requesting %s for channel %s\n",
+			acc->name, acc->name);
+		ret = request_irq(irq, knav_acc_int_handler, 0, acc->name,
+				  range);
+		if (!ret && cpu_map) {
+			ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
+			if (ret) {
+				dev_warn(range->kdev->dev,
+					 "Failed to set IRQ affinity\n");
+				return ret;
+			}
+		}
+	}
+
+	if (old && !new) {
+		dev_dbg(kdev->dev, "setup-acc-irq: freeing %s for channel %s\n",
+			acc->name, acc->name);
+		free_irq(irq, range);
+	}
+
+	return ret;
+}
+
+static const char *knav_acc_result_str(enum knav_acc_result result)
+{
+	static const char * const result_str[] = {
+		[ACC_RET_IDLE]			= "idle",
+		[ACC_RET_SUCCESS]		= "success",
+		[ACC_RET_INVALID_COMMAND]	= "invalid command",
+		[ACC_RET_INVALID_CHANNEL]	= "invalid channel",
+		[ACC_RET_INACTIVE_CHANNEL]	= "inactive channel",
+		[ACC_RET_ACTIVE_CHANNEL]	= "active channel",
+		[ACC_RET_INVALID_QUEUE]		= "invalid queue",
+		[ACC_RET_INVALID_RET]		= "invalid return code",
+	};
+
+	if (result >= ARRAY_SIZE(result_str))
+		return result_str[ACC_RET_INVALID_RET];
+	else
+		return result_str[result];
+}
+
+static enum knav_acc_result
+knav_acc_write(struct knav_device *kdev, struct knav_pdsp_info *pdsp,
+		struct knav_reg_acc_command *cmd)
+{
+	u32 result;
+
+	dev_dbg(kdev->dev, "acc command %08x %08x %08x %08x %08x\n",
+		cmd->command, cmd->queue_mask, cmd->list_phys,
+		cmd->queue_num, cmd->timer_config);
+
+	writel_relaxed(cmd->timer_config, &pdsp->acc_command->timer_config);
+	writel_relaxed(cmd->queue_num, &pdsp->acc_command->queue_num);
+	writel_relaxed(cmd->list_phys, &pdsp->acc_command->list_phys);
+	writel_relaxed(cmd->queue_mask, &pdsp->acc_command->queue_mask);
+	writel_relaxed(cmd->command, &pdsp->acc_command->command);
+
+	/* wait for the command to clear */
+	do {
+		result = readl_relaxed(&pdsp->acc_command->command);
+	} while ((result >> 8) & 0xff);
+
+	return (result >> 24) & 0xff;
+}
+
+static void knav_acc_setup_cmd(struct knav_device *kdev,
+				struct knav_range_info *range,
+				struct knav_reg_acc_command *cmd,
+				int queue)
+{
+	struct knav_acc_info *info = &range->acc_info;
+	struct knav_acc_channel *acc;
+	int queue_base;
+	u32 queue_mask;
+
+	if (range->flags & RANGE_MULTI_QUEUE) {
+		acc = range->acc;
+		queue_base = range->queue_base;
+		queue_mask = BIT(range->num_queues) - 1;
+	} else {
+		acc = range->acc + queue;
+		queue_base = range->queue_base + queue;
+		queue_mask = 0;
+	}
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->command    = acc->channel;
+	cmd->queue_mask = queue_mask;
+	cmd->list_phys  = acc->list_dma[0];
+	cmd->queue_num  = info->list_entries << 16;
+	cmd->queue_num |= queue_base;
+
+	cmd->timer_config = ACC_LIST_ENTRY_TYPE << 18;
+	if (range->flags & RANGE_MULTI_QUEUE)
+		cmd->timer_config |= ACC_CFG_MULTI_QUEUE;
+	cmd->timer_config |= info->pacing_mode << 16;
+	cmd->timer_config |= info->timer_count;
+}
+
+static void knav_acc_stop(struct knav_device *kdev,
+				struct knav_range_info *range,
+				int queue)
+{
+	struct knav_reg_acc_command cmd;
+	struct knav_acc_channel *acc;
+	enum knav_acc_result result;
+
+	acc = range->acc + queue;
+
+	knav_acc_setup_cmd(kdev, range, &cmd, queue);
+	cmd.command |= ACC_CMD_DISABLE_CHANNEL << 8;
+	result = knav_acc_write(kdev, range->acc_info.pdsp, &cmd);
+
+	dev_dbg(kdev->dev, "stopped acc channel %s, result %s\n",
+		acc->name, knav_acc_result_str(result));
+}
+
+static enum knav_acc_result knav_acc_start(struct knav_device *kdev,
+						struct knav_range_info *range,
+						int queue)
+{
+	struct knav_reg_acc_command cmd;
+	struct knav_acc_channel *acc;
+	enum knav_acc_result result;
+
+	acc = range->acc + queue;
+
+	knav_acc_setup_cmd(kdev, range, &cmd, queue);
+	cmd.command |= ACC_CMD_ENABLE_CHANNEL << 8;
+	result = knav_acc_write(kdev, range->acc_info.pdsp, &cmd);
+
+	dev_dbg(kdev->dev, "started acc channel %s, result %s\n",
+		acc->name, knav_acc_result_str(result));
+
+	return result;
+}
+
+static int knav_acc_init_range(struct knav_range_info *range)
+{
+	struct knav_device *kdev = range->kdev;
+	struct knav_acc_channel *acc;
+	enum knav_acc_result result;
+	int queue;
+
+	for (queue = 0; queue < range->num_queues; queue++) {
+		acc = range->acc + queue;
+
+		knav_acc_stop(kdev, range, queue);
+		acc->list_index = 0;
+		result = knav_acc_start(kdev, range, queue);
+
+		if (result != ACC_RET_SUCCESS)
+			return -EIO;
+
+		if (range->flags & RANGE_MULTI_QUEUE)
+			return 0;
+	}
+	return 0;
+}
+
+static int knav_acc_init_queue(struct knav_range_info *range,
+				struct knav_queue_inst *kq)
+{
+	unsigned id = kq->id - range->queue_base;
+
+	kq->descs = devm_kzalloc(range->kdev->dev,
+				 ACC_DESCS_MAX * sizeof(u32), GFP_KERNEL);
+	if (!kq->descs)
+		return -ENOMEM;
+
+	kq->acc = range->acc;
+	if ((range->flags & RANGE_MULTI_QUEUE) == 0)
+		kq->acc += id;
+	return 0;
+}
+
+static int knav_acc_open_queue(struct knav_range_info *range,
+				struct knav_queue_inst *inst, unsigned flags)
+{
+	unsigned id = inst->id - range->queue_base;
+
+	return knav_range_setup_acc_irq(range, id, true);
+}
+
+static int knav_acc_close_queue(struct knav_range_info *range,
+					struct knav_queue_inst *inst)
+{
+	unsigned id = inst->id - range->queue_base;
+
+	return knav_range_setup_acc_irq(range, id, false);
+}
+
+static int knav_acc_free_range(struct knav_range_info *range)
+{
+	struct knav_device *kdev = range->kdev;
+	struct knav_acc_channel *acc;
+	struct knav_acc_info *info;
+	int channel, channels;
+
+	info = &range->acc_info;
+
+	if (range->flags & RANGE_MULTI_QUEUE)
+		channels = 1;
+	else
+		channels = range->num_queues;
+
+	for (channel = 0; channel < channels; channel++) {
+		acc = range->acc + channel;
+		if (!acc->list_cpu[0])
+			continue;
+		dma_unmap_single(kdev->dev, acc->list_dma[0],
+				 info->mem_size, DMA_BIDIRECTIONAL);
+		free_pages_exact(acc->list_cpu[0], info->mem_size);
+	}
+	devm_kfree(range->kdev->dev, range->acc);
+	return 0;
+}
+
+struct knav_range_ops knav_acc_range_ops = {
+	.set_notify	= knav_acc_set_notify,
+	.init_queue	= knav_acc_init_queue,
+	.open_queue	= knav_acc_open_queue,
+	.close_queue	= knav_acc_close_queue,
+	.init_range	= knav_acc_init_range,
+	.free_range	= knav_acc_free_range,
+};
+
+/**
+ * knav_init_acc_range: Initialise accumulator ranges
+ *
+ * @kdev:		qmss device
+ * @node:		device node
+ * @range:		qmms range information
+ *
+ * Return 0 on success or error
+ */
+int knav_init_acc_range(struct knav_device *kdev,
+				struct device_node *node,
+				struct knav_range_info *range)
+{
+	struct knav_acc_channel *acc;
+	struct knav_pdsp_info *pdsp;
+	struct knav_acc_info *info;
+	int ret, channel, channels;
+	int list_size, mem_size;
+	dma_addr_t list_dma;
+	void *list_mem;
+	u32 config[5];
+
+	range->flags |= RANGE_HAS_ACCUMULATOR;
+	info = &range->acc_info;
+
+	ret = of_property_read_u32_array(node, "accumulator", config, 5);
+	if (ret)
+		return ret;
+
+	info->pdsp_id		= config[0];
+	info->start_channel	= config[1];
+	info->list_entries	= config[2];
+	info->pacing_mode	= config[3];
+	info->timer_count	= config[4] / ACC_DEFAULT_PERIOD;
+
+	if (info->start_channel > ACC_MAX_CHANNEL) {
+		dev_err(kdev->dev, "channel %d invalid for range %s\n",
+			info->start_channel, range->name);
+		return -EINVAL;
+	}
+
+	if (info->pacing_mode > 3) {
+		dev_err(kdev->dev, "pacing mode %d invalid for range %s\n",
+			info->pacing_mode, range->name);
+		return -EINVAL;
+	}
+
+	pdsp = knav_find_pdsp(kdev, info->pdsp_id);
+	if (!pdsp) {
+		dev_err(kdev->dev, "pdsp id %d not found for range %s\n",
+			info->pdsp_id, range->name);
+		return -EINVAL;
+	}
+
+	info->pdsp = pdsp;
+	channels = range->num_queues;
+	if (of_get_property(node, "multi-queue", NULL)) {
+		range->flags |= RANGE_MULTI_QUEUE;
+		channels = 1;
+		if (range->queue_base & (32 - 1)) {
+			dev_err(kdev->dev,
+				"misaligned multi-queue accumulator range %s\n",
+				range->name);
+			return -EINVAL;
+		}
+		if (range->num_queues > 32) {
+			dev_err(kdev->dev,
+				"too many queues in accumulator range %s\n",
+				range->name);
+			return -EINVAL;
+		}
+	}
+
+	/* figure out list size */
+	list_size  = info->list_entries;
+	list_size *= ACC_LIST_ENTRY_WORDS * sizeof(u32);
+	info->list_size = list_size;
+	mem_size   = PAGE_ALIGN(list_size * 2);
+	info->mem_size  = mem_size;
+	range->acc = devm_kzalloc(kdev->dev, channels * sizeof(*range->acc),
+				  GFP_KERNEL);
+	if (!range->acc)
+		return -ENOMEM;
+
+	for (channel = 0; channel < channels; channel++) {
+		acc = range->acc + channel;
+		acc->channel = info->start_channel + channel;
+
+		/* allocate memory for the two lists */
+		list_mem = alloc_pages_exact(mem_size, GFP_KERNEL | GFP_DMA);
+		if (!list_mem)
+			return -ENOMEM;
+
+		list_dma = dma_map_single(kdev->dev, list_mem, mem_size,
+					  DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(kdev->dev, list_dma)) {
+			free_pages_exact(list_mem, mem_size);
+			return -ENOMEM;
+		}
+
+		memset(list_mem, 0, mem_size);
+		dma_sync_single_for_device(kdev->dev, list_dma, mem_size,
+					   DMA_TO_DEVICE);
+		scnprintf(acc->name, sizeof(acc->name), "hwqueue-acc-%d",
+			  acc->channel);
+		acc->list_cpu[0] = list_mem;
+		acc->list_cpu[1] = list_mem + list_size;
+		acc->list_dma[0] = list_dma;
+		acc->list_dma[1] = list_dma + list_size;
+		dev_dbg(kdev->dev, "%s: channel %d, phys %08x, virt %8p\n",
+			acc->name, acc->channel, list_dma, list_mem);
+	}
+
+	range->ops = &knav_acc_range_ops;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(knav_init_acc_range);
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
new file mode 100644
index 000000000000..0a2c8634c48b
--- /dev/null
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -0,0 +1,1816 @@
+/*
+ * Keystone Queue Manager subsystem driver
+ *
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com
+ * Authors:	Sandeep Nair <sandeep_n@ti.com>
+ *		Cyril Chemparathy <cyril@ti.com>
+ *		Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/pm_runtime.h>
+#include <linux/firmware.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/soc/ti/knav_qmss.h>
+
+#include "knav_qmss.h"
+
+static struct knav_device *kdev;
+static DEFINE_MUTEX(knav_dev_lock);
+
+/* Queue manager register indices in DTS */
+#define KNAV_QUEUE_PEEK_REG_INDEX	0
+#define KNAV_QUEUE_STATUS_REG_INDEX	1
+#define KNAV_QUEUE_CONFIG_REG_INDEX	2
+#define KNAV_QUEUE_REGION_REG_INDEX	3
+#define KNAV_QUEUE_PUSH_REG_INDEX	4
+#define KNAV_QUEUE_POP_REG_INDEX	5
+
+/* PDSP register indices in DTS */
+#define KNAV_QUEUE_PDSP_IRAM_REG_INDEX	0
+#define KNAV_QUEUE_PDSP_REGS_REG_INDEX	1
+#define KNAV_QUEUE_PDSP_INTD_REG_INDEX	2
+#define KNAV_QUEUE_PDSP_CMD_REG_INDEX	3
+
+#define knav_queue_idx_to_inst(kdev, idx)			\
+	(kdev->instances + (idx << kdev->inst_shift))
+
+#define for_each_handle_rcu(qh, inst)			\
+	list_for_each_entry_rcu(qh, &inst->handles, list)
+
+#define for_each_instance(idx, inst, kdev)		\
+	for (idx = 0, inst = kdev->instances;		\
+	     idx < (kdev)->num_queues_in_use;			\
+	     idx++, inst = knav_queue_idx_to_inst(kdev, idx))
+
+/**
+ * knav_queue_notify: qmss queue notfier call
+ *
+ * @inst:		qmss queue instance like accumulator
+ */
+void knav_queue_notify(struct knav_queue_inst *inst)
+{
+	struct knav_queue *qh;
+
+	if (!inst)
+		return;
+
+	rcu_read_lock();
+	for_each_handle_rcu(qh, inst) {
+		if (atomic_read(&qh->notifier_enabled) <= 0)
+			continue;
+		if (WARN_ON(!qh->notifier_fn))
+			continue;
+		atomic_inc(&qh->stats.notifies);
+		qh->notifier_fn(qh->notifier_fn_arg);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(knav_queue_notify);
+
+static irqreturn_t knav_queue_int_handler(int irq, void *_instdata)
+{
+	struct knav_queue_inst *inst = _instdata;
+
+	knav_queue_notify(inst);
+	return IRQ_HANDLED;
+}
+
+static int knav_queue_setup_irq(struct knav_range_info *range,
+			  struct knav_queue_inst *inst)
+{
+	unsigned queue = inst->id - range->queue_base;
+	unsigned long cpu_map;
+	int ret = 0, irq;
+
+	if (range->flags & RANGE_HAS_IRQ) {
+		irq = range->irqs[queue].irq;
+		cpu_map = range->irqs[queue].cpu_map;
+		ret = request_irq(irq, knav_queue_int_handler, 0,
+					inst->irq_name, inst);
+		if (ret)
+			return ret;
+		disable_irq(irq);
+		if (cpu_map) {
+			ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
+			if (ret) {
+				dev_warn(range->kdev->dev,
+					 "Failed to set IRQ affinity\n");
+				return ret;
+			}
+		}
+	}
+	return ret;
+}
+
+static void knav_queue_free_irq(struct knav_queue_inst *inst)
+{
+	struct knav_range_info *range = inst->range;
+	unsigned queue = inst->id - inst->range->queue_base;
+	int irq;
+
+	if (range->flags & RANGE_HAS_IRQ) {
+		irq = range->irqs[queue].irq;
+		irq_set_affinity_hint(irq, NULL);
+		free_irq(irq, inst);
+	}
+}
+
+static inline bool knav_queue_is_busy(struct knav_queue_inst *inst)
+{
+	return !list_empty(&inst->handles);
+}
+
+static inline bool knav_queue_is_reserved(struct knav_queue_inst *inst)
+{
+	return inst->range->flags & RANGE_RESERVED;
+}
+
+static inline bool knav_queue_is_shared(struct knav_queue_inst *inst)
+{
+	struct knav_queue *tmp;
+
+	rcu_read_lock();
+	for_each_handle_rcu(tmp, inst) {
+		if (tmp->flags & KNAV_QUEUE_SHARED) {
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+	return false;
+}
+
+static inline bool knav_queue_match_type(struct knav_queue_inst *inst,
+						unsigned type)
+{
+	if ((type == KNAV_QUEUE_QPEND) &&
+	    (inst->range->flags & RANGE_HAS_IRQ)) {
+		return true;
+	} else if ((type == KNAV_QUEUE_ACC) &&
+		(inst->range->flags & RANGE_HAS_ACCUMULATOR)) {
+		return true;
+	} else if ((type == KNAV_QUEUE_GP) &&
+		!(inst->range->flags &
+			(RANGE_HAS_ACCUMULATOR | RANGE_HAS_IRQ))) {
+		return true;
+	}
+	return false;
+}
+
+static inline struct knav_queue_inst *
+knav_queue_match_id_to_inst(struct knav_device *kdev, unsigned id)
+{
+	struct knav_queue_inst *inst;
+	int idx;
+
+	for_each_instance(idx, inst, kdev) {
+		if (inst->id == id)
+			return inst;
+	}
+	return NULL;
+}
+
+static inline struct knav_queue_inst *knav_queue_find_by_id(int id)
+{
+	if (kdev->base_id <= id &&
+	    kdev->base_id + kdev->num_queues > id) {
+		id -= kdev->base_id;
+		return knav_queue_match_id_to_inst(kdev, id);
+	}
+	return NULL;
+}
+
+static struct knav_queue *__knav_queue_open(struct knav_queue_inst *inst,
+				      const char *name, unsigned flags)
+{
+	struct knav_queue *qh;
+	unsigned id;
+	int ret = 0;
+
+	qh = devm_kzalloc(inst->kdev->dev, sizeof(*qh), GFP_KERNEL);
+	if (!qh)
+		return ERR_PTR(-ENOMEM);
+
+	qh->flags = flags;
+	qh->inst = inst;
+	id = inst->id - inst->qmgr->start_queue;
+	qh->reg_push = &inst->qmgr->reg_push[id];
+	qh->reg_pop = &inst->qmgr->reg_pop[id];
+	qh->reg_peek = &inst->qmgr->reg_peek[id];
+
+	/* first opener? */
+	if (!knav_queue_is_busy(inst)) {
+		struct knav_range_info *range = inst->range;
+
+		inst->name = kstrndup(name, KNAV_NAME_SIZE, GFP_KERNEL);
+		if (range->ops && range->ops->open_queue)
+			ret = range->ops->open_queue(range, inst, flags);
+
+		if (ret) {
+			devm_kfree(inst->kdev->dev, qh);
+			return ERR_PTR(ret);
+		}
+	}
+	list_add_tail_rcu(&qh->list, &inst->handles);
+	return qh;
+}
+
+static struct knav_queue *
+knav_queue_open_by_id(const char *name, unsigned id, unsigned flags)
+{
+	struct knav_queue_inst *inst;
+	struct knav_queue *qh;
+
+	mutex_lock(&knav_dev_lock);
+
+	qh = ERR_PTR(-ENODEV);
+	inst = knav_queue_find_by_id(id);
+	if (!inst)
+		goto unlock_ret;
+
+	qh = ERR_PTR(-EEXIST);
+	if (!(flags & KNAV_QUEUE_SHARED) && knav_queue_is_busy(inst))
+		goto unlock_ret;
+
+	qh = ERR_PTR(-EBUSY);
+	if ((flags & KNAV_QUEUE_SHARED) &&
+	    (knav_queue_is_busy(inst) && !knav_queue_is_shared(inst)))
+		goto unlock_ret;
+
+	qh = __knav_queue_open(inst, name, flags);
+
+unlock_ret:
+	mutex_unlock(&knav_dev_lock);
+
+	return qh;
+}
+
+static struct knav_queue *knav_queue_open_by_type(const char *name,
+						unsigned type, unsigned flags)
+{
+	struct knav_queue_inst *inst;
+	struct knav_queue *qh = ERR_PTR(-EINVAL);
+	int idx;
+
+	mutex_lock(&knav_dev_lock);
+
+	for_each_instance(idx, inst, kdev) {
+		if (knav_queue_is_reserved(inst))
+			continue;
+		if (!knav_queue_match_type(inst, type))
+			continue;
+		if (knav_queue_is_busy(inst))
+			continue;
+		qh = __knav_queue_open(inst, name, flags);
+		goto unlock_ret;
+	}
+
+unlock_ret:
+	mutex_unlock(&knav_dev_lock);
+	return qh;
+}
+
+static void knav_queue_set_notify(struct knav_queue_inst *inst, bool enabled)
+{
+	struct knav_range_info *range = inst->range;
+
+	if (range->ops && range->ops->set_notify)
+		range->ops->set_notify(range, inst, enabled);
+}
+
+static int knav_queue_enable_notifier(struct knav_queue *qh)
+{
+	struct knav_queue_inst *inst = qh->inst;
+	bool first;
+
+	if (WARN_ON(!qh->notifier_fn))
+		return -EINVAL;
+
+	/* Adjust the per handle notifier count */
+	first = (atomic_inc_return(&qh->notifier_enabled) == 1);
+	if (!first)
+		return 0; /* nothing to do */
+
+	/* Now adjust the per instance notifier count */
+	first = (atomic_inc_return(&inst->num_notifiers) == 1);
+	if (first)
+		knav_queue_set_notify(inst, true);
+
+	return 0;
+}
+
+static int knav_queue_disable_notifier(struct knav_queue *qh)
+{
+	struct knav_queue_inst *inst = qh->inst;
+	bool last;
+
+	last = (atomic_dec_return(&qh->notifier_enabled) == 0);
+	if (!last)
+		return 0; /* nothing to do */
+
+	last = (atomic_dec_return(&inst->num_notifiers) == 0);
+	if (last)
+		knav_queue_set_notify(inst, false);
+
+	return 0;
+}
+
+static int knav_queue_set_notifier(struct knav_queue *qh,
+				struct knav_queue_notify_config *cfg)
+{
+	knav_queue_notify_fn old_fn = qh->notifier_fn;
+
+	if (!cfg)
+		return -EINVAL;
+
+	if (!(qh->inst->range->flags & (RANGE_HAS_ACCUMULATOR | RANGE_HAS_IRQ)))
+		return -ENOTSUPP;
+
+	if (!cfg->fn && old_fn)
+		knav_queue_disable_notifier(qh);
+
+	qh->notifier_fn = cfg->fn;
+	qh->notifier_fn_arg = cfg->fn_arg;
+
+	if (cfg->fn && !old_fn)
+		knav_queue_enable_notifier(qh);
+
+	return 0;
+}
+
+static int knav_gp_set_notify(struct knav_range_info *range,
+			       struct knav_queue_inst *inst,
+			       bool enabled)
+{
+	unsigned queue;
+
+	if (range->flags & RANGE_HAS_IRQ) {
+		queue = inst->id - range->queue_base;
+		if (enabled)
+			enable_irq(range->irqs[queue].irq);
+		else
+			disable_irq_nosync(range->irqs[queue].irq);
+	}
+	return 0;
+}
+
+static int knav_gp_open_queue(struct knav_range_info *range,
+				struct knav_queue_inst *inst, unsigned flags)
+{
+	return knav_queue_setup_irq(range, inst);
+}
+
+static int knav_gp_close_queue(struct knav_range_info *range,
+				struct knav_queue_inst *inst)
+{
+	knav_queue_free_irq(inst);
+	return 0;
+}
+
+struct knav_range_ops knav_gp_range_ops = {
+	.set_notify	= knav_gp_set_notify,
+	.open_queue	= knav_gp_open_queue,
+	.close_queue	= knav_gp_close_queue,
+};
+
+
+static int knav_queue_get_count(void *qhandle)
+{
+	struct knav_queue *qh = qhandle;
+	struct knav_queue_inst *inst = qh->inst;
+
+	return readl_relaxed(&qh->reg_peek[0].entry_count) +
+		atomic_read(&inst->desc_count);
+}
+
+static void knav_queue_debug_show_instance(struct seq_file *s,
+					struct knav_queue_inst *inst)
+{
+	struct knav_device *kdev = inst->kdev;
+	struct knav_queue *qh;
+
+	if (!knav_queue_is_busy(inst))
+		return;
+
+	seq_printf(s, "\tqueue id %d (%s)\n",
+		   kdev->base_id + inst->id, inst->name);
+	for_each_handle_rcu(qh, inst) {
+		seq_printf(s, "\t\thandle %p: ", qh);
+		seq_printf(s, "pushes %8d, ",
+			   atomic_read(&qh->stats.pushes));
+		seq_printf(s, "pops %8d, ",
+			   atomic_read(&qh->stats.pops));
+		seq_printf(s, "count %8d, ",
+			   knav_queue_get_count(qh));
+		seq_printf(s, "notifies %8d, ",
+			   atomic_read(&qh->stats.notifies));
+		seq_printf(s, "push errors %8d, ",
+			   atomic_read(&qh->stats.push_errors));
+		seq_printf(s, "pop errors %8d\n",
+			   atomic_read(&qh->stats.pop_errors));
+	}
+}
+
+static int knav_queue_debug_show(struct seq_file *s, void *v)
+{
+	struct knav_queue_inst *inst;
+	int idx;
+
+	mutex_lock(&knav_dev_lock);
+	seq_printf(s, "%s: %u-%u\n",
+		   dev_name(kdev->dev), kdev->base_id,
+		   kdev->base_id + kdev->num_queues - 1);
+	for_each_instance(idx, inst, kdev)
+		knav_queue_debug_show_instance(s, inst);
+	mutex_unlock(&knav_dev_lock);
+
+	return 0;
+}
+
+static int knav_queue_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, knav_queue_debug_show, NULL);
+}
+
+static const struct file_operations knav_queue_debug_ops = {
+	.open		= knav_queue_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static inline int knav_queue_pdsp_wait(u32 * __iomem addr, unsigned timeout,
+					u32 flags)
+{
+	unsigned long end;
+	u32 val = 0;
+
+	end = jiffies + msecs_to_jiffies(timeout);
+	while (time_after(end, jiffies)) {
+		val = readl_relaxed(addr);
+		if (flags)
+			val &= flags;
+		if (!val)
+			break;
+		cpu_relax();
+	}
+	return val ? -ETIMEDOUT : 0;
+}
+
+
+static int knav_queue_flush(struct knav_queue *qh)
+{
+	struct knav_queue_inst *inst = qh->inst;
+	unsigned id = inst->id - inst->qmgr->start_queue;
+
+	atomic_set(&inst->desc_count, 0);
+	writel_relaxed(0, &inst->qmgr->reg_push[id].ptr_size_thresh);
+	return 0;
+}
+
+/**
+ * knav_queue_open()	- open a hardware queue
+ * @name		- name to give the queue handle
+ * @id			- desired queue number if any or specifes the type
+ *			  of queue
+ * @flags		- the following flags are applicable to queues:
+ *	KNAV_QUEUE_SHARED - allow the queue to be shared. Queues are
+ *			     exclusive by default.
+ *			     Subsequent attempts to open a shared queue should
+ *			     also have this flag.
+ *
+ * Returns a handle to the open hardware queue if successful. Use IS_ERR()
+ * to check the returned value for error codes.
+ */
+void *knav_queue_open(const char *name, unsigned id,
+					unsigned flags)
+{
+	struct knav_queue *qh = ERR_PTR(-EINVAL);
+
+	switch (id) {
+	case KNAV_QUEUE_QPEND:
+	case KNAV_QUEUE_ACC:
+	case KNAV_QUEUE_GP:
+		qh = knav_queue_open_by_type(name, id, flags);
+		break;
+
+	default:
+		qh = knav_queue_open_by_id(name, id, flags);
+		break;
+	}
+	return qh;
+}
+EXPORT_SYMBOL_GPL(knav_queue_open);
+
+/**
+ * knav_queue_close()	- close a hardware queue handle
+ * @qh			- handle to close
+ */
+void knav_queue_close(void *qhandle)
+{
+	struct knav_queue *qh = qhandle;
+	struct knav_queue_inst *inst = qh->inst;
+
+	while (atomic_read(&qh->notifier_enabled) > 0)
+		knav_queue_disable_notifier(qh);
+
+	mutex_lock(&knav_dev_lock);
+	list_del_rcu(&qh->list);
+	mutex_unlock(&knav_dev_lock);
+	synchronize_rcu();
+	if (!knav_queue_is_busy(inst)) {
+		struct knav_range_info *range = inst->range;
+
+		if (range->ops && range->ops->close_queue)
+			range->ops->close_queue(range, inst);
+	}
+	devm_kfree(inst->kdev->dev, qh);
+}
+EXPORT_SYMBOL_GPL(knav_queue_close);
+
+/**
+ * knav_queue_device_control()	- Perform control operations on a queue
+ * @qh				- queue handle
+ * @cmd				- control commands
+ * @arg				- command argument
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int knav_queue_device_control(void *qhandle, enum knav_queue_ctrl_cmd cmd,
+				unsigned long arg)
+{
+	struct knav_queue *qh = qhandle;
+	struct knav_queue_notify_config *cfg;
+	int ret;
+
+	switch ((int)cmd) {
+	case KNAV_QUEUE_GET_ID:
+		ret = qh->inst->kdev->base_id + qh->inst->id;
+		break;
+
+	case KNAV_QUEUE_FLUSH:
+		ret = knav_queue_flush(qh);
+		break;
+
+	case KNAV_QUEUE_SET_NOTIFIER:
+		cfg = (void *)arg;
+		ret = knav_queue_set_notifier(qh, cfg);
+		break;
+
+	case KNAV_QUEUE_ENABLE_NOTIFY:
+		ret = knav_queue_enable_notifier(qh);
+		break;
+
+	case KNAV_QUEUE_DISABLE_NOTIFY:
+		ret = knav_queue_disable_notifier(qh);
+		break;
+
+	case KNAV_QUEUE_GET_COUNT:
+		ret = knav_queue_get_count(qh);
+		break;
+
+	default:
+		ret = -ENOTSUPP;
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(knav_queue_device_control);
+
+
+
+/**
+ * knav_queue_push()	- push data (or descriptor) to the tail of a queue
+ * @qh			- hardware queue handle
+ * @data		- data to push
+ * @size		- size of data to push
+ * @flags		- can be used to pass additional information
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int knav_queue_push(void *qhandle, dma_addr_t dma,
+					unsigned size, unsigned flags)
+{
+	struct knav_queue *qh = qhandle;
+	u32 val;
+
+	val = (u32)dma | ((size / 16) - 1);
+	writel_relaxed(val, &qh->reg_push[0].ptr_size_thresh);
+
+	atomic_inc(&qh->stats.pushes);
+	return 0;
+}
+
+/**
+ * knav_queue_pop()	- pop data (or descriptor) from the head of a queue
+ * @qh			- hardware queue handle
+ * @size		- (optional) size of the data pop'ed.
+ *
+ * Returns a DMA address on success, 0 on failure.
+ */
+dma_addr_t knav_queue_pop(void *qhandle, unsigned *size)
+{
+	struct knav_queue *qh = qhandle;
+	struct knav_queue_inst *inst = qh->inst;
+	dma_addr_t dma;
+	u32 val, idx;
+
+	/* are we accumulated? */
+	if (inst->descs) {
+		if (unlikely(atomic_dec_return(&inst->desc_count) < 0)) {
+			atomic_inc(&inst->desc_count);
+			return 0;
+		}
+		idx  = atomic_inc_return(&inst->desc_head);
+		idx &= ACC_DESCS_MASK;
+		val = inst->descs[idx];
+	} else {
+		val = readl_relaxed(&qh->reg_pop[0].ptr_size_thresh);
+		if (unlikely(!val))
+			return 0;
+	}
+
+	dma = val & DESC_PTR_MASK;
+	if (size)
+		*size = ((val & DESC_SIZE_MASK) + 1) * 16;
+
+	atomic_inc(&qh->stats.pops);
+	return dma;
+}
+
+/* carve out descriptors and push into queue */
+static void kdesc_fill_pool(struct knav_pool *pool)
+{
+	struct knav_region *region;
+	int i;
+
+	region = pool->region;
+	pool->desc_size = region->desc_size;
+	for (i = 0; i < pool->num_desc; i++) {
+		int index = pool->region_offset + i;
+		dma_addr_t dma_addr;
+		unsigned dma_size;
+		dma_addr = region->dma_start + (region->desc_size * index);
+		dma_size = ALIGN(pool->desc_size, SMP_CACHE_BYTES);
+		dma_sync_single_for_device(pool->dev, dma_addr, dma_size,
+					   DMA_TO_DEVICE);
+		knav_queue_push(pool->queue, dma_addr, dma_size, 0);
+	}
+}
+
+/* pop out descriptors and close the queue */
+static void kdesc_empty_pool(struct knav_pool *pool)
+{
+	dma_addr_t dma;
+	unsigned size;
+	void *desc;
+	int i;
+
+	if (!pool->queue)
+		return;
+
+	for (i = 0;; i++) {
+		dma = knav_queue_pop(pool->queue, &size);
+		if (!dma)
+			break;
+		desc = knav_pool_desc_dma_to_virt(pool, dma);
+		if (!desc) {
+			dev_dbg(pool->kdev->dev,
+				"couldn't unmap desc, continuing\n");
+			continue;
+		}
+	}
+	WARN_ON(i != pool->num_desc);
+	knav_queue_close(pool->queue);
+}
+
+
+/* Get the DMA address of a descriptor */
+dma_addr_t knav_pool_desc_virt_to_dma(void *ph, void *virt)
+{
+	struct knav_pool *pool = ph;
+	return pool->region->dma_start + (virt - pool->region->virt_start);
+}
+
+void *knav_pool_desc_dma_to_virt(void *ph, dma_addr_t dma)
+{
+	struct knav_pool *pool = ph;
+	return pool->region->virt_start + (dma - pool->region->dma_start);
+}
+
+/**
+ * knav_pool_create()	- Create a pool of descriptors
+ * @name		- name to give the pool handle
+ * @num_desc		- numbers of descriptors in the pool
+ * @region_id		- QMSS region id from which the descriptors are to be
+ *			  allocated.
+ *
+ * Returns a pool handle on success.
+ * Use IS_ERR_OR_NULL() to identify error values on return.
+ */
+void *knav_pool_create(const char *name,
+					int num_desc, int region_id)
+{
+	struct knav_region *reg_itr, *region = NULL;
+	struct knav_pool *pool, *pi;
+	struct list_head *node;
+	unsigned last_offset;
+	bool slot_found;
+	int ret;
+
+	if (!kdev->dev)
+		return ERR_PTR(-ENODEV);
+
+	pool = devm_kzalloc(kdev->dev, sizeof(*pool), GFP_KERNEL);
+	if (!pool) {
+		dev_err(kdev->dev, "out of memory allocating pool\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_region(kdev, reg_itr) {
+		if (reg_itr->id != region_id)
+			continue;
+		region = reg_itr;
+		break;
+	}
+
+	if (!region) {
+		dev_err(kdev->dev, "region-id(%d) not found\n", region_id);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	pool->queue = knav_queue_open(name, KNAV_QUEUE_GP, 0);
+	if (IS_ERR_OR_NULL(pool->queue)) {
+		dev_err(kdev->dev,
+			"failed to open queue for pool(%s), error %ld\n",
+			name, PTR_ERR(pool->queue));
+		ret = PTR_ERR(pool->queue);
+		goto err;
+	}
+
+	pool->name = kstrndup(name, KNAV_NAME_SIZE, GFP_KERNEL);
+	pool->kdev = kdev;
+	pool->dev = kdev->dev;
+
+	mutex_lock(&knav_dev_lock);
+
+	if (num_desc > (region->num_desc - region->used_desc)) {
+		dev_err(kdev->dev, "out of descs in region(%d) for pool(%s)\n",
+			region_id, name);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Region maintains a sorted (by region offset) list of pools
+	 * use the first free slot which is large enough to accomodate
+	 * the request
+	 */
+	last_offset = 0;
+	slot_found = false;
+	node = &region->pools;
+	list_for_each_entry(pi, &region->pools, region_inst) {
+		if ((pi->region_offset - last_offset) >= num_desc) {
+			slot_found = true;
+			break;
+		}
+		last_offset = pi->region_offset + pi->num_desc;
+	}
+	node = &pi->region_inst;
+
+	if (slot_found) {
+		pool->region = region;
+		pool->num_desc = num_desc;
+		pool->region_offset = last_offset;
+		region->used_desc += num_desc;
+		list_add_tail(&pool->list, &kdev->pools);
+		list_add_tail(&pool->region_inst, node);
+	} else {
+		dev_err(kdev->dev, "pool(%s) create failed: fragmented desc pool in region(%d)\n",
+			name, region_id);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	mutex_unlock(&knav_dev_lock);
+	kdesc_fill_pool(pool);
+	return pool;
+
+err:
+	mutex_unlock(&knav_dev_lock);
+	kfree(pool->name);
+	devm_kfree(kdev->dev, pool);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(knav_pool_create);
+
+/**
+ * knav_pool_destroy()	- Free a pool of descriptors
+ * @pool		- pool handle
+ */
+void knav_pool_destroy(void *ph)
+{
+	struct knav_pool *pool = ph;
+
+	if (!pool)
+		return;
+
+	if (!pool->region)
+		return;
+
+	kdesc_empty_pool(pool);
+	mutex_lock(&knav_dev_lock);
+
+	pool->region->used_desc -= pool->num_desc;
+	list_del(&pool->region_inst);
+	list_del(&pool->list);
+
+	mutex_unlock(&knav_dev_lock);
+	kfree(pool->name);
+	devm_kfree(kdev->dev, pool);
+}
+EXPORT_SYMBOL_GPL(knav_pool_destroy);
+
+
+/**
+ * knav_pool_desc_get()	- Get a descriptor from the pool
+ * @pool			- pool handle
+ *
+ * Returns descriptor from the pool.
+ */
+void *knav_pool_desc_get(void *ph)
+{
+	struct knav_pool *pool = ph;
+	dma_addr_t dma;
+	unsigned size;
+	void *data;
+
+	dma = knav_queue_pop(pool->queue, &size);
+	if (unlikely(!dma))
+		return ERR_PTR(-ENOMEM);
+	data = knav_pool_desc_dma_to_virt(pool, dma);
+	return data;
+}
+
+/**
+ * knav_pool_desc_put()	- return a descriptor to the pool
+ * @pool			- pool handle
+ */
+void knav_pool_desc_put(void *ph, void *desc)
+{
+	struct knav_pool *pool = ph;
+	dma_addr_t dma;
+	dma = knav_pool_desc_virt_to_dma(pool, desc);
+	knav_queue_push(pool->queue, dma, pool->region->desc_size, 0);
+}
+
+/**
+ * knav_pool_desc_map()	- Map descriptor for DMA transfer
+ * @pool			- pool handle
+ * @desc			- address of descriptor to map
+ * @size			- size of descriptor to map
+ * @dma				- DMA address return pointer
+ * @dma_sz			- adjusted return pointer
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int knav_pool_desc_map(void *ph, void *desc, unsigned size,
+					dma_addr_t *dma, unsigned *dma_sz)
+{
+	struct knav_pool *pool = ph;
+	*dma = knav_pool_desc_virt_to_dma(pool, desc);
+	size = min(size, pool->region->desc_size);
+	size = ALIGN(size, SMP_CACHE_BYTES);
+	*dma_sz = size;
+	dma_sync_single_for_device(pool->dev, *dma, size, DMA_TO_DEVICE);
+
+	/* Ensure the descriptor reaches to the memory */
+	__iowmb();
+
+	return 0;
+}
+
+/**
+ * knav_pool_desc_unmap()	- Unmap descriptor after DMA transfer
+ * @pool			- pool handle
+ * @dma				- DMA address of descriptor to unmap
+ * @dma_sz			- size of descriptor to unmap
+ *
+ * Returns descriptor address on success, Use IS_ERR_OR_NULL() to identify
+ * error values on return.
+ */
+void *knav_pool_desc_unmap(void *ph, dma_addr_t dma, unsigned dma_sz)
+{
+	struct knav_pool *pool = ph;
+	unsigned desc_sz;
+	void *desc;
+
+	desc_sz = min(dma_sz, pool->region->desc_size);
+	desc = knav_pool_desc_dma_to_virt(pool, dma);
+	dma_sync_single_for_cpu(pool->dev, dma, desc_sz, DMA_FROM_DEVICE);
+	prefetch(desc);
+	return desc;
+}
+
+/**
+ * knav_pool_count()	- Get the number of descriptors in pool.
+ * @pool		- pool handle
+ * Returns number of elements in the pool.
+ */
+int knav_pool_count(void *ph)
+{
+	struct knav_pool *pool = ph;
+	return knav_queue_get_count(pool->queue);
+}
+
+static void knav_queue_setup_region(struct knav_device *kdev,
+					struct knav_region *region)
+{
+	unsigned hw_num_desc, hw_desc_size, size;
+	struct knav_reg_region __iomem  *regs;
+	struct knav_qmgr_info *qmgr;
+	struct knav_pool *pool;
+	int id = region->id;
+	struct page *page;
+
+	/* unused region? */
+	if (!region->num_desc) {
+		dev_warn(kdev->dev, "unused region %s\n", region->name);
+		return;
+	}
+
+	/* get hardware descriptor value */
+	hw_num_desc = ilog2(region->num_desc - 1) + 1;
+
+	/* did we force fit ourselves into nothingness? */
+	if (region->num_desc < 32) {
+		region->num_desc = 0;
+		dev_warn(kdev->dev, "too few descriptors in region %s\n",
+			 region->name);
+		return;
+	}
+
+	size = region->num_desc * region->desc_size;
+	region->virt_start = alloc_pages_exact(size, GFP_KERNEL | GFP_DMA |
+						GFP_DMA32);
+	if (!region->virt_start) {
+		region->num_desc = 0;
+		dev_err(kdev->dev, "memory alloc failed for region %s\n",
+			region->name);
+		return;
+	}
+	region->virt_end = region->virt_start + size;
+	page = virt_to_page(region->virt_start);
+
+	region->dma_start = dma_map_page(kdev->dev, page, 0, size,
+					 DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(kdev->dev, region->dma_start)) {
+		dev_err(kdev->dev, "dma map failed for region %s\n",
+			region->name);
+		goto fail;
+	}
+	region->dma_end = region->dma_start + size;
+
+	pool = devm_kzalloc(kdev->dev, sizeof(*pool), GFP_KERNEL);
+	if (!pool) {
+		dev_err(kdev->dev, "out of memory allocating dummy pool\n");
+		goto fail;
+	}
+	pool->num_desc = 0;
+	pool->region_offset = region->num_desc;
+	list_add(&pool->region_inst, &region->pools);
+
+	dev_dbg(kdev->dev,
+		"region %s (%d): size:%d, link:%d@%d, phys:%08x-%08x, virt:%p-%p\n",
+		region->name, id, region->desc_size, region->num_desc,
+		region->link_index, region->dma_start, region->dma_end,
+		region->virt_start, region->virt_end);
+
+	hw_desc_size = (region->desc_size / 16) - 1;
+	hw_num_desc -= 5;
+
+	for_each_qmgr(kdev, qmgr) {
+		regs = qmgr->reg_region + id;
+		writel_relaxed(region->dma_start, &regs->base);
+		writel_relaxed(region->link_index, &regs->start_index);
+		writel_relaxed(hw_desc_size << 16 | hw_num_desc,
+			       &regs->size_count);
+	}
+	return;
+
+fail:
+	if (region->dma_start)
+		dma_unmap_page(kdev->dev, region->dma_start, size,
+				DMA_BIDIRECTIONAL);
+	if (region->virt_start)
+		free_pages_exact(region->virt_start, size);
+	region->num_desc = 0;
+	return;
+}
+
+static const char *knav_queue_find_name(struct device_node *node)
+{
+	const char *name;
+
+	if (of_property_read_string(node, "label", &name) < 0)
+		name = node->name;
+	if (!name)
+		name = "unknown";
+	return name;
+}
+
+static int knav_queue_setup_regions(struct knav_device *kdev,
+					struct device_node *regions)
+{
+	struct device *dev = kdev->dev;
+	struct knav_region *region;
+	struct device_node *child;
+	u32 temp[2];
+	int ret;
+
+	for_each_child_of_node(regions, child) {
+		region = devm_kzalloc(dev, sizeof(*region), GFP_KERNEL);
+		if (!region) {
+			dev_err(dev, "out of memory allocating region\n");
+			return -ENOMEM;
+		}
+
+		region->name = knav_queue_find_name(child);
+		of_property_read_u32(child, "id", &region->id);
+		ret = of_property_read_u32_array(child, "region-spec", temp, 2);
+		if (!ret) {
+			region->num_desc  = temp[0];
+			region->desc_size = temp[1];
+		} else {
+			dev_err(dev, "invalid region info %s\n", region->name);
+			devm_kfree(dev, region);
+			continue;
+		}
+
+		if (!of_get_property(child, "link-index", NULL)) {
+			dev_err(dev, "No link info for %s\n", region->name);
+			devm_kfree(dev, region);
+			continue;
+		}
+		ret = of_property_read_u32(child, "link-index",
+					   &region->link_index);
+		if (ret) {
+			dev_err(dev, "link index not found for %s\n",
+				region->name);
+			devm_kfree(dev, region);
+			continue;
+		}
+
+		INIT_LIST_HEAD(&region->pools);
+		list_add_tail(&region->list, &kdev->regions);
+	}
+	if (list_empty(&kdev->regions)) {
+		dev_err(dev, "no valid region information found\n");
+		return -ENODEV;
+	}
+
+	/* Next, we run through the regions and set things up */
+	for_each_region(kdev, region)
+		knav_queue_setup_region(kdev, region);
+
+	return 0;
+}
+
+static int knav_get_link_ram(struct knav_device *kdev,
+				       const char *name,
+				       struct knav_link_ram_block *block)
+{
+	struct platform_device *pdev = to_platform_device(kdev->dev);
+	struct device_node *node = pdev->dev.of_node;
+	u32 temp[2];
+
+	/*
+	 * Note: link ram resources are specified in "entry" sized units. In
+	 * reality, although entries are ~40bits in hardware, we treat them as
+	 * 64-bit entities here.
+	 *
+	 * For example, to specify the internal link ram for Keystone-I class
+	 * devices, we would set the linkram0 resource to 0x80000-0x83fff.
+	 *
+	 * This gets a bit weird when other link rams are used.  For example,
+	 * if the range specified is 0x0c000000-0x0c003fff (i.e., 16K entries
+	 * in MSMC SRAM), the actual memory used is 0x0c000000-0x0c020000,
+	 * which accounts for 64-bits per entry, for 16K entries.
+	 */
+	if (!of_property_read_u32_array(node, name , temp, 2)) {
+		if (temp[0]) {
+			/*
+			 * queue_base specified => using internal or onchip
+			 * link ram WARNING - we do not "reserve" this block
+			 */
+			block->phys = (dma_addr_t)temp[0];
+			block->virt = NULL;
+			block->size = temp[1];
+		} else {
+			block->size = temp[1];
+			/* queue_base not specific => allocate requested size */
+			block->virt = dmam_alloc_coherent(kdev->dev,
+						  8 * block->size, &block->phys,
+						  GFP_KERNEL);
+			if (!block->virt) {
+				dev_err(kdev->dev, "failed to alloc linkram\n");
+				return -ENOMEM;
+			}
+		}
+	} else {
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static int knav_queue_setup_link_ram(struct knav_device *kdev)
+{
+	struct knav_link_ram_block *block;
+	struct knav_qmgr_info *qmgr;
+
+	for_each_qmgr(kdev, qmgr) {
+		block = &kdev->link_rams[0];
+		dev_dbg(kdev->dev, "linkram0: phys:%x, virt:%p, size:%x\n",
+			block->phys, block->virt, block->size);
+		writel_relaxed(block->phys, &qmgr->reg_config->link_ram_base0);
+		writel_relaxed(block->size, &qmgr->reg_config->link_ram_size0);
+
+		block++;
+		if (!block->size)
+			return 0;
+
+		dev_dbg(kdev->dev, "linkram1: phys:%x, virt:%p, size:%x\n",
+			block->phys, block->virt, block->size);
+		writel_relaxed(block->phys, &qmgr->reg_config->link_ram_base1);
+	}
+
+	return 0;
+}
+
+static int knav_setup_queue_range(struct knav_device *kdev,
+					struct device_node *node)
+{
+	struct device *dev = kdev->dev;
+	struct knav_range_info *range;
+	struct knav_qmgr_info *qmgr;
+	u32 temp[2], start, end, id, index;
+	int ret, i;
+
+	range = devm_kzalloc(dev, sizeof(*range), GFP_KERNEL);
+	if (!range) {
+		dev_err(dev, "out of memory allocating range\n");
+		return -ENOMEM;
+	}
+
+	range->kdev = kdev;
+	range->name = knav_queue_find_name(node);
+	ret = of_property_read_u32_array(node, "qrange", temp, 2);
+	if (!ret) {
+		range->queue_base = temp[0] - kdev->base_id;
+		range->num_queues = temp[1];
+	} else {
+		dev_err(dev, "invalid queue range %s\n", range->name);
+		devm_kfree(dev, range);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < RANGE_MAX_IRQS; i++) {
+		struct of_phandle_args oirq;
+
+		if (of_irq_parse_one(node, i, &oirq))
+			break;
+
+		range->irqs[i].irq = irq_create_of_mapping(&oirq);
+		if (range->irqs[i].irq == IRQ_NONE)
+			break;
+
+		range->num_irqs++;
+
+		if (oirq.args_count == 3)
+			range->irqs[i].cpu_map =
+				(oirq.args[2] & 0x0000ff00) >> 8;
+	}
+
+	range->num_irqs = min(range->num_irqs, range->num_queues);
+	if (range->num_irqs)
+		range->flags |= RANGE_HAS_IRQ;
+
+	if (of_get_property(node, "qalloc-by-id", NULL))
+		range->flags |= RANGE_RESERVED;
+
+	if (of_get_property(node, "accumulator", NULL)) {
+		ret = knav_init_acc_range(kdev, node, range);
+		if (ret < 0) {
+			devm_kfree(dev, range);
+			return ret;
+		}
+	} else {
+		range->ops = &knav_gp_range_ops;
+	}
+
+	/* set threshold to 1, and flush out the queues */
+	for_each_qmgr(kdev, qmgr) {
+		start = max(qmgr->start_queue, range->queue_base);
+		end   = min(qmgr->start_queue + qmgr->num_queues,
+			    range->queue_base + range->num_queues);
+		for (id = start; id < end; id++) {
+			index = id - qmgr->start_queue;
+			writel_relaxed(THRESH_GTE | 1,
+				       &qmgr->reg_peek[index].ptr_size_thresh);
+			writel_relaxed(0,
+				       &qmgr->reg_push[index].ptr_size_thresh);
+		}
+	}
+
+	list_add_tail(&range->list, &kdev->queue_ranges);
+	dev_dbg(dev, "added range %s: %d-%d, %d irqs%s%s%s\n",
+		range->name, range->queue_base,
+		range->queue_base + range->num_queues - 1,
+		range->num_irqs,
+		(range->flags & RANGE_HAS_IRQ) ? ", has irq" : "",
+		(range->flags & RANGE_RESERVED) ? ", reserved" : "",
+		(range->flags & RANGE_HAS_ACCUMULATOR) ? ", acc" : "");
+	kdev->num_queues_in_use += range->num_queues;
+	return 0;
+}
+
+static int knav_setup_queue_pools(struct knav_device *kdev,
+				   struct device_node *queue_pools)
+{
+	struct device_node *type, *range;
+	int ret;
+
+	for_each_child_of_node(queue_pools, type) {
+		for_each_child_of_node(type, range) {
+			ret = knav_setup_queue_range(kdev, range);
+			/* return value ignored, we init the rest... */
+		}
+	}
+
+	/* ... and barf if they all failed! */
+	if (list_empty(&kdev->queue_ranges)) {
+		dev_err(kdev->dev, "no valid queue range found\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static void knav_free_queue_range(struct knav_device *kdev,
+				  struct knav_range_info *range)
+{
+	if (range->ops && range->ops->free_range)
+		range->ops->free_range(range);
+	list_del(&range->list);
+	devm_kfree(kdev->dev, range);
+}
+
+static void knav_free_queue_ranges(struct knav_device *kdev)
+{
+	struct knav_range_info *range;
+
+	for (;;) {
+		range = first_queue_range(kdev);
+		if (!range)
+			break;
+		knav_free_queue_range(kdev, range);
+	}
+}
+
+static void knav_queue_free_regions(struct knav_device *kdev)
+{
+	struct knav_region *region;
+	struct knav_pool *pool;
+	unsigned size;
+
+	for (;;) {
+		region = first_region(kdev);
+		if (!region)
+			break;
+		list_for_each_entry(pool, &region->pools, region_inst)
+			knav_pool_destroy(pool);
+
+		size = region->virt_end - region->virt_start;
+		if (size)
+			free_pages_exact(region->virt_start, size);
+		list_del(&region->list);
+		devm_kfree(kdev->dev, region);
+	}
+}
+
+static void __iomem *knav_queue_map_reg(struct knav_device *kdev,
+					struct device_node *node, int index)
+{
+	struct resource res;
+	void __iomem *regs;
+	int ret;
+
+	ret = of_address_to_resource(node, index, &res);
+	if (ret) {
+		dev_err(kdev->dev, "Can't translate of node(%s) address for index(%d)\n",
+			node->name, index);
+		return ERR_PTR(ret);
+	}
+
+	regs = devm_ioremap_resource(kdev->dev, &res);
+	if (IS_ERR(regs))
+		dev_err(kdev->dev, "Failed to map register base for index(%d) node(%s)\n",
+			index, node->name);
+	return regs;
+}
+
+static int knav_queue_init_qmgrs(struct knav_device *kdev,
+					struct device_node *qmgrs)
+{
+	struct device *dev = kdev->dev;
+	struct knav_qmgr_info *qmgr;
+	struct device_node *child;
+	u32 temp[2];
+	int ret;
+
+	for_each_child_of_node(qmgrs, child) {
+		qmgr = devm_kzalloc(dev, sizeof(*qmgr), GFP_KERNEL);
+		if (!qmgr) {
+			dev_err(dev, "out of memory allocating qmgr\n");
+			return -ENOMEM;
+		}
+
+		ret = of_property_read_u32_array(child, "managed-queues",
+						 temp, 2);
+		if (!ret) {
+			qmgr->start_queue = temp[0];
+			qmgr->num_queues = temp[1];
+		} else {
+			dev_err(dev, "invalid qmgr queue range\n");
+			devm_kfree(dev, qmgr);
+			continue;
+		}
+
+		dev_info(dev, "qmgr start queue %d, number of queues %d\n",
+			 qmgr->start_queue, qmgr->num_queues);
+
+		qmgr->reg_peek =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_PEEK_REG_INDEX);
+		qmgr->reg_status =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_STATUS_REG_INDEX);
+		qmgr->reg_config =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_CONFIG_REG_INDEX);
+		qmgr->reg_region =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_REGION_REG_INDEX);
+		qmgr->reg_push =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_PUSH_REG_INDEX);
+		qmgr->reg_pop =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_POP_REG_INDEX);
+
+		if (IS_ERR(qmgr->reg_peek) || IS_ERR(qmgr->reg_status) ||
+		    IS_ERR(qmgr->reg_config) || IS_ERR(qmgr->reg_region) ||
+		    IS_ERR(qmgr->reg_push) || IS_ERR(qmgr->reg_pop)) {
+			dev_err(dev, "failed to map qmgr regs\n");
+			if (!IS_ERR(qmgr->reg_peek))
+				devm_iounmap(dev, qmgr->reg_peek);
+			if (!IS_ERR(qmgr->reg_status))
+				devm_iounmap(dev, qmgr->reg_status);
+			if (!IS_ERR(qmgr->reg_config))
+				devm_iounmap(dev, qmgr->reg_config);
+			if (!IS_ERR(qmgr->reg_region))
+				devm_iounmap(dev, qmgr->reg_region);
+			if (!IS_ERR(qmgr->reg_push))
+				devm_iounmap(dev, qmgr->reg_push);
+			if (!IS_ERR(qmgr->reg_pop))
+				devm_iounmap(dev, qmgr->reg_pop);
+			devm_kfree(dev, qmgr);
+			continue;
+		}
+
+		list_add_tail(&qmgr->list, &kdev->qmgrs);
+		dev_info(dev, "added qmgr start queue %d, num of queues %d, reg_peek %p, reg_status %p, reg_config %p, reg_region %p, reg_push %p, reg_pop %p\n",
+			 qmgr->start_queue, qmgr->num_queues,
+			 qmgr->reg_peek, qmgr->reg_status,
+			 qmgr->reg_config, qmgr->reg_region,
+			 qmgr->reg_push, qmgr->reg_pop);
+	}
+	return 0;
+}
+
+static int knav_queue_init_pdsps(struct knav_device *kdev,
+					struct device_node *pdsps)
+{
+	struct device *dev = kdev->dev;
+	struct knav_pdsp_info *pdsp;
+	struct device_node *child;
+	int ret;
+
+	for_each_child_of_node(pdsps, child) {
+		pdsp = devm_kzalloc(dev, sizeof(*pdsp), GFP_KERNEL);
+		if (!pdsp) {
+			dev_err(dev, "out of memory allocating pdsp\n");
+			return -ENOMEM;
+		}
+		pdsp->name = knav_queue_find_name(child);
+		ret = of_property_read_string(child, "firmware",
+					      &pdsp->firmware);
+		if (ret < 0 || !pdsp->firmware) {
+			dev_err(dev, "unknown firmware for pdsp %s\n",
+				pdsp->name);
+			devm_kfree(dev, pdsp);
+			continue;
+		}
+		dev_dbg(dev, "pdsp name %s fw name :%s\n", pdsp->name,
+			pdsp->firmware);
+
+		pdsp->iram =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_PDSP_IRAM_REG_INDEX);
+		pdsp->regs =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_PDSP_REGS_REG_INDEX);
+		pdsp->intd =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_PDSP_INTD_REG_INDEX);
+		pdsp->command =
+			knav_queue_map_reg(kdev, child,
+					   KNAV_QUEUE_PDSP_CMD_REG_INDEX);
+
+		if (IS_ERR(pdsp->command) || IS_ERR(pdsp->iram) ||
+		    IS_ERR(pdsp->regs) || IS_ERR(pdsp->intd)) {
+			dev_err(dev, "failed to map pdsp %s regs\n",
+				pdsp->name);
+			if (!IS_ERR(pdsp->command))
+				devm_iounmap(dev, pdsp->command);
+			if (!IS_ERR(pdsp->iram))
+				devm_iounmap(dev, pdsp->iram);
+			if (!IS_ERR(pdsp->regs))
+				devm_iounmap(dev, pdsp->regs);
+			if (!IS_ERR(pdsp->intd))
+				devm_iounmap(dev, pdsp->intd);
+			devm_kfree(dev, pdsp);
+			continue;
+		}
+		of_property_read_u32(child, "id", &pdsp->id);
+		list_add_tail(&pdsp->list, &kdev->pdsps);
+		dev_dbg(dev, "added pdsp %s: command %p, iram %p, regs %p, intd %p, firmware %s\n",
+			pdsp->name, pdsp->command, pdsp->iram, pdsp->regs,
+			pdsp->intd, pdsp->firmware);
+	}
+	return 0;
+}
+
+static int knav_queue_stop_pdsp(struct knav_device *kdev,
+			  struct knav_pdsp_info *pdsp)
+{
+	u32 val, timeout = 1000;
+	int ret;
+
+	val = readl_relaxed(&pdsp->regs->control) & ~PDSP_CTRL_ENABLE;
+	writel_relaxed(val, &pdsp->regs->control);
+	ret = knav_queue_pdsp_wait(&pdsp->regs->control, timeout,
+					PDSP_CTRL_RUNNING);
+	if (ret < 0) {
+		dev_err(kdev->dev, "timed out on pdsp %s stop\n", pdsp->name);
+		return ret;
+	}
+	return 0;
+}
+
+static int knav_queue_load_pdsp(struct knav_device *kdev,
+			  struct knav_pdsp_info *pdsp)
+{
+	int i, ret, fwlen;
+	const struct firmware *fw;
+	u32 *fwdata;
+
+	ret = request_firmware(&fw, pdsp->firmware, kdev->dev);
+	if (ret) {
+		dev_err(kdev->dev, "failed to get firmware %s for pdsp %s\n",
+			pdsp->firmware, pdsp->name);
+		return ret;
+	}
+	writel_relaxed(pdsp->id + 1, pdsp->command + 0x18);
+	/* download the firmware */
+	fwdata = (u32 *)fw->data;
+	fwlen = (fw->size + sizeof(u32) - 1) / sizeof(u32);
+	for (i = 0; i < fwlen; i++)
+		writel_relaxed(be32_to_cpu(fwdata[i]), pdsp->iram + i);
+
+	release_firmware(fw);
+	return 0;
+}
+
+static int knav_queue_start_pdsp(struct knav_device *kdev,
+			   struct knav_pdsp_info *pdsp)
+{
+	u32 val, timeout = 1000;
+	int ret;
+
+	/* write a command for sync */
+	writel_relaxed(0xffffffff, pdsp->command);
+	while (readl_relaxed(pdsp->command) != 0xffffffff)
+		cpu_relax();
+
+	/* soft reset the PDSP */
+	val  = readl_relaxed(&pdsp->regs->control);
+	val &= ~(PDSP_CTRL_PC_MASK | PDSP_CTRL_SOFT_RESET);
+	writel_relaxed(val, &pdsp->regs->control);
+
+	/* enable pdsp */
+	val = readl_relaxed(&pdsp->regs->control) | PDSP_CTRL_ENABLE;
+	writel_relaxed(val, &pdsp->regs->control);
+
+	/* wait for command register to clear */
+	ret = knav_queue_pdsp_wait(pdsp->command, timeout, 0);
+	if (ret < 0) {
+		dev_err(kdev->dev,
+			"timed out on pdsp %s command register wait\n",
+			pdsp->name);
+		return ret;
+	}
+	return 0;
+}
+
+static void knav_queue_stop_pdsps(struct knav_device *kdev)
+{
+	struct knav_pdsp_info *pdsp;
+
+	/* disable all pdsps */
+	for_each_pdsp(kdev, pdsp)
+		knav_queue_stop_pdsp(kdev, pdsp);
+}
+
+static int knav_queue_start_pdsps(struct knav_device *kdev)
+{
+	struct knav_pdsp_info *pdsp;
+	int ret;
+
+	knav_queue_stop_pdsps(kdev);
+	/* now load them all */
+	for_each_pdsp(kdev, pdsp) {
+		ret = knav_queue_load_pdsp(kdev, pdsp);
+		if (ret < 0)
+			return ret;
+	}
+
+	for_each_pdsp(kdev, pdsp) {
+		ret = knav_queue_start_pdsp(kdev, pdsp);
+		WARN_ON(ret);
+	}
+	return 0;
+}
+
+static inline struct knav_qmgr_info *knav_find_qmgr(unsigned id)
+{
+	struct knav_qmgr_info *qmgr;
+
+	for_each_qmgr(kdev, qmgr) {
+		if ((id >= qmgr->start_queue) &&
+		    (id < qmgr->start_queue + qmgr->num_queues))
+			return qmgr;
+	}
+	return NULL;
+}
+
+static int knav_queue_init_queue(struct knav_device *kdev,
+					struct knav_range_info *range,
+					struct knav_queue_inst *inst,
+					unsigned id)
+{
+	char irq_name[KNAV_NAME_SIZE];
+	inst->qmgr = knav_find_qmgr(id);
+	if (!inst->qmgr)
+		return -1;
+
+	INIT_LIST_HEAD(&inst->handles);
+	inst->kdev = kdev;
+	inst->range = range;
+	inst->irq_num = -1;
+	inst->id = id;
+	scnprintf(irq_name, sizeof(irq_name), "hwqueue-%d", id);
+	inst->irq_name = kstrndup(irq_name, sizeof(irq_name), GFP_KERNEL);
+
+	if (range->ops && range->ops->init_queue)
+		return range->ops->init_queue(range, inst);
+	else
+		return 0;
+}
+
+static int knav_queue_init_queues(struct knav_device *kdev)
+{
+	struct knav_range_info *range;
+	int size, id, base_idx;
+	int idx = 0, ret = 0;
+
+	/* how much do we need for instance data? */
+	size = sizeof(struct knav_queue_inst);
+
+	/* round this up to a power of 2, keep the index to instance
+	 * arithmetic fast.
+	 * */
+	kdev->inst_shift = order_base_2(size);
+	size = (1 << kdev->inst_shift) * kdev->num_queues_in_use;
+	kdev->instances = devm_kzalloc(kdev->dev, size, GFP_KERNEL);
+	if (!kdev->instances)
+		return -1;
+
+	for_each_queue_range(kdev, range) {
+		if (range->ops && range->ops->init_range)
+			range->ops->init_range(range);
+		base_idx = idx;
+		for (id = range->queue_base;
+		     id < range->queue_base + range->num_queues; id++, idx++) {
+			ret = knav_queue_init_queue(kdev, range,
+					knav_queue_idx_to_inst(kdev, idx), id);
+			if (ret < 0)
+				return ret;
+		}
+		range->queue_base_inst =
+			knav_queue_idx_to_inst(kdev, base_idx);
+	}
+	return 0;
+}
+
+static int knav_queue_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct device_node *qmgrs, *queue_pools, *regions, *pdsps;
+	struct device *dev = &pdev->dev;
+	u32 temp[2];
+	int ret;
+
+	if (!node) {
+		dev_err(dev, "device tree info unavailable\n");
+		return -ENODEV;
+	}
+
+	kdev = devm_kzalloc(dev, sizeof(struct knav_device), GFP_KERNEL);
+	if (!kdev) {
+		dev_err(dev, "memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, kdev);
+	kdev->dev = dev;
+	INIT_LIST_HEAD(&kdev->queue_ranges);
+	INIT_LIST_HEAD(&kdev->qmgrs);
+	INIT_LIST_HEAD(&kdev->pools);
+	INIT_LIST_HEAD(&kdev->regions);
+	INIT_LIST_HEAD(&kdev->pdsps);
+
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enable QMSS\n");
+		return ret;
+	}
+
+	if (of_property_read_u32_array(node, "queue-range", temp, 2)) {
+		dev_err(dev, "queue-range not specified\n");
+		ret = -ENODEV;
+		goto err;
+	}
+	kdev->base_id    = temp[0];
+	kdev->num_queues = temp[1];
+
+	/* Initialize queue managers using device tree configuration */
+	qmgrs =  of_get_child_by_name(node, "qmgrs");
+	if (!qmgrs) {
+		dev_err(dev, "queue manager info not specified\n");
+		ret = -ENODEV;
+		goto err;
+	}
+	ret = knav_queue_init_qmgrs(kdev, qmgrs);
+	of_node_put(qmgrs);
+	if (ret)
+		goto err;
+
+	/* get pdsp configuration values from device tree */
+	pdsps =  of_get_child_by_name(node, "pdsps");
+	if (pdsps) {
+		ret = knav_queue_init_pdsps(kdev, pdsps);
+		if (ret)
+			goto err;
+
+		ret = knav_queue_start_pdsps(kdev);
+		if (ret)
+			goto err;
+	}
+	of_node_put(pdsps);
+
+	/* get usable queue range values from device tree */
+	queue_pools = of_get_child_by_name(node, "queue-pools");
+	if (!queue_pools) {
+		dev_err(dev, "queue-pools not specified\n");
+		ret = -ENODEV;
+		goto err;
+	}
+	ret = knav_setup_queue_pools(kdev, queue_pools);
+	of_node_put(queue_pools);
+	if (ret)
+		goto err;
+
+	ret = knav_get_link_ram(kdev, "linkram0", &kdev->link_rams[0]);
+	if (ret) {
+		dev_err(kdev->dev, "could not setup linking ram\n");
+		goto err;
+	}
+
+	ret = knav_get_link_ram(kdev, "linkram1", &kdev->link_rams[1]);
+	if (ret) {
+		/*
+		 * nothing really, we have one linking ram already, so we just
+		 * live within our means
+		 */
+	}
+
+	ret = knav_queue_setup_link_ram(kdev);
+	if (ret)
+		goto err;
+
+	regions =  of_get_child_by_name(node, "descriptor-regions");
+	if (!regions) {
+		dev_err(dev, "descriptor-regions not specified\n");
+		goto err;
+	}
+	ret = knav_queue_setup_regions(kdev, regions);
+	of_node_put(regions);
+	if (ret)
+		goto err;
+
+	ret = knav_queue_init_queues(kdev);
+	if (ret < 0) {
+		dev_err(dev, "hwqueue initialization failed\n");
+		goto err;
+	}
+
+	debugfs_create_file("qmss", S_IFREG | S_IRUGO, NULL, NULL,
+			    &knav_queue_debug_ops);
+	return 0;
+
+err:
+	knav_queue_stop_pdsps(kdev);
+	knav_queue_free_regions(kdev);
+	knav_free_queue_ranges(kdev);
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return ret;
+}
+
+static int knav_queue_remove(struct platform_device *pdev)
+{
+	/* TODO: Free resources */
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+/* Match table for of_platform binding */
+static struct of_device_id keystone_qmss_of_match[] = {
+	{ .compatible = "ti,keystone-navigator-qmss", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, keystone_qmss_of_match);
+
+static struct platform_driver keystone_qmss_driver = {
+	.probe		= knav_queue_probe,
+	.remove		= knav_queue_remove,
+	.driver		= {
+		.name	= "keystone-navigator-qmss",
+		.owner	= THIS_MODULE,
+		.of_match_table = keystone_qmss_of_match,
+	},
+};
+module_platform_driver(keystone_qmss_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("TI QMSS driver for Keystone SOCs");
+MODULE_AUTHOR("Sandeep Nair <sandeep_n@ti.com>");
+MODULE_AUTHOR("Santosh Shilimkar <santosh.shilimkar@ti.com>");
diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h
new file mode 100644
index 000000000000..9f0ebb3bad27
--- /dev/null
+++ b/include/linux/soc/ti/knav_qmss.h
@@ -0,0 +1,90 @@
+/*
+ * Keystone Navigator Queue Management Sub-System header
+ *
+ * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com
+ * Author:	Sandeep Nair <sandeep_n@ti.com>
+ *		Cyril Chemparathy <cyril@ti.com>
+ *		Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __SOC_TI_KNAV_QMSS_H__
+#define __SOC_TI_KNAV_QMSS_H__
+
+#include <linux/err.h>
+#include <linux/time.h>
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/fcntl.h>
+#include <linux/dma-mapping.h>
+
+/* queue types */
+#define KNAV_QUEUE_QPEND	((unsigned)-2) /* interruptible qpend queue */
+#define KNAV_QUEUE_ACC		((unsigned)-3) /* Accumulated queue */
+#define KNAV_QUEUE_GP		((unsigned)-4) /* General purpose queue */
+
+/* queue flags */
+#define KNAV_QUEUE_SHARED	0x0001		/* Queue can be shared */
+
+/**
+ * enum knav_queue_ctrl_cmd -	queue operations.
+ * @KNAV_QUEUE_GET_ID:		Get the ID number for an open queue
+ * @KNAV_QUEUE_FLUSH:		forcibly empty a queue if possible
+ * @KNAV_QUEUE_SET_NOTIFIER:	Set a notifier callback to a queue handle.
+ * @KNAV_QUEUE_ENABLE_NOTIFY:	Enable notifier callback for a queue handle.
+ * @KNAV_QUEUE_DISABLE_NOTIFY:	Disable notifier callback for a queue handle.
+ * @KNAV_QUEUE_GET_COUNT:	Get number of queues.
+ */
+enum knav_queue_ctrl_cmd {
+	KNAV_QUEUE_GET_ID,
+	KNAV_QUEUE_FLUSH,
+	KNAV_QUEUE_SET_NOTIFIER,
+	KNAV_QUEUE_ENABLE_NOTIFY,
+	KNAV_QUEUE_DISABLE_NOTIFY,
+	KNAV_QUEUE_GET_COUNT
+};
+
+/* Queue notifier callback prototype */
+typedef void (*knav_queue_notify_fn)(void *arg);
+
+/**
+ * struct knav_queue_notify_config:	Notifier configuration
+ * @fn:					Notifier function
+ * @fn_arg:				Notifier function arguments
+ */
+struct knav_queue_notify_config {
+	knav_queue_notify_fn fn;
+	void *fn_arg;
+};
+
+void *knav_queue_open(const char *name, unsigned id,
+					unsigned flags);
+void knav_queue_close(void *qhandle);
+int knav_queue_device_control(void *qhandle,
+				enum knav_queue_ctrl_cmd cmd,
+				unsigned long arg);
+dma_addr_t knav_queue_pop(void *qhandle, unsigned *size);
+int knav_queue_push(void *qhandle, dma_addr_t dma,
+				unsigned size, unsigned flags);
+
+void *knav_pool_create(const char *name,
+				int num_desc, int region_id);
+void knav_pool_destroy(void *ph);
+int knav_pool_count(void *ph);
+void *knav_pool_desc_get(void *ph);
+void knav_pool_desc_put(void *ph, void *desc);
+int knav_pool_desc_map(void *ph, void *desc, unsigned size,
+					dma_addr_t *dma, unsigned *dma_sz);
+void *knav_pool_desc_unmap(void *ph, dma_addr_t dma, unsigned dma_sz);
+dma_addr_t knav_pool_desc_virt_to_dma(void *ph, void *virt);
+void *knav_pool_desc_dma_to_virt(void *ph, dma_addr_t dma);
+
+#endif /* __SOC_TI_KNAV_QMSS_H__ */
-- 
cgit v1.2.3


From 88139ed030583557751e279968e13e892ae10825 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Sun, 30 Mar 2014 17:29:04 -0400
Subject: soc: ti: add Keystone Navigator DMA support

The Keystone Navigator DMA driver sets up the dma channels and flows for
the QMSS(Queue Manager SubSystem) who triggers the actual data movements
across clients using destination queues. Every client modules like
NETCP(Network Coprocessor), SRIO(Serial Rapid IO) and CRYPTO
Engines has its own instance of packet dma hardware. QMSS has also
an internal packet DMA module which is used as an infrastructure
DMA with zero copy.

Initially this driver was proposed as DMA engine driver but since the
hardware is not typical DMA engine and hence doesn't comply with typical
DMA engine driver needs, that approach was naked. Link to that
discussion -
	https://lkml.org/lkml/2014/3/18/340

As aligned, now we pair the Navigator DMA with its companion Navigator
QMSS subsystem driver.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Kumar Gala <galak@codeaurora.org>
Cc: Olof Johansson <olof@lixom.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Sandeep Nair <sandeep_n@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
---
 drivers/soc/ti/Kconfig          |  10 +
 drivers/soc/ti/Makefile         |   1 +
 drivers/soc/ti/knav_dma.c       | 815 ++++++++++++++++++++++++++++++++++++++++
 include/linux/soc/ti/knav_dma.h | 175 +++++++++
 4 files changed, 1001 insertions(+)
 create mode 100644 drivers/soc/ti/knav_dma.c
 create mode 100644 include/linux/soc/ti/knav_dma.h

(limited to 'include/linux')

diff --git a/drivers/soc/ti/Kconfig b/drivers/soc/ti/Kconfig
index f73896f762e8..7266b2165183 100644
--- a/drivers/soc/ti/Kconfig
+++ b/drivers/soc/ti/Kconfig
@@ -18,4 +18,14 @@ config KEYSTONE_NAVIGATOR_QMSS
 
 	  If unsure, say N.
 
+config KEYSTONE_NAVIGATOR_DMA
+	tristate "TI Keystone Navigator Packet DMA support"
+	depends on ARCH_KEYSTONE
+	help
+	  Say y tp enable support for the Keystone Navigator Packet DMA on
+	  on Keystone family of devices. It sets up the dma channels for the
+	  Queue Manager Sub System.
+
+	  If unsure, say N.
+
 endif # SOC_TI
diff --git a/drivers/soc/ti/Makefile b/drivers/soc/ti/Makefile
index bf85cacd5b85..6bed611e1934 100644
--- a/drivers/soc/ti/Makefile
+++ b/drivers/soc/ti/Makefile
@@ -2,3 +2,4 @@
 # TI Keystone SOC drivers
 #
 obj-$(CONFIG_KEYSTONE_NAVIGATOR_QMSS)	+= knav_qmss_queue.o knav_qmss_acc.o
+obj-$(CONFIG_KEYSTONE_NAVIGATOR_DMA)	+= knav_dma.o
diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c
new file mode 100644
index 000000000000..17264275f32b
--- /dev/null
+++ b/drivers/soc/ti/knav_dma.c
@@ -0,0 +1,815 @@
+/*
+ * Copyright (C) 2014 Texas Instruments Incorporated
+ * Authors:	Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *		Sandeep Nair <sandeep_n@ti.com>
+ *		Cyril Chemparathy <cyril@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/io.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/dma-direction.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/of_dma.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/soc/ti/knav_dma.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REG_MASK		0xffffffff
+
+#define DMA_LOOPBACK		BIT(31)
+#define DMA_ENABLE		BIT(31)
+#define DMA_TEARDOWN		BIT(30)
+
+#define DMA_TX_FILT_PSWORDS	BIT(29)
+#define DMA_TX_FILT_EINFO	BIT(30)
+#define DMA_TX_PRIO_SHIFT	0
+#define DMA_RX_PRIO_SHIFT	16
+#define DMA_PRIO_MASK		GENMASK(3, 0)
+#define DMA_PRIO_DEFAULT	0
+#define DMA_RX_TIMEOUT_DEFAULT	17500 /* cycles */
+#define DMA_RX_TIMEOUT_MASK	GENMASK(16, 0)
+#define DMA_RX_TIMEOUT_SHIFT	0
+
+#define CHAN_HAS_EPIB		BIT(30)
+#define CHAN_HAS_PSINFO		BIT(29)
+#define CHAN_ERR_RETRY		BIT(28)
+#define CHAN_PSINFO_AT_SOP	BIT(25)
+#define CHAN_SOP_OFF_SHIFT	16
+#define CHAN_SOP_OFF_MASK	GENMASK(9, 0)
+#define DESC_TYPE_SHIFT		26
+#define DESC_TYPE_MASK		GENMASK(2, 0)
+
+/*
+ * QMGR & QNUM together make up 14 bits with QMGR as the 2 MSb's in the logical
+ * navigator cloud mapping scheme.
+ * using the 14bit physical queue numbers directly maps into this scheme.
+ */
+#define CHAN_QNUM_MASK		GENMASK(14, 0)
+#define DMA_MAX_QMS		4
+#define DMA_TIMEOUT		1	/* msecs */
+#define DMA_INVALID_ID		0xffff
+
+struct reg_global {
+	u32	revision;
+	u32	perf_control;
+	u32	emulation_control;
+	u32	priority_control;
+	u32	qm_base_address[DMA_MAX_QMS];
+};
+
+struct reg_chan {
+	u32	control;
+	u32	mode;
+	u32	__rsvd[6];
+};
+
+struct reg_tx_sched {
+	u32	prio;
+};
+
+struct reg_rx_flow {
+	u32	control;
+	u32	tags;
+	u32	tag_sel;
+	u32	fdq_sel[2];
+	u32	thresh[3];
+};
+
+struct knav_dma_pool_device {
+	struct device			*dev;
+	struct list_head		list;
+};
+
+struct knav_dma_device {
+	bool				loopback, enable_all;
+	unsigned			tx_priority, rx_priority, rx_timeout;
+	unsigned			logical_queue_managers;
+	unsigned			qm_base_address[DMA_MAX_QMS];
+	struct reg_global __iomem	*reg_global;
+	struct reg_chan __iomem		*reg_tx_chan;
+	struct reg_rx_flow __iomem	*reg_rx_flow;
+	struct reg_chan __iomem		*reg_rx_chan;
+	struct reg_tx_sched __iomem	*reg_tx_sched;
+	unsigned			max_rx_chan, max_tx_chan;
+	unsigned			max_rx_flow;
+	char				name[32];
+	atomic_t			ref_count;
+	struct list_head		list;
+	struct list_head		chan_list;
+	spinlock_t			lock;
+};
+
+struct knav_dma_chan {
+	enum dma_transfer_direction	direction;
+	struct knav_dma_device		*dma;
+	atomic_t			ref_count;
+
+	/* registers */
+	struct reg_chan __iomem		*reg_chan;
+	struct reg_tx_sched __iomem	*reg_tx_sched;
+	struct reg_rx_flow __iomem	*reg_rx_flow;
+
+	/* configuration stuff */
+	unsigned			channel, flow;
+	struct knav_dma_cfg		cfg;
+	struct list_head		list;
+	spinlock_t			lock;
+};
+
+#define chan_number(ch)	((ch->direction == DMA_MEM_TO_DEV) ? \
+			ch->channel : ch->flow)
+
+static struct knav_dma_pool_device *kdev;
+
+static bool check_config(struct knav_dma_chan *chan, struct knav_dma_cfg *cfg)
+{
+	if (!memcmp(&chan->cfg, cfg, sizeof(*cfg)))
+		return true;
+	else
+		return false;
+}
+
+static int chan_start(struct knav_dma_chan *chan,
+			struct knav_dma_cfg *cfg)
+{
+	u32 v = 0;
+
+	spin_lock(&chan->lock);
+	if ((chan->direction == DMA_MEM_TO_DEV) && chan->reg_chan) {
+		if (cfg->u.tx.filt_pswords)
+			v |= DMA_TX_FILT_PSWORDS;
+		if (cfg->u.tx.filt_einfo)
+			v |= DMA_TX_FILT_EINFO;
+		writel_relaxed(v, &chan->reg_chan->mode);
+		writel_relaxed(DMA_ENABLE, &chan->reg_chan->control);
+	}
+
+	if (chan->reg_tx_sched)
+		writel_relaxed(cfg->u.tx.priority, &chan->reg_tx_sched->prio);
+
+	if (chan->reg_rx_flow) {
+		v = 0;
+
+		if (cfg->u.rx.einfo_present)
+			v |= CHAN_HAS_EPIB;
+		if (cfg->u.rx.psinfo_present)
+			v |= CHAN_HAS_PSINFO;
+		if (cfg->u.rx.err_mode == DMA_RETRY)
+			v |= CHAN_ERR_RETRY;
+		v |= (cfg->u.rx.desc_type & DESC_TYPE_MASK) << DESC_TYPE_SHIFT;
+		if (cfg->u.rx.psinfo_at_sop)
+			v |= CHAN_PSINFO_AT_SOP;
+		v |= (cfg->u.rx.sop_offset & CHAN_SOP_OFF_MASK)
+			<< CHAN_SOP_OFF_SHIFT;
+		v |= cfg->u.rx.dst_q & CHAN_QNUM_MASK;
+
+		writel_relaxed(v, &chan->reg_rx_flow->control);
+		writel_relaxed(0, &chan->reg_rx_flow->tags);
+		writel_relaxed(0, &chan->reg_rx_flow->tag_sel);
+
+		v =  cfg->u.rx.fdq[0] << 16;
+		v |=  cfg->u.rx.fdq[1] & CHAN_QNUM_MASK;
+		writel_relaxed(v, &chan->reg_rx_flow->fdq_sel[0]);
+
+		v =  cfg->u.rx.fdq[2] << 16;
+		v |=  cfg->u.rx.fdq[3] & CHAN_QNUM_MASK;
+		writel_relaxed(v, &chan->reg_rx_flow->fdq_sel[1]);
+
+		writel_relaxed(0, &chan->reg_rx_flow->thresh[0]);
+		writel_relaxed(0, &chan->reg_rx_flow->thresh[1]);
+		writel_relaxed(0, &chan->reg_rx_flow->thresh[2]);
+	}
+
+	/* Keep a copy of the cfg */
+	memcpy(&chan->cfg, cfg, sizeof(*cfg));
+	spin_unlock(&chan->lock);
+
+	return 0;
+}
+
+static int chan_teardown(struct knav_dma_chan *chan)
+{
+	unsigned long end, value;
+
+	if (!chan->reg_chan)
+		return 0;
+
+	/* indicate teardown */
+	writel_relaxed(DMA_TEARDOWN, &chan->reg_chan->control);
+
+	/* wait for the dma to shut itself down */
+	end = jiffies + msecs_to_jiffies(DMA_TIMEOUT);
+	do {
+		value = readl_relaxed(&chan->reg_chan->control);
+		if ((value & DMA_ENABLE) == 0)
+			break;
+	} while (time_after(end, jiffies));
+
+	if (readl_relaxed(&chan->reg_chan->control) & DMA_ENABLE) {
+		dev_err(kdev->dev, "timeout waiting for teardown\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static void chan_stop(struct knav_dma_chan *chan)
+{
+	spin_lock(&chan->lock);
+	if (chan->reg_rx_flow) {
+		/* first detach fdqs, starve out the flow */
+		writel_relaxed(0, &chan->reg_rx_flow->fdq_sel[0]);
+		writel_relaxed(0, &chan->reg_rx_flow->fdq_sel[1]);
+		writel_relaxed(0, &chan->reg_rx_flow->thresh[0]);
+		writel_relaxed(0, &chan->reg_rx_flow->thresh[1]);
+		writel_relaxed(0, &chan->reg_rx_flow->thresh[2]);
+	}
+
+	/* teardown the dma channel */
+	chan_teardown(chan);
+
+	/* then disconnect the completion side */
+	if (chan->reg_rx_flow) {
+		writel_relaxed(0, &chan->reg_rx_flow->control);
+		writel_relaxed(0, &chan->reg_rx_flow->tags);
+		writel_relaxed(0, &chan->reg_rx_flow->tag_sel);
+	}
+
+	memset(&chan->cfg, 0, sizeof(struct knav_dma_cfg));
+	spin_unlock(&chan->lock);
+
+	dev_dbg(kdev->dev, "channel stopped\n");
+}
+
+static void dma_hw_enable_all(struct knav_dma_device *dma)
+{
+	int i;
+
+	for (i = 0; i < dma->max_tx_chan; i++) {
+		writel_relaxed(0, &dma->reg_tx_chan[i].mode);
+		writel_relaxed(DMA_ENABLE, &dma->reg_tx_chan[i].control);
+	}
+}
+
+
+static void knav_dma_hw_init(struct knav_dma_device *dma)
+{
+	unsigned v;
+	int i;
+
+	spin_lock(&dma->lock);
+	v  = dma->loopback ? DMA_LOOPBACK : 0;
+	writel_relaxed(v, &dma->reg_global->emulation_control);
+
+	v = readl_relaxed(&dma->reg_global->perf_control);
+	v |= ((dma->rx_timeout & DMA_RX_TIMEOUT_MASK) << DMA_RX_TIMEOUT_SHIFT);
+	writel_relaxed(v, &dma->reg_global->perf_control);
+
+	v = ((dma->tx_priority << DMA_TX_PRIO_SHIFT) |
+	     (dma->rx_priority << DMA_RX_PRIO_SHIFT));
+
+	writel_relaxed(v, &dma->reg_global->priority_control);
+
+	/* Always enable all Rx channels. Rx paths are managed using flows */
+	for (i = 0; i < dma->max_rx_chan; i++)
+		writel_relaxed(DMA_ENABLE, &dma->reg_rx_chan[i].control);
+
+	for (i = 0; i < dma->logical_queue_managers; i++)
+		writel_relaxed(dma->qm_base_address[i],
+			       &dma->reg_global->qm_base_address[i]);
+	spin_unlock(&dma->lock);
+}
+
+static void knav_dma_hw_destroy(struct knav_dma_device *dma)
+{
+	int i;
+	unsigned v;
+
+	spin_lock(&dma->lock);
+	v = ~DMA_ENABLE & REG_MASK;
+
+	for (i = 0; i < dma->max_rx_chan; i++)
+		writel_relaxed(v, &dma->reg_rx_chan[i].control);
+
+	for (i = 0; i < dma->max_tx_chan; i++)
+		writel_relaxed(v, &dma->reg_tx_chan[i].control);
+	spin_unlock(&dma->lock);
+}
+
+static void dma_debug_show_channels(struct seq_file *s,
+					struct knav_dma_chan *chan)
+{
+	int i;
+
+	seq_printf(s, "\t%s %d:\t",
+		((chan->direction == DMA_MEM_TO_DEV) ? "tx chan" : "rx flow"),
+		chan_number(chan));
+
+	if (chan->direction == DMA_MEM_TO_DEV) {
+		seq_printf(s, "einfo - %d, pswords - %d, priority - %d\n",
+			chan->cfg.u.tx.filt_einfo,
+			chan->cfg.u.tx.filt_pswords,
+			chan->cfg.u.tx.priority);
+	} else {
+		seq_printf(s, "einfo - %d, psinfo - %d, desc_type - %d\n",
+			chan->cfg.u.rx.einfo_present,
+			chan->cfg.u.rx.psinfo_present,
+			chan->cfg.u.rx.desc_type);
+		seq_printf(s, "\t\t\tdst_q: [%d], thresh: %d fdq: ",
+			chan->cfg.u.rx.dst_q,
+			chan->cfg.u.rx.thresh);
+		for (i = 0; i < KNAV_DMA_FDQ_PER_CHAN; i++)
+			seq_printf(s, "[%d]", chan->cfg.u.rx.fdq[i]);
+		seq_printf(s, "\n");
+	}
+}
+
+static void dma_debug_show_devices(struct seq_file *s,
+					struct knav_dma_device *dma)
+{
+	struct knav_dma_chan *chan;
+
+	list_for_each_entry(chan, &dma->chan_list, list) {
+		if (atomic_read(&chan->ref_count))
+			dma_debug_show_channels(s, chan);
+	}
+}
+
+static int dma_debug_show(struct seq_file *s, void *v)
+{
+	struct knav_dma_device *dma;
+
+	list_for_each_entry(dma, &kdev->list, list) {
+		if (atomic_read(&dma->ref_count)) {
+			seq_printf(s, "%s : max_tx_chan: (%d), max_rx_flows: (%d)\n",
+			dma->name, dma->max_tx_chan, dma->max_rx_flow);
+			dma_debug_show_devices(s, dma);
+		}
+	}
+
+	return 0;
+}
+
+static int knav_dma_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dma_debug_show, NULL);
+}
+
+static const struct file_operations knav_dma_debug_ops = {
+	.open		= knav_dma_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int of_channel_match_helper(struct device_node *np, const char *name,
+					const char **dma_instance)
+{
+	struct of_phandle_args args;
+	struct device_node *dma_node;
+	int index;
+
+	dma_node = of_parse_phandle(np, "ti,navigator-dmas", 0);
+	if (!dma_node)
+		return -ENODEV;
+
+	*dma_instance = dma_node->name;
+	index = of_property_match_string(np, "ti,navigator-dma-names", name);
+	if (index < 0) {
+		dev_err(kdev->dev, "No 'ti,navigator-dma-names' propery\n");
+		return -ENODEV;
+	}
+
+	if (of_parse_phandle_with_fixed_args(np, "ti,navigator-dmas",
+					1, index, &args)) {
+		dev_err(kdev->dev, "Missing the pahndle args name %s\n", name);
+		return -ENODEV;
+	}
+
+	if (args.args[0] < 0) {
+		dev_err(kdev->dev, "Missing args for %s\n", name);
+		return -ENODEV;
+	}
+
+	return args.args[0];
+}
+
+/**
+ * knav_dma_open_channel() - try to setup an exclusive slave channel
+ * @dev:	pointer to client device structure
+ * @name:	slave channel name
+ * @config:	dma configuration parameters
+ *
+ * Returns pointer to appropriate DMA channel on success or NULL.
+ */
+void *knav_dma_open_channel(struct device *dev, const char *name,
+					struct knav_dma_cfg *config)
+{
+	struct knav_dma_chan *chan;
+	struct knav_dma_device *dma;
+	bool found = false;
+	int chan_num = -1;
+	const char *instance;
+
+	if (!kdev) {
+		pr_err("keystone-navigator-dma driver not registered\n");
+		return (void *)-EINVAL;
+	}
+
+	chan_num = of_channel_match_helper(dev->of_node, name, &instance);
+	if (chan_num < 0) {
+		dev_err(kdev->dev, "No DMA instace with name %s\n", name);
+		return (void *)-EINVAL;
+	}
+
+	dev_dbg(kdev->dev, "initializing %s channel %d from DMA %s\n",
+		  config->direction == DMA_MEM_TO_DEV   ? "transmit" :
+		  config->direction == DMA_DEV_TO_MEM ? "receive"  :
+		  "unknown", chan_num, instance);
+
+	if (config->direction != DMA_MEM_TO_DEV &&
+	    config->direction != DMA_DEV_TO_MEM) {
+		dev_err(kdev->dev, "bad direction\n");
+		return (void *)-EINVAL;
+	}
+
+	/* Look for correct dma instance */
+	list_for_each_entry(dma, &kdev->list, list) {
+		if (!strcmp(dma->name, instance)) {
+			found = true;
+			break;
+		}
+	}
+	if (!found) {
+		dev_err(kdev->dev, "No DMA instace with name %s\n", instance);
+		return (void *)-EINVAL;
+	}
+
+	/* Look for correct dma channel from dma instance */
+	found = false;
+	list_for_each_entry(chan, &dma->chan_list, list) {
+		if (config->direction == DMA_MEM_TO_DEV) {
+			if (chan->channel == chan_num) {
+				found = true;
+				break;
+			}
+		} else {
+			if (chan->flow == chan_num) {
+				found = true;
+				break;
+			}
+		}
+	}
+	if (!found) {
+		dev_err(kdev->dev, "channel %d is not in DMA %s\n",
+				chan_num, instance);
+		return (void *)-EINVAL;
+	}
+
+	if (atomic_read(&chan->ref_count) >= 1) {
+		if (!check_config(chan, config)) {
+			dev_err(kdev->dev, "channel %d config miss-match\n",
+				chan_num);
+			return (void *)-EINVAL;
+		}
+	}
+
+	if (atomic_inc_return(&chan->dma->ref_count) <= 1)
+		knav_dma_hw_init(chan->dma);
+
+	if (atomic_inc_return(&chan->ref_count) <= 1)
+		chan_start(chan, config);
+
+	dev_dbg(kdev->dev, "channel %d opened from DMA %s\n",
+				chan_num, instance);
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(knav_dma_open_channel);
+
+/**
+ * knav_dma_close_channel()	- Destroy a dma channel
+ *
+ * channel:	dma channel handle
+ *
+ */
+void knav_dma_close_channel(void *channel)
+{
+	struct knav_dma_chan *chan = channel;
+
+	if (!kdev) {
+		pr_err("keystone-navigator-dma driver not registered\n");
+		return;
+	}
+
+	if (atomic_dec_return(&chan->ref_count) <= 0)
+		chan_stop(chan);
+
+	if (atomic_dec_return(&chan->dma->ref_count) <= 0)
+		knav_dma_hw_destroy(chan->dma);
+
+	dev_dbg(kdev->dev, "channel %d or flow %d closed from DMA %s\n",
+			chan->channel, chan->flow, chan->dma->name);
+}
+EXPORT_SYMBOL_GPL(knav_dma_close_channel);
+
+static void __iomem *pktdma_get_regs(struct knav_dma_device *dma,
+				struct device_node *node,
+				unsigned index, resource_size_t *_size)
+{
+	struct device *dev = kdev->dev;
+	struct resource res;
+	void __iomem *regs;
+	int ret;
+
+	ret = of_address_to_resource(node, index, &res);
+	if (ret) {
+		dev_err(dev, "Can't translate of node(%s) address for index(%d)\n",
+			node->name, index);
+		return ERR_PTR(ret);
+	}
+
+	regs = devm_ioremap_resource(kdev->dev, &res);
+	if (IS_ERR(regs))
+		dev_err(dev, "Failed to map register base for index(%d) node(%s)\n",
+			index, node->name);
+	if (_size)
+		*_size = resource_size(&res);
+
+	return regs;
+}
+
+static int pktdma_init_rx_chan(struct knav_dma_chan *chan, u32 flow)
+{
+	struct knav_dma_device *dma = chan->dma;
+
+	chan->flow = flow;
+	chan->reg_rx_flow = dma->reg_rx_flow + flow;
+	chan->channel = DMA_INVALID_ID;
+	dev_dbg(kdev->dev, "rx flow(%d) (%p)\n", chan->flow, chan->reg_rx_flow);
+
+	return 0;
+}
+
+static int pktdma_init_tx_chan(struct knav_dma_chan *chan, u32 channel)
+{
+	struct knav_dma_device *dma = chan->dma;
+
+	chan->channel = channel;
+	chan->reg_chan = dma->reg_tx_chan + channel;
+	chan->reg_tx_sched = dma->reg_tx_sched + channel;
+	chan->flow = DMA_INVALID_ID;
+	dev_dbg(kdev->dev, "tx channel(%d) (%p)\n", chan->channel, chan->reg_chan);
+
+	return 0;
+}
+
+static int pktdma_init_chan(struct knav_dma_device *dma,
+				enum dma_transfer_direction dir,
+				unsigned chan_num)
+{
+	struct device *dev = kdev->dev;
+	struct knav_dma_chan *chan;
+	int ret = -EINVAL;
+
+	chan = devm_kzalloc(dev, sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&chan->list);
+	chan->dma	= dma;
+	chan->direction	= DMA_NONE;
+	atomic_set(&chan->ref_count, 0);
+	spin_lock_init(&chan->lock);
+
+	if (dir == DMA_MEM_TO_DEV) {
+		chan->direction = dir;
+		ret = pktdma_init_tx_chan(chan, chan_num);
+	} else if (dir == DMA_DEV_TO_MEM) {
+		chan->direction = dir;
+		ret = pktdma_init_rx_chan(chan, chan_num);
+	} else {
+		dev_err(dev, "channel(%d) direction unknown\n", chan_num);
+	}
+
+	list_add_tail(&chan->list, &dma->chan_list);
+
+	return ret;
+}
+
+static int dma_init(struct device_node *cloud, struct device_node *dma_node)
+{
+	unsigned max_tx_chan, max_rx_chan, max_rx_flow, max_tx_sched;
+	struct device_node *node = dma_node;
+	struct knav_dma_device *dma;
+	int ret, len, num_chan = 0;
+	resource_size_t size;
+	u32 timeout;
+	u32 i;
+
+	dma = devm_kzalloc(kdev->dev, sizeof(*dma), GFP_KERNEL);
+	if (!dma) {
+		dev_err(kdev->dev, "could not allocate driver mem\n");
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&dma->list);
+	INIT_LIST_HEAD(&dma->chan_list);
+
+	if (!of_find_property(cloud, "ti,navigator-cloud-address", &len)) {
+		dev_err(kdev->dev, "unspecified navigator cloud addresses\n");
+		return -ENODEV;
+	}
+
+	dma->logical_queue_managers = len / sizeof(u32);
+	if (dma->logical_queue_managers > DMA_MAX_QMS) {
+		dev_warn(kdev->dev, "too many queue mgrs(>%d) rest ignored\n",
+			 dma->logical_queue_managers);
+		dma->logical_queue_managers = DMA_MAX_QMS;
+	}
+
+	ret = of_property_read_u32_array(cloud, "ti,navigator-cloud-address",
+					dma->qm_base_address,
+					dma->logical_queue_managers);
+	if (ret) {
+		dev_err(kdev->dev, "invalid navigator cloud addresses\n");
+		return -ENODEV;
+	}
+
+	dma->reg_global	 = pktdma_get_regs(dma, node, 0, &size);
+	if (!dma->reg_global)
+		return -ENODEV;
+	if (size < sizeof(struct reg_global)) {
+		dev_err(kdev->dev, "bad size %pa for global regs\n", &size);
+		return -ENODEV;
+	}
+
+	dma->reg_tx_chan = pktdma_get_regs(dma, node, 1, &size);
+	if (!dma->reg_tx_chan)
+		return -ENODEV;
+
+	max_tx_chan = size / sizeof(struct reg_chan);
+	dma->reg_rx_chan = pktdma_get_regs(dma, node, 2, &size);
+	if (!dma->reg_rx_chan)
+		return -ENODEV;
+
+	max_rx_chan = size / sizeof(struct reg_chan);
+	dma->reg_tx_sched = pktdma_get_regs(dma, node, 3, &size);
+	if (!dma->reg_tx_sched)
+		return -ENODEV;
+
+	max_tx_sched = size / sizeof(struct reg_tx_sched);
+	dma->reg_rx_flow = pktdma_get_regs(dma, node, 4, &size);
+	if (!dma->reg_rx_flow)
+		return -ENODEV;
+
+	max_rx_flow = size / sizeof(struct reg_rx_flow);
+	dma->rx_priority = DMA_PRIO_DEFAULT;
+	dma->tx_priority = DMA_PRIO_DEFAULT;
+
+	dma->enable_all	= (of_get_property(node, "ti,enable-all", NULL) != NULL);
+	dma->loopback	= (of_get_property(node, "ti,loop-back",  NULL) != NULL);
+
+	ret = of_property_read_u32(node, "ti,rx-retry-timeout", &timeout);
+	if (ret < 0) {
+		dev_dbg(kdev->dev, "unspecified rx timeout using value %d\n",
+			DMA_RX_TIMEOUT_DEFAULT);
+		timeout = DMA_RX_TIMEOUT_DEFAULT;
+	}
+
+	dma->rx_timeout = timeout;
+	dma->max_rx_chan = max_rx_chan;
+	dma->max_rx_flow = max_rx_flow;
+	dma->max_tx_chan = min(max_tx_chan, max_tx_sched);
+	atomic_set(&dma->ref_count, 0);
+	strcpy(dma->name, node->name);
+	spin_lock_init(&dma->lock);
+
+	for (i = 0; i < dma->max_tx_chan; i++) {
+		if (pktdma_init_chan(dma, DMA_MEM_TO_DEV, i) >= 0)
+			num_chan++;
+	}
+
+	for (i = 0; i < dma->max_rx_flow; i++) {
+		if (pktdma_init_chan(dma, DMA_DEV_TO_MEM, i) >= 0)
+			num_chan++;
+	}
+
+	list_add_tail(&dma->list, &kdev->list);
+
+	/*
+	 * For DSP software usecases or userpace transport software, setup all
+	 * the DMA hardware resources.
+	 */
+	if (dma->enable_all) {
+		atomic_inc(&dma->ref_count);
+		knav_dma_hw_init(dma);
+		dma_hw_enable_all(dma);
+	}
+
+	dev_info(kdev->dev, "DMA %s registered %d logical channels, flows %d, tx chans: %d, rx chans: %d%s\n",
+		dma->name, num_chan, dma->max_rx_flow,
+		dma->max_tx_chan, dma->max_rx_chan,
+		dma->loopback ? ", loopback" : "");
+
+	return 0;
+}
+
+static int knav_dma_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = pdev->dev.of_node;
+	struct device_node *child;
+	int ret = 0;
+
+	if (!node) {
+		dev_err(&pdev->dev, "could not find device info\n");
+		return -EINVAL;
+	}
+
+	kdev = devm_kzalloc(dev,
+			sizeof(struct knav_dma_pool_device), GFP_KERNEL);
+	if (!kdev) {
+		dev_err(dev, "could not allocate driver mem\n");
+		return -ENOMEM;
+	}
+
+	kdev->dev = dev;
+	INIT_LIST_HEAD(&kdev->list);
+
+	pm_runtime_enable(kdev->dev);
+	ret = pm_runtime_get_sync(kdev->dev);
+	if (ret < 0) {
+		dev_err(kdev->dev, "unable to enable pktdma, err %d\n", ret);
+		return ret;
+	}
+
+	/* Initialise all packet dmas */
+	for_each_child_of_node(node, child) {
+		ret = dma_init(node, child);
+		if (ret) {
+			dev_err(&pdev->dev, "init failed with %d\n", ret);
+			break;
+		}
+	}
+
+	if (list_empty(&kdev->list)) {
+		dev_err(dev, "no valid dma instance\n");
+		return -ENODEV;
+	}
+
+	debugfs_create_file("knav_dma", S_IFREG | S_IRUGO, NULL, NULL,
+			    &knav_dma_debug_ops);
+
+	return ret;
+}
+
+static int knav_dma_remove(struct platform_device *pdev)
+{
+	struct knav_dma_device *dma;
+
+	list_for_each_entry(dma, &kdev->list, list) {
+		if (atomic_dec_return(&dma->ref_count) == 0)
+			knav_dma_hw_destroy(dma);
+	}
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static struct of_device_id of_match[] = {
+	{ .compatible = "ti,keystone-navigator-dma", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, of_match);
+
+static struct platform_driver knav_dma_driver = {
+	.probe	= knav_dma_probe,
+	.remove	= knav_dma_remove,
+	.driver = {
+		.name		= "keystone-navigator-dma",
+		.owner		= THIS_MODULE,
+		.of_match_table	= of_match,
+	},
+};
+module_platform_driver(knav_dma_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("TI Keystone Navigator Packet DMA driver");
+MODULE_AUTHOR("Sandeep Nair <sandeep_n@ti.com>");
+MODULE_AUTHOR("Santosh Shilimkar <santosh.shilimkar@ti.com>");
diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
new file mode 100644
index 000000000000..e864a3eb9ac4
--- /dev/null
+++ b/include/linux/soc/ti/knav_dma.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2014 Texas Instruments Incorporated
+ * Authors:	Sandeep Nair <sandeep_n@ti.com
+ *		Cyril Chemparathy <cyril@ti.com
+		Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__
+#define __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__
+
+/*
+ * PKTDMA descriptor manipulation macros for host packet descriptor
+ */
+#define MASK(x)					(BIT(x) - 1)
+#define KNAV_DMA_DESC_PKT_LEN_MASK		MASK(22)
+#define KNAV_DMA_DESC_PKT_LEN_SHIFT		0
+#define KNAV_DMA_DESC_PS_INFO_IN_SOP		BIT(22)
+#define KNAV_DMA_DESC_PS_INFO_IN_DESC		0
+#define KNAV_DMA_DESC_TAG_MASK			MASK(8)
+#define KNAV_DMA_DESC_SAG_HI_SHIFT		24
+#define KNAV_DMA_DESC_STAG_LO_SHIFT		16
+#define KNAV_DMA_DESC_DTAG_HI_SHIFT		8
+#define KNAV_DMA_DESC_DTAG_LO_SHIFT		0
+#define KNAV_DMA_DESC_HAS_EPIB			BIT(31)
+#define KNAV_DMA_DESC_NO_EPIB			0
+#define KNAV_DMA_DESC_PSLEN_SHIFT		24
+#define KNAV_DMA_DESC_PSLEN_MASK		MASK(6)
+#define KNAV_DMA_DESC_ERR_FLAG_SHIFT		20
+#define KNAV_DMA_DESC_ERR_FLAG_MASK		MASK(4)
+#define KNAV_DMA_DESC_PSFLAG_SHIFT		16
+#define KNAV_DMA_DESC_PSFLAG_MASK		MASK(4)
+#define KNAV_DMA_DESC_RETQ_SHIFT		0
+#define KNAV_DMA_DESC_RETQ_MASK			MASK(14)
+#define KNAV_DMA_DESC_BUF_LEN_MASK		MASK(22)
+
+#define KNAV_DMA_NUM_EPIB_WORDS			4
+#define KNAV_DMA_NUM_PS_WORDS			16
+#define KNAV_DMA_FDQ_PER_CHAN			4
+
+/* Tx channel scheduling priority */
+enum knav_dma_tx_priority {
+	DMA_PRIO_HIGH	= 0,
+	DMA_PRIO_MED_H,
+	DMA_PRIO_MED_L,
+	DMA_PRIO_LOW
+};
+
+/* Rx channel error handling mode during buffer starvation */
+enum knav_dma_rx_err_mode {
+	DMA_DROP = 0,
+	DMA_RETRY
+};
+
+/* Rx flow size threshold configuration */
+enum knav_dma_rx_thresholds {
+	DMA_THRESH_NONE		= 0,
+	DMA_THRESH_0		= 1,
+	DMA_THRESH_0_1		= 3,
+	DMA_THRESH_0_1_2	= 7
+};
+
+/* Descriptor type */
+enum knav_dma_desc_type {
+	DMA_DESC_HOST = 0,
+	DMA_DESC_MONOLITHIC = 2
+};
+
+/**
+ * struct knav_dma_tx_cfg:	Tx channel configuration
+ * @filt_einfo:			Filter extended packet info
+ * @filt_pswords:		Filter PS words present
+ * @knav_dma_tx_priority:	Tx channel scheduling priority
+ */
+struct knav_dma_tx_cfg {
+	bool				filt_einfo;
+	bool				filt_pswords;
+	enum knav_dma_tx_priority	priority;
+};
+
+/**
+ * struct knav_dma_rx_cfg:	Rx flow configuration
+ * @einfo_present:		Extended packet info present
+ * @psinfo_present:		PS words present
+ * @knav_dma_rx_err_mode:	Error during buffer starvation
+ * @knav_dma_desc_type:	Host or Monolithic desc
+ * @psinfo_at_sop:		PS word located at start of packet
+ * @sop_offset:			Start of packet offset
+ * @dst_q:			Destination queue for a given flow
+ * @thresh:			Rx flow size threshold
+ * @fdq[]:			Free desc Queue array
+ * @sz_thresh0:			RX packet size threshold 0
+ * @sz_thresh1:			RX packet size threshold 1
+ * @sz_thresh2:			RX packet size threshold 2
+ */
+struct knav_dma_rx_cfg {
+	bool				einfo_present;
+	bool				psinfo_present;
+	enum knav_dma_rx_err_mode	err_mode;
+	enum knav_dma_desc_type		desc_type;
+	bool				psinfo_at_sop;
+	unsigned int			sop_offset;
+	unsigned int			dst_q;
+	enum knav_dma_rx_thresholds	thresh;
+	unsigned int			fdq[KNAV_DMA_FDQ_PER_CHAN];
+	unsigned int			sz_thresh0;
+	unsigned int			sz_thresh1;
+	unsigned int			sz_thresh2;
+};
+
+/**
+ * struct knav_dma_cfg:	Pktdma channel configuration
+ * @sl_cfg:			Slave configuration
+ * @tx:				Tx channel configuration
+ * @rx:				Rx flow configuration
+ */
+struct knav_dma_cfg {
+	enum dma_transfer_direction direction;
+	union {
+		struct knav_dma_tx_cfg	tx;
+		struct knav_dma_rx_cfg	rx;
+	} u;
+};
+
+/**
+ * struct knav_dma_desc:	Host packet descriptor layout
+ * @desc_info:			Descriptor information like id, type, length
+ * @tag_info:			Flow tag info written in during RX
+ * @packet_info:		Queue Manager, policy, flags etc
+ * @buff_len:			Buffer length in bytes
+ * @buff:			Buffer pointer
+ * @next_desc:			For chaining the descriptors
+ * @orig_len:			length since 'buff_len' can be overwritten
+ * @orig_buff:			buff pointer since 'buff' can be overwritten
+ * @epib:			Extended packet info block
+ * @psdata:			Protocol specific
+ */
+struct knav_dma_desc {
+	u32	desc_info;
+	u32	tag_info;
+	u32	packet_info;
+	u32	buff_len;
+	u32	buff;
+	u32	next_desc;
+	u32	orig_len;
+	u32	orig_buff;
+	u32	epib[KNAV_DMA_NUM_EPIB_WORDS];
+	u32	psdata[KNAV_DMA_NUM_PS_WORDS];
+	u32	pad[4];
+} ____cacheline_aligned;
+
+#ifdef CONFIG_KEYSTONE_NAVIGATOR_DMA
+void *knav_dma_open_channel(struct device *dev, const char *name,
+				struct knav_dma_cfg *config);
+void knav_dma_close_channel(void *channel);
+#else
+static inline void *knav_dma_open_channel(struct device *dev, const char *name,
+				struct knav_dma_cfg *config)
+{
+	return (void *) NULL;
+}
+static inline void knav_dma_close_channel(void *channel)
+{}
+
+#endif
+
+#endif /* __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ */
-- 
cgit v1.2.3


From b8f139f68f2099b7f8b4ef470a1e53210e3aa025 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Fri, 12 Sep 2014 08:53:56 +0200
Subject: regulator/mfd: max14577: Export symbols for calculating charger
 current

This patch prepares for changing the max14577 charger driver to allow
configuring battery-dependent settings from DTS.

The patch moves from regulator driver to MFD core driver and exports:
 - function for calculating register value for charger's current;
 - table of limits for chargers (MAX14577, MAX77836).

Previously they were used only by the max14577 regulator driver. In next
patch the charger driver will use them as well. Exporting them will
reduce unnecessary code duplication.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Mark Brown <broonie@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max14577.c               | 95 ++++++++++++++++++++++++++++++++++++
 drivers/regulator/max14577.c         | 80 ++----------------------------
 include/linux/mfd/max14577-private.h | 22 ++++-----
 include/linux/mfd/max14577.h         | 23 +++++++++
 4 files changed, 133 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c
index 6599407b5624..b8af263be594 100644
--- a/drivers/mfd/max14577.c
+++ b/drivers/mfd/max14577.c
@@ -26,6 +26,87 @@
 #include <linux/mfd/max14577.h>
 #include <linux/mfd/max14577-private.h>
 
+/*
+ * Table of valid charger currents for different Maxim chipsets.
+ * It is placed here because it is used by both charger and regulator driver.
+ */
+const struct maxim_charger_current maxim_charger_currents[] = {
+	[MAXIM_DEVICE_TYPE_UNKNOWN] = { 0, 0, 0, 0 },
+	[MAXIM_DEVICE_TYPE_MAX14577] = {
+		.min		= MAX14577_CHARGER_CURRENT_LIMIT_MIN,
+		.high_start	= MAX14577_CHARGER_CURRENT_LIMIT_HIGH_START,
+		.high_step	= MAX14577_CHARGER_CURRENT_LIMIT_HIGH_STEP,
+		.max		= MAX14577_CHARGER_CURRENT_LIMIT_MAX,
+	},
+	[MAXIM_DEVICE_TYPE_MAX77836] = {
+		.min		= MAX77836_CHARGER_CURRENT_LIMIT_MIN,
+		.high_start	= MAX77836_CHARGER_CURRENT_LIMIT_HIGH_START,
+		.high_step	= MAX77836_CHARGER_CURRENT_LIMIT_HIGH_STEP,
+		.max		= MAX77836_CHARGER_CURRENT_LIMIT_MAX,
+	},
+};
+EXPORT_SYMBOL_GPL(maxim_charger_currents);
+
+/*
+ * maxim_charger_calc_reg_current - Calculate register value for current
+ * @limits:	constraints for charger, matching the MBCICHWRC register
+ * @min_ua:	minimal requested current, micro Amps
+ * @max_ua:	maximum requested current, micro Amps
+ * @dst:	destination to store calculated register value
+ *
+ * Calculates the value of MBCICHWRC (Fast Battery Charge Current) register
+ * for given current and stores it under pointed 'dst'. The stored value
+ * combines low bit (MBCICHWRCL) and high bits (MBCICHWRCH). It is also
+ * properly shifted.
+ *
+ * The calculated register value matches the current which:
+ *  - is always between <limits.min, limits.max>;
+ *  - is always less or equal to max_ua;
+ *  - is the highest possible value;
+ *  - may be lower than min_ua.
+ *
+ * On success returns 0. On error returns -EINVAL (requested min/max current
+ * is outside of given charger limits) and 'dst' is not set.
+ */
+int maxim_charger_calc_reg_current(const struct maxim_charger_current *limits,
+		unsigned int min_ua, unsigned int max_ua, u8 *dst)
+{
+	unsigned int current_bits = 0xf;
+
+	if (min_ua > max_ua)
+		return -EINVAL;
+
+	if (min_ua > limits->max || max_ua < limits->min)
+		return -EINVAL;
+
+	if (max_ua < limits->high_start) {
+		/*
+		 * Less than high_start, so set the minimal current
+		 * (turn Low Bit off, 0 as high bits).
+		 */
+		*dst = 0x0;
+		return 0;
+	}
+
+	/* max_ua is in range: <high_start, infinite>, cut it to limits.max */
+	max_ua = min(limits->max, max_ua);
+	max_ua -= limits->high_start;
+	/*
+	 * There is no risk of overflow 'max_ua' here because:
+	 *  - max_ua >= limits.high_start
+	 *  - BUILD_BUG checks that 'limits' are: max >= high_start + high_step
+	 */
+	current_bits = max_ua / limits->high_step;
+
+	/* Turn Low Bit on (use range <limits.high_start, limits.max>) ... */
+	*dst = 0x1 << CHGCTRL4_MBCICHWRCL_SHIFT;
+	/* and set proper High Bits */
+	*dst |= current_bits << CHGCTRL4_MBCICHWRCH_SHIFT;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(maxim_charger_calc_reg_current);
+
 static const struct mfd_cell max14577_devs[] = {
 	{
 		.name = "max14577-muic",
@@ -466,6 +547,20 @@ static int __init max14577_i2c_init(void)
 	BUILD_BUG_ON(ARRAY_SIZE(max14577_i2c_id) != MAXIM_DEVICE_TYPE_NUM);
 	BUILD_BUG_ON(ARRAY_SIZE(max14577_dt_match) != MAXIM_DEVICE_TYPE_NUM);
 
+	/* Valid charger current values must be provided for each chipset */
+	BUILD_BUG_ON(ARRAY_SIZE(maxim_charger_currents) != MAXIM_DEVICE_TYPE_NUM);
+
+	/* Check for valid values for charger */
+	BUILD_BUG_ON(MAX14577_CHARGER_CURRENT_LIMIT_HIGH_START +
+			MAX14577_CHARGER_CURRENT_LIMIT_HIGH_STEP * 0xf !=
+			MAX14577_CHARGER_CURRENT_LIMIT_MAX);
+	BUILD_BUG_ON(MAX14577_CHARGER_CURRENT_LIMIT_HIGH_STEP == 0);
+
+	BUILD_BUG_ON(MAX77836_CHARGER_CURRENT_LIMIT_HIGH_START +
+			MAX77836_CHARGER_CURRENT_LIMIT_HIGH_STEP * 0xf !=
+			MAX77836_CHARGER_CURRENT_LIMIT_MAX);
+	BUILD_BUG_ON(MAX77836_CHARGER_CURRENT_LIMIT_HIGH_STEP == 0);
+
 	return i2c_add_driver(&max14577_i2c_driver);
 }
 subsys_initcall(max14577_i2c_init);
diff --git a/drivers/regulator/max14577.c b/drivers/regulator/max14577.c
index 5d9c605cf534..0ff5a20ac958 100644
--- a/drivers/regulator/max14577.c
+++ b/drivers/regulator/max14577.c
@@ -22,42 +22,6 @@
 #include <linux/mfd/max14577-private.h>
 #include <linux/regulator/of_regulator.h>
 
-/*
- * Valid limits of current for max14577 and max77836 chargers.
- * They must correspond to MBCICHWRCL and MBCICHWRCH fields in CHGCTRL4
- * register for given chipset.
- */
-struct maxim_charger_current {
-	/* Minimal current, set in CHGCTRL4/MBCICHWRCL, uA */
-	unsigned int min;
-	/*
-	 * Minimal current when high setting is active,
-	 * set in CHGCTRL4/MBCICHWRCH, uA
-	 */
-	unsigned int high_start;
-	/* Value of one step in high setting, uA */
-	unsigned int high_step;
-	/* Maximum current of high setting, uA */
-	unsigned int max;
-};
-
-/* Table of valid charger currents for different Maxim chipsets */
-static const struct maxim_charger_current maxim_charger_currents[] = {
-	[MAXIM_DEVICE_TYPE_UNKNOWN] = { 0, 0, 0, 0 },
-	[MAXIM_DEVICE_TYPE_MAX14577] = {
-		.min		= MAX14577_REGULATOR_CURRENT_LIMIT_MIN,
-		.high_start	= MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_START,
-		.high_step	= MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_STEP,
-		.max		= MAX14577_REGULATOR_CURRENT_LIMIT_MAX,
-	},
-	[MAXIM_DEVICE_TYPE_MAX77836] = {
-		.min		= MAX77836_REGULATOR_CURRENT_LIMIT_MIN,
-		.high_start	= MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_START,
-		.high_step	= MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_STEP,
-		.max		= MAX77836_REGULATOR_CURRENT_LIMIT_MAX,
-	},
-};
-
 static int max14577_reg_is_enabled(struct regulator_dev *rdev)
 {
 	int rid = rdev_get_id(rdev);
@@ -103,8 +67,8 @@ static int max14577_reg_get_current_limit(struct regulator_dev *rdev)
 static int max14577_reg_set_current_limit(struct regulator_dev *rdev,
 		int min_uA, int max_uA)
 {
-	int i, current_bits = 0xf;
 	u8 reg_data;
+	int ret;
 	struct max14577 *max14577 = rdev_get_drvdata(rdev);
 	const struct maxim_charger_current *limits =
 		&maxim_charger_currents[max14577->dev_type];
@@ -112,35 +76,9 @@ static int max14577_reg_set_current_limit(struct regulator_dev *rdev,
 	if (rdev_get_id(rdev) != MAX14577_CHARGER)
 		return -EINVAL;
 
-	if (min_uA > limits->max || max_uA < limits->min)
-		return -EINVAL;
-
-	if (max_uA < limits->high_start) {
-		/*
-		 * Less than high_start,
-		 * so set the minimal current (turn only Low Bit off)
-		 */
-		u8 reg_data = 0x0 << CHGCTRL4_MBCICHWRCL_SHIFT;
-		return max14577_update_reg(rdev->regmap,
-				MAX14577_CHG_REG_CHG_CTRL4,
-				CHGCTRL4_MBCICHWRCL_MASK, reg_data);
-	}
-
-	/*
-	 * max_uA is in range: <high_start, inifinite>, so search for
-	 * valid current starting from maximum current.
-	 */
-	for (i = limits->max; i >= limits->high_start; i -= limits->high_step) {
-		if (i <= max_uA)
-			break;
-		current_bits--;
-	}
-	BUG_ON(current_bits < 0); /* Cannot happen */
-
-	/* Turn Low Bit on (use range high_start-max)... */
-	reg_data = 0x1 << CHGCTRL4_MBCICHWRCL_SHIFT;
-	/* and set proper High Bits */
-	reg_data |= current_bits << CHGCTRL4_MBCICHWRCH_SHIFT;
+	ret = maxim_charger_calc_reg_current(limits, min_uA, max_uA, &reg_data);
+	if (ret)
+		return ret;
 
 	return max14577_update_reg(rdev->regmap, MAX14577_CHG_REG_CHG_CTRL4,
 			CHGCTRL4_MBCICHWRCL_MASK | CHGCTRL4_MBCICHWRCH_MASK,
@@ -442,16 +380,6 @@ static struct platform_driver max14577_regulator_driver = {
 
 static int __init max14577_regulator_init(void)
 {
-	/* Check for valid values for charger */
-	BUILD_BUG_ON(MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_START +
-			MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_STEP * 0xf !=
-			MAX14577_REGULATOR_CURRENT_LIMIT_MAX);
-	BUILD_BUG_ON(MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_START +
-			MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_STEP * 0xf !=
-			MAX77836_REGULATOR_CURRENT_LIMIT_MAX);
-	/* Valid charger current values must be provided for each chipset */
-	BUILD_BUG_ON(ARRAY_SIZE(maxim_charger_currents) != MAXIM_DEVICE_TYPE_NUM);
-
 	BUILD_BUG_ON(ARRAY_SIZE(max14577_supported_regulators) != MAX14577_REGULATOR_NUM);
 	BUILD_BUG_ON(ARRAY_SIZE(max77836_supported_regulators) != MAX77836_REGULATOR_NUM);
 
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index d6f321699b89..7d514839c764 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -281,17 +281,17 @@ enum max14577_charger_reg {
 #define CHGCTRL7_OTPCGHCVS_SHIFT	0
 #define CHGCTRL7_OTPCGHCVS_MASK		(0x3 << CHGCTRL7_OTPCGHCVS_SHIFT)
 
-/* MAX14577 regulator current limits (as in CHGCTRL4 register), uA */
-#define MAX14577_REGULATOR_CURRENT_LIMIT_MIN		 90000
-#define MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_START	200000
-#define MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_STEP	 50000
-#define MAX14577_REGULATOR_CURRENT_LIMIT_MAX		950000
-
-/* MAX77836 regulator current limits (as in CHGCTRL4 register), uA */
-#define MAX77836_REGULATOR_CURRENT_LIMIT_MIN		 45000
-#define MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_START	100000
-#define MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_STEP	 25000
-#define MAX77836_REGULATOR_CURRENT_LIMIT_MAX		475000
+/* MAX14577 charger current limits (as in CHGCTRL4 register), uA */
+#define MAX14577_CHARGER_CURRENT_LIMIT_MIN		 90000U
+#define MAX14577_CHARGER_CURRENT_LIMIT_HIGH_START	200000U
+#define MAX14577_CHARGER_CURRENT_LIMIT_HIGH_STEP	 50000U
+#define MAX14577_CHARGER_CURRENT_LIMIT_MAX		950000U
+
+/* MAX77836 charger current limits (as in CHGCTRL4 register), uA */
+#define MAX77836_CHARGER_CURRENT_LIMIT_MIN		 45000U
+#define MAX77836_CHARGER_CURRENT_LIMIT_HIGH_START	100000U
+#define MAX77836_CHARGER_CURRENT_LIMIT_HIGH_STEP	 25000U
+#define MAX77836_CHARGER_CURRENT_LIMIT_MAX		475000U
 
 /* MAX14577 regulator SFOUT LDO voltage, fixed, uV */
 #define MAX14577_REGULATOR_SAFEOUT_VOLTAGE		4900000
diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h
index c83fbed1c7b6..3c098d57b1d1 100644
--- a/include/linux/mfd/max14577.h
+++ b/include/linux/mfd/max14577.h
@@ -74,4 +74,27 @@ struct max14577_platform_data {
 	struct max14577_regulator_platform_data *regulators;
 };
 
+/*
+ * Valid limits of current for max14577 and max77836 chargers.
+ * They must correspond to MBCICHWRCL and MBCICHWRCH fields in CHGCTRL4
+ * register for given chipset.
+ */
+struct maxim_charger_current {
+	/* Minimal current, set in CHGCTRL4/MBCICHWRCL, uA */
+	unsigned int min;
+	/*
+	 * Minimal current when high setting is active,
+	 * set in CHGCTRL4/MBCICHWRCH, uA
+	 */
+	unsigned int high_start;
+	/* Value of one step in high setting, uA */
+	unsigned int high_step;
+	/* Maximum current of high setting, uA */
+	unsigned int max;
+};
+
+extern const struct maxim_charger_current maxim_charger_currents[];
+extern int maxim_charger_calc_reg_current(const struct maxim_charger_current *limits,
+		unsigned int min_ua, unsigned int max_ua, u8 *dst);
+
 #endif /* __MAX14577_H__ */
-- 
cgit v1.2.3


From e30110e9c96f48aea01abc3e6dfadb369cbafec3 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Fri, 12 Sep 2014 08:53:57 +0200
Subject: charger: max14577: Configure battery-dependent settings from DTS and
 sysfs

Remove hard-coded values for:
 - Fast Charge current,
 - End Of Charge current,
 - Fast Charge timer,
 - Overvoltage Protection Threshold,
 - Battery Constant Voltage,
and use DTS or sysfs to configure them. This allows using the max14577 charger
driver with different batteries.

Now the charger driver requires valid configuration data from DTS. In
case of wrong configuration data it fails during probe.

The fast charge timer is configured through sysfs entry.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/power/Kconfig                |   1 +
 drivers/power/max14577_charger.c     | 311 +++++++++++++++++++++++++++++++----
 include/linux/mfd/max14577-private.h |  19 +++
 include/linux/mfd/max14577.h         |   7 +
 4 files changed, 310 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 69fa8a9ef7a6..04e1d2fe2201 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -327,6 +327,7 @@ config CHARGER_MANAGER
 config CHARGER_MAX14577
 	tristate "Maxim MAX14577/77836 battery charger driver"
 	depends on MFD_MAX14577
+	select SYSFS
 	help
 	  Say Y to enable support for the battery charger control sysfs and
 	  platform data of MAX14577/77836 MUICs.
diff --git a/drivers/power/max14577_charger.c b/drivers/power/max14577_charger.c
index 193530549b1f..0a2bc7277026 100644
--- a/drivers/power/max14577_charger.c
+++ b/drivers/power/max14577_charger.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/mfd/max14577-private.h>
+#include <linux/mfd/max14577.h>
 
 struct max14577_charger {
 	struct device *dev;
@@ -27,6 +28,8 @@ struct max14577_charger {
 
 	unsigned int		charging_state;
 	unsigned int		battery_state;
+
+	struct max14577_charger_platform_data	*pdata;
 };
 
 /*
@@ -178,15 +181,131 @@ static int max14577_get_present(struct max14577_charger *chg)
 	return 1;
 }
 
+static int max14577_set_fast_charge_timer(struct max14577_charger *chg,
+		unsigned long hours)
+{
+	u8 reg_data;
+
+	switch (hours) {
+	case 5 ... 7:
+		reg_data = hours - 3;
+		break;
+	case 0:
+		/* Disable */
+		reg_data = 0x7;
+		break;
+	default:
+		dev_err(chg->dev, "Wrong value for Fast-Charge Timer: %lu\n",
+				hours);
+		return -EINVAL;
+	}
+	reg_data <<= CHGCTRL1_TCHW_SHIFT;
+
+	return max14577_update_reg(chg->max14577->regmap,
+			MAX14577_REG_CHGCTRL1, CHGCTRL1_TCHW_MASK, reg_data);
+}
+
+static int max14577_init_constant_voltage(struct max14577_charger *chg,
+		unsigned int uvolt)
+{
+	u8 reg_data;
+
+	if (uvolt < MAXIM_CHARGER_CONSTANT_VOLTAGE_MIN ||
+			uvolt > MAXIM_CHARGER_CONSTANT_VOLTAGE_MAX)
+		return -EINVAL;
+
+	if (uvolt == 4200000)
+		reg_data = 0x0;
+	else if (uvolt == MAXIM_CHARGER_CONSTANT_VOLTAGE_MAX)
+		reg_data = 0x1f;
+	else if (uvolt <= 4280000) {
+		unsigned int val = uvolt;
+
+		val -= MAXIM_CHARGER_CONSTANT_VOLTAGE_MIN;
+		val /= MAXIM_CHARGER_CONSTANT_VOLTAGE_STEP;
+		if (uvolt <= 4180000)
+			reg_data = 0x1 + val;
+		else
+			reg_data = val; /* Fix for gap between 4.18V and 4.22V */
+	} else
+		return -EINVAL;
+
+	reg_data <<= CHGCTRL3_MBCCVWRC_SHIFT;
+
+	return max14577_write_reg(chg->max14577->regmap,
+			MAX14577_CHG_REG_CHG_CTRL3, reg_data);
+}
+
+static int max14577_init_eoc(struct max14577_charger *chg,
+		unsigned int uamp)
+{
+	unsigned int current_bits = 0xf;
+	u8 reg_data;
+
+	switch (chg->max14577->dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		if (uamp < 5000)
+			return -EINVAL; /* Requested current is too low */
+
+		if (uamp >= 7500 && uamp < 10000)
+			current_bits = 0x0;
+		else if (uamp <= 50000) {
+			/* <5000, 7499> and <10000, 50000> */
+			current_bits = uamp / 5000;
+		} else {
+			uamp = min(uamp, 100000U) - 50000U;
+			current_bits = 0xa + uamp / 10000;
+		}
+		break;
+
+	case MAXIM_DEVICE_TYPE_MAX14577:
+	default:
+		if (uamp < MAX14577_CHARGER_EOC_CURRENT_LIMIT_MIN)
+			return -EINVAL; /* Requested current is too low */
+
+		uamp = min(uamp, MAX14577_CHARGER_EOC_CURRENT_LIMIT_MAX);
+		uamp -= MAX14577_CHARGER_EOC_CURRENT_LIMIT_MIN;
+		current_bits = uamp / MAX14577_CHARGER_EOC_CURRENT_LIMIT_STEP;
+		break;
+	}
+
+	reg_data = current_bits << CHGCTRL5_EOCS_SHIFT;
+
+	return max14577_update_reg(chg->max14577->regmap,
+			MAX14577_CHG_REG_CHG_CTRL5, CHGCTRL5_EOCS_MASK,
+			reg_data);
+}
+
+static int max14577_init_fast_charge(struct max14577_charger *chg,
+		unsigned int uamp)
+{
+	u8 reg_data;
+	int ret;
+	const struct maxim_charger_current *limits =
+		&maxim_charger_currents[chg->max14577->dev_type];
+
+	ret = maxim_charger_calc_reg_current(limits, uamp, uamp, &reg_data);
+	if (ret) {
+		dev_err(chg->dev, "Wrong value for fast charge: %u\n", uamp);
+		return ret;
+	}
+
+	return max14577_update_reg(chg->max14577->regmap,
+			MAX14577_CHG_REG_CHG_CTRL4,
+			CHGCTRL4_MBCICHWRCL_MASK | CHGCTRL4_MBCICHWRCH_MASK,
+			reg_data);
+}
+
 /*
  * Sets charger registers to proper and safe default values.
  * Some of these values are equal to defaults in MAX14577E
  * data sheet but there are minor differences.
  */
-static void max14577_charger_reg_init(struct max14577_charger *chg)
+static int max14577_charger_reg_init(struct max14577_charger *chg)
 {
 	struct regmap *rmap = chg->max14577->regmap;
 	u8 reg_data;
+	int ret;
 
 	/*
 	 * Charger-Type Manual Detection, default off (set CHGTYPMAN to 0)
@@ -198,10 +317,6 @@ static void max14577_charger_reg_init(struct max14577_charger *chg)
 			CDETCTRL1_CHGDETEN_MASK | CDETCTRL1_CHGTYPMAN_MASK,
 			reg_data);
 
-	/* Battery Fast-Charge Timer, set to: 6hrs */
-	reg_data = 0x3 << CHGCTRL1_TCHW_SHIFT;
-	max14577_write_reg(rmap, MAX14577_REG_CHGCTRL1, reg_data);
-
 	/*
 	 * Wall-Adapter Rapid Charge, default on
 	 * Battery-Charger, default on
@@ -210,32 +325,46 @@ static void max14577_charger_reg_init(struct max14577_charger *chg)
 	reg_data |= 0x1 << CHGCTRL2_MBCHOSTEN_SHIFT;
 	max14577_write_reg(rmap, MAX14577_REG_CHGCTRL2, reg_data);
 
-	/* Battery-Charger Constant Voltage (CV) Mode, set to: 4.35V */
-	reg_data = 0xf << CHGCTRL3_MBCCVWRC_SHIFT;
-	max14577_write_reg(rmap, MAX14577_REG_CHGCTRL3, reg_data);
-
-	/*
-	 * Fast Battery-Charge Current Low,
-	 * default 200-950mA (max14577) / 100-475mA (max77836)
-	 *
-	 * Fast Battery-Charge Current High,
-	 * set to 450mA (max14577) / 225mA (max77836)
-	 */
-	reg_data = 0x1 << CHGCTRL4_MBCICHWRCL_SHIFT;
-	reg_data |= 0x5 << CHGCTRL4_MBCICHWRCH_SHIFT;
-	max14577_write_reg(rmap, MAX14577_REG_CHGCTRL4, reg_data);
-
-	/* End-of-Charge Current, set to 50mA (max14577) / 7.5mA (max77836) */
-	reg_data = 0x0 << CHGCTRL5_EOCS_SHIFT;
-	max14577_write_reg(rmap, MAX14577_REG_CHGCTRL5, reg_data);
-
 	/* Auto Charging Stop, default off */
 	reg_data = 0x0 << CHGCTRL6_AUTOSTOP_SHIFT;
 	max14577_write_reg(rmap, MAX14577_REG_CHGCTRL6, reg_data);
 
-	/* Overvoltage-Protection Threshold, set to 6.5V */
-	reg_data = 0x2 << CHGCTRL7_OTPCGHCVS_SHIFT;
+	ret = max14577_init_constant_voltage(chg, chg->pdata->constant_uvolt);
+	if (ret)
+		return ret;
+
+	ret = max14577_init_eoc(chg, chg->pdata->eoc_uamp);
+	if (ret)
+		return ret;
+
+	ret = max14577_init_fast_charge(chg, chg->pdata->fast_charge_uamp);
+	if (ret)
+		return ret;
+
+	ret = max14577_set_fast_charge_timer(chg,
+			MAXIM_CHARGER_FAST_CHARGE_TIMER_DEFAULT);
+	if (ret)
+		return ret;
+
+	/* Initialize Overvoltage-Protection Threshold */
+	switch (chg->pdata->ovp_uvolt) {
+	case 7500000:
+		reg_data = 0x0;
+		break;
+	case 6000000:
+	case 6500000:
+	case 7000000:
+		reg_data = 0x1 + (chg->pdata->ovp_uvolt - 6000000) / 500000;
+		break;
+	default:
+		dev_err(chg->dev, "Wrong value for OVP: %u\n",
+				chg->pdata->ovp_uvolt);
+		return -EINVAL;
+	}
+	reg_data <<= CHGCTRL7_OTPCGHCVS_SHIFT;
 	max14577_write_reg(rmap, MAX14577_REG_CHGCTRL7, reg_data);
+
+	return 0;
 }
 
 /* Support property from charger */
@@ -295,6 +424,110 @@ static int max14577_charger_get_property(struct power_supply *psy,
 	return ret;
 }
 
+#ifdef CONFIG_OF
+static struct max14577_charger_platform_data *max14577_charger_dt_init(
+		struct platform_device *pdev)
+{
+	struct max14577_charger_platform_data *pdata;
+	struct device_node *np = pdev->dev.of_node;
+	int ret;
+
+	if (!np) {
+		dev_err(&pdev->dev, "No charger OF node\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	ret = of_property_read_u32(np, "maxim,constant-uvolt",
+			&pdata->constant_uvolt);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot parse maxim,constant-uvolt field from DT\n");
+		return ERR_PTR(ret);
+	}
+
+	ret = of_property_read_u32(np, "maxim,fast-charge-uamp",
+			&pdata->fast_charge_uamp);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot parse maxim,fast-charge-uamp field from DT\n");
+		return ERR_PTR(ret);
+	}
+
+	ret = of_property_read_u32(np, "maxim,eoc-uamp", &pdata->eoc_uamp);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot parse maxim,eoc-uamp field from DT\n");
+		return ERR_PTR(ret);
+	}
+
+	ret = of_property_read_u32(np, "maxim,ovp-uvolt", &pdata->ovp_uvolt);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot parse maxim,ovp-uvolt field from DT\n");
+		return ERR_PTR(ret);
+	}
+
+	return pdata;
+}
+#else /* CONFIG_OF */
+static struct max14577_charger_platform_data *max14577_charger_dt_init(
+		struct platform_device *pdev)
+{
+	return NULL;
+}
+#endif /* CONFIG_OF */
+
+static ssize_t show_fast_charge_timer(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct max14577_charger *chg = dev_get_drvdata(dev);
+	u8 reg_data;
+	int ret;
+	unsigned int val;
+
+	ret = max14577_read_reg(chg->max14577->regmap, MAX14577_REG_CHGCTRL1,
+			&reg_data);
+	if (ret)
+		return ret;
+
+	reg_data &= CHGCTRL1_TCHW_MASK;
+	reg_data >>= CHGCTRL1_TCHW_SHIFT;
+	switch (reg_data) {
+	case 0x2 ... 0x4:
+		val = reg_data + 3;
+		break;
+	case 0x7:
+		val = 0;
+		break;
+	default:
+		val = 5;
+		break;
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
+}
+
+static ssize_t store_fast_charge_timer(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct max14577_charger *chg = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	ret = max14577_set_fast_charge_timer(chg, val);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(fast_charge_timer, S_IRUGO | S_IWUSR,
+		show_fast_charge_timer, store_fast_charge_timer);
+
 static int max14577_charger_probe(struct platform_device *pdev)
 {
 	struct max14577_charger *chg;
@@ -309,7 +542,13 @@ static int max14577_charger_probe(struct platform_device *pdev)
 	chg->dev = &pdev->dev;
 	chg->max14577 = max14577;
 
-	max14577_charger_reg_init(chg);
+	chg->pdata = max14577_charger_dt_init(pdev);
+	if (IS_ERR_OR_NULL(chg->pdata))
+		return PTR_ERR(chg->pdata);
+
+	ret = max14577_charger_reg_init(chg);
+	if (ret)
+		return ret;
 
 	chg->charger.name = "max14577-charger",
 	chg->charger.type = POWER_SUPPLY_TYPE_BATTERY,
@@ -317,19 +556,35 @@ static int max14577_charger_probe(struct platform_device *pdev)
 	chg->charger.num_properties = ARRAY_SIZE(max14577_charger_props),
 	chg->charger.get_property = max14577_charger_get_property,
 
+	ret = device_create_file(&pdev->dev, &dev_attr_fast_charge_timer);
+	if (ret) {
+		dev_err(&pdev->dev, "failed: create sysfs entry\n");
+		return ret;
+	}
+
 	ret = power_supply_register(&pdev->dev, &chg->charger);
 	if (ret) {
 		dev_err(&pdev->dev, "failed: power supply register\n");
-		return ret;
+		goto err;
 	}
 
+	/* Check for valid values for charger */
+	BUILD_BUG_ON(MAX14577_CHARGER_EOC_CURRENT_LIMIT_MIN +
+			MAX14577_CHARGER_EOC_CURRENT_LIMIT_STEP * 0xf !=
+			MAX14577_CHARGER_EOC_CURRENT_LIMIT_MAX);
 	return 0;
+
+err:
+	device_remove_file(&pdev->dev, &dev_attr_fast_charge_timer);
+
+	return ret;
 }
 
 static int max14577_charger_remove(struct platform_device *pdev)
 {
 	struct max14577_charger *chg = platform_get_drvdata(pdev);
 
+	device_remove_file(&pdev->dev, &dev_attr_fast_charge_timer);
 	power_supply_unregister(&chg->charger);
 
 	return 0;
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index 7d514839c764..f01c1fae4d84 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -293,6 +293,25 @@ enum max14577_charger_reg {
 #define MAX77836_CHARGER_CURRENT_LIMIT_HIGH_STEP	 25000U
 #define MAX77836_CHARGER_CURRENT_LIMIT_MAX		475000U
 
+/*
+ * MAX14577 charger End-Of-Charge current limits
+ * (as in CHGCTRL5 register), uA
+ */
+#define MAX14577_CHARGER_EOC_CURRENT_LIMIT_MIN		50000U
+#define MAX14577_CHARGER_EOC_CURRENT_LIMIT_STEP		10000U
+#define MAX14577_CHARGER_EOC_CURRENT_LIMIT_MAX		200000U
+
+/*
+ * MAX14577/MAX77836 Battery Constant Voltage
+ * (as in CHGCTRL3 register), uV
+ */
+#define MAXIM_CHARGER_CONSTANT_VOLTAGE_MIN		4000000U
+#define MAXIM_CHARGER_CONSTANT_VOLTAGE_STEP		20000U
+#define MAXIM_CHARGER_CONSTANT_VOLTAGE_MAX		4350000U
+
+/* Default value for fast charge timer, in hours */
+#define MAXIM_CHARGER_FAST_CHARGE_TIMER_DEFAULT		5
+
 /* MAX14577 regulator SFOUT LDO voltage, fixed, uV */
 #define MAX14577_REGULATOR_SAFEOUT_VOLTAGE		4900000
 
diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h
index 3c098d57b1d1..ccfaf952c31b 100644
--- a/include/linux/mfd/max14577.h
+++ b/include/linux/mfd/max14577.h
@@ -54,6 +54,13 @@ struct max14577_regulator_platform_data {
 	struct device_node *of_node;
 };
 
+struct max14577_charger_platform_data {
+	u32 constant_uvolt;
+	u32 fast_charge_uamp;
+	u32 eoc_uamp;
+	u32 ovp_uvolt;
+};
+
 /*
  * MAX14577 MFD platform data
  */
-- 
cgit v1.2.3


From 0a30288da1aec914e158c2d7a3482a85f632750f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 23 Sep 2014 15:24:32 -0400
Subject: blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during
 probe

blk-mq uses percpu_ref for its usage counter which tracks the number
of in-flight commands and used to synchronously drain the queue on
freeze.  percpu_ref shutdown takes measureable wallclock time as it
involves a sched RCU grace period.  This means that draining a blk-mq
takes measureable wallclock time.  One would think that this shouldn't
matter as queue shutdown should be a rare event which takes place
asynchronously w.r.t. userland.

Unfortunately, SCSI probing involves synchronously setting up and then
tearing down a lot of request_queues back-to-back for non-existent
LUNs.  This means that SCSI probing may take more than ten seconds
when scsi-mq is used.

This will be properly fixed by implementing a mechanism to keep
q->mq_usage_counter in atomic mode till genhd registration; however,
that involves rather big updates to percpu_ref which is difficult to
apply late in the devel cycle (v3.17-rc6 at the moment).  As a
stop-gap measure till the proper fix can be implemented in the next
cycle, this patch introduces __percpu_ref_kill_expedited() and makes
blk_mq_freeze_queue() use it.  This is heavy-handed but should work
for testing the experimental SCSI blk-mq implementation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Christoph Hellwig <hch@infradead.org>
Link: http://lkml.kernel.org/g/20140919113815.GA10791@lst.de
Fixes: add703fda981 ("blk-mq: use percpu_ref for mq usage count")
Cc: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c                  | 11 ++++++++++-
 include/linux/percpu-refcount.h |  1 +
 lib/percpu-refcount.c           | 16 ++++++++++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c88e6089746d..df8e1e09dd17 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -119,7 +119,16 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	spin_unlock_irq(q->queue_lock);
 
 	if (freeze) {
-		percpu_ref_kill(&q->mq_usage_counter);
+		/*
+		 * XXX: Temporary kludge to work around SCSI blk-mq stall.
+		 * SCSI synchronously creates and destroys many queues
+		 * back-to-back during probe leading to lengthy stalls.
+		 * This will be fixed by keeping ->mq_usage_counter in
+		 * atomic mode until genhd registration, but, for now,
+		 * let's work around using expedited synchronization.
+		 */
+		__percpu_ref_kill_expedited(&q->mq_usage_counter);
+
 		blk_mq_run_queues(q, false);
 	}
 	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 3dfbf237cd8f..ef5894ca8e50 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -71,6 +71,7 @@ void percpu_ref_reinit(struct percpu_ref *ref);
 void percpu_ref_exit(struct percpu_ref *ref);
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill);
+void __percpu_ref_kill_expedited(struct percpu_ref *ref);
 
 /**
  * percpu_ref_kill - drop the initial ref
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index fe5a3342e960..a89cf09a8268 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -184,3 +184,19 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
+
+/*
+ * XXX: Temporary kludge to work around SCSI blk-mq stall.  Used only by
+ * block/blk-mq.c::blk_mq_freeze_queue().  Will be removed during v3.18
+ * devel cycle.  Do not use anywhere else.
+ */
+void __percpu_ref_kill_expedited(struct percpu_ref *ref)
+{
+	WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
+		  "percpu_ref_kill() called more than once on %pf!",
+		  ref->release);
+
+	ref->pcpu_count_ptr |= PCPU_REF_DEAD;
+	synchronize_sched_expedited();
+	percpu_ref_kill_rcu(&ref->rcu);
+}
-- 
cgit v1.2.3


From 9eca80461a45177e456219a9cd944c27675d6512 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:07:33 -0400
Subject: Revert "blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall
 during probe"

This reverts commit 0a30288da1aec914e158c2d7a3482a85f632750f, which
was a temporary fix for SCSI blk-mq stall issue.  The following
patches will fix the issue properly by introducing atomic mode to
percpu_ref.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c                  | 11 +----------
 include/linux/percpu-refcount.h |  1 -
 lib/percpu-refcount.c           | 16 ----------------
 3 files changed, 1 insertion(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 255d79c14dc1..44a78ae3f899 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -119,16 +119,7 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	spin_unlock_irq(q->queue_lock);
 
 	if (freeze) {
-		/*
-		 * XXX: Temporary kludge to work around SCSI blk-mq stall.
-		 * SCSI synchronously creates and destroys many queues
-		 * back-to-back during probe leading to lengthy stalls.
-		 * This will be fixed by keeping ->mq_usage_counter in
-		 * atomic mode until genhd registration, but, for now,
-		 * let's work around using expedited synchronization.
-		 */
-		__percpu_ref_kill_expedited(&q->mq_usage_counter);
-
+		percpu_ref_kill(&q->mq_usage_counter);
 		blk_mq_run_queues(q, false);
 	}
 	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 11b38ceca7e2..5df6784bd9d2 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -72,7 +72,6 @@ void percpu_ref_reinit(struct percpu_ref *ref);
 void percpu_ref_exit(struct percpu_ref *ref);
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill);
-void __percpu_ref_kill_expedited(struct percpu_ref *ref);
 
 /**
  * percpu_ref_kill - drop the initial ref
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index c6c31e2829b1..559ee0b20318 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -189,19 +189,3 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
-
-/*
- * XXX: Temporary kludge to work around SCSI blk-mq stall.  Used only by
- * block/blk-mq.c::blk_mq_freeze_queue().  Will be removed during v3.18
- * devel cycle.  Do not use anywhere else.
- */
-void __percpu_ref_kill_expedited(struct percpu_ref *ref)
-{
-	WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
-		  "percpu_ref_kill() called more than once on %pf!",
-		  ref->release);
-
-	ref->pcpu_count_ptr |= PCPU_REF_DEAD;
-	synchronize_sched_expedited();
-	percpu_ref_kill_rcu(&ref->rcu);
-}
-- 
cgit v1.2.3


From a2237370194484ee6aeeff04b617e4b14d178966 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:48 -0400
Subject: percpu_ref: relocate percpu_ref_reinit()

percpu_ref is gonna go through restructuring.  Move
percpu_ref_reinit() after percpu_ref_kill_and_confirm().  This will
make later changes easier to follow and result in cleaner
organization.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
---
 include/linux/percpu-refcount.h |  2 +-
 lib/percpu-refcount.c           | 70 ++++++++++++++++++++---------------------
 2 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 5df6784bd9d2..f015f139d491 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -68,10 +68,10 @@ struct percpu_ref {
 
 int __must_check percpu_ref_init(struct percpu_ref *ref,
 				 percpu_ref_func_t *release, gfp_t gfp);
-void percpu_ref_reinit(struct percpu_ref *ref);
 void percpu_ref_exit(struct percpu_ref *ref);
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill);
+void percpu_ref_reinit(struct percpu_ref *ref);
 
 /**
  * percpu_ref_kill - drop the initial ref
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 559ee0b20318..070dab5e7d77 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -62,41 +62,6 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
 }
 EXPORT_SYMBOL_GPL(percpu_ref_init);
 
-/**
- * percpu_ref_reinit - re-initialize a percpu refcount
- * @ref: perpcu_ref to re-initialize
- *
- * Re-initialize @ref so that it's in the same state as when it finished
- * percpu_ref_init().  @ref must have been initialized successfully, killed
- * and reached 0 but not exited.
- *
- * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
- * this function is in progress.
- */
-void percpu_ref_reinit(struct percpu_ref *ref)
-{
-	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
-	int cpu;
-
-	BUG_ON(!pcpu_count);
-	WARN_ON(!percpu_ref_is_zero(ref));
-
-	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
-
-	/*
-	 * Restore per-cpu operation.  smp_store_release() is paired with
-	 * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees
-	 * that the zeroing is visible to all percpu accesses which can see
-	 * the following PCPU_REF_DEAD clearing.
-	 */
-	for_each_possible_cpu(cpu)
-		*per_cpu_ptr(pcpu_count, cpu) = 0;
-
-	smp_store_release(&ref->pcpu_count_ptr,
-			  ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
-}
-EXPORT_SYMBOL_GPL(percpu_ref_reinit);
-
 /**
  * percpu_ref_exit - undo percpu_ref_init()
  * @ref: percpu_ref to exit
@@ -189,3 +154,38 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
+
+/**
+ * percpu_ref_reinit - re-initialize a percpu refcount
+ * @ref: perpcu_ref to re-initialize
+ *
+ * Re-initialize @ref so that it's in the same state as when it finished
+ * percpu_ref_init().  @ref must have been initialized successfully, killed
+ * and reached 0 but not exited.
+ *
+ * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
+ * this function is in progress.
+ */
+void percpu_ref_reinit(struct percpu_ref *ref)
+{
+	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
+	int cpu;
+
+	BUG_ON(!pcpu_count);
+	WARN_ON(!percpu_ref_is_zero(ref));
+
+	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+
+	/*
+	 * Restore per-cpu operation.  smp_store_release() is paired with
+	 * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees
+	 * that the zeroing is visible to all percpu accesses which can see
+	 * the following PCPU_REF_DEAD clearing.
+	 */
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(pcpu_count, cpu) = 0;
+
+	smp_store_release(&ref->pcpu_count_ptr,
+			  ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_reinit);
-- 
cgit v1.2.3


From 6251f9976af7656b6970a8820153f356430f5de2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:48 -0400
Subject: percpu_ref: minor code and comment updates

* Some comments became stale.  Updated.
* percpu_ref_tryget() unnecessarily initializes @ret.  Removed.
* A blank line removed from percpu_ref_kill_rcu().
* Explicit function name in a WARN format string replaced with __func__.
* WARN_ON() in percpu_ref_reinit() converted to WARN_ON_ONCE().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
---
 include/linux/percpu-refcount.h | 25 ++++++++++++++++---------
 lib/percpu-refcount.c           | 14 ++++++--------
 2 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index f015f139d491..d44b027f74fd 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -115,8 +115,10 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
  * percpu_ref_get - increment a percpu refcount
  * @ref: percpu_ref to get
  *
- * Analagous to atomic_inc().
-  */
+ * Analagous to atomic_long_inc().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
 static inline void percpu_ref_get(struct percpu_ref *ref)
 {
 	unsigned long __percpu *pcpu_count;
@@ -138,12 +140,12 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
  * Increment a percpu refcount unless its count already reached zero.
  * Returns %true on success; %false on failure.
  *
- * The caller is responsible for ensuring that @ref stays accessible.
+ * This function is safe to call as long as @ref is between init and exit.
  */
 static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 {
 	unsigned long __percpu *pcpu_count;
-	int ret = false;
+	int ret;
 
 	rcu_read_lock_sched();
 
@@ -166,12 +168,13 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
  * Increment a percpu refcount unless it has already been killed.  Returns
  * %true on success; %false on failure.
  *
- * Completion of percpu_ref_kill() in itself doesn't guarantee that tryget
- * will fail.  For such guarantee, percpu_ref_kill_and_confirm() should be
- * used.  After the confirm_kill callback is invoked, it's guaranteed that
- * no new reference will be given out by percpu_ref_tryget().
+ * Completion of percpu_ref_kill() in itself doesn't guarantee that this
+ * function will fail.  For such guarantee, percpu_ref_kill_and_confirm()
+ * should be used.  After the confirm_kill callback is invoked, it's
+ * guaranteed that no new reference will be given out by
+ * percpu_ref_tryget_live().
  *
- * The caller is responsible for ensuring that @ref stays accessible.
+ * This function is safe to call as long as @ref is between init and exit.
  */
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
@@ -196,6 +199,8 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
  *
  * Decrement the refcount, and if 0, call the release function (which was passed
  * to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
  */
 static inline void percpu_ref_put(struct percpu_ref *ref)
 {
@@ -216,6 +221,8 @@ static inline void percpu_ref_put(struct percpu_ref *ref)
  * @ref: percpu_ref to test
  *
  * Returns %true if @ref reached zero.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
  */
 static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
 {
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 070dab5e7d77..8ef3f5c20df6 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -108,7 +108,6 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 	 * reaching 0 before we add the percpu counts. But doing it at the same
 	 * time is equivalent and saves us atomic operations:
 	 */
-
 	atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count);
 
 	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
@@ -120,8 +119,8 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 		ref->confirm_kill(ref);
 
 	/*
-	 * Now we're in single atomic_t mode with a consistent refcount, so it's
-	 * safe to drop our initial ref:
+	 * Now we're in single atomic_long_t mode with a consistent
+	 * refcount, so it's safe to drop our initial ref:
 	 */
 	percpu_ref_put(ref);
 }
@@ -134,8 +133,8 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
  * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
  * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
  * called after @ref is seen as dead from all CPUs - all further
- * invocations of percpu_ref_tryget() will fail.  See percpu_ref_tryget()
- * for more details.
+ * invocations of percpu_ref_tryget_live() will fail.  See
+ * percpu_ref_tryget_live() for more details.
  *
  * Due to the way percpu_ref is implemented, @confirm_kill will be called
  * after at least one full RCU grace period has passed but this is an
@@ -145,8 +144,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)
 {
 	WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
-		  "percpu_ref_kill() called more than once on %pf!",
-		  ref->release);
+		  "%s called more than once on %pf!", __func__, ref->release);
 
 	ref->pcpu_count_ptr |= PCPU_REF_DEAD;
 	ref->confirm_kill = confirm_kill;
@@ -172,7 +170,7 @@ void percpu_ref_reinit(struct percpu_ref *ref)
 	int cpu;
 
 	BUG_ON(!pcpu_count);
-	WARN_ON(!percpu_ref_is_zero(ref));
+	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
 
 	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
 
-- 
cgit v1.2.3


From eecc16ba9a49b05dd847a317af166a6728eb56ca Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:48 -0400
Subject: percpu_ref: replace pcpu_ prefix with percpu_

percpu_ref uses pcpu_ prefix for internal stuff and percpu_ for
externally visible ones.  This is the same convention used in the
percpu allocator implementation.  It works fine there but percpu_ref
doesn't have too much internal-only stuff and scattered usages of
pcpu_ prefix are confusing than helpful.

This patch replaces all pcpu_ prefixes with percpu_.  This is pure
rename and there's no functional change.  Note that PCPU_REF_DEAD is
renamed to __PERCPU_REF_DEAD to signify that the flag is internal.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
---
 include/linux/percpu-refcount.h | 46 ++++++++++++++++-----------------
 lib/percpu-refcount.c           | 56 +++++++++++++++++++++--------------------
 2 files changed, 52 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index d44b027f74fd..3d463a39e0f7 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -13,7 +13,7 @@
  *
  * The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less
  * than an atomic_t - this is because of the way shutdown works, see
- * percpu_ref_kill()/PCPU_COUNT_BIAS.
+ * percpu_ref_kill()/PERCPU_COUNT_BIAS.
  *
  * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the
  * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill()
@@ -60,7 +60,7 @@ struct percpu_ref {
 	 * The low bit of the pointer indicates whether the ref is in percpu
 	 * mode; if set, then get/put will manipulate the atomic_t.
 	 */
-	unsigned long		pcpu_count_ptr;
+	unsigned long		percpu_count_ptr;
 	percpu_ref_func_t	*release;
 	percpu_ref_func_t	*confirm_kill;
 	struct rcu_head		rcu;
@@ -88,26 +88,26 @@ static inline void percpu_ref_kill(struct percpu_ref *ref)
 	return percpu_ref_kill_and_confirm(ref, NULL);
 }
 
-#define PCPU_REF_DEAD		1
+#define __PERCPU_REF_DEAD	1
 
 /*
  * Internal helper.  Don't use outside percpu-refcount proper.  The
  * function doesn't return the pointer and let the caller test it for NULL
  * because doing so forces the compiler to generate two conditional
- * branches as it can't assume that @ref->pcpu_count is not NULL.
+ * branches as it can't assume that @ref->percpu_count is not NULL.
  */
-static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
-				    unsigned long __percpu **pcpu_countp)
+static inline bool __percpu_ref_alive(struct percpu_ref *ref,
+				      unsigned long __percpu **percpu_countp)
 {
-	unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr);
+	unsigned long percpu_ptr = ACCESS_ONCE(ref->percpu_count_ptr);
 
 	/* paired with smp_store_release() in percpu_ref_reinit() */
 	smp_read_barrier_depends();
 
-	if (unlikely(pcpu_ptr & PCPU_REF_DEAD))
+	if (unlikely(percpu_ptr & __PERCPU_REF_DEAD))
 		return false;
 
-	*pcpu_countp = (unsigned long __percpu *)pcpu_ptr;
+	*percpu_countp = (unsigned long __percpu *)percpu_ptr;
 	return true;
 }
 
@@ -121,12 +121,12 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
  */
 static inline void percpu_ref_get(struct percpu_ref *ref)
 {
-	unsigned long __percpu *pcpu_count;
+	unsigned long __percpu *percpu_count;
 
 	rcu_read_lock_sched();
 
-	if (__pcpu_ref_alive(ref, &pcpu_count))
-		this_cpu_inc(*pcpu_count);
+	if (__percpu_ref_alive(ref, &percpu_count))
+		this_cpu_inc(*percpu_count);
 	else
 		atomic_long_inc(&ref->count);
 
@@ -144,13 +144,13 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
  */
 static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 {
-	unsigned long __percpu *pcpu_count;
+	unsigned long __percpu *percpu_count;
 	int ret;
 
 	rcu_read_lock_sched();
 
-	if (__pcpu_ref_alive(ref, &pcpu_count)) {
-		this_cpu_inc(*pcpu_count);
+	if (__percpu_ref_alive(ref, &percpu_count)) {
+		this_cpu_inc(*percpu_count);
 		ret = true;
 	} else {
 		ret = atomic_long_inc_not_zero(&ref->count);
@@ -178,13 +178,13 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
  */
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
-	unsigned long __percpu *pcpu_count;
+	unsigned long __percpu *percpu_count;
 	int ret = false;
 
 	rcu_read_lock_sched();
 
-	if (__pcpu_ref_alive(ref, &pcpu_count)) {
-		this_cpu_inc(*pcpu_count);
+	if (__percpu_ref_alive(ref, &percpu_count)) {
+		this_cpu_inc(*percpu_count);
 		ret = true;
 	}
 
@@ -204,12 +204,12 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
  */
 static inline void percpu_ref_put(struct percpu_ref *ref)
 {
-	unsigned long __percpu *pcpu_count;
+	unsigned long __percpu *percpu_count;
 
 	rcu_read_lock_sched();
 
-	if (__pcpu_ref_alive(ref, &pcpu_count))
-		this_cpu_dec(*pcpu_count);
+	if (__percpu_ref_alive(ref, &percpu_count))
+		this_cpu_dec(*percpu_count);
 	else if (unlikely(atomic_long_dec_and_test(&ref->count)))
 		ref->release(ref);
 
@@ -226,9 +226,9 @@ static inline void percpu_ref_put(struct percpu_ref *ref)
  */
 static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
 {
-	unsigned long __percpu *pcpu_count;
+	unsigned long __percpu *percpu_count;
 
-	if (__pcpu_ref_alive(ref, &pcpu_count))
+	if (__percpu_ref_alive(ref, &percpu_count))
 		return false;
 	return !atomic_long_read(&ref->count);
 }
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 8ef3f5c20df6..5aea6b7356c7 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -11,8 +11,8 @@
  * percpu counters will all sum to the correct value
  *
  * (More precisely: because moduler arithmatic is commutative the sum of all the
- * pcpu_count vars will be equal to what it would have been if all the gets and
- * puts were done to a single integer, even if some of the percpu integers
+ * percpu_count vars will be equal to what it would have been if all the gets
+ * and puts were done to a single integer, even if some of the percpu integers
  * overflow or underflow).
  *
  * The real trick to implementing percpu refcounts is shutdown. We can't detect
@@ -29,11 +29,12 @@
  * atomic_long_t can't hit 0 before we've added up all the percpu refs.
  */
 
-#define PCPU_COUNT_BIAS		(1LU << (BITS_PER_LONG - 1))
+#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))
 
-static unsigned long __percpu *pcpu_count_ptr(struct percpu_ref *ref)
+static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
 {
-	return (unsigned long __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
+	return (unsigned long __percpu *)
+		(ref->percpu_count_ptr & ~__PERCPU_REF_DEAD);
 }
 
 /**
@@ -51,10 +52,11 @@ static unsigned long __percpu *pcpu_count_ptr(struct percpu_ref *ref)
 int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
 		    gfp_t gfp)
 {
-	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+	atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS);
 
-	ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned long, gfp);
-	if (!ref->pcpu_count_ptr)
+	ref->percpu_count_ptr =
+		(unsigned long)alloc_percpu_gfp(unsigned long, gfp);
+	if (!ref->percpu_count_ptr)
 		return -ENOMEM;
 
 	ref->release = release;
@@ -74,11 +76,11 @@ EXPORT_SYMBOL_GPL(percpu_ref_init);
  */
 void percpu_ref_exit(struct percpu_ref *ref)
 {
-	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 
-	if (pcpu_count) {
-		free_percpu(pcpu_count);
-		ref->pcpu_count_ptr = PCPU_REF_DEAD;
+	if (percpu_count) {
+		free_percpu(percpu_count);
+		ref->percpu_count_ptr = __PERCPU_REF_DEAD;
 	}
 }
 EXPORT_SYMBOL_GPL(percpu_ref_exit);
@@ -86,14 +88,14 @@ EXPORT_SYMBOL_GPL(percpu_ref_exit);
 static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 {
 	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
-	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 	unsigned long count = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		count += *per_cpu_ptr(pcpu_count, cpu);
+		count += *per_cpu_ptr(percpu_count, cpu);
 
-	pr_debug("global %ld pcpu %ld",
+	pr_debug("global %ld percpu %ld",
 		 atomic_long_read(&ref->count), (long)count);
 
 	/*
@@ -108,7 +110,7 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 	 * reaching 0 before we add the percpu counts. But doing it at the same
 	 * time is equivalent and saves us atomic operations:
 	 */
-	atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count);
+	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);
 
 	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
 		  "percpu ref (%pf) <= 0 (%ld) after killed",
@@ -143,10 +145,10 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)
 {
-	WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
+	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
 		  "%s called more than once on %pf!", __func__, ref->release);
 
-	ref->pcpu_count_ptr |= PCPU_REF_DEAD;
+	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
 	ref->confirm_kill = confirm_kill;
 
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
@@ -166,24 +168,24 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
  */
 void percpu_ref_reinit(struct percpu_ref *ref)
 {
-	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 	int cpu;
 
-	BUG_ON(!pcpu_count);
+	BUG_ON(!percpu_count);
 	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
 
-	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+	atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS);
 
 	/*
 	 * Restore per-cpu operation.  smp_store_release() is paired with
-	 * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees
-	 * that the zeroing is visible to all percpu accesses which can see
-	 * the following PCPU_REF_DEAD clearing.
+	 * smp_read_barrier_depends() in __percpu_ref_alive() and
+	 * guarantees that the zeroing is visible to all percpu accesses
+	 * which can see the following __PERCPU_REF_DEAD clearing.
 	 */
 	for_each_possible_cpu(cpu)
-		*per_cpu_ptr(pcpu_count, cpu) = 0;
+		*per_cpu_ptr(percpu_count, cpu) = 0;
 
-	smp_store_release(&ref->pcpu_count_ptr,
-			  ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
+	smp_store_release(&ref->percpu_count_ptr,
+			  ref->percpu_count_ptr & ~__PERCPU_REF_DEAD);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
-- 
cgit v1.2.3


From 9e804d1f58da1eca079f796347c1cf1d1df564e2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:48 -0400
Subject: percpu_ref: rename things to prepare for decoupling percpu/atomic
 mode switch

percpu_ref will be restructured so that percpu/atomic mode switching
and reference killing are dedoupled.  In preparation, do the following
renames.

* percpu_ref->confirm_kill	-> percpu_ref->confirm_switch
* __PERCPU_REF_DEAD		-> __PERCPU_REF_ATOMIC
* __percpu_ref_alive()		-> __ref_is_percpu()

This patch is pure rename and doesn't introduce any functional
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
---
 include/linux/percpu-refcount.h | 25 ++++++++++++++-----------
 lib/percpu-refcount.c           | 22 +++++++++++-----------
 2 files changed, 25 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 3d463a39e0f7..910e5f72055d 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -54,6 +54,11 @@
 struct percpu_ref;
 typedef void (percpu_ref_func_t)(struct percpu_ref *);
 
+/* flags set in the lower bits of percpu_ref->percpu_count_ptr */
+enum {
+	__PERCPU_REF_ATOMIC	= 1LU << 0,	/* operating in atomic mode */
+};
+
 struct percpu_ref {
 	atomic_long_t		count;
 	/*
@@ -62,7 +67,7 @@ struct percpu_ref {
 	 */
 	unsigned long		percpu_count_ptr;
 	percpu_ref_func_t	*release;
-	percpu_ref_func_t	*confirm_kill;
+	percpu_ref_func_t	*confirm_switch;
 	struct rcu_head		rcu;
 };
 
@@ -88,23 +93,21 @@ static inline void percpu_ref_kill(struct percpu_ref *ref)
 	return percpu_ref_kill_and_confirm(ref, NULL);
 }
 
-#define __PERCPU_REF_DEAD	1
-
 /*
  * Internal helper.  Don't use outside percpu-refcount proper.  The
  * function doesn't return the pointer and let the caller test it for NULL
  * because doing so forces the compiler to generate two conditional
  * branches as it can't assume that @ref->percpu_count is not NULL.
  */
-static inline bool __percpu_ref_alive(struct percpu_ref *ref,
-				      unsigned long __percpu **percpu_countp)
+static inline bool __ref_is_percpu(struct percpu_ref *ref,
+					  unsigned long __percpu **percpu_countp)
 {
 	unsigned long percpu_ptr = ACCESS_ONCE(ref->percpu_count_ptr);
 
 	/* paired with smp_store_release() in percpu_ref_reinit() */
 	smp_read_barrier_depends();
 
-	if (unlikely(percpu_ptr & __PERCPU_REF_DEAD))
+	if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC))
 		return false;
 
 	*percpu_countp = (unsigned long __percpu *)percpu_ptr;
@@ -125,7 +128,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 
 	rcu_read_lock_sched();
 
-	if (__percpu_ref_alive(ref, &percpu_count))
+	if (__ref_is_percpu(ref, &percpu_count))
 		this_cpu_inc(*percpu_count);
 	else
 		atomic_long_inc(&ref->count);
@@ -149,7 +152,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 
 	rcu_read_lock_sched();
 
-	if (__percpu_ref_alive(ref, &percpu_count)) {
+	if (__ref_is_percpu(ref, &percpu_count)) {
 		this_cpu_inc(*percpu_count);
 		ret = true;
 	} else {
@@ -183,7 +186,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 
 	rcu_read_lock_sched();
 
-	if (__percpu_ref_alive(ref, &percpu_count)) {
+	if (__ref_is_percpu(ref, &percpu_count)) {
 		this_cpu_inc(*percpu_count);
 		ret = true;
 	}
@@ -208,7 +211,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref)
 
 	rcu_read_lock_sched();
 
-	if (__percpu_ref_alive(ref, &percpu_count))
+	if (__ref_is_percpu(ref, &percpu_count))
 		this_cpu_dec(*percpu_count);
 	else if (unlikely(atomic_long_dec_and_test(&ref->count)))
 		ref->release(ref);
@@ -228,7 +231,7 @@ static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
 {
 	unsigned long __percpu *percpu_count;
 
-	if (__percpu_ref_alive(ref, &percpu_count))
+	if (__ref_is_percpu(ref, &percpu_count))
 		return false;
 	return !atomic_long_read(&ref->count);
 }
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 5aea6b7356c7..7aef590c1ef8 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -34,7 +34,7 @@
 static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
 {
 	return (unsigned long __percpu *)
-		(ref->percpu_count_ptr & ~__PERCPU_REF_DEAD);
+		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
 }
 
 /**
@@ -80,7 +80,7 @@ void percpu_ref_exit(struct percpu_ref *ref)
 
 	if (percpu_count) {
 		free_percpu(percpu_count);
-		ref->percpu_count_ptr = __PERCPU_REF_DEAD;
+		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC;
 	}
 }
 EXPORT_SYMBOL_GPL(percpu_ref_exit);
@@ -117,8 +117,8 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 		  ref->release, atomic_long_read(&ref->count));
 
 	/* @ref is viewed as dead on all CPUs, send out kill confirmation */
-	if (ref->confirm_kill)
-		ref->confirm_kill(ref);
+	if (ref->confirm_switch)
+		ref->confirm_switch(ref);
 
 	/*
 	 * Now we're in single atomic_long_t mode with a consistent
@@ -145,11 +145,11 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)
 {
-	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
+	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC,
 		  "%s called more than once on %pf!", __func__, ref->release);
 
-	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
-	ref->confirm_kill = confirm_kill;
+	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+	ref->confirm_switch = confirm_kill;
 
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
 }
@@ -178,14 +178,14 @@ void percpu_ref_reinit(struct percpu_ref *ref)
 
 	/*
 	 * Restore per-cpu operation.  smp_store_release() is paired with
-	 * smp_read_barrier_depends() in __percpu_ref_alive() and
-	 * guarantees that the zeroing is visible to all percpu accesses
-	 * which can see the following __PERCPU_REF_DEAD clearing.
+	 * smp_read_barrier_depends() in __ref_is_percpu() and guarantees
+	 * that the zeroing is visible to all percpu accesses which can see
+	 * the following __PERCPU_REF_ATOMIC clearing.
 	 */
 	for_each_possible_cpu(cpu)
 		*per_cpu_ptr(percpu_count, cpu) = 0;
 
 	smp_store_release(&ref->percpu_count_ptr,
-			  ref->percpu_count_ptr & ~__PERCPU_REF_DEAD);
+			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
-- 
cgit v1.2.3


From 27344a9017cdaff82a167827da3001a0918afdc3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:49 -0400
Subject: percpu_ref: add PCPU_REF_DEAD

percpu_ref will be restructured so that percpu/atomic mode switching
and reference killing are dedoupled.  In preparation, add
PCPU_REF_DEAD and PCPU_REF_ATOMIC_DEAD which is OR of ATOMIC and DEAD.
For now, ATOMIC and DEAD are changed together and all PCPU_REF_ATOMIC
uses are converted to PCPU_REF_ATOMIC_DEAD without causing any
behavior changes.

percpu_ref_init() now specifies an explicit alignment when allocating
the percpu counters so that the pointer has enough unused low bits to
accomodate the flags.  Note that one flag was fine as min alignment
for percpu memory is 2 bytes but two flags are already too many for
the natural alignment of unsigned longs on archs like cris and m68k.

v2: The original patch had BUILD_BUG_ON() which triggers if unsigned
    long's alignment isn't enough to accomodate the flags, which
    triggered on cris and m64k.  percpu_ref_init() updated to specify
    the required alignment explicitly.  Reported by Fengguang.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
Cc: kbuild test robot <fengguang.wu@intel.com>
---
 include/linux/percpu-refcount.h |  6 +++++-
 lib/percpu-refcount.c           | 19 +++++++++++--------
 2 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 910e5f72055d..bd9483d390b4 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -57,6 +57,10 @@ typedef void (percpu_ref_func_t)(struct percpu_ref *);
 /* flags set in the lower bits of percpu_ref->percpu_count_ptr */
 enum {
 	__PERCPU_REF_ATOMIC	= 1LU << 0,	/* operating in atomic mode */
+	__PERCPU_REF_DEAD	= 1LU << 1,	/* (being) killed */
+	__PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD,
+
+	__PERCPU_REF_FLAG_BITS	= 2,
 };
 
 struct percpu_ref {
@@ -107,7 +111,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
 	/* paired with smp_store_release() in percpu_ref_reinit() */
 	smp_read_barrier_depends();
 
-	if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC))
+	if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD))
 		return false;
 
 	*percpu_countp = (unsigned long __percpu *)percpu_ptr;
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 7aef590c1ef8..e2ff19f970cf 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -34,7 +34,7 @@
 static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
 {
 	return (unsigned long __percpu *)
-		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
+		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
 }
 
 /**
@@ -52,10 +52,13 @@ static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
 int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
 		    gfp_t gfp)
 {
+	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
+			     __alignof__(unsigned long));
+
 	atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS);
 
-	ref->percpu_count_ptr =
-		(unsigned long)alloc_percpu_gfp(unsigned long, gfp);
+	ref->percpu_count_ptr = (unsigned long)
+		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
 	if (!ref->percpu_count_ptr)
 		return -ENOMEM;
 
@@ -80,7 +83,7 @@ void percpu_ref_exit(struct percpu_ref *ref)
 
 	if (percpu_count) {
 		free_percpu(percpu_count);
-		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC;
+		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
 	}
 }
 EXPORT_SYMBOL_GPL(percpu_ref_exit);
@@ -145,10 +148,10 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)
 {
-	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC,
+	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC_DEAD,
 		  "%s called more than once on %pf!", __func__, ref->release);
 
-	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC_DEAD;
 	ref->confirm_switch = confirm_kill;
 
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
@@ -180,12 +183,12 @@ void percpu_ref_reinit(struct percpu_ref *ref)
 	 * Restore per-cpu operation.  smp_store_release() is paired with
 	 * smp_read_barrier_depends() in __ref_is_percpu() and guarantees
 	 * that the zeroing is visible to all percpu accesses which can see
-	 * the following __PERCPU_REF_ATOMIC clearing.
+	 * the following __PERCPU_REF_ATOMIC_DEAD clearing.
 	 */
 	for_each_possible_cpu(cpu)
 		*per_cpu_ptr(percpu_count, cpu) = 0;
 
 	smp_store_release(&ref->percpu_count_ptr,
-			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
+			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
-- 
cgit v1.2.3


From 490c79a65708873228cf114cf00e32c204e4e907 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:49 -0400
Subject: percpu_ref: decouple switching to atomic mode and killing

percpu_ref has treated the dropping of the base reference and
switching to atomic mode as an integral operation; however, there's
nothing inherent tying the two together.

The use cases for percpu_ref have been expanding continuously.  While
the current init/kill/reinit/exit model can cover a lot, the coupling
of kill/reinit with atomic/percpu mode switching is turning out to be
too restrictive for use cases where many percpu_refs are created and
destroyed back-to-back with only some of them reaching extended
operation.  The coupling also makes implementing always-atomic debug
mode difficult.

This patch separates out atomic mode switching into
percpu_ref_switch_to_atomic() and reimplements
percpu_ref_kill_and_confirm() on top of it.

* The handling of __PERCPU_REF_ATOMIC and __PERCPU_REF_DEAD is now
  differentiated.  Among get/put operations, percpu_ref_tryget_live()
  is the only one which cares about DEAD.

* percpu_ref_switch_to_atomic() can be called multiple times on the
  same ref.  This means that multiple @confirm_switch may get queued
  up which we can't do reliably without extra memory area.  This is
  handled by making the later invocation synchronously wait for the
  completion of the previous one.  This isn't particularly desirable
  but such synchronous waits shouldn't happen in most cases.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/percpu-refcount.h |   8 ++-
 lib/percpu-refcount.c           | 141 +++++++++++++++++++++++++++++++---------
 2 files changed, 116 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index bd9483d390b4..d1252e1335e8 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -78,9 +78,11 @@ struct percpu_ref {
 int __must_check percpu_ref_init(struct percpu_ref *ref,
 				 percpu_ref_func_t *release, gfp_t gfp);
 void percpu_ref_exit(struct percpu_ref *ref);
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+				 percpu_ref_func_t *confirm_switch);
+void percpu_ref_reinit(struct percpu_ref *ref);
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill);
-void percpu_ref_reinit(struct percpu_ref *ref);
 
 /**
  * percpu_ref_kill - drop the initial ref
@@ -111,7 +113,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
 	/* paired with smp_store_release() in percpu_ref_reinit() */
 	smp_read_barrier_depends();
 
-	if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD))
+	if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC))
 		return false;
 
 	*percpu_countp = (unsigned long __percpu *)percpu_ptr;
@@ -193,6 +195,8 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 	if (__ref_is_percpu(ref, &percpu_count)) {
 		this_cpu_inc(*percpu_count);
 		ret = true;
+	} else if (!(ACCESS_ONCE(ref->percpu_count_ptr) & __PERCPU_REF_DEAD)) {
+		ret = atomic_long_inc_not_zero(&ref->count);
 	}
 
 	rcu_read_unlock_sched();
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index e2ff19f970cf..6e0d14366c5d 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -1,6 +1,8 @@
 #define pr_fmt(fmt) "%s: " fmt "\n", __func__
 
 #include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
 #include <linux/percpu-refcount.h>
 
 /*
@@ -31,6 +33,8 @@
 
 #define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))
 
+static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
+
 static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
 {
 	return (unsigned long __percpu *)
@@ -88,7 +92,19 @@ void percpu_ref_exit(struct percpu_ref *ref)
 }
 EXPORT_SYMBOL_GPL(percpu_ref_exit);
 
-static void percpu_ref_kill_rcu(struct rcu_head *rcu)
+static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
+{
+	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+
+	ref->confirm_switch(ref);
+	ref->confirm_switch = NULL;
+	wake_up_all(&percpu_ref_switch_waitq);
+
+	/* drop ref from percpu_ref_switch_to_atomic() */
+	percpu_ref_put(ref);
+}
+
+static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 {
 	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
 	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
@@ -116,47 +132,79 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);
 
 	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
-		  "percpu ref (%pf) <= 0 (%ld) after killed",
+		  "percpu ref (%pf) <= 0 (%ld) after switching to atomic",
 		  ref->release, atomic_long_read(&ref->count));
 
-	/* @ref is viewed as dead on all CPUs, send out kill confirmation */
-	if (ref->confirm_switch)
-		ref->confirm_switch(ref);
+	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
+	percpu_ref_call_confirm_rcu(rcu);
+}
 
-	/*
-	 * Now we're in single atomic_long_t mode with a consistent
-	 * refcount, so it's safe to drop our initial ref:
-	 */
-	percpu_ref_put(ref);
+static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
+{
+}
+
+static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+					  percpu_ref_func_t *confirm_switch)
+{
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
+		/* switching from percpu to atomic */
+		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+
+		/*
+		 * Non-NULL ->confirm_switch is used to indicate that
+		 * switching is in progress.  Use noop one if unspecified.
+		 */
+		WARN_ON_ONCE(ref->confirm_switch);
+		ref->confirm_switch =
+			confirm_switch ?: percpu_ref_noop_confirm_switch;
+
+		percpu_ref_get(ref);	/* put after confirmation */
+		call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
+	} else if (confirm_switch) {
+		/*
+		 * Somebody already set ATOMIC.  Switching may still be in
+		 * progress.  @confirm_switch must be invoked after the
+		 * switching is complete and a full sched RCU grace period
+		 * has passed.  Wait synchronously for the previous
+		 * switching and schedule @confirm_switch invocation.
+		 */
+		wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+		ref->confirm_switch = confirm_switch;
+
+		percpu_ref_get(ref);	/* put after confirmation */
+		call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
+	}
 }
 
 /**
- * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
- * @ref: percpu_ref to kill
- * @confirm_kill: optional confirmation callback
+ * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
+ * @ref: percpu_ref to switch to atomic mode
+ * @confirm_switch: optional confirmation callback
  *
- * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
- * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
- * called after @ref is seen as dead from all CPUs - all further
- * invocations of percpu_ref_tryget_live() will fail.  See
- * percpu_ref_tryget_live() for more details.
+ * There's no reason to use this function for the usual reference counting.
+ * Use percpu_ref_kill[_and_confirm]().
+ *
+ * Schedule switching of @ref to atomic mode.  All its percpu counts will
+ * be collected to the main atomic counter.  On completion, when all CPUs
+ * are guaraneed to be in atomic mode, @confirm_switch, which may not
+ * block, is invoked.  This function may be invoked concurrently with all
+ * the get/put operations and can safely be mixed with kill and reinit
+ * operations.
  *
- * Due to the way percpu_ref is implemented, @confirm_kill will be called
- * after at least one full RCU grace period has passed but this is an
- * implementation detail and callers must not depend on it.
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode.  In such cases, @confirm_switch
+ * will be invoked after the switching is complete.
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
  */
-void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
-				 percpu_ref_func_t *confirm_kill)
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+				 percpu_ref_func_t *confirm_switch)
 {
-	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC_DEAD,
-		  "%s called more than once on %pf!", __func__, ref->release);
-
-	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC_DEAD;
-	ref->confirm_switch = confirm_kill;
-
-	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
+	__percpu_ref_switch_to_atomic(ref, confirm_switch);
 }
-EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
 
 /**
  * percpu_ref_reinit - re-initialize a percpu refcount
@@ -192,3 +240,34 @@ void percpu_ref_reinit(struct percpu_ref *ref)
 			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+
+/**
+ * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
+ * @ref: percpu_ref to kill
+ * @confirm_kill: optional confirmation callback
+ *
+ * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
+ * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
+ * called after @ref is seen as dead from all CPUs at which point all
+ * further invocations of percpu_ref_tryget_live() will fail.  See
+ * percpu_ref_tryget_live() for details.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode by percpu_ref_switch_atomic().
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
+ */
+void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
+				 percpu_ref_func_t *confirm_kill)
+{
+	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
+		  "%s called more than once on %pf!", __func__, ref->release);
+
+	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+	__percpu_ref_switch_to_atomic(ref, confirm_kill);
+	percpu_ref_put(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
-- 
cgit v1.2.3


From f47ad45784611297b699f3dffb6c7222b76afe64 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:49 -0400
Subject: percpu_ref: decouple switching to percpu mode and reinit

percpu_ref has treated the dropping of the base reference and
switching to atomic mode as an integral operation; however, there's
nothing inherent tying the two together.

The use cases for percpu_ref have been expanding continuously.  While
the current init/kill/reinit/exit model can cover a lot, the coupling
of kill/reinit with atomic/percpu mode switching is turning out to be
too restrictive for use cases where many percpu_refs are created and
destroyed back-to-back with only some of them reaching extended
operation.  The coupling also makes implementing always-atomic debug
mode difficult.

This patch separates out percpu mode switching into
percpu_ref_switch_to_percpu() and reimplements percpu_ref_reinit() on
top of it.

* DEAD still requires ATOMIC.  A dead ref can't be switched to percpu
  mode w/o going through reinit.

v2: __percpu_ref_switch_to_percpu() was missing static.  Fixed.
    Reported by Fengguang aka kbuild test robot.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: kbuild test robot <fengguang.wu@intel.com>
---
 include/linux/percpu-refcount.h |  3 +-
 lib/percpu-refcount.c           | 73 ++++++++++++++++++++++++++++++-----------
 2 files changed, 56 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index d1252e1335e8..cd7e20f0fe47 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -80,9 +80,10 @@ int __must_check percpu_ref_init(struct percpu_ref *ref,
 void percpu_ref_exit(struct percpu_ref *ref);
 void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_switch);
-void percpu_ref_reinit(struct percpu_ref *ref);
+void percpu_ref_switch_to_percpu(struct percpu_ref *ref);
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill);
+void percpu_ref_reinit(struct percpu_ref *ref);
 
 /**
  * percpu_ref_kill - drop the initial ref
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 6e0d14366c5d..5a6d43baccc5 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -206,40 +206,54 @@ void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
 	__percpu_ref_switch_to_atomic(ref, confirm_switch);
 }
 
-/**
- * percpu_ref_reinit - re-initialize a percpu refcount
- * @ref: perpcu_ref to re-initialize
- *
- * Re-initialize @ref so that it's in the same state as when it finished
- * percpu_ref_init().  @ref must have been initialized successfully, killed
- * and reached 0 but not exited.
- *
- * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
- * this function is in progress.
- */
-void percpu_ref_reinit(struct percpu_ref *ref)
+static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
 {
 	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 	int cpu;
 
 	BUG_ON(!percpu_count);
-	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
 
-	atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS);
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
+		return;
+
+	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+
+	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
 
 	/*
 	 * Restore per-cpu operation.  smp_store_release() is paired with
 	 * smp_read_barrier_depends() in __ref_is_percpu() and guarantees
 	 * that the zeroing is visible to all percpu accesses which can see
-	 * the following __PERCPU_REF_ATOMIC_DEAD clearing.
+	 * the following __PERCPU_REF_ATOMIC clearing.
 	 */
 	for_each_possible_cpu(cpu)
 		*per_cpu_ptr(percpu_count, cpu) = 0;
 
 	smp_store_release(&ref->percpu_count_ptr,
-			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
+			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
+}
+
+/**
+ * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
+ * @ref: percpu_ref to switch to percpu mode
+ *
+ * There's no reason to use this function for the usual reference counting.
+ * To re-use an expired ref, use percpu_ref_reinit().
+ *
+ * Switch @ref to percpu mode.  This function may be invoked concurrently
+ * with all the get/put operations and can safely be mixed with kill and
+ * reinit operations.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @ref is in the process of switching to atomic mode
+ * by percpu_ref_switch_atomic().
+ */
+void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
+{
+	/* a dying or dead ref can't be switched to percpu mode w/o reinit */
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
+		__percpu_ref_switch_to_percpu(ref);
 }
-EXPORT_SYMBOL_GPL(percpu_ref_reinit);
 
 /**
  * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
@@ -253,8 +267,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_reinit);
  * percpu_ref_tryget_live() for details.
  *
  * This function normally doesn't block and can be called from any context
- * but it may block if @confirm_kill is specified and @ref is already in
- * the process of switching to atomic mode by percpu_ref_switch_atomic().
+ * but it may block if @confirm_kill is specified and @ref is in the
+ * process of switching to atomic mode by percpu_ref_switch_atomic().
  *
  * Due to the way percpu_ref is implemented, @confirm_switch will be called
  * after at least one full sched RCU grace period has passed but this is an
@@ -271,3 +285,24 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 	percpu_ref_put(ref);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
+
+/**
+ * percpu_ref_reinit - re-initialize a percpu refcount
+ * @ref: perpcu_ref to re-initialize
+ *
+ * Re-initialize @ref so that it's in the same state as when it finished
+ * percpu_ref_init().  @ref must have been initialized successfully and
+ * reached 0 but not exited.
+ *
+ * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
+ * this function is in progress.
+ */
+void percpu_ref_reinit(struct percpu_ref *ref)
+{
+	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
+
+	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
+	percpu_ref_get(ref);
+	__percpu_ref_switch_to_percpu(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_reinit);
-- 
cgit v1.2.3


From 2aad2a86f6685c10360ec8a5a55eb9ab7059cb72 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:50 -0400
Subject: percpu_ref: add PERCPU_REF_INIT_* flags

With the recent addition of percpu_ref_reinit(), percpu_ref now can be
used as a persistent switch which can be turned on and off repeatedly
where turning off maps to killing the ref and waiting for it to drain;
however, there currently isn't a way to initialize a percpu_ref in its
off (killed and drained) state, which can be inconvenient for certain
persistent switch use cases.

Similarly, percpu_ref_switch_to_atomic/percpu() allow dynamic
selection of operation mode; however, currently a newly initialized
percpu_ref is always in percpu mode making it impossible to avoid the
latency overhead of switching to atomic mode.

This patch adds @flags to percpu_ref_init() and implements the
following flags.

* PERCPU_REF_INIT_ATOMIC	: start ref in atomic mode
* PERCPU_REF_INIT_DEAD		: start ref killed and drained

These flags should be able to serve the above two use cases.

v2: target_core_tpg.c conversion was missing.  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 block/blk-mq.c                   |  2 +-
 drivers/target/target_core_tpg.c |  2 +-
 fs/aio.c                         |  4 ++--
 include/linux/percpu-refcount.h  | 18 +++++++++++++++++-
 kernel/cgroup.c                  |  7 ++++---
 lib/percpu-refcount.c            | 23 ++++++++++++++++++-----
 6 files changed, 43 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 44a78ae3f899..d85fe01c44ef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1796,7 +1796,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 		goto err_hctxs;
 
 	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
-			    GFP_KERNEL))
+			    0, GFP_KERNEL))
 		goto err_map;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 4ab6da338585..be783f717f19 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -819,7 +819,7 @@ int core_tpg_add_lun(
 {
 	int ret;
 
-	ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release,
+	ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, 0,
 			      GFP_KERNEL);
 	if (ret < 0)
 		return ret;
diff --git a/fs/aio.c b/fs/aio.c
index 8d217ed04e6e..84a751005f5b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -661,10 +661,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 
-	if (percpu_ref_init(&ctx->users, free_ioctx_users, GFP_KERNEL))
+	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
 		goto err;
 
-	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, GFP_KERNEL))
+	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
 		goto err;
 
 	ctx->cpu = alloc_percpu(struct kioctx_cpu);
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index cd7e20f0fe47..b0293f268cd2 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -63,6 +63,21 @@ enum {
 	__PERCPU_REF_FLAG_BITS	= 2,
 };
 
+/* @flags for percpu_ref_init() */
+enum {
+	/*
+	 * Start w/ ref == 1 in atomic mode.  Can be switched to percpu
+	 * operation using percpu_ref_switch_to_percpu().
+	 */
+	PERCPU_REF_INIT_ATOMIC	= 1 << 0,
+
+	/*
+	 * Start dead w/ ref == 0 in atomic mode.  Must be revived with
+	 * percpu_ref_reinit() before used.  Implies INIT_ATOMIC.
+	 */
+	PERCPU_REF_INIT_DEAD	= 1 << 1,
+};
+
 struct percpu_ref {
 	atomic_long_t		count;
 	/*
@@ -76,7 +91,8 @@ struct percpu_ref {
 };
 
 int __must_check percpu_ref_init(struct percpu_ref *ref,
-				 percpu_ref_func_t *release, gfp_t gfp);
+				 percpu_ref_func_t *release, unsigned int flags,
+				 gfp_t gfp);
 void percpu_ref_exit(struct percpu_ref *ref);
 void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_switch);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a99d504294de..753df01a9831 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1634,7 +1634,8 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 		goto out;
 	root_cgrp->id = ret;
 
-	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, GFP_KERNEL);
+	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
+			      GFP_KERNEL);
 	if (ret)
 		goto out;
 
@@ -4510,7 +4511,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
 
 	init_and_link_css(css, ss, cgrp);
 
-	err = percpu_ref_init(&css->refcnt, css_release, GFP_KERNEL);
+	err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
 	if (err)
 		goto err_free_css;
 
@@ -4583,7 +4584,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 		goto out_unlock;
 	}
 
-	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, GFP_KERNEL);
+	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
 	if (ret)
 		goto out_free_cgrp;
 
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 5a6d43baccc5..ed280fb1e5b5 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -45,27 +45,40 @@ static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
  * percpu_ref_init - initialize a percpu refcount
  * @ref: percpu_ref to initialize
  * @release: function which will be called when refcount hits 0
+ * @flags: PERCPU_REF_INIT_* flags
  * @gfp: allocation mask to use
  *
- * Initializes the refcount in single atomic counter mode with a refcount of 1;
- * analagous to atomic_long_set(ref, 1).
+ * Initializes @ref.  If @flags is zero, @ref starts in percpu mode with a
+ * refcount of 1; analagous to atomic_long_set(ref, 1).  See the
+ * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
  *
  * Note that @release must not sleep - it may potentially be called from RCU
  * callback context by percpu_ref_kill().
  */
 int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
-		    gfp_t gfp)
+		    unsigned int flags, gfp_t gfp)
 {
 	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
 			     __alignof__(unsigned long));
-
-	atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS);
+	unsigned long start_count = 0;
 
 	ref->percpu_count_ptr = (unsigned long)
 		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
 	if (!ref->percpu_count_ptr)
 		return -ENOMEM;
 
+	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD))
+		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+	else
+		start_count += PERCPU_COUNT_BIAS;
+
+	if (flags & PERCPU_REF_INIT_DEAD)
+		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+	else
+		start_count++;
+
+	atomic_long_set(&ref->count, start_count);
+
 	ref->release = release;
 	return 0;
 }
-- 
cgit v1.2.3


From 1cae13e75b7a7848c03138636d4eb8d8a5054dd5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:50 -0400
Subject: percpu_ref: make INIT_ATOMIC and switch_to_atomic() sticky

Currently, a percpu_ref which is initialized with
PERPCU_REF_INIT_ATOMIC or switched to atomic mode via
switch_to_atomic() automatically reverts to percpu mode on the first
percpu_ref_reinit().  This makes the atomic mode difficult to use for
cases where a percpu_ref is used as a persistent on/off switch which
may be cycled multiple times.

This patch makes such atomic state sticky so that it survives through
kill/reinit cycles.  After this patch, atomic state is cleared only by
an explicit percpu_ref_switch_to_percpu() call.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/percpu-refcount.h |  5 ++++-
 lib/percpu-refcount.c           | 20 +++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index b0293f268cd2..00c01fc6d88c 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -67,7 +67,9 @@ enum {
 enum {
 	/*
 	 * Start w/ ref == 1 in atomic mode.  Can be switched to percpu
-	 * operation using percpu_ref_switch_to_percpu().
+	 * operation using percpu_ref_switch_to_percpu().  If initialized
+	 * with this flag, the ref will stay in atomic mode until
+	 * percpu_ref_switch_to_percpu() is invoked on it.
 	 */
 	PERCPU_REF_INIT_ATOMIC	= 1 << 0,
 
@@ -87,6 +89,7 @@ struct percpu_ref {
 	unsigned long		percpu_count_ptr;
 	percpu_ref_func_t	*release;
 	percpu_ref_func_t	*confirm_switch;
+	bool			force_atomic:1;
 	struct rcu_head		rcu;
 };
 
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index ed280fb1e5b5..6111bcb28376 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -67,6 +67,8 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
 	if (!ref->percpu_count_ptr)
 		return -ENOMEM;
 
+	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
+
 	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD))
 		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
 	else
@@ -202,7 +204,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
  * are guaraneed to be in atomic mode, @confirm_switch, which may not
  * block, is invoked.  This function may be invoked concurrently with all
  * the get/put operations and can safely be mixed with kill and reinit
- * operations.
+ * operations.  Note that @ref will stay in atomic mode across kill/reinit
+ * cycles until percpu_ref_switch_to_percpu() is called.
  *
  * This function normally doesn't block and can be called from any context
  * but it may block if @confirm_kill is specified and @ref is already in
@@ -216,6 +219,7 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
 void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_switch)
 {
+	ref->force_atomic = true;
 	__percpu_ref_switch_to_atomic(ref, confirm_switch);
 }
 
@@ -255,7 +259,10 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
  *
  * Switch @ref to percpu mode.  This function may be invoked concurrently
  * with all the get/put operations and can safely be mixed with kill and
- * reinit operations.
+ * reinit operations.  This function reverses the sticky atomic state set
+ * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic().  If @ref is
+ * dying or dead, the actual switching takes place on the following
+ * percpu_ref_reinit().
  *
  * This function normally doesn't block and can be called from any context
  * but it may block if @ref is in the process of switching to atomic mode
@@ -263,6 +270,8 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
  */
 void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
 {
+	ref->force_atomic = false;
+
 	/* a dying or dead ref can't be switched to percpu mode w/o reinit */
 	if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
 		__percpu_ref_switch_to_percpu(ref);
@@ -304,8 +313,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
  * @ref: perpcu_ref to re-initialize
  *
  * Re-initialize @ref so that it's in the same state as when it finished
- * percpu_ref_init().  @ref must have been initialized successfully and
- * reached 0 but not exited.
+ * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD.  @ref must have been
+ * initialized successfully and reached 0 but not exited.
  *
  * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
  * this function is in progress.
@@ -316,6 +325,7 @@ void percpu_ref_reinit(struct percpu_ref *ref)
 
 	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
 	percpu_ref_get(ref);
-	__percpu_ref_switch_to_percpu(ref);
+	if (!ref->force_atomic)
+		__percpu_ref_switch_to_percpu(ref);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
-- 
cgit v1.2.3


From 17497acbdce9506fd6a75115dee4ab80c3cc5ee5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2014 13:31:50 -0400
Subject: blk-mq, percpu_ref: start q->mq_usage_counter in atomic mode

blk-mq uses percpu_ref for its usage counter which tracks the number
of in-flight commands and used to synchronously drain the queue on
freeze.  percpu_ref shutdown takes measureable wallclock time as it
involves a sched RCU grace period.  This means that draining a blk-mq
takes measureable wallclock time.  One would think that this shouldn't
matter as queue shutdown should be a rare event which takes place
asynchronously w.r.t. userland.

Unfortunately, SCSI probing involves synchronously setting up and then
tearing down a lot of request_queues back-to-back for non-existent
LUNs.  This means that SCSI probing may take above ten seconds when
scsi-mq is used.

  [    0.949892] scsi host0: Virtio SCSI HBA
  [    1.007864] scsi 0:0:0:0: Direct-Access     QEMU     QEMU HARDDISK    1.1. PQ: 0 ANSI: 5
  [    1.021299] scsi 0:0:1:0: Direct-Access     QEMU     QEMU HARDDISK    1.1. PQ: 0 ANSI: 5
  [    1.520356] tsc: Refined TSC clocksource calibration: 2491.910 MHz

  <stall>

  [   16.186549] sd 0:0:0:0: Attached scsi generic sg0 type 0
  [   16.190478] sd 0:0:1:0: Attached scsi generic sg1 type 0
  [   16.194099] osd: LOADED open-osd 0.2.1
  [   16.203202] sd 0:0:0:0: [sda] 31457280 512-byte logical blocks: (16.1 GB/15.0 GiB)
  [   16.208478] sd 0:0:0:0: [sda] Write Protect is off
  [   16.211439] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
  [   16.218771] sd 0:0:1:0: [sdb] 31457280 512-byte logical blocks: (16.1 GB/15.0 GiB)
  [   16.223264] sd 0:0:1:0: [sdb] Write Protect is off
  [   16.225682] sd 0:0:1:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA

This is also the reason why request_queues start in bypass mode which
is ended on blk_register_queue() as shutting down a fully functional
queue also involves a RCU grace period and the queues for non-existent
SCSI devices never reach registration.

blk-mq basically needs to do the same thing - start the mq in a
degraded mode which is faster to shut down and then make it fully
functional only after the queue reaches registration.  percpu_ref
recently grew facilities to force atomic operation until explicitly
switched to percpu mode, which can be used for this purpose.  This
patch makes blk-mq initialize q->mq_usage_counter in atomic mode and
switch it to percpu mode only once blk_register_queue() is reached.

Note that this issue was previously worked around by 0a30288da1ae
("blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during
probe") for v3.17.  The temp fix was reverted in preparation of adding
persistent atomic mode to percpu_ref by 9eca80461a45 ("Revert "blk-mq,
percpu_ref: implement a kludge for SCSI blk-mq stall during probe"").
This patch and the prerequisite percpu_ref changes will be merged
during v3.18 devel cycle.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Christoph Hellwig <hch@infradead.org>
Link: http://lkml.kernel.org/g/20140919113815.GA10791@lst.de
Fixes: add703fda981 ("blk-mq: use percpu_ref for mq usage count")
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 block/blk-mq-sysfs.c   |  6 ++++++
 block/blk-mq.c         |  6 +++++-
 block/blk-sysfs.c      | 11 +++++++++--
 include/linux/blk-mq.h |  1 +
 4 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index ed5217867555..371d8800b48a 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -402,6 +402,12 @@ static void blk_mq_sysfs_init(struct request_queue *q)
 	}
 }
 
+/* see blk_register_queue() */
+void blk_mq_finish_init(struct request_queue *q)
+{
+	percpu_ref_switch_to_percpu(&q->mq_usage_counter);
+}
+
 int blk_mq_register_disk(struct gendisk *disk)
 {
 	struct device *dev = disk_to_dev(disk);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d85fe01c44ef..38f4a165640d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1795,8 +1795,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!q)
 		goto err_hctxs;
 
+	/*
+	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
+	 * See blk_register_queue() for details.
+	 */
 	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
-			    0, GFP_KERNEL))
+			    PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
 		goto err_map;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 17f5c84ce7bf..521ae9089c50 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -551,12 +551,19 @@ int blk_register_queue(struct gendisk *disk)
 		return -ENXIO;
 
 	/*
-	 * Initialization must be complete by now.  Finish the initial
-	 * bypass from queue allocation.
+	 * SCSI probing may synchronously create and destroy a lot of
+	 * request_queues for non-existent devices.  Shutting down a fully
+	 * functional queue takes measureable wallclock time as RCU grace
+	 * periods are involved.  To avoid excessive latency in these
+	 * cases, a request_queue starts out in a degraded mode which is
+	 * faster to shut down and is made fully functional here as
+	 * request_queues for non-existent devices never get registered.
 	 */
 	if (!blk_queue_init_done(q)) {
 		queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
 		blk_queue_bypass_end(q);
+		if (q->mq_ops)
+			blk_mq_finish_init(q);
 	}
 
 	ret = blk_trace_init_sysfs(dev);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a1e31f274fcd..c13a0c09faea 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -140,6 +140,7 @@ enum {
 };
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
+void blk_mq_finish_init(struct request_queue *q);
 int blk_mq_register_disk(struct gendisk *);
 void blk_mq_unregister_disk(struct gendisk *);
 
-- 
cgit v1.2.3


From 94e57fea62020dbf6e5d0093eabcd28366e86044 Mon Sep 17 00:00:00 2001
From: Francesco Ruggeri <fruggeri@arista.com>
Date: Wed, 24 Sep 2014 10:12:41 -0700
Subject: PCI: Move PCI_VENDOR_ID_VMWARE to pci_ids.h

Move PCI_VENDOR_ID_VMWARE from device-specific files to pci_ids.h.
It is useful to always have access to it, especially when accessing
subsystem_vendor_id on emulated devices.

[bhelgaas: keep pci_ids.h sorted and use lower-case hex]
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/gpu/drm/vmwgfx/svga_reg.h  | 1 -
 drivers/misc/vmw_vmci/vmci_guest.c | 1 -
 drivers/net/vmxnet3/vmxnet3_int.h  | 1 -
 drivers/scsi/vmw_pvscsi.h          | 1 -
 include/linux/pci_ids.h            | 2 ++
 5 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/vmwgfx/svga_reg.h b/drivers/gpu/drm/vmwgfx/svga_reg.h
index 11323dd5196f..e4259c2c1acc 100644
--- a/drivers/gpu/drm/vmwgfx/svga_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga_reg.h
@@ -35,7 +35,6 @@
 /*
  * PCI device IDs.
  */
-#define PCI_VENDOR_ID_VMWARE            0x15AD
 #define PCI_DEVICE_ID_VMWARE_SVGA2      0x0405
 
 /*
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 248399a881af..189b32519748 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -35,7 +35,6 @@
 #include "vmci_driver.h"
 #include "vmci_event.h"
 
-#define PCI_VENDOR_ID_VMWARE		0x15AD
 #define PCI_DEVICE_ID_VMWARE_VMCI	0x0740
 
 #define VMCI_UTIL_NUM_RESOURCES 1
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 29ee77f2c97f..c388ef509d66 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -117,7 +117,6 @@ enum {
 /*
  * PCI vendor and device IDs.
  */
-#define PCI_VENDOR_ID_VMWARE            0x15AD
 #define PCI_DEVICE_ID_VMWARE_VMXNET3    0x07B0
 #define MAX_ETHERNET_CARDS		10
 #define MAX_PCI_PASSTHRU_DEVICE		6
diff --git a/drivers/scsi/vmw_pvscsi.h b/drivers/scsi/vmw_pvscsi.h
index ce4588851274..ee16f0c5c47d 100644
--- a/drivers/scsi/vmw_pvscsi.h
+++ b/drivers/scsi/vmw_pvscsi.h
@@ -32,7 +32,6 @@
 
 #define MASK(n)        ((1 << (n)) - 1)        /* make an n-bit mask */
 
-#define PCI_VENDOR_ID_VMWARE		0x15AD
 #define PCI_DEVICE_ID_VMWARE_PVSCSI	0x07C0
 
 /*
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6ed0bb73a864..da9e6f753196 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2245,6 +2245,8 @@
 #define PCI_VENDOR_ID_MORETON		0x15aa
 #define PCI_DEVICE_ID_RASTEL_2PORT	0x2000
 
+#define PCI_VENDOR_ID_VMWARE		0x15ad
+
 #define PCI_VENDOR_ID_ZOLTRIX		0x15b0
 #define PCI_DEVICE_ID_ZOLTRIX_2BD0	0x2bd0
 
-- 
cgit v1.2.3


From b2fc3f3c6d397d434174147eca3db1ec778195ce Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 24 Sep 2014 11:42:38 -0700
Subject: drivers/soc: ti: fix build break with modules

Fixes below build break by not switching to stubs when the driver is a module:

drivers/soc/ti/knav_dma.c:418:7: error: redefinition of 'knav_dma_open_channel'
 void *knav_dma_open_channel(struct device *dev, const char *name,
       ^
In file included from drivers/soc/ti/knav_dma.c:26:0:
include/linux/soc/ti/knav_dma.h:165:21: note: previous definition of 'knav_dma_open_channel' was here
 static inline void *knav_dma_open_channel(struct device *dev, const char *name,
                     ^

Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 include/linux/soc/ti/knav_dma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index e864a3eb9ac4..dad035c16d94 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h
@@ -157,7 +157,7 @@ struct knav_dma_desc {
 	u32	pad[4];
 } ____cacheline_aligned;
 
-#ifdef CONFIG_KEYSTONE_NAVIGATOR_DMA
+#if IS_ENABLED(CONFIG_KEYSTONE_NAVIGATOR_DMA)
 void *knav_dma_open_channel(struct device *dev, const char *name,
 				struct knav_dma_cfg *config);
 void knav_dma_close_channel(void *channel);
-- 
cgit v1.2.3


From 7990da71ebfa887ae6fe4464ab0d99ddeb8efacc Mon Sep 17 00:00:00 2001
From: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Date: Wed, 3 Sep 2014 17:49:32 +0200
Subject: PM / QoS: Add PM_QOS_MEMORY_BANDWIDTH class

Also adds a class type PM_QOS_SUM that aggregates the values by summing them.

It can be used by memory controllers to calculate the optimum clock frequency
based on the bandwidth needs of the different memory clients.

Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/pm_qos_interface.txt |  4 +++-
 include/linux/pm_qos.h                   |  5 ++++-
 kernel/power/qos.c                       | 27 ++++++++++++++++++++++++++-
 3 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
index a5da5c7e7128..129f7c0e1483 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.txt
@@ -5,7 +5,8 @@ performance expectations by drivers, subsystems and user space applications on
 one of the parameters.
 
 Two different PM QoS frameworks are available:
-1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput.
+1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput,
+memory_bandwidth.
 2. the per-device PM QoS framework provides the API to manage the per-device latency
 constraints and PM QoS flags.
 
@@ -13,6 +14,7 @@ Each parameters have defined units:
  * latency: usec
  * timeout: usec
  * throughput: kbs (kilo bit / sec)
+ * memory bandwidth: mbs (mega bit / sec)
 
 
 1. PM QoS framework
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 9ab4bf7c4646..636e82834506 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -15,6 +15,7 @@ enum {
 	PM_QOS_CPU_DMA_LATENCY,
 	PM_QOS_NETWORK_LATENCY,
 	PM_QOS_NETWORK_THROUGHPUT,
+	PM_QOS_MEMORY_BANDWIDTH,
 
 	/* insert new class ID */
 	PM_QOS_NUM_CLASSES,
@@ -32,6 +33,7 @@ enum pm_qos_flags_status {
 #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
 #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
 #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE	0
+#define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE	0
 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE	0
 #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE	0
 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT	(-1)
@@ -69,7 +71,8 @@ struct dev_pm_qos_request {
 enum pm_qos_type {
 	PM_QOS_UNITIALIZED,
 	PM_QOS_MAX,		/* return the largest value */
-	PM_QOS_MIN		/* return the smallest value */
+	PM_QOS_MIN,		/* return the smallest value */
+	PM_QOS_SUM		/* return the sum */
 };
 
 /*
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 884b77058864..5f4c006c4b1e 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -105,11 +105,27 @@ static struct pm_qos_object network_throughput_pm_qos = {
 };
 
 
+static BLOCKING_NOTIFIER_HEAD(memory_bandwidth_notifier);
+static struct pm_qos_constraints memory_bw_constraints = {
+	.list = PLIST_HEAD_INIT(memory_bw_constraints.list),
+	.target_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+	.default_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+	.no_constraint_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+	.type = PM_QOS_SUM,
+	.notifiers = &memory_bandwidth_notifier,
+};
+static struct pm_qos_object memory_bandwidth_pm_qos = {
+	.constraints = &memory_bw_constraints,
+	.name = "memory_bandwidth",
+};
+
+
 static struct pm_qos_object *pm_qos_array[] = {
 	&null_pm_qos,
 	&cpu_dma_pm_qos,
 	&network_lat_pm_qos,
-	&network_throughput_pm_qos
+	&network_throughput_pm_qos,
+	&memory_bandwidth_pm_qos,
 };
 
 static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
@@ -130,6 +146,9 @@ static const struct file_operations pm_qos_power_fops = {
 /* unlocked internal variant */
 static inline int pm_qos_get_value(struct pm_qos_constraints *c)
 {
+	struct plist_node *node;
+	int total_value = 0;
+
 	if (plist_head_empty(&c->list))
 		return c->no_constraint_value;
 
@@ -140,6 +159,12 @@ static inline int pm_qos_get_value(struct pm_qos_constraints *c)
 	case PM_QOS_MAX:
 		return plist_last(&c->list)->prio;
 
+	case PM_QOS_SUM:
+		plist_for_each(node, &c->list)
+			total_value += node->prio;
+
+		return total_value;
+
 	default:
 		/* runtime check for not using enum */
 		BUG();
-- 
cgit v1.2.3


From a2b86f772227bcaf962c8b134f8d187046ac5f0e Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Thu, 25 Sep 2014 09:40:17 +0800
Subject: sched: fix confusing PFA_NO_NEW_PRIVS constant

Commit 1d4457f99928 ("sched: move no_new_privs into new atomic flags")
defined PFA_NO_NEW_PRIVS as hexadecimal value, but it is confusing
because it is used as bit number. Redefine it as decimal bit number.

Note this changes the bit position of PFA_NOW_NEW_PRIVS from 1 to 0.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Kees Cook <keescook@chromium.org>
[ lizf: slightly modified subject and changelog ]
Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..45577650f629 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1957,7 +1957,7 @@ static inline void memalloc_noio_restore(unsigned int flags)
 }
 
 /* Per-process atomic flags. */
-#define PFA_NO_NEW_PRIVS 0x00000001	/* May not gain new privileges. */
+#define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
 
 static inline bool task_no_new_privs(struct task_struct *p)
 {
-- 
cgit v1.2.3


From e0e5070b20e01f0321f97db4e4e174f3f6b49e50 Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Thu, 25 Sep 2014 09:40:40 +0800
Subject: sched: add macros to define bitops for task atomic flags

This will simplify code when we add new flags.

v3:
- Kees pointed out that no_new_privs should never be cleared, so we
shouldn't define task_clear_no_new_privs(). we define 3 macros instead
of a single one.

v2:
- updated scripts/tags.sh, suggested by Peter

Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/sched.h | 21 ++++++++++++---------
 scripts/tags.sh       |  6 ++++++
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 45577650f629..5630763956d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1959,15 +1959,18 @@ static inline void memalloc_noio_restore(unsigned int flags)
 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
 
-static inline bool task_no_new_privs(struct task_struct *p)
-{
-	return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
-}
-
-static inline void task_set_no_new_privs(struct task_struct *p)
-{
-	set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
-}
+#define TASK_PFA_TEST(name, func)					\
+	static inline bool task_##func(struct task_struct *p)		\
+	{ return test_bit(PFA_##name, &p->atomic_flags); }
+#define TASK_PFA_SET(name, func)					\
+	static inline void task_set_##func(struct task_struct *p)	\
+	{ set_bit(PFA_##name, &p->atomic_flags); }
+#define TASK_PFA_CLEAR(name, func)					\
+	static inline void task_clear_##func(struct task_struct *p)	\
+	{ clear_bit(PFA_##name, &p->atomic_flags); }
+
+TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
+TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
 
 /*
  * task->jobctl flags
diff --git a/scripts/tags.sh b/scripts/tags.sh
index cbfd269a6011..293828bfd4ac 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -197,6 +197,9 @@ exuberant()
 	--regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/'		\
 	--regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/'	\
 	--regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
+	--regex-c++='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/'	\
+	--regex-c++='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/'	\
+	--regex-c++='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/'\
 	--regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \
 	--regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \
 	--regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/'	\
@@ -260,6 +263,9 @@ emacs()
 	--regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/'	\
 	--regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/'	\
 	--regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
+	--regex='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/'		\
+	--regex='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/'	\
+	--regex='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/'	\
 	--regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'		\
 	--regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \
 	--regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\
-- 
cgit v1.2.3


From 2ad654bc5e2b211e92f66da1d819e47d79a866f0 Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Thu, 25 Sep 2014 09:41:02 +0800
Subject: cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags

When we change cpuset.memory_spread_{page,slab}, cpuset will flip
PF_SPREAD_{PAGE,SLAB} bit of tsk->flags for each task in that cpuset.
This should be done using atomic bitops, but currently we don't,
which is broken.

Tetsuo reported a hard-to-reproduce kernel crash on RHEL6, which happened
when one thread tried to clear PF_USED_MATH while at the same time another
thread tried to flip PF_SPREAD_PAGE/PF_SPREAD_SLAB. They both operate on
the same task.

Here's the full report:
https://lkml.org/lkml/2014/9/19/230

To fix this, we make PF_SPREAD_PAGE and PF_SPREAD_SLAB atomic flags.

v4:
- updated mm/slab.c. (Fengguang Wu)
- updated Documentation.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Cc: Kees Cook <keescook@chromium.org>
Fixes: 950592f7b991 ("cpusets: update tasks' page/slab spread flags in time")
Cc: <stable@vger.kernel.org> # 2.6.31+
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 Documentation/cgroups/cpusets.txt |  6 +++---
 include/linux/cpuset.h            |  4 ++--
 include/linux/sched.h             | 13 +++++++++++--
 kernel/cpuset.c                   |  9 +++++----
 mm/slab.c                         |  4 ++--
 5 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index 7740038d82bc..3c94ff3f9693 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -345,14 +345,14 @@ the named feature on.
 The implementation is simple.
 
 Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
-PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
+PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
 joins that cpuset.  The page allocation calls for the page cache
-is modified to perform an inline check for this PF_SPREAD_PAGE task
+is modified to perform an inline check for this PFA_SPREAD_PAGE task
 flag, and if set, a call to a new routine cpuset_mem_spread_node()
 returns the node to prefer for the allocation.
 
 Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
-PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
+PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
 pages from the node returned by cpuset_mem_spread_node().
 
 The cpuset_mem_spread_node() routine is also simple.  It uses the
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index ade2390ffe92..6e39c9bb0dae 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -93,12 +93,12 @@ extern int cpuset_slab_spread_node(void);
 
 static inline int cpuset_do_page_mem_spread(void)
 {
-	return current->flags & PF_SPREAD_PAGE;
+	return task_spread_page(current);
 }
 
 static inline int cpuset_do_slab_mem_spread(void)
 {
-	return current->flags & PF_SPREAD_SLAB;
+	return task_spread_slab(current);
 }
 
 extern int current_cpuset_is_being_rebound(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5630763956d9..7b1cafefb05e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1903,8 +1903,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
-#define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
-#define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
 #define PF_NO_SETAFFINITY 0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
@@ -1958,6 +1956,9 @@ static inline void memalloc_noio_restore(unsigned int flags)
 
 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
+#define PFA_SPREAD_PAGE  1      /* Spread page cache over cpuset */
+#define PFA_SPREAD_SLAB  2      /* Spread some slab caches over cpuset */
+
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1972,6 +1973,14 @@ static inline void memalloc_noio_restore(unsigned int flags)
 TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
 TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
 
+TASK_PFA_TEST(SPREAD_PAGE, spread_page)
+TASK_PFA_SET(SPREAD_PAGE, spread_page)
+TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
+
+TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
+TASK_PFA_SET(SPREAD_SLAB, spread_slab)
+TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+
 /*
  * task->jobctl flags
  */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 22874d7cf2c0..52cb04c993b7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -365,13 +365,14 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
 					struct task_struct *tsk)
 {
 	if (is_spread_page(cs))
-		tsk->flags |= PF_SPREAD_PAGE;
+		task_set_spread_page(tsk);
 	else
-		tsk->flags &= ~PF_SPREAD_PAGE;
+		task_clear_spread_page(tsk);
+
 	if (is_spread_slab(cs))
-		tsk->flags |= PF_SPREAD_SLAB;
+		task_set_spread_slab(tsk);
 	else
-		tsk->flags &= ~PF_SPREAD_SLAB;
+		task_clear_spread_slab(tsk);
 }
 
 /*
diff --git a/mm/slab.c b/mm/slab.c
index a467b308c682..881951e67f12 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2994,7 +2994,7 @@ out:
 
 #ifdef CONFIG_NUMA
 /*
- * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
+ * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set.
  *
  * If we are in_interrupt, then process context, including cpusets and
  * mempolicy, may not apply and should not be used for allocation policy.
@@ -3226,7 +3226,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
 {
 	void *objp;
 
-	if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
+	if (current->mempolicy || cpuset_do_slab_mem_spread()) {
 		objp = alternate_node_alloc(cache, flags);
 		if (objp)
 			goto out;
-- 
cgit v1.2.3


From a743419f420a64d442280845c0377a915b76644f Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 23 Sep 2014 12:26:19 -0400
Subject: SUNRPC: Don't wake tasks during connection abort

When aborting a connection to preserve source ports, don't wake the task in
xs_error_report.  This allows tasks with RPC_TASK_SOFTCONN to succeed if the
connection needs to be re-established since it preserves the task's status
instead of setting it to the status of the aborting kernel_connect().

This may also avoid a potential conflict on the socket's lock.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Cc: stable@vger.kernel.org # 3.14+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/xprt.h | 1 +
 net/sunrpc/xprtsock.c       | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index fcbfe8783243..cf391eef2e6d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -357,6 +357,7 @@ int			xs_swapper(struct rpc_xprt *xprt, int enable);
 #define XPRT_CONNECTION_ABORT	(7)
 #define XPRT_CONNECTION_CLOSE	(8)
 #define XPRT_CONGESTED		(9)
+#define XPRT_CONNECTION_REUSE	(10)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7ed47b4943da..bffba4e4bfc6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -845,6 +845,8 @@ static void xs_error_report(struct sock *sk)
 	dprintk("RPC:       xs_error_report client %p, error=%d...\n",
 			xprt, -err);
 	trace_rpc_socket_error(xprt, sk->sk_socket, err);
+	if (test_bit(XPRT_CONNECTION_REUSE, &xprt->state))
+		goto out;
 	xprt_wake_pending_tasks(xprt, err);
  out:
 	read_unlock_bh(&sk->sk_callback_lock);
@@ -2261,7 +2263,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
 		abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
 				&xprt->state);
 		/* "close" the socket, preserving the local port */
+		set_bit(XPRT_CONNECTION_REUSE, &xprt->state);
 		xs_tcp_reuse_connection(transport);
+		clear_bit(XPRT_CONNECTION_REUSE, &xprt->state);
 
 		if (abort_and_exit)
 			goto out_eagain;
-- 
cgit v1.2.3


From 8478eaa16e701ecfe054b62ec764bc1291b79e19 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 18 Sep 2014 16:09:27 +1000
Subject: NFSv4: use exponential retry on NFS4ERR_DELAY for async requests.

Currently asynchronous NFSv4 request will be retried with
exponential timeout (from 1/10 to 15 seconds), but async
requests will always use a 15second retry.

Some "async" requests are really synchronous though.  The
async mechanism is used to allow the request to continue if
the requesting process is killed.
In those cases, an exponential retry is appropriate.

For example, if two different clients both open a file and
get a READ delegation, and one client then unlinks the file
(while still holding an open file descriptor), that unlink
will used the "silly-rename" handling which is async.
The first rename will result in NFS4ERR_DELAY while the
delegation is reclaimed from the other client.  The rename
will not be retried for 15 seconds, causing an unlink to take
15 seconds rather than 100msec.

This patch only added exponential timeout for async unlink and
async rename.  Other async calls, such as 'close' are sometimes
waited for so they might benefit from exponential timeout too.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 65 ++++++++++++++++++++++++++++++++-----------------
 include/linux/nfs_xdr.h |  2 ++
 2 files changed, 44 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 288be08bda95..45bce9ed6791 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -77,7 +77,7 @@ struct nfs4_opendata;
 static int _nfs4_proc_open(struct nfs4_opendata *data);
 static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *);
 static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
 static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label);
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label);
@@ -314,20 +314,30 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 	kunmap_atomic(start);
 }
 
+static long nfs4_update_delay(long *timeout)
+{
+	long ret;
+	if (!timeout)
+		return NFS4_POLL_RETRY_MAX;
+	if (*timeout <= 0)
+		*timeout = NFS4_POLL_RETRY_MIN;
+	if (*timeout > NFS4_POLL_RETRY_MAX)
+		*timeout = NFS4_POLL_RETRY_MAX;
+	ret = *timeout;
+	*timeout <<= 1;
+	return ret;
+}
+
 static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 {
 	int res = 0;
 
 	might_sleep();
 
-	if (*timeout <= 0)
-		*timeout = NFS4_POLL_RETRY_MIN;
-	if (*timeout > NFS4_POLL_RETRY_MAX)
-		*timeout = NFS4_POLL_RETRY_MAX;
-	freezable_schedule_timeout_killable_unsafe(*timeout);
+	freezable_schedule_timeout_killable_unsafe(
+		nfs4_update_delay(timeout));
 	if (fatal_signal_pending(current))
 		res = -ERESTARTSYS;
-	*timeout <<= 1;
 	return res;
 }
 
@@ -2583,7 +2593,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 			if (calldata->arg.fmode == 0)
 				break;
 		default:
-			if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
+			if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) {
 				rpc_restart_call_prepare(task);
 				goto out_release;
 			}
@@ -3572,7 +3582,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 
 	if (!nfs4_sequence_done(task, &res->seq_res))
 		return 0;
-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+	if (nfs4_async_handle_error(task, res->server, NULL,
+				    &data->timeout) == -EAGAIN)
 		return 0;
 	update_changeattr(dir, &res->cinfo);
 	return 1;
@@ -3605,7 +3616,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
 
 	if (!nfs4_sequence_done(task, &res->seq_res))
 		return 0;
-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+	if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN)
 		return 0;
 
 	update_changeattr(old_dir, &res->old_cinfo);
@@ -4109,7 +4120,8 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
 
 	trace_nfs4_read(hdr, task->tk_status);
 	if (nfs4_async_handle_error(task, server,
-				    hdr->args.context->state) == -EAGAIN) {
+				    hdr->args.context->state,
+				    NULL) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
 	}
@@ -4177,10 +4189,11 @@ static int nfs4_write_done_cb(struct rpc_task *task,
 			      struct nfs_pgio_header *hdr)
 {
 	struct inode *inode = hdr->inode;
-	
+
 	trace_nfs4_write(hdr, task->tk_status);
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode),
-				    hdr->args.context->state) == -EAGAIN) {
+				    hdr->args.context->state,
+				    NULL) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
 	}
@@ -4260,7 +4273,8 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da
 	struct inode *inode = data->inode;
 
 	trace_nfs4_commit(data, task->tk_status);
-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode),
+				    NULL, NULL) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
 	}
@@ -4813,7 +4827,8 @@ out:
 
 
 static int
-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
+			struct nfs4_state *state, long *timeout)
 {
 	struct nfs_client *clp = server->nfs_client;
 
@@ -4863,6 +4878,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 #endif /* CONFIG_NFS_V4_1 */
 		case -NFS4ERR_DELAY:
 			nfs_inc_server_stats(server, NFSIOS_DELAY);
+			rpc_delay(task, nfs4_update_delay(timeout));
+			goto restart_call;
 		case -NFS4ERR_GRACE:
 			rpc_delay(task, NFS4_POLL_RETRY_MAX);
 		case -NFS4ERR_RETRY_UNCACHED_REP:
@@ -5103,8 +5120,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 			pnfs_roc_set_barrier(data->inode, data->roc_barrier);
 		break;
 	default:
-		if (nfs4_async_handle_error(task, data->res.server, NULL) ==
-				-EAGAIN) {
+		if (nfs4_async_handle_error(task, data->res.server,
+					    NULL, NULL) == -EAGAIN) {
 			rpc_restart_call_prepare(task);
 			return;
 		}
@@ -5368,7 +5385,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 		case -NFS4ERR_EXPIRED:
 			break;
 		default:
-			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
+			if (nfs4_async_handle_error(task, calldata->server,
+						    NULL, NULL) == -EAGAIN)
 				rpc_restart_call_prepare(task);
 	}
 	nfs_release_seqid(calldata->arg.seqid);
@@ -5974,7 +5992,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
 		break;
 	case -NFS4ERR_LEASE_MOVED:
 	case -NFS4ERR_DELAY:
-		if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
+		if (nfs4_async_handle_error(task, server,
+					    NULL, NULL) == -EAGAIN)
 			rpc_restart_call_prepare(task);
 	}
 }
@@ -7591,7 +7610,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 			rpc_restart_call_prepare(task);
 		}
 	}
-	if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
+	if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
 		rpc_restart_call_prepare(task);
 out:
 	dprintk("<-- %s\n", __func__);
@@ -7751,7 +7770,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 	case 0:
 		break;
 	case -NFS4ERR_DELAY:
-		if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN)
+		if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
 			break;
 		rpc_restart_call_prepare(task);
 		return;
@@ -7882,7 +7901,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 	case 0:
 		break;
 	default:
-		if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+		if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
 			rpc_restart_call_prepare(task);
 			return;
 		}
@@ -8178,7 +8197,7 @@ static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata)
 
 	switch (task->tk_status) {
 	case -NFS4ERR_DELAY:
-		if (nfs4_async_handle_error(task, data->server, NULL) == -EAGAIN)
+		if (nfs4_async_handle_error(task, data->server, NULL, NULL) == -EAGAIN)
 			rpc_restart_call_prepare(task);
 	}
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7ae249ccb78d..6951c7d9097d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1339,6 +1339,7 @@ struct nfs_unlinkdata {
 	struct inode *dir;
 	struct rpc_cred	*cred;
 	struct nfs_fattr dir_attr;
+	long timeout;
 };
 
 struct nfs_renamedata {
@@ -1352,6 +1353,7 @@ struct nfs_renamedata {
 	struct dentry		*new_dentry;
 	struct nfs_fattr	new_fattr;
 	void (*complete)(struct rpc_task *, struct nfs_renamedata *);
+	long timeout;
 };
 
 struct nfs_access_entry;
-- 
cgit v1.2.3


From cc952e7017fa2e8871ee6a94f2c606ff5911f61e Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 24 Sep 2014 06:26:21 -0400
Subject: tty: Fix width of unsigned long bitfield padding

Commit c545b66c6922b002b5fe224a6eaec58c913650b5,
'tty: Serialize tcflow() with other tty flow control changes' and
commit 99416322dd16b810ba74098cc50ef2a844091d35,
'tty: Workaround Alpha non-atomic byte storage in tty_struct' work around
compiler bugs and non-atomic storage on multiple arches by padding
bitfields out to the declared type which is unsigned long. However, the
width varies by arch.

Pad bitfields to actual width of unsigned long (which is BITS_PER_LONG).

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 546e94557895..5171ef8f7b85 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -264,11 +264,11 @@ struct tty_struct {
 	struct winsize winsize;		/* winsize_mutex */
 	unsigned long stopped:1,	/* flow_lock */
 		      flow_stopped:1,
-		      unused:62;
+		      unused:BITS_PER_LONG - 2;
 	int hw_stopped;
 	unsigned long ctrl_status:8,	/* ctrl_lock */
 		      packet:1,
-		      unused_ctrl:55;
+		      unused_ctrl:BITS_PER_LONG - 9;
 	unsigned int receive_room;	/* Bytes free for queue */
 	int flow_change;
 
-- 
cgit v1.2.3


From cbbce82209490df8b68da9aec0d642451fe0a668 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 25 Sep 2014 13:55:19 +1000
Subject: SCHED: add some "wait..on_bit...timeout()" interfaces.

In commit c1221321b7c25b53204447cff9949a6d5a7ddddc
   sched: Allow wait_on_bit_action() functions to support a timeout

I suggested that a "wait_on_bit_timeout()" interface would not meet my
need.  This isn't true - I was just over-engineering.

Including a 'private' field in wait_bit_key instead of a focused
"timeout" field was just premature generalization.  If some other
use is ever found, it can be generalized or added later.

So this patch renames "private" to "timeout" with a meaning "stop
waiting when "jiffies" reaches or passes "timeout",
and adds two of the many possible wait..bit..timeout() interfaces:

wait_on_page_bit_killable_timeout(), which is the one I want to use,
and out_of_line_wait_on_bit_timeout() which is a reasonably general
example.  Others can be added as needed.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/pagemap.h |  2 ++
 include/linux/wait.h    |  5 ++++-
 kernel/sched/wait.c     | 36 ++++++++++++++++++++++++++++++++++++
 mm/filemap.c            | 13 +++++++++++++
 4 files changed, 55 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 3df8c7db7a4e..87f9e4230d3a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -502,6 +502,8 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
 extern void wait_on_page_bit(struct page *page, int bit_nr);
 
 extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
+extern int wait_on_page_bit_killable_timeout(struct page *page,
+					     int bit_nr, unsigned long timeout);
 
 static inline int wait_on_page_locked_killable(struct page *page)
 {
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6fb1ba5f9b2f..80115bf88671 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -25,7 +25,7 @@ struct wait_bit_key {
 	void			*flags;
 	int			bit_nr;
 #define WAIT_ATOMIC_T_BIT_NR	-1
-	unsigned long		private;
+	unsigned long		timeout;
 };
 
 struct wait_bit_queue {
@@ -154,6 +154,7 @@ int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_ac
 void wake_up_bit(void *, int);
 void wake_up_atomic_t(atomic_t *);
 int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
+int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long);
 int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned);
 int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
 wait_queue_head_t *bit_waitqueue(void *, int);
@@ -859,6 +860,8 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
 extern int bit_wait(struct wait_bit_key *);
 extern int bit_wait_io(struct wait_bit_key *);
+extern int bit_wait_timeout(struct wait_bit_key *);
+extern int bit_wait_io_timeout(struct wait_bit_key *);
 
 /**
  * wait_on_bit - wait for a bit to be cleared
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 15cab1a4f84e..5a62915f47a8 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -343,6 +343,18 @@ int __sched out_of_line_wait_on_bit(void *word, int bit,
 }
 EXPORT_SYMBOL(out_of_line_wait_on_bit);
 
+int __sched out_of_line_wait_on_bit_timeout(
+	void *word, int bit, wait_bit_action_f *action,
+	unsigned mode, unsigned long timeout)
+{
+	wait_queue_head_t *wq = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wait, word, bit);
+
+	wait.key.timeout = jiffies + timeout;
+	return __wait_on_bit(wq, &wait, action, mode);
+}
+EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
+
 int __sched
 __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
 			wait_bit_action_f *action, unsigned mode)
@@ -520,3 +532,27 @@ __sched int bit_wait_io(struct wait_bit_key *word)
 	return 0;
 }
 EXPORT_SYMBOL(bit_wait_io);
+
+__sched int bit_wait_timeout(struct wait_bit_key *word)
+{
+	unsigned long now = ACCESS_ONCE(jiffies);
+	if (signal_pending_state(current->state, current))
+		return 1;
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	schedule_timeout(word->timeout - now);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_timeout);
+
+__sched int bit_wait_io_timeout(struct wait_bit_key *word)
+{
+	unsigned long now = ACCESS_ONCE(jiffies);
+	if (signal_pending_state(current->state, current))
+		return 1;
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	io_schedule_timeout(word->timeout - now);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
diff --git a/mm/filemap.c b/mm/filemap.c
index 90effcdf948d..cbe5a9013f70 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -703,6 +703,19 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 			     bit_wait_io, TASK_KILLABLE);
 }
 
+int wait_on_page_bit_killable_timeout(struct page *page,
+				       int bit_nr, unsigned long timeout)
+{
+	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+
+	wait.key.timeout = jiffies + timeout;
+	if (!test_bit(bit_nr, &page->flags))
+		return 0;
+	return __wait_on_bit(page_waitqueue(page), &wait,
+			     bit_wait_io_timeout, TASK_KILLABLE);
+}
+EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
+
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
  * @page: Page defining the wait queue of interest
-- 
cgit v1.2.3


From a4796e37c12e177572b80864cbab9c907ea250b0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 24 Sep 2014 11:28:32 +1000
Subject: MM: export page_wakeup functions

This will allow NFS to wait for PG_private to be cleared and,
particularly, to send a wake-up when it is.

Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/pagemap.h | 10 ++++++++--
 mm/filemap.c            |  8 ++------
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 87f9e4230d3a..2dca0cef3506 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -496,8 +496,8 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
 }
 
 /*
- * This is exported only for wait_on_page_locked/wait_on_page_writeback.
- * Never use this directly!
+ * This is exported only for wait_on_page_locked/wait_on_page_writeback,
+ * and for filesystems which need to wait on PG_private.
  */
 extern void wait_on_page_bit(struct page *page, int bit_nr);
 
@@ -512,6 +512,12 @@ static inline int wait_on_page_locked_killable(struct page *page)
 	return 0;
 }
 
+extern wait_queue_head_t *page_waitqueue(struct page *page);
+static inline void wake_up_page(struct page *page, int bit)
+{
+	__wake_up_bit(page_waitqueue(page), &page->flags, bit);
+}
+
 /* 
  * Wait for a page to be unlocked.
  *
diff --git a/mm/filemap.c b/mm/filemap.c
index cbe5a9013f70..b9b1413080be 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -670,17 +670,13 @@ EXPORT_SYMBOL(__page_cache_alloc);
  * at a cost of "thundering herd" phenomena during rare hash
  * collisions.
  */
-static wait_queue_head_t *page_waitqueue(struct page *page)
+wait_queue_head_t *page_waitqueue(struct page *page)
 {
 	const struct zone *zone = page_zone(page);
 
 	return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
 }
-
-static inline void wake_up_page(struct page *page, int bit)
-{
-	__wake_up_bit(page_waitqueue(page), &page->flags, bit);
-}
+EXPORT_SYMBOL(page_waitqueue);
 
 void wait_on_page_bit(struct page *page, int bit_nr)
 {
-- 
cgit v1.2.3


From 1aed074869a9cbe0a846ea7b254d8fd9a4a4d31f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 3 Sep 2014 18:34:04 +0200
Subject: iommu: Convert iommu-caps from define to enum

Allow compile-time type-checking.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c |  2 +-
 include/linux/iommu.h | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0639b9274b11..bc45478e26db 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -948,7 +948,7 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
 int iommu_domain_has_cap(struct iommu_domain *domain,
-			 unsigned long cap)
+			 enum iommu_cap cap)
 {
 	if (unlikely(domain->ops->domain_has_cap == NULL))
 		return 0;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 20f9a527922a..98fc126655ae 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -57,8 +57,11 @@ struct iommu_domain {
 	struct iommu_domain_geometry geometry;
 };
 
-#define IOMMU_CAP_CACHE_COHERENCY	0x1
-#define IOMMU_CAP_INTR_REMAP		0x2	/* isolates device intrs */
+enum iommu_cap {
+	IOMMU_CAP_CACHE_COHERENCY,	/* IOMMU can enforce cache coherent DMA
+					   transactions */
+	IOMMU_CAP_INTR_REMAP,		/* IOMMU supports interrupt isolation */
+};
 
 /*
  * Following constraints are specifc to FSL_PAMUV1:
@@ -155,7 +158,7 @@ extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 		       size_t size);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
 extern int iommu_domain_has_cap(struct iommu_domain *domain,
-				unsigned long cap);
+				enum iommu_cap cap);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
 
@@ -305,7 +308,7 @@ static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_ad
 }
 
 static inline int iommu_domain_has_cap(struct iommu_domain *domain,
-				       unsigned long cap)
+				       enum iommu_cap cap)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 3c0e0ca0a4e757159d868c4870556515d66b6c97 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 3 Sep 2014 18:47:25 +0200
Subject: iommu: Introduce iommu_capable API function

This function will replace the current iommu_domain_has_cap
function and clean up the interface while at it.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 18 +++++++++++++++---
 include/linux/iommu.h |  7 +++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index bc45478e26db..aeb243f46332 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -817,6 +817,15 @@ bool iommu_present(struct bus_type *bus)
 }
 EXPORT_SYMBOL_GPL(iommu_present);
 
+bool iommu_capable(struct bus_type *bus, enum iommu_cap cap)
+{
+	if (!bus->iommu_ops || !bus->iommu_ops->capable)
+		return false;
+
+	return bus->iommu_ops->capable(cap);
+}
+EXPORT_SYMBOL_GPL(iommu_capable);
+
 /**
  * iommu_set_fault_handler() - set a fault handler for an iommu domain
  * @domain: iommu domain
@@ -950,10 +959,13 @@ EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 int iommu_domain_has_cap(struct iommu_domain *domain,
 			 enum iommu_cap cap)
 {
-	if (unlikely(domain->ops->domain_has_cap == NULL))
-		return 0;
+	if (domain->ops->domain_has_cap != NULL)
+		return domain->ops->domain_has_cap(domain, cap);
+
+	if (domain->ops->capable != NULL)
+		return domain->ops->capable(cap);
 
-	return domain->ops->domain_has_cap(domain, cap);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 98fc126655ae..d5534d554693 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -105,6 +105,7 @@ enum iommu_attr {
  * @pgsize_bitmap: bitmap of supported page sizes
  */
 struct iommu_ops {
+	bool (*capable)(enum iommu_cap);
 	int (*domain_init)(struct iommu_domain *domain);
 	void (*domain_destroy)(struct iommu_domain *domain);
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
@@ -145,6 +146,7 @@ struct iommu_ops {
 
 extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops);
 extern bool iommu_present(struct bus_type *bus);
+extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap);
 extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
 extern struct iommu_group *iommu_group_get_by_id(int id);
 extern void iommu_domain_free(struct iommu_domain *domain);
@@ -253,6 +255,11 @@ static inline bool iommu_present(struct bus_type *bus)
 	return false;
 }
 
+static inline bool iommu_capable(struct bus_type *bus, enum iommu_cap cap)
+{
+	return false;
+}
+
 static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
 {
 	return NULL;
-- 
cgit v1.2.3


From 24278a24d88ae730229417e5d3bd452d7545fbcc Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 5 Sep 2014 10:57:11 +0200
Subject: iommu: Remove iommu_domain_has_cap() API function

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 13 -------------
 include/linux/iommu.h | 11 -----------
 2 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index aeb243f46332..d2d242fcaa3d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -956,19 +956,6 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-int iommu_domain_has_cap(struct iommu_domain *domain,
-			 enum iommu_cap cap)
-{
-	if (domain->ops->domain_has_cap != NULL)
-		return domain->ops->domain_has_cap(domain, cap);
-
-	if (domain->ops->capable != NULL)
-		return domain->ops->capable(cap);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
-
 static size_t iommu_pgsize(struct iommu_domain *domain,
 			   unsigned long addr_merge, size_t size)
 {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d5534d554693..379a6179fd96 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -97,7 +97,6 @@ enum iommu_attr {
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @iova_to_phys: translate iova to physical address
- * @domain_has_cap: domain capabilities query
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
  * @domain_get_attr: Query domain attributes
@@ -115,8 +114,6 @@ struct iommu_ops {
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     size_t size);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
-	int (*domain_has_cap)(struct iommu_domain *domain,
-			      unsigned long cap);
 	int (*add_device)(struct device *dev);
 	void (*remove_device)(struct device *dev);
 	int (*device_group)(struct device *dev, unsigned int *groupid);
@@ -159,8 +156,6 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
 extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 		       size_t size);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
-extern int iommu_domain_has_cap(struct iommu_domain *domain,
-				enum iommu_cap cap);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
 
@@ -314,12 +309,6 @@ static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_ad
 	return 0;
 }
 
-static inline int iommu_domain_has_cap(struct iommu_domain *domain,
-				       enum iommu_cap cap)
-{
-	return 0;
-}
-
 static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 				iommu_fault_handler_t handler, void *token)
 {
-- 
cgit v1.2.3


From 17f4a5c47f28de9ea59182f48d07f8c44ee5dcc9 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Mon, 22 Sep 2014 19:41:00 +0200
Subject: i2c: move acpi code back into the core

Commit 5d98e61d337c ("I2C/ACPI: Add i2c ACPI operation region support")
renamed the i2c-core module. This may cause regressions for
distributions, so put the ACPI code back into the core.

Reported-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Tested-by: Lan Tianyu <tianyu.lan@intel.com>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 MAINTAINERS            |   1 -
 drivers/i2c/Makefile   |   5 +-
 drivers/i2c/i2c-acpi.c | 364 -------------------------------------------------
 drivers/i2c/i2c-core.c | 354 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c.h    |  16 ---
 5 files changed, 355 insertions(+), 385 deletions(-)
 delete mode 100644 drivers/i2c/i2c-acpi.c

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 809ecd680d88..e3682d0dea1e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4477,7 +4477,6 @@ M:	Mika Westerberg <mika.westerberg@linux.intel.com>
 L:	linux-i2c@vger.kernel.org
 L:	linux-acpi@vger.kernel.org
 S:	Maintained
-F:	drivers/i2c/i2c-acpi.c
 
 I2C-TAOS-EVM DRIVER
 M:	Jean Delvare <jdelvare@suse.de>
diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index e0228b228256..1722f50f2473 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -2,11 +2,8 @@
 # Makefile for the i2c core.
 #
 
-i2ccore-y := i2c-core.o
-i2ccore-$(CONFIG_ACPI)	 	+= i2c-acpi.o
-
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
-obj-$(CONFIG_I2C)		+= i2ccore.o
+obj-$(CONFIG_I2C)		+= i2c-core.o
 obj-$(CONFIG_I2C_SMBUS)		+= i2c-smbus.o
 obj-$(CONFIG_I2C_CHARDEV)	+= i2c-dev.o
 obj-$(CONFIG_I2C_MUX)		+= i2c-mux.o
diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c
deleted file mode 100644
index 0dbc18c15c43..000000000000
--- a/drivers/i2c/i2c-acpi.c
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * I2C ACPI code
- *
- * Copyright (C) 2014 Intel Corp
- *
- * Author: Lan Tianyu <tianyu.lan@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- */
-#define pr_fmt(fmt) "I2C/ACPI : " fmt
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/i2c.h>
-#include <linux/acpi.h>
-
-struct acpi_i2c_handler_data {
-	struct acpi_connection_info info;
-	struct i2c_adapter *adapter;
-};
-
-struct gsb_buffer {
-	u8	status;
-	u8	len;
-	union {
-		u16	wdata;
-		u8	bdata;
-		u8	data[0];
-	};
-} __packed;
-
-static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data)
-{
-	struct i2c_board_info *info = data;
-
-	if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
-		struct acpi_resource_i2c_serialbus *sb;
-
-		sb = &ares->data.i2c_serial_bus;
-		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) {
-			info->addr = sb->slave_address;
-			if (sb->access_mode == ACPI_I2C_10BIT_MODE)
-				info->flags |= I2C_CLIENT_TEN;
-		}
-	} else if (info->irq < 0) {
-		struct resource r;
-
-		if (acpi_dev_resource_interrupt(ares, 0, &r))
-			info->irq = r.start;
-	}
-
-	/* Tell the ACPI core to skip this resource */
-	return 1;
-}
-
-static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
-				       void *data, void **return_value)
-{
-	struct i2c_adapter *adapter = data;
-	struct list_head resource_list;
-	struct i2c_board_info info;
-	struct acpi_device *adev;
-	int ret;
-
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-	if (acpi_bus_get_status(adev) || !adev->status.present)
-		return AE_OK;
-
-	memset(&info, 0, sizeof(info));
-	info.acpi_node.companion = adev;
-	info.irq = -1;
-
-	INIT_LIST_HEAD(&resource_list);
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     acpi_i2c_add_resource, &info);
-	acpi_dev_free_resource_list(&resource_list);
-
-	if (ret < 0 || !info.addr)
-		return AE_OK;
-
-	adev->power.flags.ignore_parent = true;
-	strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type));
-	if (!i2c_new_device(adapter, &info)) {
-		adev->power.flags.ignore_parent = false;
-		dev_err(&adapter->dev,
-			"failed to add I2C device %s from ACPI\n",
-			dev_name(&adev->dev));
-	}
-
-	return AE_OK;
-}
-
-/**
- * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter
- * @adap: pointer to adapter
- *
- * Enumerate all I2C slave devices behind this adapter by walking the ACPI
- * namespace. When a device is found it will be added to the Linux device
- * model and bound to the corresponding ACPI handle.
- */
-void acpi_i2c_register_devices(struct i2c_adapter *adap)
-{
-	acpi_handle handle;
-	acpi_status status;
-
-	if (!adap->dev.parent)
-		return;
-
-	handle = ACPI_HANDLE(adap->dev.parent);
-	if (!handle)
-		return;
-
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
-				     acpi_i2c_add_device, NULL,
-				     adap, NULL);
-	if (ACPI_FAILURE(status))
-		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
-}
-
-#ifdef CONFIG_ACPI_I2C_OPREGION
-static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
-		u8 cmd, u8 *data, u8 data_len)
-{
-
-	struct i2c_msg msgs[2];
-	int ret;
-	u8 *buffer;
-
-	buffer = kzalloc(data_len, GFP_KERNEL);
-	if (!buffer)
-		return AE_NO_MEMORY;
-
-	msgs[0].addr = client->addr;
-	msgs[0].flags = client->flags;
-	msgs[0].len = 1;
-	msgs[0].buf = &cmd;
-
-	msgs[1].addr = client->addr;
-	msgs[1].flags = client->flags | I2C_M_RD;
-	msgs[1].len = data_len;
-	msgs[1].buf = buffer;
-
-	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-	if (ret < 0)
-		dev_err(&client->adapter->dev, "i2c read failed\n");
-	else
-		memcpy(data, buffer, data_len);
-
-	kfree(buffer);
-	return ret;
-}
-
-static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
-		u8 cmd, u8 *data, u8 data_len)
-{
-
-	struct i2c_msg msgs[1];
-	u8 *buffer;
-	int ret = AE_OK;
-
-	buffer = kzalloc(data_len + 1, GFP_KERNEL);
-	if (!buffer)
-		return AE_NO_MEMORY;
-
-	buffer[0] = cmd;
-	memcpy(buffer + 1, data, data_len);
-
-	msgs[0].addr = client->addr;
-	msgs[0].flags = client->flags;
-	msgs[0].len = data_len + 1;
-	msgs[0].buf = buffer;
-
-	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-	if (ret < 0)
-		dev_err(&client->adapter->dev, "i2c write failed\n");
-
-	kfree(buffer);
-	return ret;
-}
-
-static acpi_status
-acpi_i2c_space_handler(u32 function, acpi_physical_address command,
-			u32 bits, u64 *value64,
-			void *handler_context, void *region_context)
-{
-	struct gsb_buffer *gsb = (struct gsb_buffer *)value64;
-	struct acpi_i2c_handler_data *data = handler_context;
-	struct acpi_connection_info *info = &data->info;
-	struct acpi_resource_i2c_serialbus *sb;
-	struct i2c_adapter *adapter = data->adapter;
-	struct i2c_client client;
-	struct acpi_resource *ares;
-	u32 accessor_type = function >> 16;
-	u8 action = function & ACPI_IO_MASK;
-	acpi_status ret = AE_OK;
-	int status;
-
-	ret = acpi_buffer_to_resource(info->connection, info->length, &ares);
-	if (ACPI_FAILURE(ret))
-		return ret;
-
-	if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) {
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	sb = &ares->data.i2c_serial_bus;
-	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) {
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	memset(&client, 0, sizeof(client));
-	client.adapter = adapter;
-	client.addr = sb->slave_address;
-	client.flags = 0;
-
-	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
-		client.flags |= I2C_CLIENT_TEN;
-
-	switch (accessor_type) {
-	case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_byte(&client);
-			if (status >= 0) {
-				gsb->bdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_byte(&client, gsb->bdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_BYTE:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_byte_data(&client, command);
-			if (status >= 0) {
-				gsb->bdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_byte_data(&client, command,
-					gsb->bdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_WORD:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_word_data(&client, command);
-			if (status >= 0) {
-				gsb->wdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_word_data(&client, command,
-					gsb->wdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_BLOCK:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_block_data(&client, command,
-					gsb->data);
-			if (status >= 0) {
-				gsb->len = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_block_data(&client, command,
-					gsb->len, gsb->data);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE:
-		if (action == ACPI_READ) {
-			status = acpi_gsb_i2c_read_bytes(&client, command,
-					gsb->data, info->access_length);
-			if (status > 0)
-				status = 0;
-		} else {
-			status = acpi_gsb_i2c_write_bytes(&client, command,
-					gsb->data, info->access_length);
-		}
-		break;
-
-	default:
-		pr_info("protocol(0x%02x) is not supported.\n", accessor_type);
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	gsb->status = status;
-
- err:
-	ACPI_FREE(ares);
-	return ret;
-}
-
-
-int acpi_i2c_install_space_handler(struct i2c_adapter *adapter)
-{
-	acpi_handle handle = ACPI_HANDLE(adapter->dev.parent);
-	struct acpi_i2c_handler_data *data;
-	acpi_status status;
-
-	if (!handle)
-		return -ENODEV;
-
-	data = kzalloc(sizeof(struct acpi_i2c_handler_data),
-			    GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->adapter = adapter;
-	status = acpi_bus_attach_private_data(handle, (void *)data);
-	if (ACPI_FAILURE(status)) {
-		kfree(data);
-		return -ENOMEM;
-	}
-
-	status = acpi_install_address_space_handler(handle,
-				ACPI_ADR_SPACE_GSBUS,
-				&acpi_i2c_space_handler,
-				NULL,
-				data);
-	if (ACPI_FAILURE(status)) {
-		dev_err(&adapter->dev, "Error installing i2c space handler\n");
-		acpi_bus_detach_private_data(handle);
-		kfree(data);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
-{
-	acpi_handle handle = ACPI_HANDLE(adapter->dev.parent);
-	struct acpi_i2c_handler_data *data;
-	acpi_status status;
-
-	if (!handle)
-		return;
-
-	acpi_remove_address_space_handler(handle,
-				ACPI_ADR_SPACE_GSBUS,
-				&acpi_i2c_space_handler);
-
-	status = acpi_bus_get_private_data(handle, (void **)&data);
-	if (ACPI_SUCCESS(status))
-		kfree(data);
-
-	acpi_bus_detach_private_data(handle);
-}
-#endif
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 632057a44615..b696ac7e6d86 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -27,6 +27,8 @@
    OF support is copyright (c) 2008 Jochen Friedrich <jochen@scram.de>
    (based on a previous patch from Jon Smirl <jonsmirl@gmail.com>) and
    (c) 2013  Wolfram Sang <wsa@the-dreams.de>
+   I2C ACPI code Copyright (C) 2014 Intel Corp
+   Author: Lan Tianyu <tianyu.lan@intel.com>
  */
 
 #include <linux/module.h>
@@ -78,6 +80,358 @@ void i2c_transfer_trace_unreg(void)
 	static_key_slow_dec(&i2c_trace_msg);
 }
 
+#if defined(CONFIG_ACPI)
+struct acpi_i2c_handler_data {
+	struct acpi_connection_info info;
+	struct i2c_adapter *adapter;
+};
+
+struct gsb_buffer {
+	u8	status;
+	u8	len;
+	union {
+		u16	wdata;
+		u8	bdata;
+		u8	data[0];
+	};
+} __packed;
+
+static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data)
+{
+	struct i2c_board_info *info = data;
+
+	if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
+		struct acpi_resource_i2c_serialbus *sb;
+
+		sb = &ares->data.i2c_serial_bus;
+		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+			info->addr = sb->slave_address;
+			if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+				info->flags |= I2C_CLIENT_TEN;
+		}
+	} else if (info->irq < 0) {
+		struct resource r;
+
+		if (acpi_dev_resource_interrupt(ares, 0, &r))
+			info->irq = r.start;
+	}
+
+	/* Tell the ACPI core to skip this resource */
+	return 1;
+}
+
+static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
+				       void *data, void **return_value)
+{
+	struct i2c_adapter *adapter = data;
+	struct list_head resource_list;
+	struct i2c_board_info info;
+	struct acpi_device *adev;
+	int ret;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+	if (acpi_bus_get_status(adev) || !adev->status.present)
+		return AE_OK;
+
+	memset(&info, 0, sizeof(info));
+	info.acpi_node.companion = adev;
+	info.irq = -1;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     acpi_i2c_add_resource, &info);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0 || !info.addr)
+		return AE_OK;
+
+	adev->power.flags.ignore_parent = true;
+	strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type));
+	if (!i2c_new_device(adapter, &info)) {
+		adev->power.flags.ignore_parent = false;
+		dev_err(&adapter->dev,
+			"failed to add I2C device %s from ACPI\n",
+			dev_name(&adev->dev));
+	}
+
+	return AE_OK;
+}
+
+/**
+ * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter
+ * @adap: pointer to adapter
+ *
+ * Enumerate all I2C slave devices behind this adapter by walking the ACPI
+ * namespace. When a device is found it will be added to the Linux device
+ * model and bound to the corresponding ACPI handle.
+ */
+static void acpi_i2c_register_devices(struct i2c_adapter *adap)
+{
+	acpi_handle handle;
+	acpi_status status;
+
+	if (!adap->dev.parent)
+		return;
+
+	handle = ACPI_HANDLE(adap->dev.parent);
+	if (!handle)
+		return;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				     acpi_i2c_add_device, NULL,
+				     adap, NULL);
+	if (ACPI_FAILURE(status))
+		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
+}
+
+#else /* CONFIG_ACPI */
+static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { }
+#endif /* CONFIG_ACPI */
+
+#ifdef CONFIG_ACPI_I2C_OPREGION
+static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
+		u8 cmd, u8 *data, u8 data_len)
+{
+
+	struct i2c_msg msgs[2];
+	int ret;
+	u8 *buffer;
+
+	buffer = kzalloc(data_len, GFP_KERNEL);
+	if (!buffer)
+		return AE_NO_MEMORY;
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags;
+	msgs[0].len = 1;
+	msgs[0].buf = &cmd;
+
+	msgs[1].addr = client->addr;
+	msgs[1].flags = client->flags | I2C_M_RD;
+	msgs[1].len = data_len;
+	msgs[1].buf = buffer;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		dev_err(&client->adapter->dev, "i2c read failed\n");
+	else
+		memcpy(data, buffer, data_len);
+
+	kfree(buffer);
+	return ret;
+}
+
+static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
+		u8 cmd, u8 *data, u8 data_len)
+{
+
+	struct i2c_msg msgs[1];
+	u8 *buffer;
+	int ret = AE_OK;
+
+	buffer = kzalloc(data_len + 1, GFP_KERNEL);
+	if (!buffer)
+		return AE_NO_MEMORY;
+
+	buffer[0] = cmd;
+	memcpy(buffer + 1, data, data_len);
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags;
+	msgs[0].len = data_len + 1;
+	msgs[0].buf = buffer;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		dev_err(&client->adapter->dev, "i2c write failed\n");
+
+	kfree(buffer);
+	return ret;
+}
+
+static acpi_status
+acpi_i2c_space_handler(u32 function, acpi_physical_address command,
+			u32 bits, u64 *value64,
+			void *handler_context, void *region_context)
+{
+	struct gsb_buffer *gsb = (struct gsb_buffer *)value64;
+	struct acpi_i2c_handler_data *data = handler_context;
+	struct acpi_connection_info *info = &data->info;
+	struct acpi_resource_i2c_serialbus *sb;
+	struct i2c_adapter *adapter = data->adapter;
+	struct i2c_client client;
+	struct acpi_resource *ares;
+	u32 accessor_type = function >> 16;
+	u8 action = function & ACPI_IO_MASK;
+	acpi_status ret = AE_OK;
+	int status;
+
+	ret = acpi_buffer_to_resource(info->connection, info->length, &ares);
+	if (ACPI_FAILURE(ret))
+		return ret;
+
+	if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) {
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	sb = &ares->data.i2c_serial_bus;
+	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	memset(&client, 0, sizeof(client));
+	client.adapter = adapter;
+	client.addr = sb->slave_address;
+	client.flags = 0;
+
+	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+		client.flags |= I2C_CLIENT_TEN;
+
+	switch (accessor_type) {
+	case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_byte(&client);
+			if (status >= 0) {
+				gsb->bdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_byte(&client, gsb->bdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_BYTE:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_byte_data(&client, command);
+			if (status >= 0) {
+				gsb->bdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_byte_data(&client, command,
+					gsb->bdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_WORD:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_word_data(&client, command);
+			if (status >= 0) {
+				gsb->wdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_word_data(&client, command,
+					gsb->wdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_BLOCK:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_block_data(&client, command,
+					gsb->data);
+			if (status >= 0) {
+				gsb->len = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_block_data(&client, command,
+					gsb->len, gsb->data);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE:
+		if (action == ACPI_READ) {
+			status = acpi_gsb_i2c_read_bytes(&client, command,
+					gsb->data, info->access_length);
+			if (status > 0)
+				status = 0;
+		} else {
+			status = acpi_gsb_i2c_write_bytes(&client, command,
+					gsb->data, info->access_length);
+		}
+		break;
+
+	default:
+		pr_info("protocol(0x%02x) is not supported.\n", accessor_type);
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	gsb->status = status;
+
+ err:
+	ACPI_FREE(ares);
+	return ret;
+}
+
+
+static int acpi_i2c_install_space_handler(struct i2c_adapter *adapter)
+{
+	acpi_handle handle = ACPI_HANDLE(adapter->dev.parent);
+	struct acpi_i2c_handler_data *data;
+	acpi_status status;
+
+	if (!handle)
+		return -ENODEV;
+
+	data = kzalloc(sizeof(struct acpi_i2c_handler_data),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->adapter = adapter;
+	status = acpi_bus_attach_private_data(handle, (void *)data);
+	if (ACPI_FAILURE(status)) {
+		kfree(data);
+		return -ENOMEM;
+	}
+
+	status = acpi_install_address_space_handler(handle,
+				ACPI_ADR_SPACE_GSBUS,
+				&acpi_i2c_space_handler,
+				NULL,
+				data);
+	if (ACPI_FAILURE(status)) {
+		dev_err(&adapter->dev, "Error installing i2c space handler\n");
+		acpi_bus_detach_private_data(handle);
+		kfree(data);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
+{
+	acpi_handle handle = ACPI_HANDLE(adapter->dev.parent);
+	struct acpi_i2c_handler_data *data;
+	acpi_status status;
+
+	if (!handle)
+		return;
+
+	acpi_remove_address_space_handler(handle,
+				ACPI_ADR_SPACE_GSBUS,
+				&acpi_i2c_space_handler);
+
+	status = acpi_bus_get_private_data(handle, (void **)&data);
+	if (ACPI_SUCCESS(status))
+		kfree(data);
+
+	acpi_bus_detach_private_data(handle);
+}
+#else /* CONFIG_ACPI_I2C_OPREGION */
+static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
+{ }
+
+static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter)
+{ return 0; }
+#endif /* CONFIG_ACPI_I2C_OPREGION */
+
 /* ------------------------------------------------------------------------- */
 
 static const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index a95efeb53a8b..b556e0ab946f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -577,20 +577,4 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node
 }
 #endif /* CONFIG_OF */
 
-#ifdef CONFIG_ACPI
-void acpi_i2c_register_devices(struct i2c_adapter *adap);
-#else
-static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { }
-#endif /* CONFIG_ACPI */
-
-#ifdef CONFIG_ACPI_I2C_OPREGION
-int acpi_i2c_install_space_handler(struct i2c_adapter *adapter);
-void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter);
-#else
-static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter)
-{ }
-static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter)
-{ return 0; }
-#endif /* CONFIG_ACPI_I2C_OPREGION */
-
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From 3fc2aa5522ab958374d93ef5d2e12df7ee233c91 Mon Sep 17 00:00:00 2001
From: Michal Sojka <sojka@merica.cz>
Date: Wed, 24 Sep 2014 22:43:18 +0200
Subject: usb: gadget: Introduce usb_gadget_giveback_request()

All USB peripheral controller drivers call completion routines directly.
This patch adds usb_gadget_giveback_request() which will be used instead
of direct invocation in the next patch. The goal here is to have a place
where common functionality can be added.

Signed-off-by: Michal Sojka <sojka@merica.cz>
Acked-by: Felipe Balbi <balbi@ti.com>
Tested-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/udc-core.c | 16 ++++++++++++++++
 include/linux/usb/gadget.h        |  8 ++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index ad1ceac15468..16d3f6fedd1c 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -106,6 +106,22 @@ EXPORT_SYMBOL_GPL(usb_gadget_unmap_request);
 
 /* ------------------------------------------------------------------------- */
 
+/**
+ * usb_gadget_giveback_request - give the request back to the gadget layer
+ * Context: in_interrupt()
+ *
+ * This is called by device controller drivers in order to return the
+ * completed request back to the gadget layer.
+ */
+void usb_gadget_giveback_request(struct usb_ep *ep,
+		struct usb_request *req)
+{
+	req->complete(ep, req);
+}
+EXPORT_SYMBOL_GPL(usb_gadget_giveback_request);
+
+/* ------------------------------------------------------------------------- */
+
 static void usb_gadget_state_work(struct work_struct *work)
 {
 	struct usb_gadget	*gadget = work_to_gadget(work);
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index d18811433324..522cafe26790 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -1023,6 +1023,14 @@ extern void usb_gadget_udc_reset(struct usb_gadget *gadget,
 
 /*-------------------------------------------------------------------------*/
 
+/* utility to give requests back to the gadget layer */
+
+extern void usb_gadget_giveback_request(struct usb_ep *ep,
+		struct usb_request *req);
+
+
+/*-------------------------------------------------------------------------*/
+
 /* utility wrapping a simple endpoint selection policy */
 
 extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *,
-- 
cgit v1.2.3


From 0cfbd328d60f85b0dcf66df61a3615e9a8e5d4e4 Mon Sep 17 00:00:00 2001
From: Michal Sojka <sojka@merica.cz>
Date: Wed, 24 Sep 2014 22:43:21 +0200
Subject: usb: Add LED triggers for USB activity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With this patch, USB activity can be signaled by blinking a LED. There
are two triggers, one for activity on USB host and one for USB gadget.

Both triggers should work with all host/device controllers. Tested only
with musb.

Performace: I measured performance overheads on ARM Cortex-A8 (TI
AM335x) running on 600 MHz.

Duration of usb_led_activity():
- with no LED attached to the trigger:        2 ± 1 µs
- with one GPIO LED attached to the trigger:  2 ± 1 µs or 8 ± 2 µs (two peaks in histogram)

Duration of functions calling usb_led_activity() (with this patch
applied and no LED attached to the trigger):
- __usb_hcd_giveback_urb():    10 - 25 µs
- usb_gadget_giveback_request(): 2 - 6 µs

Signed-off-by: Michal Sojka <sojka@merica.cz>
Acked-by: Felipe Balbi <balbi@ti.com>
Tested-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/Kconfig               | 10 +++++++
 drivers/usb/common/Makefile       |  1 +
 drivers/usb/common/led.c          | 57 +++++++++++++++++++++++++++++++++++++++
 drivers/usb/core/hcd.c            |  2 ++
 drivers/usb/gadget/udc/udc-core.c |  4 +++
 include/linux/usb.h               | 12 +++++++++
 6 files changed, 86 insertions(+)
 create mode 100644 drivers/usb/common/led.c

(limited to 'include/linux')

diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index cf1b19bca306..ae481c37a208 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -149,4 +149,14 @@ source "drivers/usb/phy/Kconfig"
 
 source "drivers/usb/gadget/Kconfig"
 
+config USB_LED_TRIG
+	bool "USB LED Triggers"
+	depends on LEDS_CLASS && USB_COMMON && LEDS_TRIGGERS
+	help
+	  This option adds LED triggers for USB host and/or gadget activity.
+
+	  Say Y here if you are working on a system with led-class supported
+	  LEDs and you want to use them as activity indicators for USB host or
+	  gadget.
+
 endif # USB_SUPPORT
diff --git a/drivers/usb/common/Makefile b/drivers/usb/common/Makefile
index 052c12069c24..ca2f8bd0e431 100644
--- a/drivers/usb/common/Makefile
+++ b/drivers/usb/common/Makefile
@@ -4,5 +4,6 @@
 
 obj-$(CONFIG_USB_COMMON)	  += usb-common.o
 usb-common-y			  += common.o
+usb-common-$(CONFIG_USB_LED_TRIG) += led.o
 
 obj-$(CONFIG_USB_OTG_FSM) += usb-otg-fsm.o
diff --git a/drivers/usb/common/led.c b/drivers/usb/common/led.c
new file mode 100644
index 000000000000..df23da00a901
--- /dev/null
+++ b/drivers/usb/common/led.c
@@ -0,0 +1,57 @@
+/*
+ * LED Triggers for USB Activity
+ *
+ * Copyright 2014 Michal Sojka <sojka@merica.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/usb.h>
+
+#define BLINK_DELAY 30
+
+static unsigned long usb_blink_delay = BLINK_DELAY;
+
+DEFINE_LED_TRIGGER(ledtrig_usb_gadget);
+DEFINE_LED_TRIGGER(ledtrig_usb_host);
+
+void usb_led_activity(enum usb_led_event ev)
+{
+	struct led_trigger *trig = NULL;
+
+	switch (ev) {
+	case USB_LED_EVENT_GADGET:
+		trig = ledtrig_usb_gadget;
+		break;
+	case USB_LED_EVENT_HOST:
+		trig = ledtrig_usb_host;
+		break;
+	}
+	/* led_trigger_blink_oneshot() handles trig == NULL gracefully */
+	led_trigger_blink_oneshot(trig, &usb_blink_delay, &usb_blink_delay, 0);
+}
+EXPORT_SYMBOL_GPL(usb_led_activity);
+
+
+static int __init ledtrig_usb_init(void)
+{
+	led_trigger_register_simple("usb-gadget", &ledtrig_usb_gadget);
+	led_trigger_register_simple("usb-host", &ledtrig_usb_host);
+	return 0;
+}
+
+static void __exit ledtrig_usb_exit(void)
+{
+	led_trigger_unregister_simple(ledtrig_usb_gadget);
+	led_trigger_unregister_simple(ledtrig_usb_host);
+}
+
+module_init(ledtrig_usb_init);
+module_exit(ledtrig_usb_exit);
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index d3fe161bec05..bcb96ff207ba 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1664,6 +1664,8 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
 	usbmon_urb_complete(&hcd->self, urb, status);
 	usb_anchor_suspend_wakeups(anchor);
 	usb_unanchor_urb(urb);
+	if (likely(status == 0))
+		usb_led_activity(USB_LED_EVENT_HOST);
 
 	/* pass ownership to the completion handler */
 	urb->status = status;
diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index 16d3f6fedd1c..f107bb60a5ab 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -27,6 +27,7 @@
 
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/usb.h>
 
 /**
  * struct usb_udc - describes one usb device controller
@@ -116,6 +117,9 @@ EXPORT_SYMBOL_GPL(usb_gadget_unmap_request);
 void usb_gadget_giveback_request(struct usb_ep *ep,
 		struct usb_request *req)
 {
+	if (likely(req->status == 0))
+		usb_led_activity(USB_LED_EVENT_GADGET);
+
 	req->complete(ep, req);
 }
 EXPORT_SYMBOL_GPL(usb_gadget_giveback_request);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index d2465bc0e73c..447a7e2fc19b 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1862,6 +1862,18 @@ extern void usb_unregister_notify(struct notifier_block *nb);
 /* debugfs stuff */
 extern struct dentry *usb_debug_root;
 
+/* LED triggers */
+enum usb_led_event {
+	USB_LED_EVENT_HOST = 0,
+	USB_LED_EVENT_GADGET = 1,
+};
+
+#ifdef CONFIG_USB_LED_TRIG
+extern void usb_led_activity(enum usb_led_event ev);
+#else
+static inline void usb_led_activity(enum usb_led_event ev) {}
+#endif
+
 #endif  /* __KERNEL__ */
 
 #endif
-- 
cgit v1.2.3


From d79b6fe17aa279c7015a9c4ee88809dad4be9959 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 25 Sep 2014 18:28:28 +0200
Subject: PM / Domains: Add genpd attach/detach callbacks

While a PM domain can enable PM runtime management of its devices' module
clocks by setting

	genpd->dev_ops.stop = pm_clk_suspend;
	genpd->dev_ops.start = pm_clk_resume;

this also requires registering the clocks with the pm_clk subsystem.
In the legacy case, this is handled by the platform code, after
attaching the device to its PM domain.

When the devices are instantiated from DT, devices are attached to their
PM domains by generic code, leaving no method for the platform-specific
PM domain code to register their clocks.

Add two callbacks, allowing a PM domain to perform platform-specific
tasks when a device is attached to or detached from a PM domain.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 6 ++++++
 include/linux/pm_domain.h   | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index cd400575ee0e..2a9f4c5025e3 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1436,6 +1436,9 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
 
 	spin_unlock_irq(&dev->power.lock);
 
+	if (genpd->attach_dev)
+		genpd->attach_dev(dev);
+
 	mutex_lock(&gpd_data->lock);
 	gpd_data->base.dev = dev;
 	list_add_tail(&gpd_data->base.list_node, &genpd->dev_list);
@@ -1528,6 +1531,9 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd,
 	genpd->device_count--;
 	genpd->max_off_time_changed = true;
 
+	if (genpd->detach_dev)
+		genpd->detach_dev(dev);
+
 	spin_lock_irq(&dev->power.lock);
 
 	dev->pm_domain = NULL;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 292079d8da6b..9a93e622bdea 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -73,6 +73,8 @@ struct generic_pm_domain {
 	bool cached_power_down_ok;
 	struct device_node *of_node; /* Node in device tree */
 	struct gpd_cpu_data *cpu_data;
+	void (*attach_dev)(struct device *dev);
+	void (*detach_dev)(struct device *dev);
 };
 
 static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd)
-- 
cgit v1.2.3


From 263c589bae9eb404df2c1e8d49ec775bb7b288d4 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 25 Sep 2014 17:49:59 +0200
Subject: PM / Domains: Remove legacy API for adding devices through DT

There are no active clients of the legacy API and we now also have a
better way to handle genpd DT support. So let's remove the legacy API.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 33 ---------------------------------
 include/linux/pm_domain.h   | 17 -----------------
 2 files changed, 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 2a9f4c5025e3..18cc68d058d6 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1456,39 +1456,6 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
 	return ret;
 }
 
-/**
- * __pm_genpd_of_add_device - Add a device to an I/O PM domain.
- * @genpd_node: Device tree node pointer representing a PM domain to which the
- *   the device is added to.
- * @dev: Device to be added.
- * @td: Set of PM QoS timing parameters to attach to the device.
- */
-int __pm_genpd_of_add_device(struct device_node *genpd_node, struct device *dev,
-			     struct gpd_timing_data *td)
-{
-	struct generic_pm_domain *genpd = NULL, *gpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	if (IS_ERR_OR_NULL(genpd_node) || IS_ERR_OR_NULL(dev))
-		return -EINVAL;
-
-	mutex_lock(&gpd_list_lock);
-	list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
-		if (gpd->of_node == genpd_node) {
-			genpd = gpd;
-			break;
-		}
-	}
-	mutex_unlock(&gpd_list_lock);
-
-	if (!genpd)
-		return -EINVAL;
-
-	return __pm_genpd_add_device(genpd, dev, td);
-}
-
-
 /**
  * __pm_genpd_name_add_device - Find I/O PM domain and add a device to it.
  * @domain_name: Name of the PM domain to add the device to.
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 9a93e622bdea..ed4f4a79c528 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -71,7 +71,6 @@ struct generic_pm_domain {
 	s64 max_off_time_ns;	/* Maximum allowed "suspended" time. */
 	bool max_off_time_changed;
 	bool cached_power_down_ok;
-	struct device_node *of_node; /* Node in device tree */
 	struct gpd_cpu_data *cpu_data;
 	void (*attach_dev)(struct device *dev);
 	void (*detach_dev)(struct device *dev);
@@ -124,10 +123,6 @@ extern int __pm_genpd_add_device(struct generic_pm_domain *genpd,
 				 struct device *dev,
 				 struct gpd_timing_data *td);
 
-extern int __pm_genpd_of_add_device(struct device_node *genpd_node,
-				    struct device *dev,
-				    struct gpd_timing_data *td);
-
 extern int __pm_genpd_name_add_device(const char *domain_name,
 				      struct device *dev,
 				      struct gpd_timing_data *td);
@@ -169,12 +164,6 @@ static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd,
 {
 	return -ENOSYS;
 }
-static inline int __pm_genpd_of_add_device(struct device_node *genpd_node,
-					   struct device *dev,
-					   struct gpd_timing_data *td)
-{
-	return -ENOSYS;
-}
 static inline int __pm_genpd_name_add_device(const char *domain_name,
 					     struct device *dev,
 					     struct gpd_timing_data *td)
@@ -240,12 +229,6 @@ static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
 	return __pm_genpd_add_device(genpd, dev, NULL);
 }
 
-static inline int pm_genpd_of_add_device(struct device_node *genpd_node,
-					 struct device *dev)
-{
-	return __pm_genpd_of_add_device(genpd_node, dev, NULL);
-}
-
 static inline int pm_genpd_name_add_device(const char *domain_name,
 					   struct device *dev)
 {
-- 
cgit v1.2.3


From 7c94e1c157a227837b04f02f5edeff8301410ba2 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Thu, 25 Sep 2014 23:23:43 +0800
Subject: block: introduce blk_flush_queue to drive flush machinery

This patch introduces 'struct blk_flush_queue' and puts all
flush machinery related fields into this structure, so that

	- flush implementation details aren't exposed to driver
	- it is easy to convert to per dispatch-queue flush machinery

This patch is basically a mechanical replacement.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |   4 +-
 block/blk-flush.c      | 109 ++++++++++++++++++++++++++++++-------------------
 block/blk-mq.c         |  10 +++--
 block/blk.h            |  22 +++++++++-
 include/linux/blkdev.h |  10 +----
 5 files changed, 99 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 222fe84d6ac4..cfaca8ca6cc4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -390,11 +390,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 		 * be drained.  Check all the queues and counters.
 		 */
 		if (drain_all) {
+			struct blk_flush_queue *fq = blk_get_flush_queue(q);
 			drain |= !list_empty(&q->queue_head);
 			for (i = 0; i < 2; i++) {
 				drain |= q->nr_rqs[i];
 				drain |= q->in_flight[i];
-				drain |= !list_empty(&q->flush_queue[i]);
+				if (fq)
+				    drain |= !list_empty(&fq->flush_queue[i]);
 			}
 		}
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index caf44756d329..b01a86d6bf86 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -28,7 +28,7 @@
  *
  * The actual execution of flush is double buffered.  Whenever a request
  * needs to execute PRE or POSTFLUSH, it queues at
- * q->flush_queue[q->flush_pending_idx].  Once certain criteria are met, a
+ * fq->flush_queue[fq->flush_pending_idx].  Once certain criteria are met, a
  * flush is issued and the pending_idx is toggled.  When the flush
  * completes, all the requests which were pending are proceeded to the next
  * step.  This allows arbitrary merging of different types of FLUSH/FUA
@@ -155,7 +155,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
  * completion and trigger the next step.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
+ * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
  *
  * RETURNS:
  * %true if requests were added to the dispatch queue, %false otherwise.
@@ -164,7 +164,8 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 				   int error)
 {
 	struct request_queue *q = rq->q;
-	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
 	bool queued = false, kicked;
 
 	BUG_ON(rq->flush.seq & seq);
@@ -180,12 +181,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 	case REQ_FSEQ_POSTFLUSH:
 		/* queue for flush */
 		if (list_empty(pending))
-			q->flush_pending_since = jiffies;
+			fq->flush_pending_since = jiffies;
 		list_move_tail(&rq->flush.list, pending);
 		break;
 
 	case REQ_FSEQ_DATA:
-		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
+		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
 		queued = blk_flush_queue_rq(rq, true);
 		break;
 
@@ -220,17 +221,18 @@ static void flush_end_io(struct request *flush_rq, int error)
 	bool queued = false;
 	struct request *rq, *n;
 	unsigned long flags = 0;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
 	if (q->mq_ops) {
-		spin_lock_irqsave(&q->mq_flush_lock, flags);
+		spin_lock_irqsave(&fq->mq_flush_lock, flags);
 		flush_rq->tag = -1;
 	}
 
-	running = &q->flush_queue[q->flush_running_idx];
-	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
+	running = &fq->flush_queue[fq->flush_running_idx];
+	BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);
 
 	/* account completion of the flush request */
-	q->flush_running_idx ^= 1;
+	fq->flush_running_idx ^= 1;
 
 	if (!q->mq_ops)
 		elv_completed_request(q, flush_rq);
@@ -254,13 +256,13 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * directly into request_fn may confuse the driver.  Always use
 	 * kblockd.
 	 */
-	if (queued || q->flush_queue_delayed) {
+	if (queued || fq->flush_queue_delayed) {
 		WARN_ON(q->mq_ops);
 		blk_run_queue_async(q);
 	}
-	q->flush_queue_delayed = 0;
+	fq->flush_queue_delayed = 0;
 	if (q->mq_ops)
-		spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 }
 
 /**
@@ -271,33 +273,34 @@ static void flush_end_io(struct request *flush_rq, int error)
  * Please read the comment at the top of this file for more info.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
+ * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
  *
  * RETURNS:
  * %true if flush was issued, %false otherwise.
  */
 static bool blk_kick_flush(struct request_queue *q)
 {
-	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
 	struct request *first_rq =
 		list_first_entry(pending, struct request, flush.list);
-	struct request *flush_rq = q->flush_rq;
+	struct request *flush_rq = fq->flush_rq;
 
 	/* C1 described at the top of this file */
-	if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
+	if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
 		return false;
 
 	/* C2 and C3 */
-	if (!list_empty(&q->flush_data_in_flight) &&
+	if (!list_empty(&fq->flush_data_in_flight) &&
 	    time_before(jiffies,
-			q->flush_pending_since + FLUSH_PENDING_TIMEOUT))
+			fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
 		return false;
 
 	/*
 	 * Issue flush and toggle pending_idx.  This makes pending_idx
 	 * different from running_idx, which means flush is in flight.
 	 */
-	q->flush_pending_idx ^= 1;
+	fq->flush_pending_idx ^= 1;
 
 	blk_rq_init(q, flush_rq);
 	if (q->mq_ops)
@@ -329,6 +332,7 @@ static void mq_flush_data_end_io(struct request *rq, int error)
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	unsigned long flags;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
 	ctx = rq->mq_ctx;
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
@@ -337,10 +341,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
 	 * After populating an empty queue, kick it to avoid stall.  Read
 	 * the comment in flush_end_io().
 	 */
-	spin_lock_irqsave(&q->mq_flush_lock, flags);
+	spin_lock_irqsave(&fq->mq_flush_lock, flags);
 	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
 		blk_mq_run_hw_queue(hctx, true);
-	spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 }
 
 /**
@@ -408,11 +412,13 @@ void blk_insert_flush(struct request *rq)
 	rq->cmd_flags |= REQ_FLUSH_SEQ;
 	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
 	if (q->mq_ops) {
+		struct blk_flush_queue *fq = blk_get_flush_queue(q);
+
 		rq->end_io = mq_flush_data_end_io;
 
-		spin_lock_irq(&q->mq_flush_lock);
+		spin_lock_irq(&fq->mq_flush_lock);
 		blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
-		spin_unlock_irq(&q->mq_flush_lock);
+		spin_unlock_irq(&fq->mq_flush_lock);
 		return;
 	}
 	rq->end_io = flush_data_end_io;
@@ -473,31 +479,52 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-static int blk_mq_init_flush(struct request_queue *q)
+static struct blk_flush_queue *blk_alloc_flush_queue(
+		struct request_queue *q)
 {
-	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_flush_queue *fq;
+	int rq_sz = sizeof(struct request);
 
-	spin_lock_init(&q->mq_flush_lock);
+	fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+	if (!fq)
+		goto fail;
 
-	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
-				set->cmd_size, cache_line_size()),
-				GFP_KERNEL);
-	if (!q->flush_rq)
-		return -ENOMEM;
-	return 0;
+	if (q->mq_ops) {
+		spin_lock_init(&fq->mq_flush_lock);
+		rq_sz = round_up(rq_sz + q->tag_set->cmd_size,
+				cache_line_size());
+	}
+
+	fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
+	if (!fq->flush_rq)
+		goto fail_rq;
+
+	INIT_LIST_HEAD(&fq->flush_queue[0]);
+	INIT_LIST_HEAD(&fq->flush_queue[1]);
+	INIT_LIST_HEAD(&fq->flush_data_in_flight);
+
+	return fq;
+
+ fail_rq:
+	kfree(fq);
+ fail:
+	return NULL;
 }
 
-int blk_init_flush(struct request_queue *q)
+static void blk_free_flush_queue(struct blk_flush_queue *fq)
 {
-	INIT_LIST_HEAD(&q->flush_queue[0]);
-	INIT_LIST_HEAD(&q->flush_queue[1]);
-	INIT_LIST_HEAD(&q->flush_data_in_flight);
+	/* bio based request queue hasn't flush queue */
+	if (!fq)
+		return;
 
-	if (q->mq_ops)
-		return blk_mq_init_flush(q);
+	kfree(fq->flush_rq);
+	kfree(fq);
+}
 
-	q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
-	if (!q->flush_rq)
+int blk_init_flush(struct request_queue *q)
+{
+	q->fq = blk_alloc_flush_queue(q);
+	if (!q->fq)
 		return -ENOMEM;
 
 	return 0;
@@ -505,5 +532,5 @@ int blk_init_flush(struct request_queue *q)
 
 void blk_exit_flush(struct request_queue *q)
 {
-	kfree(q->flush_rq);
+	blk_free_flush_queue(q->fq);
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2758cdf2de94..d39e8a5eaeaa 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -508,20 +508,22 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
-static inline bool is_flush_request(struct request *rq, unsigned int tag)
+static inline bool is_flush_request(struct request *rq,
+		struct blk_flush_queue *fq, unsigned int tag)
 {
 	return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
-			rq->q->flush_rq->tag == tag);
+			fq->flush_rq->tag == tag);
 }
 
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	struct request *rq = tags->rqs[tag];
+	struct blk_flush_queue *fq = blk_get_flush_queue(rq->q);
 
-	if (!is_flush_request(rq, tag))
+	if (!is_flush_request(rq, fq, tag))
 		return rq;
 
-	return rq->q->flush_rq;
+	return fq->flush_rq;
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
diff --git a/block/blk.h b/block/blk.h
index c6fa3d4c6a89..833c4ac6c4eb 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -12,11 +12,28 @@
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT		(5 * HZ)
 
+struct blk_flush_queue {
+	unsigned int		flush_queue_delayed:1;
+	unsigned int		flush_pending_idx:1;
+	unsigned int		flush_running_idx:1;
+	unsigned long		flush_pending_since;
+	struct list_head	flush_queue[2];
+	struct list_head	flush_data_in_flight;
+	struct request		*flush_rq;
+	spinlock_t		mq_flush_lock;
+};
+
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kmem_cache *request_cachep;
 extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
+static inline struct blk_flush_queue *blk_get_flush_queue(
+		struct request_queue *q)
+{
+	return q->fq;
+}
+
 static inline void __blk_get_queue(struct request_queue *q)
 {
 	kobject_get(&q->kobj);
@@ -89,6 +106,7 @@ void blk_insert_flush(struct request *rq);
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
 	while (1) {
 		if (!list_empty(&q->queue_head)) {
@@ -111,9 +129,9 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 		 * should be restarted later. Please see flush_end_io() for
 		 * details.
 		 */
-		if (q->flush_pending_idx != q->flush_running_idx &&
+		if (fq->flush_pending_idx != fq->flush_running_idx &&
 				!queue_flush_queueable(q)) {
-			q->flush_queue_delayed = 1;
+			fq->flush_queue_delayed = 1;
 			return NULL;
 		}
 		if (unlikely(blk_queue_bypass(q)) ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e267bf0db559..49f3461e4272 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -36,6 +36,7 @@ struct request;
 struct sg_io_hdr;
 struct bsg_job;
 struct blkcg_gq;
+struct blk_flush_queue;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@ -455,14 +456,7 @@ struct request_queue {
 	 */
 	unsigned int		flush_flags;
 	unsigned int		flush_not_queueable:1;
-	unsigned int		flush_queue_delayed:1;
-	unsigned int		flush_pending_idx:1;
-	unsigned int		flush_running_idx:1;
-	unsigned long		flush_pending_since;
-	struct list_head	flush_queue[2];
-	struct list_head	flush_data_in_flight;
-	struct request		*flush_rq;
-	spinlock_t		mq_flush_lock;
+	struct blk_flush_queue	*fq;
 
 	struct list_head	requeue_list;
 	spinlock_t		requeue_lock;
-- 
cgit v1.2.3


From f70ced09170761acb69840cafaace4abc72cba4b Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Thu, 25 Sep 2014 23:23:47 +0800
Subject: blk-mq: support per-distpatch_queue flush machinery

This patch supports to run one single flush machinery for
each blk-mq dispatch queue, so that:

- current init_request and exit_request callbacks can
cover flush request too, then the buggy copying way of
initializing flush request's pdu can be fixed

- flushing performance gets improved in case of multi hw-queue

In fio sync write test over virtio-blk(4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), it is observed that througput gets
increased a lot over my test environment:
	- throughput: +70% in case of virtio-blk over null_blk
	- throughput: +30% in case of virtio-blk over SSD image

The multi virtqueue feature isn't merged to QEMU yet, and patches for
the feature can be found in below tree:

	git://kernel.ubuntu.com/ming/qemu.git  	v2.1.0-mq.4

And simply passing 'num_queues=4 vectors=5' should be enough to
enable multi queue(quad queue) feature for QEMU virtio-blk.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |  2 +-
 block/blk-flush.c      | 21 ++++++++++++++-------
 block/blk-mq.c         | 50 ++++++++++++++++++++++++--------------------------
 block/blk-sysfs.c      |  4 ++--
 block/blk.h            | 16 +++++++++++++---
 include/linux/blk-mq.h |  6 ++++++
 6 files changed, 60 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index b1dd4e086740..e1c2775c7597 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -704,7 +704,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 	if (!q)
 		return NULL;
 
-	q->fq = blk_alloc_flush_queue(q);
+	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
 	if (!q->fq)
 		return NULL;
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 004d95e4098e..20badd7b9d1b 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -305,8 +305,15 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	fq->flush_pending_idx ^= 1;
 
 	blk_rq_init(q, flush_rq);
-	if (q->mq_ops)
-		blk_mq_clone_flush_request(flush_rq, first_rq);
+
+	/*
+	 * Borrow tag from the first request since they can't
+	 * be in flight at the same time.
+	 */
+	if (q->mq_ops) {
+		flush_rq->mq_ctx = first_rq->mq_ctx;
+		flush_rq->tag = first_rq->tag;
+	}
 
 	flush_rq->cmd_type = REQ_TYPE_FS;
 	flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
@@ -480,22 +487,22 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q)
+struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
+		int node, int cmd_size)
 {
 	struct blk_flush_queue *fq;
 	int rq_sz = sizeof(struct request);
 
-	fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+	fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
 	if (!fq)
 		goto fail;
 
 	if (q->mq_ops) {
 		spin_lock_init(&fq->mq_flush_lock);
-		rq_sz = round_up(rq_sz + q->tag_set->cmd_size,
-				cache_line_size());
+		rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
 	}
 
-	fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
+	fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
 	if (!fq->flush_rq)
 		goto fail_rq;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 53b6def12fc4..4e7a31466139 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -281,26 +281,6 @@ void blk_mq_free_request(struct request *rq)
 	__blk_mq_free_request(hctx, ctx, rq);
 }
 
-/*
- * Clone all relevant state from a request that has been put on hold in
- * the flush state machine into the preallocated flush request that hangs
- * off the request queue.
- *
- * For a driver the flush request should be invisible, that's why we are
- * impersonating the original request here.
- */
-void blk_mq_clone_flush_request(struct request *flush_rq,
-		struct request *orig_rq)
-{
-	struct blk_mq_hw_ctx *hctx =
-		orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
-
-	flush_rq->mq_ctx = orig_rq->mq_ctx;
-	flush_rq->tag = orig_rq->tag;
-	memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
-		hctx->cmd_size);
-}
-
 inline void __blk_mq_end_request(struct request *rq, int error)
 {
 	blk_account_io_done(rq);
@@ -1516,12 +1496,20 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	unsigned flush_start_tag = set->queue_depth;
+
 	blk_mq_tag_idle(hctx);
 
+	if (set->ops->exit_request)
+		set->ops->exit_request(set->driver_data,
+				       hctx->fq->flush_rq, hctx_idx,
+				       flush_start_tag + hctx_idx);
+
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
 	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+	blk_free_flush_queue(hctx->fq);
 	kfree(hctx->ctxs);
 	blk_mq_free_bitmap(&hctx->ctx_map);
 }
@@ -1556,6 +1544,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
 {
 	int node;
+	unsigned flush_start_tag = set->queue_depth;
 
 	node = hctx->numa_node;
 	if (node == NUMA_NO_NODE)
@@ -1594,8 +1583,23 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
 		goto free_bitmap;
 
+	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
+	if (!hctx->fq)
+		goto exit_hctx;
+
+	if (set->ops->init_request &&
+	    set->ops->init_request(set->driver_data,
+				   hctx->fq->flush_rq, hctx_idx,
+				   flush_start_tag + hctx_idx, node))
+		goto free_fq;
+
 	return 0;
 
+ free_fq:
+	kfree(hctx->fq);
+ exit_hctx:
+	if (set->ops->exit_hctx)
+		set->ops->exit_hctx(hctx, hctx_idx);
  free_bitmap:
 	blk_mq_free_bitmap(&hctx->ctx_map);
  free_ctxs:
@@ -1862,16 +1866,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	blk_mq_add_queue_tag_set(set, q);
 
-	q->fq = blk_alloc_flush_queue(q);
-	if (!q->fq)
-		goto err_hw_queues;
-
 	blk_mq_map_swqueue(q);
 
 	return q;
 
-err_hw_queues:
-	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 err_hw:
 	blk_cleanup_queue(q);
 err_hctxs:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 718cffc4c678..e8f38a36c625 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -517,10 +517,10 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	blk_free_flush_queue(q->fq);
-
 	if (q->mq_ops)
 		blk_mq_free_queue(q);
+	else
+		blk_free_flush_queue(q->fq);
 
 	blk_trace_shutdown(q);
 
diff --git a/block/blk.h b/block/blk.h
index 7ecdd8517e69..43b036185712 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -2,6 +2,8 @@
 #define BLK_INTERNAL_H
 
 #include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include "blk-mq.h"
 
 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME	(HZ/50UL)
@@ -31,7 +33,14 @@ extern struct ida blk_queue_ida;
 static inline struct blk_flush_queue *blk_get_flush_queue(
 		struct request_queue *q, struct blk_mq_ctx *ctx)
 {
-	return q->fq;
+	struct blk_mq_hw_ctx *hctx;
+
+	if (!q->mq_ops)
+		return q->fq;
+
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+	return hctx->fq;
 }
 
 static inline void __blk_get_queue(struct request_queue *q)
@@ -39,8 +48,9 @@ static inline void __blk_get_queue(struct request_queue *q)
 	kobject_get(&q->kobj);
 }
 
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q);
-void blk_free_flush_queue(struct blk_flush_queue *fq);
+struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
+		int node, int cmd_size);
+void blk_free_flush_queue(struct blk_flush_queue *q);
 
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 325349559fb0..02c5d950f444 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -4,6 +4,7 @@
 #include <linux/blkdev.h>
 
 struct blk_mq_tags;
+struct blk_flush_queue;
 
 struct blk_mq_cpu_notifier {
 	struct list_head list;
@@ -34,6 +35,7 @@ struct blk_mq_hw_ctx {
 
 	struct request_queue	*queue;
 	unsigned int		queue_num;
+	struct blk_flush_queue	*fq;
 
 	void			*driver_data;
 
@@ -119,6 +121,10 @@ struct blk_mq_ops {
 	/*
 	 * Called for every command allocated by the block layer to allow
 	 * the driver to set up driver specific data.
+	 *
+	 * Tag greater than or equal to queue_depth is for setting up
+	 * flush request.
+	 *
 	 * Ditto for exit/teardown.
 	 */
 	init_request_fn		*init_request;
-- 
cgit v1.2.3


From 53e50398968d43338c4d932114e68bc099fc5fbd Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sat, 20 Sep 2014 14:52:30 -0700
Subject: net: Remove gso_send_check as an offload callback

The send_check logic was only interesting in cases of TCP offload and
UDP UFO where the checksum needed to be initialized to the pseudo
header checksum. Now we've moved that logic into the related
gso_segment functions so gso_send_check is no longer needed.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 net/core/dev.c            | 10 ----------
 net/ipv4/af_inet.c        | 36 ------------------------------------
 net/ipv4/gre_offload.c    | 11 +++--------
 net/ipv4/tcp_offload.c    |  6 ------
 net/ipv4/udp_offload.c    |  6 ------
 net/ipv6/ip6_offload.c    | 27 ---------------------------
 net/ipv6/tcpv6_offload.c  |  6 ------
 net/ipv6/udp_offload.c    |  6 ------
 net/mpls/mpls_gso.c       |  7 -------
 10 files changed, 3 insertions(+), 113 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4354b4307e37..9f5d293a0281 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1911,7 +1911,6 @@ struct packet_type {
 struct offload_callbacks {
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						netdev_features_t features);
-	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb, int nhoff);
diff --git a/net/core/dev.c b/net/core/dev.c
index db0388607329..e2ced01c01e4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2422,16 +2422,6 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
 		if (ptype->type == type && ptype->callbacks.gso_segment) {
-			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-				int err;
-
-				err = ptype->callbacks.gso_send_check(skb);
-				segs = ERR_PTR(err);
-				if (err || skb_gso_ok(skb, features))
-					break;
-				__skb_push(skb, (skb->data -
-						 skb_network_header(skb)));
-			}
 			segs = ptype->callbacks.gso_segment(skb, features);
 			break;
 		}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 72011cc4c13b..28e589c5f32d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1197,40 +1197,6 @@ int inet_sk_rebuild_header(struct sock *sk)
 }
 EXPORT_SYMBOL(inet_sk_rebuild_header);
 
-static int inet_gso_send_check(struct sk_buff *skb)
-{
-	const struct net_offload *ops;
-	const struct iphdr *iph;
-	int proto;
-	int ihl;
-	int err = -EINVAL;
-
-	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
-		goto out;
-
-	iph = ip_hdr(skb);
-	ihl = iph->ihl * 4;
-	if (ihl < sizeof(*iph))
-		goto out;
-
-	proto = iph->protocol;
-
-	/* Warning: after this point, iph might be no longer valid */
-	if (unlikely(!pskb_may_pull(skb, ihl)))
-		goto out;
-	__skb_pull(skb, ihl);
-
-	skb_reset_transport_header(skb);
-	err = -EPROTONOSUPPORT;
-
-	ops = rcu_dereference(inet_offloads[proto]);
-	if (likely(ops && ops->callbacks.gso_send_check))
-		err = ops->callbacks.gso_send_check(skb);
-
-out:
-	return err;
-}
-
 static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 					netdev_features_t features)
 {
@@ -1655,7 +1621,6 @@ static int ipv4_proc_init(void);
 static struct packet_offload ip_packet_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IP),
 	.callbacks = {
-		.gso_send_check = inet_gso_send_check,
 		.gso_segment = inet_gso_segment,
 		.gro_receive = inet_gro_receive,
 		.gro_complete = inet_gro_complete,
@@ -1664,7 +1629,6 @@ static struct packet_offload ip_packet_offload __read_mostly = {
 
 static const struct net_offload ipip_offload = {
 	.callbacks = {
-		.gso_send_check = inet_gso_send_check,
 		.gso_segment	= inet_gso_segment,
 		.gro_receive	= inet_gro_receive,
 		.gro_complete	= inet_gro_complete,
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d3fe2ac05167..a77729503071 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -15,13 +15,6 @@
 #include <net/protocol.h>
 #include <net/gre.h>
 
-static int gre_gso_send_check(struct sk_buff *skb)
-{
-	if (!skb->encapsulation)
-		return -EINVAL;
-	return 0;
-}
-
 static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				       netdev_features_t features)
 {
@@ -46,6 +39,9 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_IPIP)))
 		goto out;
 
+	if (!skb->encapsulation)
+		goto out;
+
 	if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
 		goto out;
 
@@ -256,7 +252,6 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload gre_offload = {
 	.callbacks = {
-		.gso_send_check = gre_gso_send_check,
 		.gso_segment = gre_gso_segment,
 		.gro_receive = gre_gro_receive,
 		.gro_complete = gre_gro_complete,
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 7cd12b0458ff..5b90f2f447a5 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -288,11 +288,6 @@ int tcp_gro_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
-static int tcp_v4_gso_send_check(struct sk_buff *skb)
-{
-	return 0;
-}
-
 static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -320,7 +315,6 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 
 static const struct net_offload tcpv4_offload = {
 	.callbacks = {
-		.gso_send_check	=	tcp_v4_gso_send_check,
 		.gso_segment	=	tcp4_gso_segment,
 		.gro_receive	=	tcp4_gro_receive,
 		.gro_complete	=	tcp4_gro_complete,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 2918cc914824..19ebe6a39ddc 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -25,11 +25,6 @@ struct udp_offload_priv {
 	struct udp_offload_priv __rcu *next;
 };
 
-static int udp4_ufo_send_check(struct sk_buff *skb)
-{
-	return 0;
-}
-
 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 				       netdev_features_t features)
 {
@@ -346,7 +341,6 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload udpv4_offload = {
 	.callbacks = {
-		.gso_send_check = udp4_ufo_send_check,
 		.gso_segment = udp4_ufo_fragment,
 		.gro_receive  =	udp4_gro_receive,
 		.gro_complete =	udp4_gro_complete,
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 9952f3fce30a..9034f76ae013 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -53,31 +53,6 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
 	return proto;
 }
 
-static int ipv6_gso_send_check(struct sk_buff *skb)
-{
-	const struct ipv6hdr *ipv6h;
-	const struct net_offload *ops;
-	int err = -EINVAL;
-
-	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
-		goto out;
-
-	ipv6h = ipv6_hdr(skb);
-	__skb_pull(skb, sizeof(*ipv6h));
-	err = -EPROTONOSUPPORT;
-
-	ops = rcu_dereference(inet6_offloads[
-		ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
-
-	if (likely(ops && ops->callbacks.gso_send_check)) {
-		skb_reset_transport_header(skb);
-		err = ops->callbacks.gso_send_check(skb);
-	}
-
-out:
-	return err;
-}
-
 static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
@@ -306,7 +281,6 @@ out_unlock:
 static struct packet_offload ipv6_packet_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IPV6),
 	.callbacks = {
-		.gso_send_check = ipv6_gso_send_check,
 		.gso_segment = ipv6_gso_segment,
 		.gro_receive = ipv6_gro_receive,
 		.gro_complete = ipv6_gro_complete,
@@ -315,7 +289,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
 
 static const struct net_offload sit_offload = {
 	.callbacks = {
-		.gso_send_check = ipv6_gso_send_check,
 		.gso_segment	= ipv6_gso_segment,
 		.gro_receive	= ipv6_gro_receive,
 		.gro_complete	= ipv6_gro_complete,
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 96253154db3a..c1ab77105b4c 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -15,11 +15,6 @@
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static int tcp_v6_gso_send_check(struct sk_buff *skb)
-{
-	return 0;
-}
-
 static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
 					 struct sk_buff *skb)
 {
@@ -71,7 +66,6 @@ struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
 }
 static const struct net_offload tcpv6_offload = {
 	.callbacks = {
-		.gso_send_check	=	tcp_v6_gso_send_check,
 		.gso_segment	=	tcp6_gso_segment,
 		.gro_receive	=	tcp6_gro_receive,
 		.gro_complete	=	tcp6_gro_complete,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e4af6437ea3b..212ebfc7973f 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,11 +17,6 @@
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static int udp6_ufo_send_check(struct sk_buff *skb)
-{
-	return 0;
-}
-
 static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 					 netdev_features_t features)
 {
@@ -166,7 +161,6 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload udpv6_offload = {
 	.callbacks = {
-		.gso_send_check =	udp6_ufo_send_check,
 		.gso_segment	=	udp6_ufo_fragment,
 		.gro_receive	=	udp6_gro_receive,
 		.gro_complete	=	udp6_gro_complete,
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 6b38d083e1c9..e28ed2ef5b06 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -65,15 +65,9 @@ out:
 	return segs;
 }
 
-static int mpls_gso_send_check(struct sk_buff *skb)
-{
-	return 0;
-}
-
 static struct packet_offload mpls_mc_offload = {
 	.type = cpu_to_be16(ETH_P_MPLS_MC),
 	.callbacks = {
-		.gso_send_check =	mpls_gso_send_check,
 		.gso_segment    =	mpls_gso_segment,
 	},
 };
@@ -81,7 +75,6 @@ static struct packet_offload mpls_mc_offload = {
 static struct packet_offload mpls_uc_offload = {
 	.type = cpu_to_be16(ETH_P_MPLS_UC),
 	.callbacks = {
-		.gso_send_check =	mpls_gso_send_check,
 		.gso_segment    =	mpls_gso_segment,
 	},
 };
-- 
cgit v1.2.3


From b63adb979583ef185718d774d8162387db5589c0 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 26 Sep 2014 00:03:16 +0000
Subject: kernel: add support for kernel restart handler call chain

Various drivers implement architecture and/or device specific means to
restart (reset) the system.  Various mechanisms have been implemented to
support those schemes.  The best known mechanism is arm_pm_restart, which
is a function pointer to be set either from platform specific code or from
drivers.  Another mechanism is to use hardware watchdogs to issue a reset;
this mechanism is used if there is no other method available to reset a
board or system.  Two examples are alim7101_wdt, which currently uses the
reboot notifier to trigger a reset, and moxart_wdt, which registers the
arm_pm_restart function.

The existing mechanisms have a number of drawbacks.  Typically only one
scheme to restart the system is supported (at least if arm_pm_restart is
used).  At least in theory there can be multiple means to restart the
system, some of which may be less desirable (for example one mechanism may
only reset the CPU, while another may reset the entire system).  Using
arm_pm_restart can also be racy if the function pointer is set from a
driver, as the driver may be in the process of being unloaded when
arm_pm_restart is called.  Using the reboot notifier is always racy, as it
is unknown if and when other functions using the reboot notifier have
completed execution by the time the watchdog fires.

Introduce a system restart handler call chain to solve the described
problems.  This call chain is expected to be executed from the
architecture specific machine_restart() function.  Drivers providing
system restart functionality (such as the watchdog drivers mentioned
above) are expected to register with this call chain.  By using the
priority field in the notifier block, callers can control restart handler
execution sequence and thus ensure that the restart handler with the
optimal restart capabilities for a given system is called first.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Wim Van Sebroeck <wim@iguana.be>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Jonas Jensen <jonas.jensen@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/reboot.h |  3 ++
 kernel/reboot.c        | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 48bf152761c7..67fc8fcdc4b0 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -38,6 +38,9 @@ extern int reboot_force;
 extern int register_reboot_notifier(struct notifier_block *);
 extern int unregister_reboot_notifier(struct notifier_block *);
 
+extern int register_restart_handler(struct notifier_block *);
+extern int unregister_restart_handler(struct notifier_block *);
+extern void do_kernel_restart(char *cmd);
 
 /*
  * Architecture-specific implementations of sys_reboot commands.
diff --git a/kernel/reboot.c b/kernel/reboot.c
index a3a9e240fcdb..5925f5ae8dff 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -104,6 +104,87 @@ int unregister_reboot_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
+/*
+ *	Notifier list for kernel code which wants to be called
+ *	to restart the system.
+ */
+static ATOMIC_NOTIFIER_HEAD(restart_handler_list);
+
+/**
+ *	register_restart_handler - Register function to be called to reset
+ *				   the system
+ *	@nb: Info about handler function to be called
+ *	@nb->priority:	Handler priority. Handlers should follow the
+ *			following guidelines for setting priorities.
+ *			0:	Restart handler of last resort,
+ *				with limited restart capabilities
+ *			128:	Default restart handler; use if no other
+ *				restart handler is expected to be available,
+ *				and/or if restart functionality is
+ *				sufficient to restart the entire system
+ *			255:	Highest priority restart handler, will
+ *				preempt all other restart handlers
+ *
+ *	Registers a function with code to be called to restart the
+ *	system.
+ *
+ *	Registered functions will be called from machine_restart as last
+ *	step of the restart sequence (if the architecture specific
+ *	machine_restart function calls do_kernel_restart - see below
+ *	for details).
+ *	Registered functions are expected to restart the system immediately.
+ *	If more than one function is registered, the restart handler priority
+ *	selects which function will be called first.
+ *
+ *	Restart handlers are expected to be registered from non-architecture
+ *	code, typically from drivers. A typical use case would be a system
+ *	where restart functionality is provided through a watchdog. Multiple
+ *	restart handlers may exist; for example, one restart handler might
+ *	restart the entire system, while another only restarts the CPU.
+ *	In such cases, the restart handler which only restarts part of the
+ *	hardware is expected to register with low priority to ensure that
+ *	it only runs if no other means to restart the system is available.
+ *
+ *	Currently always returns zero, as atomic_notifier_chain_register()
+ *	always returns zero.
+ */
+int register_restart_handler(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&restart_handler_list, nb);
+}
+EXPORT_SYMBOL(register_restart_handler);
+
+/**
+ *	unregister_restart_handler - Unregister previously registered
+ *				     restart handler
+ *	@nb: Hook to be unregistered
+ *
+ *	Unregisters a previously registered restart handler function.
+ *
+ *	Returns zero on success, or %-ENOENT on failure.
+ */
+int unregister_restart_handler(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&restart_handler_list, nb);
+}
+EXPORT_SYMBOL(unregister_restart_handler);
+
+/**
+ *	do_kernel_restart - Execute kernel restart handler call chain
+ *
+ *	Calls functions registered with register_restart_handler.
+ *
+ *	Expected to be called from machine_restart as last step of the restart
+ *	sequence.
+ *
+ *	Restarts the system immediately if a restart handler function has been
+ *	registered. Otherwise does nothing.
+ */
+void do_kernel_restart(char *cmd)
+{
+	atomic_notifier_call_chain(&restart_handler_list, reboot_mode, cmd);
+}
+
 void migrate_to_reboot_cpu(void)
 {
 	/* The boot cpu is always logical cpu 0 */
-- 
cgit v1.2.3


From 6ae61fbf38d0cd2aa922eb5e7241e9b0bfd7009d Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Wed, 13 Aug 2014 12:45:19 +0100
Subject: misc: st_kim: Increase size of dev_name buffer to incorporate
 termination

Calling strncpy with a maximum size argument of 32 bytes on destination
array kim_gdata->dev_name of size 32 bytes might leave the destination
string unterminated.

Cc: gregkh@linuxfoundation.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/ti_wilink_st.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h
index 932b76392248..884d6263e962 100644
--- a/include/linux/ti_wilink_st.h
+++ b/include/linux/ti_wilink_st.h
@@ -268,7 +268,7 @@ struct kim_data_s {
 	struct st_data_s *core_data;
 	struct chip_version version;
 	unsigned char ldisc_install;
-	unsigned char dev_name[UART_DEV_NAME_LEN];
+	unsigned char dev_name[UART_DEV_NAME_LEN + 1];
 	unsigned char flow_cntrl;
 	unsigned long baud_rate;
 };
-- 
cgit v1.2.3


From 48018943eb906d81e48f40675c17b92abfeafcf1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Wed, 13 Aug 2014 11:42:46 +0100
Subject: mfd: wm5102: Mark register write sequencer control 3 readable

During init the core checks if the wm5102 has finished starting by reading
register 0x19 and looking at the value. This read always fails since this
is not a readable register, mark it as being one. While we're at it provide
a constant for the register name (as supplied by Charles Keepax).

Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/arizona-core.c            | 3 ++-
 drivers/mfd/wm5102-tables.c           | 1 +
 include/linux/mfd/arizona/registers.h | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 10a0cb90619a..7eabc36b97c1 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -784,7 +784,8 @@ int arizona_dev_init(struct arizona *arizona)
 	/* Ensure device startup is complete */
 	switch (arizona->type) {
 	case WM5102:
-		ret = regmap_read(arizona->regmap, 0x19, &val);
+		ret = regmap_read(arizona->regmap,
+				  ARIZONA_WRITE_SEQUENCER_CTRL_3, &val);
 		if (ret != 0)
 			dev_err(dev,
 				"Failed to check write sequencer state: %d\n",
diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index a219888d7f4b..77e16e59a45d 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -1017,6 +1017,7 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_WRITE_SEQUENCER_CTRL_0:
 	case ARIZONA_WRITE_SEQUENCER_CTRL_1:
 	case ARIZONA_WRITE_SEQUENCER_CTRL_2:
+	case ARIZONA_WRITE_SEQUENCER_CTRL_3:
 	case ARIZONA_WRITE_SEQUENCER_PROM:
 	case ARIZONA_TONE_GENERATOR_1:
 	case ARIZONA_TONE_GENERATOR_2:
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index dbd23c36de21..68913ec90969 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -27,6 +27,7 @@
 #define ARIZONA_WRITE_SEQUENCER_CTRL_0           0x16
 #define ARIZONA_WRITE_SEQUENCER_CTRL_1           0x17
 #define ARIZONA_WRITE_SEQUENCER_CTRL_2           0x18
+#define ARIZONA_WRITE_SEQUENCER_CTRL_3           0x19
 #define ARIZONA_WRITE_SEQUENCER_PROM             0x1A
 #define ARIZONA_TONE_GENERATOR_1                 0x20
 #define ARIZONA_TONE_GENERATOR_2                 0x21
-- 
cgit v1.2.3


From 4b5c1f1e080f79c3c226596047a20ccd1c8a9486 Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <j.anaszewski@samsung.com>
Date: Wed, 20 Aug 2014 15:43:39 +0200
Subject: mfd: max77693: Fix register enum name

According to the MAX77693 documentation the name of
the register is FLASH_STATUS.

Signed-off-by: Jacek Anaszewski <j.anaszewski@samsung.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/max77693-private.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index c466ff3e16b8..615f12141854 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -46,7 +46,7 @@ enum max77693_pmic_reg {
 	MAX77693_LED_REG_VOUT_FLASH2			= 0x0C,
 	MAX77693_LED_REG_FLASH_INT			= 0x0E,
 	MAX77693_LED_REG_FLASH_INT_MASK			= 0x0F,
-	MAX77693_LED_REG_FLASH_INT_STATUS		= 0x10,
+	MAX77693_LED_REG_FLASH_STATUS			= 0x10,
 
 	MAX77693_PMIC_REG_PMIC_ID1			= 0x20,
 	MAX77693_PMIC_REG_PMIC_ID2			= 0x21,
-- 
cgit v1.2.3


From a0bc607208e295f70d0355fa4e632a0c8c27533b Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <j.anaszewski@samsung.com>
Date: Fri, 22 Aug 2014 11:06:18 +0200
Subject: mfd: max77693: Improve support for the flash cell

This patch improves support for the flash cell of
max77693 mfd by adding relevant of_compatible field
and a structure for caching related platform data.
Added are also FLASH registers related macro definitions.

Signed-off-by: Jacek Anaszewski <j.anaszewski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max77693.c               |  5 ++-
 include/linux/mfd/max77693-private.h | 59 ++++++++++++++++++++++++++++++++++++
 include/linux/mfd/max77693.h         | 40 ++++++++++++++++++++++++
 3 files changed, 103 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c
index 249c139ef04a..cf008f45968c 100644
--- a/drivers/mfd/max77693.c
+++ b/drivers/mfd/max77693.c
@@ -44,9 +44,12 @@
 static const struct mfd_cell max77693_devs[] = {
 	{ .name = "max77693-pmic", },
 	{ .name = "max77693-charger", },
-	{ .name = "max77693-flash", },
 	{ .name = "max77693-muic", },
 	{ .name = "max77693-haptic", },
+	{
+		.name = "max77693-flash",
+		.of_compatible = "maxim,max77693-flash",
+	},
 };
 
 static const struct regmap_config max77693_regmap_config = {
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index 615f12141854..de063773f3b1 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -85,6 +85,65 @@ enum max77693_pmic_reg {
 	MAX77693_PMIC_REG_END,
 };
 
+/* MAX77693 ITORCH register */
+#define TORCH_IOUT1_SHIFT	0
+#define TORCH_IOUT2_SHIFT	4
+#define TORCH_IOUT_MIN		15625
+#define TORCH_IOUT_MAX		250000
+#define TORCH_IOUT_STEP		15625
+
+/* MAX77693 IFLASH1 and IFLASH2 registers */
+#define FLASH_IOUT_MIN		15625
+#define FLASH_IOUT_MAX_1LED	1000000
+#define FLASH_IOUT_MAX_2LEDS	625000
+#define FLASH_IOUT_STEP		15625
+
+/* MAX77693 TORCH_TIMER register */
+#define TORCH_TMR_NO_TIMER	0x40
+#define TORCH_TIMEOUT_MIN	262000
+#define TORCH_TIMEOUT_MAX	15728000
+
+/* MAX77693 FLASH_TIMER register */
+#define FLASH_TMR_LEVEL		0x80
+#define FLASH_TIMEOUT_MIN	62500
+#define FLASH_TIMEOUT_MAX	1000000
+#define FLASH_TIMEOUT_STEP	62500
+
+/* MAX77693 FLASH_EN register */
+#define FLASH_EN_OFF		0x0
+#define FLASH_EN_FLASH		0x1
+#define FLASH_EN_TORCH		0x2
+#define FLASH_EN_ON		0x3
+#define FLASH_EN_SHIFT(x)	(6 - ((x) - 1) * 2)
+#define TORCH_EN_SHIFT(x)	(2 - ((x) - 1) * 2)
+
+/* MAX77693 MAX_FLASH1 register */
+#define MAX_FLASH1_MAX_FL_EN	0x80
+#define MAX_FLASH1_VSYS_MIN	2400
+#define MAX_FLASH1_VSYS_MAX	3400
+#define MAX_FLASH1_VSYS_STEP	33
+
+/* MAX77693 VOUT_CNTL register */
+#define FLASH_BOOST_FIXED	0x04
+#define FLASH_BOOST_LEDNUM_2	0x80
+
+/* MAX77693 VOUT_FLASH1 register */
+#define FLASH_VOUT_MIN		3300
+#define FLASH_VOUT_MAX		5500
+#define FLASH_VOUT_STEP		25
+#define FLASH_VOUT_RMIN		0x0c
+
+/* MAX77693 FLASH_STATUS register */
+#define FLASH_STATUS_FLASH_ON	BIT(3)
+#define FLASH_STATUS_TORCH_ON	BIT(2)
+
+/* MAX77693 FLASH_INT register */
+#define FLASH_INT_FLED2_OPEN	BIT(0)
+#define FLASH_INT_FLED2_SHORT	BIT(1)
+#define FLASH_INT_FLED1_OPEN	BIT(2)
+#define FLASH_INT_FLED1_SHORT	BIT(3)
+#define FLASH_INT_OVER_CURRENT	BIT(4)
+
 /* MAX77693 CHG_CNFG_00 register */
 #define CHG_CNFG_00_CHG_MASK		0x1
 #define CHG_CNFG_00_BUCK_MASK		0x4
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
index 3f3dc45f93ee..f0b6585cd874 100644
--- a/include/linux/mfd/max77693.h
+++ b/include/linux/mfd/max77693.h
@@ -63,6 +63,45 @@ struct max77693_muic_platform_data {
 	int path_uart;
 };
 
+/* MAX77693 led flash */
+
+/* triggers */
+enum max77693_led_trigger {
+	MAX77693_LED_TRIG_OFF,
+	MAX77693_LED_TRIG_FLASH,
+	MAX77693_LED_TRIG_TORCH,
+	MAX77693_LED_TRIG_EXT,
+	MAX77693_LED_TRIG_SOFT,
+};
+
+/* trigger types */
+enum max77693_led_trigger_type {
+	MAX77693_LED_TRIG_TYPE_EDGE,
+	MAX77693_LED_TRIG_TYPE_LEVEL,
+};
+
+/* boost modes */
+enum max77693_led_boost_mode {
+	MAX77693_LED_BOOST_NONE,
+	MAX77693_LED_BOOST_ADAPTIVE,
+	MAX77693_LED_BOOST_FIXED,
+};
+
+struct max77693_led_platform_data {
+	u32 fleds[2];
+	u32 iout_torch[2];
+	u32 iout_flash[2];
+	u32 trigger[2];
+	u32 trigger_type[2];
+	u32 num_leds;
+	u32 boost_mode;
+	u32 flash_timeout;
+	u32 boost_vout;
+	u32 low_vsys;
+};
+
+/* MAX77693 */
+
 struct max77693_platform_data {
 	/* regulator data */
 	struct max77693_regulator_data *regulators;
@@ -70,5 +109,6 @@ struct max77693_platform_data {
 
 	/* muic data */
 	struct max77693_muic_platform_data *muic_data;
+	struct max77693_led_platform_data *led_data;
 };
 #endif	/* __LINUX_MFD_MAX77693_H */
-- 
cgit v1.2.3


From dfa52c852dd3fcf3c0e696ab2d7df0bf91b2aed9 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:52:50 +0200
Subject: mfd: ti_ssp: Remove unused header

The header file include/linux/mfd/ti_ssp.h does not seem to be used
anywhere. It was orphaned by 3033ee62 "mfd: Remove obsolete ti-ssp
driver". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/ti_ssp.h | 93 ----------------------------------------------
 1 file changed, 93 deletions(-)
 delete mode 100644 include/linux/mfd/ti_ssp.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/ti_ssp.h b/include/linux/mfd/ti_ssp.h
deleted file mode 100644
index dbb4b43bd20e..000000000000
--- a/include/linux/mfd/ti_ssp.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Sequencer Serial Port (SSP) driver for Texas Instruments' SoCs
- *
- * Copyright (C) 2010 Texas Instruments Inc
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef __TI_SSP_H__
-#define __TI_SSP_H__
-
-struct ti_ssp_dev_data {
-	const char	*dev_name;
-	void		*pdata;
-	size_t		pdata_size;
-};
-
-struct ti_ssp_data {
-	unsigned long		out_clock;
-	struct ti_ssp_dev_data	dev_data[2];
-};
-
-struct ti_ssp_spi_data {
-	unsigned long	iosel;
-	int		num_cs;
-	void		(*select)(int cs);
-};
-
-/*
- * Sequencer port IO pin configuration bits.  These do not correlate 1-1 with
- * the hardware.  The iosel field in the port data combines iosel1 and iosel2,
- * and is therefore not a direct map to register space.  It is best to use the
- * macros below to construct iosel values.
- *
- * least significant 16 bits --> iosel1
- * most significant 16 bits  --> iosel2
- */
-
-#define SSP_IN			0x0000
-#define SSP_DATA		0x0001
-#define SSP_CLOCK		0x0002
-#define SSP_CHIPSEL		0x0003
-#define SSP_OUT			0x0004
-#define SSP_PIN_SEL(pin, v)	((v) << ((pin) * 3))
-#define SSP_PIN_MASK(pin)	SSP_PIN_SEL(pin, 0x7)
-#define SSP_INPUT_SEL(pin)	((pin) << 16)
-
-/* Sequencer port config bits */
-#define SSP_EARLY_DIN		BIT(8)
-#define SSP_DELAY_DOUT		BIT(9)
-
-/* Sequence map definitions */
-#define SSP_CLK_HIGH		BIT(0)
-#define SSP_CLK_LOW		0
-#define SSP_DATA_HIGH		BIT(1)
-#define SSP_DATA_LOW		0
-#define SSP_CS_HIGH		BIT(2)
-#define SSP_CS_LOW		0
-#define SSP_OUT_MODE		BIT(3)
-#define SSP_IN_MODE		0
-#define SSP_DATA_REG		BIT(4)
-#define SSP_ADDR_REG		0
-
-#define SSP_OPCODE_DIRECT	((0x0) << 5)
-#define SSP_OPCODE_TOGGLE	((0x1) << 5)
-#define SSP_OPCODE_SHIFT	((0x2) << 5)
-#define SSP_OPCODE_BRANCH0	((0x4) << 5)
-#define SSP_OPCODE_BRANCH1	((0x5) << 5)
-#define SSP_OPCODE_BRANCH	((0x6) << 5)
-#define SSP_OPCODE_STOP		((0x7) << 5)
-#define SSP_BRANCH(addr)	((addr) << 8)
-#define SSP_COUNT(cycles)	((cycles) << 8)
-
-int ti_ssp_raw_read(struct device *dev);
-int ti_ssp_raw_write(struct device *dev, u32 val);
-int ti_ssp_load(struct device *dev, int offs, u32* prog, int len);
-int ti_ssp_run(struct device *dev, u32 pc, u32 input, u32 *output);
-int ti_ssp_set_mode(struct device *dev, int mode);
-int ti_ssp_set_iosel(struct device *dev, u32 iosel);
-
-#endif /* __TI_SSP_H__ */
-- 
cgit v1.2.3


From c593aca4e42a24b229a8070c4eb50d01f54877f2 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 22 Aug 2014 18:49:03 +0200
Subject: mfd: davinci_voicecodec: Fix 'if defined' guard type in header

The include guard doesn't work as intended due to the transposition
typo DAVINCI -> DAVINIC.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/davinci_voicecodec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/davinci_voicecodec.h b/include/linux/mfd/davinci_voicecodec.h
index 5166935ce66d..cb01496bfa49 100644
--- a/include/linux/mfd/davinci_voicecodec.h
+++ b/include/linux/mfd/davinci_voicecodec.h
@@ -21,7 +21,7 @@
  */
 
 #ifndef __LINUX_MFD_DAVINCI_VOICECODEC_H_
-#define __LINUX_MFD_DAVINIC_VOICECODEC_H_
+#define __LINUX_MFD_DAVINCI_VOICECODEC_H_
 
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-- 
cgit v1.2.3


From 9bb9e29c78f8d8ee310987fd58a2b908a4ce0c40 Mon Sep 17 00:00:00 2001
From: Beniamino Galvani <b.galvani@gmail.com>
Date: Sat, 30 Aug 2014 14:50:23 +0200
Subject: mfd: Add Ricoh RN5T618 PMIC core driver

Ricoh RN5T618 is a power management IC which integrates 3 step-down
DCDC converters, 7 low-dropout regulators, a Li-ion battery charger,
fuel gauge, ADC, GPIOs and a watchdog timer.

This commit adds a MFD core driver to support the I2C communication
with the device.

Signed-off-by: Beniamino Galvani <b.galvani@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig         |  11 +++
 drivers/mfd/Makefile        |   1 +
 drivers/mfd/rn5t618.c       | 134 ++++++++++++++++++++++++++
 include/linux/mfd/rn5t618.h | 228 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 374 insertions(+)
 create mode 100644 drivers/mfd/rn5t618.c
 create mode 100644 include/linux/mfd/rn5t618.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index fc419da8d722..82f70dcab136 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -597,6 +597,17 @@ config MFD_RC5T583
 	  Additional drivers must be enabled in order to use the
 	  different functionality of the device.
 
+config MFD_RN5T618
+	tristate "Ricoh RN5T5618 PMIC"
+	depends on I2C
+	select MFD_CORE
+	select REGMAP_I2C
+	help
+	  Say yes here to add support for the Ricoh RN5T618 PMIC. This
+	  driver provides common support for accessing the device,
+	  additional drivers must be enabled in order to use the
+	  functionality of the device.
+
 config MFD_SEC_CORE
 	bool "SAMSUNG Electronics PMIC Series Support"
 	depends on I2C=y
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index d58068aa8aa9..aa5a73a5ba47 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -160,6 +160,7 @@ obj-$(CONFIG_MFD_INTEL_MSIC)	+= intel_msic.o
 obj-$(CONFIG_MFD_PALMAS)	+= palmas.o
 obj-$(CONFIG_MFD_VIPERBOARD)    += viperboard.o
 obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
+obj-$(CONFIG_MFD_RN5T618)	+= rn5t618.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c
new file mode 100644
index 000000000000..666857192dbe
--- /dev/null
+++ b/drivers/mfd/rn5t618.c
@@ -0,0 +1,134 @@
+/*
+ * MFD core driver for Ricoh RN5T618 PMIC
+ *
+ * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/i2c.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/rn5t618.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+
+static const struct mfd_cell rn5t618_cells[] = {
+	{ .name = "rn5t618-regulator" },
+	{ .name = "rn5t618-wdt" },
+};
+
+static bool rn5t618_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case RN5T618_WATCHDOGCNT:
+	case RN5T618_DCIRQ:
+	case RN5T618_ILIMDATAH ... RN5T618_AIN0DATAL:
+	case RN5T618_IR_ADC1 ... RN5T618_IR_ADC3:
+	case RN5T618_IR_GPR:
+	case RN5T618_IR_GPF:
+	case RN5T618_MON_IOIN:
+	case RN5T618_INTMON:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config rn5t618_regmap_config = {
+	.reg_bits	= 8,
+	.val_bits	= 8,
+	.volatile_reg	= rn5t618_volatile_reg,
+	.max_register	= RN5T618_MAX_REG,
+	.cache_type	= REGCACHE_RBTREE,
+};
+
+static struct rn5t618 *rn5t618_pm_power_off;
+
+static void rn5t618_power_off(void)
+{
+	/* disable automatic repower-on */
+	regmap_update_bits(rn5t618_pm_power_off->regmap, RN5T618_REPCNT,
+			   RN5T618_REPCNT_REPWRON, 0);
+	/* start power-off sequence */
+	regmap_update_bits(rn5t618_pm_power_off->regmap, RN5T618_SLPCNT,
+			   RN5T618_SLPCNT_SWPWROFF, RN5T618_SLPCNT_SWPWROFF);
+}
+
+static int rn5t618_i2c_probe(struct i2c_client *i2c,
+			     const struct i2c_device_id *id)
+{
+	struct rn5t618 *priv;
+	int ret;
+
+	priv = devm_kzalloc(&i2c->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, priv);
+
+	priv->regmap = devm_regmap_init_i2c(i2c, &rn5t618_regmap_config);
+	if (IS_ERR(priv->regmap)) {
+		ret = PTR_ERR(priv->regmap);
+		dev_err(&i2c->dev, "regmap init failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = mfd_add_devices(&i2c->dev, -1, rn5t618_cells,
+			      ARRAY_SIZE(rn5t618_cells), NULL, 0, NULL);
+	if (ret) {
+		dev_err(&i2c->dev, "failed to add sub-devices: %d\n", ret);
+		return ret;
+	}
+
+	if (!pm_power_off) {
+		rn5t618_pm_power_off = priv;
+		pm_power_off = rn5t618_power_off;
+	}
+
+	return 0;
+}
+
+static int rn5t618_i2c_remove(struct i2c_client *i2c)
+{
+	struct rn5t618 *priv = i2c_get_clientdata(i2c);
+
+	if (priv == rn5t618_pm_power_off) {
+		rn5t618_pm_power_off = NULL;
+		pm_power_off = NULL;
+	}
+
+	mfd_remove_devices(&i2c->dev);
+	return 0;
+}
+
+static const struct of_device_id rn5t618_of_match[] = {
+	{ .compatible = "ricoh,rn5t618" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, rn5t618_of_match);
+
+static const struct i2c_device_id rn5t618_i2c_id[] = {
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, rn5t618_i2c_id);
+
+static struct i2c_driver rn5t618_i2c_driver = {
+	.driver = {
+		.name = "rn5t618",
+		.of_match_table = of_match_ptr(rn5t618_of_match),
+	},
+	.probe = rn5t618_i2c_probe,
+	.remove = rn5t618_i2c_remove,
+	.id_table = rn5t618_i2c_id,
+};
+
+module_i2c_driver(rn5t618_i2c_driver);
+
+MODULE_AUTHOR("Beniamino Galvani <b.galvani@gmail.com>");
+MODULE_DESCRIPTION("Ricoh RN5T618 MFD driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/rn5t618.h b/include/linux/mfd/rn5t618.h
new file mode 100644
index 000000000000..c72d5344f3b3
--- /dev/null
+++ b/include/linux/mfd/rn5t618.h
@@ -0,0 +1,228 @@
+/*
+ * MFD core driver for Ricoh RN5T618 PMIC
+ *
+ * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __LINUX_MFD_RN5T618_H
+#define __LINUX_MFD_RN5T618_H
+
+#include <linux/regmap.h>
+
+#define RN5T618_LSIVER			0x00
+#define RN5T618_OTPVER			0x01
+#define RN5T618_IODAC			0x02
+#define RN5T618_VINDAC			0x03
+#define RN5T618_CPUCNT			0x06
+#define RN5T618_PSWR			0x07
+#define RN5T618_PONHIS			0x09
+#define RN5T618_POFFHIS			0x0a
+#define RN5T618_WATCHDOG		0x0b
+#define RN5T618_WATCHDOGCNT		0x0c
+#define RN5T618_PWRFUNC			0x0d
+#define RN5T618_SLPCNT			0x0e
+#define RN5T618_REPCNT			0x0f
+#define RN5T618_PWRONTIMSET		0x10
+#define RN5T618_NOETIMSETCNT		0x11
+#define RN5T618_PWRIREN			0x12
+#define RN5T618_PWRIRQ			0x13
+#define RN5T618_PWRMON			0x14
+#define RN5T618_PWRIRSEL		0x15
+#define RN5T618_DC1_SLOT		0x16
+#define RN5T618_DC2_SLOT		0x17
+#define RN5T618_DC3_SLOT		0x18
+#define RN5T618_LDO1_SLOT		0x1b
+#define RN5T618_LDO2_SLOT		0x1c
+#define RN5T618_LDO3_SLOT		0x1d
+#define RN5T618_LDO4_SLOT		0x1e
+#define RN5T618_LDO5_SLOT		0x1f
+#define RN5T618_PSO0_SLOT		0x25
+#define RN5T618_PSO1_SLOT		0x26
+#define RN5T618_PSO2_SLOT		0x27
+#define RN5T618_PSO3_SLOT		0x28
+#define RN5T618_LDORTC1_SLOT		0x2a
+#define RN5T618_DC1CTL			0x2c
+#define RN5T618_DC1CTL2			0x2d
+#define RN5T618_DC2CTL			0x2e
+#define RN5T618_DC2CTL2			0x2f
+#define RN5T618_DC3CTL			0x30
+#define RN5T618_DC3CTL2			0x31
+#define RN5T618_DC1DAC			0x36
+#define RN5T618_DC2DAC			0x37
+#define RN5T618_DC3DAC			0x38
+#define RN5T618_DC1DAC_SLP		0x3b
+#define RN5T618_DC2DAC_SLP		0x3c
+#define RN5T618_DC3DAC_SLP		0x3d
+#define RN5T618_DCIREN			0x40
+#define RN5T618_DCIRQ			0x41
+#define RN5T618_DCIRMON			0x42
+#define RN5T618_LDOEN1			0x44
+#define RN5T618_LDOEN2			0x45
+#define RN5T618_LDODIS			0x46
+#define RN5T618_LDO1DAC			0x4c
+#define RN5T618_LDO2DAC			0x4d
+#define RN5T618_LDO3DAC			0x4e
+#define RN5T618_LDO4DAC			0x4f
+#define RN5T618_LDO5DAC			0x50
+#define RN5T618_LDORTCDAC		0x56
+#define RN5T618_LDORTC2DAC		0x57
+#define RN5T618_LDO1DAC_SLP		0x58
+#define RN5T618_LDO2DAC_SLP		0x59
+#define RN5T618_LDO3DAC_SLP		0x5a
+#define RN5T618_LDO4DAC_SLP		0x5b
+#define RN5T618_LDO5DAC_SLP		0x5c
+#define RN5T618_ADCCNT1			0x64
+#define RN5T618_ADCCNT2			0x65
+#define RN5T618_ADCCNT3			0x66
+#define RN5T618_ILIMDATAH		0x68
+#define RN5T618_ILIMDATAL		0x69
+#define RN5T618_VBATDATAH		0x6a
+#define RN5T618_VBATDATAL		0x6b
+#define RN5T618_VADPDATAH		0x6c
+#define RN5T618_VADPDATAL		0x6d
+#define RN5T618_VUSBDATAH		0x6e
+#define RN5T618_VUSBDATAL		0x6f
+#define RN5T618_VSYSDATAH		0x70
+#define RN5T618_VSYSDATAL		0x71
+#define RN5T618_VTHMDATAH		0x72
+#define RN5T618_VTHMDATAL		0x73
+#define RN5T618_AIN1DATAH		0x74
+#define RN5T618_AIN1DATAL		0x75
+#define RN5T618_AIN0DATAH		0x76
+#define RN5T618_AIN0DATAL		0x77
+#define RN5T618_ILIMTHL			0x78
+#define RN5T618_ILIMTHH			0x79
+#define RN5T618_VBATTHL			0x7a
+#define RN5T618_VBATTHH			0x7b
+#define RN5T618_VADPTHL			0x7c
+#define RN5T618_VADPTHH			0x7d
+#define RN5T618_VUSBTHL			0x7e
+#define RN5T618_VUSBTHH			0x7f
+#define RN5T618_VSYSTHL			0x80
+#define RN5T618_VSYSTHH			0x81
+#define RN5T618_VTHMTHL			0x82
+#define RN5T618_VTHMTHH			0x83
+#define RN5T618_AIN1THL			0x84
+#define RN5T618_AIN1THH			0x85
+#define RN5T618_AIN0THL			0x86
+#define RN5T618_AIN0THH			0x87
+#define RN5T618_EN_ADCIR1		0x88
+#define RN5T618_EN_ADCIR2		0x89
+#define RN5T618_EN_ADCIR3		0x8a
+#define RN5T618_IR_ADC1			0x8c
+#define RN5T618_IR_ADC2			0x8d
+#define RN5T618_IR_ADC3			0x8e
+#define RN5T618_IOSEL			0x90
+#define RN5T618_IOOUT			0x91
+#define RN5T618_GPEDGE1			0x92
+#define RN5T618_GPEDGE2			0x93
+#define RN5T618_EN_GPIR			0x94
+#define RN5T618_IR_GPR			0x95
+#define RN5T618_IR_GPF			0x96
+#define RN5T618_MON_IOIN		0x97
+#define RN5T618_GPLED_FUNC		0x98
+#define RN5T618_INTPOL			0x9c
+#define RN5T618_INTEN			0x9d
+#define RN5T618_INTMON			0x9e
+#define RN5T618_PREVINDAC		0xb0
+#define RN5T618_BATDAC			0xb1
+#define RN5T618_CHGCTL1			0xb3
+#define RN5T618_CHGCTL2			0xb4
+#define RN5T618_VSYSSET			0xb5
+#define RN5T618_REGISET1		0xb6
+#define RN5T618_REGISET2		0xb7
+#define RN5T618_CHGISET			0xb8
+#define RN5T618_TIMSET			0xb9
+#define RN5T618_BATSET1			0xba
+#define RN5T618_BATSET2			0xbb
+#define RN5T618_DIESET			0xbc
+#define RN5T618_CHGSTATE		0xbd
+#define RN5T618_CHGCTRL_IRFMASK		0xbe
+#define RN5T618_CHGSTAT_IRFMASK1	0xbf
+#define RN5T618_CHGSTAT_IRFMASK2	0xc0
+#define RN5T618_CHGERR_IRFMASK		0xc1
+#define RN5T618_CHGCTRL_IRR		0xc2
+#define RN5T618_CHGSTAT_IRR1		0xc3
+#define RN5T618_CHGSTAT_IRR2		0xc4
+#define RN5T618_CHGERR_IRR		0xc5
+#define RN5T618_CHGCTRL_MONI		0xc6
+#define RN5T618_CHGSTAT_MONI1		0xc7
+#define RN5T618_CHGSTAT_MONI2		0xc8
+#define RN5T618_CHGERR_MONI		0xc9
+#define RN5T618_CHGCTRL_DETMOD1		0xca
+#define RN5T618_CHGCTRL_DETMOD2		0xcb
+#define RN5T618_CHGSTAT_DETMOD1		0xcc
+#define RN5T618_CHGSTAT_DETMOD2		0xcd
+#define RN5T618_CHGSTAT_DETMOD3		0xce
+#define RN5T618_CHGERR_DETMOD1		0xcf
+#define RN5T618_CHGERR_DETMOD2		0xd0
+#define RN5T618_CHGOSCCTL		0xd4
+#define RN5T618_CHGOSCSCORESET1		0xd5
+#define RN5T618_CHGOSCSCORESET2		0xd6
+#define RN5T618_CHGOSCSCORESET3		0xd7
+#define RN5T618_CHGOSCFREQSET1		0xd8
+#define RN5T618_CHGOSCFREQSET2		0xd9
+#define RN5T618_CONTROL			0xe0
+#define RN5T618_SOC			0xe1
+#define RN5T618_RE_CAP_H		0xe2
+#define RN5T618_RE_CAP_L		0xe3
+#define RN5T618_FA_CAP_H		0xe4
+#define RN5T618_FA_CAP_L		0xe5
+#define RN5T618_AGE			0xe6
+#define RN5T618_TT_EMPTY_H		0xe7
+#define RN5T618_TT_EMPTY_L		0xe8
+#define RN5T618_TT_FULL_H		0xe9
+#define RN5T618_TT_FULL_L		0xea
+#define RN5T618_VOLTAGE_1		0xeb
+#define RN5T618_VOLTAGE_0		0xec
+#define RN5T618_TEMP_1			0xed
+#define RN5T618_TEMP_0			0xee
+#define RN5T618_CC_CTRL			0xef
+#define RN5T618_CC_COUNT2		0xf0
+#define RN5T618_CC_COUNT1		0xf1
+#define RN5T618_CC_COUNT0		0xf2
+#define RN5T618_CC_SUMREG3		0xf3
+#define RN5T618_CC_SUMREG2		0xf4
+#define RN5T618_CC_SUMREG1		0xf5
+#define RN5T618_CC_SUMREG0		0xf6
+#define RN5T618_CC_OFFREG1		0xf7
+#define RN5T618_CC_OFFREG0		0xf8
+#define RN5T618_CC_GAINREG1		0xf9
+#define RN5T618_CC_GAINREG0		0xfa
+#define RN5T618_CC_AVEREG1		0xfb
+#define RN5T618_CC_AVEREG0		0xfc
+#define RN5T618_MAX_REG			0xfc
+
+#define RN5T618_REPCNT_REPWRON		BIT(0)
+#define RN5T618_SLPCNT_SWPWROFF		BIT(0)
+#define RN5T618_WATCHDOG_WDOGEN		BIT(2)
+#define RN5T618_WATCHDOG_WDOGTIM_M	(BIT(0) | BIT(1))
+#define RN5T618_WATCHDOG_WDOGTIM_S	0
+#define RN5T618_PWRIRQ_IR_WDOG		BIT(6)
+
+enum {
+	RN5T618_DCDC1,
+	RN5T618_DCDC2,
+	RN5T618_DCDC3,
+	RN5T618_LDO1,
+	RN5T618_LDO2,
+	RN5T618_LDO3,
+	RN5T618_LDO4,
+	RN5T618_LDO5,
+	RN5T618_LDORTC1,
+	RN5T618_LDORTC2,
+	RN5T618_REG_NUM,
+};
+
+struct rn5t618 {
+	struct regmap *regmap;
+};
+
+#endif /* __LINUX_MFD_RN5T618_H */
-- 
cgit v1.2.3


From c24084db223aec7793201b94f0712cfdfa7e9fe7 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Mon, 1 Sep 2014 15:48:52 +0100
Subject: mfd: arizona: Add ASYNC_SAMPLE_RATE_2 registers

Some arizona devices have a second asynchronous sample rate, add the
registers necessary to support this.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm5102-tables.c           |  3 +++
 drivers/mfd/wm5110-tables.c           |  4 ++++
 include/linux/mfd/arizona/registers.h | 28 ++++++++++++++++++++++------
 sound/soc/codecs/arizona.c            |  2 +-
 4 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 6d9b7f832bf5..d6f35bbf795b 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -1059,6 +1059,8 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_ASYNC_CLOCK_1:
 	case ARIZONA_ASYNC_SAMPLE_RATE_1:
 	case ARIZONA_ASYNC_SAMPLE_RATE_1_STATUS:
+	case ARIZONA_ASYNC_SAMPLE_RATE_2:
+	case ARIZONA_ASYNC_SAMPLE_RATE_2_STATUS:
 	case ARIZONA_OUTPUT_SYSTEM_CLOCK:
 	case ARIZONA_OUTPUT_ASYNC_CLOCK:
 	case ARIZONA_RATE_ESTIMATOR_1:
@@ -1892,6 +1894,7 @@ static bool wm5102_volatile_register(struct device *dev, unsigned int reg)
 	case ARIZONA_SAMPLE_RATE_3_STATUS:
 	case ARIZONA_HAPTICS_STATUS:
 	case ARIZONA_ASYNC_SAMPLE_RATE_1_STATUS:
+	case ARIZONA_ASYNC_SAMPLE_RATE_2_STATUS:
 	case ARIZONA_FLL1_NCO_TEST_0:
 	case ARIZONA_FLL2_NCO_TEST_0:
 	case ARIZONA_DAC_COMP_1:
diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c
index beae0a397ee1..4642b5b816a0 100644
--- a/drivers/mfd/wm5110-tables.c
+++ b/drivers/mfd/wm5110-tables.c
@@ -702,6 +702,7 @@ static const struct reg_default wm5110_reg_default[] = {
 	{ 0x00000104, 0x0011 },    /* R260   - Sample rate 3 */
 	{ 0x00000112, 0x0305 },    /* R274   - Async clock 1 */
 	{ 0x00000113, 0x0011 },    /* R275   - Async sample rate 1 */
+	{ 0x00000114, 0x0011 },    /* R276   - Async sample rate 2 */
 	{ 0x00000149, 0x0000 },    /* R329   - Output system clock */
 	{ 0x0000014A, 0x0000 },    /* R330   - Output async clock */
 	{ 0x00000152, 0x0000 },    /* R338   - Rate Estimator 1 */
@@ -1738,6 +1739,8 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_ASYNC_CLOCK_1:
 	case ARIZONA_ASYNC_SAMPLE_RATE_1:
 	case ARIZONA_ASYNC_SAMPLE_RATE_1_STATUS:
+	case ARIZONA_ASYNC_SAMPLE_RATE_2:
+	case ARIZONA_ASYNC_SAMPLE_RATE_2_STATUS:
 	case ARIZONA_OUTPUT_SYSTEM_CLOCK:
 	case ARIZONA_OUTPUT_ASYNC_CLOCK:
 	case ARIZONA_RATE_ESTIMATOR_1:
@@ -2820,6 +2823,7 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg)
 	case ARIZONA_SAMPLE_RATE_2_STATUS:
 	case ARIZONA_SAMPLE_RATE_3_STATUS:
 	case ARIZONA_ASYNC_SAMPLE_RATE_1_STATUS:
+	case ARIZONA_ASYNC_SAMPLE_RATE_2_STATUS:
 	case ARIZONA_MIC_DETECT_3:
 	case ARIZONA_HEADPHONE_DETECT_2:
 	case ARIZONA_INPUT_ENABLES_STATUS:
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 68913ec90969..c0b075f6bc35 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -71,7 +71,9 @@
 #define ARIZONA_SAMPLE_RATE_3_STATUS             0x10C
 #define ARIZONA_ASYNC_CLOCK_1                    0x112
 #define ARIZONA_ASYNC_SAMPLE_RATE_1              0x113
+#define ARIZONA_ASYNC_SAMPLE_RATE_2              0x114
 #define ARIZONA_ASYNC_SAMPLE_RATE_1_STATUS       0x11B
+#define ARIZONA_ASYNC_SAMPLE_RATE_2_STATUS       0x11C
 #define ARIZONA_OUTPUT_SYSTEM_CLOCK              0x149
 #define ARIZONA_OUTPUT_ASYNC_CLOCK               0x14A
 #define ARIZONA_RATE_ESTIMATOR_1                 0x152
@@ -1665,16 +1667,30 @@
 /*
  * R275 (0x113) - Async sample rate 1
  */
-#define ARIZONA_ASYNC_SAMPLE_RATE_MASK           0x001F  /* ASYNC_SAMPLE_RATE - [4:0] */
-#define ARIZONA_ASYNC_SAMPLE_RATE_SHIFT               0  /* ASYNC_SAMPLE_RATE - [4:0] */
-#define ARIZONA_ASYNC_SAMPLE_RATE_WIDTH               5  /* ASYNC_SAMPLE_RATE - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_1_MASK         0x001F  /* ASYNC_SAMPLE_RATE_1 - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_1_SHIFT             0  /* ASYNC_SAMPLE_RATE_1 - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_1_WIDTH             5  /* ASYNC_SAMPLE_RATE_1 - [4:0] */
+
+/*
+ * R276 (0x114) - Async sample rate 2
+ */
+#define ARIZONA_ASYNC_SAMPLE_RATE_2_MASK         0x001F  /* ASYNC_SAMPLE_RATE_2 - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_2_SHIFT             0  /* ASYNC_SAMPLE_RATE_2 - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_2_WIDTH             5  /* ASYNC_SAMPLE_RATE_2 - [4:0] */
 
 /*
  * R283 (0x11B) - Async sample rate 1 status
  */
-#define ARIZONA_ASYNC_SAMPLE_RATE_STS_MASK       0x001F  /* ASYNC_SAMPLE_RATE_STS - [4:0] */
-#define ARIZONA_ASYNC_SAMPLE_RATE_STS_SHIFT           0  /* ASYNC_SAMPLE_RATE_STS - [4:0] */
-#define ARIZONA_ASYNC_SAMPLE_RATE_STS_WIDTH           5  /* ASYNC_SAMPLE_RATE_STS - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_1_STS_MASK     0x001F  /* ASYNC_SAMPLE_RATE_1_STS - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_1_STS_SHIFT         0  /* ASYNC_SAMPLE_RATE_1_STS - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_1_STS_WIDTH         5  /* ASYNC_SAMPLE_RATE_1_STS - [4:0] */
+
+/*
+ * R284 (0x11C) - Async sample rate 2 status
+ */
+#define ARIZONA_ASYNC_SAMPLE_RATE_2_STS_MASK     0x001F  /* ASYNC_SAMPLE_RATE_2_STS - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_2_STS_SHIFT         0  /* ASYNC_SAMPLE_RATE_2_STS - [4:0] */
+#define ARIZONA_ASYNC_SAMPLE_RATE_2_STS_WIDTH         5  /* ASYNC_SAMPLE_RATE_2_STS - [4:0] */
 
 /*
  * R329 (0x149) - Output system clock
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index 2c71f16bd661..0c05e7a7945f 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -1220,7 +1220,7 @@ static int arizona_hw_params_rate(struct snd_pcm_substream *substream,
 		break;
 	case ARIZONA_CLK_ASYNCCLK:
 		snd_soc_update_bits(codec, ARIZONA_ASYNC_SAMPLE_RATE_1,
-				    ARIZONA_ASYNC_SAMPLE_RATE_MASK, sr_val);
+				    ARIZONA_ASYNC_SAMPLE_RATE_1_MASK, sr_val);
 		if (base)
 			snd_soc_update_bits(codec, base + ARIZONA_AIF_RATE_CTRL,
 					    ARIZONA_AIF1_RATE_MASK,
-- 
cgit v1.2.3


From e9e9d3973594cadd9e892bc79f914f299bb61124 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 16 Aug 2014 21:23:40 +0800
Subject: mfd: da9052: Avoid setting read_flag_mask for da9052-i2c driver

Current code init regmap with &da9052_regmap_config for both da9052-spi and
da9052-i2c drivers. da9052-spi sets the read_flag_mask.
The same setting may be applied for da9052-i2c if da9052-spi driver is loaded
first because they actually use the same regmap_config setting.
Fix this issue by using a local variable for regmap_config in da9052-spi driver,
so the settings in spi driver won't impact the settings in i2c driver.
Also makes da9052_regmap_config const to avoid similar issue.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9052-core.c         | 2 +-
 drivers/mfd/da9052-spi.c          | 7 ++++---
 include/linux/mfd/da9052/da9052.h | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index e8af816d73a9..52a0c2f6264f 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -522,7 +522,7 @@ static const struct mfd_cell da9052_subdev_info[] = {
 	},
 };
 
-struct regmap_config da9052_regmap_config = {
+const struct regmap_config da9052_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 
diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c
index 17666b40b70c..45ae0b7d13ef 100644
--- a/drivers/mfd/da9052-spi.c
+++ b/drivers/mfd/da9052-spi.c
@@ -23,6 +23,7 @@
 
 static int da9052_spi_probe(struct spi_device *spi)
 {
+	struct regmap_config config;
 	int ret;
 	const struct spi_device_id *id = spi_get_device_id(spi);
 	struct da9052 *da9052;
@@ -40,10 +41,10 @@ static int da9052_spi_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, da9052);
 
-	da9052_regmap_config.read_flag_mask = 1;
-	da9052_regmap_config.write_flag_mask = 0;
+	config = da9052_regmap_config;
+	config.read_flag_mask = 1;
 
-	da9052->regmap = devm_regmap_init_spi(spi, &da9052_regmap_config);
+	da9052->regmap = devm_regmap_init_spi(spi, &config);
 	if (IS_ERR(da9052->regmap)) {
 		ret = PTR_ERR(da9052->regmap);
 		dev_err(&spi->dev, "Failed to allocate register map: %d\n",
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index bba65f51a0b5..c18a4c19d6fc 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -211,7 +211,7 @@ static inline int da9052_reg_update(struct da9052 *da9052, unsigned char reg,
 int da9052_device_init(struct da9052 *da9052, u8 chip_id);
 void da9052_device_exit(struct da9052 *da9052);
 
-extern struct regmap_config da9052_regmap_config;
+extern const struct regmap_config da9052_regmap_config;
 
 int da9052_irq_init(struct da9052 *da9052);
 int da9052_irq_exit(struct da9052 *da9052);
-- 
cgit v1.2.3


From f69a7cf74d5536faa180437581be2a9c0aad1bb1 Mon Sep 17 00:00:00 2001
From: Chris Zhong <zyw@rock-chips.com>
Date: Wed, 3 Sep 2014 21:51:44 +0800
Subject: mfd: RK808: Add new mfd driver for RK808

The RK808 chip is a power management IC for multimedia and handheld
devices. It contains the following components:

- Regulators
- RTC
- Clkout

The RK808 core driver is registered as a platform driver and provides
communication through I2C with the host device for the different
components.

Signed-off-by: Chris Zhong <zyw@rock-chips.com>
Signed-off-by: Zhang Qing <zhangqing@rock-chips.com>
Tested-by: Heiko <heiko@sntech.de>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig       |  13 +++
 drivers/mfd/Makefile      |   1 +
 drivers/mfd/rk808.c       | 245 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/rk808.h | 196 +++++++++++++++++++++++++++++++++++++
 4 files changed, 455 insertions(+)
 create mode 100644 drivers/mfd/rk808.c
 create mode 100644 include/linux/mfd/rk808.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 82f70dcab136..049796a28215 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -597,6 +597,19 @@ config MFD_RC5T583
 	  Additional drivers must be enabled in order to use the
 	  different functionality of the device.
 
+config MFD_RK808
+	tristate "Rockchip RK808 Power Management chip"
+	depends on I2C && OF
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	help
+	  If you say yes here you get support for the RK808
+	  Power Management chips.
+	  This driver provides common support for accessing the device
+	  through I2C interface. The device supports multiple sub-devices
+	  including interrupts, RTC, LDO & DCDC regulators, and onkey.
+
 config MFD_RN5T618
 	tristate "Ricoh RN5T5618 PMIC"
 	depends on I2C
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index aa5a73a5ba47..bd111213bd58 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -160,6 +160,7 @@ obj-$(CONFIG_MFD_INTEL_MSIC)	+= intel_msic.o
 obj-$(CONFIG_MFD_PALMAS)	+= palmas.o
 obj-$(CONFIG_MFD_VIPERBOARD)    += viperboard.o
 obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
+obj-$(CONFIG_MFD_RK808)		+= rk808.o
 obj-$(CONFIG_MFD_RN5T618)	+= rn5t618.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c
new file mode 100644
index 000000000000..0324422b3b10
--- /dev/null
+++ b/drivers/mfd/rk808.c
@@ -0,0 +1,245 @@
+/*
+ * MFD core driver for Rockchip RK808
+ *
+ * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
+ *
+ * Author: Chris Zhong <zyw@rock-chips.com>
+ * Author: Zhang Qing <zhangqing@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/rk808.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+
+struct rk808_reg_data {
+	int addr;
+	int mask;
+	int value;
+};
+
+static const struct regmap_config rk808_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = RK808_IO_POL_REG,
+};
+
+static struct resource rtc_resources[] = {
+	{
+		.start  = RK808_IRQ_RTC_ALARM,
+		.end    = RK808_IRQ_RTC_ALARM,
+		.flags  = IORESOURCE_IRQ,
+	}
+};
+
+static const struct mfd_cell rk808s[] = {
+	{ .name = "rk808-clkout", },
+	{ .name = "rk808-regulator", },
+	{
+		.name = "rk808-rtc",
+		.num_resources = ARRAY_SIZE(rtc_resources),
+		.resources = &rtc_resources[0],
+	},
+};
+
+static const struct rk808_reg_data pre_init_reg[] = {
+	{ RK808_BUCK3_CONFIG_REG, BUCK_ILMIN_MASK,  BUCK_ILMIN_150MA },
+	{ RK808_BUCK4_CONFIG_REG, BUCK_ILMIN_MASK,  BUCK_ILMIN_200MA },
+	{ RK808_BOOST_CONFIG_REG, BOOST_ILMIN_MASK, BOOST_ILMIN_100MA },
+	{ RK808_BUCK1_CONFIG_REG, BUCK1_RATE_MASK,  BUCK_ILMIN_200MA },
+	{ RK808_BUCK2_CONFIG_REG, BUCK2_RATE_MASK,  BUCK_ILMIN_200MA },
+	{ RK808_VB_MON_REG,       MASK_ALL,         VB_LO_ACT |
+						    VB_LO_SEL_3500MV },
+};
+
+static const struct regmap_irq rk808_irqs[] = {
+	/* INT_STS */
+	[RK808_IRQ_VOUT_LO] = {
+		.mask = RK808_IRQ_VOUT_LO_MSK,
+		.reg_offset = 0,
+	},
+	[RK808_IRQ_VB_LO] = {
+		.mask = RK808_IRQ_VB_LO_MSK,
+		.reg_offset = 0,
+	},
+	[RK808_IRQ_PWRON] = {
+		.mask = RK808_IRQ_PWRON_MSK,
+		.reg_offset = 0,
+	},
+	[RK808_IRQ_PWRON_LP] = {
+		.mask = RK808_IRQ_PWRON_LP_MSK,
+		.reg_offset = 0,
+	},
+	[RK808_IRQ_HOTDIE] = {
+		.mask = RK808_IRQ_HOTDIE_MSK,
+		.reg_offset = 0,
+	},
+	[RK808_IRQ_RTC_ALARM] = {
+		.mask = RK808_IRQ_RTC_ALARM_MSK,
+		.reg_offset = 0,
+	},
+	[RK808_IRQ_RTC_PERIOD] = {
+		.mask = RK808_IRQ_RTC_PERIOD_MSK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[RK808_IRQ_PLUG_IN_INT] = {
+		.mask = RK808_IRQ_PLUG_IN_INT_MSK,
+		.reg_offset = 1,
+	},
+	[RK808_IRQ_PLUG_OUT_INT] = {
+		.mask = RK808_IRQ_PLUG_OUT_INT_MSK,
+		.reg_offset = 1,
+	},
+};
+
+static struct regmap_irq_chip rk808_irq_chip = {
+	.name = "rk808",
+	.irqs = rk808_irqs,
+	.num_irqs = ARRAY_SIZE(rk808_irqs),
+	.num_regs = 2,
+	.irq_reg_stride = 2,
+	.status_base = RK808_INT_STS_REG1,
+	.mask_base = RK808_INT_STS_MSK_REG1,
+	.ack_base = RK808_INT_STS_REG1,
+	.init_ack_masked = true,
+};
+
+static struct i2c_client *rk808_i2c_client;
+static void rk808_device_shutdown(void)
+{
+	int ret;
+	struct rk808 *rk808 = i2c_get_clientdata(rk808_i2c_client);
+
+	if (!rk808) {
+		dev_warn(&rk808_i2c_client->dev,
+			 "have no rk808, so do nothing here\n");
+		return;
+	}
+
+	ret = regmap_update_bits(rk808->regmap,
+				 RK808_DEVCTRL_REG,
+				 DEV_OFF_RST, DEV_OFF_RST);
+	if (ret)
+		dev_err(&rk808_i2c_client->dev, "power off error!\n");
+}
+
+static int rk808_probe(struct i2c_client *client,
+		       const struct i2c_device_id *id)
+{
+	struct device_node *np = client->dev.of_node;
+	struct rk808 *rk808;
+	int pm_off = 0;
+	int ret;
+	int i;
+
+	if (!client->irq) {
+		dev_err(&client->dev, "No interrupt support, no core IRQ\n");
+		return -EINVAL;
+	}
+
+	rk808 = devm_kzalloc(&client->dev, sizeof(*rk808), GFP_KERNEL);
+	if (!rk808)
+		return -ENOMEM;
+
+	rk808->regmap = devm_regmap_init_i2c(client, &rk808_regmap_config);
+	if (IS_ERR(rk808->regmap)) {
+		dev_err(&client->dev, "regmap initialization failed\n");
+		return PTR_ERR(rk808->regmap);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(pre_init_reg); i++) {
+		ret = regmap_update_bits(rk808->regmap, pre_init_reg[i].addr,
+					 pre_init_reg[i].mask,
+					 pre_init_reg[i].value);
+		if (ret) {
+			dev_err(&client->dev,
+				"0x%x write err\n", pre_init_reg[i].addr);
+			return ret;
+		}
+	}
+
+	ret = regmap_add_irq_chip(rk808->regmap, client->irq,
+				  IRQF_ONESHOT, -1,
+				  &rk808_irq_chip, &rk808->irq_data);
+	if (ret) {
+		dev_err(&client->dev, "Failed to add irq_chip %d\n", ret);
+		return ret;
+	}
+
+	rk808->i2c = client;
+	i2c_set_clientdata(client, rk808);
+
+	ret = mfd_add_devices(&client->dev, -1,
+			      rk808s, ARRAY_SIZE(rk808s),
+			      NULL, 0, regmap_irq_get_domain(rk808->irq_data));
+	if (ret) {
+		dev_err(&client->dev, "failed to add MFD devices %d\n", ret);
+		goto err_irq;
+	}
+
+	pm_off = of_property_read_bool(np,
+				"rockchip,system-power-controller");
+	if (pm_off && !pm_power_off) {
+		rk808_i2c_client = client;
+		pm_power_off = rk808_device_shutdown;
+	}
+
+	return 0;
+
+err_irq:
+	regmap_del_irq_chip(client->irq, rk808->irq_data);
+	return ret;
+}
+
+static int rk808_remove(struct i2c_client *client)
+{
+	struct rk808 *rk808 = i2c_get_clientdata(client);
+
+	regmap_del_irq_chip(client->irq, rk808->irq_data);
+	mfd_remove_devices(&client->dev);
+	pm_power_off = NULL;
+
+	return 0;
+}
+
+static struct of_device_id rk808_of_match[] = {
+	{ .compatible = "rockchip,rk808" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, rk808_of_match);
+
+static const struct i2c_device_id rk808_ids[] = {
+	{ "rk808" },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, rk808_ids);
+
+static struct i2c_driver rk808_i2c_driver = {
+	.driver = {
+		.name = "rk808",
+		.of_match_table = rk808_of_match,
+	},
+	.probe    = rk808_probe,
+	.remove   = rk808_remove,
+	.id_table = rk808_ids,
+};
+
+module_i2c_driver(rk808_i2c_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Chris Zhong <zyw@rock-chips.com>");
+MODULE_AUTHOR("Zhang Qing <zhangqing@rock-chips.com>");
+MODULE_DESCRIPTION("RK808 PMIC driver");
diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h
new file mode 100644
index 000000000000..fb09312d854b
--- /dev/null
+++ b/include/linux/mfd/rk808.h
@@ -0,0 +1,196 @@
+/*
+ * rk808.h for Rockchip RK808
+ *
+ * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
+ *
+ * Author: Chris Zhong <zyw@rock-chips.com>
+ * Author: Zhang Qing <zhangqing@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_REGULATOR_rk808_H
+#define __LINUX_REGULATOR_rk808_H
+
+#include <linux/regulator/machine.h>
+#include <linux/regmap.h>
+
+/*
+ * rk808 Global Register Map.
+ */
+
+#define RK808_DCDC1	0 /* (0+RK808_START) */
+#define RK808_LDO1	4 /* (4+RK808_START) */
+#define RK808_NUM_REGULATORS   14
+
+enum rk808_reg {
+	RK808_ID_DCDC1,
+	RK808_ID_DCDC2,
+	RK808_ID_DCDC3,
+	RK808_ID_DCDC4,
+	RK808_ID_LDO1,
+	RK808_ID_LDO2,
+	RK808_ID_LDO3,
+	RK808_ID_LDO4,
+	RK808_ID_LDO5,
+	RK808_ID_LDO6,
+	RK808_ID_LDO7,
+	RK808_ID_LDO8,
+	RK808_ID_SWITCH1,
+	RK808_ID_SWITCH2,
+};
+
+#define RK808_SECONDS_REG	0x00
+#define RK808_MINUTES_REG	0x01
+#define RK808_HOURS_REG		0x02
+#define RK808_DAYS_REG		0x03
+#define RK808_MONTHS_REG	0x04
+#define RK808_YEARS_REG		0x05
+#define RK808_WEEKS_REG		0x06
+#define RK808_ALARM_SECONDS_REG	0x08
+#define RK808_ALARM_MINUTES_REG	0x09
+#define RK808_ALARM_HOURS_REG	0x0a
+#define RK808_ALARM_DAYS_REG	0x0b
+#define RK808_ALARM_MONTHS_REG	0x0c
+#define RK808_ALARM_YEARS_REG	0x0d
+#define RK808_RTC_CTRL_REG	0x10
+#define RK808_RTC_STATUS_REG	0x11
+#define RK808_RTC_INT_REG	0x12
+#define RK808_RTC_COMP_LSB_REG	0x13
+#define RK808_RTC_COMP_MSB_REG	0x14
+#define RK808_CLK32OUT_REG	0x20
+#define RK808_VB_MON_REG	0x21
+#define RK808_THERMAL_REG	0x22
+#define RK808_DCDC_EN_REG	0x23
+#define RK808_LDO_EN_REG	0x24
+#define RK808_SLEEP_SET_OFF_REG1	0x25
+#define RK808_SLEEP_SET_OFF_REG2	0x26
+#define RK808_DCDC_UV_STS_REG	0x27
+#define RK808_DCDC_UV_ACT_REG	0x28
+#define RK808_LDO_UV_STS_REG	0x29
+#define RK808_LDO_UV_ACT_REG	0x2a
+#define RK808_DCDC_PG_REG	0x2b
+#define RK808_LDO_PG_REG	0x2c
+#define RK808_VOUT_MON_TDB_REG	0x2d
+#define RK808_BUCK1_CONFIG_REG		0x2e
+#define RK808_BUCK1_ON_VSEL_REG		0x2f
+#define RK808_BUCK1_SLP_VSEL_REG	0x30
+#define RK808_BUCK1_DVS_VSEL_REG	0x31
+#define RK808_BUCK2_CONFIG_REG		0x32
+#define RK808_BUCK2_ON_VSEL_REG		0x33
+#define RK808_BUCK2_SLP_VSEL_REG	0x34
+#define RK808_BUCK2_DVS_VSEL_REG	0x35
+#define RK808_BUCK3_CONFIG_REG		0x36
+#define RK808_BUCK4_CONFIG_REG		0x37
+#define RK808_BUCK4_ON_VSEL_REG		0x38
+#define RK808_BUCK4_SLP_VSEL_REG	0x39
+#define RK808_BOOST_CONFIG_REG		0x3a
+#define RK808_LDO1_ON_VSEL_REG		0x3b
+#define RK808_LDO1_SLP_VSEL_REG		0x3c
+#define RK808_LDO2_ON_VSEL_REG		0x3d
+#define RK808_LDO2_SLP_VSEL_REG		0x3e
+#define RK808_LDO3_ON_VSEL_REG		0x3f
+#define RK808_LDO3_SLP_VSEL_REG		0x40
+#define RK808_LDO4_ON_VSEL_REG		0x41
+#define RK808_LDO4_SLP_VSEL_REG		0x42
+#define RK808_LDO5_ON_VSEL_REG		0x43
+#define RK808_LDO5_SLP_VSEL_REG		0x44
+#define RK808_LDO6_ON_VSEL_REG		0x45
+#define RK808_LDO6_SLP_VSEL_REG		0x46
+#define RK808_LDO7_ON_VSEL_REG		0x47
+#define RK808_LDO7_SLP_VSEL_REG		0x48
+#define RK808_LDO8_ON_VSEL_REG		0x49
+#define RK808_LDO8_SLP_VSEL_REG		0x4a
+#define RK808_DEVCTRL_REG	0x4b
+#define RK808_INT_STS_REG1	0x4c
+#define RK808_INT_STS_MSK_REG1	0x4d
+#define RK808_INT_STS_REG2	0x4e
+#define RK808_INT_STS_MSK_REG2	0x4f
+#define RK808_IO_POL_REG	0x50
+
+/* IRQ Definitions */
+#define RK808_IRQ_VOUT_LO	0
+#define RK808_IRQ_VB_LO		1
+#define RK808_IRQ_PWRON		2
+#define RK808_IRQ_PWRON_LP	3
+#define RK808_IRQ_HOTDIE	4
+#define RK808_IRQ_RTC_ALARM	5
+#define RK808_IRQ_RTC_PERIOD	6
+#define RK808_IRQ_PLUG_IN_INT	7
+#define RK808_IRQ_PLUG_OUT_INT	8
+#define RK808_NUM_IRQ		9
+
+#define RK808_IRQ_VOUT_LO_MSK		BIT(0)
+#define RK808_IRQ_VB_LO_MSK		BIT(1)
+#define RK808_IRQ_PWRON_MSK		BIT(2)
+#define RK808_IRQ_PWRON_LP_MSK		BIT(3)
+#define RK808_IRQ_HOTDIE_MSK		BIT(4)
+#define RK808_IRQ_RTC_ALARM_MSK		BIT(5)
+#define RK808_IRQ_RTC_PERIOD_MSK	BIT(6)
+#define RK808_IRQ_PLUG_IN_INT_MSK	BIT(0)
+#define RK808_IRQ_PLUG_OUT_INT_MSK	BIT(1)
+
+#define RK808_VBAT_LOW_2V8	0x00
+#define RK808_VBAT_LOW_2V9	0x01
+#define RK808_VBAT_LOW_3V0	0x02
+#define RK808_VBAT_LOW_3V1	0x03
+#define RK808_VBAT_LOW_3V2	0x04
+#define RK808_VBAT_LOW_3V3	0x05
+#define RK808_VBAT_LOW_3V4	0x06
+#define RK808_VBAT_LOW_3V5	0x07
+#define VBAT_LOW_VOL_MASK	(0x07 << 0)
+#define EN_VABT_LOW_SHUT_DOWN	(0x00 << 4)
+#define EN_VBAT_LOW_IRQ		(0x1 << 4)
+#define VBAT_LOW_ACT_MASK	(0x1 << 4)
+
+#define BUCK_ILMIN_MASK		(7 << 0)
+#define BOOST_ILMIN_MASK	(7 << 0)
+#define BUCK1_RATE_MASK		(3 << 3)
+#define BUCK2_RATE_MASK		(3 << 3)
+#define MASK_ALL	0xff
+
+#define SWITCH2_EN	BIT(6)
+#define SWITCH1_EN	BIT(5)
+#define DEV_OFF_RST	BIT(3)
+
+#define VB_LO_ACT		BIT(4)
+#define VB_LO_SEL_3500MV	(7 << 0)
+
+#define VOUT_LO_INT	BIT(0)
+#define CLK32KOUT2_EN	BIT(0)
+
+enum {
+	BUCK_ILMIN_50MA,
+	BUCK_ILMIN_100MA,
+	BUCK_ILMIN_150MA,
+	BUCK_ILMIN_200MA,
+	BUCK_ILMIN_250MA,
+	BUCK_ILMIN_300MA,
+	BUCK_ILMIN_350MA,
+	BUCK_ILMIN_400MA,
+};
+
+enum {
+	BOOST_ILMIN_75MA,
+	BOOST_ILMIN_100MA,
+	BOOST_ILMIN_125MA,
+	BOOST_ILMIN_150MA,
+	BOOST_ILMIN_175MA,
+	BOOST_ILMIN_200MA,
+	BOOST_ILMIN_225MA,
+	BOOST_ILMIN_250MA,
+};
+
+struct rk808 {
+	struct i2c_client *i2c;
+	struct regmap_irq_chip_data *irq_data;
+	struct regmap *regmap;
+};
+#endif /* __LINUX_REGULATOR_rk808_H */
-- 
cgit v1.2.3


From bb048713bba3ead39f6112910906d9fe3f88ede7 Mon Sep 17 00:00:00 2001
From: Josef Ahmad <josef.ahmad@intel.com>
Date: Tue, 2 Sep 2014 13:45:20 +0300
Subject: pci_ids: Add support for Intel Quark ILB

This patch adds the PCI id for Intel Quark ILB.
It will be used for GPIO and Multifunction device driver.

Signed-off-by: Josef Ahmad <josef.ahmad@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6ed0bb73a864..4e82195b1695 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2557,6 +2557,7 @@
 #define PCI_DEVICE_ID_INTEL_MFD_EMMC0	0x0823
 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1	0x0824
 #define PCI_DEVICE_ID_INTEL_MRST_SD2	0x084F
+#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB	0x095E
 #define PCI_DEVICE_ID_INTEL_I960	0x0960
 #define PCI_DEVICE_ID_INTEL_I960RM	0x0962
 #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB	0x0c60
-- 
cgit v1.2.3


From 8bdf87b400271ebc7fbf71e117c299d19a97ebb4 Mon Sep 17 00:00:00 2001
From: Guodong Xu <guodong.xu@linaro.org>
Date: Mon, 1 Sep 2014 16:28:34 +0800
Subject: mfd: Add HI6421 PMIC Core driver

This adds driver to support HiSilicon Hi6421 PMIC. Hi6421 includes multi-
functions, such as regulators, codec, ADCs, Coulomb counter, etc.
This driver includes core APIs _only_.

Drivers for individul components, like voltage regulators, are
implemented in corresponding driver directories and files.

Registers in Hi6421 are memory mapped, so using regmap-mmio API.

Signed-off-by: Guodong Xu <guodong.xu@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig             |  13 +++++
 drivers/mfd/Makefile            |   1 +
 drivers/mfd/hi6421-pmic-core.c  | 113 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/hi6421-pmic.h |  41 +++++++++++++++
 4 files changed, 168 insertions(+)
 create mode 100644 drivers/mfd/hi6421-pmic-core.c
 create mode 100644 include/linux/mfd/hi6421-pmic.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 049796a28215..609b7a2144d6 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -210,6 +210,19 @@ config MFD_MC13XXX_I2C
 	help
 	  Select this if your MC13xxx is connected via an I2C bus.
 
+config MFD_HI6421_PMIC
+	tristate "HiSilicon Hi6421 PMU/Codec IC"
+	depends on OF
+	select MFD_CORE
+	select REGMAP_MMIO
+	help
+	  Add support for HiSilicon Hi6421 PMIC. Hi6421 includes multi-
+	  functions, such as regulators, RTC, codec, Coulomb counter, etc.
+	  This driver includes core APIs _only_. You have to select
+	  individul components like voltage regulators under corresponding
+	  menus in order to enable them.
+	  We communicate with the Hi6421 via memory-mapped I/O.
+
 config HTC_EGPIO
 	bool "HTC EGPIO support"
 	depends on GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index bd111213bd58..1d11a247abbb 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -172,6 +172,7 @@ obj-$(CONFIG_MFD_AS3722)	+= as3722.o
 obj-$(CONFIG_MFD_STW481X)	+= stw481x.o
 obj-$(CONFIG_MFD_IPAQ_MICRO)	+= ipaq-micro.o
 obj-$(CONFIG_MFD_MENF21BMC)	+= menf21bmc.o
+obj-$(CONFIG_MFD_HI6421_PMIC)	+= hi6421-pmic-core.o
 
 intel-soc-pmic-objs		:= intel_soc_pmic_core.o intel_soc_pmic_crc.o
 obj-$(CONFIG_INTEL_SOC_PMIC)	+= intel-soc-pmic.o
diff --git a/drivers/mfd/hi6421-pmic-core.c b/drivers/mfd/hi6421-pmic-core.c
new file mode 100644
index 000000000000..321a2656fd00
--- /dev/null
+++ b/drivers/mfd/hi6421-pmic-core.c
@@ -0,0 +1,113 @@
+/*
+ * Device driver for Hi6421 IC
+ *
+ * Copyright (c) <2011-2014> HiSilicon Technologies Co., Ltd.
+ *              http://www.hisilicon.com
+ * Copyright (c) <2013-2014> Linaro Ltd.
+ *              http://www.linaro.org
+ *
+ * Author: Guodong Xu <guodong.xu@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/hi6421-pmic.h>
+
+static const struct mfd_cell hi6421_devs[] = {
+	{ .name = "hi6421-regulator", },
+};
+
+static struct regmap_config hi6421_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 8,
+	.max_register = HI6421_REG_TO_BUS_ADDR(HI6421_REG_MAX),
+};
+
+static int hi6421_pmic_probe(struct platform_device *pdev)
+{
+	struct hi6421_pmic *pmic;
+	struct resource *res;
+	void __iomem *base;
+	int ret;
+
+	pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL);
+	if (!pmic)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	pmic->regmap = devm_regmap_init_mmio_clk(&pdev->dev, NULL, base,
+						 &hi6421_regmap_config);
+	if (IS_ERR(pmic->regmap)) {
+		dev_err(&pdev->dev,
+			"regmap init failed: %ld\n", PTR_ERR(pmic->regmap));
+		return PTR_ERR(pmic->regmap);
+	}
+
+	/* set over-current protection debounce 8ms */
+	regmap_update_bits(pmic->regmap, HI6421_OCP_DEB_CTRL_REG,
+				(HI6421_OCP_DEB_SEL_MASK
+				 | HI6421_OCP_EN_DEBOUNCE_MASK
+				 | HI6421_OCP_AUTO_STOP_MASK),
+				(HI6421_OCP_DEB_SEL_8MS
+				 | HI6421_OCP_EN_DEBOUNCE_ENABLE));
+
+	platform_set_drvdata(pdev, pmic);
+
+	ret = mfd_add_devices(&pdev->dev, 0, hi6421_devs,
+			ARRAY_SIZE(hi6421_devs), NULL, 0, NULL);
+	if (ret) {
+		dev_err(&pdev->dev, "add mfd devices failed: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hi6421_pmic_remove(struct platform_device *pdev)
+{
+	mfd_remove_devices(&pdev->dev);
+
+	return 0;
+}
+
+static struct of_device_id of_hi6421_pmic_match_tbl[] = {
+	{ .compatible = "hisilicon,hi6421-pmic", },
+	{ },
+};
+
+static struct platform_driver hi6421_pmic_driver = {
+	.driver = {
+		.name	= "hi6421_pmic",
+		.of_match_table = of_hi6421_pmic_match_tbl,
+	},
+	.probe	= hi6421_pmic_probe,
+	.remove	= hi6421_pmic_remove,
+};
+module_platform_driver(hi6421_pmic_driver);
+
+MODULE_AUTHOR("Guodong Xu <guodong.xu@linaro.org>");
+MODULE_DESCRIPTION("Hi6421 PMIC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/hi6421-pmic.h b/include/linux/mfd/hi6421-pmic.h
new file mode 100644
index 000000000000..587273e35acf
--- /dev/null
+++ b/include/linux/mfd/hi6421-pmic.h
@@ -0,0 +1,41 @@
+/*
+ * Header file for device driver Hi6421 PMIC
+ *
+ * Copyright (c) <2011-2014> HiSilicon Technologies Co., Ltd.
+ *              http://www.hisilicon.com
+ * Copyright (c) <2013-2014> Linaro Ltd.
+ *              http://www.linaro.org
+ *
+ * Author: Guodong Xu <guodong.xu@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef	__HI6421_PMIC_H
+#define	__HI6421_PMIC_H
+
+/* Hi6421 registers are mapped to memory bus in 4 bytes stride */
+#define HI6421_REG_TO_BUS_ADDR(x)	(x << 2)
+
+/* Hi6421 maximum register number */
+#define HI6421_REG_MAX			0xFF
+
+/* Hi6421 OCP (over current protection) and DEB (debounce) control register */
+#define	HI6421_OCP_DEB_CTRL_REG		HI6421_REG_TO_BUS_ADDR(0x51)
+#define	HI6421_OCP_DEB_SEL_MASK		0x0C
+#define HI6421_OCP_DEB_SEL_8MS		0x00
+#define HI6421_OCP_DEB_SEL_16MS		0x04
+#define HI6421_OCP_DEB_SEL_32MS		0x08
+#define HI6421_OCP_DEB_SEL_64MS		0x0C
+#define HI6421_OCP_EN_DEBOUNCE_MASK	0x02
+#define HI6421_OCP_EN_DEBOUNCE_ENABLE	0x02
+#define HI6421_OCP_AUTO_STOP_MASK	0x01
+#define HI6421_OCP_AUTO_STOP_ENABLE	0x01
+
+struct hi6421_pmic {
+	struct regmap		*regmap;
+};
+
+#endif		/* __HI6421_PMIC_H */
-- 
cgit v1.2.3


From 0b496b4c95c74ba795bc642a6092263ebf905759 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 5 Sep 2014 22:16:18 +0100
Subject: mfd: tps65217: Tell regmap what registers are valid

Allow regmap to provide debugfs access to the register map by telling it
what registers are valid.

Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/tps65217.c       | 2 ++
 include/linux/mfd/tps65217.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index 3cc4c7084b92..a8ee52c95f2f 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -146,6 +146,8 @@ EXPORT_SYMBOL_GPL(tps65217_clear_bits);
 static struct regmap_config tps65217_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
+
+	.max_register = TPS65217_REG_MAX,
 };
 
 static const struct of_device_id tps65217_of_match[] = {
diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h
index 95d6938737fd..ac7fba44d7e4 100644
--- a/include/linux/mfd/tps65217.h
+++ b/include/linux/mfd/tps65217.h
@@ -60,6 +60,8 @@
 #define TPS65217_REG_SEQ5		0X1D
 #define TPS65217_REG_SEQ6		0X1E
 
+#define TPS65217_REG_MAX		TPS65217_REG_SEQ6
+
 /* Register field definitions */
 #define TPS65217_CHIPID_CHIP_MASK	0xF0
 #define TPS65217_CHIPID_REV_MASK	0x0F
-- 
cgit v1.2.3


From f0933a60d1902c918249d11fb6d9a5ffd581ef5b Mon Sep 17 00:00:00 2001
From: Jeff Lance <j-lance1@ti.com>
Date: Thu, 4 Sep 2014 19:01:57 +0200
Subject: mfd: ti_am335x_tscadc: Update logic in CTRL register for 5-wire TS

The logic in AFE_Pen_Ctrl bitmask in the CTRL register is different for five
wire versus four or eight wire touschscreens. This patch should fix this for
five-wire touch screens. There should be no change needed here for four and
eight wire tousch screens.

Signed-off-by: Jeff Lance <j-lance1@ti.com>
[bigeasy: keep the change mfd only]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/ti_am335x_tscadc.c       | 30 +++++++++++++++++-------------
 include/linux/mfd/ti_am335x_tscadc.h |  1 +
 2 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index 121add8be456..d877e777cce6 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -242,18 +242,20 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	tscadc_writel(tscadc, REG_CLKDIV, tscadc->clk_div);
 
 	/* Set the control register bits */
-	ctrl = CNTRLREG_STEPCONFIGWRT |
-			CNTRLREG_STEPID;
-	if (tsc_wires > 0)
-		ctrl |= CNTRLREG_4WIRE | CNTRLREG_TSCENB;
+	ctrl = CNTRLREG_STEPCONFIGWRT |	CNTRLREG_STEPID;
 	tscadc_writel(tscadc, REG_CTRL, ctrl);
 
 	/* Set register bits for Idle Config Mode */
-	if (tsc_wires > 0)
+	if (tsc_wires > 0) {
+		tscadc->tsc_wires = tsc_wires;
+		if (tsc_wires == 5)
+			ctrl |= CNTRLREG_5WIRE | CNTRLREG_TSCENB;
+		else
+			ctrl |= CNTRLREG_4WIRE | CNTRLREG_TSCENB;
 		tscadc_idle_config(tscadc);
+	}
 
 	/* Enable the TSC module enable bit */
-	ctrl = tscadc_readl(tscadc, REG_CTRL);
 	ctrl |= CNTRLREG_TSCSSENB;
 	tscadc_writel(tscadc, REG_CTRL, ctrl);
 
@@ -325,21 +327,23 @@ static int tscadc_suspend(struct device *dev)
 static int tscadc_resume(struct device *dev)
 {
 	struct ti_tscadc_dev	*tscadc_dev = dev_get_drvdata(dev);
-	unsigned int restore, ctrl;
+	u32 ctrl;
 
 	pm_runtime_get_sync(dev);
 
 	/* context restore */
 	ctrl = CNTRLREG_STEPCONFIGWRT |	CNTRLREG_STEPID;
-	if (tscadc_dev->tsc_cell != -1)
-		ctrl |= CNTRLREG_TSCENB | CNTRLREG_4WIRE;
 	tscadc_writel(tscadc_dev, REG_CTRL, ctrl);
 
-	if (tscadc_dev->tsc_cell != -1)
+	if (tscadc_dev->tsc_cell != -1) {
+		if (tscadc_dev->tsc_wires == 5)
+			ctrl |= CNTRLREG_5WIRE | CNTRLREG_TSCENB;
+		else
+			ctrl |= CNTRLREG_4WIRE | CNTRLREG_TSCENB;
 		tscadc_idle_config(tscadc_dev);
-	restore = tscadc_readl(tscadc_dev, REG_CTRL);
-	tscadc_writel(tscadc_dev, REG_CTRL,
-			(restore | CNTRLREG_TSCSSENB));
+	}
+	ctrl |= CNTRLREG_TSCSSENB;
+	tscadc_writel(tscadc_dev, REG_CTRL, ctrl);
 
 	tscadc_writel(tscadc_dev, REG_CLKDIV, tscadc_dev->clk_div);
 
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index fb96c84dada5..e2e70053470e 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -155,6 +155,7 @@ struct ti_tscadc_dev {
 	void __iomem *tscadc_base;
 	int irq;
 	int used_cells;	/* 1-2 */
+	int tsc_wires;
 	int tsc_cell;	/* -1 if not used */
 	int adc_cell;	/* -1 if not used */
 	struct mfd_cell cells[TSCADC_CELLS];
-- 
cgit v1.2.3


From 6ab3430129e258ea31dd214adf1c760dfafde67a Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 16 Sep 2014 14:52:36 +0300
Subject: mfd: Add ACPI support

If an MFD device is backed by ACPI namespace, we should allow subdevice
drivers to access their corresponding ACPI companion devices through normal
means (e.g using ACPI_COMPANION()).

This patch adds such support to the MFD core. If the MFD parent device
does not specify any ACPI _HID/_CID for the child device, the child
device will share the parent ACPI companion device. Otherwise the child
device will be assigned with the corresponding ACPI companion, if found
in the namespace below the parent.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/acpi/enumeration.txt | 27 +++++++++++++++++++++++++
 drivers/mfd/mfd-core.c             | 40 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/core.h           |  3 +++
 3 files changed, 70 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt
index e182be5e3c83..b60d2ab69497 100644
--- a/Documentation/acpi/enumeration.txt
+++ b/Documentation/acpi/enumeration.txt
@@ -312,3 +312,30 @@ a code like this:
 
 There are also devm_* versions of these functions which release the
 descriptors once the device is released.
+
+MFD devices
+~~~~~~~~~~~
+The MFD devices register their children as platform devices. For the child
+devices there needs to be an ACPI handle that they can use to reference
+parts of the ACPI namespace that relate to them. In the Linux MFD subsystem
+we provide two ways:
+
+	o The children share the parent ACPI handle.
+	o The MFD cell can specify the ACPI id of the device.
+
+For the first case, the MFD drivers do not need to do anything. The
+resulting child platform device will have its ACPI_COMPANION() set to point
+to the parent device.
+
+If the ACPI namespace has a device that we can match using an ACPI id,
+the id should be set like:
+
+	static struct mfd_cell my_subdevice_cell = {
+		.name = "my_subdevice",
+		/* set the resources relative to the parent */
+		.acpi_pnpid = "XYZ0001",
+	};
+
+The ACPI id "XYZ0001" is then used to lookup an ACPI device directly under
+the MFD device and if found, that ACPI companion device is bound to the
+resulting child platform device.
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 5d0fbe1e097a..f3338fe9d069 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -78,6 +78,44 @@ static int mfd_platform_add_cell(struct platform_device *pdev,
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_ACPI)
+static void mfd_acpi_add_device(const struct mfd_cell *cell,
+				struct platform_device *pdev)
+{
+	struct acpi_device *parent_adev;
+	struct acpi_device *adev;
+
+	parent_adev = ACPI_COMPANION(pdev->dev.parent);
+	if (!parent_adev)
+		return;
+
+	/*
+	 * MFD child device gets its ACPI handle either from the ACPI
+	 * device directly under the parent that matches the acpi_pnpid or
+	 * it will use the parent handle if is no acpi_pnpid is given.
+	 */
+	adev = parent_adev;
+	if (cell->acpi_pnpid) {
+		struct acpi_device_id ids[2] = {};
+		struct acpi_device *child_adev;
+
+		strlcpy(ids[0].id, cell->acpi_pnpid, sizeof(ids[0].id));
+		list_for_each_entry(child_adev, &parent_adev->children, node)
+			if (acpi_match_device_ids(child_adev, ids)) {
+				adev = child_adev;
+				break;
+			}
+	}
+
+	ACPI_COMPANION_SET(&pdev->dev, adev);
+}
+#else
+static inline void mfd_acpi_add_device(const struct mfd_cell *cell,
+				       struct platform_device *pdev)
+{
+}
+#endif
+
 static int mfd_add_device(struct device *parent, int id,
 			  const struct mfd_cell *cell, atomic_t *usage_count,
 			  struct resource *mem_base,
@@ -119,6 +157,8 @@ static int mfd_add_device(struct device *parent, int id,
 		}
 	}
 
+	mfd_acpi_add_device(cell, pdev);
+
 	if (cell->pdata_size) {
 		ret = platform_device_add_data(pdev,
 					cell->platform_data, cell->pdata_size);
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index f543de91ce19..73e1709d4c09 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -44,6 +44,9 @@ struct mfd_cell {
 	 */
 	const char		*of_compatible;
 
+	/* Matches ACPI PNP id, either _HID or _CID */
+	const char		*acpi_pnpid;
+
 	/*
 	 * These resources can be specified relative to the parent device.
 	 * For accessing hardware you should use resources from the platform dev
-- 
cgit v1.2.3


From 234abab143aef82c0ef1f2de409c0db96b666f3c Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 10 Sep 2014 21:29:56 +0200
Subject: tty: serial: 8250_core: allow to set ->throttle / ->unthrottle
 callbacks

The OMAP UART provides support for HW assisted flow control. What is
missing is the support to throttle / unthrottle callbacks which are used
by the omap-serial driver at the moment.
This patch adds the callbacks. It should be safe to add them since they
are only invoked from the serial_core (uart_throttle()) if the feature
flags are set.

Reviewed-by: Tony Lindgren <tony@atomide.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c | 14 ++++++++++++++
 include/linux/serial_core.h         |  2 ++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index a0c1d64f34c5..68c44d97091b 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -1319,6 +1319,16 @@ static void serial8250_start_tx(struct uart_port *port)
 	}
 }
 
+static void serial8250_throttle(struct uart_port *port)
+{
+	port->throttle(port);
+}
+
+static void serial8250_unthrottle(struct uart_port *port)
+{
+	port->unthrottle(port);
+}
+
 static void serial8250_stop_rx(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
@@ -2912,6 +2922,8 @@ static struct uart_ops serial8250_pops = {
 	.get_mctrl	= serial8250_get_mctrl,
 	.stop_tx	= serial8250_stop_tx,
 	.start_tx	= serial8250_start_tx,
+	.throttle	= serial8250_throttle,
+	.unthrottle	= serial8250_unthrottle,
 	.stop_rx	= serial8250_stop_rx,
 	.enable_ms	= serial8250_enable_ms,
 	.break_ctl	= serial8250_break_ctl,
@@ -3462,6 +3474,8 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 		uart->capabilities	= up->capabilities;
 		uart->rs485_config	= up->rs485_config;
 		uart->rs485		= up->rs485;
+		uart->port.throttle	= up->port.throttle;
+		uart->port.unthrottle	= up->port.unthrottle;
 
 		/* Take tx_loadsz from fifosize if it wasn't set separately */
 		if (uart->port.fifosize && !uart->tx_loadsz)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 204c452a7567..21c2e05c1bc3 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -125,6 +125,8 @@ struct uart_port {
 				               struct ktermios *old);
 	int			(*startup)(struct uart_port *port);
 	void			(*shutdown)(struct uart_port *port);
+	void			(*throttle)(struct uart_port *port);
+	void			(*unthrottle)(struct uart_port *port);
 	int			(*handle_irq)(struct uart_port *);
 	void			(*pm)(struct uart_port *, unsigned int state,
 				      unsigned int old);
-- 
cgit v1.2.3


From d74d5d1b7288ff9d4439c8c7e0e314cde9743467 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 10 Sep 2014 21:29:57 +0200
Subject: tty: serial: 8250_core: add run time pm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While comparing the OMAP-serial and the 8250 part of this I noticed that
the latter does not use run time-pm. Here are the pieces. It is
basically a get before first register access and a last_busy + put after
last access. This has to be enabled from userland _and_ UART_CAP_RPM is
required for this.
The runtime PM can usually work transparently in the background however
there is one exception to this: After serial8250_tx_chars() completes
there still may be unsent bytes in the FIFO (depending on CPU speed vs
baud rate + flow control). Even if the TTY-buffer is empty we do not
want RPM to disable the device because it won't send the remaining
bytes. Instead we leave serial8250_tx_chars() with RPM enabled and wait
for the FIFO empty interrupt. Once we enter serial8250_tx_chars() with
an empty buffer we know that the FIFO is empty and since we are not going
to send anything, we can disable the device.
That xchg() is to ensure that serial8250_tx_chars() can be called
multiple times and only the first invocation will actually invoke the
runtime PM function. So that the last invocation of __stop_tx() will
disable runtime pm.

NOTE: do not enable RPM on the device unless you know what you do! If
the device goes idle, it won't be woken up by incomming RX data _unless_
there is a wakeup irq configured which is usually the RX pin configure
for wakeup via the reset module. The RX activity will then wake up the
device from idle. However the first character is garbage and lost. The
following bytes will be received once the device is up in time. On the
beagle board xm (omap3) it takes approx 13ms from the first wakeup byte
until the first byte that is received properly if the device was in
core-off.

v5…v8:
	- drop RPM from serial8250_set_mctrl() it will be used in
	  restore path which already has RPM active and holds
	  dev->power.lock
v4…v5:
	- add a wrapper around rpm function and introduce UART_CAP_RPM
	  to ensure RPM put is invoked after the TX FIFO is empty.
v3…v4:
	- added runtime to the console code
	- removed device_may_wakeup() from serial8250_set_sleep()

Cc: mika.westerberg@linux.intel.com
Reviewed-by: Tony Lindgren <tony@atomide.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250.h      |   1 +
 drivers/tty/serial/8250/8250_core.c | 133 ++++++++++++++++++++++++++++++++----
 include/linux/serial_8250.h         |   1 +
 3 files changed, 122 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 85bfec58d77c..1bcb4b2141a6 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -72,6 +72,7 @@ struct serial8250_config {
 #define UART_CAP_UUE	(1 << 12)	/* UART needs IER bit 6 set (Xscale) */
 #define UART_CAP_RTOIE	(1 << 13)	/* UART needs IER bit 4 set (Xscale, Tegra) */
 #define UART_CAP_HFIFO	(1 << 14)	/* UART has a "hidden" FIFO */
+#define UART_CAP_RPM	(1 << 15)	/* Runtime PM is active while idle */
 
 #define UART_BUG_QUOT	(1 << 0)	/* UART has buggy quot LSB */
 #define UART_BUG_TXEN	(1 << 1)	/* UART has buggy TX IIR status */
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 68c44d97091b..3cf5c98013e4 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -38,6 +38,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
 #ifdef CONFIG_SPARC
 #include <linux/sunserialcore.h>
 #endif
@@ -540,6 +541,53 @@ void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p)
 }
 EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos);
 
+static void serial8250_rpm_get(struct uart_8250_port *p)
+{
+	if (!(p->capabilities & UART_CAP_RPM))
+		return;
+	pm_runtime_get_sync(p->port.dev);
+}
+
+static void serial8250_rpm_put(struct uart_8250_port *p)
+{
+	if (!(p->capabilities & UART_CAP_RPM))
+		return;
+	pm_runtime_mark_last_busy(p->port.dev);
+	pm_runtime_put_autosuspend(p->port.dev);
+}
+
+/*
+ * This two wrapper ensure, that enable_runtime_pm_tx() can be called more than
+ * once and disable_runtime_pm_tx() will still disable RPM because the fifo is
+ * empty and the HW can idle again.
+ */
+static void serial8250_rpm_get_tx(struct uart_8250_port *p)
+{
+	unsigned char rpm_active;
+
+	if (!(p->capabilities & UART_CAP_RPM))
+		return;
+
+	rpm_active = xchg(&p->rpm_tx_active, 1);
+	if (rpm_active)
+		return;
+	pm_runtime_get_sync(p->port.dev);
+}
+
+static void serial8250_rpm_put_tx(struct uart_8250_port *p)
+{
+	unsigned char rpm_active;
+
+	if (!(p->capabilities & UART_CAP_RPM))
+		return;
+
+	rpm_active = xchg(&p->rpm_tx_active, 0);
+	if (!rpm_active)
+		return;
+	pm_runtime_mark_last_busy(p->port.dev);
+	pm_runtime_put_autosuspend(p->port.dev);
+}
+
 /*
  * IER sleep support.  UARTs which have EFRs need the "extended
  * capability" bit enabled.  Note that on XR16C850s, we need to
@@ -554,10 +602,11 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
 	 * offset but the UART channel may only write to the corresponding
 	 * bit.
 	 */
+	serial8250_rpm_get(p);
 	if ((p->port.type == PORT_XR17V35X) ||
 	   (p->port.type == PORT_XR17D15X)) {
 		serial_out(p, UART_EXAR_SLEEP, sleep ? 0xff : 0);
-		return;
+		goto out;
 	}
 
 	if (p->capabilities & UART_CAP_SLEEP) {
@@ -573,6 +622,8 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
 			serial_out(p, UART_LCR, 0);
 		}
 	}
+out:
+	serial8250_rpm_put(p);
 }
 
 #ifdef CONFIG_SERIAL_8250_RSA
@@ -1273,6 +1324,7 @@ static inline void __stop_tx(struct uart_8250_port *p)
 	if (p->ier & UART_IER_THRI) {
 		p->ier &= ~UART_IER_THRI;
 		serial_out(p, UART_IER, p->ier);
+		serial8250_rpm_put_tx(p);
 	}
 }
 
@@ -1280,6 +1332,7 @@ static void serial8250_stop_tx(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 
+	serial8250_rpm_get(up);
 	__stop_tx(up);
 
 	/*
@@ -1289,12 +1342,14 @@ static void serial8250_stop_tx(struct uart_port *port)
 		up->acr |= UART_ACR_TXDIS;
 		serial_icr_write(up, UART_ACR, up->acr);
 	}
+	serial8250_rpm_put(up);
 }
 
 static void serial8250_start_tx(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 
+	serial8250_rpm_get_tx(up);
 	if (up->dma && !serial8250_tx_dma(up)) {
 		return;
 	} else if (!(up->ier & UART_IER_THRI)) {
@@ -1333,9 +1388,13 @@ static void serial8250_stop_rx(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 
+	serial8250_rpm_get(up);
+
 	up->ier &= ~UART_IER_RLSI;
 	up->port.read_status_mask &= ~UART_LSR_DR;
 	serial_port_out(port, UART_IER, up->ier);
+
+	serial8250_rpm_put(up);
 }
 
 static void serial8250_enable_ms(struct uart_port *port)
@@ -1347,7 +1406,10 @@ static void serial8250_enable_ms(struct uart_port *port)
 		return;
 
 	up->ier |= UART_IER_MSI;
+
+	serial8250_rpm_get(up);
 	serial_port_out(port, UART_IER, up->ier);
+	serial8250_rpm_put(up);
 }
 
 /*
@@ -1469,7 +1531,12 @@ void serial8250_tx_chars(struct uart_8250_port *up)
 
 	DEBUG_INTR("THRE...");
 
-	if (uart_circ_empty(xmit))
+	/*
+	 * With RPM enabled, we have to wait once the FIFO is empty before the
+	 * HW can go idle. So we get here once again with empty FIFO and disable
+	 * the interrupt and RPM in __stop_tx()
+	 */
+	if (uart_circ_empty(xmit) && !(up->capabilities & UART_CAP_RPM))
 		__stop_tx(up);
 }
 EXPORT_SYMBOL_GPL(serial8250_tx_chars);
@@ -1537,9 +1604,17 @@ EXPORT_SYMBOL_GPL(serial8250_handle_irq);
 
 static int serial8250_default_handle_irq(struct uart_port *port)
 {
-	unsigned int iir = serial_port_in(port, UART_IIR);
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned int iir;
+	int ret;
+
+	serial8250_rpm_get(up);
 
-	return serial8250_handle_irq(port, iir);
+	iir = serial_port_in(port, UART_IIR);
+	ret = serial8250_handle_irq(port, iir);
+
+	serial8250_rpm_put(up);
+	return ret;
 }
 
 /*
@@ -1796,11 +1871,15 @@ static unsigned int serial8250_tx_empty(struct uart_port *port)
 	unsigned long flags;
 	unsigned int lsr;
 
+	serial8250_rpm_get(up);
+
 	spin_lock_irqsave(&port->lock, flags);
 	lsr = serial_port_in(port, UART_LSR);
 	up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
 	spin_unlock_irqrestore(&port->lock, flags);
 
+	serial8250_rpm_put(up);
+
 	return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? TIOCSER_TEMT : 0;
 }
 
@@ -1810,7 +1889,9 @@ static unsigned int serial8250_get_mctrl(struct uart_port *port)
 	unsigned int status;
 	unsigned int ret;
 
+	serial8250_rpm_get(up);
 	status = serial8250_modem_status(up);
+	serial8250_rpm_put(up);
 
 	ret = 0;
 	if (status & UART_MSR_DCD)
@@ -1850,6 +1931,7 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state)
 	struct uart_8250_port *up = up_to_u8250p(port);
 	unsigned long flags;
 
+	serial8250_rpm_get(up);
 	spin_lock_irqsave(&port->lock, flags);
 	if (break_state == -1)
 		up->lcr |= UART_LCR_SBC;
@@ -1857,6 +1939,7 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state)
 		up->lcr &= ~UART_LCR_SBC;
 	serial_port_out(port, UART_LCR, up->lcr);
 	spin_unlock_irqrestore(&port->lock, flags);
+	serial8250_rpm_put(up);
 }
 
 /*
@@ -1901,12 +1984,23 @@ static void wait_for_xmitr(struct uart_8250_port *up, int bits)
 
 static int serial8250_get_poll_char(struct uart_port *port)
 {
-	unsigned char lsr = serial_port_in(port, UART_LSR);
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned char lsr;
+	int status;
+
+	serial8250_rpm_get(up);
 
-	if (!(lsr & UART_LSR_DR))
-		return NO_POLL_CHAR;
+	lsr = serial_port_in(port, UART_LSR);
 
-	return serial_port_in(port, UART_RX);
+	if (!(lsr & UART_LSR_DR)) {
+		status = NO_POLL_CHAR;
+		goto out;
+	}
+
+	status = serial_port_in(port, UART_RX);
+out:
+	serial8250_rpm_put(up);
+	return status;
 }
 
 
@@ -1916,6 +2010,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
 	unsigned int ier;
 	struct uart_8250_port *up = up_to_u8250p(port);
 
+	serial8250_rpm_get(up);
 	/*
 	 *	First save the IER then disable the interrupts
 	 */
@@ -1937,6 +2032,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
 	 */
 	wait_for_xmitr(up, BOTH_EMPTY);
 	serial_port_out(port, UART_IER, ier);
+	serial8250_rpm_put(up);
 }
 
 #endif /* CONFIG_CONSOLE_POLL */
@@ -1962,6 +2058,7 @@ int serial8250_do_startup(struct uart_port *port)
 	if (port->iotype != up->cur_iotype)
 		set_io_from_upio(port);
 
+	serial8250_rpm_get(up);
 	if (port->type == PORT_16C950) {
 		/* Wake up and initialize UART */
 		up->acr = 0;
@@ -1982,7 +2079,6 @@ int serial8250_do_startup(struct uart_port *port)
 	 */
 	enable_rsa(up);
 #endif
-
 	/*
 	 * Clear the FIFO buffers and disable them.
 	 * (they will be reenabled in set_termios())
@@ -2006,7 +2102,8 @@ int serial8250_do_startup(struct uart_port *port)
 	    (serial_port_in(port, UART_LSR) == 0xff)) {
 		printk_ratelimited(KERN_INFO "ttyS%d: LSR safety check engaged!\n",
 				   serial_index(port));
-		return -ENODEV;
+		retval = -ENODEV;
+		goto out;
 	}
 
 	/*
@@ -2091,7 +2188,7 @@ int serial8250_do_startup(struct uart_port *port)
 	} else {
 		retval = serial_link_irq_chain(up);
 		if (retval)
-			return retval;
+			goto out;
 	}
 
 	/*
@@ -2189,8 +2286,10 @@ dont_test_tx_en:
 		outb_p(0x80, icp);
 		inb_p(icp);
 	}
-
-	return 0;
+	retval = 0;
+out:
+	serial8250_rpm_put(up);
+	return retval;
 }
 EXPORT_SYMBOL_GPL(serial8250_do_startup);
 
@@ -2206,6 +2305,7 @@ void serial8250_do_shutdown(struct uart_port *port)
 	struct uart_8250_port *up = up_to_u8250p(port);
 	unsigned long flags;
 
+	serial8250_rpm_get(up);
 	/*
 	 * Disable interrupts from this port
 	 */
@@ -2245,6 +2345,7 @@ void serial8250_do_shutdown(struct uart_port *port)
 	 * the IRQ chain.
 	 */
 	serial_port_in(port, UART_RX);
+	serial8250_rpm_put(up);
 
 	del_timer_sync(&up->timer);
 	up->timer.function = serial8250_timeout;
@@ -2360,6 +2461,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
 	 * Ok, we're now changing the port state.  Do it with
 	 * interrupts disabled.
 	 */
+	serial8250_rpm_get(up);
 	spin_lock_irqsave(&port->lock, flags);
 
 	/*
@@ -2481,6 +2583,8 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
 	}
 	serial8250_set_mctrl(port, port->mctrl);
 	spin_unlock_irqrestore(&port->lock, flags);
+	serial8250_rpm_put(up);
+
 	/* Don't rewrite B0 */
 	if (tty_termios_baud_rate(termios))
 		tty_termios_encode_baud_rate(termios, baud, baud);
@@ -3091,6 +3195,8 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count)
 
 	touch_nmi_watchdog();
 
+	serial8250_rpm_get(up);
+
 	if (port->sysrq || oops_in_progress)
 		locked = spin_trylock_irqsave(&port->lock, flags);
 	else
@@ -3127,6 +3233,7 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count)
 
 	if (locked)
 		spin_unlock_irqrestore(&port->lock, flags);
+	serial8250_rpm_put(up);
 }
 
 static int __init serial8250_console_setup(struct console *co, char *options)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 6fc9d7bee05e..c267412a3ef4 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -84,6 +84,7 @@ struct uart_8250_port {
 	unsigned char		mcr_mask;	/* mask of user bits */
 	unsigned char		mcr_force;	/* mask of forced bits */
 	unsigned char		cur_iotype;	/* Running I/O type */
+	unsigned char		rpm_tx_active;
 
 	/*
 	 * Some bits in registers are cleared on a read, so they must
-- 
cgit v1.2.3


From 7276ca3fa23864133f5ee7431c51546d9b7f695f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 22 Sep 2014 13:28:16 +0200
Subject: netfilter: bridge: nf_bridge_copy_header as static inline in header

Move nf_bridge_copy_header() as static inline in netfilter_bridge.h
header file. This patch prepares the modularization of the br_netfilter
code.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 48 +++++++++++++++++++++++++++++++---------
 net/bridge/br_netfilter.c        | 28 -----------------------
 2 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 8ab1c278b66d..fe996d59de64 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -24,16 +24,6 @@ enum nf_br_hook_priorities {
 #define BRNF_8021Q			0x10
 #define BRNF_PPPoE			0x20
 
-/* Only used in br_forward.c */
-int nf_bridge_copy_header(struct sk_buff *skb);
-static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
-{
-	if (skb->nf_bridge &&
-	    skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
-		return nf_bridge_copy_header(skb);
-  	return 0;
-}
-
 static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
@@ -46,6 +36,44 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 	}
 }
 
+static inline void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+	if (skb->nf_bridge->mask & BRNF_8021Q)
+		skb->protocol = htons(ETH_P_8021Q);
+	else if (skb->nf_bridge->mask & BRNF_PPPoE)
+		skb->protocol = htons(ETH_P_PPP_SES);
+}
+
+/* Fill in the header for fragmented IP packets handled by
+ * the IPv4 connection tracking code.
+ *
+ * Only used in br_forward.c
+ */
+static inline int nf_bridge_copy_header(struct sk_buff *skb)
+{
+	int err;
+	unsigned int header_size;
+
+	nf_bridge_update_protocol(skb);
+	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+	err = skb_cow_head(skb, header_size);
+	if (err)
+		return err;
+
+	skb_copy_to_linear_data_offset(skb, -header_size,
+				       skb->nf_bridge->data, header_size);
+	__skb_push(skb, nf_bridge_encap_header_len(skb));
+	return 0;
+}
+
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
+{
+	if (skb->nf_bridge &&
+	    skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
+		return nf_bridge_copy_header(skb);
+  	return 0;
+}
+
 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 {
 	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index a615264cf01a..61929a7cd815 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -245,14 +245,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
 					 skb->nf_bridge->data, header_size);
 }
 
-static inline void nf_bridge_update_protocol(struct sk_buff *skb)
-{
-	if (skb->nf_bridge->mask & BRNF_8021Q)
-		skb->protocol = htons(ETH_P_8021Q);
-	else if (skb->nf_bridge->mask & BRNF_PPPoE)
-		skb->protocol = htons(ETH_P_PPP_SES);
-}
-
 /* When handing a packet over to the IP layer
  * check whether we have a skb that is in the
  * expected format
@@ -320,26 +312,6 @@ drop:
 	return -1;
 }
 
-/* Fill in the header for fragmented IP packets handled by
- * the IPv4 connection tracking code.
- */
-int nf_bridge_copy_header(struct sk_buff *skb)
-{
-	int err;
-	unsigned int header_size;
-
-	nf_bridge_update_protocol(skb);
-	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-	err = skb_cow_head(skb, header_size);
-	if (err)
-		return err;
-
-	skb_copy_to_linear_data_offset(skb, -header_size,
-				       skb->nf_bridge->data, header_size);
-	__skb_push(skb, nf_bridge_encap_header_len(skb));
-	return 0;
-}
-
 /* PF_BRIDGE/PRE_ROUTING *********************************************/
 /* Undo the changes made for ip6tables PREROUTING and continue the
  * bridge PRE_ROUTING hook. */
-- 
cgit v1.2.3


From 34666d467cbf1e2e3c7bb15a63eccfb582cdd71f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 18 Sep 2014 11:29:03 +0200
Subject: netfilter: bridge: move br_netfilter out of the core

Jesper reported that br_netfilter always registers the hooks since
this is part of the bridge core. This harms performance for people that
don't need this.

This patch modularizes br_netfilter so it can be rmmod'ed, thus,
the hooks can be unregistered. I think the bridge netfilter should have
been a separated module since the beginning, Patrick agreed on that.

Note that this is breaking compatibility for users that expect that
bridge netfilter is going to be available after explicitly 'modprobe
bridge' or via automatic load through brctl.

However, the damage can be easily undone by modprobing br_netfilter.
The bridge core also spots a message to provide a clue to people that
didn't notice that this has been deprecated.

On top of that, the plan is that nftables will not rely on this software
layer, but integrate the connection tracking into the bridge layer to
enable stateful filtering and NAT, which is was bridge netfilter users
seem to require.

This patch still keeps the fake_dst_ops in the bridge core, since this
is required by when the bridge port is initialized. So we can safely
modprobe/rmmod br_netfilter anytime.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Florian Westphal <fw@strlen.de>
---
 include/linux/netfilter_bridge.h       |  2 +-
 include/linux/skbuff.h                 | 12 ++---
 include/net/neighbour.h                |  2 +-
 include/net/netfilter/ipv4/nf_reject.h |  2 +-
 include/net/netfilter/ipv6/nf_reject.h |  2 +-
 net/Kconfig                            |  7 +--
 net/bridge/Makefile                    |  5 +-
 net/bridge/br.c                        | 14 ++---
 net/bridge/br_device.c                 |  4 +-
 net/bridge/br_forward.c                |  2 +
 net/bridge/br_input.c                  |  1 +
 net/bridge/br_netfilter.c              | 88 ++++++-------------------------
 net/bridge/br_netlink.c                |  2 +-
 net/bridge/br_nf_core.c                | 96 ++++++++++++++++++++++++++++++++++
 net/bridge/br_private.h                | 12 ++---
 net/bridge/br_sysfs_br.c               |  4 +-
 16 files changed, 151 insertions(+), 104 deletions(-)
 create mode 100644 net/bridge/br_nf_core.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index fe996d59de64..c755e4971fa3 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -15,7 +15,7 @@ enum nf_br_hook_priorities {
 	NF_BR_PRI_LAST = INT_MAX,
 };
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 
 #define BRNF_PKT_TYPE			0x01
 #define BRNF_BRIDGED_DNAT		0x02
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 07c9fdd0c126..c4ff43f84573 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -156,7 +156,7 @@ struct nf_conntrack {
 };
 #endif
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 struct nf_bridge_info {
 	atomic_t		use;
 	unsigned int		mask;
@@ -560,7 +560,7 @@ struct sk_buff {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct nf_bridge_info	*nf_bridge;
 #endif
 
@@ -2977,7 +2977,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 		atomic_inc(&nfct->use);
 }
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 {
 	if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
@@ -2995,7 +2995,7 @@ static inline void nf_reset(struct sk_buff *skb)
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	nf_bridge_put(skb->nf_bridge);
 	skb->nf_bridge = NULL;
 #endif
@@ -3016,7 +3016,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	dst->nf_bridge  = src->nf_bridge;
 	nf_bridge_get(src->nf_bridge);
 #endif
@@ -3030,7 +3030,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(dst->nfct);
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	nf_bridge_put(dst->nf_bridge);
 #endif
 	__nf_copy(dst, src);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 47f425464f84..f60558d0254c 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -373,7 +373,7 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 	return 0;
 }
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
 {
 	unsigned int seq, hh_alen;
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 931fbf812171..f713b5a31d62 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -98,7 +98,7 @@ static void nf_send_reset(struct sk_buff *oldskb, int hook)
 
 	nf_ct_attach(nskb, oldskb);
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	/* If we use ip_local_out for bridged traffic, the MAC source on
 	 * the RST will be ours, instead of the destination's.  This confuses
 	 * some routers/firewalls, and they drop the packet.  So we need to
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index 710d17ed70b4..7a10cfcd8e33 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -147,7 +147,7 @@ static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
 
 	nf_ct_attach(nskb, oldskb);
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	/* If we use ip6_local_out for bridged traffic, the MAC source on
 	 * the RST will be ours, instead of the destination's.  This confuses
 	 * some routers/firewalls, and they drop the packet.  So we need to
diff --git a/net/Kconfig b/net/Kconfig
index 4051fdfa4367..dc5d700d05e7 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -176,10 +176,11 @@ config NETFILTER_ADVANCED
 	  If unsure, say Y.
 
 config BRIDGE_NETFILTER
-	bool "Bridged IP/ARP packets filtering"
-	depends on BRIDGE && NETFILTER && INET
+	tristate "Bridged IP/ARP packets filtering"
+	depends on (BRIDGE || BRIDGE=n)
+	depends on NETFILTER && INET
 	depends on NETFILTER_ADVANCED
-	default y
+	default m
 	---help---
 	  Enabling this option will let arptables resp. iptables see bridged
 	  ARP resp. IP traffic. If you want a bridging firewall, you probably
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 8590b942bffa..5e3eac5dc8b9 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -6,11 +6,12 @@ obj-$(CONFIG_BRIDGE) += bridge.o
 
 bridge-y	:= br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
 			br_ioctl.o br_stp.o br_stp_bpdu.o \
-			br_stp_if.o br_stp_timer.o br_netlink.o
+			br_stp_if.o br_stp_timer.o br_netlink.o \
+			br_nf_core.o
 
 bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
 
-bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
+obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
 
 bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1a755a1e5410..44425aff7cba 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -161,7 +161,7 @@ static int __init br_init(void)
 	if (err)
 		goto err_out1;
 
-	err = br_netfilter_init();
+	err = br_nf_core_init();
 	if (err)
 		goto err_out2;
 
@@ -179,11 +179,16 @@ static int __init br_init(void)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
 #endif
 
+	pr_info("bridge: automatic filtering via arp/ip/ip6tables has been "
+		"deprecated. Update your scripts to load br_netfilter if you "
+		"need this.\n");
+
 	return 0;
+
 err_out4:
 	unregister_netdevice_notifier(&br_device_notifier);
 err_out3:
-	br_netfilter_fini();
+	br_nf_core_fini();
 err_out2:
 	unregister_pernet_subsys(&br_net_ops);
 err_out1:
@@ -196,20 +201,17 @@ err_out:
 static void __exit br_deinit(void)
 {
 	stp_proto_unregister(&br_stp_proto);
-
 	br_netlink_fini();
 	unregister_netdevice_notifier(&br_device_notifier);
 	brioctl_set(NULL);
-
 	unregister_pernet_subsys(&br_net_ops);
 
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
-	br_netfilter_fini();
+	br_nf_core_fini();
 #if IS_ENABLED(CONFIG_ATM_LANE)
 	br_fdb_test_addr_hook = NULL;
 #endif
-
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 568cccd39a3d..659cac15c0df 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,7 +36,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	u16 vid = 0;
 
 	rcu_read_lock();
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
 		br_nf_pre_routing_finish_bridge_slow(skb);
 		rcu_read_unlock();
@@ -167,7 +167,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 
 	dev->mtu = new_mtu;
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	/* remember the MTU in the rtable for PMTU */
 	dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
 #endif
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 056b67b0e277..992ec49a96aa 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -49,6 +49,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
 
 int br_forward_finish(struct sk_buff *skb)
 {
@@ -56,6 +57,7 @@ int br_forward_finish(struct sk_buff *skb)
 		       br_dev_queue_push_xmit);
 
 }
+EXPORT_SYMBOL_GPL(br_forward_finish);
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 366c43649079..6fd5522df696 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -140,6 +140,7 @@ drop:
 	kfree_skb(skb);
 	goto out;
 }
+EXPORT_SYMBOL_GPL(br_handle_frame_finish);
 
 /* note: already called with rcu_read_lock */
 static int br_handle_local_finish(struct sk_buff *skb)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 61929a7cd815..97e43937aaca 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,66 +111,6 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
 	 pppoe_proto(skb) == htons(PPP_IPV6) && \
 	 brnf_filter_pppoe_tagged)
 
-static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
-			     struct sk_buff *skb, u32 mtu)
-{
-}
-
-static void fake_redirect(struct dst_entry *dst, struct sock *sk,
-			  struct sk_buff *skb)
-{
-}
-
-static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
-	return NULL;
-}
-
-static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
-					   struct sk_buff *skb,
-					   const void *daddr)
-{
-	return NULL;
-}
-
-static unsigned int fake_mtu(const struct dst_entry *dst)
-{
-	return dst->dev->mtu;
-}
-
-static struct dst_ops fake_dst_ops = {
-	.family =		AF_INET,
-	.protocol =		cpu_to_be16(ETH_P_IP),
-	.update_pmtu =		fake_update_pmtu,
-	.redirect =		fake_redirect,
-	.cow_metrics =		fake_cow_metrics,
-	.neigh_lookup =		fake_neigh_lookup,
-	.mtu =			fake_mtu,
-};
-
-/*
- * Initialize bogus route table used to keep netfilter happy.
- * Currently, we fill in the PMTU entry because netfilter
- * refragmentation needs it, and the rt_flags entry because
- * ipt_REJECT needs it.  Future netfilter modules might
- * require us to fill additional fields.
- */
-static const u32 br_dst_default_metrics[RTAX_MAX] = {
-	[RTAX_MTU - 1] = 1500,
-};
-
-void br_netfilter_rtable_init(struct net_bridge *br)
-{
-	struct rtable *rt = &br->fake_rtable;
-
-	atomic_set(&rt->dst.__refcnt, 1);
-	rt->dst.dev = br->dev;
-	rt->dst.path = &rt->dst;
-	dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
-	rt->dst.flags	= DST_NOXFRM | DST_FAKE_RTABLE;
-	rt->dst.ops = &fake_dst_ops;
-}
-
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
 	struct net_bridge_port *port;
@@ -1031,38 +971,42 @@ static struct ctl_table brnf_table[] = {
 };
 #endif
 
-int __init br_netfilter_init(void)
+static int __init br_netfilter_init(void)
 {
 	int ret;
 
-	ret = dst_entries_init(&fake_dst_ops);
+	ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
 	if (ret < 0)
 		return ret;
 
-	ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-	if (ret < 0) {
-		dst_entries_destroy(&fake_dst_ops);
-		return ret;
-	}
 #ifdef CONFIG_SYSCTL
 	brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
 	if (brnf_sysctl_header == NULL) {
 		printk(KERN_WARNING
 		       "br_netfilter: can't register to sysctl.\n");
-		nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-		dst_entries_destroy(&fake_dst_ops);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err1;
 	}
 #endif
 	printk(KERN_NOTICE "Bridge firewalling registered\n");
 	return 0;
+err1:
+	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+	return ret;
 }
 
-void br_netfilter_fini(void)
+static void __exit br_netfilter_fini(void)
 {
 	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
 #ifdef CONFIG_SYSCTL
 	unregister_net_sysctl_table(brnf_sysctl_header);
 #endif
-	dst_entries_destroy(&fake_dst_ops);
 }
+
+module_init(br_netfilter_init);
+module_exit(br_netfilter_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 90a91e137acc..0fa66b83685f 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -602,7 +602,7 @@ out_af:
 	return err;
 }
 
-void __exit br_netlink_fini(void)
+void br_netlink_fini(void)
 {
 	br_mdb_uninit();
 	rtnl_af_unregister(&br_af_ops);
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
new file mode 100644
index 000000000000..387cb3bd017c
--- /dev/null
+++ b/net/bridge/br_nf_core.c
@@ -0,0 +1,96 @@
+/*
+ *	Handle firewalling core
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *	Bart De Schuymer		<bdschuym@pandora.be>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <net/route.h>
+
+#include "br_private.h"
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			     struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void fake_redirect(struct dst_entry *dst, struct sock *sk,
+			  struct sk_buff *skb)
+{
+}
+
+static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	return NULL;
+}
+
+static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
+					   struct sk_buff *skb,
+					   const void *daddr)
+{
+	return NULL;
+}
+
+static unsigned int fake_mtu(const struct dst_entry *dst)
+{
+	return dst->dev->mtu;
+}
+
+static struct dst_ops fake_dst_ops = {
+	.family		= AF_INET,
+	.protocol	= cpu_to_be16(ETH_P_IP),
+	.update_pmtu	= fake_update_pmtu,
+	.redirect	= fake_redirect,
+	.cow_metrics	= fake_cow_metrics,
+	.neigh_lookup	= fake_neigh_lookup,
+	.mtu		= fake_mtu,
+};
+
+/*
+ * Initialize bogus route table used to keep netfilter happy.
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it.  Future netfilter modules might
+ * require us to fill additional fields.
+ */
+static const u32 br_dst_default_metrics[RTAX_MAX] = {
+	[RTAX_MTU - 1] = 1500,
+};
+
+void br_netfilter_rtable_init(struct net_bridge *br)
+{
+	struct rtable *rt = &br->fake_rtable;
+
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.dev = br->dev;
+	rt->dst.path = &rt->dst;
+	dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
+	rt->dst.flags	= DST_NOXFRM | DST_FAKE_RTABLE;
+	rt->dst.ops = &fake_dst_ops;
+}
+
+int __init br_nf_core_init(void)
+{
+	return dst_entries_init(&fake_dst_ops);
+}
+
+void br_nf_core_fini(void)
+{
+	dst_entries_destroy(&fake_dst_ops);
+}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 62a7fa2e3569..d304d752c091 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -221,7 +221,7 @@ struct net_bridge
 	struct pcpu_sw_netstats		__percpu *stats;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct rtable 			fake_rtable;
 	bool				nf_call_iptables;
 	bool				nf_call_ip6tables;
@@ -751,13 +751,13 @@ static inline int br_vlan_enabled(struct net_bridge *br)
 #endif
 
 /* br_netfilter.c */
-#ifdef CONFIG_BRIDGE_NETFILTER
-int br_netfilter_init(void);
-void br_netfilter_fini(void);
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_core_init(void);
+void br_nf_core_fini(void);
 void br_netfilter_rtable_init(struct net_bridge *);
 #else
-#define br_netfilter_init()	(0)
-#define br_netfilter_fini()	do { } while (0)
+static inline int br_nf_core_init(void) { return 0; }
+static inline void br_nf_core_fini(void) {}
 #define br_netfilter_rtable_init(x)
 #endif
 
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c9e2572b15f4..cb431c6016ee 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -629,7 +629,7 @@ static ssize_t multicast_startup_query_interval_store(
 }
 static DEVICE_ATTR_RW(multicast_startup_query_interval);
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static ssize_t nf_call_iptables_show(
 	struct device *d, struct device_attribute *attr, char *buf)
 {
@@ -763,7 +763,7 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_multicast_query_response_interval.attr,
 	&dev_attr_multicast_startup_query_interval.attr,
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	&dev_attr_nf_call_iptables.attr,
 	&dev_attr_nf_call_ip6tables.attr,
 	&dev_attr_nf_call_arptables.attr,
-- 
cgit v1.2.3


From 99c55f7d47c0dc6fc64729f37bf435abf43f4c60 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:57 -0700
Subject: bpf: introduce BPF syscall and maps

BPF syscall is a multiplexor for a range of different operations on eBPF.
This patch introduces syscall with single command to create a map.
Next patch adds commands to access maps.

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

Userspace example:
/* this syscall wrapper creates a map with given type and attributes
 * and returns map_fd on success.
 * use close(map_fd) to delete the map
 */
int bpf_create_map(enum bpf_map_type map_type, int key_size,
                   int value_size, int max_entries)
{
    union bpf_attr attr = {
        .map_type = map_type,
        .key_size = key_size,
        .value_size = value_size,
        .max_entries = max_entries
    };

    return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}

'union bpf_attr' is backwards compatible with future extensions.

More details in Documentation/networking/filter.txt and in manpage

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt |  39 +++++++++
 include/linux/bpf.h                 |  41 +++++++++
 include/uapi/linux/bpf.h            |  23 +++++
 kernel/bpf/Makefile                 |   2 +-
 kernel/bpf/syscall.c                | 169 ++++++++++++++++++++++++++++++++++++
 5 files changed, 273 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/bpf.h
 create mode 100644 kernel/bpf/syscall.c

(limited to 'include/linux')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 014e0319a5c4..4a01d71785e9 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1001,6 +1001,45 @@ instruction that loads 64-bit immediate value into a dst_reg.
 Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
 32-bit immediate value into a register.
 
+eBPF maps
+---------
+'maps' is a generic storage of different types for sharing data between kernel
+and userspace.
+
+The maps are accessed from user space via BPF syscall, which has commands:
+- create a map with given type and attributes
+  map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
+  using attr->map_type, attr->key_size, attr->value_size, attr->max_entries
+  returns process-local file descriptor or negative error
+
+- lookup key in a given map
+  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+  using attr->map_fd, attr->key, attr->value
+  returns zero and stores found elem into value or negative error
+
+- create or update key/value pair in a given map
+  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+  using attr->map_fd, attr->key, attr->value
+  returns zero or negative error
+
+- find and delete element by key in a given map
+  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+  using attr->map_fd, attr->key
+
+- to delete map: close(fd)
+  Exiting process will delete maps automatically
+
+userspace programs use this syscall to create/access maps that eBPF programs
+are concurrently updating.
+
+maps can have different types: hash, array, bloom filter, radix-tree, etc.
+
+The map is defined by:
+  . type
+  . max number of elements
+  . key size in bytes
+  . value size in bytes
+
 Testing
 -------
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
new file mode 100644
index 000000000000..48014a71f0fe
--- /dev/null
+++ b/include/linux/bpf.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_BPF_H
+#define _LINUX_BPF_H 1
+
+#include <uapi/linux/bpf.h>
+#include <linux/workqueue.h>
+
+struct bpf_map;
+
+/* map is generic key/value storage optionally accesible by eBPF programs */
+struct bpf_map_ops {
+	/* funcs callable from userspace (via syscall) */
+	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
+	void (*map_free)(struct bpf_map *);
+};
+
+struct bpf_map {
+	atomic_t refcnt;
+	enum bpf_map_type map_type;
+	u32 key_size;
+	u32 value_size;
+	u32 max_entries;
+	struct bpf_map_ops *ops;
+	struct work_struct work;
+};
+
+struct bpf_map_type_list {
+	struct list_head list_node;
+	struct bpf_map_ops *ops;
+	enum bpf_map_type type;
+};
+
+void bpf_register_map_type(struct bpf_map_type_list *tl);
+void bpf_map_put(struct bpf_map *map);
+
+#endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 479ed0b6be16..f58a10f9670c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -62,4 +62,27 @@ struct bpf_insn {
 	__s32	imm;		/* signed immediate constant */
 };
 
+/* BPF syscall commands */
+enum bpf_cmd {
+	/* create a map with given type and attributes
+	 * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
+	 * returns fd or negative error
+	 * map is deleted when fd is closed
+	 */
+	BPF_MAP_CREATE,
+};
+
+enum bpf_map_type {
+	BPF_MAP_TYPE_UNSPEC,
+};
+
+union bpf_attr {
+	struct { /* anonymous struct used by BPF_MAP_CREATE command */
+		__u32	map_type;	/* one of enum bpf_map_type */
+		__u32	key_size;	/* size of key in bytes */
+		__u32	value_size;	/* size of value in bytes */
+		__u32	max_entries;	/* max number of entries in a map */
+	};
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 6a71145e2769..e9f7334ed07a 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1 +1 @@
-obj-y := core.o
+obj-y := core.o syscall.o
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
new file mode 100644
index 000000000000..428a0e23adc0
--- /dev/null
+++ b/kernel/bpf/syscall.c
@@ -0,0 +1,169 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/anon_inodes.h>
+
+static LIST_HEAD(bpf_map_types);
+
+static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
+{
+	struct bpf_map_type_list *tl;
+	struct bpf_map *map;
+
+	list_for_each_entry(tl, &bpf_map_types, list_node) {
+		if (tl->type == attr->map_type) {
+			map = tl->ops->map_alloc(attr);
+			if (IS_ERR(map))
+				return map;
+			map->ops = tl->ops;
+			map->map_type = attr->map_type;
+			return map;
+		}
+	}
+	return ERR_PTR(-EINVAL);
+}
+
+/* boot time registration of different map implementations */
+void bpf_register_map_type(struct bpf_map_type_list *tl)
+{
+	list_add(&tl->list_node, &bpf_map_types);
+}
+
+/* called from workqueue */
+static void bpf_map_free_deferred(struct work_struct *work)
+{
+	struct bpf_map *map = container_of(work, struct bpf_map, work);
+
+	/* implementation dependent freeing */
+	map->ops->map_free(map);
+}
+
+/* decrement map refcnt and schedule it for freeing via workqueue
+ * (unrelying map implementation ops->map_free() might sleep)
+ */
+void bpf_map_put(struct bpf_map *map)
+{
+	if (atomic_dec_and_test(&map->refcnt)) {
+		INIT_WORK(&map->work, bpf_map_free_deferred);
+		schedule_work(&map->work);
+	}
+}
+
+static int bpf_map_release(struct inode *inode, struct file *filp)
+{
+	struct bpf_map *map = filp->private_data;
+
+	bpf_map_put(map);
+	return 0;
+}
+
+static const struct file_operations bpf_map_fops = {
+	.release = bpf_map_release,
+};
+
+/* helper macro to check that unused fields 'union bpf_attr' are zero */
+#define CHECK_ATTR(CMD) \
+	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
+		   sizeof(attr->CMD##_LAST_FIELD), 0, \
+		   sizeof(*attr) - \
+		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
+		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
+
+#define BPF_MAP_CREATE_LAST_FIELD max_entries
+/* called via syscall */
+static int map_create(union bpf_attr *attr)
+{
+	struct bpf_map *map;
+	int err;
+
+	err = CHECK_ATTR(BPF_MAP_CREATE);
+	if (err)
+		return -EINVAL;
+
+	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
+	map = find_and_alloc_map(attr);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	atomic_set(&map->refcnt, 1);
+
+	err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
+
+	if (err < 0)
+		/* failed to allocate fd */
+		goto free_map;
+
+	return err;
+
+free_map:
+	map->ops->map_free(map);
+	return err;
+}
+
+SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+{
+	union bpf_attr attr = {};
+	int err;
+
+	/* the syscall is limited to root temporarily. This restriction will be
+	 * lifted when security audit is clean. Note that eBPF+tracing must have
+	 * this restriction, since it may pass kernel data to user space
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!access_ok(VERIFY_READ, uattr, 1))
+		return -EFAULT;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		return -E2BIG;
+
+	/* If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0 - i.e. new
+	 * user-space does not rely on any kernel feature
+	 * extensions we dont know about yet.
+	 */
+	if (size > sizeof(attr)) {
+		unsigned char __user *addr;
+		unsigned char __user *end;
+		unsigned char val;
+
+		addr = (void __user *)uattr + sizeof(attr);
+		end  = (void __user *)uattr + size;
+
+		for (; addr < end; addr++) {
+			err = get_user(val, addr);
+			if (err)
+				return err;
+			if (val)
+				return -E2BIG;
+		}
+		size = sizeof(attr);
+	}
+
+	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
+	if (copy_from_user(&attr, uattr, size) != 0)
+		return -EFAULT;
+
+	switch (cmd) {
+	case BPF_MAP_CREATE:
+		err = map_create(&attr);
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	return err;
+}
-- 
cgit v1.2.3


From 749730ce42a2121e1c88350d69478bff3994b10a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:58 -0700
Subject: bpf: enable bpf syscall on x64 and i386

done as separate commit to ease conflict resolution

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/syscalls/syscall_32.tbl  | 1 +
 arch/x86/syscalls/syscall_64.tbl  | 1 +
 include/linux/syscalls.h          | 3 ++-
 include/uapi/asm-generic/unistd.h | 4 +++-
 kernel/sys_ni.c                   | 3 +++
 5 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 028b78168d85..9fe1b5d002f0 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -363,3 +363,4 @@
 354	i386	seccomp			sys_seccomp
 355	i386	getrandom		sys_getrandom
 356	i386	memfd_create		sys_memfd_create
+357	i386	bpf			sys_bpf
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 35dd922727b9..281150b539a2 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -327,6 +327,7 @@
 318	common	getrandom		sys_getrandom
 319	common	memfd_create		sys_memfd_create
 320	common	kexec_file_load		sys_kexec_file_load
+321	common	bpf			sys_bpf
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0f86d85a9ce4..bda9b81357cc 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -65,6 +65,7 @@ struct old_linux_dirent;
 struct perf_event_attr;
 struct file_handle;
 struct sigaltstack;
+union bpf_attr;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -875,5 +876,5 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
 			    const char __user *uargs);
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
 			      unsigned int flags);
-
+asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 11d11bc5c78f..22749c134117 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -705,9 +705,11 @@ __SYSCALL(__NR_seccomp, sys_seccomp)
 __SYSCALL(__NR_getrandom, sys_getrandom)
 #define __NR_memfd_create 279
 __SYSCALL(__NR_memfd_create, sys_memfd_create)
+#define __NR_bpf 280
+__SYSCALL(__NR_bpf, sys_bpf)
 
 #undef __NR_syscalls
-#define __NR_syscalls 280
+#define __NR_syscalls 281
 
 /*
  * All syscalls below here should go away really,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 391d4ddb6f4b..b4b5083f5f5e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -218,3 +218,6 @@ cond_syscall(sys_kcmp);
 
 /* operate on Secure Computing state */
 cond_syscall(sys_seccomp);
+
+/* access BPF programs and maps */
+cond_syscall(sys_bpf);
-- 
cgit v1.2.3


From db20fd2b01087bdfbe30bce314a198eefedcc42e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:59 -0700
Subject: bpf: add lookup/update/delete/iterate methods to BPF maps

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given type and attributes
  fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
  returns fd or negative error

- lookup key in a given map referenced by fd
  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key

- iterate map elements (based on input key return next_key)
  err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->next_key

- close(fd) deletes the map

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |   8 ++
 include/uapi/linux/bpf.h |  38 ++++++++
 kernel/bpf/syscall.c     | 235 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 281 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 48014a71f0fe..2887f3f9da59 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -9,6 +9,7 @@
 
 #include <uapi/linux/bpf.h>
 #include <linux/workqueue.h>
+#include <linux/file.h>
 
 struct bpf_map;
 
@@ -17,6 +18,12 @@ struct bpf_map_ops {
 	/* funcs callable from userspace (via syscall) */
 	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
 	void (*map_free)(struct bpf_map *);
+	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+
+	/* funcs callable from userspace and from eBPF programs */
+	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+	int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
+	int (*map_delete_elem)(struct bpf_map *map, void *key);
 };
 
 struct bpf_map {
@@ -37,5 +44,6 @@ struct bpf_map_type_list {
 
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
+struct bpf_map *bpf_map_get(struct fd f);
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f58a10f9670c..395cabd2ca0a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -70,6 +70,35 @@ enum bpf_cmd {
 	 * map is deleted when fd is closed
 	 */
 	BPF_MAP_CREATE,
+
+	/* lookup key in a given map
+	 * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->value
+	 * returns zero and stores found elem into value
+	 * or negative error
+	 */
+	BPF_MAP_LOOKUP_ELEM,
+
+	/* create or update key/value pair in a given map
+	 * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->value
+	 * returns zero or negative error
+	 */
+	BPF_MAP_UPDATE_ELEM,
+
+	/* find and delete elem by key in a given map
+	 * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key
+	 * returns zero or negative error
+	 */
+	BPF_MAP_DELETE_ELEM,
+
+	/* lookup key in a given map and return next key
+	 * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->next_key
+	 * returns zero and stores next key or negative error
+	 */
+	BPF_MAP_GET_NEXT_KEY,
 };
 
 enum bpf_map_type {
@@ -83,6 +112,15 @@ union bpf_attr {
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
 	};
+
+	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+		__u32		map_fd;
+		__aligned_u64	key;
+		union {
+			__aligned_u64 value;
+			__aligned_u64 next_key;
+		};
+	};
 } __attribute__((aligned(8)));
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 428a0e23adc0..f94349ecaf61 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -13,6 +13,7 @@
 #include <linux/syscalls.h>
 #include <linux/slab.h>
 #include <linux/anon_inodes.h>
+#include <linux/file.h>
 
 static LIST_HEAD(bpf_map_types);
 
@@ -111,6 +112,228 @@ free_map:
 	return err;
 }
 
+/* if error is returned, fd is released.
+ * On success caller should complete fd access with matching fdput()
+ */
+struct bpf_map *bpf_map_get(struct fd f)
+{
+	struct bpf_map *map;
+
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	if (f.file->f_op != &bpf_map_fops) {
+		fdput(f);
+		return ERR_PTR(-EINVAL);
+	}
+
+	map = f.file->private_data;
+
+	return map;
+}
+
+/* helper to convert user pointers passed inside __aligned_u64 fields */
+static void __user *u64_to_ptr(__u64 val)
+{
+	return (void __user *) (unsigned long) val;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+
+static int map_lookup_elem(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_ptr(attr->key);
+	void __user *uvalue = u64_to_ptr(attr->value);
+	int ufd = attr->map_fd;
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *value;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
+		return -EINVAL;
+
+	map = bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_USER);
+	if (!key)
+		goto err_put;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ESRCH;
+	rcu_read_lock();
+	value = map->ops->map_lookup_elem(map, key);
+	if (!value)
+		goto err_unlock;
+
+	err = -EFAULT;
+	if (copy_to_user(uvalue, value, map->value_size) != 0)
+		goto err_unlock;
+
+	err = 0;
+
+err_unlock:
+	rcu_read_unlock();
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
+#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
+
+static int map_update_elem(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_ptr(attr->key);
+	void __user *uvalue = u64_to_ptr(attr->value);
+	int ufd = attr->map_fd;
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *value;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
+		return -EINVAL;
+
+	map = bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_USER);
+	if (!key)
+		goto err_put;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ENOMEM;
+	value = kmalloc(map->value_size, GFP_USER);
+	if (!value)
+		goto free_key;
+
+	err = -EFAULT;
+	if (copy_from_user(value, uvalue, map->value_size) != 0)
+		goto free_value;
+
+	/* eBPF program that use maps are running under rcu_read_lock(),
+	 * therefore all map accessors rely on this fact, so do the same here
+	 */
+	rcu_read_lock();
+	err = map->ops->map_update_elem(map, key, value);
+	rcu_read_unlock();
+
+free_value:
+	kfree(value);
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
+#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
+
+static int map_delete_elem(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_ptr(attr->key);
+	int ufd = attr->map_fd;
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
+		return -EINVAL;
+
+	map = bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_USER);
+	if (!key)
+		goto err_put;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	rcu_read_lock();
+	err = map->ops->map_delete_elem(map, key);
+	rcu_read_unlock();
+
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
+
+static int map_get_next_key(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_ptr(attr->key);
+	void __user *unext_key = u64_to_ptr(attr->next_key);
+	int ufd = attr->map_fd;
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *next_key;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
+		return -EINVAL;
+
+	map = bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_USER);
+	if (!key)
+		goto err_put;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ENOMEM;
+	next_key = kmalloc(map->key_size, GFP_USER);
+	if (!next_key)
+		goto free_key;
+
+	rcu_read_lock();
+	err = map->ops->map_get_next_key(map, key, next_key);
+	rcu_read_unlock();
+	if (err)
+		goto free_next_key;
+
+	err = -EFAULT;
+	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
+		goto free_next_key;
+
+	err = 0;
+
+free_next_key:
+	kfree(next_key);
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_MAP_CREATE:
 		err = map_create(&attr);
 		break;
+	case BPF_MAP_LOOKUP_ELEM:
+		err = map_lookup_elem(&attr);
+		break;
+	case BPF_MAP_UPDATE_ELEM:
+		err = map_update_elem(&attr);
+		break;
+	case BPF_MAP_DELETE_ELEM:
+		err = map_delete_elem(&attr);
+		break;
+	case BPF_MAP_GET_NEXT_KEY:
+		err = map_get_next_key(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 09756af46893c18839062976c3252e93a1beeba7 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:00 -0700
Subject: bpf: expand BPF syscall with program load/unload

eBPF programs are similar to kernel modules. They are loaded by the user
process and automatically unloaded when process exits. Each eBPF program is
a safe run-to-completion set of instructions. eBPF verifier statically
determines that the program terminates and is safe to execute.

The following syscall wrapper can be used to load the program:
int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int insn_cnt,
                  const char *license)
{
    union bpf_attr attr = {
        .prog_type = prog_type,
        .insns = ptr_to_u64(insns),
        .insn_cnt = insn_cnt,
        .license = ptr_to_u64(license),
    };

    return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
where 'insns' is an array of eBPF instructions and 'license' is a string
that must be GPL compatible to call helper functions marked gpl_only

Upon succesful load the syscall returns prog_fd.
Use close(prog_fd) to unload the program.

User space tests and examples follow in the later patches

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  38 +++++++++++
 include/linux/filter.h   |   8 +--
 include/uapi/linux/bpf.h |  26 ++++++++
 kernel/bpf/core.c        |  29 +++++----
 kernel/bpf/syscall.c     | 165 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 246 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2887f3f9da59..92979182be81 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -46,4 +46,42 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
 struct bpf_map *bpf_map_get(struct fd f);
 
+/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
+ * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
+ * instructions after verifying
+ */
+struct bpf_func_proto {
+	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+	bool gpl_only;
+};
+
+struct bpf_verifier_ops {
+	/* return eBPF function prototype for verification */
+	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
+};
+
+struct bpf_prog_type_list {
+	struct list_head list_node;
+	struct bpf_verifier_ops *ops;
+	enum bpf_prog_type type;
+};
+
+void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+
+struct bpf_prog;
+
+struct bpf_prog_aux {
+	atomic_t refcnt;
+	bool is_gpl_compatible;
+	enum bpf_prog_type prog_type;
+	struct bpf_verifier_ops *ops;
+	struct bpf_map **used_maps;
+	u32 used_map_cnt;
+	struct bpf_prog *prog;
+	struct work_struct work;
+};
+
+void bpf_prog_put(struct bpf_prog *prog);
+struct bpf_prog *bpf_prog_get(u32 ufd);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1a0bc6d134d7..4ffc0958d85e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,6 +21,7 @@
 struct sk_buff;
 struct sock;
 struct seccomp_data;
+struct bpf_prog_aux;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -300,17 +301,12 @@ struct bpf_binary_header {
 	u8 image[];
 };
 
-struct bpf_work_struct {
-	struct bpf_prog *prog;
-	struct work_struct work;
-};
-
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	bool			jited;		/* Is our filter JIT'ed? */
 	u32			len;		/* Number of filter blocks */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
-	struct bpf_work_struct	*work;		/* Deferred free work struct */
+	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
 					    const struct bpf_insn *filter);
 	/* Instructions for interpreter */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 395cabd2ca0a..424f442016e7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -99,12 +99,23 @@ enum bpf_cmd {
 	 * returns zero and stores next key or negative error
 	 */
 	BPF_MAP_GET_NEXT_KEY,
+
+	/* verify and load eBPF program
+	 * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
+	 * Using attr->prog_type, attr->insns, attr->license
+	 * returns fd or negative error
+	 */
+	BPF_PROG_LOAD,
 };
 
 enum bpf_map_type {
 	BPF_MAP_TYPE_UNSPEC,
 };
 
+enum bpf_prog_type {
+	BPF_PROG_TYPE_UNSPEC,
+};
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
@@ -121,6 +132,21 @@ union bpf_attr {
 			__aligned_u64 next_key;
 		};
 	};
+
+	struct { /* anonymous struct used by BPF_PROG_LOAD command */
+		__u32		prog_type;	/* one of enum bpf_prog_type */
+		__u32		insn_cnt;
+		__aligned_u64	insns;
+		__aligned_u64	license;
+	};
 } __attribute__((aligned(8)));
 
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+enum bpf_func_id {
+	BPF_FUNC_unspec,
+	__BPF_FUNC_MAX_ID,
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8b7002488251..f0c30c59b317 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -27,6 +27,7 @@
 #include <linux/random.h>
 #include <linux/moduleloader.h>
 #include <asm/unaligned.h>
+#include <linux/bpf.h>
 
 /* Registers */
 #define BPF_R0	regs[BPF_REG_0]
@@ -71,7 +72,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 {
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
 			  gfp_extra_flags;
-	struct bpf_work_struct *ws;
+	struct bpf_prog_aux *aux;
 	struct bpf_prog *fp;
 
 	size = round_up(size, PAGE_SIZE);
@@ -79,14 +80,14 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 	if (fp == NULL)
 		return NULL;
 
-	ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags);
-	if (ws == NULL) {
+	aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
+	if (aux == NULL) {
 		vfree(fp);
 		return NULL;
 	}
 
 	fp->pages = size / PAGE_SIZE;
-	fp->work = ws;
+	fp->aux = aux;
 
 	return fp;
 }
@@ -110,10 +111,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
 		fp->pages = size / PAGE_SIZE;
 
-		/* We keep fp->work from fp_old around in the new
+		/* We keep fp->aux from fp_old around in the new
 		 * reallocated structure.
 		 */
-		fp_old->work = NULL;
+		fp_old->aux = NULL;
 		__bpf_prog_free(fp_old);
 	}
 
@@ -123,7 +124,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_realloc);
 
 void __bpf_prog_free(struct bpf_prog *fp)
 {
-	kfree(fp->work);
+	kfree(fp->aux);
 	vfree(fp);
 }
 EXPORT_SYMBOL_GPL(__bpf_prog_free);
@@ -638,19 +639,19 @@ EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
-	struct bpf_work_struct *ws;
+	struct bpf_prog_aux *aux;
 
-	ws = container_of(work, struct bpf_work_struct, work);
-	bpf_jit_free(ws->prog);
+	aux = container_of(work, struct bpf_prog_aux, work);
+	bpf_jit_free(aux->prog);
 }
 
 /* Free internal BPF program */
 void bpf_prog_free(struct bpf_prog *fp)
 {
-	struct bpf_work_struct *ws = fp->work;
+	struct bpf_prog_aux *aux = fp->aux;
 
-	INIT_WORK(&ws->work, bpf_prog_free_deferred);
-	ws->prog = fp;
-	schedule_work(&ws->work);
+	INIT_WORK(&aux->work, bpf_prog_free_deferred);
+	aux->prog = fp;
+	schedule_work(&aux->work);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_free);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f94349ecaf61..0afb4eaa1887 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -14,6 +14,8 @@
 #include <linux/slab.h>
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
+#include <linux/license.h>
+#include <linux/filter.h>
 
 static LIST_HEAD(bpf_map_types);
 
@@ -334,6 +336,166 @@ err_put:
 	return err;
 }
 
+static LIST_HEAD(bpf_prog_types);
+
+static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
+{
+	struct bpf_prog_type_list *tl;
+
+	list_for_each_entry(tl, &bpf_prog_types, list_node) {
+		if (tl->type == type) {
+			prog->aux->ops = tl->ops;
+			prog->aux->prog_type = type;
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+void bpf_register_prog_type(struct bpf_prog_type_list *tl)
+{
+	list_add(&tl->list_node, &bpf_prog_types);
+}
+
+/* drop refcnt on maps used by eBPF program and free auxilary data */
+static void free_used_maps(struct bpf_prog_aux *aux)
+{
+	int i;
+
+	for (i = 0; i < aux->used_map_cnt; i++)
+		bpf_map_put(aux->used_maps[i]);
+
+	kfree(aux->used_maps);
+}
+
+void bpf_prog_put(struct bpf_prog *prog)
+{
+	if (atomic_dec_and_test(&prog->aux->refcnt)) {
+		free_used_maps(prog->aux);
+		bpf_prog_free(prog);
+	}
+}
+
+static int bpf_prog_release(struct inode *inode, struct file *filp)
+{
+	struct bpf_prog *prog = filp->private_data;
+
+	bpf_prog_put(prog);
+	return 0;
+}
+
+static const struct file_operations bpf_prog_fops = {
+        .release = bpf_prog_release,
+};
+
+static struct bpf_prog *get_prog(struct fd f)
+{
+	struct bpf_prog *prog;
+
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	if (f.file->f_op != &bpf_prog_fops) {
+		fdput(f);
+		return ERR_PTR(-EINVAL);
+	}
+
+	prog = f.file->private_data;
+
+	return prog;
+}
+
+/* called by sockets/tracing/seccomp before attaching program to an event
+ * pairs with bpf_prog_put()
+ */
+struct bpf_prog *bpf_prog_get(u32 ufd)
+{
+	struct fd f = fdget(ufd);
+	struct bpf_prog *prog;
+
+	prog = get_prog(f);
+
+	if (IS_ERR(prog))
+		return prog;
+
+	atomic_inc(&prog->aux->refcnt);
+	fdput(f);
+	return prog;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define	BPF_PROG_LOAD_LAST_FIELD license
+
+static int bpf_prog_load(union bpf_attr *attr)
+{
+	enum bpf_prog_type type = attr->prog_type;
+	struct bpf_prog *prog;
+	int err;
+	char license[128];
+	bool is_gpl;
+
+	if (CHECK_ATTR(BPF_PROG_LOAD))
+		return -EINVAL;
+
+	/* copy eBPF program license from user space */
+	if (strncpy_from_user(license, u64_to_ptr(attr->license),
+			      sizeof(license) - 1) < 0)
+		return -EFAULT;
+	license[sizeof(license) - 1] = 0;
+
+	/* eBPF programs must be GPL compatible to use GPL-ed functions */
+	is_gpl = license_is_gpl_compatible(license);
+
+	if (attr->insn_cnt >= BPF_MAXINSNS)
+		return -EINVAL;
+
+	/* plain bpf_prog allocation */
+	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
+	if (!prog)
+		return -ENOMEM;
+
+	prog->len = attr->insn_cnt;
+
+	err = -EFAULT;
+	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
+			   prog->len * sizeof(struct bpf_insn)) != 0)
+		goto free_prog;
+
+	prog->orig_prog = NULL;
+	prog->jited = false;
+
+	atomic_set(&prog->aux->refcnt, 1);
+	prog->aux->is_gpl_compatible = is_gpl;
+
+	/* find program type: socket_filter vs tracing_filter */
+	err = find_prog_type(type, prog);
+	if (err < 0)
+		goto free_prog;
+
+	/* run eBPF verifier */
+	/* err = bpf_check(prog, tb); */
+
+	if (err < 0)
+		goto free_used_maps;
+
+	/* eBPF program is ready to be JITed */
+	bpf_prog_select_runtime(prog);
+
+	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
+
+	if (err < 0)
+		/* failed to allocate fd */
+		goto free_used_maps;
+
+	return err;
+
+free_used_maps:
+	free_used_maps(prog->aux);
+free_prog:
+	bpf_prog_free(prog);
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -395,6 +557,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_MAP_GET_NEXT_KEY:
 		err = map_get_next_key(&attr);
 		break;
+	case BPF_PROG_LOAD:
+		err = bpf_prog_load(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 51580e798cb61b0fc63fa3aa6c5c975375aa0550 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:02 -0700
Subject: bpf: verifier (add docs)

this patch adds all of eBPF verfier documentation and empty bpf_check()

The end goal for the verifier is to statically check safety of the program.

Verifier will catch:
- loops
- out of range jumps
- unreachable instructions
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention

More details in Documentation/networking/filter.txt

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt | 224 ++++++++++++++++++++++++++++++++++++
 include/linux/bpf.h                 |   2 +
 kernel/bpf/Makefile                 |   2 +-
 kernel/bpf/syscall.c                |   2 +-
 kernel/bpf/verifier.c               | 133 +++++++++++++++++++++
 5 files changed, 361 insertions(+), 2 deletions(-)
 create mode 100644 kernel/bpf/verifier.c

(limited to 'include/linux')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 4a01d71785e9..5ce4d07406a5 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1001,6 +1001,99 @@ instruction that loads 64-bit immediate value into a dst_reg.
 Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
 32-bit immediate value into a register.
 
+eBPF verifier
+-------------
+The safety of the eBPF program is determined in two steps.
+
+First step does DAG check to disallow loops and other CFG validation.
+In particular it will detect programs that have unreachable instructions.
+(though classic BPF checker allows them)
+
+Second step starts from the first insn and descends all possible paths.
+It simulates execution of every insn and observes the state change of
+registers and stack.
+
+At the start of the program the register R1 contains a pointer to context
+and has type PTR_TO_CTX.
+If verifier sees an insn that does R2=R1, then R2 has now type
+PTR_TO_CTX as well and can be used on the right hand side of expression.
+If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=UNKNOWN_VALUE,
+since addition of two valid pointers makes invalid pointer.
+(In 'secure' mode verifier will reject any type of pointer arithmetic to make
+sure that kernel addresses don't leak to unprivileged users)
+
+If register was never written to, it's not readable:
+  bpf_mov R0 = R2
+  bpf_exit
+will be rejected, since R2 is unreadable at the start of the program.
+
+After kernel function call, R1-R5 are reset to unreadable and
+R0 has a return type of the function.
+
+Since R6-R9 are callee saved, their state is preserved across the call.
+  bpf_mov R6 = 1
+  bpf_call foo
+  bpf_mov R0 = R6
+  bpf_exit
+is a correct program. If there was R1 instead of R6, it would have
+been rejected.
+
+load/store instructions are allowed only with registers of valid types, which
+are PTR_TO_CTX, PTR_TO_MAP, FRAME_PTR. They are bounds and alignment checked.
+For example:
+ bpf_mov R1 = 1
+ bpf_mov R2 = 2
+ bpf_xadd *(u32 *)(R1 + 3) += R2
+ bpf_exit
+will be rejected, since R1 doesn't have a valid pointer type at the time of
+execution of instruction bpf_xadd.
+
+At the start R1 type is PTR_TO_CTX (a pointer to generic 'struct bpf_context')
+A callback is used to customize verifier to restrict eBPF program access to only
+certain fields within ctx structure with specified size and alignment.
+
+For example, the following insn:
+  bpf_ld R0 = *(u32 *)(R6 + 8)
+intends to load a word from address R6 + 8 and store it into R0
+If R6=PTR_TO_CTX, via is_valid_access() callback the verifier will know
+that offset 8 of size 4 bytes can be accessed for reading, otherwise
+the verifier will reject the program.
+If R6=FRAME_PTR, then access should be aligned and be within
+stack bounds, which are [-MAX_BPF_STACK, 0). In this example offset is 8,
+so it will fail verification, since it's out of bounds.
+
+The verifier will allow eBPF program to read data from stack only after
+it wrote into it.
+Classic BPF verifier does similar check with M[0-15] memory slots.
+For example:
+  bpf_ld R0 = *(u32 *)(R10 - 4)
+  bpf_exit
+is invalid program.
+Though R10 is correct read-only register and has type FRAME_PTR
+and R10 - 4 is within stack bounds, there were no stores into that location.
+
+Pointer register spill/fill is tracked as well, since four (R6-R9)
+callee saved registers may not be enough for some programs.
+
+Allowed function calls are customized with bpf_verifier_ops->get_func_proto()
+The eBPF verifier will check that registers match argument constraints.
+After the call register R0 will be set to return type of the function.
+
+Function calls is a main mechanism to extend functionality of eBPF programs.
+Socket filters may let programs to call one set of functions, whereas tracing
+filters may allow completely different set.
+
+If a function made accessible to eBPF program, it needs to be thought through
+from safety point of view. The verifier will guarantee that the function is
+called with valid arguments.
+
+seccomp vs socket filters have different security restrictions for classic BPF.
+Seccomp solves this by two stage verifier: classic BPF verifier is followed
+by seccomp verifier. In case of eBPF one configurable verifier is shared for
+all use cases.
+
+See details of eBPF verifier in kernel/bpf/verifier.c
+
 eBPF maps
 ---------
 'maps' is a generic storage of different types for sharing data between kernel
@@ -1040,6 +1133,137 @@ The map is defined by:
   . key size in bytes
   . value size in bytes
 
+Understanding eBPF verifier messages
+------------------------------------
+
+The following are few examples of invalid eBPF programs and verifier error
+messages as seen in the log:
+
+Program with unreachable instructions:
+static struct bpf_insn prog[] = {
+  BPF_EXIT_INSN(),
+  BPF_EXIT_INSN(),
+};
+Error:
+  unreachable insn 1
+
+Program that reads uninitialized register:
+  BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+  BPF_EXIT_INSN(),
+Error:
+  0: (bf) r0 = r2
+  R2 !read_ok
+
+Program that doesn't initialize R0 before exiting:
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+  BPF_EXIT_INSN(),
+Error:
+  0: (bf) r2 = r1
+  1: (95) exit
+  R0 !read_ok
+
+Program that accesses stack out of bounds:
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+  BPF_EXIT_INSN(),
+Error:
+  0: (7a) *(u64 *)(r10 +8) = 0
+  invalid stack off=8 size=8
+
+Program that doesn't initialize stack before passing its address into function:
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_EXIT_INSN(),
+Error:
+  0: (bf) r2 = r10
+  1: (07) r2 += -8
+  2: (b7) r1 = 0x0
+  3: (85) call 1
+  invalid indirect read from stack off -8+0 size 8
+
+Program that uses invalid map_fd=0 while calling to map_lookup_elem() function:
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_EXIT_INSN(),
+Error:
+  0: (7a) *(u64 *)(r10 -8) = 0
+  1: (bf) r2 = r10
+  2: (07) r2 += -8
+  3: (b7) r1 = 0x0
+  4: (85) call 1
+  fd 0 is not pointing to valid bpf_map
+
+Program that doesn't check return value of map_lookup_elem() before accessing
+map element:
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+  BPF_EXIT_INSN(),
+Error:
+  0: (7a) *(u64 *)(r10 -8) = 0
+  1: (bf) r2 = r10
+  2: (07) r2 += -8
+  3: (b7) r1 = 0x0
+  4: (85) call 1
+  5: (7a) *(u64 *)(r0 +0) = 0
+  R0 invalid mem access 'map_value_or_null'
+
+Program that correctly checks map_lookup_elem() returned value for NULL, but
+accesses the memory with incorrect alignment:
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+  BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+  BPF_EXIT_INSN(),
+Error:
+  0: (7a) *(u64 *)(r10 -8) = 0
+  1: (bf) r2 = r10
+  2: (07) r2 += -8
+  3: (b7) r1 = 1
+  4: (85) call 1
+  5: (15) if r0 == 0x0 goto pc+1
+   R0=map_ptr R10=fp
+  6: (7a) *(u64 *)(r0 +4) = 0
+  misaligned access off 4 size 8
+
+Program that correctly checks map_lookup_elem() returned value for NULL and
+accesses memory with correct alignment in one side of 'if' branch, but fails
+to do so in the other side of 'if' branch:
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+  BPF_EXIT_INSN(),
+  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+  BPF_EXIT_INSN(),
+Error:
+  0: (7a) *(u64 *)(r10 -8) = 0
+  1: (bf) r2 = r10
+  2: (07) r2 += -8
+  3: (b7) r1 = 1
+  4: (85) call 1
+  5: (15) if r0 == 0x0 goto pc+2
+   R0=map_ptr R10=fp
+  6: (7a) *(u64 *)(r0 +0) = 0
+  7: (95) exit
+
+  from 5 to 8: R0=imm0 R10=fp
+  8: (7a) *(u64 *)(r0 +0) = 1
+  R0 invalid mem access 'imm'
+
 Testing
 -------
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 92979182be81..9dfeb36f8971 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -83,5 +83,7 @@ struct bpf_prog_aux {
 
 void bpf_prog_put(struct bpf_prog *prog);
 struct bpf_prog *bpf_prog_get(u32 ufd);
+/* verify correctness of eBPF program */
+int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
 
 #endif /* _LINUX_BPF_H */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e9f7334ed07a..3c726b0995b7 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1 +1 @@
-obj-y := core.o syscall.o
+obj-y := core.o syscall.o verifier.o
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b513659d120f..74b3628c5fdb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -507,7 +507,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 		goto free_prog;
 
 	/* run eBPF verifier */
-	/* err = bpf_check(prog, tb); */
+	err = bpf_check(prog, attr);
 
 	if (err < 0)
 		goto free_used_maps;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
new file mode 100644
index 000000000000..d6f9c3d6b4d7
--- /dev/null
+++ b/kernel/bpf/verifier.c
@@ -0,0 +1,133 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <net/netlink.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+
+/* bpf_check() is a static code analyzer that walks eBPF program
+ * instruction by instruction and updates register/stack state.
+ * All paths of conditional branches are analyzed until 'bpf_exit' insn.
+ *
+ * The first pass is depth-first-search to check that the program is a DAG.
+ * It rejects the following programs:
+ * - larger than BPF_MAXINSNS insns
+ * - if loop is present (detected via back-edge)
+ * - unreachable insns exist (shouldn't be a forest. program = one function)
+ * - out of bounds or malformed jumps
+ * The second pass is all possible path descent from the 1st insn.
+ * Since it's analyzing all pathes through the program, the length of the
+ * analysis is limited to 32k insn, which may be hit even if total number of
+ * insn is less then 4K, but there are too many branches that change stack/regs.
+ * Number of 'branches to be analyzed' is limited to 1k
+ *
+ * On entry to each instruction, each register has a type, and the instruction
+ * changes the types of the registers depending on instruction semantics.
+ * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
+ * copied to R1.
+ *
+ * All registers are 64-bit.
+ * R0 - return register
+ * R1-R5 argument passing registers
+ * R6-R9 callee saved registers
+ * R10 - frame pointer read-only
+ *
+ * At the start of BPF program the register R1 contains a pointer to bpf_context
+ * and has type PTR_TO_CTX.
+ *
+ * Verifier tracks arithmetic operations on pointers in case:
+ *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
+ * 1st insn copies R10 (which has FRAME_PTR) type into R1
+ * and 2nd arithmetic instruction is pattern matched to recognize
+ * that it wants to construct a pointer to some element within stack.
+ * So after 2nd insn, the register R1 has type PTR_TO_STACK
+ * (and -20 constant is saved for further stack bounds checking).
+ * Meaning that this reg is a pointer to stack plus known immediate constant.
+ *
+ * Most of the time the registers have UNKNOWN_VALUE type, which
+ * means the register has some value, but it's not a valid pointer.
+ * (like pointer plus pointer becomes UNKNOWN_VALUE type)
+ *
+ * When verifier sees load or store instructions the type of base register
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
+ * types recognized by check_mem_access() function.
+ *
+ * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
+ * and the range of [ptr, ptr + map's value_size) is accessible.
+ *
+ * registers used to pass values to function calls are checked against
+ * function argument constraints.
+ *
+ * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
+ * It means that the register type passed to this function must be
+ * PTR_TO_STACK and it will be used inside the function as
+ * 'pointer to map element key'
+ *
+ * For example the argument constraints for bpf_map_lookup_elem():
+ *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ *   .arg1_type = ARG_CONST_MAP_PTR,
+ *   .arg2_type = ARG_PTR_TO_MAP_KEY,
+ *
+ * ret_type says that this function returns 'pointer to map elem value or null'
+ * function expects 1st argument to be a const pointer to 'struct bpf_map' and
+ * 2nd argument should be a pointer to stack, which will be used inside
+ * the helper function as a pointer to map element key.
+ *
+ * On the kernel side the helper function looks like:
+ * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+ * {
+ *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+ *    void *key = (void *) (unsigned long) r2;
+ *    void *value;
+ *
+ *    here kernel can access 'key' and 'map' pointers safely, knowing that
+ *    [key, key + map->key_size) bytes are valid and were initialized on
+ *    the stack of eBPF program.
+ * }
+ *
+ * Corresponding eBPF program may look like:
+ *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
+ *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
+ *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
+ *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ * here verifier looks at prototype of map_lookup_elem() and sees:
+ * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
+ * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
+ *
+ * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
+ * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
+ * and were initialized prior to this call.
+ * If it's ok, then verifier allows this BPF_CALL insn and looks at
+ * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
+ * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
+ * returns ether pointer to map value or NULL.
+ *
+ * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
+ * insn, the register holding that pointer in the true branch changes state to
+ * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
+ * branch. See check_cond_jmp_op().
+ *
+ * After the call R0 is set to return type of the function and registers R1-R5
+ * are set to NOT_INIT to indicate that they are no longer readable.
+ */
+
+int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
+{
+	int ret = -EINVAL;
+
+	return ret;
+}
-- 
cgit v1.2.3


From 0246e64d9a5fcd4805198de59b9b5cf1f974eb41 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:04 -0700
Subject: bpf: handle pseudo BPF_LD_IMM64 insn

eBPF programs passed from userspace are using pseudo BPF_LD_IMM64 instructions
to refer to process-local map_fd. Scan the program for such instructions and
if FDs are valid, convert them to 'struct bpf_map' pointers which will be used
by verifier to check access to maps in bpf_map_lookup/update() calls.
If program passes verifier, convert pseudo BPF_LD_IMM64 into generic by dropping
BPF_PSEUDO_MAP_FD flag.

Note that eBPF interpreter is generic and knows nothing about pseudo insns.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |   6 ++
 kernel/bpf/verifier.c  | 147 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 153 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4ffc0958d85e..ca95abd2bed1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -145,6 +145,12 @@ struct bpf_prog_aux;
 		.off   = 0,					\
 		.imm   = ((__u64) (IMM)) >> 32 })
 
+#define BPF_PSEUDO_MAP_FD	1
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD)				\
+	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
 /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
 
 #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 871edc1f2e1f..7227543e474b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -125,10 +125,15 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
+#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+
 /* single container for all structs
  * one verifier_env per bpf_check() call
  */
 struct verifier_env {
+	struct bpf_prog *prog;		/* eBPF program being verified */
+	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
+	u32 used_map_cnt;		/* number of used maps */
 };
 
 /* verbose verifier prints what it's seeing
@@ -300,6 +305,115 @@ static void print_bpf_insn(struct bpf_insn *insn)
 	}
 }
 
+/* return the map pointer stored inside BPF_LD_IMM64 instruction */
+static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
+{
+	u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
+
+	return (struct bpf_map *) (unsigned long) imm64;
+}
+
+/* look for pseudo eBPF instructions that access map FDs and
+ * replace them with actual map pointers
+ */
+static int replace_map_fd_with_map_ptr(struct verifier_env *env)
+{
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	int i, j;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+			struct bpf_map *map;
+			struct fd f;
+
+			if (i == insn_cnt - 1 || insn[1].code != 0 ||
+			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
+			    insn[1].off != 0) {
+				verbose("invalid bpf_ld_imm64 insn\n");
+				return -EINVAL;
+			}
+
+			if (insn->src_reg == 0)
+				/* valid generic load 64-bit imm */
+				goto next_insn;
+
+			if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
+				verbose("unrecognized bpf_ld_imm64 insn\n");
+				return -EINVAL;
+			}
+
+			f = fdget(insn->imm);
+
+			map = bpf_map_get(f);
+			if (IS_ERR(map)) {
+				verbose("fd %d is not pointing to valid bpf_map\n",
+					insn->imm);
+				fdput(f);
+				return PTR_ERR(map);
+			}
+
+			/* store map pointer inside BPF_LD_IMM64 instruction */
+			insn[0].imm = (u32) (unsigned long) map;
+			insn[1].imm = ((u64) (unsigned long) map) >> 32;
+
+			/* check whether we recorded this map already */
+			for (j = 0; j < env->used_map_cnt; j++)
+				if (env->used_maps[j] == map) {
+					fdput(f);
+					goto next_insn;
+				}
+
+			if (env->used_map_cnt >= MAX_USED_MAPS) {
+				fdput(f);
+				return -E2BIG;
+			}
+
+			/* remember this map */
+			env->used_maps[env->used_map_cnt++] = map;
+
+			/* hold the map. If the program is rejected by verifier,
+			 * the map will be released by release_maps() or it
+			 * will be used by the valid program until it's unloaded
+			 * and all maps are released in free_bpf_prog_info()
+			 */
+			atomic_inc(&map->refcnt);
+
+			fdput(f);
+next_insn:
+			insn++;
+			i++;
+		}
+	}
+
+	/* now all pseudo BPF_LD_IMM64 instructions load valid
+	 * 'struct bpf_map *' into a register instead of user map_fd.
+	 * These pointers will be used later by verifier to validate map access.
+	 */
+	return 0;
+}
+
+/* drop refcnt of maps used by the rejected program */
+static void release_maps(struct verifier_env *env)
+{
+	int i;
+
+	for (i = 0; i < env->used_map_cnt; i++)
+		bpf_map_put(env->used_maps[i]);
+}
+
+/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
+static void convert_pseudo_ld_imm64(struct verifier_env *env)
+{
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++, insn++)
+		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
+			insn->src_reg = 0;
+}
+
 int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 {
 	char __user *log_ubuf = NULL;
@@ -316,6 +430,8 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 	if (!env)
 		return -ENOMEM;
 
+	env->prog = prog;
+
 	/* grab the mutex to protect few globals used by verifier */
 	mutex_lock(&bpf_verifier_lock);
 
@@ -342,8 +458,14 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 		log_level = 0;
 	}
 
+	ret = replace_map_fd_with_map_ptr(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	/* ret = do_check(env); */
 
+skip_full_check:
+
 	if (log_level && log_len >= log_size - 1) {
 		BUG_ON(log_len >= log_size);
 		/* verifier log exceeded user supplied buffer */
@@ -357,11 +479,36 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 		goto free_log_buf;
 	}
 
+	if (ret == 0 && env->used_map_cnt) {
+		/* if program passed verifier, update used_maps in bpf_prog_info */
+		prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
+						     sizeof(env->used_maps[0]),
+						     GFP_KERNEL);
+
+		if (!prog->aux->used_maps) {
+			ret = -ENOMEM;
+			goto free_log_buf;
+		}
+
+		memcpy(prog->aux->used_maps, env->used_maps,
+		       sizeof(env->used_maps[0]) * env->used_map_cnt);
+		prog->aux->used_map_cnt = env->used_map_cnt;
+
+		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
+		 * bpf_ld_imm64 instructions
+		 */
+		convert_pseudo_ld_imm64(env);
+	}
 
 free_log_buf:
 	if (log_level)
 		vfree(log_buf);
 free_env:
+	if (!prog->aux->used_maps)
+		/* if we didn't copy map pointers into bpf_prog_info, release
+		 * them now. Otherwise free_bpf_prog_info() will release them.
+		 */
+		release_maps(env);
 	kfree(env);
 	mutex_unlock(&bpf_verifier_lock);
 	return ret;
-- 
cgit v1.2.3


From 17a5267067f3c372fec9ffb798d6eaba6b5e6a4c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:06 -0700
Subject: bpf: verifier (add verifier core)

This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6

This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields

Kernel subsystem configures the verifier with two callbacks:

- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
  that provides information to the verifer which fields of 'ctx'
  are accessible (remember 'ctx' is the first argument to eBPF program)

- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
  returns argument constraints of kernel helper functions that eBPF program
  may call, so that verifier can checks that R1-R5 types match the prototype

More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h   |   47 +++
 kernel/bpf/verifier.c | 1075 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 1121 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9dfeb36f8971..3cf91754a957 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -46,6 +46,31 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
 struct bpf_map *bpf_map_get(struct fd f);
 
+/* function argument constraints */
+enum bpf_arg_type {
+	ARG_ANYTHING = 0,	/* any argument is ok */
+
+	/* the following constraints used to prototype
+	 * bpf_map_lookup/update/delete_elem() functions
+	 */
+	ARG_CONST_MAP_PTR,	/* const argument used as pointer to bpf_map */
+	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
+	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
+
+	/* the following constraints used to prototype bpf_memcmp() and other
+	 * functions that access data on eBPF program stack
+	 */
+	ARG_PTR_TO_STACK,	/* any pointer to eBPF program stack */
+	ARG_CONST_STACK_SIZE,	/* number of bytes accessed from stack */
+};
+
+/* type of values returned from helper functions */
+enum bpf_return_type {
+	RET_INTEGER,			/* function returns integer */
+	RET_VOID,			/* function doesn't return anything */
+	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
+};
+
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
  * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
  * instructions after verifying
@@ -53,11 +78,33 @@ struct bpf_map *bpf_map_get(struct fd f);
 struct bpf_func_proto {
 	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 	bool gpl_only;
+	enum bpf_return_type ret_type;
+	enum bpf_arg_type arg1_type;
+	enum bpf_arg_type arg2_type;
+	enum bpf_arg_type arg3_type;
+	enum bpf_arg_type arg4_type;
+	enum bpf_arg_type arg5_type;
+};
+
+/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
+ * the first argument to eBPF programs.
+ * For socket filters: 'struct bpf_context *' == 'struct sk_buff *'
+ */
+struct bpf_context;
+
+enum bpf_access_type {
+	BPF_READ = 1,
+	BPF_WRITE = 2
 };
 
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
 	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
+
+	/* return true if 'size' wide access at offset 'off' within bpf_context
+	 * with 'type' (read or write) is allowed
+	 */
+	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
 };
 
 struct bpf_prog_type_list {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c689ab8e2713..a086dd3210a8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -125,6 +125,70 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
+/* types of values stored in eBPF registers */
+enum bpf_reg_type {
+	NOT_INIT = 0,		 /* nothing was written into register */
+	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
+	PTR_TO_CTX,		 /* reg points to bpf_context */
+	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
+	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
+	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
+	FRAME_PTR,		 /* reg == frame_pointer */
+	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
+	CONST_IMM,		 /* constant integer value */
+};
+
+struct reg_state {
+	enum bpf_reg_type type;
+	union {
+		/* valid when type == CONST_IMM | PTR_TO_STACK */
+		int imm;
+
+		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
+		 *   PTR_TO_MAP_VALUE_OR_NULL
+		 */
+		struct bpf_map *map_ptr;
+	};
+};
+
+enum bpf_stack_slot_type {
+	STACK_INVALID,    /* nothing was stored in this stack slot */
+	STACK_SPILL,      /* 1st byte of register spilled into stack */
+	STACK_SPILL_PART, /* other 7 bytes of register spill */
+	STACK_MISC	  /* BPF program wrote some data into this slot */
+};
+
+struct bpf_stack_slot {
+	enum bpf_stack_slot_type stype;
+	struct reg_state reg_st;
+};
+
+/* state of the program:
+ * type of all registers and stack info
+ */
+struct verifier_state {
+	struct reg_state regs[MAX_BPF_REG];
+	struct bpf_stack_slot stack[MAX_BPF_STACK];
+};
+
+/* linked list of verifier states used to prune search */
+struct verifier_state_list {
+	struct verifier_state state;
+	struct verifier_state_list *next;
+};
+
+/* verifier_state + insn_idx are pushed to stack when branch is encountered */
+struct verifier_stack_elem {
+	/* verifer state is 'st'
+	 * before processing instruction 'insn_idx'
+	 * and after processing instruction 'prev_insn_idx'
+	 */
+	struct verifier_state st;
+	int insn_idx;
+	int prev_insn_idx;
+	struct verifier_stack_elem *next;
+};
+
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
 
 /* single container for all structs
@@ -132,6 +196,9 @@
  */
 struct verifier_env {
 	struct bpf_prog *prog;		/* eBPF program being verified */
+	struct verifier_stack_elem *head; /* stack of verifier states to be processed */
+	int stack_size;			/* number of states to be processed */
+	struct verifier_state cur_state; /* current verifier state */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
 };
@@ -160,6 +227,45 @@ static void verbose(const char *fmt, ...)
 	va_end(args);
 }
 
+/* string representation of 'enum bpf_reg_type' */
+static const char * const reg_type_str[] = {
+	[NOT_INIT]		= "?",
+	[UNKNOWN_VALUE]		= "inv",
+	[PTR_TO_CTX]		= "ctx",
+	[CONST_PTR_TO_MAP]	= "map_ptr",
+	[PTR_TO_MAP_VALUE]	= "map_value",
+	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
+	[FRAME_PTR]		= "fp",
+	[PTR_TO_STACK]		= "fp",
+	[CONST_IMM]		= "imm",
+};
+
+static void print_verifier_state(struct verifier_env *env)
+{
+	enum bpf_reg_type t;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++) {
+		t = env->cur_state.regs[i].type;
+		if (t == NOT_INIT)
+			continue;
+		verbose(" R%d=%s", i, reg_type_str[t]);
+		if (t == CONST_IMM || t == PTR_TO_STACK)
+			verbose("%d", env->cur_state.regs[i].imm);
+		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
+			 t == PTR_TO_MAP_VALUE_OR_NULL)
+			verbose("(ks=%d,vs=%d)",
+				env->cur_state.regs[i].map_ptr->key_size,
+				env->cur_state.regs[i].map_ptr->value_size);
+	}
+	for (i = 0; i < MAX_BPF_STACK; i++) {
+		if (env->cur_state.stack[i].stype == STACK_SPILL)
+			verbose(" fp%d=%s", -MAX_BPF_STACK + i,
+				reg_type_str[env->cur_state.stack[i].reg_st.type]);
+	}
+	verbose("\n");
+}
+
 static const char *const bpf_class_string[] = {
 	[BPF_LD]    = "ld",
 	[BPF_LDX]   = "ldx",
@@ -305,6 +411,735 @@ static void print_bpf_insn(struct bpf_insn *insn)
 	}
 }
 
+static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
+{
+	struct verifier_stack_elem *elem;
+	int insn_idx;
+
+	if (env->head == NULL)
+		return -1;
+
+	memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
+	insn_idx = env->head->insn_idx;
+	if (prev_insn_idx)
+		*prev_insn_idx = env->head->prev_insn_idx;
+	elem = env->head->next;
+	kfree(env->head);
+	env->head = elem;
+	env->stack_size--;
+	return insn_idx;
+}
+
+static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
+					 int prev_insn_idx)
+{
+	struct verifier_stack_elem *elem;
+
+	elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
+	if (!elem)
+		goto err;
+
+	memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
+	elem->insn_idx = insn_idx;
+	elem->prev_insn_idx = prev_insn_idx;
+	elem->next = env->head;
+	env->head = elem;
+	env->stack_size++;
+	if (env->stack_size > 1024) {
+		verbose("BPF program is too complex\n");
+		goto err;
+	}
+	return &elem->st;
+err:
+	/* pop all elements and return */
+	while (pop_stack(env, NULL) >= 0);
+	return NULL;
+}
+
+#define CALLER_SAVED_REGS 6
+static const int caller_saved[CALLER_SAVED_REGS] = {
+	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
+};
+
+static void init_reg_state(struct reg_state *regs)
+{
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++) {
+		regs[i].type = NOT_INIT;
+		regs[i].imm = 0;
+		regs[i].map_ptr = NULL;
+	}
+
+	/* frame pointer */
+	regs[BPF_REG_FP].type = FRAME_PTR;
+
+	/* 1st arg to a function */
+	regs[BPF_REG_1].type = PTR_TO_CTX;
+}
+
+static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
+{
+	BUG_ON(regno >= MAX_BPF_REG);
+	regs[regno].type = UNKNOWN_VALUE;
+	regs[regno].imm = 0;
+	regs[regno].map_ptr = NULL;
+}
+
+enum reg_arg_type {
+	SRC_OP,		/* register is used as source operand */
+	DST_OP,		/* register is used as destination operand */
+	DST_OP_NO_MARK	/* same as above, check only, don't mark */
+};
+
+static int check_reg_arg(struct reg_state *regs, u32 regno,
+			 enum reg_arg_type t)
+{
+	if (regno >= MAX_BPF_REG) {
+		verbose("R%d is invalid\n", regno);
+		return -EINVAL;
+	}
+
+	if (t == SRC_OP) {
+		/* check whether register used as source operand can be read */
+		if (regs[regno].type == NOT_INIT) {
+			verbose("R%d !read_ok\n", regno);
+			return -EACCES;
+		}
+	} else {
+		/* check whether register used as dest operand can be written to */
+		if (regno == BPF_REG_FP) {
+			verbose("frame pointer is read only\n");
+			return -EACCES;
+		}
+		if (t == DST_OP)
+			mark_reg_unknown_value(regs, regno);
+	}
+	return 0;
+}
+
+static int bpf_size_to_bytes(int bpf_size)
+{
+	if (bpf_size == BPF_W)
+		return 4;
+	else if (bpf_size == BPF_H)
+		return 2;
+	else if (bpf_size == BPF_B)
+		return 1;
+	else if (bpf_size == BPF_DW)
+		return 8;
+	else
+		return -EINVAL;
+}
+
+/* check_stack_read/write functions track spill/fill of registers,
+ * stack boundary and alignment are checked in check_mem_access()
+ */
+static int check_stack_write(struct verifier_state *state, int off, int size,
+			     int value_regno)
+{
+	struct bpf_stack_slot *slot;
+	int i;
+
+	if (value_regno >= 0 &&
+	    (state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
+	     state->regs[value_regno].type == PTR_TO_STACK ||
+	     state->regs[value_regno].type == PTR_TO_CTX)) {
+
+		/* register containing pointer is being spilled into stack */
+		if (size != 8) {
+			verbose("invalid size of register spill\n");
+			return -EACCES;
+		}
+
+		slot = &state->stack[MAX_BPF_STACK + off];
+		slot->stype = STACK_SPILL;
+		/* save register state */
+		slot->reg_st = state->regs[value_regno];
+		for (i = 1; i < 8; i++) {
+			slot = &state->stack[MAX_BPF_STACK + off + i];
+			slot->stype = STACK_SPILL_PART;
+			slot->reg_st.type = UNKNOWN_VALUE;
+			slot->reg_st.map_ptr = NULL;
+		}
+	} else {
+
+		/* regular write of data into stack */
+		for (i = 0; i < size; i++) {
+			slot = &state->stack[MAX_BPF_STACK + off + i];
+			slot->stype = STACK_MISC;
+			slot->reg_st.type = UNKNOWN_VALUE;
+			slot->reg_st.map_ptr = NULL;
+		}
+	}
+	return 0;
+}
+
+static int check_stack_read(struct verifier_state *state, int off, int size,
+			    int value_regno)
+{
+	int i;
+	struct bpf_stack_slot *slot;
+
+	slot = &state->stack[MAX_BPF_STACK + off];
+
+	if (slot->stype == STACK_SPILL) {
+		if (size != 8) {
+			verbose("invalid size of register spill\n");
+			return -EACCES;
+		}
+		for (i = 1; i < 8; i++) {
+			if (state->stack[MAX_BPF_STACK + off + i].stype !=
+			    STACK_SPILL_PART) {
+				verbose("corrupted spill memory\n");
+				return -EACCES;
+			}
+		}
+
+		if (value_regno >= 0)
+			/* restore register state from stack */
+			state->regs[value_regno] = slot->reg_st;
+		return 0;
+	} else {
+		for (i = 0; i < size; i++) {
+			if (state->stack[MAX_BPF_STACK + off + i].stype !=
+			    STACK_MISC) {
+				verbose("invalid read from stack off %d+%d size %d\n",
+					off, i, size);
+				return -EACCES;
+			}
+		}
+		if (value_regno >= 0)
+			/* have read misc data from the stack */
+			mark_reg_unknown_value(state->regs, value_regno);
+		return 0;
+	}
+}
+
+/* check read/write into map element returned by bpf_map_lookup_elem() */
+static int check_map_access(struct verifier_env *env, u32 regno, int off,
+			    int size)
+{
+	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
+
+	if (off < 0 || off + size > map->value_size) {
+		verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
+			map->value_size, off, size);
+		return -EACCES;
+	}
+	return 0;
+}
+
+/* check access to 'struct bpf_context' fields */
+static int check_ctx_access(struct verifier_env *env, int off, int size,
+			    enum bpf_access_type t)
+{
+	if (env->prog->aux->ops->is_valid_access &&
+	    env->prog->aux->ops->is_valid_access(off, size, t))
+		return 0;
+
+	verbose("invalid bpf_context access off=%d size=%d\n", off, size);
+	return -EACCES;
+}
+
+/* check whether memory at (regno + off) is accessible for t = (read | write)
+ * if t==write, value_regno is a register which value is stored into memory
+ * if t==read, value_regno is a register which will receive the value from memory
+ * if t==write && value_regno==-1, some unknown value is stored into memory
+ * if t==read && value_regno==-1, don't care what we read from memory
+ */
+static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+			    int bpf_size, enum bpf_access_type t,
+			    int value_regno)
+{
+	struct verifier_state *state = &env->cur_state;
+	int size, err = 0;
+
+	size = bpf_size_to_bytes(bpf_size);
+	if (size < 0)
+		return size;
+
+	if (off % size != 0) {
+		verbose("misaligned access off %d size %d\n", off, size);
+		return -EACCES;
+	}
+
+	if (state->regs[regno].type == PTR_TO_MAP_VALUE) {
+		err = check_map_access(env, regno, off, size);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown_value(state->regs, value_regno);
+
+	} else if (state->regs[regno].type == PTR_TO_CTX) {
+		err = check_ctx_access(env, off, size, t);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown_value(state->regs, value_regno);
+
+	} else if (state->regs[regno].type == FRAME_PTR) {
+		if (off >= 0 || off < -MAX_BPF_STACK) {
+			verbose("invalid stack off=%d size=%d\n", off, size);
+			return -EACCES;
+		}
+		if (t == BPF_WRITE)
+			err = check_stack_write(state, off, size, value_regno);
+		else
+			err = check_stack_read(state, off, size, value_regno);
+	} else {
+		verbose("R%d invalid mem access '%s'\n",
+			regno, reg_type_str[state->regs[regno].type]);
+		return -EACCES;
+	}
+	return err;
+}
+
+static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	int err;
+
+	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
+	    insn->imm != 0) {
+		verbose("BPF_XADD uses reserved fields\n");
+		return -EINVAL;
+	}
+
+	/* check src1 operand */
+	err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+	if (err)
+		return err;
+
+	/* check src2 operand */
+	err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+	if (err)
+		return err;
+
+	/* check whether atomic_add can read the memory */
+	err = check_mem_access(env, insn->dst_reg, insn->off,
+			       BPF_SIZE(insn->code), BPF_READ, -1);
+	if (err)
+		return err;
+
+	/* check whether atomic_add can write into the same memory */
+	return check_mem_access(env, insn->dst_reg, insn->off,
+				BPF_SIZE(insn->code), BPF_WRITE, -1);
+}
+
+/* when register 'regno' is passed into function that will read 'access_size'
+ * bytes from that pointer, make sure that it's within stack boundary
+ * and all elements of stack are initialized
+ */
+static int check_stack_boundary(struct verifier_env *env,
+				int regno, int access_size)
+{
+	struct verifier_state *state = &env->cur_state;
+	struct reg_state *regs = state->regs;
+	int off, i;
+
+	if (regs[regno].type != PTR_TO_STACK)
+		return -EACCES;
+
+	off = regs[regno].imm;
+	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
+	    access_size <= 0) {
+		verbose("invalid stack type R%d off=%d access_size=%d\n",
+			regno, off, access_size);
+		return -EACCES;
+	}
+
+	for (i = 0; i < access_size; i++) {
+		if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) {
+			verbose("invalid indirect read from stack off %d+%d size %d\n",
+				off, i, access_size);
+			return -EACCES;
+		}
+	}
+	return 0;
+}
+
+static int check_func_arg(struct verifier_env *env, u32 regno,
+			  enum bpf_arg_type arg_type, struct bpf_map **mapp)
+{
+	struct reg_state *reg = env->cur_state.regs + regno;
+	enum bpf_reg_type expected_type;
+	int err = 0;
+
+	if (arg_type == ARG_ANYTHING)
+		return 0;
+
+	if (reg->type == NOT_INIT) {
+		verbose("R%d !read_ok\n", regno);
+		return -EACCES;
+	}
+
+	if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
+	    arg_type == ARG_PTR_TO_MAP_VALUE) {
+		expected_type = PTR_TO_STACK;
+	} else if (arg_type == ARG_CONST_STACK_SIZE) {
+		expected_type = CONST_IMM;
+	} else if (arg_type == ARG_CONST_MAP_PTR) {
+		expected_type = CONST_PTR_TO_MAP;
+	} else {
+		verbose("unsupported arg_type %d\n", arg_type);
+		return -EFAULT;
+	}
+
+	if (reg->type != expected_type) {
+		verbose("R%d type=%s expected=%s\n", regno,
+			reg_type_str[reg->type], reg_type_str[expected_type]);
+		return -EACCES;
+	}
+
+	if (arg_type == ARG_CONST_MAP_PTR) {
+		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
+		*mapp = reg->map_ptr;
+
+	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
+		/* bpf_map_xxx(..., map_ptr, ..., key) call:
+		 * check that [key, key + map->key_size) are within
+		 * stack limits and initialized
+		 */
+		if (!*mapp) {
+			/* in function declaration map_ptr must come before
+			 * map_key, so that it's verified and known before
+			 * we have to check map_key here. Otherwise it means
+			 * that kernel subsystem misconfigured verifier
+			 */
+			verbose("invalid map_ptr to access map->key\n");
+			return -EACCES;
+		}
+		err = check_stack_boundary(env, regno, (*mapp)->key_size);
+
+	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
+		/* bpf_map_xxx(..., map_ptr, ..., value) call:
+		 * check [value, value + map->value_size) validity
+		 */
+		if (!*mapp) {
+			/* kernel subsystem misconfigured verifier */
+			verbose("invalid map_ptr to access map->value\n");
+			return -EACCES;
+		}
+		err = check_stack_boundary(env, regno, (*mapp)->value_size);
+
+	} else if (arg_type == ARG_CONST_STACK_SIZE) {
+		/* bpf_xxx(..., buf, len) call will access 'len' bytes
+		 * from stack pointer 'buf'. Check it
+		 * note: regno == len, regno - 1 == buf
+		 */
+		if (regno == 0) {
+			/* kernel subsystem misconfigured verifier */
+			verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
+			return -EACCES;
+		}
+		err = check_stack_boundary(env, regno - 1, reg->imm);
+	}
+
+	return err;
+}
+
+static int check_call(struct verifier_env *env, int func_id)
+{
+	struct verifier_state *state = &env->cur_state;
+	const struct bpf_func_proto *fn = NULL;
+	struct reg_state *regs = state->regs;
+	struct bpf_map *map = NULL;
+	struct reg_state *reg;
+	int i, err;
+
+	/* find function prototype */
+	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
+		verbose("invalid func %d\n", func_id);
+		return -EINVAL;
+	}
+
+	if (env->prog->aux->ops->get_func_proto)
+		fn = env->prog->aux->ops->get_func_proto(func_id);
+
+	if (!fn) {
+		verbose("unknown func %d\n", func_id);
+		return -EINVAL;
+	}
+
+	/* eBPF programs must be GPL compatible to use GPL-ed functions */
+	if (!env->prog->aux->is_gpl_compatible && fn->gpl_only) {
+		verbose("cannot call GPL only function from proprietary program\n");
+		return -EINVAL;
+	}
+
+	/* check args */
+	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map);
+	if (err)
+		return err;
+	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map);
+	if (err)
+		return err;
+	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map);
+	if (err)
+		return err;
+	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map);
+	if (err)
+		return err;
+	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map);
+	if (err)
+		return err;
+
+	/* reset caller saved regs */
+	for (i = 0; i < CALLER_SAVED_REGS; i++) {
+		reg = regs + caller_saved[i];
+		reg->type = NOT_INIT;
+		reg->imm = 0;
+	}
+
+	/* update return register */
+	if (fn->ret_type == RET_INTEGER) {
+		regs[BPF_REG_0].type = UNKNOWN_VALUE;
+	} else if (fn->ret_type == RET_VOID) {
+		regs[BPF_REG_0].type = NOT_INIT;
+	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
+		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+		/* remember map_ptr, so that check_map_access()
+		 * can check 'value_size' boundary of memory access
+		 * to map element returned from bpf_map_lookup_elem()
+		 */
+		if (map == NULL) {
+			verbose("kernel subsystem misconfigured verifier\n");
+			return -EINVAL;
+		}
+		regs[BPF_REG_0].map_ptr = map;
+	} else {
+		verbose("unknown return type %d of func %d\n",
+			fn->ret_type, func_id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* check validity of 32-bit and 64-bit arithmetic operations */
+static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
+{
+	u8 opcode = BPF_OP(insn->code);
+	int err;
+
+	if (opcode == BPF_END || opcode == BPF_NEG) {
+		if (opcode == BPF_NEG) {
+			if (BPF_SRC(insn->code) != 0 ||
+			    insn->src_reg != BPF_REG_0 ||
+			    insn->off != 0 || insn->imm != 0) {
+				verbose("BPF_NEG uses reserved fields\n");
+				return -EINVAL;
+			}
+		} else {
+			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
+			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
+				verbose("BPF_END uses reserved fields\n");
+				return -EINVAL;
+			}
+		}
+
+		/* check src operand */
+		err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+		if (err)
+			return err;
+
+		/* check dest operand */
+		err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+		if (err)
+			return err;
+
+	} else if (opcode == BPF_MOV) {
+
+		if (BPF_SRC(insn->code) == BPF_X) {
+			if (insn->imm != 0 || insn->off != 0) {
+				verbose("BPF_MOV uses reserved fields\n");
+				return -EINVAL;
+			}
+
+			/* check src operand */
+			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+			if (err)
+				return err;
+		} else {
+			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
+				verbose("BPF_MOV uses reserved fields\n");
+				return -EINVAL;
+			}
+		}
+
+		/* check dest operand */
+		err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+		if (err)
+			return err;
+
+		if (BPF_SRC(insn->code) == BPF_X) {
+			if (BPF_CLASS(insn->code) == BPF_ALU64) {
+				/* case: R1 = R2
+				 * copy register state to dest reg
+				 */
+				regs[insn->dst_reg] = regs[insn->src_reg];
+			} else {
+				regs[insn->dst_reg].type = UNKNOWN_VALUE;
+				regs[insn->dst_reg].map_ptr = NULL;
+			}
+		} else {
+			/* case: R = imm
+			 * remember the value we stored into this reg
+			 */
+			regs[insn->dst_reg].type = CONST_IMM;
+			regs[insn->dst_reg].imm = insn->imm;
+		}
+
+	} else if (opcode > BPF_END) {
+		verbose("invalid BPF_ALU opcode %x\n", opcode);
+		return -EINVAL;
+
+	} else {	/* all other ALU ops: and, sub, xor, add, ... */
+
+		bool stack_relative = false;
+
+		if (BPF_SRC(insn->code) == BPF_X) {
+			if (insn->imm != 0 || insn->off != 0) {
+				verbose("BPF_ALU uses reserved fields\n");
+				return -EINVAL;
+			}
+			/* check src1 operand */
+			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+			if (err)
+				return err;
+		} else {
+			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
+				verbose("BPF_ALU uses reserved fields\n");
+				return -EINVAL;
+			}
+		}
+
+		/* check src2 operand */
+		err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+		if (err)
+			return err;
+
+		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
+		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
+			verbose("div by zero\n");
+			return -EINVAL;
+		}
+
+		/* pattern match 'bpf_add Rx, imm' instruction */
+		if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
+		    regs[insn->dst_reg].type == FRAME_PTR &&
+		    BPF_SRC(insn->code) == BPF_K)
+			stack_relative = true;
+
+		/* check dest operand */
+		err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+		if (err)
+			return err;
+
+		if (stack_relative) {
+			regs[insn->dst_reg].type = PTR_TO_STACK;
+			regs[insn->dst_reg].imm = insn->imm;
+		}
+	}
+
+	return 0;
+}
+
+static int check_cond_jmp_op(struct verifier_env *env,
+			     struct bpf_insn *insn, int *insn_idx)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct verifier_state *other_branch;
+	u8 opcode = BPF_OP(insn->code);
+	int err;
+
+	if (opcode > BPF_EXIT) {
+		verbose("invalid BPF_JMP opcode %x\n", opcode);
+		return -EINVAL;
+	}
+
+	if (BPF_SRC(insn->code) == BPF_X) {
+		if (insn->imm != 0) {
+			verbose("BPF_JMP uses reserved fields\n");
+			return -EINVAL;
+		}
+
+		/* check src1 operand */
+		err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+		if (err)
+			return err;
+	} else {
+		if (insn->src_reg != BPF_REG_0) {
+			verbose("BPF_JMP uses reserved fields\n");
+			return -EINVAL;
+		}
+	}
+
+	/* check src2 operand */
+	err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+	if (err)
+		return err;
+
+	/* detect if R == 0 where R was initialized to zero earlier */
+	if (BPF_SRC(insn->code) == BPF_K &&
+	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
+	    regs[insn->dst_reg].type == CONST_IMM &&
+	    regs[insn->dst_reg].imm == insn->imm) {
+		if (opcode == BPF_JEQ) {
+			/* if (imm == imm) goto pc+off;
+			 * only follow the goto, ignore fall-through
+			 */
+			*insn_idx += insn->off;
+			return 0;
+		} else {
+			/* if (imm != imm) goto pc+off;
+			 * only follow fall-through branch, since
+			 * that's where the program will go
+			 */
+			return 0;
+		}
+	}
+
+	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
+	if (!other_branch)
+		return -EFAULT;
+
+	/* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
+	if (BPF_SRC(insn->code) == BPF_K &&
+	    insn->imm == 0 && (opcode == BPF_JEQ ||
+			       opcode == BPF_JNE) &&
+	    regs[insn->dst_reg].type == PTR_TO_MAP_VALUE_OR_NULL) {
+		if (opcode == BPF_JEQ) {
+			/* next fallthrough insn can access memory via
+			 * this register
+			 */
+			regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
+			/* branch targer cannot access it, since reg == 0 */
+			other_branch->regs[insn->dst_reg].type = CONST_IMM;
+			other_branch->regs[insn->dst_reg].imm = 0;
+		} else {
+			other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
+			regs[insn->dst_reg].type = CONST_IMM;
+			regs[insn->dst_reg].imm = 0;
+		}
+	} else if (BPF_SRC(insn->code) == BPF_K &&
+		   (opcode == BPF_JEQ || opcode == BPF_JNE)) {
+
+		if (opcode == BPF_JEQ) {
+			/* detect if (R == imm) goto
+			 * and in the target state recognize that R = imm
+			 */
+			other_branch->regs[insn->dst_reg].type = CONST_IMM;
+			other_branch->regs[insn->dst_reg].imm = insn->imm;
+		} else {
+			/* detect if (R != imm) goto
+			 * and in the fall-through state recognize that R = imm
+			 */
+			regs[insn->dst_reg].type = CONST_IMM;
+			regs[insn->dst_reg].imm = insn->imm;
+		}
+	}
+	if (log_level)
+		print_verifier_state(env);
+	return 0;
+}
+
 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
 {
@@ -313,6 +1148,37 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
 	return (struct bpf_map *) (unsigned long) imm64;
 }
 
+/* verify BPF_LD_IMM64 instruction */
+static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	int err;
+
+	if (BPF_SIZE(insn->code) != BPF_DW) {
+		verbose("invalid BPF_LD_IMM insn\n");
+		return -EINVAL;
+	}
+	if (insn->off != 0) {
+		verbose("BPF_LD_IMM64 uses reserved fields\n");
+		return -EINVAL;
+	}
+
+	err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+	if (err)
+		return err;
+
+	if (insn->src_reg == 0)
+		/* generic move 64-bit immediate into a register */
+		return 0;
+
+	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
+	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
+
+	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
+	return 0;
+}
+
 /* non-recursive DFS pseudo code
  * 1  procedure DFS-iterative(G,v):
  * 2      label v as discovered
@@ -498,6 +1364,212 @@ err_free:
 	return ret;
 }
 
+static int do_check(struct verifier_env *env)
+{
+	struct verifier_state *state = &env->cur_state;
+	struct bpf_insn *insns = env->prog->insnsi;
+	struct reg_state *regs = state->regs;
+	int insn_cnt = env->prog->len;
+	int insn_idx, prev_insn_idx = 0;
+	int insn_processed = 0;
+	bool do_print_state = false;
+
+	init_reg_state(regs);
+	insn_idx = 0;
+	for (;;) {
+		struct bpf_insn *insn;
+		u8 class;
+		int err;
+
+		if (insn_idx >= insn_cnt) {
+			verbose("invalid insn idx %d insn_cnt %d\n",
+				insn_idx, insn_cnt);
+			return -EFAULT;
+		}
+
+		insn = &insns[insn_idx];
+		class = BPF_CLASS(insn->code);
+
+		if (++insn_processed > 32768) {
+			verbose("BPF program is too large. Proccessed %d insn\n",
+				insn_processed);
+			return -E2BIG;
+		}
+
+		if (log_level && do_print_state) {
+			verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
+			print_verifier_state(env);
+			do_print_state = false;
+		}
+
+		if (log_level) {
+			verbose("%d: ", insn_idx);
+			print_bpf_insn(insn);
+		}
+
+		if (class == BPF_ALU || class == BPF_ALU64) {
+			err = check_alu_op(regs, insn);
+			if (err)
+				return err;
+
+		} else if (class == BPF_LDX) {
+			if (BPF_MODE(insn->code) != BPF_MEM ||
+			    insn->imm != 0) {
+				verbose("BPF_LDX uses reserved fields\n");
+				return -EINVAL;
+			}
+			/* check src operand */
+			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+			if (err)
+				return err;
+
+			err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
+			if (err)
+				return err;
+
+			/* check that memory (src_reg + off) is readable,
+			 * the state of dst_reg will be updated by this func
+			 */
+			err = check_mem_access(env, insn->src_reg, insn->off,
+					       BPF_SIZE(insn->code), BPF_READ,
+					       insn->dst_reg);
+			if (err)
+				return err;
+
+		} else if (class == BPF_STX) {
+			if (BPF_MODE(insn->code) == BPF_XADD) {
+				err = check_xadd(env, insn);
+				if (err)
+					return err;
+				insn_idx++;
+				continue;
+			}
+
+			if (BPF_MODE(insn->code) != BPF_MEM ||
+			    insn->imm != 0) {
+				verbose("BPF_STX uses reserved fields\n");
+				return -EINVAL;
+			}
+			/* check src1 operand */
+			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+			if (err)
+				return err;
+			/* check src2 operand */
+			err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+			if (err)
+				return err;
+
+			/* check that memory (dst_reg + off) is writeable */
+			err = check_mem_access(env, insn->dst_reg, insn->off,
+					       BPF_SIZE(insn->code), BPF_WRITE,
+					       insn->src_reg);
+			if (err)
+				return err;
+
+		} else if (class == BPF_ST) {
+			if (BPF_MODE(insn->code) != BPF_MEM ||
+			    insn->src_reg != BPF_REG_0) {
+				verbose("BPF_ST uses reserved fields\n");
+				return -EINVAL;
+			}
+			/* check src operand */
+			err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+			if (err)
+				return err;
+
+			/* check that memory (dst_reg + off) is writeable */
+			err = check_mem_access(env, insn->dst_reg, insn->off,
+					       BPF_SIZE(insn->code), BPF_WRITE,
+					       -1);
+			if (err)
+				return err;
+
+		} else if (class == BPF_JMP) {
+			u8 opcode = BPF_OP(insn->code);
+
+			if (opcode == BPF_CALL) {
+				if (BPF_SRC(insn->code) != BPF_K ||
+				    insn->off != 0 ||
+				    insn->src_reg != BPF_REG_0 ||
+				    insn->dst_reg != BPF_REG_0) {
+					verbose("BPF_CALL uses reserved fields\n");
+					return -EINVAL;
+				}
+
+				err = check_call(env, insn->imm);
+				if (err)
+					return err;
+
+			} else if (opcode == BPF_JA) {
+				if (BPF_SRC(insn->code) != BPF_K ||
+				    insn->imm != 0 ||
+				    insn->src_reg != BPF_REG_0 ||
+				    insn->dst_reg != BPF_REG_0) {
+					verbose("BPF_JA uses reserved fields\n");
+					return -EINVAL;
+				}
+
+				insn_idx += insn->off + 1;
+				continue;
+
+			} else if (opcode == BPF_EXIT) {
+				if (BPF_SRC(insn->code) != BPF_K ||
+				    insn->imm != 0 ||
+				    insn->src_reg != BPF_REG_0 ||
+				    insn->dst_reg != BPF_REG_0) {
+					verbose("BPF_EXIT uses reserved fields\n");
+					return -EINVAL;
+				}
+
+				/* eBPF calling convetion is such that R0 is used
+				 * to return the value from eBPF program.
+				 * Make sure that it's readable at this time
+				 * of bpf_exit, which means that program wrote
+				 * something into it earlier
+				 */
+				err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
+				if (err)
+					return err;
+
+				insn_idx = pop_stack(env, &prev_insn_idx);
+				if (insn_idx < 0) {
+					break;
+				} else {
+					do_print_state = true;
+					continue;
+				}
+			} else {
+				err = check_cond_jmp_op(env, insn, &insn_idx);
+				if (err)
+					return err;
+			}
+		} else if (class == BPF_LD) {
+			u8 mode = BPF_MODE(insn->code);
+
+			if (mode == BPF_ABS || mode == BPF_IND) {
+				verbose("LD_ABS is not supported yet\n");
+				return -EINVAL;
+			} else if (mode == BPF_IMM) {
+				err = check_ld_imm(env, insn);
+				if (err)
+					return err;
+
+				insn_idx++;
+			} else {
+				verbose("invalid BPF_LD mode\n");
+				return -EINVAL;
+			}
+		} else {
+			verbose("unknown insn class %d\n", class);
+			return -EINVAL;
+		}
+
+		insn_idx++;
+	}
+
+	return 0;
+}
+
 /* look for pseudo eBPF instructions that access map FDs and
  * replace them with actual map pointers
  */
@@ -651,9 +1723,10 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 	if (ret < 0)
 		goto skip_full_check;
 
-	/* ret = do_check(env); */
+	ret = do_check(env);
 
 skip_full_check:
+	while (pop_stack(env, NULL) >= 0);
 
 	if (log_level && log_len >= log_size - 1) {
 		BUG_ON(log_len >= log_size);
-- 
cgit v1.2.3


From 6ea754eb761d9e7a8ac6fa462b05f9e4cf04fb6c Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 22 Sep 2014 11:10:50 -0700
Subject: net: Change netdev_<level> logging functions to return void

No caller or macro uses the return value so make all
the functions return void.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 19 +++++++++----------
 net/core/dev.c            | 44 +++++++++++++++++---------------------------
 2 files changed, 26 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9f5d293a0281..9b7fbacb6296 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3612,22 +3612,22 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
 }
 
 __printf(3, 4)
-int netdev_printk(const char *level, const struct net_device *dev,
-		  const char *format, ...);
+void netdev_printk(const char *level, const struct net_device *dev,
+		   const char *format, ...);
 __printf(2, 3)
-int netdev_emerg(const struct net_device *dev, const char *format, ...);
+void netdev_emerg(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_alert(const struct net_device *dev, const char *format, ...);
+void netdev_alert(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_crit(const struct net_device *dev, const char *format, ...);
+void netdev_crit(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_err(const struct net_device *dev, const char *format, ...);
+void netdev_err(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_warn(const struct net_device *dev, const char *format, ...);
+void netdev_warn(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_notice(const struct net_device *dev, const char *format, ...);
+void netdev_notice(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_info(const struct net_device *dev, const char *format, ...);
+void netdev_info(const struct net_device *dev, const char *format, ...);
 
 #define MODULE_ALIAS_NETDEV(device) \
 	MODULE_ALIAS("netdev-" device)
@@ -3645,7 +3645,6 @@ do {								\
 ({								\
 	if (0)							\
 		netdev_printk(KERN_DEBUG, __dev, format, ##args); \
-	0;							\
 })
 #endif
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e2ced01c01e4..e55c546717d4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7066,53 +7066,45 @@ const char *netdev_drivername(const struct net_device *dev)
 	return empty;
 }
 
-static int __netdev_printk(const char *level, const struct net_device *dev,
-			   struct va_format *vaf)
+static void __netdev_printk(const char *level, const struct net_device *dev,
+			    struct va_format *vaf)
 {
-	int r;
-
 	if (dev && dev->dev.parent) {
-		r = dev_printk_emit(level[1] - '0',
-				    dev->dev.parent,
-				    "%s %s %s%s: %pV",
-				    dev_driver_string(dev->dev.parent),
-				    dev_name(dev->dev.parent),
-				    netdev_name(dev), netdev_reg_state(dev),
-				    vaf);
+		dev_printk_emit(level[1] - '0',
+				dev->dev.parent,
+				"%s %s %s%s: %pV",
+				dev_driver_string(dev->dev.parent),
+				dev_name(dev->dev.parent),
+				netdev_name(dev), netdev_reg_state(dev),
+				vaf);
 	} else if (dev) {
-		r = printk("%s%s%s: %pV", level, netdev_name(dev),
-			   netdev_reg_state(dev), vaf);
+		printk("%s%s%s: %pV",
+		       level, netdev_name(dev), netdev_reg_state(dev), vaf);
 	} else {
-		r = printk("%s(NULL net_device): %pV", level, vaf);
+		printk("%s(NULL net_device): %pV", level, vaf);
 	}
-
-	return r;
 }
 
-int netdev_printk(const char *level, const struct net_device *dev,
-		  const char *format, ...)
+void netdev_printk(const char *level, const struct net_device *dev,
+		   const char *format, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	int r;
 
 	va_start(args, format);
 
 	vaf.fmt = format;
 	vaf.va = &args;
 
-	r = __netdev_printk(level, dev, &vaf);
+	__netdev_printk(level, dev, &vaf);
 
 	va_end(args);
-
-	return r;
 }
 EXPORT_SYMBOL(netdev_printk);
 
 #define define_netdev_printk_level(func, level)			\
-int func(const struct net_device *dev, const char *fmt, ...)	\
+void func(const struct net_device *dev, const char *fmt, ...)	\
 {								\
-	int r;							\
 	struct va_format vaf;					\
 	va_list args;						\
 								\
@@ -7121,11 +7113,9 @@ int func(const struct net_device *dev, const char *fmt, ...)	\
 	vaf.fmt = fmt;						\
 	vaf.va = &args;						\
 								\
-	r = __netdev_printk(level, dev, &vaf);			\
+	__netdev_printk(level, dev, &vaf);			\
 								\
 	va_end(args);						\
-								\
-	return r;						\
 }								\
 EXPORT_SYMBOL(func);
 
-- 
cgit v1.2.3


From f4a775d14489a801a5b8b0540e23ab82e2703091 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 22 Sep 2014 16:29:32 -0700
Subject: net: introduce __skb_header_release()

While profiling TCP stack, I noticed one useless atomic operation
in tcp_sendmsg(), caused by skb_header_release().

It turns out all current skb_header_release() users have a fresh skb,
that no other user can see, so we can avoid one atomic operation.

Introduce __skb_header_release() to clearly document this.

This gave me a 1.5 % improvement on TCP_RR workload.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 15 +++++++++++++++
 net/core/skbuff.c      |  4 ++--
 net/ipv4/tcp.c         |  4 ++--
 net/ipv4/tcp_output.c  | 10 +++++-----
 4 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f1bfa3781c75..8eaa62400fca 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1083,6 +1083,7 @@ static inline int skb_header_cloned(const struct sk_buff *skb)
  *	Drop a reference to the header part of the buffer.  This is done
  *	by acquiring a payload reference.  You must not read from the header
  *	part of skb->data after this.
+ *	Note : Check if you can use __skb_header_release() instead.
  */
 static inline void skb_header_release(struct sk_buff *skb)
 {
@@ -1091,6 +1092,20 @@ static inline void skb_header_release(struct sk_buff *skb)
 	atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref);
 }
 
+/**
+ *	__skb_header_release - release reference to header
+ *	@skb: buffer to operate on
+ *
+ *	Variant of skb_header_release() assuming skb is private to caller.
+ *	We can avoid one atomic operation.
+ */
+static inline void __skb_header_release(struct sk_buff *skb)
+{
+	skb->nohdr = 1;
+	atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT));
+}
+
+
 /**
  *	skb_shared - is the buffer shared
  *	@skb: buffer to check
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 06a8feb10099..512dc7dcbc32 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3179,7 +3179,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	skb_shinfo(nskb)->frag_list = p;
 	skb_shinfo(nskb)->gso_size = pinfo->gso_size;
 	pinfo->gso_size = 0;
-	skb_header_release(p);
+	__skb_header_release(p);
 	NAPI_GRO_CB(nskb)->last = p;
 
 	nskb->data_len += p->len;
@@ -3211,7 +3211,7 @@ merge:
 	else
 		NAPI_GRO_CB(p)->last->next = skb;
 	NAPI_GRO_CB(p)->last = skb;
-	skb_header_release(skb);
+	__skb_header_release(skb);
 	lp = p;
 
 done:
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 070aeff1b131..553b01f52f71 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -609,7 +609,7 @@ static inline bool forced_push(const struct tcp_sock *tp)
 	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
 }
 
-static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
+static void skb_entail(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -618,7 +618,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 	tcb->seq     = tcb->end_seq = tp->write_seq;
 	tcb->tcp_flags = TCPHDR_ACK;
 	tcb->sacked  = 0;
-	skb_header_release(skb);
+	__skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c61a7c0c889..f173b1c4f815 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -995,7 +995,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 
 	/* Advance write_seq and place onto the write_queue. */
 	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
-	skb_header_release(skb);
+	__skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
@@ -1167,7 +1167,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	}
 
 	/* Link BUFF into the send queue. */
-	skb_header_release(buff);
+	__skb_header_release(buff);
 	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
@@ -1671,7 +1671,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	tcp_set_skb_tso_segs(sk, buff, mss_now);
 
 	/* Link BUFF into the send queue. */
-	skb_header_release(buff);
+	__skb_header_release(buff);
 	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
@@ -2772,7 +2772,7 @@ int tcp_send_synack(struct sock *sk)
 			if (nskb == NULL)
 				return -ENOMEM;
 			tcp_unlink_write_queue(skb, sk);
-			skb_header_release(nskb);
+			__skb_header_release(nskb);
 			__tcp_add_write_queue_head(sk, nskb);
 			sk_wmem_free_skb(sk, skb);
 			sk->sk_wmem_queued += nskb->truesize;
@@ -2947,7 +2947,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
 	tcb->end_seq += skb->len;
-	skb_header_release(skb);
+	__skb_header_release(skb);
 	__tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
-- 
cgit v1.2.3


From c873d14d30b838a516a94967242322d4b73e79e7 Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jsarha@ti.com>
Date: Fri, 5 Sep 2014 15:21:34 +0300
Subject: clk: add gpio gated clock

The added gpio-gate-clock is a basic clock that can be enabled and
disabled trough a gpio output. The DT binding document for the clock
is also added. For EPROBE_DEFER handling the registering of the clock
has to be delayed until of_clk_get() call time.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 .../devicetree/bindings/clock/gpio-gate-clock.txt  |  21 +++
 drivers/clk/Makefile                               |   1 +
 drivers/clk/clk-gpio-gate.c                        | 204 +++++++++++++++++++++
 include/linux/clk-provider.h                       |  22 +++
 4 files changed, 248 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
 create mode 100644 drivers/clk/clk-gpio-gate.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt b/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
new file mode 100644
index 000000000000..d3379ff9b84b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
@@ -0,0 +1,21 @@
+Binding for simple gpio gated clock.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "gpio-gate-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- enable-gpios : GPIO reference for enabling and disabling the clock.
+
+Optional properties:
+- clocks: Maximum of one parent clock is supported.
+
+Example:
+	clock {
+		compatible = "gpio-gate-clock";
+		clocks = <&parentclk>;
+		#clock-cells = <0>;
+		enable-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
+	};
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 27c542ba9e73..92a7f6c02394 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_COMMON_CLK)	+= clk-gate.o
 obj-$(CONFIG_COMMON_CLK)	+= clk-mux.o
 obj-$(CONFIG_COMMON_CLK)	+= clk-composite.o
 obj-$(CONFIG_COMMON_CLK)	+= clk-fractional-divider.o
+obj-$(CONFIG_COMMON_CLK)	+= clk-gpio-gate.o
 ifeq ($(CONFIG_OF), y)
 obj-$(CONFIG_COMMON_CLK)	+= clk-conf.o
 endif
diff --git a/drivers/clk/clk-gpio-gate.c b/drivers/clk/clk-gpio-gate.c
new file mode 100644
index 000000000000..9dde88533684
--- /dev/null
+++ b/drivers/clk/clk-gpio-gate.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2013 - 2014 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Jyri Sarha <jsarha@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Gpio gated clock implementation
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+#include <linux/err.h>
+#include <linux/device.h>
+
+/**
+ * DOC: basic gpio gated clock which can be enabled and disabled
+ *      with gpio output
+ * Traits of this clock:
+ * prepare - clk_(un)prepare only ensures parent is (un)prepared
+ * enable - clk_enable and clk_disable are functional & control gpio
+ * rate - inherits rate from parent.  No clk_set_rate support
+ * parent - fixed parent.  No clk_set_parent support
+ */
+
+#define to_clk_gpio(_hw) container_of(_hw, struct clk_gpio, hw)
+
+static int clk_gpio_gate_enable(struct clk_hw *hw)
+{
+	struct clk_gpio *clk = to_clk_gpio(hw);
+
+	gpiod_set_value(clk->gpiod, 1);
+
+	return 0;
+}
+
+static void clk_gpio_gate_disable(struct clk_hw *hw)
+{
+	struct clk_gpio *clk = to_clk_gpio(hw);
+
+	gpiod_set_value(clk->gpiod, 0);
+}
+
+static int clk_gpio_gate_is_enabled(struct clk_hw *hw)
+{
+	struct clk_gpio *clk = to_clk_gpio(hw);
+
+	return gpiod_get_value(clk->gpiod);
+}
+
+const struct clk_ops clk_gpio_gate_ops = {
+	.enable = clk_gpio_gate_enable,
+	.disable = clk_gpio_gate_disable,
+	.is_enabled = clk_gpio_gate_is_enabled,
+};
+EXPORT_SYMBOL_GPL(clk_gpio_gate_ops);
+
+/**
+ * clk_register_gpio - register a gpip clock with the clock framework
+ * @dev: device that is registering this clock
+ * @name: name of this clock
+ * @parent_name: name of this clock's parent
+ * @gpiod: gpio descriptor to gate this clock
+ */
+struct clk *clk_register_gpio_gate(struct device *dev, const char *name,
+		const char *parent_name, struct gpio_desc *gpiod,
+		unsigned long flags)
+{
+	struct clk_gpio *clk_gpio = NULL;
+	struct clk *clk = ERR_PTR(-EINVAL);
+	struct clk_init_data init = { NULL };
+	unsigned long gpio_flags;
+	int err;
+
+	if (gpiod_is_active_low(gpiod))
+		gpio_flags = GPIOF_OUT_INIT_HIGH;
+	else
+		gpio_flags = GPIOF_OUT_INIT_LOW;
+
+	if (dev)
+		err = devm_gpio_request_one(dev, desc_to_gpio(gpiod),
+					    gpio_flags, name);
+	else
+		err = gpio_request_one(desc_to_gpio(gpiod), gpio_flags, name);
+
+	if (err) {
+		pr_err("%s: %s: Error requesting clock control gpio %u\n",
+		       __func__, name, desc_to_gpio(gpiod));
+		return ERR_PTR(err);
+	}
+
+	if (dev)
+		clk_gpio = devm_kzalloc(dev, sizeof(struct clk_gpio),
+					GFP_KERNEL);
+	else
+		clk_gpio = kzalloc(sizeof(struct clk_gpio), GFP_KERNEL);
+
+	if (!clk_gpio) {
+		clk = ERR_PTR(-ENOMEM);
+		goto clk_register_gpio_gate_err;
+	}
+
+	init.name = name;
+	init.ops = &clk_gpio_gate_ops;
+	init.flags = flags | CLK_IS_BASIC;
+	init.parent_names = (parent_name ? &parent_name : NULL);
+	init.num_parents = (parent_name ? 1 : 0);
+
+	clk_gpio->gpiod = gpiod;
+	clk_gpio->hw.init = &init;
+
+	clk = clk_register(dev, &clk_gpio->hw);
+
+	if (!IS_ERR(clk))
+		return clk;
+
+	if (!dev)
+		kfree(clk_gpio);
+
+clk_register_gpio_gate_err:
+	gpiod_put(gpiod);
+
+	return clk;
+}
+EXPORT_SYMBOL_GPL(clk_register_gpio_gate);
+
+#ifdef CONFIG_OF
+/**
+ * The clk_register_gpio_gate has to be delayed, because the EPROBE_DEFER
+ * can not be handled properly at of_clk_init() call time.
+ */
+
+struct clk_gpio_gate_delayed_register_data {
+	struct device_node *node;
+	struct mutex lock;
+	struct clk *clk;
+};
+
+static struct clk *of_clk_gpio_gate_delayed_register_get(
+		struct of_phandle_args *clkspec,
+		void *_data)
+{
+	struct clk_gpio_gate_delayed_register_data *data = _data;
+	struct clk *clk;
+	const char *clk_name = data->node->name;
+	const char *parent_name;
+	struct gpio_desc *gpiod;
+	int gpio;
+
+	mutex_lock(&data->lock);
+
+	if (data->clk) {
+		mutex_unlock(&data->lock);
+		return data->clk;
+	}
+
+	gpio = of_get_named_gpio_flags(data->node, "enable-gpios", 0, NULL);
+	if (gpio < 0) {
+		mutex_unlock(&data->lock);
+		if (gpio != -EPROBE_DEFER)
+			pr_err("%s: %s: Can't get 'enable-gpios' DT property\n",
+			       __func__, clk_name);
+		return ERR_PTR(gpio);
+	}
+	gpiod = gpio_to_desc(gpio);
+
+	parent_name = of_clk_get_parent_name(data->node, 0);
+
+	clk = clk_register_gpio_gate(NULL, clk_name, parent_name, gpiod, 0);
+	if (IS_ERR(clk)) {
+		mutex_unlock(&data->lock);
+		return clk;
+	}
+
+	data->clk = clk;
+	mutex_unlock(&data->lock);
+
+	return clk;
+}
+
+/**
+ * of_gpio_gate_clk_setup() - Setup function for gpio controlled clock
+ */
+void __init of_gpio_gate_clk_setup(struct device_node *node)
+{
+	struct clk_gpio_gate_delayed_register_data *data;
+
+	data = kzalloc(sizeof(struct clk_gpio_gate_delayed_register_data),
+		       GFP_KERNEL);
+	if (!data)
+		return;
+
+	data->node = node;
+	mutex_init(&data->lock);
+
+	of_clk_add_provider(node, of_clk_gpio_gate_delayed_register_get, data);
+}
+EXPORT_SYMBOL_GPL(of_gpio_gate_clk_setup);
+CLK_OF_DECLARE(gpio_gate_clk, "gpio-gate-clock", of_gpio_gate_clk_setup);
+#endif
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 411dd7eb2653..ec1581bd94cd 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -488,6 +488,28 @@ struct clk *clk_register_composite(struct device *dev, const char *name,
 		struct clk_hw *gate_hw, const struct clk_ops *gate_ops,
 		unsigned long flags);
 
+/***
+ * struct clk_gpio_gate - gpio gated clock
+ *
+ * @hw:		handle between common and hardware-specific interfaces
+ * @gpiod:	gpio descriptor
+ *
+ * Clock with a gpio control for enabling and disabling the parent clock.
+ * Implements .enable, .disable and .is_enabled
+ */
+
+struct clk_gpio {
+	struct clk_hw	hw;
+	struct gpio_desc *gpiod;
+};
+
+extern const struct clk_ops clk_gpio_gate_ops;
+struct clk *clk_register_gpio_gate(struct device *dev, const char *name,
+		const char *parent_name, struct gpio_desc *gpio,
+		unsigned long flags);
+
+void of_gpio_clk_gate_setup(struct device_node *node);
+
 /**
  * clk_register - allocate a new clock, register it and return an opaque cookie
  * @dev: device that is registering this clock
-- 
cgit v1.2.3


From 2c80929c4c4d54e568b07ab85877d5fd38f4b02f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 24 Sep 2014 17:09:11 +0200
Subject: fuse: honour max_read and max_write in direct_io mode

The third argument of fuse_get_user_pages() "nbytesp" refers to the number of
bytes a caller asked to pack into fuse request. This value may be lesser
than capacity of fuse request or iov_iter.  So fuse_get_user_pages() must
ensure that *nbytesp won't grow.

Now, when helper iov_iter_get_pages() performs all hard work of extracting
pages from iov_iter, it can be done by passing properly calculated
"maxsize" to the helper.

The other caller of iov_iter_get_pages() (dio_refill_pages()) doesn't need
this capability, so pass LONG_MAX as the maxsize argument here.

Fixes: c9c37e2e6378 ("fuse: switch to iov_iter_get_pages()")
Reported-by: Werner Baumann <werner.baumann@onlinehome.de>
Tested-by: Maxim Patlasov <mpatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c      |  2 +-
 fs/fuse/file.c      |  1 +
 include/linux/uio.h |  2 +-
 mm/iov_iter.c       | 14 +++++++++-----
 4 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index c3116404ab49..e181b6b2e297 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
 	ssize_t ret;
 
-	ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES,
+	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
 				&sdio->from);
 
 	if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 912061ac4baf..caa8d95b24e8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1305,6 +1305,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 		size_t start;
 		ssize_t ret = iov_iter_get_pages(ii,
 					&req->pages[req->num_pages],
+					*nbytesp - nbytes,
 					req->max_pages - req->num_pages,
 					&start);
 		if (ret < 0)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 48d64e6ab292..290fbf0b6b8a 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -84,7 +84,7 @@ unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
-			unsigned maxpages, size_t *start);
+			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index ab88dc0ea1d3..9a09f2034fcc 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -310,7 +310,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 EXPORT_SYMBOL(iov_iter_init);
 
 static ssize_t get_pages_iovec(struct iov_iter *i,
-		   struct page **pages, unsigned maxpages,
+		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
 {
 	size_t offset = i->iov_offset;
@@ -323,6 +323,8 @@ static ssize_t get_pages_iovec(struct iov_iter *i,
 	len = iov->iov_len - offset;
 	if (len > i->count)
 		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
 	addr = (unsigned long)iov->iov_base + offset;
 	len += *start = addr & (PAGE_SIZE - 1);
 	if (len > maxpages * PAGE_SIZE)
@@ -588,13 +590,15 @@ static unsigned long alignment_bvec(const struct iov_iter *i)
 }
 
 static ssize_t get_pages_bvec(struct iov_iter *i,
-		   struct page **pages, unsigned maxpages,
+		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
 {
 	const struct bio_vec *bvec = i->bvec;
 	size_t len = bvec->bv_len - i->iov_offset;
 	if (len > i->count)
 		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
 	/* can't be more than PAGE_SIZE */
 	*start = bvec->bv_offset + i->iov_offset;
 
@@ -711,13 +715,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 EXPORT_SYMBOL(iov_iter_alignment);
 
 ssize_t iov_iter_get_pages(struct iov_iter *i,
-		   struct page **pages, unsigned maxpages,
+		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
 {
 	if (i->type & ITER_BVEC)
-		return get_pages_bvec(i, pages, maxpages, start);
+		return get_pages_bvec(i, pages, maxsize, maxpages, start);
 	else
-		return get_pages_iovec(i, pages, maxpages, start);
+		return get_pages_iovec(i, pages, maxsize, maxpages, start);
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
 
-- 
cgit v1.2.3


From e59c5371fb9d8268d1c043172e88cecab9dc934f Mon Sep 17 00:00:00 2001
From: Mike Turquette <mturquette@linaro.org>
Date: Tue, 18 Feb 2014 21:21:25 -0800
Subject: clk: introduce clk_set_phase function & callback

A common operation for a clock signal generator is to shift the phase of
that signal. This patch introduces a new function to the clk.h API to
dynamically adjust the phase of a clock signal. Additionally this patch
introduces support for the new function in the common clock framework
via the .set_phase call back in struct clk_ops.

Signed-off-by: Mike Turquette <mturquette@linaro.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/clk/clk.c            | 85 +++++++++++++++++++++++++++++++++++++++++---
 include/linux/clk-private.h  |  1 +
 include/linux/clk-provider.h |  5 +++
 include/linux/clk.h          | 29 +++++++++++++++
 4 files changed, 116 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index b76fa69b44cb..d87661af0c72 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -117,11 +117,11 @@ static void clk_summary_show_one(struct seq_file *s, struct clk *c, int level)
 	if (!c)
 		return;
 
-	seq_printf(s, "%*s%-*s %11d %12d %11lu %10lu\n",
+	seq_printf(s, "%*s%-*s %11d %12d %11lu %10lu %-3d\n",
 		   level * 3 + 1, "",
 		   30 - level * 3, c->name,
 		   c->enable_count, c->prepare_count, clk_get_rate(c),
-		   clk_get_accuracy(c));
+		   clk_get_accuracy(c), clk_get_phase(c));
 }
 
 static void clk_summary_show_subtree(struct seq_file *s, struct clk *c,
@@ -143,8 +143,8 @@ static int clk_summary_show(struct seq_file *s, void *data)
 	struct clk *c;
 	struct hlist_head **lists = (struct hlist_head **)s->private;
 
-	seq_puts(s, "   clock                         enable_cnt  prepare_cnt        rate   accuracy\n");
-	seq_puts(s, "--------------------------------------------------------------------------------\n");
+	seq_puts(s, "   clock                         enable_cnt  prepare_cnt        rate   accuracy   phase\n");
+	seq_puts(s, "----------------------------------------------------------------------------------------\n");
 
 	clk_prepare_lock();
 
@@ -180,6 +180,7 @@ static void clk_dump_one(struct seq_file *s, struct clk *c, int level)
 	seq_printf(s, "\"prepare_count\": %d,", c->prepare_count);
 	seq_printf(s, "\"rate\": %lu", clk_get_rate(c));
 	seq_printf(s, "\"accuracy\": %lu", clk_get_accuracy(c));
+	seq_printf(s, "\"phase\": %d", clk_get_phase(c));
 }
 
 static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level)
@@ -264,6 +265,11 @@ static int clk_debug_create_one(struct clk *clk, struct dentry *pdentry)
 	if (!d)
 		goto err_out;
 
+	d = debugfs_create_u32("clk_phase", S_IRUGO, clk->dentry,
+			(u32 *)&clk->phase);
+	if (!d)
+		goto err_out;
+
 	d = debugfs_create_x32("clk_flags", S_IRUGO, clk->dentry,
 			(u32 *)&clk->flags);
 	if (!d)
@@ -1738,6 +1744,77 @@ out:
 }
 EXPORT_SYMBOL_GPL(clk_set_parent);
 
+/**
+ * clk_set_phase - adjust the phase shift of a clock signal
+ * @clk: clock signal source
+ * @degrees: number of degrees the signal is shifted
+ *
+ * Shifts the phase of a clock signal by the specified
+ * degrees. Returns 0 on success, -EERROR otherwise.
+ *
+ * This function makes no distinction about the input or reference
+ * signal that we adjust the clock signal phase against. For example
+ * phase locked-loop clock signal generators we may shift phase with
+ * respect to feedback clock signal input, but for other cases the
+ * clock phase may be shifted with respect to some other, unspecified
+ * signal.
+ *
+ * Additionally the concept of phase shift does not propagate through
+ * the clock tree hierarchy, which sets it apart from clock rates and
+ * clock accuracy. A parent clock phase attribute does not have an
+ * impact on the phase attribute of a child clock.
+ */
+int clk_set_phase(struct clk *clk, int degrees)
+{
+	int ret = 0;
+
+	if (!clk)
+		goto out;
+
+	/* sanity check degrees */
+	degrees %= 360;
+	if (degrees < 0)
+		degrees += 360;
+
+	clk_prepare_lock();
+
+	if (!clk->ops->set_phase)
+		goto out_unlock;
+
+	ret = clk->ops->set_phase(clk->hw, degrees);
+
+	if (!ret)
+		clk->phase = degrees;
+
+out_unlock:
+	clk_prepare_unlock();
+
+out:
+	return ret;
+}
+
+/**
+ * clk_get_phase - return the phase shift of a clock signal
+ * @clk: clock signal source
+ *
+ * Returns the phase shift of a clock node in degrees, otherwise returns
+ * -EERROR.
+ */
+int clk_get_phase(struct clk *clk)
+{
+	int ret = 0;
+
+	if (!clk)
+		goto out;
+
+	clk_prepare_lock();
+	ret = clk->phase;
+	clk_prepare_unlock();
+
+out:
+	return ret;
+}
+
 /**
  * __clk_init - initialize the data structures in a struct clk
  * @dev:	device initializing this clk, placeholder for now
diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index efbf70b9fd84..845be30be50f 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -46,6 +46,7 @@ struct clk {
 	unsigned int		enable_count;
 	unsigned int		prepare_count;
 	unsigned long		accuracy;
+	int			phase;
 	struct hlist_head	children;
 	struct hlist_node	child_node;
 	unsigned int		notifier_count;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 411dd7eb2653..201a6195a3eb 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -129,6 +129,10 @@ struct dentry;
  *		set then clock accuracy will be initialized to parent accuracy
  *		or 0 (perfect clock) if clock has no parent.
  *
+ * @set_phase:	Shift the phase this clock signal in degrees specified
+ *		by the second argument. Valid values for degrees are
+ *		0-359. Return 0 on success, otherwise -EERROR.
+ *
  * @init:	Perform platform-specific initialization magic.
  *		This is not not used by any of the basic clock types.
  *		Please consider other ways of solving initialization problems
@@ -177,6 +181,7 @@ struct clk_ops {
 				    unsigned long parent_rate, u8 index);
 	unsigned long	(*recalc_accuracy)(struct clk_hw *hw,
 					   unsigned long parent_accuracy);
+	int		(*set_phase)(struct clk_hw *hw, int degrees);
 	void		(*init)(struct clk_hw *hw);
 	int		(*debug_init)(struct clk_hw *hw, struct dentry *dentry);
 };
diff --git a/include/linux/clk.h b/include/linux/clk.h
index fb5e097d8f72..38bdedd3e389 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -106,6 +106,25 @@ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb);
  */
 long clk_get_accuracy(struct clk *clk);
 
+/**
+ * clk_set_phase - adjust the phase shift of a clock signal
+ * @clk: clock signal source
+ * @degrees: number of degrees the signal is shifted
+ *
+ * Shifts the phase of a clock signal by the specified degrees. Returns 0 on
+ * success, -EERROR otherwise.
+ */
+int clk_set_phase(struct clk *clk, int degrees);
+
+/**
+ * clk_get_phase - return the phase shift of a clock signal
+ * @clk: clock signal source
+ *
+ * Returns the phase shift of a clock node in degrees, otherwise returns
+ * -EERROR.
+ */
+int clk_get_phase(struct clk *clk);
+
 #else
 
 static inline long clk_get_accuracy(struct clk *clk)
@@ -113,6 +132,16 @@ static inline long clk_get_accuracy(struct clk *clk)
 	return -ENOTSUPP;
 }
 
+static inline long clk_set_phase(struct clk *clk, int phase)
+{
+	return -ENOTSUPP;
+}
+
+static inline long clk_get_phase(struct clk *clk)
+{
+	return -ENOTSUPP;
+}
+
 #endif
 
 /**
-- 
cgit v1.2.3


From 355bb165cd8bad2500df37437a9121f0177b6741 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Sat, 30 Aug 2014 21:18:00 +0200
Subject: clk: Include of.h in clock-provider.h

CLK_OF_DECLARE relies on OF_DECLARE_1 that is defined in of.h. Fixes build
errors when one use CLK_OF_DECLARE but doesn't include of.h

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/clk-provider.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 201a6195a3eb..69b20d4c1e1a 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -13,6 +13,7 @@
 
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/of.h>
 
 #ifdef CONFIG_COMMON_CLK
 
-- 
cgit v1.2.3


From 9824cf73c3a5e677bee6fcba43c4807e01ff1b4a Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Mon, 14 Jul 2014 13:53:27 +0200
Subject: clk: Add a function to retrieve phase

The current phase API doesn't look into the actual hardware to get the phase
value, but will rather get it from a variable only set by the set_phase
function.

This will cause issue when the client driver will never call the set_phase
function, where we can end up having a reported phase that will not match what
the hardware has been programmed to by the bootloader or what phase is
programmed out of reset.

Add a new get_phase function for the drivers to implement so that we can get
this value.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/clk/clk.c            | 10 ++++++++++
 include/linux/clk-provider.h |  5 +++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index d87661af0c72..113d75db371d 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1933,6 +1933,16 @@ int __clk_init(struct device *dev, struct clk *clk)
 	else
 		clk->accuracy = 0;
 
+	/*
+	 * Set clk's phase.
+	 * Since a phase is by definition relative to its parent, just
+	 * query the current clock phase, or just assume it's in phase.
+	 */
+	if (clk->ops->get_phase)
+		clk->phase = clk->ops->get_phase(clk->hw);
+	else
+		clk->phase = 0;
+
 	/*
 	 * Set clk's rate.  The preferred method is to use .recalc_rate.  For
 	 * simple clocks and lazy developers the default fallback is to use the
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 69b20d4c1e1a..abec961092a7 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -130,6 +130,10 @@ struct dentry;
  *		set then clock accuracy will be initialized to parent accuracy
  *		or 0 (perfect clock) if clock has no parent.
  *
+ * @get_phase:	Queries the hardware to get the current phase of a clock.
+ *		Returned values are 0-359 degrees on success, negative
+ *		error codes on failure.
+ *
  * @set_phase:	Shift the phase this clock signal in degrees specified
  *		by the second argument. Valid values for degrees are
  *		0-359. Return 0 on success, otherwise -EERROR.
@@ -182,6 +186,7 @@ struct clk_ops {
 				    unsigned long parent_rate, u8 index);
 	unsigned long	(*recalc_accuracy)(struct clk_hw *hw,
 					   unsigned long parent_accuracy);
+	int		(*get_phase)(struct clk_hw *hw);
 	int		(*set_phase)(struct clk_hw *hw, int degrees);
 	void		(*init)(struct clk_hw *hw);
 	int		(*debug_init)(struct clk_hw *hw, struct dentry *dentry);
-- 
cgit v1.2.3


From e7258c1a269e0967856c81d182c286a78f5ecf15 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:19:55 -0400
Subject: block: Get rid of bdev_integrity_enabled()

bdev_integrity_enabled() is only used by bio_integrity_enabled().
Combine these two functions.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/data-integrity.txt | 10 ---------
 block/bio-integrity.c                  | 39 +++++++++++++++-------------------
 include/linux/bio.h                    |  6 +++---
 3 files changed, 20 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
index 2d735b0ae383..b4eacf48053c 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt
@@ -192,16 +192,6 @@ will require extra work due to the application tag.
     supported by the block device.
 
 
-    int bdev_integrity_enabled(block_device, int rw);
-
-      bdev_integrity_enabled() will return 1 if the block device
-      supports integrity metadata transfer for the data direction
-      specified in 'rw'.
-
-      bdev_integrity_enabled() honors the write_generate and
-      read_verify flags in sysfs and will respond accordingly.
-
-
     int bio_integrity_prep(bio);
 
       To generate IMD for WRITE and to set up buffers for READ, the
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index f14b4abbebd8..36b788552c3e 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -147,24 +147,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
-static int bdev_integrity_enabled(struct block_device *bdev, int rw)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bdev);
-
-	if (bi == NULL)
-		return 0;
-
-	if (rw == READ && bi->verify_fn != NULL &&
-	    (bi->flags & INTEGRITY_FLAG_READ))
-		return 1;
-
-	if (rw == WRITE && bi->generate_fn != NULL &&
-	    (bi->flags & INTEGRITY_FLAG_WRITE))
-		return 1;
-
-	return 0;
-}
-
 /**
  * bio_integrity_enabled - Check whether integrity can be passed
  * @bio:	bio to check
@@ -174,16 +156,29 @@ static int bdev_integrity_enabled(struct block_device *bdev, int rw)
  * set prior to calling.  The functions honors the write_generate and
  * read_verify flags in sysfs.
  */
-int bio_integrity_enabled(struct bio *bio)
+bool bio_integrity_enabled(struct bio *bio)
 {
+	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+
 	if (!bio_is_rw(bio))
-		return 0;
+		return false;
 
 	/* Already protected? */
 	if (bio_integrity(bio))
-		return 0;
+		return false;
+
+	if (bi == NULL)
+		return false;
+
+	if (bio_data_dir(bio) == READ && bi->verify_fn != NULL &&
+	    (bi->flags & INTEGRITY_FLAG_READ))
+		return true;
+
+	if (bio_data_dir(bio) == WRITE && bi->generate_fn != NULL &&
+	    (bi->flags & INTEGRITY_FLAG_WRITE))
+		return true;
 
-	return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio));
+	return false;
 }
 EXPORT_SYMBOL(bio_integrity_enabled);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b39e5000ff58..63e399b4fde5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -666,7 +666,7 @@ struct biovec_slab {
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
 extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern int bio_integrity_enabled(struct bio *bio);
+extern bool bio_integrity_enabled(struct bio *bio);
 extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
 extern int bio_integrity_get_tag(struct bio *, void *, unsigned int);
 extern int bio_integrity_prep(struct bio *);
@@ -685,9 +685,9 @@ static inline int bio_integrity(struct bio *bio)
 	return 0;
 }
 
-static inline int bio_integrity_enabled(struct bio *bio)
+static inline bool bio_integrity_enabled(struct bio *bio)
 {
-	return 0;
+	return false;
 }
 
 static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
-- 
cgit v1.2.3


From 180b2f95dd331010a9930a65c8a18d6d81b94dc1 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:19:56 -0400
Subject: block: Replace bi_integrity with bi_special

For commands like REQ_COPY we need a way to pass extra information along
with each bio. Like integrity metadata this information must be
available at the bottom of the stack so bi_private does not suffice.

Rename the existing bi_integrity field to bi_special and make it a union
so we can have different bio extensions for each class of command.

We previously used bi_integrity != NULL as a way to identify whether a
bio had integrity metadata or not. Introduce a REQ_INTEGRITY to be the
indicator now that bi_special can contain different things.

In addition, bio_integrity(bio) will now return a pointer to the
integrity payload (when applicable).

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/data-integrity.txt | 10 +++++-----
 block/bio-integrity.c                  | 19 ++++++++++---------
 drivers/scsi/sd_dif.c                  |  8 ++++----
 include/linux/bio.h                    | 11 +++++++++--
 include/linux/blk_types.h              |  8 ++++++--
 include/linux/blkdev.h                 |  7 ++-----
 6 files changed, 36 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
index b4eacf48053c..4d4de8b09530 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt
@@ -129,11 +129,11 @@ interface for this is being worked on.
 4.1 BIO
 
 The data integrity patches add a new field to struct bio when
-CONFIG_BLK_DEV_INTEGRITY is enabled.  bio->bi_integrity is a pointer
-to a struct bip which contains the bio integrity payload.  Essentially
-a bip is a trimmed down struct bio which holds a bio_vec containing
-the integrity metadata and the required housekeeping information (bvec
-pool, vector count, etc.)
+CONFIG_BLK_DEV_INTEGRITY is enabled.  bio_integrity(bio) returns a
+pointer to a struct bip which contains the bio integrity payload.
+Essentially a bip is a trimmed down struct bio which holds a bio_vec
+containing the integrity metadata and the required housekeeping
+information (bvec pool, vector count, etc.)
 
 A kernel subsystem can enable data integrity protection on a bio by
 calling bio_integrity_alloc(bio).  This will allocate and attach the
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 36b788552c3e..bd3125c3c124 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -79,6 +79,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 	bip->bip_slab = idx;
 	bip->bip_bio = bio;
 	bio->bi_integrity = bip;
+	bio->bi_rw |= REQ_INTEGRITY;
 
 	return bip;
 err:
@@ -96,7 +97,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
  */
 void bio_integrity_free(struct bio *bio)
 {
-	struct bio_integrity_payload *bip = bio->bi_integrity;
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_set *bs = bio->bi_pool;
 
 	if (bip->bip_owns_buf)
@@ -128,7 +129,7 @@ EXPORT_SYMBOL(bio_integrity_free);
 int bio_integrity_add_page(struct bio *bio, struct page *page,
 			   unsigned int len, unsigned int offset)
 {
-	struct bio_integrity_payload *bip = bio->bi_integrity;
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_vec *iv;
 
 	if (bip->bip_vcnt >= bip->bip_max_vcnt) {
@@ -229,7 +230,7 @@ EXPORT_SYMBOL(bio_integrity_tag_size);
 static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len,
 			     int set)
 {
-	struct bio_integrity_payload *bip = bio->bi_integrity;
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	unsigned int nr_sectors;
 
@@ -304,12 +305,12 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 	struct bio_vec *bv;
 	sector_t sector;
 	unsigned int sectors, ret = 0, i;
-	void *prot_buf = bio->bi_integrity->bip_buf;
+	void *prot_buf = bio_integrity(bio)->bip_buf;
 
 	if (operate)
 		sector = bio->bi_iter.bi_sector;
 	else
-		sector = bio->bi_integrity->bip_iter.bi_sector;
+		sector = bio_integrity(bio)->bip_iter.bi_sector;
 
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	bix.sector_size = bi->sector_size;
@@ -505,7 +506,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  */
 void bio_integrity_endio(struct bio *bio, int error)
 {
-	struct bio_integrity_payload *bip = bio->bi_integrity;
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 
 	BUG_ON(bip->bip_bio != bio);
 
@@ -536,7 +537,7 @@ EXPORT_SYMBOL(bio_integrity_endio);
  */
 void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
 {
-	struct bio_integrity_payload *bip = bio->bi_integrity;
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
 
@@ -558,7 +559,7 @@ EXPORT_SYMBOL(bio_integrity_advance);
 void bio_integrity_trim(struct bio *bio, unsigned int offset,
 			unsigned int sectors)
 {
-	struct bio_integrity_payload *bip = bio->bi_integrity;
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
 	bio_integrity_advance(bio, offset << 9);
@@ -577,7 +578,7 @@ EXPORT_SYMBOL(bio_integrity_trim);
 int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
 			gfp_t gfp_mask)
 {
-	struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
+	struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
 	struct bio_integrity_payload *bip;
 
 	BUG_ON(bip_src == NULL);
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index a7a691d0af7d..29f0477a8708 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -383,9 +383,9 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 		if (bio_flagged(bio, BIO_MAPPED_INTEGRITY))
 			break;
 
-		virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff;
+		virt = bio_integrity(bio)->bip_iter.bi_sector & 0xffffffff;
 
-		bip_for_each_vec(iv, bio->bi_integrity, iter) {
+		bip_for_each_vec(iv, bio_integrity(bio), iter) {
 			sdt = kmap_atomic(iv.bv_page)
 				+ iv.bv_offset;
 
@@ -434,9 +434,9 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 		struct bio_vec iv;
 		struct bvec_iter iter;
 
-		virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff;
+		virt = bio_integrity(bio)->bip_iter.bi_sector & 0xffffffff;
 
-		bip_for_each_vec(iv, bio->bi_integrity, iter) {
+		bip_for_each_vec(iv, bio_integrity(bio), iter) {
 			sdt = kmap_atomic(iv.bv_page)
 				+ iv.bv_offset;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 63e399b4fde5..a810a74071b2 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -293,6 +293,15 @@ static inline unsigned bio_segments(struct bio *bio)
 #define bio_get(bio)	atomic_inc(&(bio)->bi_cnt)
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
+{
+	if (bio->bi_rw & REQ_INTEGRITY)
+		return bio->bi_integrity;
+
+	return NULL;
+}
+
 /*
  * bio integrity payload
  */
@@ -661,8 +670,6 @@ struct biovec_slab {
 	for_each_bio(_bio)						\
 		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
 
-#define bio_integrity(bio) (bio->bi_integrity != NULL)
-
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
 extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index bb7d66460e7a..6a5d2f2de1b9 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -78,9 +78,11 @@ struct bio {
 	struct io_context	*bi_ioc;
 	struct cgroup_subsys_state *bi_css;
 #endif
+	union {
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-	struct bio_integrity_payload *bi_integrity;  /* data integrity */
+		struct bio_integrity_payload *bi_integrity; /* data integrity */
 #endif
+	};
 
 	unsigned short		bi_vcnt;	/* how many bio_vec's */
 
@@ -162,6 +164,7 @@ enum rq_flag_bits {
 	__REQ_WRITE_SAME,	/* write same block many times */
 
 	__REQ_NOIDLE,		/* don't anticipate more IO after this one */
+	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
 	__REQ_FUA,		/* forced unit access */
 	__REQ_FLUSH,		/* request for cache flush */
 
@@ -203,13 +206,14 @@ enum rq_flag_bits {
 #define REQ_DISCARD		(1ULL << __REQ_DISCARD)
 #define REQ_WRITE_SAME		(1ULL << __REQ_WRITE_SAME)
 #define REQ_NOIDLE		(1ULL << __REQ_NOIDLE)
+#define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
 #define REQ_COMMON_MASK \
 	(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
 	 REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
-	 REQ_SECURE)
+	 REQ_SECURE | REQ_INTEGRITY)
 #define REQ_CLONE_MASK		REQ_COMMON_MASK
 
 #define BIO_NO_ADVANCE_ITER_MASK	(REQ_DISCARD|REQ_WRITE_SAME)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49f3461e4272..7fcb2caef559 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1514,12 +1514,9 @@ static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
 	return disk->integrity;
 }
 
-static inline int blk_integrity_rq(struct request *rq)
+static inline bool blk_integrity_rq(struct request *rq)
 {
-	if (rq->bio == NULL)
-		return 0;
-
-	return bio_integrity(rq->bio);
+	return rq->cmd_flags & REQ_INTEGRITY;
 }
 
 static inline void blk_queue_max_integrity_segments(struct request_queue *q,
-- 
cgit v1.2.3


From 8492b68bc4025e7bce1d57761bd7c047efda2f81 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:19:57 -0400
Subject: block: Remove integrity tagging functions

None of the filesystems appear interested in using the integrity tagging
feature. Potentially because very few storage devices actually permit
using the application tag space.

Remove the tagging functions.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/data-integrity.txt | 34 ------------
 block/bio-integrity.c                  | 94 +---------------------------------
 block/blk-integrity.c                  |  2 -
 drivers/scsi/sd_dif.c                  | 65 -----------------------
 include/linux/bio.h                    |  3 --
 include/linux/blkdev.h                 |  4 --
 6 files changed, 1 insertion(+), 201 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
index 4d4de8b09530..f56ec97f0d14 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt
@@ -206,36 +206,6 @@ will require extra work due to the application tag.
       bio_integrity_enabled() returned 1.
 
 
-    int bio_integrity_tag_size(bio);
-
-      If the filesystem wants to use the application tag space it will
-      first have to find out how much storage space is available.
-      Because tag space is generally limited (usually 2 bytes per
-      sector regardless of sector size), the integrity framework
-      supports interleaving the information between the sectors in an
-      I/O.
-
-      Filesystems can call bio_integrity_tag_size(bio) to find out how
-      many bytes of storage are available for that particular bio.
-
-      Another option is bdev_get_tag_size(block_device) which will
-      return the number of available bytes per hardware sector.
-
-
-    int bio_integrity_set_tag(bio, void *tag_buf, len);
-
-      After a successful return from bio_integrity_prep(),
-      bio_integrity_set_tag() can be used to attach an opaque tag
-      buffer to a bio.  Obviously this only makes sense if the I/O is
-      a WRITE.
-
-
-    int bio_integrity_get_tag(bio, void *tag_buf, len);
-
-      Similarly, at READ I/O completion time the filesystem can
-      retrieve the tag buffer using bio_integrity_get_tag().
-
-
 5.3 PASSING EXISTING INTEGRITY METADATA
 
     Filesystems that either generate their own integrity metadata or
@@ -288,8 +258,6 @@ will require extra work due to the application tag.
             .name                   = "STANDARDSBODY-TYPE-VARIANT-CSUM",
             .generate_fn            = my_generate_fn,
        	    .verify_fn              = my_verify_fn,
-       	    .get_tag_fn             = my_get_tag_fn,
-       	    .set_tag_fn             = my_set_tag_fn,
 	    .tuple_size             = sizeof(struct my_tuple_size),
 	    .tag_size               = <tag bytes per hw sector>,
         };
@@ -311,7 +279,5 @@ will require extra work due to the application tag.
       are available per hardware sector.  For DIF this is either 2 or
       0 depending on the value of the Control Mode Page ATO bit.
 
-      See 6.2 for a description of get_tag_fn and set_tag_fn.
-
 ----------------------------------------------------------------------
 2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index bd3125c3c124..367bb24bb9f1 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -209,90 +209,6 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
 	return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size;
 }
 
-/**
- * bio_integrity_tag_size - Retrieve integrity tag space
- * @bio:	bio to inspect
- *
- * Description: Returns the maximum number of tag bytes that can be
- * attached to this bio. Filesystems can use this to determine how
- * much metadata to attach to an I/O.
- */
-unsigned int bio_integrity_tag_size(struct bio *bio)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
-	BUG_ON(bio->bi_iter.bi_size == 0);
-
-	return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size);
-}
-EXPORT_SYMBOL(bio_integrity_tag_size);
-
-static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len,
-			     int set)
-{
-	struct bio_integrity_payload *bip = bio_integrity(bio);
-	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-	unsigned int nr_sectors;
-
-	BUG_ON(bip->bip_buf == NULL);
-
-	if (bi->tag_size == 0)
-		return -1;
-
-	nr_sectors = bio_integrity_hw_sectors(bi,
-					DIV_ROUND_UP(len, bi->tag_size));
-
-	if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) {
-		printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__,
-		       nr_sectors * bi->tuple_size, bip->bip_iter.bi_size);
-		return -1;
-	}
-
-	if (set)
-		bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
-	else
-		bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
-
-	return 0;
-}
-
-/**
- * bio_integrity_set_tag - Attach a tag buffer to a bio
- * @bio:	bio to attach buffer to
- * @tag_buf:	Pointer to a buffer containing tag data
- * @len:	Length of the included buffer
- *
- * Description: Use this function to tag a bio by leveraging the extra
- * space provided by devices formatted with integrity protection.  The
- * size of the integrity buffer must be <= to the size reported by
- * bio_integrity_tag_size().
- */
-int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
-{
-	BUG_ON(bio_data_dir(bio) != WRITE);
-
-	return bio_integrity_tag(bio, tag_buf, len, 1);
-}
-EXPORT_SYMBOL(bio_integrity_set_tag);
-
-/**
- * bio_integrity_get_tag - Retrieve a tag buffer from a bio
- * @bio:	bio to retrieve buffer from
- * @tag_buf:	Pointer to a buffer for the tag data
- * @len:	Length of the target buffer
- *
- * Description: Use this function to retrieve the tag buffer from a
- * completed I/O. The size of the integrity buffer must be <= to the
- * size reported by bio_integrity_tag_size().
- */
-int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
-{
-	BUG_ON(bio_data_dir(bio) != READ);
-
-	return bio_integrity_tag(bio, tag_buf, len, 0);
-}
-EXPORT_SYMBOL(bio_integrity_get_tag);
-
 /**
  * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio
  * @bio:	bio to generate/verify integrity metadata for
@@ -355,14 +271,6 @@ static void bio_integrity_generate(struct bio *bio)
 	bio_integrity_generate_verify(bio, 1);
 }
 
-static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
-{
-	if (bi)
-		return bi->tuple_size;
-
-	return 0;
-}
-
 /**
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
@@ -393,7 +301,7 @@ int bio_integrity_prep(struct bio *bio)
 	sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
 
 	/* Allocate kernel buffer for protection data */
-	len = sectors * blk_integrity_tuple_size(bi);
+	len = sectors * bi->tuple_size;
 	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 7fbab84399e6..7ac17160ab69 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -418,8 +418,6 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 		bi->generate_fn = template->generate_fn;
 		bi->verify_fn = template->verify_fn;
 		bi->tuple_size = template->tuple_size;
-		bi->set_tag_fn = template->set_tag_fn;
-		bi->get_tag_fn = template->get_tag_fn;
 		bi->tag_size = template->tag_size;
 	} else
 		bi->name = bi_unsupported_name;
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 29f0477a8708..38a7778631be 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -128,39 +128,10 @@ static int sd_dif_type1_verify_ip(struct blk_integrity_exchg *bix)
 	return sd_dif_type1_verify(bix, sd_dif_ip_fn);
 }
 
-/*
- * Functions for interleaving and deinterleaving application tags
- */
-static void sd_dif_type1_set_tag(void *prot, void *tag_buf, unsigned int sectors)
-{
-	struct sd_dif_tuple *sdt = prot;
-	u8 *tag = tag_buf;
-	unsigned int i, j;
-
-	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
-		sdt->app_tag = tag[j] << 8 | tag[j+1];
-		BUG_ON(sdt->app_tag == 0xffff);
-	}
-}
-
-static void sd_dif_type1_get_tag(void *prot, void *tag_buf, unsigned int sectors)
-{
-	struct sd_dif_tuple *sdt = prot;
-	u8 *tag = tag_buf;
-	unsigned int i, j;
-
-	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
-		tag[j] = (sdt->app_tag & 0xff00) >> 8;
-		tag[j+1] = sdt->app_tag & 0xff;
-	}
-}
-
 static struct blk_integrity dif_type1_integrity_crc = {
 	.name			= "T10-DIF-TYPE1-CRC",
 	.generate_fn		= sd_dif_type1_generate_crc,
 	.verify_fn		= sd_dif_type1_verify_crc,
-	.get_tag_fn		= sd_dif_type1_get_tag,
-	.set_tag_fn		= sd_dif_type1_set_tag,
 	.tuple_size		= sizeof(struct sd_dif_tuple),
 	.tag_size		= 0,
 };
@@ -169,8 +140,6 @@ static struct blk_integrity dif_type1_integrity_ip = {
 	.name			= "T10-DIF-TYPE1-IP",
 	.generate_fn		= sd_dif_type1_generate_ip,
 	.verify_fn		= sd_dif_type1_verify_ip,
-	.get_tag_fn		= sd_dif_type1_get_tag,
-	.set_tag_fn		= sd_dif_type1_set_tag,
 	.tuple_size		= sizeof(struct sd_dif_tuple),
 	.tag_size		= 0,
 };
@@ -245,42 +214,10 @@ static int sd_dif_type3_verify_ip(struct blk_integrity_exchg *bix)
 	return sd_dif_type3_verify(bix, sd_dif_ip_fn);
 }
 
-static void sd_dif_type3_set_tag(void *prot, void *tag_buf, unsigned int sectors)
-{
-	struct sd_dif_tuple *sdt = prot;
-	u8 *tag = tag_buf;
-	unsigned int i, j;
-
-	for (i = 0, j = 0 ; i < sectors ; i++, j += 6, sdt++) {
-		sdt->app_tag = tag[j] << 8 | tag[j+1];
-		sdt->ref_tag = tag[j+2] << 24 | tag[j+3] << 16 |
-			tag[j+4] << 8 | tag[j+5];
-	}
-}
-
-static void sd_dif_type3_get_tag(void *prot, void *tag_buf, unsigned int sectors)
-{
-	struct sd_dif_tuple *sdt = prot;
-	u8 *tag = tag_buf;
-	unsigned int i, j;
-
-	for (i = 0, j = 0 ; i < sectors ; i++, j += 2, sdt++) {
-		tag[j] = (sdt->app_tag & 0xff00) >> 8;
-		tag[j+1] = sdt->app_tag & 0xff;
-		tag[j+2] = (sdt->ref_tag & 0xff000000) >> 24;
-		tag[j+3] = (sdt->ref_tag & 0xff0000) >> 16;
-		tag[j+4] = (sdt->ref_tag & 0xff00) >> 8;
-		tag[j+5] = sdt->ref_tag & 0xff;
-		BUG_ON(sdt->app_tag == 0xffff || sdt->ref_tag == 0xffffffff);
-	}
-}
-
 static struct blk_integrity dif_type3_integrity_crc = {
 	.name			= "T10-DIF-TYPE3-CRC",
 	.generate_fn		= sd_dif_type3_generate_crc,
 	.verify_fn		= sd_dif_type3_verify_crc,
-	.get_tag_fn		= sd_dif_type3_get_tag,
-	.set_tag_fn		= sd_dif_type3_set_tag,
 	.tuple_size		= sizeof(struct sd_dif_tuple),
 	.tag_size		= 0,
 };
@@ -289,8 +226,6 @@ static struct blk_integrity dif_type3_integrity_ip = {
 	.name			= "T10-DIF-TYPE3-IP",
 	.generate_fn		= sd_dif_type3_generate_ip,
 	.verify_fn		= sd_dif_type3_verify_ip,
-	.get_tag_fn		= sd_dif_type3_get_tag,
-	.set_tag_fn		= sd_dif_type3_set_tag,
 	.tuple_size		= sizeof(struct sd_dif_tuple),
 	.tag_size		= 0,
 };
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a810a74071b2..63a0e53e238c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -362,7 +362,6 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
 
 extern struct bio_set *fs_bio_set;
-unsigned int bio_integrity_tag_size(struct bio *bio);
 
 static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
@@ -674,8 +673,6 @@ extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, un
 extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
 extern bool bio_integrity_enabled(struct bio *bio);
-extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
-extern int bio_integrity_get_tag(struct bio *, void *, unsigned int);
 extern int bio_integrity_prep(struct bio *);
 extern void bio_integrity_endio(struct bio *, int);
 extern void bio_integrity_advance(struct bio *, unsigned int);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7fcb2caef559..0bf5d79d9ba0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1472,14 +1472,10 @@ struct blk_integrity_exchg {
 
 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
-typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
-typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
 
 struct blk_integrity {
 	integrity_gen_fn	*generate_fn;
 	integrity_vrfy_fn	*verify_fn;
-	integrity_set_tag_fn	*set_tag_fn;
-	integrity_get_tag_fn	*get_tag_fn;
 
 	unsigned short		flags;
 	unsigned short		tuple_size;
-- 
cgit v1.2.3


From 5f9378fa9ca214977b5bfc12197c67eea450fc40 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:19:58 -0400
Subject: block: Remove bip_buf

bip_buf is not really needed so we can remove it.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c | 10 ++++++----
 include/linux/bio.h   |  3 ---
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 367bb24bb9f1..e84f7fb8694b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -101,7 +101,8 @@ void bio_integrity_free(struct bio *bio)
 	struct bio_set *bs = bio->bi_pool;
 
 	if (bip->bip_owns_buf)
-		kfree(bip->bip_buf);
+		kfree(page_address(bip->bip_vec->bv_page) +
+		      bip->bip_vec->bv_offset);
 
 	if (bs) {
 		if (bip->bip_slab != BIO_POOL_NONE)
@@ -219,14 +220,16 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_exchg bix;
 	struct bio_vec *bv;
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 	sector_t sector;
 	unsigned int sectors, ret = 0, i;
-	void *prot_buf = bio_integrity(bio)->bip_buf;
+	void *prot_buf = page_address(bip->bip_vec->bv_page) +
+		bip->bip_vec->bv_offset;
 
 	if (operate)
 		sector = bio->bi_iter.bi_sector;
 	else
-		sector = bio_integrity(bio)->bip_iter.bi_sector;
+		sector = bip->bip_iter.bi_sector;
 
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	bix.sector_size = bi->sector_size;
@@ -321,7 +324,6 @@ int bio_integrity_prep(struct bio *bio)
 	}
 
 	bip->bip_owns_buf = 1;
-	bip->bip_buf = buf;
 	bip->bip_iter.bi_size = len;
 	bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 63a0e53e238c..448d8c052cb7 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -310,9 +310,6 @@ struct bio_integrity_payload {
 
 	struct bvec_iter	bip_iter;
 
-	/* kill - should just use bip_vec */
-	void			*bip_buf;	/* generated integrity data */
-
 	bio_end_io_t		*bip_end_io;	/* saved I/O completion fn */
 
 	unsigned short		bip_slab;	/* slab the bip came from */
-- 
cgit v1.2.3


From 3be91c4a3d090bd700bd6ee5bf457c1bbf189a4f Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:19:59 -0400
Subject: block: Deprecate the use of the term sector in the context of block
 integrity

The protection interval is not necessarily tied to the logical block
size of a block device. Stop using the terms "sector" and "sectors".

Going forward we will use the term "seed" to describe the initial
reference tag value for a given I/O. "Interval" will be used to describe
the portion of the data buffer that a given piece of protection
information is associated with.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c  | 42 +++++++++++++++++++++---------------------
 block/blk-integrity.c  | 10 +++++-----
 drivers/scsi/sd_dif.c  | 46 +++++++++++++++++++++++-----------------------
 include/linux/blkdev.h |  6 +++---
 4 files changed, 52 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index e84f7fb8694b..6a3aacf57b19 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -185,20 +185,20 @@ bool bio_integrity_enabled(struct bio *bio)
 EXPORT_SYMBOL(bio_integrity_enabled);
 
 /**
- * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto
+ * bio_integrity_intervals - Return number of integrity intervals for a bio
  * @bi:		blk_integrity profile for device
- * @sectors:	Number of 512 sectors to convert
+ * @sectors:	Size of the bio in 512-byte sectors
  *
  * Description: The block layer calculates everything in 512 byte
- * sectors but integrity metadata is done in terms of the hardware
- * sector size of the storage device.  Convert the block layer sectors
- * to physical sectors.
+ * sectors but integrity metadata is done in terms of the data integrity
+ * interval size of the storage device.  Convert the block layer sectors
+ * to the appropriate number of integrity intervals.
  */
-static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
-						    unsigned int sectors)
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
 {
 	/* At this point there are only 512b or 4096b DIF/EPP devices */
-	if (bi->sector_size == 4096)
+	if (bi->interval == 4096)
 		return sectors >>= 3;
 
 	return sectors;
@@ -207,7 +207,7 @@ static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
 static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
 					       unsigned int sectors)
 {
-	return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size;
+	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
 }
 
 /**
@@ -221,25 +221,25 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 	struct blk_integrity_exchg bix;
 	struct bio_vec *bv;
 	struct bio_integrity_payload *bip = bio_integrity(bio);
-	sector_t sector;
-	unsigned int sectors, ret = 0, i;
+	sector_t seed;
+	unsigned int intervals, ret = 0, i;
 	void *prot_buf = page_address(bip->bip_vec->bv_page) +
 		bip->bip_vec->bv_offset;
 
 	if (operate)
-		sector = bio->bi_iter.bi_sector;
+		seed = bio->bi_iter.bi_sector;
 	else
-		sector = bip->bip_iter.bi_sector;
+		seed = bip->bip_iter.bi_sector;
 
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
-	bix.sector_size = bi->sector_size;
+	bix.interval = bi->interval;
 
 	bio_for_each_segment_all(bv, bio, i) {
 		void *kaddr = kmap_atomic(bv->bv_page);
 		bix.data_buf = kaddr + bv->bv_offset;
 		bix.data_size = bv->bv_len;
 		bix.prot_buf = prot_buf;
-		bix.sector = sector;
+		bix.seed = seed;
 
 		if (operate)
 			bi->generate_fn(&bix);
@@ -251,9 +251,9 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 			}
 		}
 
-		sectors = bv->bv_len / bi->sector_size;
-		sector += sectors;
-		prot_buf += sectors * bi->tuple_size;
+		intervals = bv->bv_len / bi->interval;
+		seed += intervals;
+		prot_buf += intervals * bi->tuple_size;
 
 		kunmap_atomic(kaddr);
 	}
@@ -294,17 +294,17 @@ int bio_integrity_prep(struct bio *bio)
 	unsigned long start, end;
 	unsigned int len, nr_pages;
 	unsigned int bytes, offset, i;
-	unsigned int sectors;
+	unsigned int intervals;
 
 	bi = bdev_get_integrity(bio->bi_bdev);
 	q = bdev_get_queue(bio->bi_bdev);
 	BUG_ON(bi == NULL);
 	BUG_ON(bio_integrity(bio));
 
-	sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
+	intervals = bio_integrity_intervals(bi, bio_sectors(bio));
 
 	/* Allocate kernel buffer for protection data */
-	len = sectors * bi->tuple_size;
+	len = intervals * bi->tuple_size;
 	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 7ac17160ab69..3a83a7d08177 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -154,10 +154,10 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
 	if (!b1 || !b2)
 		return -1;
 
-	if (b1->sector_size != b2->sector_size) {
-		printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
-		       gd1->disk_name, gd2->disk_name,
-		       b1->sector_size, b2->sector_size);
+	if (b1->interval != b2->interval) {
+		pr_err("%s: %s/%s protection interval %u != %u\n",
+		       __func__, gd1->disk_name, gd2->disk_name,
+		       b1->interval, b2->interval);
 		return -1;
 	}
 
@@ -407,7 +407,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 		kobject_uevent(&bi->kobj, KOBJ_ADD);
 
 		bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
-		bi->sector_size = queue_logical_block_size(disk->queue);
+		bi->interval = queue_logical_block_size(disk->queue);
 		disk->integrity = bi;
 	} else
 		bi = disk->integrity;
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 38a7778631be..1600270a46e5 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -57,16 +57,16 @@ static void sd_dif_type1_generate(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
 	struct sd_dif_tuple *sdt = bix->prot_buf;
-	sector_t sector = bix->sector;
+	sector_t seed = bix->seed;
 	unsigned int i;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
-		sdt->guard_tag = fn(buf, bix->sector_size);
-		sdt->ref_tag = cpu_to_be32(sector & 0xffffffff);
+	for (i = 0 ; i < bix->data_size ; i += bix->interval, sdt++) {
+		sdt->guard_tag = fn(buf, bix->interval);
+		sdt->ref_tag = cpu_to_be32(seed & 0xffffffff);
 		sdt->app_tag = 0;
 
-		buf += bix->sector_size;
-		sector++;
+		buf += bix->interval;
+		seed++;
 	}
 }
 
@@ -84,35 +84,35 @@ static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
 	struct sd_dif_tuple *sdt = bix->prot_buf;
-	sector_t sector = bix->sector;
+	sector_t seed = bix->seed;
 	unsigned int i;
 	__u16 csum;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
+	for (i = 0 ; i < bix->data_size ; i += bix->interval, sdt++) {
 		/* Unwritten sectors */
 		if (sdt->app_tag == 0xffff)
 			return 0;
 
-		if (be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) {
+		if (be32_to_cpu(sdt->ref_tag) != (seed & 0xffffffff)) {
 			printk(KERN_ERR
 			       "%s: ref tag error on sector %lu (rcvd %u)\n",
-			       bix->disk_name, (unsigned long)sector,
+			       bix->disk_name, (unsigned long)seed,
 			       be32_to_cpu(sdt->ref_tag));
 			return -EIO;
 		}
 
-		csum = fn(buf, bix->sector_size);
+		csum = fn(buf, bix->interval);
 
 		if (sdt->guard_tag != csum) {
 			printk(KERN_ERR "%s: guard tag error on sector %lu " \
 			       "(rcvd %04x, data %04x)\n", bix->disk_name,
-			       (unsigned long)sector,
+			       (unsigned long)seed,
 			       be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
 			return -EIO;
 		}
 
-		buf += bix->sector_size;
-		sector++;
+		buf += bix->interval;
+		seed++;
 	}
 
 	return 0;
@@ -155,12 +155,12 @@ static void sd_dif_type3_generate(struct blk_integrity_exchg *bix, csum_fn *fn)
 	struct sd_dif_tuple *sdt = bix->prot_buf;
 	unsigned int i;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
-		sdt->guard_tag = fn(buf, bix->sector_size);
+	for (i = 0 ; i < bix->data_size ; i += bix->interval, sdt++) {
+		sdt->guard_tag = fn(buf, bix->interval);
 		sdt->ref_tag = 0;
 		sdt->app_tag = 0;
 
-		buf += bix->sector_size;
+		buf += bix->interval;
 	}
 }
 
@@ -178,27 +178,27 @@ static int sd_dif_type3_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 {
 	void *buf = bix->data_buf;
 	struct sd_dif_tuple *sdt = bix->prot_buf;
-	sector_t sector = bix->sector;
+	sector_t seed = bix->seed;
 	unsigned int i;
 	__u16 csum;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) {
+	for (i = 0 ; i < bix->data_size ; i += bix->interval, sdt++) {
 		/* Unwritten sectors */
 		if (sdt->app_tag == 0xffff && sdt->ref_tag == 0xffffffff)
 			return 0;
 
-		csum = fn(buf, bix->sector_size);
+		csum = fn(buf, bix->interval);
 
 		if (sdt->guard_tag != csum) {
 			printk(KERN_ERR "%s: guard tag error on sector %lu " \
 			       "(rcvd %04x, data %04x)\n", bix->disk_name,
-			       (unsigned long)sector,
+			       (unsigned long)seed,
 			       be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
 			return -EIO;
 		}
 
-		buf += bix->sector_size;
-		sector++;
+		buf += bix->interval;
+		seed++;
 	}
 
 	return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0bf5d79d9ba0..d364c42dbf17 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1464,9 +1464,9 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
 struct blk_integrity_exchg {
 	void			*prot_buf;
 	void			*data_buf;
-	sector_t		sector;
+	sector_t		seed;
 	unsigned int		data_size;
-	unsigned short		sector_size;
+	unsigned short		interval;
 	const char		*disk_name;
 };
 
@@ -1479,7 +1479,7 @@ struct blk_integrity {
 
 	unsigned short		flags;
 	unsigned short		tuple_size;
-	unsigned short		sector_size;
+	unsigned short		interval;
 	unsigned short		tag_size;
 
 	const char		*name;
-- 
cgit v1.2.3


From 1859308853b19c4daf4afaab910d3d52ac1ec2ff Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:20:01 -0400
Subject: block: Clean up the code used to generate and verify integrity
 metadata

Instead of the "operate" parameter we pass in a seed value and a pointer
to a function that can be used to process the integrity metadata. The
generation function is changed to have a return value to fit into this
scheme.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c  |  82 ++++++++++----------------------------
 drivers/scsi/sd_dif.c  | 106 ++++++++++++++++++++++++++-----------------------
 include/linux/bio.h    |  12 ++++++
 include/linux/blkdev.h |   9 ++---
 4 files changed, 94 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index cf40837e7710..fe4de033b34c 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -207,69 +207,43 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
 }
 
 /**
- * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio
+ * bio_integrity_process - Process integrity metadata for a bio
  * @bio:	bio to generate/verify integrity metadata for
- * @operate:	operate number, 1 for generate, 0 for verify
+ * @proc_fn:	Pointer to the relevant processing function
  */
-static int bio_integrity_generate_verify(struct bio *bio, int operate)
+static int bio_integrity_process(struct bio *bio,
+				 integrity_processing_fn *proc_fn)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-	struct blk_integrity_exchg bix;
+	struct blk_integrity_iter iter;
 	struct bio_vec *bv;
 	struct bio_integrity_payload *bip = bio_integrity(bio);
-	sector_t seed;
-	unsigned int intervals, ret = 0, i;
+	unsigned int i, ret = 0;
 	void *prot_buf = page_address(bip->bip_vec->bv_page) +
 		bip->bip_vec->bv_offset;
 
-	if (operate)
-		seed = bio->bi_iter.bi_sector;
-	else
-		seed = bip->bip_iter.bi_sector;
-
-	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
-	bix.interval = bi->interval;
+	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
+	iter.interval = bi->interval;
+	iter.seed = bip_get_seed(bip);
+	iter.prot_buf = prot_buf;
 
 	bio_for_each_segment_all(bv, bio, i) {
 		void *kaddr = kmap_atomic(bv->bv_page);
-		bix.data_buf = kaddr + bv->bv_offset;
-		bix.data_size = bv->bv_len;
-		bix.prot_buf = prot_buf;
-		bix.seed = seed;
-
-		if (operate)
-			bi->generate_fn(&bix);
-		else {
-			ret = bi->verify_fn(&bix);
-			if (ret) {
-				kunmap_atomic(kaddr);
-				return ret;
-			}
-		}
 
-		intervals = bv->bv_len / bi->interval;
-		seed += intervals;
-		prot_buf += intervals * bi->tuple_size;
+		iter.data_buf = kaddr + bv->bv_offset;
+		iter.data_size = bv->bv_len;
+
+		ret = proc_fn(&iter);
+		if (ret) {
+			kunmap_atomic(kaddr);
+			return ret;
+		}
 
 		kunmap_atomic(kaddr);
 	}
 	return ret;
 }
 
-/**
- * bio_integrity_generate - Generate integrity metadata for a bio
- * @bio:	bio to generate integrity metadata for
- *
- * Description: Generates integrity metadata for a bio by calling the
- * block device's generation callback function.  The bio must have a
- * bip attached with enough room to accommodate the generated
- * integrity metadata.
- */
-static void bio_integrity_generate(struct bio *bio)
-{
-	bio_integrity_generate_verify(bio, 1);
-}
-
 /**
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
@@ -321,7 +295,7 @@ int bio_integrity_prep(struct bio *bio)
 
 	bip->bip_owns_buf = 1;
 	bip->bip_iter.bi_size = len;
-	bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
+	bip_set_seed(bip, bio->bi_iter.bi_sector);
 
 	/* Map it */
 	offset = offset_in_page(buf);
@@ -357,25 +331,12 @@ int bio_integrity_prep(struct bio *bio)
 
 	/* Auto-generate integrity metadata if this is a write */
 	if (bio_data_dir(bio) == WRITE)
-		bio_integrity_generate(bio);
+		bio_integrity_process(bio, bi->generate_fn);
 
 	return 0;
 }
 EXPORT_SYMBOL(bio_integrity_prep);
 
-/**
- * bio_integrity_verify - Verify integrity metadata for a bio
- * @bio:	bio to verify
- *
- * Description: This function is called to verify the integrity of a
- * bio.	 The data in the bio io_vec is compared to the integrity
- * metadata returned by the HBA.
- */
-static int bio_integrity_verify(struct bio *bio)
-{
-	return bio_integrity_generate_verify(bio, 0);
-}
-
 /**
  * bio_integrity_verify_fn - Integrity I/O completion worker
  * @work:	Work struct stored in bio to be verified
@@ -389,9 +350,10 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 	struct bio_integrity_payload *bip =
 		container_of(work, struct bio_integrity_payload, bip_work);
 	struct bio *bio = bip->bip_bio;
+	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	int error;
 
-	error = bio_integrity_verify(bio);
+	error = bio_integrity_process(bio, bi->verify_fn);
 
 	/* Restore original bio completion handler */
 	bio->bi_end_io = bip->bip_end_io;
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 1600270a46e5..801c41851a01 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -53,42 +53,44 @@ static __u16 sd_dif_ip_fn(void *data, unsigned int len)
  * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
  * 16 bit app tag, 32 bit reference tag.
  */
-static void sd_dif_type1_generate(struct blk_integrity_exchg *bix, csum_fn *fn)
+static void sd_dif_type1_generate(struct blk_integrity_iter *iter, csum_fn *fn)
 {
-	void *buf = bix->data_buf;
-	struct sd_dif_tuple *sdt = bix->prot_buf;
-	sector_t seed = bix->seed;
+	void *buf = iter->data_buf;
+	struct sd_dif_tuple *sdt = iter->prot_buf;
+	sector_t seed = iter->seed;
 	unsigned int i;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->interval, sdt++) {
-		sdt->guard_tag = fn(buf, bix->interval);
+	for (i = 0 ; i < iter->data_size ; i += iter->interval, sdt++) {
+		sdt->guard_tag = fn(buf, iter->interval);
 		sdt->ref_tag = cpu_to_be32(seed & 0xffffffff);
 		sdt->app_tag = 0;
 
-		buf += bix->interval;
+		buf += iter->interval;
 		seed++;
 	}
 }
 
-static void sd_dif_type1_generate_crc(struct blk_integrity_exchg *bix)
+static int sd_dif_type1_generate_crc(struct blk_integrity_iter *iter)
 {
-	sd_dif_type1_generate(bix, sd_dif_crc_fn);
+	sd_dif_type1_generate(iter, sd_dif_crc_fn);
+	return 0;
 }
 
-static void sd_dif_type1_generate_ip(struct blk_integrity_exchg *bix)
+static int sd_dif_type1_generate_ip(struct blk_integrity_iter *iter)
 {
-	sd_dif_type1_generate(bix, sd_dif_ip_fn);
+	sd_dif_type1_generate(iter, sd_dif_ip_fn);
+	return 0;
 }
 
-static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
+static int sd_dif_type1_verify(struct blk_integrity_iter *iter, csum_fn *fn)
 {
-	void *buf = bix->data_buf;
-	struct sd_dif_tuple *sdt = bix->prot_buf;
-	sector_t seed = bix->seed;
+	void *buf = iter->data_buf;
+	struct sd_dif_tuple *sdt = iter->prot_buf;
+	sector_t seed = iter->seed;
 	unsigned int i;
 	__u16 csum;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->interval, sdt++) {
+	for (i = 0 ; i < iter->data_size ; i += iter->interval, sdt++) {
 		/* Unwritten sectors */
 		if (sdt->app_tag == 0xffff)
 			return 0;
@@ -96,36 +98,36 @@ static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
 		if (be32_to_cpu(sdt->ref_tag) != (seed & 0xffffffff)) {
 			printk(KERN_ERR
 			       "%s: ref tag error on sector %lu (rcvd %u)\n",
-			       bix->disk_name, (unsigned long)seed,
+			       iter->disk_name, (unsigned long)seed,
 			       be32_to_cpu(sdt->ref_tag));
 			return -EIO;
 		}
 
-		csum = fn(buf, bix->interval);
+		csum = fn(buf, iter->interval);
 
 		if (sdt->guard_tag != csum) {
 			printk(KERN_ERR "%s: guard tag error on sector %lu " \
-			       "(rcvd %04x, data %04x)\n", bix->disk_name,
+			       "(rcvd %04x, data %04x)\n", iter->disk_name,
 			       (unsigned long)seed,
 			       be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
 			return -EIO;
 		}
 
-		buf += bix->interval;
+		buf += iter->interval;
 		seed++;
 	}
 
 	return 0;
 }
 
-static int sd_dif_type1_verify_crc(struct blk_integrity_exchg *bix)
+static int sd_dif_type1_verify_crc(struct blk_integrity_iter *iter)
 {
-	return sd_dif_type1_verify(bix, sd_dif_crc_fn);
+	return sd_dif_type1_verify(iter, sd_dif_crc_fn);
 }
 
-static int sd_dif_type1_verify_ip(struct blk_integrity_exchg *bix)
+static int sd_dif_type1_verify_ip(struct blk_integrity_iter *iter)
 {
-	return sd_dif_type1_verify(bix, sd_dif_ip_fn);
+	return sd_dif_type1_verify(iter, sd_dif_ip_fn);
 }
 
 static struct blk_integrity dif_type1_integrity_crc = {
@@ -149,69 +151,71 @@ static struct blk_integrity dif_type1_integrity_ip = {
  * Type 3 protection has a 16-bit guard tag and 16 + 32 bits of opaque
  * tag space.
  */
-static void sd_dif_type3_generate(struct blk_integrity_exchg *bix, csum_fn *fn)
+static void sd_dif_type3_generate(struct blk_integrity_iter *iter, csum_fn *fn)
 {
-	void *buf = bix->data_buf;
-	struct sd_dif_tuple *sdt = bix->prot_buf;
+	void *buf = iter->data_buf;
+	struct sd_dif_tuple *sdt = iter->prot_buf;
 	unsigned int i;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->interval, sdt++) {
-		sdt->guard_tag = fn(buf, bix->interval);
+	for (i = 0 ; i < iter->data_size ; i += iter->interval, sdt++) {
+		sdt->guard_tag = fn(buf, iter->interval);
 		sdt->ref_tag = 0;
 		sdt->app_tag = 0;
 
-		buf += bix->interval;
+		buf += iter->interval;
 	}
 }
 
-static void sd_dif_type3_generate_crc(struct blk_integrity_exchg *bix)
+static int sd_dif_type3_generate_crc(struct blk_integrity_iter *iter)
 {
-	sd_dif_type3_generate(bix, sd_dif_crc_fn);
+	sd_dif_type3_generate(iter, sd_dif_crc_fn);
+	return 0;
 }
 
-static void sd_dif_type3_generate_ip(struct blk_integrity_exchg *bix)
+static int sd_dif_type3_generate_ip(struct blk_integrity_iter *iter)
 {
-	sd_dif_type3_generate(bix, sd_dif_ip_fn);
+	sd_dif_type3_generate(iter, sd_dif_ip_fn);
+	return 0;
 }
 
-static int sd_dif_type3_verify(struct blk_integrity_exchg *bix, csum_fn *fn)
+static int sd_dif_type3_verify(struct blk_integrity_iter *iter, csum_fn *fn)
 {
-	void *buf = bix->data_buf;
-	struct sd_dif_tuple *sdt = bix->prot_buf;
-	sector_t seed = bix->seed;
+	void *buf = iter->data_buf;
+	struct sd_dif_tuple *sdt = iter->prot_buf;
+	sector_t seed = iter->seed;
 	unsigned int i;
 	__u16 csum;
 
-	for (i = 0 ; i < bix->data_size ; i += bix->interval, sdt++) {
+	for (i = 0 ; i < iter->data_size ; i += iter->interval, sdt++) {
 		/* Unwritten sectors */
 		if (sdt->app_tag == 0xffff && sdt->ref_tag == 0xffffffff)
 			return 0;
 
-		csum = fn(buf, bix->interval);
+		csum = fn(buf, iter->interval);
 
 		if (sdt->guard_tag != csum) {
 			printk(KERN_ERR "%s: guard tag error on sector %lu " \
-			       "(rcvd %04x, data %04x)\n", bix->disk_name,
+			       "(rcvd %04x, data %04x)\n", iter->disk_name,
 			       (unsigned long)seed,
 			       be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
 			return -EIO;
 		}
 
-		buf += bix->interval;
+		buf += iter->interval;
 		seed++;
 	}
 
 	return 0;
 }
 
-static int sd_dif_type3_verify_crc(struct blk_integrity_exchg *bix)
+static int sd_dif_type3_verify_crc(struct blk_integrity_iter *iter)
 {
-	return sd_dif_type3_verify(bix, sd_dif_crc_fn);
+	return sd_dif_type3_verify(iter, sd_dif_crc_fn);
 }
 
-static int sd_dif_type3_verify_ip(struct blk_integrity_exchg *bix)
+static int sd_dif_type3_verify_ip(struct blk_integrity_iter *iter)
 {
-	return sd_dif_type3_verify(bix, sd_dif_ip_fn);
+	return sd_dif_type3_verify(iter, sd_dif_ip_fn);
 }
 
 static struct blk_integrity dif_type3_integrity_crc = {
@@ -310,6 +314,7 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 	phys = hw_sector & 0xffffffff;
 
 	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
 		struct bio_vec iv;
 		struct bvec_iter iter;
 		unsigned int j;
@@ -318,9 +323,9 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 		if (bio_flagged(bio, BIO_MAPPED_INTEGRITY))
 			break;
 
-		virt = bio_integrity(bio)->bip_iter.bi_sector & 0xffffffff;
+		virt = bip_get_seed(bip) & 0xffffffff;
 
-		bip_for_each_vec(iv, bio_integrity(bio), iter) {
+		bip_for_each_vec(iv, bip, iter) {
 			sdt = kmap_atomic(iv.bv_page)
 				+ iv.bv_offset;
 
@@ -366,12 +371,13 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 		phys >>= 3;
 
 	__rq_for_each_bio(bio, scmd->request) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
 		struct bio_vec iv;
 		struct bvec_iter iter;
 
-		virt = bio_integrity(bio)->bip_iter.bi_sector & 0xffffffff;
+		virt = bip_get_seed(bip) & 0xffffffff;
 
-		bip_for_each_vec(iv, bio_integrity(bio), iter) {
+		bip_for_each_vec(iv, bip, iter) {
 			sdt = kmap_atomic(iv.bv_page)
 				+ iv.bv_offset;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 448d8c052cb7..3fd36660fd10 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -322,6 +322,18 @@ struct bio_integrity_payload {
 	struct bio_vec		*bip_vec;
 	struct bio_vec		bip_inline_vecs[0];/* embedded bvec array */
 };
+
+static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
+{
+	return bip->bip_iter.bi_sector;
+}
+
+static inline void bip_set_seed(struct bio_integrity_payload *bip,
+				sector_t seed)
+{
+	bip->bip_iter.bi_sector = seed;
+}
+
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 extern void bio_trim(struct bio *bio, int offset, int size);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d364c42dbf17..24c1e055b8a7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1461,7 +1461,7 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
 #define INTEGRITY_FLAG_READ	2	/* verify data integrity on read */
 #define INTEGRITY_FLAG_WRITE	4	/* generate data integrity on write */
 
-struct blk_integrity_exchg {
+struct blk_integrity_iter {
 	void			*prot_buf;
 	void			*data_buf;
 	sector_t		seed;
@@ -1470,12 +1470,11 @@ struct blk_integrity_exchg {
 	const char		*disk_name;
 };
 
-typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
-typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
+typedef int (integrity_processing_fn) (struct blk_integrity_iter *);
 
 struct blk_integrity {
-	integrity_gen_fn	*generate_fn;
-	integrity_vrfy_fn	*verify_fn;
+	integrity_processing_fn	*generate_fn;
+	integrity_processing_fn	*verify_fn;
 
 	unsigned short		flags;
 	unsigned short		tuple_size;
-- 
cgit v1.2.3


From 8288f496eb1b1905c425e92eaf1abbb29119217b Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:20:02 -0400
Subject: block: Add prefix to block integrity profile flags

Add a BLK_ prefix to the integrity profile flags. Also rename the flags
to be more consistent with the generate/verify terminology in the rest
of the integrity code.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c  |  4 ++--
 block/blk-integrity.c  | 43 ++++++++++++++++++++++---------------------
 include/linux/blkdev.h |  6 ++++--
 3 files changed, 28 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index fe4de033b34c..e64733bb29b6 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -173,11 +173,11 @@ bool bio_integrity_enabled(struct bio *bio)
 		return false;
 
 	if (bio_data_dir(bio) == READ && bi->verify_fn != NULL &&
-	    (bi->flags & INTEGRITY_FLAG_READ))
+	    (bi->flags & BLK_INTEGRITY_VERIFY))
 		return true;
 
 	if (bio_data_dir(bio) == WRITE && bi->generate_fn != NULL &&
-	    (bi->flags & INTEGRITY_FLAG_WRITE))
+	    (bi->flags & BLK_INTEGRITY_GENERATE))
 		return true;
 
 	return false;
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 3a83a7d08177..a7436ccc936b 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -269,42 +269,42 @@ static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
 		return sprintf(page, "0\n");
 }
 
-static ssize_t integrity_read_store(struct blk_integrity *bi,
-				    const char *page, size_t count)
+static ssize_t integrity_verify_store(struct blk_integrity *bi,
+				      const char *page, size_t count)
 {
 	char *p = (char *) page;
 	unsigned long val = simple_strtoul(p, &p, 10);
 
 	if (val)
-		bi->flags |= INTEGRITY_FLAG_READ;
+		bi->flags |= BLK_INTEGRITY_VERIFY;
 	else
-		bi->flags &= ~INTEGRITY_FLAG_READ;
+		bi->flags &= ~BLK_INTEGRITY_VERIFY;
 
 	return count;
 }
 
-static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
+static ssize_t integrity_verify_show(struct blk_integrity *bi, char *page)
 {
-	return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0);
+	return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_VERIFY) != 0);
 }
 
-static ssize_t integrity_write_store(struct blk_integrity *bi,
-				     const char *page, size_t count)
+static ssize_t integrity_generate_store(struct blk_integrity *bi,
+					const char *page, size_t count)
 {
 	char *p = (char *) page;
 	unsigned long val = simple_strtoul(p, &p, 10);
 
 	if (val)
-		bi->flags |= INTEGRITY_FLAG_WRITE;
+		bi->flags |= BLK_INTEGRITY_GENERATE;
 	else
-		bi->flags &= ~INTEGRITY_FLAG_WRITE;
+		bi->flags &= ~BLK_INTEGRITY_GENERATE;
 
 	return count;
 }
 
-static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
+static ssize_t integrity_generate_show(struct blk_integrity *bi, char *page)
 {
-	return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0);
+	return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0);
 }
 
 static struct integrity_sysfs_entry integrity_format_entry = {
@@ -317,23 +317,23 @@ static struct integrity_sysfs_entry integrity_tag_size_entry = {
 	.show = integrity_tag_size_show,
 };
 
-static struct integrity_sysfs_entry integrity_read_entry = {
+static struct integrity_sysfs_entry integrity_verify_entry = {
 	.attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
-	.show = integrity_read_show,
-	.store = integrity_read_store,
+	.show = integrity_verify_show,
+	.store = integrity_verify_store,
 };
 
-static struct integrity_sysfs_entry integrity_write_entry = {
+static struct integrity_sysfs_entry integrity_generate_entry = {
 	.attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
-	.show = integrity_write_show,
-	.store = integrity_write_store,
+	.show = integrity_generate_show,
+	.store = integrity_generate_store,
 };
 
 static struct attribute *integrity_attrs[] = {
 	&integrity_format_entry.attr,
 	&integrity_tag_size_entry.attr,
-	&integrity_read_entry.attr,
-	&integrity_write_entry.attr,
+	&integrity_verify_entry.attr,
+	&integrity_generate_entry.attr,
 	NULL,
 };
 
@@ -406,7 +406,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 
 		kobject_uevent(&bi->kobj, KOBJ_ADD);
 
-		bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
+		bi->flags |= BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE;
 		bi->interval = queue_logical_block_size(disk->queue);
 		disk->integrity = bi;
 	} else
@@ -419,6 +419,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 		bi->verify_fn = template->verify_fn;
 		bi->tuple_size = template->tuple_size;
 		bi->tag_size = template->tag_size;
+		bi->flags |= template->flags;
 	} else
 		bi->name = bi_unsupported_name;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 24c1e055b8a7..cf92eb031ae9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1458,8 +1458,10 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
-#define INTEGRITY_FLAG_READ	2	/* verify data integrity on read */
-#define INTEGRITY_FLAG_WRITE	4	/* generate data integrity on write */
+enum blk_integrity_flags {
+	BLK_INTEGRITY_VERIFY		= 1 << 0,
+	BLK_INTEGRITY_GENERATE		= 1 << 1,
+};
 
 struct blk_integrity_iter {
 	void			*prot_buf;
-- 
cgit v1.2.3


From 3aec2f41a8baeb70aa77556a4e4dcec7d9d70b4d Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:20:03 -0400
Subject: block: Add a disk flag to block integrity profile

So far we have relied on the app tag size to determine whether a disk
has been formatted with T10 protection information or not. However, not
all target devices provide application tag storage.

Add a flag to the block integrity profile that indicates whether the
disk has been formatted with protection information.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@dev.mellanox.co.il>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/ABI/testing/sysfs-block |  8 ++++++++
 block/blk-integrity.c                 | 12 ++++++++++++
 drivers/scsi/sd_dif.c                 |  8 +++++++-
 include/linux/blkdev.h                |  1 +
 4 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 279da08f7541..8df003963d99 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -53,6 +53,14 @@ Description:
 		512 bytes of data.
 
 
+What:		/sys/block/<disk>/integrity/device_is_integrity_capable
+Date:		July 2014
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Indicates whether a storage device is capable of storing
+		integrity metadata. Set if the device is T10 PI-capable.
+
+
 What:		/sys/block/<disk>/integrity/write_generate
 Date:		June 2008
 Contact:	Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index a7436ccc936b..1c6ba442cd91 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -307,6 +307,12 @@ static ssize_t integrity_generate_show(struct blk_integrity *bi, char *page)
 	return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0);
 }
 
+static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
+{
+	return sprintf(page, "%u\n",
+		       (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) != 0);
+}
+
 static struct integrity_sysfs_entry integrity_format_entry = {
 	.attr = { .name = "format", .mode = S_IRUGO },
 	.show = integrity_format_show,
@@ -329,11 +335,17 @@ static struct integrity_sysfs_entry integrity_generate_entry = {
 	.store = integrity_generate_store,
 };
 
+static struct integrity_sysfs_entry integrity_device_entry = {
+	.attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
+	.show = integrity_device_show,
+};
+
 static struct attribute *integrity_attrs[] = {
 	&integrity_format_entry.attr,
 	&integrity_tag_size_entry.attr,
 	&integrity_verify_entry.attr,
 	&integrity_generate_entry.attr,
+	&integrity_device_entry.attr,
 	NULL,
 };
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 801c41851a01..1e971c6f8c2b 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -270,7 +270,13 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 		  "Enabling DIX %s protection\n", disk->integrity->name);
 
 	/* Signal to block layer that we support sector tagging */
-	if (dif && type && sdkp->ATO) {
+	if (dif && type) {
+
+		disk->integrity->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+
+		if (!sdkp)
+			return;
+
 		if (type == SD_DIF_TYPE3_PROTECTION)
 			disk->integrity->tag_size = sizeof(u16) + sizeof(u32);
 		else
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cf92eb031ae9..4600fc63e3fc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1461,6 +1461,7 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
 enum blk_integrity_flags {
 	BLK_INTEGRITY_VERIFY		= 1 << 0,
 	BLK_INTEGRITY_GENERATE		= 1 << 1,
+	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
 };
 
 struct blk_integrity_iter {
-- 
cgit v1.2.3


From b1f01388574c9329922f760fc2a7335c2d14b08b Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:20:04 -0400
Subject: block: Relocate bio integrity flags

Move flags affecting the integrity code out of the bio bi_flags and into
the block integrity payload.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c     | 4 ++--
 drivers/scsi/sd_dif.c     | 4 ++--
 include/linux/bio.h       | 9 ++++++++-
 include/linux/blk_types.h | 6 ++----
 4 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index e64733bb29b6..26aa901b961f 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -100,7 +100,7 @@ void bio_integrity_free(struct bio *bio)
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_set *bs = bio->bi_pool;
 
-	if (bip->bip_owns_buf)
+	if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
 		kfree(page_address(bip->bip_vec->bv_page) +
 		      bip->bip_vec->bv_offset);
 
@@ -293,7 +293,7 @@ int bio_integrity_prep(struct bio *bio)
 		return -EIO;
 	}
 
-	bip->bip_owns_buf = 1;
+	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
 	bip->bip_iter.bi_size = len;
 	bip_set_seed(bip, bio->bi_iter.bi_sector);
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 1e971c6f8c2b..4ce636fdc15f 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -326,7 +326,7 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 		unsigned int j;
 
 		/* Already remapped? */
-		if (bio_flagged(bio, BIO_MAPPED_INTEGRITY))
+		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
 			break;
 
 		virt = bip_get_seed(bip) & 0xffffffff;
@@ -347,7 +347,7 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 			kunmap_atomic(sdt);
 		}
 
-		bio->bi_flags |= (1 << BIO_MAPPED_INTEGRITY);
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
 	}
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3fd36660fd10..b508cf69206d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -315,7 +315,7 @@ struct bio_integrity_payload {
 	unsigned short		bip_slab;	/* slab the bip came from */
 	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
 	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
-	unsigned		bip_owns_buf:1;	/* should free bip_buf */
+	unsigned short		bip_flags;	/* control flags */
 
 	struct work_struct	bip_work;	/* I/O completion */
 
@@ -323,6 +323,13 @@ struct bio_integrity_payload {
 	struct bio_vec		bip_inline_vecs[0];/* embedded bvec array */
 };
 
+enum bip_flags {
+	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
+	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
+	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
+	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
+};
+
 static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
 {
 	return bip->bip_iter.bi_sector;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6a5d2f2de1b9..38bc008e4503 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -120,10 +120,8 @@ struct bio {
 #define BIO_USER_MAPPED 6	/* contains user pages */
 #define BIO_EOPNOTSUPP	7	/* not supported */
 #define BIO_NULL_MAPPED 8	/* contains invalid user pages */
-#define BIO_FS_INTEGRITY 9	/* fs owns integrity data, not block layer */
-#define BIO_QUIET	10	/* Make BIO Quiet */
-#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
-#define BIO_SNAP_STABLE	12	/* bio data must be snapshotted during write */
+#define BIO_QUIET	9	/* Make BIO Quiet */
+#define BIO_SNAP_STABLE	10	/* bio data must be snapshotted during write */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
-- 
cgit v1.2.3


From aae7df50190a640e51bfe11c93f94741ac82ff0b Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:20:05 -0400
Subject: block: Integrity checksum flag

Make the choice of checksum a per-I/O property by introducing a flag
that can be inspected by the SCSI layer. There are several reasons for
this:

 1. It allows us to switch choice of checksum without unloading and
    reloading the HBA driver.

 2. During error recovery we need to be able to tell the HBA that
    checksums read from disk should not be verified and converted to IP
    checksums.

 3. For error injection purposes we need to be able to write a bad guard
    tag to storage. Since the storage device only supports T10 CRC we
    need to be able to disable IP checksum conversion on the HBA.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio-integrity.c  | 3 +++
 drivers/scsi/sd_dif.c  | 6 ++++--
 include/linux/bio.h    | 1 +
 include/linux/blkdev.h | 1 +
 4 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 26aa901b961f..8e0548484dd3 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -297,6 +297,9 @@ int bio_integrity_prep(struct bio *bio)
 	bip->bip_iter.bi_size = len;
 	bip_set_seed(bip, bio->bi_iter.bi_sector);
 
+	if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
+		bip->bip_flags |= BIP_IP_CHECKSUM;
+
 	/* Map it */
 	offset = offset_in_page(buf);
 	for (i = 0 ; i < nr_pages ; i++) {
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 4ce636fdc15f..2198abee619e 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -255,12 +255,14 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 		return;
 
 	/* Enable DMA of protection information */
-	if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP)
+	if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) {
 		if (type == SD_DIF_TYPE3_PROTECTION)
 			blk_integrity_register(disk, &dif_type3_integrity_ip);
 		else
 			blk_integrity_register(disk, &dif_type1_integrity_ip);
-	else
+
+		disk->integrity->flags |= BLK_INTEGRITY_IP_CHECKSUM;
+	} else
 		if (type == SD_DIF_TYPE3_PROTECTION)
 			blk_integrity_register(disk, &dif_type3_integrity_crc);
 		else
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b508cf69206d..14bff3fe56d4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -328,6 +328,7 @@ enum bip_flags {
 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
 	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
 	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
+	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
 };
 
 static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4600fc63e3fc..773df190a4ee 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1462,6 +1462,7 @@ enum blk_integrity_flags {
 	BLK_INTEGRITY_VERIFY		= 1 << 0,
 	BLK_INTEGRITY_GENERATE		= 1 << 1,
 	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
+	BLK_INTEGRITY_IP_CHECKSUM	= 1 << 3,
 };
 
 struct blk_integrity_iter {
-- 
cgit v1.2.3


From 4eaf99beadcefbf126fa05e66fb40fca999e09fd Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:20:06 -0400
Subject: block: Don't merge requests if integrity flags differ

We'd occasionally merge requests with conflicting integrity flags.
Introduce a merge helper which checks that the requests have compatible
integrity payloads.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-integrity.c  | 36 ++++++++++++++++++++++++++----------
 block/blk-merge.c      |  6 +++---
 include/linux/blkdev.h | 20 ++++++++++----------
 3 files changed, 39 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 1c6ba442cd91..79ffb4855af0 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -186,37 +186,53 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
 }
 EXPORT_SYMBOL(blk_integrity_compare);
 
-int blk_integrity_merge_rq(struct request_queue *q, struct request *req,
-			   struct request *next)
+bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
+			    struct request *next)
 {
-	if (blk_integrity_rq(req) != blk_integrity_rq(next))
-		return -1;
+	if (blk_integrity_rq(req) == 0 && blk_integrity_rq(next) == 0)
+		return true;
+
+	if (blk_integrity_rq(req) == 0 || blk_integrity_rq(next) == 0)
+		return false;
+
+	if (bio_integrity(req->bio)->bip_flags !=
+	    bio_integrity(next->bio)->bip_flags)
+		return false;
 
 	if (req->nr_integrity_segments + next->nr_integrity_segments >
 	    q->limits.max_integrity_segments)
-		return -1;
+		return false;
 
-	return 0;
+	return true;
 }
 EXPORT_SYMBOL(blk_integrity_merge_rq);
 
-int blk_integrity_merge_bio(struct request_queue *q, struct request *req,
-			    struct bio *bio)
+bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
+			     struct bio *bio)
 {
 	int nr_integrity_segs;
 	struct bio *next = bio->bi_next;
 
+	if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL)
+		return true;
+
+	if (blk_integrity_rq(req) == 0 || bio_integrity(bio) == NULL)
+		return false;
+
+	if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags)
+		return false;
+
 	bio->bi_next = NULL;
 	nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
 	bio->bi_next = next;
 
 	if (req->nr_integrity_segments + nr_integrity_segs >
 	    q->limits.max_integrity_segments)
-		return -1;
+		return false;
 
 	req->nr_integrity_segments += nr_integrity_segs;
 
-	return 0;
+	return true;
 }
 EXPORT_SYMBOL(blk_integrity_merge_bio);
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 77881798f793..f71bad35b4cc 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -313,7 +313,7 @@ static inline int ll_new_hw_segment(struct request_queue *q,
 	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
 		goto no_merge;
 
-	if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio))
+	if (blk_integrity_merge_bio(q, req, bio) == false)
 		goto no_merge;
 
 	/*
@@ -410,7 +410,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	if (total_phys_segments > queue_max_segments(q))
 		return 0;
 
-	if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next))
+	if (blk_integrity_merge_rq(q, req, next) == false)
 		return 0;
 
 	/* Merge is OK... */
@@ -590,7 +590,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 		return false;
 
 	/* only merge integrity protected bio into ditto rq */
-	if (bio_integrity(bio) != blk_integrity_rq(rq))
+	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
 		return false;
 
 	/* must be using the same buffer */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 773df190a4ee..038b40f84c7a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1497,10 +1497,10 @@ extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
 extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
 				   struct scatterlist *);
 extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
-extern int blk_integrity_merge_rq(struct request_queue *, struct request *,
-				  struct request *);
-extern int blk_integrity_merge_bio(struct request_queue *, struct request *,
-				   struct bio *);
+extern bool blk_integrity_merge_rq(struct request_queue *, struct request *,
+				   struct request *);
+extern bool blk_integrity_merge_bio(struct request_queue *, struct request *,
+				    struct bio *);
 
 static inline
 struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
@@ -1580,15 +1580,15 @@ static inline unsigned short queue_max_integrity_segments(struct request_queue *
 {
 	return 0;
 }
-static inline int blk_integrity_merge_rq(struct request_queue *rq,
-					 struct request *r1,
-					 struct request *r2)
+static inline bool blk_integrity_merge_rq(struct request_queue *rq,
+					  struct request *r1,
+					  struct request *r2)
 {
 	return 0;
 }
-static inline int blk_integrity_merge_bio(struct request_queue *rq,
-					  struct request *r,
-					  struct bio *b)
+static inline bool blk_integrity_merge_bio(struct request_queue *rq,
+					   struct request *r,
+					   struct bio *b)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 2341c2f8c33196d02cf5a721746eea4e3c06674a Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:20:07 -0400
Subject: block: Add T10 Protection Information functions

The T10 Protection Information format is also used by some devices that
do not go through the SCSI layer (virtual block devices, NVMe). Relocate
the relevant functions to a block layer library that can be used without
involving SCSI.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/Kconfig              |   1 +
 block/Makefile             |   4 +-
 block/t10-pi.c             | 197 ++++++++++++++++++++++++++++++++++++
 drivers/scsi/Kconfig       |   1 -
 drivers/scsi/sd_dif.c      | 241 ++++-----------------------------------------
 include/linux/crc-t10dif.h |   5 +-
 include/linux/t10-pi.h     |  22 +++++
 7 files changed, 245 insertions(+), 226 deletions(-)
 create mode 100644 block/t10-pi.c
 create mode 100644 include/linux/t10-pi.h

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index 2429515c05c2..161491d0a879 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -77,6 +77,7 @@ config BLK_DEV_BSGLIB
 
 config BLK_DEV_INTEGRITY
 	bool "Block layer data integrity support"
+	select CRC_T10DIF if BLK_DEV_INTEGRITY
 	---help---
 	Some storage devices allow extra information to be
 	stored/retrieved to help protect the data.  The block layer
diff --git a/block/Makefile b/block/Makefile
index a2ce6ac935ec..00ecc97629db 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -20,6 +20,6 @@ obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 
 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= blk-integrity.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
+
diff --git a/block/t10-pi.c b/block/t10-pi.c
new file mode 100644
index 000000000000..24d6e9715318
--- /dev/null
+++ b/block/t10-pi.c
@@ -0,0 +1,197 @@
+/*
+ * t10_pi.c - Functions for generating and verifying T10 Protection
+ *	      Information.
+ *
+ * Copyright (C) 2007, 2008, 2014 Oracle Corporation
+ * Written by: Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/t10-pi.h>
+#include <linux/blkdev.h>
+#include <linux/crc-t10dif.h>
+#include <net/checksum.h>
+
+typedef __be16 (csum_fn) (void *, unsigned int);
+
+static const __be16 APP_ESCAPE = (__force __be16) 0xffff;
+static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff;
+
+static __be16 t10_pi_crc_fn(void *data, unsigned int len)
+{
+	return cpu_to_be16(crc_t10dif(data, len));
+}
+
+static __be16 t10_pi_ip_fn(void *data, unsigned int len)
+{
+	return (__force __be16)ip_compute_csum(data, len);
+}
+
+/*
+ * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
+ * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
+ * tag.
+ */
+static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
+			   unsigned int type)
+{
+	unsigned int i;
+
+	for (i = 0 ; i < iter->data_size ; i += iter->interval) {
+		struct t10_pi_tuple *pi = iter->prot_buf;
+
+		pi->guard_tag = fn(iter->data_buf, iter->interval);
+		pi->app_tag = 0;
+
+		if (type == 1)
+			pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
+		else
+			pi->ref_tag = 0;
+
+		iter->data_buf += iter->interval;
+		iter->prot_buf += sizeof(struct t10_pi_tuple);
+		iter->seed++;
+	}
+
+	return 0;
+}
+
+static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
+				unsigned int type)
+{
+	unsigned int i;
+
+	for (i = 0 ; i < iter->data_size ; i += iter->interval) {
+		struct t10_pi_tuple *pi = iter->prot_buf;
+		__be16 csum;
+
+		switch (type) {
+		case 1:
+		case 2:
+			if (pi->app_tag == APP_ESCAPE)
+				goto next;
+
+			if (be32_to_cpu(pi->ref_tag) !=
+			    lower_32_bits(iter->seed)) {
+				pr_err("%s: ref tag error at location %llu " \
+				       "(rcvd %u)\n", iter->disk_name,
+				       (unsigned long long)
+				       iter->seed, be32_to_cpu(pi->ref_tag));
+				return -EILSEQ;
+			}
+			break;
+		case 3:
+			if (pi->app_tag == APP_ESCAPE &&
+			    pi->ref_tag == REF_ESCAPE)
+				goto next;
+			break;
+		}
+
+		csum = fn(iter->data_buf, iter->interval);
+
+		if (pi->guard_tag != csum) {
+			pr_err("%s: guard tag error at sector %llu " \
+			       "(rcvd %04x, want %04x)\n", iter->disk_name,
+			       (unsigned long long)iter->seed,
+			       be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
+			return -EILSEQ;
+		}
+
+next:
+		iter->data_buf += iter->interval;
+		iter->prot_buf += sizeof(struct t10_pi_tuple);
+		iter->seed++;
+	}
+
+	return 0;
+}
+
+static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+{
+	return t10_pi_generate(iter, t10_pi_crc_fn, 1);
+}
+
+static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
+{
+	return t10_pi_generate(iter, t10_pi_ip_fn, 1);
+}
+
+static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+{
+	return t10_pi_verify(iter, t10_pi_crc_fn, 1);
+}
+
+static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
+{
+	return t10_pi_verify(iter, t10_pi_ip_fn, 1);
+}
+
+static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+{
+	return t10_pi_generate(iter, t10_pi_crc_fn, 3);
+}
+
+static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+{
+	return t10_pi_generate(iter, t10_pi_ip_fn, 3);
+}
+
+static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+{
+	return t10_pi_verify(iter, t10_pi_crc_fn, 3);
+}
+
+static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+{
+	return t10_pi_verify(iter, t10_pi_ip_fn, 3);
+}
+
+struct blk_integrity t10_pi_type1_crc = {
+	.name			= "T10-DIF-TYPE1-CRC",
+	.generate_fn		= t10_pi_type1_generate_crc,
+	.verify_fn		= t10_pi_type1_verify_crc,
+	.tuple_size		= sizeof(struct t10_pi_tuple),
+	.tag_size		= 0,
+};
+EXPORT_SYMBOL(t10_pi_type1_crc);
+
+struct blk_integrity t10_pi_type1_ip = {
+	.name			= "T10-DIF-TYPE1-IP",
+	.generate_fn		= t10_pi_type1_generate_ip,
+	.verify_fn		= t10_pi_type1_verify_ip,
+	.tuple_size		= sizeof(struct t10_pi_tuple),
+	.tag_size		= 0,
+};
+EXPORT_SYMBOL(t10_pi_type1_ip);
+
+struct blk_integrity t10_pi_type3_crc = {
+	.name			= "T10-DIF-TYPE3-CRC",
+	.generate_fn		= t10_pi_type3_generate_crc,
+	.verify_fn		= t10_pi_type3_verify_crc,
+	.tuple_size		= sizeof(struct t10_pi_tuple),
+	.tag_size		= 0,
+};
+EXPORT_SYMBOL(t10_pi_type3_crc);
+
+struct blk_integrity t10_pi_type3_ip = {
+	.name			= "T10-DIF-TYPE3-IP",
+	.generate_fn		= t10_pi_type3_generate_ip,
+	.verify_fn		= t10_pi_type3_verify_ip,
+	.tuple_size		= sizeof(struct t10_pi_tuple),
+	.tag_size		= 0,
+};
+EXPORT_SYMBOL(t10_pi_type3_ip);
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 18a3358eb1d4..9ece13f922a6 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -62,7 +62,6 @@ comment "SCSI support type (disk, tape, CD-ROM)"
 config BLK_DEV_SD
 	tristate "SCSI disk support"
 	depends on SCSI
-	select CRC_T10DIF if BLK_DEV_INTEGRITY
 	---help---
 	  If you want to use SCSI hard disks, Fibre Channel disks,
 	  Serial ATA (SATA) or Parallel ATA (PATA) hard disks,
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 2198abee619e..b7eaeadc18f9 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -21,7 +21,7 @@
  */
 
 #include <linux/blkdev.h>
-#include <linux/crc-t10dif.h>
+#include <linux/t10-pi.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -33,207 +33,8 @@
 #include <scsi/scsi_ioctl.h>
 #include <scsi/scsicam.h>
 
-#include <net/checksum.h>
-
 #include "sd.h"
 
-typedef __u16 (csum_fn) (void *, unsigned int);
-
-static __u16 sd_dif_crc_fn(void *data, unsigned int len)
-{
-	return cpu_to_be16(crc_t10dif(data, len));
-}
-
-static __u16 sd_dif_ip_fn(void *data, unsigned int len)
-{
-	return ip_compute_csum(data, len);
-}
-
-/*
- * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
- * 16 bit app tag, 32 bit reference tag.
- */
-static void sd_dif_type1_generate(struct blk_integrity_iter *iter, csum_fn *fn)
-{
-	void *buf = iter->data_buf;
-	struct sd_dif_tuple *sdt = iter->prot_buf;
-	sector_t seed = iter->seed;
-	unsigned int i;
-
-	for (i = 0 ; i < iter->data_size ; i += iter->interval, sdt++) {
-		sdt->guard_tag = fn(buf, iter->interval);
-		sdt->ref_tag = cpu_to_be32(seed & 0xffffffff);
-		sdt->app_tag = 0;
-
-		buf += iter->interval;
-		seed++;
-	}
-}
-
-static int sd_dif_type1_generate_crc(struct blk_integrity_iter *iter)
-{
-	sd_dif_type1_generate(iter, sd_dif_crc_fn);
-	return 0;
-}
-
-static int sd_dif_type1_generate_ip(struct blk_integrity_iter *iter)
-{
-	sd_dif_type1_generate(iter, sd_dif_ip_fn);
-	return 0;
-}
-
-static int sd_dif_type1_verify(struct blk_integrity_iter *iter, csum_fn *fn)
-{
-	void *buf = iter->data_buf;
-	struct sd_dif_tuple *sdt = iter->prot_buf;
-	sector_t seed = iter->seed;
-	unsigned int i;
-	__u16 csum;
-
-	for (i = 0 ; i < iter->data_size ; i += iter->interval, sdt++) {
-		/* Unwritten sectors */
-		if (sdt->app_tag == 0xffff)
-			return 0;
-
-		if (be32_to_cpu(sdt->ref_tag) != (seed & 0xffffffff)) {
-			printk(KERN_ERR
-			       "%s: ref tag error on sector %lu (rcvd %u)\n",
-			       iter->disk_name, (unsigned long)seed,
-			       be32_to_cpu(sdt->ref_tag));
-			return -EIO;
-		}
-
-		csum = fn(buf, iter->interval);
-
-		if (sdt->guard_tag != csum) {
-			printk(KERN_ERR "%s: guard tag error on sector %lu " \
-			       "(rcvd %04x, data %04x)\n", iter->disk_name,
-			       (unsigned long)seed,
-			       be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
-			return -EIO;
-		}
-
-		buf += iter->interval;
-		seed++;
-	}
-
-	return 0;
-}
-
-static int sd_dif_type1_verify_crc(struct blk_integrity_iter *iter)
-{
-	return sd_dif_type1_verify(iter, sd_dif_crc_fn);
-}
-
-static int sd_dif_type1_verify_ip(struct blk_integrity_iter *iter)
-{
-	return sd_dif_type1_verify(iter, sd_dif_ip_fn);
-}
-
-static struct blk_integrity dif_type1_integrity_crc = {
-	.name			= "T10-DIF-TYPE1-CRC",
-	.generate_fn		= sd_dif_type1_generate_crc,
-	.verify_fn		= sd_dif_type1_verify_crc,
-	.tuple_size		= sizeof(struct sd_dif_tuple),
-	.tag_size		= 0,
-};
-
-static struct blk_integrity dif_type1_integrity_ip = {
-	.name			= "T10-DIF-TYPE1-IP",
-	.generate_fn		= sd_dif_type1_generate_ip,
-	.verify_fn		= sd_dif_type1_verify_ip,
-	.tuple_size		= sizeof(struct sd_dif_tuple),
-	.tag_size		= 0,
-};
-
-
-/*
- * Type 3 protection has a 16-bit guard tag and 16 + 32 bits of opaque
- * tag space.
- */
-static void sd_dif_type3_generate(struct blk_integrity_iter *iter, csum_fn *fn)
-{
-	void *buf = iter->data_buf;
-	struct sd_dif_tuple *sdt = iter->prot_buf;
-	unsigned int i;
-
-	for (i = 0 ; i < iter->data_size ; i += iter->interval, sdt++) {
-		sdt->guard_tag = fn(buf, iter->interval);
-		sdt->ref_tag = 0;
-		sdt->app_tag = 0;
-
-		buf += iter->interval;
-	}
-}
-
-static int sd_dif_type3_generate_crc(struct blk_integrity_iter *iter)
-{
-	sd_dif_type3_generate(iter, sd_dif_crc_fn);
-	return 0;
-}
-
-static int sd_dif_type3_generate_ip(struct blk_integrity_iter *iter)
-{
-	sd_dif_type3_generate(iter, sd_dif_ip_fn);
-	return 0;
-}
-
-static int sd_dif_type3_verify(struct blk_integrity_iter *iter, csum_fn *fn)
-{
-	void *buf = iter->data_buf;
-	struct sd_dif_tuple *sdt = iter->prot_buf;
-	sector_t seed = iter->seed;
-	unsigned int i;
-	__u16 csum;
-
-	for (i = 0 ; i < iter->data_size ; i += iter->interval, sdt++) {
-		/* Unwritten sectors */
-		if (sdt->app_tag == 0xffff && sdt->ref_tag == 0xffffffff)
-			return 0;
-
-		csum = fn(buf, iter->interval);
-
-		if (sdt->guard_tag != csum) {
-			printk(KERN_ERR "%s: guard tag error on sector %lu " \
-			       "(rcvd %04x, data %04x)\n", iter->disk_name,
-			       (unsigned long)seed,
-			       be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
-			return -EIO;
-		}
-
-		buf += iter->interval;
-		seed++;
-	}
-
-	return 0;
-}
-
-static int sd_dif_type3_verify_crc(struct blk_integrity_iter *iter)
-{
-	return sd_dif_type3_verify(iter, sd_dif_crc_fn);
-}
-
-static int sd_dif_type3_verify_ip(struct blk_integrity_iter *iter)
-{
-	return sd_dif_type3_verify(iter, sd_dif_ip_fn);
-}
-
-static struct blk_integrity dif_type3_integrity_crc = {
-	.name			= "T10-DIF-TYPE3-CRC",
-	.generate_fn		= sd_dif_type3_generate_crc,
-	.verify_fn		= sd_dif_type3_verify_crc,
-	.tuple_size		= sizeof(struct sd_dif_tuple),
-	.tag_size		= 0,
-};
-
-static struct blk_integrity dif_type3_integrity_ip = {
-	.name			= "T10-DIF-TYPE3-IP",
-	.generate_fn		= sd_dif_type3_generate_ip,
-	.verify_fn		= sd_dif_type3_verify_ip,
-	.tuple_size		= sizeof(struct sd_dif_tuple),
-	.tag_size		= 0,
-};
-
 /*
  * Configure exchange of protection information between OS and HBA.
  */
@@ -257,16 +58,16 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 	/* Enable DMA of protection information */
 	if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) {
 		if (type == SD_DIF_TYPE3_PROTECTION)
-			blk_integrity_register(disk, &dif_type3_integrity_ip);
+			blk_integrity_register(disk, &t10_pi_type3_ip);
 		else
-			blk_integrity_register(disk, &dif_type1_integrity_ip);
+			blk_integrity_register(disk, &t10_pi_type1_ip);
 
 		disk->integrity->flags |= BLK_INTEGRITY_IP_CHECKSUM;
 	} else
 		if (type == SD_DIF_TYPE3_PROTECTION)
-			blk_integrity_register(disk, &dif_type3_integrity_crc);
+			blk_integrity_register(disk, &t10_pi_type3_crc);
 		else
-			blk_integrity_register(disk, &dif_type1_integrity_crc);
+			blk_integrity_register(disk, &t10_pi_type1_crc);
 
 	sd_printk(KERN_NOTICE, sdkp,
 		  "Enabling DIX %s protection\n", disk->integrity->name);
@@ -308,10 +109,10 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
 void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 		    unsigned int sector_sz)
 {
-	const int tuple_sz = sizeof(struct sd_dif_tuple);
+	const int tuple_sz = sizeof(struct t10_pi_tuple);
 	struct bio *bio;
 	struct scsi_disk *sdkp;
-	struct sd_dif_tuple *sdt;
+	struct t10_pi_tuple *pi;
 	u32 phys, virt;
 
 	sdkp = rq->bio->bi_bdev->bd_disk->private_data;
@@ -334,19 +135,18 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 		virt = bip_get_seed(bip) & 0xffffffff;
 
 		bip_for_each_vec(iv, bip, iter) {
-			sdt = kmap_atomic(iv.bv_page)
-				+ iv.bv_offset;
+			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
 
-			for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) {
+			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
 
-				if (be32_to_cpu(sdt->ref_tag) == virt)
-					sdt->ref_tag = cpu_to_be32(phys);
+				if (be32_to_cpu(pi->ref_tag) == virt)
+					pi->ref_tag = cpu_to_be32(phys);
 
 				virt++;
 				phys++;
 			}
 
-			kunmap_atomic(sdt);
+			kunmap_atomic(pi);
 		}
 
 		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
@@ -359,10 +159,10 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
  */
 void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 {
-	const int tuple_sz = sizeof(struct sd_dif_tuple);
+	const int tuple_sz = sizeof(struct t10_pi_tuple);
 	struct scsi_disk *sdkp;
 	struct bio *bio;
-	struct sd_dif_tuple *sdt;
+	struct t10_pi_tuple *pi;
 	unsigned int j, sectors, sector_sz;
 	u32 phys, virt;
 
@@ -386,25 +186,24 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 		virt = bip_get_seed(bip) & 0xffffffff;
 
 		bip_for_each_vec(iv, bip, iter) {
-			sdt = kmap_atomic(iv.bv_page)
-				+ iv.bv_offset;
+			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
 
-			for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) {
+			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
 
 				if (sectors == 0) {
-					kunmap_atomic(sdt);
+					kunmap_atomic(pi);
 					return;
 				}
 
-				if (be32_to_cpu(sdt->ref_tag) == phys)
-					sdt->ref_tag = cpu_to_be32(virt);
+				if (be32_to_cpu(pi->ref_tag) == phys)
+					pi->ref_tag = cpu_to_be32(virt);
 
 				virt++;
 				phys++;
 				sectors--;
 			}
 
-			kunmap_atomic(sdt);
+			kunmap_atomic(pi);
 		}
 	}
 }
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index b3cb71f0d3b0..cf53d0773ce3 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -6,7 +6,8 @@
 #define CRC_T10DIF_DIGEST_SIZE 2
 #define CRC_T10DIF_BLOCK_SIZE 1
 
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len);
-__u16 crc_t10dif(unsigned char const *, size_t);
+extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer,
+				size_t len);
+extern __u16 crc_t10dif(unsigned char const *, size_t);
 
 #endif
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
new file mode 100644
index 000000000000..6a8b9942632d
--- /dev/null
+++ b/include/linux/t10-pi.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_T10_PI_H
+#define _LINUX_T10_PI_H
+
+#include <linux/types.h>
+#include <linux/blkdev.h>
+
+/*
+ * T10 Protection Information tuple.
+ */
+struct t10_pi_tuple {
+	__be16 guard_tag;	/* Checksum */
+	__be16 app_tag;		/* Opaque storage */
+	__be32 ref_tag;		/* Target LBA or indirect LBA */
+};
+
+
+extern struct blk_integrity t10_pi_type1_crc;
+extern struct blk_integrity t10_pi_type1_ip;
+extern struct blk_integrity t10_pi_type3_crc;
+extern struct blk_integrity t10_pi_type3_ip;
+
+#endif
-- 
cgit v1.2.3


From 7bced397510ab569d31de4c70b39e13355046387 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 30 Dec 2013 12:37:29 -0800
Subject: net_dma: simple removal

Per commit "77873803363c net_dma: mark broken" net_dma is no longer used
and there is no plan to fix it.

This is the mechanical removal of bits in CONFIG_NET_DMA ifdef guards.
Reverting the remainder of the net_dma induced changes is deferred to
subsequent patches.

Marked for stable due to Roman's report of a memory leak in
dma_pin_iovec_pages():

    https://lkml.org/lkml/2014/9/3/177

Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: David Whipple <whipple@securedatainnovations.ch>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: <stable@vger.kernel.org>
Reported-by: Roman Gushchin <klamm@yandex-team.ru>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/ABI/removed/net_dma      |   8 +
 Documentation/networking/ip-sysctl.txt |   6 -
 drivers/dma/Kconfig                    |  12 --
 drivers/dma/Makefile                   |   1 -
 drivers/dma/dmaengine.c                | 104 ------------
 drivers/dma/ioat/dma.c                 |   1 -
 drivers/dma/ioat/dma.h                 |   7 -
 drivers/dma/ioat/dma_v2.c              |   1 -
 drivers/dma/ioat/dma_v3.c              |   1 -
 drivers/dma/iovlock.c                  | 280 ---------------------------------
 include/linux/dmaengine.h              |  22 +--
 include/linux/skbuff.h                 |   8 +-
 include/linux/tcp.h                    |   8 -
 include/net/netdma.h                   |  32 ----
 include/net/sock.h                     |  19 +--
 include/net/tcp.h                      |   8 -
 kernel/sysctl_binary.c                 |   1 -
 net/core/Makefile                      |   1 -
 net/core/dev.c                         |  10 --
 net/core/sock.c                        |   6 -
 net/core/user_dma.c                    | 131 ---------------
 net/dccp/proto.c                       |   4 +-
 net/ipv4/sysctl_net_ipv4.c             |   9 --
 net/ipv4/tcp.c                         | 147 ++---------------
 net/ipv4/tcp_input.c                   |  61 -------
 net/ipv4/tcp_ipv4.c                    |  18 +--
 net/ipv6/tcp_ipv6.c                    |  13 +-
 net/llc/af_llc.c                       |  10 +-
 28 files changed, 35 insertions(+), 894 deletions(-)
 create mode 100644 Documentation/ABI/removed/net_dma
 delete mode 100644 drivers/dma/iovlock.c
 delete mode 100644 include/net/netdma.h
 delete mode 100644 net/core/user_dma.c

(limited to 'include/linux')

diff --git a/Documentation/ABI/removed/net_dma b/Documentation/ABI/removed/net_dma
new file mode 100644
index 000000000000..a173aecc2f18
--- /dev/null
+++ b/Documentation/ABI/removed/net_dma
@@ -0,0 +1,8 @@
+What:		tcp_dma_copybreak sysctl
+Date:		Removed in kernel v3.13
+Contact:	Dan Williams <dan.j.williams@intel.com>
+Description:
+	Formerly the lower limit, in bytes, of the size of socket reads
+	that will be offloaded to a DMA copy engine.  Removed due to
+	coherency issues of the cpu potentially touching the buffers
+	while dma is in flight.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ab42c95f9985..ea8f3b182e70 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -582,12 +582,6 @@ tcp_workaround_signed_windows - BOOLEAN
 	not receive a window scaling option from them.
 	Default: 0
 
-tcp_dma_copybreak - INTEGER
-	Lower limit, in bytes, of the size of socket reads that will be
-	offloaded to a DMA copy engine, if one is present in the system
-	and CONFIG_NET_DMA is enabled.
-	Default: 4096
-
 tcp_thin_linear_timeouts - BOOLEAN
 	Enable dynamic triggering of linear timeouts for thin streams.
 	If set, a check is performed upon retransmission by timeout to
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 605b016bcea4..6b5f37e01a70 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -368,18 +368,6 @@ config DMA_OF
 comment "DMA Clients"
 	depends on DMA_ENGINE
 
-config NET_DMA
-	bool "Network: TCP receive copy offload"
-	depends on DMA_ENGINE && NET
-	default (INTEL_IOATDMA || FSL_DMA)
-	depends on BROKEN
-	help
-	  This enables the use of DMA engines in the network stack to
-	  offload receive copy-to-user operations, freeing CPU cycles.
-
-	  Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise
-	  say N.
-
 config ASYNC_TX_DMA
 	bool "Async_tx: Offload support for the async_tx api"
 	depends on DMA_ENGINE
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index a029d0f4a1be..0c9dc7549327 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -6,7 +6,6 @@ obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o
 obj-$(CONFIG_DMA_ACPI) += acpi-dma.o
 obj-$(CONFIG_DMA_OF) += of-dma.o
 
-obj-$(CONFIG_NET_DMA) += iovlock.o
 obj-$(CONFIG_INTEL_MID_DMAC) += intel_mid_dma.o
 obj-$(CONFIG_DMATEST) += dmatest.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index ed610b497518..268de183b519 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1084,110 +1084,6 @@ dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
 }
 EXPORT_SYMBOL(dmaengine_get_unmap_data);
 
-/**
- * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
- * @chan: DMA channel to offload copy to
- * @dest_pg: destination page
- * @dest_off: offset in page to copy to
- * @src_pg: source page
- * @src_off: offset in page to copy from
- * @len: length
- *
- * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
- * address according to the DMA mapping API rules for streaming mappings.
- * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
- * (kernel memory or locked user space pages).
- */
-dma_cookie_t
-dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
-	unsigned int dest_off, struct page *src_pg, unsigned int src_off,
-	size_t len)
-{
-	struct dma_device *dev = chan->device;
-	struct dma_async_tx_descriptor *tx;
-	struct dmaengine_unmap_data *unmap;
-	dma_cookie_t cookie;
-	unsigned long flags;
-
-	unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOWAIT);
-	if (!unmap)
-		return -ENOMEM;
-
-	unmap->to_cnt = 1;
-	unmap->from_cnt = 1;
-	unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len,
-				      DMA_TO_DEVICE);
-	unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len,
-				      DMA_FROM_DEVICE);
-	unmap->len = len;
-	flags = DMA_CTRL_ACK;
-	tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0],
-					 len, flags);
-
-	if (!tx) {
-		dmaengine_unmap_put(unmap);
-		return -ENOMEM;
-	}
-
-	dma_set_unmap(tx, unmap);
-	cookie = tx->tx_submit(tx);
-	dmaengine_unmap_put(unmap);
-
-	preempt_disable();
-	__this_cpu_add(chan->local->bytes_transferred, len);
-	__this_cpu_inc(chan->local->memcpy_count);
-	preempt_enable();
-
-	return cookie;
-}
-EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
-
-/**
- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
- * @chan: DMA channel to offload copy to
- * @dest: destination address (virtual)
- * @src: source address (virtual)
- * @len: length
- *
- * Both @dest and @src must be mappable to a bus address according to the
- * DMA mapping API rules for streaming mappings.
- * Both @dest and @src must stay memory resident (kernel memory or locked
- * user space pages).
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
-			    void *src, size_t len)
-{
-	return dma_async_memcpy_pg_to_pg(chan, virt_to_page(dest),
-					 (unsigned long) dest & ~PAGE_MASK,
-					 virt_to_page(src),
-					 (unsigned long) src & ~PAGE_MASK, len);
-}
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
-
-/**
- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
- * @chan: DMA channel to offload copy to
- * @page: destination page
- * @offset: offset in page to copy to
- * @kdata: source address (virtual)
- * @len: length
- *
- * Both @page/@offset and @kdata must be mappable to a bus address according
- * to the DMA mapping API rules for streaming mappings.
- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
- * locked user space pages)
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
-			   unsigned int offset, void *kdata, size_t len)
-{
-	return dma_async_memcpy_pg_to_pg(chan, page, offset,
-					 virt_to_page(kdata),
-					 (unsigned long) kdata & ~PAGE_MASK, len);
-}
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
-
 void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 	struct dma_chan *chan)
 {
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index b76c1485933b..940c1502a8b5 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -1222,7 +1222,6 @@ int ioat1_dma_probe(struct ioatdma_device *device, int dca)
 	err = ioat_probe(device);
 	if (err)
 		return err;
-	ioat_set_tcp_copy_break(4096);
 	err = ioat_register(device);
 	if (err)
 		return err;
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index e982f00a9843..d63f68b1aa35 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -214,13 +214,6 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
 #define dump_desc_dbg(c, d) \
 	({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; })
 
-static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
-{
-	#ifdef CONFIG_NET_DMA
-	sysctl_tcp_dma_copybreak = copybreak;
-	#endif
-}
-
 static inline struct ioat_chan_common *
 ioat_chan_by_index(struct ioatdma_device *device, int index)
 {
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 2ce9be498608..695483e6be32 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -900,7 +900,6 @@ int ioat2_dma_probe(struct ioatdma_device *device, int dca)
 	err = ioat_probe(device);
 	if (err)
 		return err;
-	ioat_set_tcp_copy_break(2048);
 
 	list_for_each_entry(c, &dma->channels, device_node) {
 		chan = to_chan_common(c);
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 85971d6e9646..895f869d6c2c 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -1655,7 +1655,6 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
 	err = ioat_probe(device);
 	if (err)
 		return err;
-	ioat_set_tcp_copy_break(262144);
 
 	list_for_each_entry(c, &dma->channels, device_node) {
 		chan = to_chan_common(c);
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
deleted file mode 100644
index bb48a57c2fc1..000000000000
--- a/drivers/dma/iovlock.c
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
- * Portions based on net/core/datagram.c and copyrighted by their authors.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-
-/*
- * This code allows the net stack to make use of a DMA engine for
- * skb to iovec copies.
- */
-
-#include <linux/dmaengine.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <net/tcp.h> /* for memcpy_toiovec */
-#include <asm/io.h>
-#include <asm/uaccess.h>
-
-static int num_pages_spanned(struct iovec *iov)
-{
-	return
-	((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
-	((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT);
-}
-
-/*
- * Pin down all the iovec pages needed for len bytes.
- * Return a struct dma_pinned_list to keep track of pages pinned down.
- *
- * We are allocating a single chunk of memory, and then carving it up into
- * 3 sections, the latter 2 whose size depends on the number of iovecs and the
- * total number of pages, respectively.
- */
-struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len)
-{
-	struct dma_pinned_list *local_list;
-	struct page **pages;
-	int i;
-	int ret;
-	int nr_iovecs = 0;
-	int iovec_len_used = 0;
-	int iovec_pages_used = 0;
-
-	/* don't pin down non-user-based iovecs */
-	if (segment_eq(get_fs(), KERNEL_DS))
-		return NULL;
-
-	/* determine how many iovecs/pages there are, up front */
-	do {
-		iovec_len_used += iov[nr_iovecs].iov_len;
-		iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]);
-		nr_iovecs++;
-	} while (iovec_len_used < len);
-
-	/* single kmalloc for pinned list, page_list[], and the page arrays */
-	local_list = kmalloc(sizeof(*local_list)
-		+ (nr_iovecs * sizeof (struct dma_page_list))
-		+ (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
-	if (!local_list)
-		goto out;
-
-	/* list of pages starts right after the page list array */
-	pages = (struct page **) &local_list->page_list[nr_iovecs];
-
-	local_list->nr_iovecs = 0;
-
-	for (i = 0; i < nr_iovecs; i++) {
-		struct dma_page_list *page_list = &local_list->page_list[i];
-
-		len -= iov[i].iov_len;
-
-		if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len))
-			goto unpin;
-
-		page_list->nr_pages = num_pages_spanned(&iov[i]);
-		page_list->base_address = iov[i].iov_base;
-
-		page_list->pages = pages;
-		pages += page_list->nr_pages;
-
-		/* pin pages down */
-		down_read(&current->mm->mmap_sem);
-		ret = get_user_pages(
-			current,
-			current->mm,
-			(unsigned long) iov[i].iov_base,
-			page_list->nr_pages,
-			1,	/* write */
-			0,	/* force */
-			page_list->pages,
-			NULL);
-		up_read(&current->mm->mmap_sem);
-
-		if (ret != page_list->nr_pages)
-			goto unpin;
-
-		local_list->nr_iovecs = i + 1;
-	}
-
-	return local_list;
-
-unpin:
-	dma_unpin_iovec_pages(local_list);
-out:
-	return NULL;
-}
-
-void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list)
-{
-	int i, j;
-
-	if (!pinned_list)
-		return;
-
-	for (i = 0; i < pinned_list->nr_iovecs; i++) {
-		struct dma_page_list *page_list = &pinned_list->page_list[i];
-		for (j = 0; j < page_list->nr_pages; j++) {
-			set_page_dirty_lock(page_list->pages[j]);
-			page_cache_release(page_list->pages[j]);
-		}
-	}
-
-	kfree(pinned_list);
-}
-
-
-/*
- * We have already pinned down the pages we will be using in the iovecs.
- * Each entry in iov array has corresponding entry in pinned_list->page_list.
- * Using array indexing to keep iov[] and page_list[] in sync.
- * Initial elements in iov array's iov->iov_len will be 0 if already copied into
- *   by another call.
- * iov array length remaining guaranteed to be bigger than len.
- */
-dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
-	struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len)
-{
-	int iov_byte_offset;
-	int copy;
-	dma_cookie_t dma_cookie = 0;
-	int iovec_idx;
-	int page_idx;
-
-	if (!chan)
-		return memcpy_toiovec(iov, kdata, len);
-
-	iovec_idx = 0;
-	while (iovec_idx < pinned_list->nr_iovecs) {
-		struct dma_page_list *page_list;
-
-		/* skip already used-up iovecs */
-		while (!iov[iovec_idx].iov_len)
-			iovec_idx++;
-
-		page_list = &pinned_list->page_list[iovec_idx];
-
-		iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
-		page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
-			 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
-
-		/* break up copies to not cross page boundary */
-		while (iov[iovec_idx].iov_len) {
-			copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
-			copy = min_t(int, copy, iov[iovec_idx].iov_len);
-
-			dma_cookie = dma_async_memcpy_buf_to_pg(chan,
-					page_list->pages[page_idx],
-					iov_byte_offset,
-					kdata,
-					copy);
-			/* poll for a descriptor slot */
-			if (unlikely(dma_cookie < 0)) {
-				dma_async_issue_pending(chan);
-				continue;
-			}
-
-			len -= copy;
-			iov[iovec_idx].iov_len -= copy;
-			iov[iovec_idx].iov_base += copy;
-
-			if (!len)
-				return dma_cookie;
-
-			kdata += copy;
-			iov_byte_offset = 0;
-			page_idx++;
-		}
-		iovec_idx++;
-	}
-
-	/* really bad if we ever run out of iovecs */
-	BUG();
-	return -EFAULT;
-}
-
-dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
-	struct dma_pinned_list *pinned_list, struct page *page,
-	unsigned int offset, size_t len)
-{
-	int iov_byte_offset;
-	int copy;
-	dma_cookie_t dma_cookie = 0;
-	int iovec_idx;
-	int page_idx;
-	int err;
-
-	/* this needs as-yet-unimplemented buf-to-buff, so punt. */
-	/* TODO: use dma for this */
-	if (!chan || !pinned_list) {
-		u8 *vaddr = kmap(page);
-		err = memcpy_toiovec(iov, vaddr + offset, len);
-		kunmap(page);
-		return err;
-	}
-
-	iovec_idx = 0;
-	while (iovec_idx < pinned_list->nr_iovecs) {
-		struct dma_page_list *page_list;
-
-		/* skip already used-up iovecs */
-		while (!iov[iovec_idx].iov_len)
-			iovec_idx++;
-
-		page_list = &pinned_list->page_list[iovec_idx];
-
-		iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
-		page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
-			 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
-
-		/* break up copies to not cross page boundary */
-		while (iov[iovec_idx].iov_len) {
-			copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
-			copy = min_t(int, copy, iov[iovec_idx].iov_len);
-
-			dma_cookie = dma_async_memcpy_pg_to_pg(chan,
-					page_list->pages[page_idx],
-					iov_byte_offset,
-					page,
-					offset,
-					copy);
-			/* poll for a descriptor slot */
-			if (unlikely(dma_cookie < 0)) {
-				dma_async_issue_pending(chan);
-				continue;
-			}
-
-			len -= copy;
-			iov[iovec_idx].iov_len -= copy;
-			iov[iovec_idx].iov_base += copy;
-
-			if (!len)
-				return dma_cookie;
-
-			offset += copy;
-			iov_byte_offset = 0;
-			page_idx++;
-		}
-		iovec_idx++;
-	}
-
-	/* really bad if we ever run out of iovecs */
-	BUG();
-	return -EFAULT;
-}
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c5c92d59e531..3e382ecd1927 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -903,18 +903,6 @@ static inline void dmaengine_put(void)
 }
 #endif
 
-#ifdef CONFIG_NET_DMA
-#define net_dmaengine_get()	dmaengine_get()
-#define net_dmaengine_put()	dmaengine_put()
-#else
-static inline void net_dmaengine_get(void)
-{
-}
-static inline void net_dmaengine_put(void)
-{
-}
-#endif
-
 #ifdef CONFIG_ASYNC_TX_DMA
 #define async_dmaengine_get()	dmaengine_get()
 #define async_dmaengine_put()	dmaengine_put()
@@ -936,16 +924,8 @@ async_dma_find_channel(enum dma_transaction_type type)
 	return NULL;
 }
 #endif /* CONFIG_ASYNC_TX_DMA */
-
-dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
-	void *dest, void *src, size_t len);
-dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
-	struct page *page, unsigned int offset, void *kdata, size_t len);
-dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
-	struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
-	unsigned int src_off, size_t len);
 void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
-	struct dma_chan *chan);
+				  struct dma_chan *chan);
 
 static inline void async_tx_ack(struct dma_async_tx_descriptor *tx)
 {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5e1e6f2d98c2..bdbf7afad6b7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -28,7 +28,6 @@
 #include <linux/textsearch.h>
 #include <net/checksum.h>
 #include <linux/rcupdate.h>
-#include <linux/dmaengine.h>
 #include <linux/hrtimer.h>
 #include <linux/dma-mapping.h>
 #include <linux/netdev_features.h>
@@ -515,11 +514,8 @@ struct sk_buff {
 	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
-#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
-	union {
-		unsigned int	napi_id;
-		dma_cookie_t	dma_cookie;
-	};
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	unsigned int	napi_id;
 #endif
 #ifdef CONFIG_NETWORK_SECMARK
 	__u32			secmark;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4ad0706d40eb..90895b8dc7f2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -19,7 +19,6 @@
 
 
 #include <linux/skbuff.h>
-#include <linux/dmaengine.h>
 #include <net/sock.h>
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -169,13 +168,6 @@ struct tcp_sock {
 		struct iovec		*iov;
 		int			memory;
 		int			len;
-#ifdef CONFIG_NET_DMA
-		/* members for async copy */
-		struct dma_chan		*dma_chan;
-		int			wakeup;
-		struct dma_pinned_list	*pinned_list;
-		dma_cookie_t		dma_cookie;
-#endif
 	} ucopy;
 
 	u32	snd_wl1;	/* Sequence for window update		*/
diff --git a/include/net/netdma.h b/include/net/netdma.h
deleted file mode 100644
index 8ba8ce284eeb..000000000000
--- a/include/net/netdma.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef NETDMA_H
-#define NETDMA_H
-#ifdef CONFIG_NET_DMA
-#include <linux/dmaengine.h>
-#include <linux/skbuff.h>
-
-int dma_skb_copy_datagram_iovec(struct dma_chan* chan,
-		struct sk_buff *skb, int offset, struct iovec *to,
-		size_t len, struct dma_pinned_list *pinned_list);
-
-#endif /* CONFIG_NET_DMA */
-#endif /* NETDMA_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index b9586a137cad..3353b47f3d40 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -231,7 +231,6 @@ struct cg_proto;
   *	@sk_receive_queue: incoming packets
   *	@sk_wmem_alloc: transmit queue bytes committed
   *	@sk_write_queue: Packet sending queue
-  *	@sk_async_wait_queue: DMA copied packets
   *	@sk_omem_alloc: "o" is "option" or "other"
   *	@sk_wmem_queued: persistent queue size
   *	@sk_forward_alloc: space allocated forward
@@ -354,10 +353,6 @@ struct sock {
 	struct sk_filter __rcu	*sk_filter;
 	struct socket_wq __rcu	*sk_wq;
 
-#ifdef CONFIG_NET_DMA
-	struct sk_buff_head	sk_async_wait_queue;
-#endif
-
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
 #endif
@@ -2214,27 +2209,15 @@ void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags);
  * sk_eat_skb - Release a skb if it is no longer needed
  * @sk: socket to eat this skb from
  * @skb: socket buffer to eat
- * @copied_early: flag indicating whether DMA operations copied this data early
  *
  * This routine must be called with interrupts disabled or with the socket
  * locked so that the sk_buff queue operation is ok.
 */
-#ifdef CONFIG_NET_DMA
-static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early)
-{
-	__skb_unlink(skb, &sk->sk_receive_queue);
-	if (!copied_early)
-		__kfree_skb(skb);
-	else
-		__skb_queue_tail(&sk->sk_async_wait_queue, skb);
-}
-#else
-static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early)
+static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_unlink(skb, &sk->sk_receive_queue);
 	__kfree_skb(skb);
 }
-#endif
 
 static inline
 struct net *sock_net(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8c4dd63134d4..2c2f24ffa383 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -27,7 +27,6 @@
 #include <linux/cache.h>
 #include <linux/percpu.h>
 #include <linux/skbuff.h>
-#include <linux/dmaengine.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
 #include <linux/kref.h>
@@ -267,7 +266,6 @@ extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
-extern int sysctl_tcp_dma_copybreak;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
@@ -1023,12 +1021,6 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
 	tp->ucopy.len = 0;
 	tp->ucopy.memory = 0;
 	skb_queue_head_init(&tp->ucopy.prequeue);
-#ifdef CONFIG_NET_DMA
-	tp->ucopy.dma_chan = NULL;
-	tp->ucopy.wakeup = 0;
-	tp->ucopy.pinned_list = NULL;
-	tp->ucopy.dma_cookie = 0;
-#endif
 }
 
 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 653cbbd9e7ad..d457005acedf 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -390,7 +390,6 @@ static const struct bin_table bin_net_ipv4_table[] = {
 	{ CTL_INT,	NET_TCP_MTU_PROBING,			"tcp_mtu_probing" },
 	{ CTL_INT,	NET_TCP_BASE_MSS,			"tcp_base_mss" },
 	{ CTL_INT,	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS,	"tcp_workaround_signed_windows" },
-	{ CTL_INT,	NET_TCP_DMA_COPYBREAK,			"tcp_dma_copybreak" },
 	{ CTL_INT,	NET_TCP_SLOW_START_AFTER_IDLE,		"tcp_slow_start_after_idle" },
 	{ CTL_INT,	NET_CIPSOV4_CACHE_ENABLE,		"cipso_cache_enable" },
 	{ CTL_INT,	NET_CIPSOV4_CACHE_BUCKET_SIZE,		"cipso_cache_bucket_size" },
diff --git a/net/core/Makefile b/net/core/Makefile
index 9628c20acff6..5038f1ea0349 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,7 +16,6 @@ obj-y += net-sysfs.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
-obj-$(CONFIG_NET_DMA) += user_dma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
diff --git a/net/core/dev.c b/net/core/dev.c
index b1b0c8d4d7df..5e37e9abe8c5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1266,7 +1266,6 @@ static int __dev_open(struct net_device *dev)
 		clear_bit(__LINK_STATE_START, &dev->state);
 	else {
 		dev->flags |= IFF_UP;
-		net_dmaengine_get();
 		dev_set_rx_mode(dev);
 		dev_activate(dev);
 		add_device_randomness(dev->dev_addr, dev->addr_len);
@@ -1342,7 +1341,6 @@ static int __dev_close_many(struct list_head *head)
 			ops->ndo_stop(dev);
 
 		dev->flags &= ~IFF_UP;
-		net_dmaengine_put();
 	}
 
 	return 0;
@@ -4405,14 +4403,6 @@ static void net_rx_action(struct softirq_action *h)
 out:
 	net_rps_action_and_irq_enable(sd);
 
-#ifdef CONFIG_NET_DMA
-	/*
-	 * There may not be any more sk_buffs coming right now, so push
-	 * any pending DMA copies to hardware
-	 */
-	dma_issue_pending_all();
-#endif
-
 	return;
 
 softnet_break:
diff --git a/net/core/sock.c b/net/core/sock.c
index c0fc6bdad1e3..2f143c3b190a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1452,9 +1452,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		atomic_set(&newsk->sk_omem_alloc, 0);
 		skb_queue_head_init(&newsk->sk_receive_queue);
 		skb_queue_head_init(&newsk->sk_write_queue);
-#ifdef CONFIG_NET_DMA
-		skb_queue_head_init(&newsk->sk_async_wait_queue);
-#endif
 
 		spin_lock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
@@ -2265,9 +2262,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	skb_queue_head_init(&sk->sk_receive_queue);
 	skb_queue_head_init(&sk->sk_write_queue);
 	skb_queue_head_init(&sk->sk_error_queue);
-#ifdef CONFIG_NET_DMA
-	skb_queue_head_init(&sk->sk_async_wait_queue);
-#endif
 
 	sk->sk_send_head	=	NULL;
 
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
deleted file mode 100644
index 1b5fefdb8198..000000000000
--- a/net/core/user_dma.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
- * Portions based on net/core/datagram.c and copyrighted by their authors.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-
-/*
- * This code allows the net stack to make use of a DMA engine for
- * skb to iovec copies.
- */
-
-#include <linux/dmaengine.h>
-#include <linux/socket.h>
-#include <linux/export.h>
-#include <net/tcp.h>
-#include <net/netdma.h>
-
-#define NET_DMA_DEFAULT_COPYBREAK 4096
-
-int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
-EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);
-
-/**
- *	dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
- *	@skb - buffer to copy
- *	@offset - offset in the buffer to start copying from
- *	@iovec - io vector to copy to
- *	@len - amount of data to copy from buffer to iovec
- *	@pinned_list - locked iovec buffer data
- *
- *	Note: the iovec is modified during the copy.
- */
-int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
-			struct sk_buff *skb, int offset, struct iovec *to,
-			size_t len, struct dma_pinned_list *pinned_list)
-{
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
-	struct sk_buff *frag_iter;
-	dma_cookie_t cookie = 0;
-
-	/* Copy header. */
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
-					    skb->data + offset, copy);
-		if (cookie < 0)
-			goto fault;
-		len -= copy;
-		if (len == 0)
-			goto end;
-		offset += copy;
-	}
-
-	/* Copy paged appendix. Hmm... why does this look so complicated? */
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-		WARN_ON(start > offset + len);
-
-		end = start + skb_frag_size(frag);
-		copy = end - offset;
-		if (copy > 0) {
-			struct page *page = skb_frag_page(frag);
-
-			if (copy > len)
-				copy = len;
-
-			cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
-					frag->page_offset + offset - start, copy);
-			if (cookie < 0)
-				goto fault;
-			len -= copy;
-			if (len == 0)
-				goto end;
-			offset += copy;
-		}
-		start = end;
-	}
-
-	skb_walk_frags(skb, frag_iter) {
-		int end;
-
-		WARN_ON(start > offset + len);
-
-		end = start + frag_iter->len;
-		copy = end - offset;
-		if (copy > 0) {
-			if (copy > len)
-				copy = len;
-			cookie = dma_skb_copy_datagram_iovec(chan, frag_iter,
-							     offset - start,
-							     to, copy,
-							     pinned_list);
-			if (cookie < 0)
-				goto fault;
-			len -= copy;
-			if (len == 0)
-				goto end;
-			offset += copy;
-		}
-		start = end;
-	}
-
-end:
-	if (!len) {
-		skb->dma_cookie = cookie;
-		return cookie;
-	}
-
-fault:
-	return -EFAULT;
-}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index eb892b4f4814..f9076f295b13 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -848,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		default:
 			dccp_pr_debug("packet_type=%s\n",
 				      dccp_packet_name(dh->dccph_type));
-			sk_eat_skb(sk, skb, false);
+			sk_eat_skb(sk, skb);
 		}
 verify_sock_status:
 		if (sock_flag(sk, SOCK_DONE)) {
@@ -905,7 +905,7 @@ verify_sock_status:
 			len = skb->len;
 	found_fin_ok:
 		if (!(flags & MSG_PEEK))
-			sk_eat_skb(sk, skb, false);
+			sk_eat_skb(sk, skb);
 		break;
 	} while (1);
 out:
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 44eba052b43d..c3d2a48481f1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -635,15 +635,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-#ifdef CONFIG_NET_DMA
-	{
-		.procname	= "tcp_dma_copybreak",
-		.data		= &sysctl_tcp_dma_copybreak,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-#endif
 	{
 		.procname	= "tcp_slow_start_after_idle",
 		.data		= &sysctl_tcp_slow_start_after_idle,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 97c8f5620c43..28595a364f09 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -274,7 +274,6 @@
 #include <net/tcp.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
-#include <net/netdma.h>
 #include <net/sock.h>
 
 #include <asm/uaccess.h>
@@ -1454,39 +1453,6 @@ static void tcp_prequeue_process(struct sock *sk)
 	tp->ucopy.memory = 0;
 }
 
-#ifdef CONFIG_NET_DMA
-static void tcp_service_net_dma(struct sock *sk, bool wait)
-{
-	dma_cookie_t done, used;
-	dma_cookie_t last_issued;
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (!tp->ucopy.dma_chan)
-		return;
-
-	last_issued = tp->ucopy.dma_cookie;
-	dma_async_issue_pending(tp->ucopy.dma_chan);
-
-	do {
-		if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
-					      last_issued, &done,
-					      &used) == DMA_COMPLETE) {
-			/* Safe to free early-copied skbs now */
-			__skb_queue_purge(&sk->sk_async_wait_queue);
-			break;
-		} else {
-			struct sk_buff *skb;
-			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
-			       (dma_async_is_complete(skb->dma_cookie, done,
-						      used) == DMA_COMPLETE)) {
-				__skb_dequeue(&sk->sk_async_wait_queue);
-				kfree_skb(skb);
-			}
-		}
-	} while (wait);
-}
-#endif
-
 static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 {
 	struct sk_buff *skb;
@@ -1504,7 +1470,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 		 * splitted a fat GRO packet, while we released socket lock
 		 * in skb_splice_bits()
 		 */
-		sk_eat_skb(sk, skb, false);
+		sk_eat_skb(sk, skb);
 	}
 	return NULL;
 }
@@ -1570,11 +1536,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 				continue;
 		}
 		if (tcp_hdr(skb)->fin) {
-			sk_eat_skb(sk, skb, false);
+			sk_eat_skb(sk, skb);
 			++seq;
 			break;
 		}
-		sk_eat_skb(sk, skb, false);
+		sk_eat_skb(sk, skb);
 		if (!desc->count)
 			break;
 		tp->copied_seq = seq;
@@ -1612,7 +1578,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int target;		/* Read at least this many bytes */
 	long timeo;
 	struct task_struct *user_recv = NULL;
-	bool copied_early = false;
 	struct sk_buff *skb;
 	u32 urg_hole = 0;
 
@@ -1655,28 +1620,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
-#ifdef CONFIG_NET_DMA
-	tp->ucopy.dma_chan = NULL;
-	preempt_disable();
-	skb = skb_peek_tail(&sk->sk_receive_queue);
-	{
-		int available = 0;
-
-		if (skb)
-			available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
-		if ((available < target) &&
-		    (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
-		    !sysctl_tcp_low_latency &&
-		    net_dma_find_channel()) {
-			preempt_enable();
-			tp->ucopy.pinned_list =
-					dma_pin_iovec_pages(msg->msg_iov, len);
-		} else {
-			preempt_enable();
-		}
-	}
-#endif
-
 	do {
 		u32 offset;
 
@@ -1807,16 +1750,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			/* __ Set realtime policy in scheduler __ */
 		}
 
-#ifdef CONFIG_NET_DMA
-		if (tp->ucopy.dma_chan) {
-			if (tp->rcv_wnd == 0 &&
-			    !skb_queue_empty(&sk->sk_async_wait_queue)) {
-				tcp_service_net_dma(sk, true);
-				tcp_cleanup_rbuf(sk, copied);
-			} else
-				dma_async_issue_pending(tp->ucopy.dma_chan);
-		}
-#endif
 		if (copied >= target) {
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
@@ -1824,11 +1757,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		} else
 			sk_wait_data(sk, &timeo);
 
-#ifdef CONFIG_NET_DMA
-		tcp_service_net_dma(sk, false);  /* Don't block */
-		tp->ucopy.wakeup = 0;
-#endif
-
 		if (user_recv) {
 			int chunk;
 
@@ -1886,43 +1814,13 @@ do_prequeue:
 		}
 
 		if (!(flags & MSG_TRUNC)) {
-#ifdef CONFIG_NET_DMA
-			if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-				tp->ucopy.dma_chan = net_dma_find_channel();
-
-			if (tp->ucopy.dma_chan) {
-				tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
-					tp->ucopy.dma_chan, skb, offset,
-					msg->msg_iov, used,
-					tp->ucopy.pinned_list);
-
-				if (tp->ucopy.dma_cookie < 0) {
-
-					pr_alert("%s: dma_cookie < 0\n",
-						 __func__);
-
-					/* Exception. Bailout! */
-					if (!copied)
-						copied = -EFAULT;
-					break;
-				}
-
-				dma_async_issue_pending(tp->ucopy.dma_chan);
-
-				if ((offset + used) == skb->len)
-					copied_early = true;
-
-			} else
-#endif
-			{
-				err = skb_copy_datagram_iovec(skb, offset,
-						msg->msg_iov, used);
-				if (err) {
-					/* Exception. Bailout! */
-					if (!copied)
-						copied = -EFAULT;
-					break;
-				}
+			err = skb_copy_datagram_iovec(skb, offset,
+						      msg->msg_iov, used);
+			if (err) {
+				/* Exception. Bailout! */
+				if (!copied)
+					copied = -EFAULT;
+				break;
 			}
 		}
 
@@ -1942,19 +1840,15 @@ skip_copy:
 
 		if (tcp_hdr(skb)->fin)
 			goto found_fin_ok;
-		if (!(flags & MSG_PEEK)) {
-			sk_eat_skb(sk, skb, copied_early);
-			copied_early = false;
-		}
+		if (!(flags & MSG_PEEK))
+			sk_eat_skb(sk, skb);
 		continue;
 
 	found_fin_ok:
 		/* Process the FIN. */
 		++*seq;
-		if (!(flags & MSG_PEEK)) {
-			sk_eat_skb(sk, skb, copied_early);
-			copied_early = false;
-		}
+		if (!(flags & MSG_PEEK))
+			sk_eat_skb(sk, skb);
 		break;
 	} while (len > 0);
 
@@ -1977,16 +1871,6 @@ skip_copy:
 		tp->ucopy.len = 0;
 	}
 
-#ifdef CONFIG_NET_DMA
-	tcp_service_net_dma(sk, true);  /* Wait for queue to drain */
-	tp->ucopy.dma_chan = NULL;
-
-	if (tp->ucopy.pinned_list) {
-		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
-		tp->ucopy.pinned_list = NULL;
-	}
-#endif
-
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */
@@ -2330,9 +2214,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	__skb_queue_purge(&sk->sk_receive_queue);
 	tcp_write_queue_purge(sk);
 	__skb_queue_purge(&tp->out_of_order_queue);
-#ifdef CONFIG_NET_DMA
-	__skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
 
 	inet->inet_dport = 0;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eeaac399420d..1342e9851f97 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -73,7 +73,6 @@
 #include <net/inet_common.h>
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
-#include <net/netdma.h>
 
 int sysctl_tcp_timestamps __read_mostly = 1;
 int sysctl_tcp_window_scaling __read_mostly = 1;
@@ -4970,53 +4969,6 @@ static inline bool tcp_checksum_complete_user(struct sock *sk,
 	       __tcp_checksum_complete_user(sk, skb);
 }
 
-#ifdef CONFIG_NET_DMA
-static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
-				  int hlen)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int chunk = skb->len - hlen;
-	int dma_cookie;
-	bool copied_early = false;
-
-	if (tp->ucopy.wakeup)
-		return false;
-
-	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-		tp->ucopy.dma_chan = net_dma_find_channel();
-
-	if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
-
-		dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
-							 skb, hlen,
-							 tp->ucopy.iov, chunk,
-							 tp->ucopy.pinned_list);
-
-		if (dma_cookie < 0)
-			goto out;
-
-		tp->ucopy.dma_cookie = dma_cookie;
-		copied_early = true;
-
-		tp->ucopy.len -= chunk;
-		tp->copied_seq += chunk;
-		tcp_rcv_space_adjust(sk);
-
-		if ((tp->ucopy.len == 0) ||
-		    (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
-		    (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
-			tp->ucopy.wakeup = 1;
-			sk->sk_data_ready(sk, 0);
-		}
-	} else if (chunk > 0) {
-		tp->ucopy.wakeup = 1;
-		sk->sk_data_ready(sk, 0);
-	}
-out:
-	return copied_early;
-}
-#endif /* CONFIG_NET_DMA */
-
 /* Does PAWS and seqno based validation of an incoming segment, flags will
  * play significant role here.
  */
@@ -5201,14 +5153,6 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 			if (tp->copied_seq == tp->rcv_nxt &&
 			    len - tcp_header_len <= tp->ucopy.len) {
-#ifdef CONFIG_NET_DMA
-				if (tp->ucopy.task == current &&
-				    sock_owned_by_user(sk) &&
-				    tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
-					copied_early = 1;
-					eaten = 1;
-				}
-#endif
 				if (tp->ucopy.task == current &&
 				    sock_owned_by_user(sk) && !copied_early) {
 					__set_current_state(TASK_RUNNING);
@@ -5274,11 +5218,6 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
 				__tcp_ack_snd_check(sk, 0);
 no_ack:
-#ifdef CONFIG_NET_DMA
-			if (copied_early)
-				__skb_queue_tail(&sk->sk_async_wait_queue, skb);
-			else
-#endif
 			if (eaten)
 				kfree_skb_partial(skb, fragstolen);
 			sk->sk_data_ready(sk, 0);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3cf976510497..737c2e270ee3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,7 +72,6 @@
 #include <net/inet_common.h>
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
-#include <net/netdma.h>
 #include <net/secure_seq.h>
 #include <net/tcp_memcontrol.h>
 #include <net/busy_poll.h>
@@ -1999,18 +1998,8 @@ process:
 	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
-		struct tcp_sock *tp = tcp_sk(sk);
-		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-			tp->ucopy.dma_chan = net_dma_find_channel();
-		if (tp->ucopy.dma_chan)
+		if (!tcp_prequeue(sk, skb))
 			ret = tcp_v4_do_rcv(sk, skb);
-		else
-#endif
-		{
-			if (!tcp_prequeue(sk, skb))
-				ret = tcp_v4_do_rcv(sk, skb);
-		}
 	} else if (unlikely(sk_add_backlog(sk, skb,
 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
 		bh_unlock_sock(sk);
@@ -2169,11 +2158,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	}
 #endif
 
-#ifdef CONFIG_NET_DMA
-	/* Cleans up our sk_async_wait_queue */
-	__skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
-
 	/* Clean prequeue, it must be empty really */
 	__skb_queue_purge(&tp->ucopy.prequeue);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 889079b2ea85..cb21fccf2089 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -59,7 +59,6 @@
 #include <net/snmp.h>
 #include <net/dsfield.h>
 #include <net/timewait_sock.h>
-#include <net/netdma.h>
 #include <net/inet_common.h>
 #include <net/secure_seq.h>
 #include <net/tcp_memcontrol.h>
@@ -1520,18 +1519,8 @@ process:
 	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
-		struct tcp_sock *tp = tcp_sk(sk);
-		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-			tp->ucopy.dma_chan = net_dma_find_channel();
-		if (tp->ucopy.dma_chan)
+		if (!tcp_prequeue(sk, skb))
 			ret = tcp_v6_do_rcv(sk, skb);
-		else
-#endif
-		{
-			if (!tcp_prequeue(sk, skb))
-				ret = tcp_v6_do_rcv(sk, skb);
-		}
 	} else if (unlikely(sk_add_backlog(sk, skb,
 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
 		bh_unlock_sock(sk);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 0080d2b0a8ae..bb9cbc17d926 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -839,7 +839,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		if (!(flags & MSG_PEEK)) {
 			spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
-			sk_eat_skb(sk, skb, false);
+			sk_eat_skb(sk, skb);
 			spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
 			*seq = 0;
 		}
@@ -861,10 +861,10 @@ copy_uaddr:
 		llc_cmsg_rcv(msg, skb);
 
 	if (!(flags & MSG_PEEK)) {
-			spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
-			sk_eat_skb(sk, skb, false);
-			spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
-			*seq = 0;
+		spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
+		sk_eat_skb(sk, skb);
+		spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
+		*seq = 0;
 	}
 
 	goto out;
-- 
cgit v1.2.3


From 3d9a0d2f8212879407e58d67f460d8920eb6543d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 Sep 2014 23:04:56 -0700
Subject: dql: dql_queued() should write first to reduce bus transactions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While doing high throughput test on a BQL enabled NIC,
I found a very high cost in ndo_start_xmit() when accessing BQL data.

It turned out the problem was caused by compiler trying to be
smart, but involving a bad MESI transaction :

  0.05 │  mov    0xc0(%rax),%edi    // LOAD dql->num_queued
  0.48 │  mov    %edx,0xc8(%rax)    // STORE dql->last_obj_cnt = count
 58.23 │  add    %edx,%edi
  0.58 │  cmp    %edi,0xc4(%rax)
  0.76 │  mov    %edi,0xc0(%rax)    // STORE dql->num_queued += count
  0.72 │  js     bd8

I got an incredible 10 % gain [1] by making sure cpu do not attempt
to get the cache line in Shared mode, but directly requests for
ownership.

New code :
	mov    %edx,0xc8(%rax)  // STORE dql->last_obj_cnt = count
	add    %edx,0xc0(%rax)  // RMW   dql->num_queued += count
	mov    0xc4(%rax),%ecx  // LOAD dql->adj_limit
	mov    0xc0(%rax),%edx  // LOAD dql->num_queued
	cmp    %edx,%ecx

The TX completion was running from another cpu, with high interrupts
rate.

Note that I am using barrier() as a soft hint, as mb() here could be
too heavy cost.

[1] This was a netperf TCP_STREAM with TSO disabled, but GSO enabled.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dynamic_queue_limits.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 5621547d631b..a4be70398ce1 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -73,14 +73,22 @@ static inline void dql_queued(struct dql *dql, unsigned int count)
 {
 	BUG_ON(count > DQL_MAX_OBJECT);
 
-	dql->num_queued += count;
 	dql->last_obj_cnt = count;
+
+	/* We want to force a write first, so that cpu do not attempt
+	 * to get cache line containing last_obj_cnt, num_queued, adj_limit
+	 * in Shared state, but directly does a Request For Ownership
+	 * It is only a hint, we use barrier() only.
+	 */
+	barrier();
+
+	dql->num_queued += count;
 }
 
 /* Returns how many objects can be queued, < 0 indicates over limit. */
 static inline int dql_avail(const struct dql *dql)
 {
-	return dql->adj_limit - dql->num_queued;
+	return ACCESS_ONCE(dql->adj_limit) - ACCESS_ONCE(dql->num_queued);
 }
 
 /* Record number of completed objects and recalculate the limit. */
-- 
cgit v1.2.3


From c08ee14cc6634457948bc5e26584697208baa02a Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Fri, 12 Sep 2014 15:01:57 +0300
Subject: clk: ti: change clock init to use generic of_clk_init

Previously, the TI clock driver initialized all the clocks hierarchically
under each separate clock provider node. Now, each clock that requires
IO access will instead check their parent node to find out which IO range
to use.

This patch allows the TI clock driver to use a few new features provided
by the generic of_clk_init, and also allows registration of clock nodes
outside the clock hierarchy (for example, any external clocks.)

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Cc: Mike Turquette <mturquette@linaro.org>
Cc: Paul Walmsley <paul@pwsan.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Ujfalusi <peter.ujfalusi@ti.com>
Cc: Jyri Sarha <jsarha@ti.com>
Cc: Stefan Assmann <sassmann@kpanic.de>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/io.c         | 12 +++++--
 arch/arm/mach-omap2/prm_common.c |  2 --
 drivers/clk/ti/clk.c             | 68 +++++++++++++++++++++++++---------------
 include/linux/clk/ti.h           |  1 +
 4 files changed, 54 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 5d0667c119f6..a1b82a994842 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -734,8 +734,16 @@ int __init omap_clk_init(void)
 	ti_clk_init_features();
 
 	ret = of_prcm_init();
-	if (!ret)
-		ret = omap_clk_soc_init();
+	if (ret)
+		return ret;
+
+	of_clk_init(NULL);
+
+	ti_dt_clk_init_retry_clks();
+
+	ti_dt_clockdomains_setup();
+
+	ret = omap_clk_soc_init();
 
 	return ret;
 }
diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c
index 76ca320f007c..3b890807f5e6 100644
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@@ -525,8 +525,6 @@ int __init of_prcm_init(void)
 		memmap_index++;
 	}
 
-	ti_dt_clockdomains_setup();
-
 	return 0;
 }
 
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index b1a6f7144f3f..337abe5909e1 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -25,8 +25,8 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-static int ti_dt_clk_memmap_index;
 struct ti_clk_ll_ops *ti_clk_ll_ops;
+static struct device_node *clocks_node_ptr[CLK_MAX_MEMMAPS];
 
 /**
  * ti_dt_clocks_register - register DT alias clocks during boot
@@ -108,9 +108,21 @@ void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index)
 	struct clk_omap_reg *reg;
 	u32 val;
 	u32 tmp;
+	int i;
 
 	reg = (struct clk_omap_reg *)&tmp;
-	reg->index = ti_dt_clk_memmap_index;
+
+	for (i = 0; i < CLK_MAX_MEMMAPS; i++) {
+		if (clocks_node_ptr[i] == node->parent)
+			break;
+	}
+
+	if (i == CLK_MAX_MEMMAPS) {
+		pr_err("clk-provider not found for %s!\n", node->name);
+		return NULL;
+	}
+
+	reg->index = i;
 
 	if (of_property_read_u32_index(node, "reg", index, &val)) {
 		pr_err("%s must have reg[%d]!\n", node->name, index);
@@ -127,20 +139,14 @@ void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index)
  * @parent: master node
  * @index: internal index for clk_reg_ops
  *
- * Initializes a master clock IP block and its child clock nodes.
- * Regmap is provided for accessing the register space for the
- * IP block and all the clocks under it.
+ * Initializes a master clock IP block. This basically sets up the
+ * mapping from clocks node to the memory map index. All the clocks
+ * are then initialized through the common of_clk_init call, and the
+ * clocks will access their memory maps based on the node layout.
  */
 void ti_dt_clk_init_provider(struct device_node *parent, int index)
 {
-	const struct of_device_id *match;
-	struct device_node *np;
 	struct device_node *clocks;
-	of_clk_init_cb_t clk_init_cb;
-	struct clk_init_item *retry;
-	struct clk_init_item *tmp;
-
-	ti_dt_clk_memmap_index = index;
 
 	/* get clocks for this parent */
 	clocks = of_get_child_by_name(parent, "clocks");
@@ -149,19 +155,31 @@ void ti_dt_clk_init_provider(struct device_node *parent, int index)
 		return;
 	}
 
-	for_each_child_of_node(clocks, np) {
-		match = of_match_node(&__clk_of_table, np);
-		if (!match)
-			continue;
-		clk_init_cb = (of_clk_init_cb_t)match->data;
-		pr_debug("%s: initializing: %s\n", __func__, np->name);
-		clk_init_cb(np);
-	}
+	/* add clocks node info */
+	clocks_node_ptr[index] = clocks;
+}
 
-	list_for_each_entry_safe(retry, tmp, &retry_list, link) {
-		pr_debug("retry-init: %s\n", retry->node->name);
-		retry->func(retry->hw, retry->node);
-		list_del(&retry->link);
-		kfree(retry);
+/**
+ * ti_dt_clk_init_retry_clks - init clocks from the retry list
+ *
+ * Initializes any clocks that have failed to initialize before,
+ * reasons being missing parent node(s) during earlier init. This
+ * typically happens only for DPLLs which need to have both of their
+ * parent clocks ready during init.
+ */
+void ti_dt_clk_init_retry_clks(void)
+{
+	struct clk_init_item *retry;
+	struct clk_init_item *tmp;
+	int retries = 5;
+
+	while (!list_empty(&retry_list) && retries) {
+		list_for_each_entry_safe(retry, tmp, &retry_list, link) {
+			pr_debug("retry-init: %s\n", retry->node->name);
+			retry->func(retry->hw, retry->node);
+			list_del(&retry->link);
+			kfree(retry);
+		}
+		retries--;
 	}
 }
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index e8d8a35034a5..f75acbf70e96 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -292,6 +292,7 @@ void omap2xxx_clkt_vps_init(void);
 void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index);
 void ti_dt_clocks_register(struct ti_dt_clk *oclks);
 void ti_dt_clk_init_provider(struct device_node *np, int index);
+void ti_dt_clk_init_retry_clks(void);
 void ti_dt_clockdomains_setup(void);
 int ti_clk_retry_init(struct device_node *node, struct clk_hw *hw,
 		      ti_of_clk_init_cb_t func);
-- 
cgit v1.2.3


From 3d46e73dfdb840f460e5b06416965d132570ec33 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Wed, 24 Sep 2014 23:05:50 +0400
Subject: usb: rename phy to usb_phy in HCD

The USB PHY member of the HCD structure is renamed to 'usb_phy' and
modifications are done in all drivers accessing it.
This is in preparation to adding the generic PHY support.

Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
[Sergei: added missing 'drivers/usb/misc/lvstest.c' file, resolved rejects,
updated changelog.]
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/host.c   |  2 +-
 drivers/usb/core/hcd.c        | 20 ++++++++++----------
 drivers/usb/core/hub.c        |  8 ++++----
 drivers/usb/host/ehci-fsl.c   | 16 ++++++++--------
 drivers/usb/host/ehci-hub.c   |  2 +-
 drivers/usb/host/ehci-msm.c   |  4 ++--
 drivers/usb/host/ehci-tegra.c | 16 ++++++++--------
 drivers/usb/host/ohci-omap.c  | 20 ++++++++++----------
 drivers/usb/misc/lvstest.c    |  8 ++++----
 include/linux/usb/hcd.h       |  2 +-
 10 files changed, 49 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c
index 0d6b24cb2270..ebde7b6ce687 100644
--- a/drivers/usb/chipidea/host.c
+++ b/drivers/usb/chipidea/host.c
@@ -59,7 +59,7 @@ static int host_start(struct ci_hdrc *ci)
 	hcd->has_tt = 1;
 
 	hcd->power_budget = ci->platdata->power_budget;
-	hcd->phy = ci->transceiver;
+	hcd->usb_phy = ci->transceiver;
 	hcd->tpl_support = ci->platdata->tpl_support;
 
 	ehci = hcd_to_ehci(hcd);
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index bcb96ff207ba..2c56252b9ff8 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2629,7 +2629,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
 	int retval;
 	struct usb_device *rhdev;
 
-	if (IS_ENABLED(CONFIG_USB_PHY) && !hcd->phy) {
+	if (IS_ENABLED(CONFIG_USB_PHY) && !hcd->usb_phy) {
 		struct usb_phy *phy = usb_get_phy_dev(hcd->self.controller, 0);
 
 		if (IS_ERR(phy)) {
@@ -2642,7 +2642,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
 				usb_put_phy(phy);
 				return retval;
 			}
-			hcd->phy = phy;
+			hcd->usb_phy = phy;
 			hcd->remove_phy = 1;
 		}
 	}
@@ -2790,10 +2790,10 @@ err_allocate_root_hub:
 err_register_bus:
 	hcd_buffer_destroy(hcd);
 err_remove_phy:
-	if (hcd->remove_phy && hcd->phy) {
-		usb_phy_shutdown(hcd->phy);
-		usb_put_phy(hcd->phy);
-		hcd->phy = NULL;
+	if (hcd->remove_phy && hcd->usb_phy) {
+		usb_phy_shutdown(hcd->usb_phy);
+		usb_put_phy(hcd->usb_phy);
+		hcd->usb_phy = NULL;
 	}
 	return retval;
 }
@@ -2866,10 +2866,10 @@ void usb_remove_hcd(struct usb_hcd *hcd)
 
 	usb_deregister_bus(&hcd->self);
 	hcd_buffer_destroy(hcd);
-	if (hcd->remove_phy && hcd->phy) {
-		usb_phy_shutdown(hcd->phy);
-		usb_put_phy(hcd->phy);
-		hcd->phy = NULL;
+	if (hcd->remove_phy && hcd->usb_phy) {
+		usb_phy_shutdown(hcd->usb_phy);
+		usb_put_phy(hcd->usb_phy);
+		hcd->usb_phy = NULL;
 	}
 
 	usb_put_invalidate_rhdev(hcd);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 1d21b2c21f9b..11e80ac31324 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4468,8 +4468,8 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 	if (retval)
 		goto fail;
 
-	if (hcd->phy && !hdev->parent)
-		usb_phy_notify_connect(hcd->phy, udev->speed);
+	if (hcd->usb_phy && !hdev->parent)
+		usb_phy_notify_connect(hcd->usb_phy, udev->speed);
 
 	/*
 	 * Some superspeed devices have finished the link training process
@@ -4627,9 +4627,9 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
 
 	/* Disconnect any existing devices under this port */
 	if (udev) {
-		if (hcd->phy && !hdev->parent &&
+		if (hcd->usb_phy && !hdev->parent &&
 				!(portstatus & USB_PORT_STAT_CONNECTION))
-			usb_phy_notify_disconnect(hcd->phy, udev->speed);
+			usb_phy_notify_disconnect(hcd->usb_phy, udev->speed);
 		usb_disconnect(&port_dev->child);
 	}
 
diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
index 3d84b6a41dae..2d2ae8db439e 100644
--- a/drivers/usb/host/ehci-fsl.c
+++ b/drivers/usb/host/ehci-fsl.c
@@ -136,15 +136,15 @@ static int usb_hcd_fsl_probe(const struct hc_driver *driver,
 	if (pdata->operating_mode == FSL_USB2_DR_OTG) {
 		struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 
-		hcd->phy = usb_get_phy(USB_PHY_TYPE_USB2);
+		hcd->usb_phy = usb_get_phy(USB_PHY_TYPE_USB2);
 		dev_dbg(&pdev->dev, "hcd=0x%p  ehci=0x%p, phy=0x%p\n",
-			hcd, ehci, hcd->phy);
+			hcd, ehci, hcd->usb_phy);
 
-		if (!IS_ERR_OR_NULL(hcd->phy)) {
-			retval = otg_set_host(hcd->phy->otg,
+		if (!IS_ERR_OR_NULL(hcd->usb_phy)) {
+			retval = otg_set_host(hcd->usb_phy->otg,
 					      &ehci_to_hcd(ehci)->self);
 			if (retval) {
-				usb_put_phy(hcd->phy);
+				usb_put_phy(hcd->usb_phy);
 				goto err2;
 			}
 		} else {
@@ -181,9 +181,9 @@ static void usb_hcd_fsl_remove(struct usb_hcd *hcd,
 {
 	struct fsl_usb2_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
-	if (!IS_ERR_OR_NULL(hcd->phy)) {
-		otg_set_host(hcd->phy->otg, NULL);
-		usb_put_phy(hcd->phy);
+	if (!IS_ERR_OR_NULL(hcd->usb_phy)) {
+		otg_set_host(hcd->usb_phy->otg, NULL);
+		usb_put_phy(hcd->usb_phy);
 	}
 
 	usb_remove_hcd(hcd);
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index 7ccb3ccf8e86..5728829cf6ef 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -922,7 +922,7 @@ int ehci_hub_control(
 #ifdef CONFIG_USB_OTG
 			if ((hcd->self.otg_port == (wIndex + 1))
 			    && hcd->self.b_hnp_enable) {
-				otg_start_hnp(hcd->phy->otg);
+				otg_start_hnp(hcd->usb_phy->otg);
 				break;
 			}
 #endif
diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c
index 934b39d5e44a..9dc2118ae808 100644
--- a/drivers/usb/host/ehci-msm.c
+++ b/drivers/usb/host/ehci-msm.c
@@ -124,7 +124,7 @@ static int ehci_msm_probe(struct platform_device *pdev)
 		goto put_hcd;
 	}
 
-	hcd->phy = phy;
+	hcd->usb_phy = phy;
 	device_init_wakeup(&pdev->dev, 1);
 	/*
 	 * OTG device parent of HCD takes care of putting
@@ -151,7 +151,7 @@ static int ehci_msm_remove(struct platform_device *pdev)
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_set_suspended(&pdev->dev);
 
-	otg_set_host(hcd->phy->otg, NULL);
+	otg_set_host(hcd->usb_phy->otg, NULL);
 
 	/* FIXME: need to call usb_remove_hcd() here? */
 
diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index 7aafb05e7a40..aaa01971efe9 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -206,7 +206,7 @@ static int tegra_ehci_hub_control(
 		if (tegra->port_resuming && !(temp & PORT_SUSPEND)) {
 			/* Resume completed, re-enable disconnect detection */
 			tegra->port_resuming = 0;
-			tegra_usb_phy_postresume(hcd->phy);
+			tegra_usb_phy_postresume(hcd->usb_phy);
 		}
 	}
 
@@ -259,7 +259,7 @@ static int tegra_ehci_hub_control(
 			goto done;
 
 		/* Disable disconnect detection during port resume */
-		tegra_usb_phy_preresume(hcd->phy);
+		tegra_usb_phy_preresume(hcd->usb_phy);
 
 		ehci->reset_done[wIndex-1] = jiffies + msecs_to_jiffies(25);
 
@@ -454,7 +454,7 @@ static int tegra_ehci_probe(struct platform_device *pdev)
 		err = PTR_ERR(u_phy);
 		goto cleanup_clk_en;
 	}
-	hcd->phy = u_phy;
+	hcd->usb_phy = u_phy;
 
 	tegra->needs_double_reset = of_property_read_bool(pdev->dev.of_node,
 		"nvidia,needs-double-reset");
@@ -475,7 +475,7 @@ static int tegra_ehci_probe(struct platform_device *pdev)
 	ehci->caps = hcd->regs + 0x100;
 	ehci->has_hostpc = soc_config->has_hostpc;
 
-	err = usb_phy_init(hcd->phy);
+	err = usb_phy_init(hcd->usb_phy);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to initialize phy\n");
 		goto cleanup_clk_en;
@@ -490,7 +490,7 @@ static int tegra_ehci_probe(struct platform_device *pdev)
 	}
 	u_phy->otg->host = hcd_to_bus(hcd);
 
-	err = usb_phy_set_suspend(hcd->phy, 0);
+	err = usb_phy_set_suspend(hcd->usb_phy, 0);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to power on the phy\n");
 		goto cleanup_phy;
@@ -517,7 +517,7 @@ static int tegra_ehci_probe(struct platform_device *pdev)
 cleanup_otg_set_host:
 	otg_set_host(u_phy->otg, NULL);
 cleanup_phy:
-	usb_phy_shutdown(hcd->phy);
+	usb_phy_shutdown(hcd->usb_phy);
 cleanup_clk_en:
 	clk_disable_unprepare(tegra->clk);
 cleanup_hcd_create:
@@ -531,9 +531,9 @@ static int tegra_ehci_remove(struct platform_device *pdev)
 	struct tegra_ehci_hcd *tegra =
 		(struct tegra_ehci_hcd *)hcd_to_ehci(hcd)->priv;
 
-	otg_set_host(hcd->phy->otg, NULL);
+	otg_set_host(hcd->usb_phy->otg, NULL);
 
-	usb_phy_shutdown(hcd->phy);
+	usb_phy_shutdown(hcd->usb_phy);
 	usb_remove_hcd(hcd);
 
 	clk_disable_unprepare(tegra->clk);
diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c
index de9428362503..0231606d47c2 100644
--- a/drivers/usb/host/ohci-omap.c
+++ b/drivers/usb/host/ohci-omap.c
@@ -180,10 +180,10 @@ static void start_hnp(struct ohci_hcd *ohci)
 	unsigned long	flags;
 	u32 l;
 
-	otg_start_hnp(hcd->phy->otg);
+	otg_start_hnp(hcd->usb_phy->otg);
 
 	local_irq_save(flags);
-	hcd->phy->state = OTG_STATE_A_SUSPEND;
+	hcd->usb_phy->state = OTG_STATE_A_SUSPEND;
 	writel (RH_PS_PSS, &ohci->regs->roothub.portstatus [port]);
 	l = omap_readl(OTG_CTRL);
 	l &= ~OTG_A_BUSREQ;
@@ -220,14 +220,14 @@ static int ohci_omap_reset(struct usb_hcd *hcd)
 
 #ifdef	CONFIG_USB_OTG
 	if (need_transceiver) {
-		hcd->phy = usb_get_phy(USB_PHY_TYPE_USB2);
-		if (!IS_ERR_OR_NULL(hcd->phy)) {
-			int	status = otg_set_host(hcd->phy->otg,
+		hcd->usb_phy = usb_get_phy(USB_PHY_TYPE_USB2);
+		if (!IS_ERR_OR_NULL(hcd->usb_phy)) {
+			int	status = otg_set_host(hcd->usb_phy->otg,
 						&ohci_to_hcd(ohci)->self);
 			dev_dbg(hcd->self.controller, "init %s phy, status %d\n",
-					hcd->phy->label, status);
+					hcd->usb_phy->label, status);
 			if (status) {
-				usb_put_phy(hcd->phy);
+				usb_put_phy(hcd->usb_phy);
 				return status;
 			}
 		} else {
@@ -399,9 +399,9 @@ usb_hcd_omap_remove (struct usb_hcd *hcd, struct platform_device *pdev)
 	dev_dbg(hcd->self.controller, "stopping USB Controller\n");
 	usb_remove_hcd(hcd);
 	omap_ohci_clock_power(0);
-	if (!IS_ERR_OR_NULL(hcd->phy)) {
-		(void) otg_set_host(hcd->phy->otg, 0);
-		usb_put_phy(hcd->phy);
+	if (!IS_ERR_OR_NULL(hcd->usb_phy)) {
+		(void) otg_set_host(hcd->usb_phy->otg, 0);
+		usb_put_phy(hcd->usb_phy);
 	}
 	if (machine_is_omap_osk())
 		gpio_free(9);
diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c
index 7d589c156fb1..62cb8cd08403 100644
--- a/drivers/usb/misc/lvstest.c
+++ b/drivers/usb/misc/lvstest.c
@@ -333,13 +333,13 @@ static void lvs_rh_work(struct work_struct *work)
 					USB_PORT_STAT_CONNECTION) {
 				lvs->present = true;
 				lvs->portnum = i;
-				if (hcd->phy)
-					usb_phy_notify_connect(hcd->phy,
+				if (hcd->usb_phy)
+					usb_phy_notify_connect(hcd->usb_phy,
 							USB_SPEED_SUPER);
 			} else {
 				lvs->present = false;
-				if (hcd->phy)
-					usb_phy_notify_disconnect(hcd->phy,
+				if (hcd->usb_phy)
+					usb_phy_notify_disconnect(hcd->usb_phy,
 							USB_SPEED_SUPER);
 			}
 			break;
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index a48c89e96988..788059a108e5 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -106,7 +106,7 @@ struct usb_hcd {
 	 * OTG and some Host controllers need software interaction with phys;
 	 * other external phys should be software-transparent
 	 */
-	struct usb_phy	*phy;
+	struct usb_phy		*usb_phy;
 
 	/* Flags that need to be manipulated atomically because they can
 	 * change while the host controller is running.  Always use
-- 
cgit v1.2.3


From 0043325495222139daa0696db736f67658dc7770 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Wed, 24 Sep 2014 23:09:44 +0400
Subject: usb: hcd: add generic PHY support

Add the generic PHY support, analogous to the USB PHY support. Intended it to be
used with the PCI EHCI/OHCI drivers and the xHCI platform driver.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c  | 42 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/usb/hcd.h |  1 +
 2 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 2c56252b9ff8..b84fb141e122 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -42,6 +42,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/types.h>
 
+#include <linux/phy/phy.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
 #include <linux/usb/phy.h>
@@ -2647,6 +2648,29 @@ int usb_add_hcd(struct usb_hcd *hcd,
 		}
 	}
 
+	if (IS_ENABLED(CONFIG_GENERIC_PHY)) {
+		struct phy *phy = phy_get(hcd->self.controller, "usb");
+
+		if (IS_ERR(phy)) {
+			retval = PTR_ERR(phy);
+			if (retval == -EPROBE_DEFER)
+				goto err_phy;
+		} else {
+			retval = phy_init(phy);
+			if (retval) {
+				phy_put(phy);
+				goto err_phy;
+			}
+			retval = phy_power_on(phy);
+			if (retval) {
+				phy_exit(phy);
+				phy_put(phy);
+				goto err_phy;
+			}
+			hcd->phy = phy;
+		}
+	}
+
 	dev_info(hcd->self.controller, "%s\n", hcd->product_desc);
 
 	/* Keep old behaviour if authorized_default is not in [0, 1]. */
@@ -2662,7 +2686,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
 	 */
 	if ((retval = hcd_buffer_create(hcd)) != 0) {
 		dev_dbg(hcd->self.controller, "pool alloc failed\n");
-		goto err_remove_phy;
+		goto err_create_buf;
 	}
 
 	if ((retval = usb_register_bus(&hcd->self)) < 0)
@@ -2789,7 +2813,14 @@ err_allocate_root_hub:
 	usb_deregister_bus(&hcd->self);
 err_register_bus:
 	hcd_buffer_destroy(hcd);
-err_remove_phy:
+err_create_buf:
+	if (IS_ENABLED(CONFIG_GENERIC_PHY) && hcd->phy) {
+		phy_power_off(hcd->phy);
+		phy_exit(hcd->phy);
+		phy_put(hcd->phy);
+		hcd->phy = NULL;
+	}
+err_phy:
 	if (hcd->remove_phy && hcd->usb_phy) {
 		usb_phy_shutdown(hcd->usb_phy);
 		usb_put_phy(hcd->usb_phy);
@@ -2866,6 +2897,13 @@ void usb_remove_hcd(struct usb_hcd *hcd)
 
 	usb_deregister_bus(&hcd->self);
 	hcd_buffer_destroy(hcd);
+
+	if (IS_ENABLED(CONFIG_GENERIC_PHY) && hcd->phy) {
+		phy_power_off(hcd->phy);
+		phy_exit(hcd->phy);
+		phy_put(hcd->phy);
+		hcd->phy = NULL;
+	}
 	if (hcd->remove_phy && hcd->usb_phy) {
 		usb_phy_shutdown(hcd->usb_phy);
 		usb_put_phy(hcd->usb_phy);
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 788059a108e5..cd96a2bc3388 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -107,6 +107,7 @@ struct usb_hcd {
 	 * other external phys should be software-transparent
 	 */
 	struct usb_phy		*usb_phy;
+	struct phy		*phy;
 
 	/* Flags that need to be manipulated atomically because they can
 	 * change while the host controller is running.  Always use
-- 
cgit v1.2.3


From 1f180359f42fc6fda4600175c63f2a84f444cc92 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 29 Sep 2014 16:31:46 +0300
Subject: mei: remove include to pci header from mei module files

Remove inclusion of linux/pci.h in mei layer
however we need to include the headers that before
got included implicitly from linux/pci.h.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/amthif.c    | 2 +-
 drivers/misc/mei/bus.c       | 1 -
 drivers/misc/mei/client.c    | 2 +-
 drivers/misc/mei/debugfs.c   | 1 -
 drivers/misc/mei/hbm.c       | 5 +++--
 drivers/misc/mei/init.c      | 1 -
 drivers/misc/mei/interrupt.c | 2 +-
 drivers/misc/mei/main.c      | 2 +-
 drivers/misc/mei/nfc.c       | 3 ++-
 drivers/misc/mei/wd.c        | 1 -
 include/linux/mei_cl_bus.h   | 1 +
 11 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
index 5d47d1b36ccf..d9b0e761fcd2 100644
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@@ -20,7 +20,6 @@
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/aio.h>
-#include <linux/pci.h>
 #include <linux/ioctl.h>
 #include <linux/cdev.h>
 #include <linux/list.h>
@@ -29,6 +28,7 @@
 #include <linux/uuid.h>
 #include <linux/jiffies.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
 
 #include <linux/mei.h>
 
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 4cc1a66187be..4d20d60ca38d 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -22,7 +22,6 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/interrupt.h>
-#include <linux/pci.h>
 #include <linux/mei_cl_bus.h>
 
 #include "mei_dev.h"
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index bfc3ad24db9c..04a196429e55 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -14,10 +14,10 @@
  *
  */
 
-#include <linux/pci.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/delay.h>
+#include <linux/slab.h>
 #include <linux/pm_runtime.h>
 
 #include <linux/mei.h>
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index e14a7db9a669..aca7847da69f 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/debugfs.h>
-#include <linux/pci.h>
 
 #include <linux/mei.h>
 
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 209650bccf57..c874fdd44ce1 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -15,11 +15,12 @@
  */
 
 #include <linux/export.h>
-#include <linux/pci.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
-#include <linux/mei.h>
 #include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include <linux/mei.h>
 
 #include "mei_dev.h"
 #include "hbm.h"
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index dd233fd43178..76ef8ffa42c1 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -15,7 +15,6 @@
  */
 
 #include <linux/export.h>
-#include <linux/pci.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/delay.h>
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index 8dac76901130..8844e1772793 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -16,11 +16,11 @@
 
 
 #include <linux/export.h>
-#include <linux/pci.h>
 #include <linux/kthread.h>
 #include <linux/interrupt.h>
 #include <linux/fs.h>
 #include <linux/jiffies.h>
+#include <linux/slab.h>
 
 #include <linux/mei.h>
 
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 4d738c881878..d31f271f6516 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -17,12 +17,12 @@
 #include <linux/moduleparam.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
+#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/aio.h>
-#include <linux/pci.h>
 #include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/ioctl.h>
diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c
index 9f32fd6eaccf..5b369f4c47de 100644
--- a/drivers/misc/mei/nfc.c
+++ b/drivers/misc/mei/nfc.c
@@ -19,7 +19,8 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/device.h>
-#include <linux/pci.h>
+#include <linux/slab.h>
+
 #include <linux/mei_cl_bus.h>
 
 #include "mei_dev.h"
diff --git a/drivers/misc/mei/wd.c b/drivers/misc/mei/wd.c
index d28511b78eaa..626b4c13993b 100644
--- a/drivers/misc/mei/wd.c
+++ b/drivers/misc/mei/wd.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/device.h>
-#include <linux/pci.h>
 #include <linux/sched.h>
 #include <linux/watchdog.h>
 
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index d14af7b722ef..164aad1f9f12 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -3,6 +3,7 @@
 
 #include <linux/device.h>
 #include <linux/uuid.h>
+#include <linux/mod_devicetable.h>
 
 struct mei_cl_device;
 
-- 
cgit v1.2.3


From c02607aad2f9ed478eb288bcec1c00cd9df38b3c Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 29 Sep 2014 10:05:06 -0600
Subject: iommu: introduce domain attribute for nesting IOMMUs

Some IOMMUs, such as the ARM SMMU, support two stages of translation.
The idea behind such a scheme is to allow a guest operating system to
use the IOMMU for DMA mappings in the first stage of translation, with
the hypervisor then installing mappings in the second stage to provide
isolation of the DMA to the physical range assigned to that virtual
machine.

In order to allow IOMMU domains to be used for second-stage translation,
this patch adds a new iommu_attr (IOMMU_ATTR_NESTING) for setting
second-stage domains prior to device attach. The attribute can also be
queried to see if a domain is actually making use of nesting.

Acked-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 20f9a527922a..7b02bcc85b9e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -80,6 +80,7 @@ enum iommu_attr {
 	DOMAIN_ATTR_FSL_PAMU_STASH,
 	DOMAIN_ATTR_FSL_PAMU_ENABLE,
 	DOMAIN_ATTR_FSL_PAMUV1,
+	DOMAIN_ATTR_NESTING,	/* two stages of translation */
 	DOMAIN_ATTR_MAX,
 };
 
-- 
cgit v1.2.3


From b1937227316417aa7568d01e6fa1f272e98fb890 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 28 Sep 2014 22:18:47 -0700
Subject: net: reorganize sk_buff for faster __copy_skb_header()

With proliferation of bit fields in sk_buff, __copy_skb_header() became
quite expensive, showing as the most expensive function in a GSO
workload.

__copy_skb_header() performance is also critical for non GSO TCP
operations, as it is used from skb_clone()

This patch carefully moves all the fields that were not copied in a
separate zone : cloned, nohdr, fclone, peeked, head_frag, xmit_more

Then I moved all other fields and all other copied fields in a section
delimited by headers_start[0]/headers_end[0] section so that we
can use a single memcpy() call, inlined by compiler using long
word load/stores.

I also tried to make all copies in the natural orders of sk_buff,
to help hardware prefetching.

I made sure sk_buff size did not change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 133 ++++++++++++++++++++++++++-----------------------
 net/core/skbuff.c      |  80 ++++++++++++++---------------
 2 files changed, 113 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8eaa62400fca..b6cced304b26 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -527,27 +527,41 @@ struct sk_buff {
 	char			cb[48] __aligned(8);
 
 	unsigned long		_skb_refdst;
+	void			(*destructor)(struct sk_buff *skb);
 #ifdef CONFIG_XFRM
 	struct	sec_path	*sp;
+#endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	struct nf_conntrack	*nfct;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	struct nf_bridge_info	*nf_bridge;
 #endif
 	unsigned int		len,
 				data_len;
 	__u16			mac_len,
 				hdr_len;
-	union {
-		__wsum		csum;
-		struct {
-			__u16	csum_start;
-			__u16	csum_offset;
-		};
-	};
-	__u32			priority;
+
+	/* Following fields are _not_ copied in __copy_skb_header()
+	 * Note that queue_mapping is here mostly to fill a hole.
+	 */
 	kmemcheck_bitfield_begin(flags1);
-	__u8			ignore_df:1,
-				cloned:1,
-				ip_summed:2,
+	__u16			queue_mapping;
+	__u8			cloned:1,
 				nohdr:1,
-				nfctinfo:3;
+				fclone:2,
+				peeked:1,
+				head_frag:1,
+				xmit_more:1;
+	/* one bit hole */
+	kmemcheck_bitfield_end(flags1);
+
+
+
+	/* fields enclosed in headers_start/headers_end are copied
+	 * using a single memcpy() in __copy_skb_header()
+	 */
+	__u32			headers_start[0];
 
 /* if you move pkt_type around you also must adapt those constants */
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -558,58 +572,53 @@ struct sk_buff {
 #define PKT_TYPE_OFFSET()	offsetof(struct sk_buff, __pkt_type_offset)
 
 	__u8			__pkt_type_offset[0];
-	__u8			pkt_type:3,
-				fclone:2,
-				ipvs_property:1,
-				peeked:1,
-				nf_trace:1;
-	kmemcheck_bitfield_end(flags1);
-	__be16			protocol;
-
-	void			(*destructor)(struct sk_buff *skb);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	struct nf_conntrack	*nfct;
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	struct nf_bridge_info	*nf_bridge;
-#endif
-
-	int			skb_iif;
-
-	__u32			hash;
-
-	__be16			vlan_proto;
-	__u16			vlan_tci;
-
-#ifdef CONFIG_NET_SCHED
-	__u16			tc_index;	/* traffic control index */
-#ifdef CONFIG_NET_CLS_ACT
-	__u16			tc_verd;	/* traffic control verdict */
-#endif
-#endif
-
-	__u16			queue_mapping;
-	kmemcheck_bitfield_begin(flags2);
-	__u8			xmit_more:1;
-#ifdef CONFIG_IPV6_NDISC_NODETYPE
-	__u8			ndisc_nodetype:2;
-#endif
+	__u8			pkt_type:3;
 	__u8			pfmemalloc:1;
+	__u8			ignore_df:1;
+	__u8			nfctinfo:3;
+
+	__u8			nf_trace:1;
+	__u8			ip_summed:2;
 	__u8			ooo_okay:1;
 	__u8			l4_hash:1;
 	__u8			sw_hash:1;
 	__u8			wifi_acked_valid:1;
 	__u8			wifi_acked:1;
+
 	__u8			no_fcs:1;
-	__u8			head_frag:1;
 	/* Indicates the inner headers are valid in the skbuff. */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
 	__u8			csum_complete_sw:1;
-	/* 1/3 bit hole (depending on ndisc_nodetype presence) */
-	kmemcheck_bitfield_end(flags2);
+	__u8			csum_level:2;
+	__u8			csum_bad:1;
 
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+	__u8			ndisc_nodetype:2;
+#endif
+	__u8			ipvs_property:1;
+	/* 5 or 7 bit hole */
+
+#ifdef CONFIG_NET_SCHED
+	__u16			tc_index;	/* traffic control index */
+#ifdef CONFIG_NET_CLS_ACT
+	__u16			tc_verd;	/* traffic control verdict */
+#endif
+#endif
+
+	union {
+		__wsum		csum;
+		struct {
+			__u16	csum_start;
+			__u16	csum_offset;
+		};
+	};
+	__u32			priority;
+	int			skb_iif;
+	__u32			hash;
+	__be16			vlan_proto;
+	__u16			vlan_tci;
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
 	union {
 		unsigned int	napi_id;
@@ -625,19 +634,18 @@ struct sk_buff {
 		__u32		reserved_tailroom;
 	};
 
-	kmemcheck_bitfield_begin(flags3);
-	__u8			csum_level:2;
-	__u8			csum_bad:1;
-	/* 13 bit hole */
-	kmemcheck_bitfield_end(flags3);
-
 	__be16			inner_protocol;
 	__u16			inner_transport_header;
 	__u16			inner_network_header;
 	__u16			inner_mac_header;
+
+	__be16			protocol;
 	__u16			transport_header;
 	__u16			network_header;
 	__u16			mac_header;
+
+	__u32			headers_end[0];
+
 	/* These elements must be at the end, see alloc_skb() for details.  */
 	sk_buff_data_t		tail;
 	sk_buff_data_t		end;
@@ -3040,19 +3048,22 @@ static inline void nf_reset_trace(struct sk_buff *skb)
 }
 
 /* Note: This doesn't put any conntrack and bridge info in dst. */
-static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
+static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
+			     bool copy)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	dst->nfct = src->nfct;
 	nf_conntrack_get(src->nfct);
-	dst->nfctinfo = src->nfctinfo;
+	if (copy)
+		dst->nfctinfo = src->nfctinfo;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	dst->nf_bridge  = src->nf_bridge;
 	nf_bridge_get(src->nf_bridge);
 #endif
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
-	dst->nf_trace = src->nf_trace;
+	if (copy)
+		dst->nf_trace = src->nf_trace;
 #endif
 }
 
@@ -3064,7 +3075,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(dst->nf_bridge);
 #endif
-	__nf_copy(dst, src);
+	__nf_copy(dst, src, true);
 }
 
 #ifdef CONFIG_NETWORK_SECMARK
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d4fdc649112c..4be570a4ab21 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -261,7 +261,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
 		kmemcheck_annotate_bitfield(child, flags1);
-		kmemcheck_annotate_bitfield(child, flags2);
 		skb->fclone = SKB_FCLONE_ORIG;
 		atomic_set(fclone_ref, 1);
 
@@ -675,57 +674,61 @@ void consume_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(consume_skb);
 
+/* Make sure a field is enclosed inside headers_start/headers_end section */
+#define CHECK_SKB_FIELD(field) \
+	BUILD_BUG_ON(offsetof(struct sk_buff, field) <		\
+		     offsetof(struct sk_buff, headers_start));	\
+	BUILD_BUG_ON(offsetof(struct sk_buff, field) >		\
+		     offsetof(struct sk_buff, headers_end));	\
+
 static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 {
 	new->tstamp		= old->tstamp;
+	/* We do not copy old->sk */
 	new->dev		= old->dev;
-	new->transport_header	= old->transport_header;
-	new->network_header	= old->network_header;
-	new->mac_header		= old->mac_header;
-	new->inner_protocol	= old->inner_protocol;
-	new->inner_transport_header = old->inner_transport_header;
-	new->inner_network_header = old->inner_network_header;
-	new->inner_mac_header = old->inner_mac_header;
+	memcpy(new->cb, old->cb, sizeof(old->cb));
 	skb_dst_copy(new, old);
-	skb_copy_hash(new, old);
-	new->ooo_okay		= old->ooo_okay;
-	new->no_fcs		= old->no_fcs;
-	new->encapsulation	= old->encapsulation;
-	new->encap_hdr_csum	= old->encap_hdr_csum;
-	new->csum_valid		= old->csum_valid;
-	new->csum_complete_sw	= old->csum_complete_sw;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
-	memcpy(new->cb, old->cb, sizeof(old->cb));
-	new->csum		= old->csum;
-	new->ignore_df		= old->ignore_df;
-	new->pkt_type		= old->pkt_type;
-	new->ip_summed		= old->ip_summed;
-	skb_copy_queue_mapping(new, old);
-	new->priority		= old->priority;
-#if IS_ENABLED(CONFIG_IP_VS)
-	new->ipvs_property	= old->ipvs_property;
+	__nf_copy(new, old, false);
+
+	/* Note : this field could be in headers_start/headers_end section
+	 * It is not yet because we do not want to have a 16 bit hole
+	 */
+	new->queue_mapping = old->queue_mapping;
+
+	memcpy(&new->headers_start, &old->headers_start,
+	       offsetof(struct sk_buff, headers_end) -
+	       offsetof(struct sk_buff, headers_start));
+	CHECK_SKB_FIELD(protocol);
+	CHECK_SKB_FIELD(csum);
+	CHECK_SKB_FIELD(hash);
+	CHECK_SKB_FIELD(priority);
+	CHECK_SKB_FIELD(skb_iif);
+	CHECK_SKB_FIELD(vlan_proto);
+	CHECK_SKB_FIELD(vlan_tci);
+	CHECK_SKB_FIELD(transport_header);
+	CHECK_SKB_FIELD(network_header);
+	CHECK_SKB_FIELD(mac_header);
+	CHECK_SKB_FIELD(inner_protocol);
+	CHECK_SKB_FIELD(inner_transport_header);
+	CHECK_SKB_FIELD(inner_network_header);
+	CHECK_SKB_FIELD(inner_mac_header);
+	CHECK_SKB_FIELD(mark);
+#ifdef CONFIG_NETWORK_SECMARK
+	CHECK_SKB_FIELD(secmark);
+#endif
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	CHECK_SKB_FIELD(napi_id);
 #endif
-	new->pfmemalloc		= old->pfmemalloc;
-	new->protocol		= old->protocol;
-	new->mark		= old->mark;
-	new->skb_iif		= old->skb_iif;
-	__nf_copy(new, old);
 #ifdef CONFIG_NET_SCHED
-	new->tc_index		= old->tc_index;
+	CHECK_SKB_FIELD(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
-	new->tc_verd		= old->tc_verd;
+	CHECK_SKB_FIELD(tc_verd);
 #endif
 #endif
-	new->vlan_proto		= old->vlan_proto;
-	new->vlan_tci		= old->vlan_tci;
-
-	skb_copy_secmark(new, old);
 
-#ifdef CONFIG_NET_RX_BUSY_POLL
-	new->napi_id	= old->napi_id;
-#endif
 }
 
 /*
@@ -876,7 +879,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 			return NULL;
 
 		kmemcheck_annotate_bitfield(n, flags1);
-		kmemcheck_annotate_bitfield(n, flags2);
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
-- 
cgit v1.2.3


From 87a15a8090c0e5284c0e53528d9defa5d9237866 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Fri, 26 Sep 2014 13:58:26 -0400
Subject: NFSD: Add generic v4.2 infrastructure

It's cleaner to introduce everything at once and have the server reply
with "not supported" than it would be to introduce extra operations when
implementing a specific one in the middle of the list.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c    | 28 ++++++++++++++++++++++++++++
 include/linux/nfs4.h | 18 ++++++++++++++++--
 2 files changed, 44 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e771a1a7c6f1..0311baebd7d3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1586,6 +1586,20 @@ static nfsd4_dec nfsd4_dec_ops[] = {
 	[OP_WANT_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_DESTROY_CLIENTID]	= (nfsd4_dec)nfsd4_decode_destroy_clientid,
 	[OP_RECLAIM_COMPLETE]	= (nfsd4_dec)nfsd4_decode_reclaim_complete,
+
+	/* new operations for NFSv4.2 */
+	[OP_ALLOCATE]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTSTATS]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OFFLOAD_STATUS]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_READ_PLUS]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_SEEK]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_WRITE_SAME]		= (nfsd4_dec)nfsd4_decode_notsupp,
 };
 
 static inline bool
@@ -3823,6 +3837,20 @@ static nfsd4_enc nfsd4_enc_ops[] = {
 	[OP_WANT_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_DESTROY_CLIENTID]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_RECLAIM_COMPLETE]	= (nfsd4_enc)nfsd4_encode_noop,
+
+	/* NFSv4.2 operations */
+	[OP_ALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTSTATS]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_OFFLOAD_CANCEL]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_OFFLOAD_STATUS]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_READ_PLUS]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_SEEK]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_WRITE_SAME]		= (nfsd4_enc)nfsd4_encode_noop,
 };
 
 /*
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 79b2a0f5c318..cf38224c4fa0 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -110,6 +110,20 @@ enum nfs_opnum4 {
 	OP_DESTROY_CLIENTID = 57,
 	OP_RECLAIM_COMPLETE = 58,
 
+	/* nfs42 */
+	OP_ALLOCATE = 59,
+	OP_COPY = 60,
+	OP_COPY_NOTIFY = 61,
+	OP_DEALLOCATE = 62,
+	OP_IO_ADVISE = 63,
+	OP_LAYOUTERROR = 64,
+	OP_LAYOUTSTATS = 65,
+	OP_OFFLOAD_CANCEL = 66,
+	OP_OFFLOAD_STATUS = 67,
+	OP_READ_PLUS = 68,
+	OP_SEEK = 69,
+	OP_WRITE_SAME = 70,
+
 	OP_ILLEGAL = 10044,
 };
 
@@ -117,10 +131,10 @@ enum nfs_opnum4 {
 Needs to be updated if more operations are defined in future.*/
 
 #define FIRST_NFS4_OP	OP_ACCESS
-#define LAST_NFS4_OP 	OP_RECLAIM_COMPLETE
+#define LAST_NFS4_OP 	OP_WRITE_SAME
 #define LAST_NFS40_OP	OP_RELEASE_LOCKOWNER
 #define LAST_NFS41_OP	OP_RECLAIM_COMPLETE
-#define LAST_NFS42_OP	OP_RECLAIM_COMPLETE
+#define LAST_NFS42_OP	OP_WRITE_SAME
 
 enum nfsstat4 {
 	NFS4_OK = 0,
-- 
cgit v1.2.3


From 24bab491220faa446d945624086d838af41d616c Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Fri, 26 Sep 2014 13:58:27 -0400
Subject: NFSD: Implement SEEK

This patch adds server support for the NFS v4.2 operation SEEK, which
returns the position of the next hole or data segment in a file.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c   | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4xdr.c    | 36 ++++++++++++++++++++++++++++++++++--
 fs/nfsd/xdr4.h       | 14 ++++++++++++++
 include/linux/nfs4.h |  5 +++++
 4 files changed, 102 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5e0dc528a0e8..cdeb3cfd6f32 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status;
 }
 
+static __be32
+nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		struct nfsd4_seek *seek)
+{
+	int whence;
+	__be32 status;
+	struct file *file;
+
+	status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
+					    &seek->seek_stateid,
+					    RD_STATE, &file);
+	if (status) {
+		dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
+		return status;
+	}
+
+	switch (seek->seek_whence) {
+	case NFS4_CONTENT_DATA:
+		whence = SEEK_DATA;
+		break;
+	case NFS4_CONTENT_HOLE:
+		whence = SEEK_HOLE;
+		break;
+	default:
+		status = nfserr_union_notsupp;
+		goto out;
+	}
+
+	/*
+	 * Note:  This call does change file->f_pos, but nothing in NFSD
+	 *        should ever file->f_pos.
+	 */
+	seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+	if (seek->seek_pos < 0)
+		status = nfserrno(seek->seek_pos);
+	else if (seek->seek_pos >= i_size_read(file_inode(file)))
+		seek->seek_eof = true;
+
+out:
+	fput(file);
+	return status;
+}
+
 /* This routine never returns NFS_OK!  If there are no other errors, it
  * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
  * attributes matched.  VERIFY is implemented by mapping NFSERR_SAME
@@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
+
+	/* NFSv4.2 operations */
+	[OP_SEEK] = {
+		.op_func = (nfsd4op_func)nfsd4_seek,
+		.op_name = "OP_SEEK",
+	},
 };
 
 int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0311baebd7d3..7ec646380005 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1513,6 +1513,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
 	DECODE_TAIL;
 }
 
+static __be32
+nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
+{
+	DECODE_HEAD;
+
+	status = nfsd4_decode_stateid(argp, &seek->seek_stateid);
+	if (status)
+		return status;
+
+	READ_BUF(8 + 4);
+	p = xdr_decode_hyper(p, &seek->seek_offset);
+	seek->seek_whence = be32_to_cpup(p);
+
+	DECODE_TAIL;
+}
+
 static __be32
 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
 {
@@ -1598,7 +1614,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
 	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_OFFLOAD_STATUS]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_READ_PLUS]		= (nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_SEEK]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_SEEK]		= (nfsd4_dec)nfsd4_decode_seek,
 	[OP_WRITE_SAME]		= (nfsd4_dec)nfsd4_decode_notsupp,
 };
 
@@ -3765,6 +3781,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
 	return nfserr;
 }
 
+static __be32
+nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
+		  struct nfsd4_seek *seek)
+{
+	__be32 *p;
+
+	if (nfserr)
+		return nfserr;
+
+	p = xdr_reserve_space(&resp->xdr, 4 + 8);
+	*p++ = cpu_to_be32(seek->seek_eof);
+	p = xdr_encode_hyper(p, seek->seek_pos);
+
+	return nfserr;
+}
+
 static __be32
 nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
 {
@@ -3849,7 +3881,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
 	[OP_OFFLOAD_CANCEL]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_OFFLOAD_STATUS]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_READ_PLUS]		= (nfsd4_enc)nfsd4_encode_noop,
-	[OP_SEEK]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_SEEK]		= (nfsd4_enc)nfsd4_encode_seek,
 	[OP_WRITE_SAME]		= (nfsd4_enc)nfsd4_encode_noop,
 };
 
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 465e7799742a..5720e9457f33 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete {
 	u32 rca_one_fs;
 };
 
+struct nfsd4_seek {
+	/* request */
+	stateid_t	seek_stateid;
+	loff_t		seek_offset;
+	u32		seek_whence;
+
+	/* response */
+	u32		seek_eof;
+	loff_t		seek_pos;
+};
+
 struct nfsd4_op {
 	int					opnum;
 	__be32					status;
@@ -473,6 +484,9 @@ struct nfsd4_op {
 		struct nfsd4_reclaim_complete	reclaim_complete;
 		struct nfsd4_test_stateid	test_stateid;
 		struct nfsd4_free_stateid	free_stateid;
+
+		/* NFSv4.2 */
+		struct nfsd4_seek		seek;
 	} u;
 	struct nfs4_replay *			replay;
 };
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index cf38224c4fa0..026b0c042c40 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -550,4 +550,9 @@ struct nfs4_deviceid {
 	char data[NFS4_DEVICEID4_SIZE];
 };
 
+enum data_content4 {
+	NFS4_CONTENT_DATA		= 0,
+	NFS4_CONTENT_HOLE		= 1,
+};
+
 #endif
-- 
cgit v1.2.3


From 8c14f9c70327a6fb75534c4c61d7ea9c82ccf78f Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Mon, 29 Sep 2014 11:55:36 +0200
Subject: ARCNET: add com20020 PCI IDs with metadata

This patch adds metadata for the com20020 to prepare for devices with
multiple io address areas with multi card interfaces.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/com20020-pci.c | 216 ++++++++++++++++++++++++++++++++------
 include/linux/com20020.h          |  16 +++
 2 files changed, 197 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 7bb292e59559..f9e55527739d 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -63,6 +63,8 @@ MODULE_LICENSE("GPL");
 
 static int com20020pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct com20020_pci_channel_map *cm;
+	struct com20020_pci_card_info *ci;
 	struct net_device *dev;
 	struct arcnet_local *lp;
 	int ioaddr, err;
@@ -75,19 +77,15 @@ static int com20020pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
 
 	dev->netdev_ops = &com20020_netdev_ops;
 
+	ci = (struct com20020_pci_card_info *)id->driver_data;
+
 	lp = netdev_priv(dev);
 
 	pci_set_drvdata(pdev, dev);
 
-	// SOHARD needs PCI base addr 4
-	if (pdev->vendor==0x10B5) {
-		BUGMSG(D_NORMAL, "SOHARD\n");
-		ioaddr = pci_resource_start(pdev, 4);
-	}
-	else {
-		BUGMSG(D_NORMAL, "Contemporary Controls\n");
-		ioaddr = pci_resource_start(pdev, 2);
-	}
+	cm = &ci->chan_map_tbl[0];
+	BUGMSG(D_NORMAL, "%s Controls\n", ci->name);
+	ioaddr = pci_resource_start(pdev, cm->bar);
 
 	if (!request_region(ioaddr, ARCNET_TOTAL_SIZE, "com20020-pci")) {
 		BUGMSG(D_INIT, "IO region %xh-%xh already allocated.\n",
@@ -105,7 +103,7 @@ static int com20020pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
 	dev->irq = pdev->irq;
 	dev->dev_addr[0] = node;
 	lp->card_name = "PCI COM20020";
-	lp->card_flags = id->driver_data;
+	lp->card_flags = ci->flags;
 	lp->backplane = backplane;
 	lp->clockp = clockp & 7;
 	lp->clockm = clockm & 3;
@@ -144,32 +142,180 @@ static void com20020pci_remove(struct pci_dev *pdev)
 	free_netdev(dev);
 }
 
+static struct com20020_pci_card_info card_info_10mbit = {
+	.name = "ARC-PCI",
+	.devcount = 1,
+	.chan_map_tbl = {
+		{ 2, 0x00, 0x08 },
+	},
+	.flags = ARC_CAN_10MBIT,
+};
+
+static struct com20020_pci_card_info card_info_5mbit = {
+	.name = "ARC-PCI",
+	.devcount = 1,
+	.chan_map_tbl = {
+		{ 2, 0x00, 0x08 },
+	},
+	.flags = ARC_IS_5MBIT,
+};
+
+static struct com20020_pci_card_info card_info_sohard = {
+	.name = "PLX-PCI",
+	.devcount = 1,
+	/* SOHARD needs PCI base addr 4 */
+	.chan_map_tbl = {
+		{4, 0x00, 0x08},
+	},
+	.flags = ARC_CAN_10MBIT,
+};
+
 static const struct pci_device_id com20020pci_id_table[] = {
-	{ 0x1571, 0xa001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0x1571, 0xa002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0x1571, 0xa003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0x1571, 0xa004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0x1571, 0xa005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0x1571, 0xa006, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0x1571, 0xa007, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0x1571, 0xa008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0x1571, 0xa009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_IS_5MBIT },
-	{ 0x1571, 0xa00a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_IS_5MBIT },
-	{ 0x1571, 0xa00b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_IS_5MBIT },
-	{ 0x1571, 0xa00c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_IS_5MBIT },
-	{ 0x1571, 0xa00d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_IS_5MBIT },
-	{ 0x1571, 0xa00e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_IS_5MBIT },
-	{ 0x1571, 0xa201, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
-	{ 0x1571, 0xa202, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
-	{ 0x1571, 0xa203, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
-	{ 0x1571, 0xa204, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
-	{ 0x1571, 0xa205, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
-	{ 0x1571, 0xa206, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
-	{ 0x10B5, 0x9030, 0x10B5,     0x2978,     0, 0, ARC_CAN_10MBIT },
-	{ 0x10B5, 0x9050, 0x10B5,     0x2273,     0, 0, ARC_CAN_10MBIT },
-	{ 0x14BA, 0x6000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
-	{ 0x10B5, 0x2200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
-	{0,}
+	{
+		0x1571, 0xa001,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		0,
+	},
+	{
+		0x1571, 0xa002,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		0,
+	},
+	{
+		0x1571, 0xa003,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		0
+	},
+	{
+		0x1571, 0xa004,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		0,
+	},
+	{
+		0x1571, 0xa005,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		0
+	},
+	{
+		0x1571, 0xa006,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		0
+	},
+	{
+		0x1571, 0xa007,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		0
+	},
+	{
+		0x1571, 0xa008,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		0
+	},
+	{
+		0x1571, 0xa009,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_5mbit
+	},
+	{
+		0x1571, 0xa00a,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_5mbit
+	},
+	{
+		0x1571, 0xa00b,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_5mbit
+	},
+	{
+		0x1571, 0xa00c,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_5mbit
+	},
+	{
+		0x1571, 0xa00d,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_5mbit
+	},
+	{
+		0x1571, 0xa00e,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_5mbit
+	},
+	{
+		0x1571, 0xa201,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_10mbit
+	},
+	{
+		0x1571, 0xa202,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_10mbit
+	},
+	{
+		0x1571, 0xa203,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_10mbit
+	},
+	{
+		0x1571, 0xa204,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_10mbit
+	},
+	{
+		0x1571, 0xa205,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_10mbit
+	},
+	{
+		0x1571, 0xa206,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_10mbit
+	},
+	{
+		0x10B5, 0x9030,
+		0x10B5, 0x2978,
+		0, 0,
+		(kernel_ulong_t)&card_info_sohard
+	},
+	{
+		0x10B5, 0x9050,
+		0x10B5, 0x2273,
+		0, 0,
+		(kernel_ulong_t)&card_info_sohard
+	},
+	{
+		0x14BA, 0x6000,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_10mbit
+	},
+	{
+		0x10B5, 0x2200,
+		PCI_ANY_ID, PCI_ANY_ID,
+		0, 0,
+		(kernel_ulong_t)&card_info_10mbit
+	},
+	{ 0, }
 };
 
 MODULE_DEVICE_TABLE(pci, com20020pci_id_table);
diff --git a/include/linux/com20020.h b/include/linux/com20020.h
index 5dcfb944b6ce..6a1ceca61e7f 100644
--- a/include/linux/com20020.h
+++ b/include/linux/com20020.h
@@ -41,6 +41,22 @@ extern const struct net_device_ops com20020_netdev_ops;
 #define BUS_ALIGN  1
 #endif
 
+#define PLX_PCI_MAX_CARDS 1
+
+struct com20020_pci_channel_map {
+	u32 bar;
+	u32 offset;
+	u32 size;               /* 0x00 - auto, e.g. length of entire bar */
+};
+
+struct com20020_pci_card_info {
+	const char *name;
+	int devcount;
+
+	struct com20020_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CARDS];
+
+	unsigned int flags;
+};
 
 #define _INTMASK  (ioaddr+BUS_ALIGN*0)	/* writable */
 #define _STATUS   (ioaddr+BUS_ALIGN*0)	/* readable */
-- 
cgit v1.2.3


From c51da42a6346c0c747e70a4f5ae873da1150a784 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Mon, 29 Sep 2014 11:55:37 +0200
Subject: ARCNET: add support for multi interfaces on com20020

The com20020-pci driver is currently designed to instance
one netdev with one pci device. This patch adds support to
instance many cards with one pci device, depending on the device
data in the private data.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/com20020-pci.c | 149 ++++++++++++++++++++++++--------------
 drivers/net/arcnet/com20020_cs.c  |   4 -
 include/linux/com20020.h          |  15 +++-
 3 files changed, 109 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index f9e55527739d..fe87576bae3c 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -38,6 +38,7 @@
 #include <linux/pci.h>
 #include <linux/arcdevice.h>
 #include <linux/com20020.h>
+#include <linux/list.h>
 
 #include <asm/io.h>
 
@@ -61,85 +62,125 @@ module_param(clockp, int, 0);
 module_param(clockm, int, 0);
 MODULE_LICENSE("GPL");
 
+static void com20020pci_remove(struct pci_dev *pdev);
+
 static int com20020pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	struct com20020_pci_channel_map *cm;
 	struct com20020_pci_card_info *ci;
 	struct net_device *dev;
 	struct arcnet_local *lp;
-	int ioaddr, err;
+	struct com20020_priv *priv;
+	int i, ioaddr, ret;
+	struct resource *r;
 
 	if (pci_enable_device(pdev))
 		return -EIO;
-	dev = alloc_arcdev(device);
-	if (!dev)
-		return -ENOMEM;
-
-	dev->netdev_ops = &com20020_netdev_ops;
 
+	priv = devm_kzalloc(&pdev->dev, sizeof(struct com20020_priv),
+			    GFP_KERNEL);
 	ci = (struct com20020_pci_card_info *)id->driver_data;
+	priv->ci = ci;
 
-	lp = netdev_priv(dev);
+	INIT_LIST_HEAD(&priv->list_dev);
 
-	pci_set_drvdata(pdev, dev);
 
-	cm = &ci->chan_map_tbl[0];
-	BUGMSG(D_NORMAL, "%s Controls\n", ci->name);
-	ioaddr = pci_resource_start(pdev, cm->bar);
+	for (i = 0; i < ci->devcount; i++) {
+		struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i];
+		struct com20020_dev *card;
 
-	if (!request_region(ioaddr, ARCNET_TOTAL_SIZE, "com20020-pci")) {
-		BUGMSG(D_INIT, "IO region %xh-%xh already allocated.\n",
-		       ioaddr, ioaddr + ARCNET_TOTAL_SIZE - 1);
-		err = -EBUSY;
-		goto out_dev;
-	}
+		dev = alloc_arcdev(device);
+		if (!dev) {
+			ret = -ENOMEM;
+			goto out_port;
+		}
 
-	// Dummy access after Reset
-	// ARCNET controller needs this access to detect bustype
-	outb(0x00,ioaddr+1);
-	inb(ioaddr+1);
-
-	dev->base_addr = ioaddr;
-	dev->irq = pdev->irq;
-	dev->dev_addr[0] = node;
-	lp->card_name = "PCI COM20020";
-	lp->card_flags = ci->flags;
-	lp->backplane = backplane;
-	lp->clockp = clockp & 7;
-	lp->clockm = clockm & 3;
-	lp->timeout = timeout;
-	lp->hw.owner = THIS_MODULE;
-
-	if (ASTATUS() == 0xFF) {
-		BUGMSG(D_NORMAL, "IO address %Xh was reported by PCI BIOS, "
-		       "but seems empty!\n", ioaddr);
-		err = -EIO;
-		goto out_port;
-	}
-	if (com20020_check(dev)) {
-		err = -EIO;
-		goto out_port;
+		dev->netdev_ops = &com20020_netdev_ops;
+
+		lp = netdev_priv(dev);
+
+		BUGMSG(D_NORMAL, "%s Controls\n", ci->name);
+		ioaddr = pci_resource_start(pdev, cm->bar) + cm->offset;
+
+		r = devm_request_region(&pdev->dev, ioaddr, cm->size,
+					"com20020-pci");
+		if (!r) {
+			pr_err("IO region %xh-%xh already allocated.\n",
+			       ioaddr, ioaddr + cm->size - 1);
+			ret = -EBUSY;
+			goto out_port;
+		}
+
+		/* Dummy access after Reset
+		 * ARCNET controller needs
+		 * this access to detect bustype
+		 */
+		outb(0x00, ioaddr + 1);
+		inb(ioaddr + 1);
+
+		dev->base_addr = ioaddr;
+		dev->dev_addr[0] = node;
+		dev->irq = pdev->irq;
+		lp->card_name = "PCI COM20020";
+		lp->card_flags = ci->flags;
+		lp->backplane = backplane;
+		lp->clockp = clockp & 7;
+		lp->clockm = clockm & 3;
+		lp->timeout = timeout;
+		lp->hw.owner = THIS_MODULE;
+
+		if (ASTATUS() == 0xFF) {
+			pr_err("IO address %Xh is empty!\n", ioaddr);
+			ret = -EIO;
+			goto out_port;
+		}
+		if (com20020_check(dev)) {
+			ret = -EIO;
+			goto out_port;
+		}
+
+		card = devm_kzalloc(&pdev->dev, sizeof(struct com20020_dev),
+				    GFP_KERNEL);
+		if (!card) {
+			pr_err("%s out of memory!\n", __func__);
+			return -ENOMEM;
+		}
+
+		card->index = i;
+		card->pci_priv = priv;
+		card->dev = dev;
+
+		dev_set_drvdata(&dev->dev, card);
+
+		ret = com20020_found(dev, IRQF_SHARED);
+		if (ret)
+			goto out_port;
+
+		list_add(&card->list, &priv->list_dev);
 	}
 
-	if ((err = com20020_found(dev, IRQF_SHARED)) != 0)
-	        goto out_port;
+	pci_set_drvdata(pdev, priv);
 
 	return 0;
 
 out_port:
-	release_region(ioaddr, ARCNET_TOTAL_SIZE);
-out_dev:
-	free_netdev(dev);
-	return err;
+	com20020pci_remove(pdev);
+	return ret;
 }
 
 static void com20020pci_remove(struct pci_dev *pdev)
 {
-	struct net_device *dev = pci_get_drvdata(pdev);
-	unregister_netdev(dev);
-	free_irq(dev->irq, dev);
-	release_region(dev->base_addr, ARCNET_TOTAL_SIZE);
-	free_netdev(dev);
+	struct com20020_dev *card, *tmpcard;
+	struct com20020_priv *priv;
+
+	priv = pci_get_drvdata(pdev);
+
+	list_for_each_entry_safe(card, tmpcard, &priv->list_dev, list) {
+		struct net_device *dev = card->dev;
+
+		unregister_netdev(dev);
+		free_irq(dev->irq, dev);
+		free_netdev(dev);
+	}
 }
 
 static struct com20020_pci_card_info card_info_10mbit = {
diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c
index 1a790a20210d..057d9582132a 100644
--- a/drivers/net/arcnet/com20020_cs.c
+++ b/drivers/net/arcnet/com20020_cs.c
@@ -112,10 +112,6 @@ static void com20020_detach(struct pcmcia_device *p_dev);
 
 /*====================================================================*/
 
-struct com20020_dev {
-    struct net_device       *dev;
-};
-
 static int com20020_probe(struct pcmcia_device *p_dev)
 {
     struct com20020_dev *info;
diff --git a/include/linux/com20020.h b/include/linux/com20020.h
index 6a1ceca61e7f..85898995b234 100644
--- a/include/linux/com20020.h
+++ b/include/linux/com20020.h
@@ -41,7 +41,7 @@ extern const struct net_device_ops com20020_netdev_ops;
 #define BUS_ALIGN  1
 #endif
 
-#define PLX_PCI_MAX_CARDS 1
+#define PLX_PCI_MAX_CARDS 2
 
 struct com20020_pci_channel_map {
 	u32 bar;
@@ -58,6 +58,19 @@ struct com20020_pci_card_info {
 	unsigned int flags;
 };
 
+struct com20020_priv {
+	struct com20020_pci_card_info *ci;
+	struct list_head list_dev;
+};
+
+struct com20020_dev {
+	struct list_head list;
+	struct net_device *dev;
+
+	struct com20020_priv *pci_priv;
+	int index;
+};
+
 #define _INTMASK  (ioaddr+BUS_ALIGN*0)	/* writable */
 #define _STATUS   (ioaddr+BUS_ALIGN*0)	/* readable */
 #define _COMMAND  (ioaddr+BUS_ALIGN*1)	/* standard arcnet commands */
-- 
cgit v1.2.3


From 79cf79abce71eb7dbc40e2f3121048ca5405cb47 Mon Sep 17 00:00:00 2001
From: Michael Braun <michael-dev@fami-braun.de>
Date: Thu, 25 Sep 2014 16:31:08 +0200
Subject: macvlan: add source mode

This patch adds a new mode of operation to macvlan, called "source".
It allows one to set a list of allowed mac address, which is used
to match against source mac address from received frames on underlying
interface.
This enables creating mac based VLAN associations, instead of standard
port or tag based. The feature is useful to deploy 802.1x mac based
behavior, where drivers of underlying interfaces doesn't allows that.

Configuration is done through the netlink interface using e.g.:
 ip link add link eth0 name macvlan0 type macvlan mode source
 ip link add link eth0 name macvlan1 type macvlan mode source
 ip link set link dev macvlan0 type macvlan macaddr add 00:11:11:11:11:11
 ip link set link dev macvlan0 type macvlan macaddr add 00:22:22:22:22:22
 ip link set link dev macvlan0 type macvlan macaddr add 00:33:33:33:33:33
 ip link set link dev macvlan1 type macvlan macaddr add 00:33:33:33:33:33
 ip link set link dev macvlan1 type macvlan macaddr add 00:44:44:44:44:44

This allows clients with MAC addresses 00:11:11:11:11:11,
00:22:22:22:22:22 to be part of only VLAN associated with macvlan0
interface. Clients with MAC addresses 00:44:44:44:44:44 with only VLAN
associated with macvlan1 interface. And client with MAC address
00:33:33:33:33:33 to be associated with both VLANs.

Based on work of Stefan Gula <steweg@gmail.com>

v8: last version of Stefan Gula for Kernel 3.2.1
v9: rework onto linux-next 2014-03-12 by Michael Braun
    add MACADDR_SET command, enable to configure mac for source mode
    while creating interface
v10:
  - reduce indention level
  - rename source_list to source_entry
  - use aligned 64bit ether address
  - use hash_64 instead of addr[5]
v11:
  - rebase for 3.14 / linux-next 20.04.2014
v12
  - rebase for linux-next 2014-09-25

Signed-off-by: Michael Braun <michael-dev@fami-braun.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c        | 304 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/if_macvlan.h   |   1 +
 include/uapi/linux/if_link.h |  12 ++
 3 files changed, 314 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 726edabff26b..e8a453f1b458 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -35,7 +35,8 @@
 #include <net/xfrm.h>
 #include <linux/netpoll.h>
 
-#define MACVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
+#define MACVLAN_HASH_BITS	8
+#define MACVLAN_HASH_SIZE	(1<<MACVLAN_HASH_BITS)
 #define MACVLAN_BC_QUEUE_LEN	1000
 
 struct macvlan_port {
@@ -47,6 +48,14 @@ struct macvlan_port {
 	struct work_struct	bc_work;
 	bool 			passthru;
 	int			count;
+	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE];
+};
+
+struct macvlan_source_entry {
+	struct hlist_node	hlist;
+	struct macvlan_dev	*vlan;
+	unsigned char		addr[6+2] __aligned(sizeof(u16));
+	struct rcu_head		rcu;
 };
 
 struct macvlan_skb_cb {
@@ -57,6 +66,20 @@ struct macvlan_skb_cb {
 
 static void macvlan_port_destroy(struct net_device *dev);
 
+/* Hash Ethernet address */
+static u32 macvlan_eth_hash(const unsigned char *addr)
+{
+	u64 value = get_unaligned((u64 *)addr);
+
+	/* only want 6 bytes */
+#ifdef __BIG_ENDIAN
+	value >>= 16;
+#else
+	value <<= 16;
+#endif
+	return hash_64(value, MACVLAN_HASH_BITS);
+}
+
 static struct macvlan_port *macvlan_port_get_rcu(const struct net_device *dev)
 {
 	return rcu_dereference(dev->rx_handler_data);
@@ -73,20 +96,68 @@ static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
 					       const unsigned char *addr)
 {
 	struct macvlan_dev *vlan;
+	u32 idx = macvlan_eth_hash(addr);
 
-	hlist_for_each_entry_rcu(vlan, &port->vlan_hash[addr[5]], hlist) {
+	hlist_for_each_entry_rcu(vlan, &port->vlan_hash[idx], hlist) {
 		if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr))
 			return vlan;
 	}
 	return NULL;
 }
 
+static struct macvlan_source_entry *macvlan_hash_lookup_source(
+	const struct macvlan_dev *vlan,
+	const unsigned char *addr)
+{
+	struct macvlan_source_entry *entry;
+	u32 idx = macvlan_eth_hash(addr);
+	struct hlist_head *h = &vlan->port->vlan_source_hash[idx];
+
+	hlist_for_each_entry_rcu(entry, h, hlist) {
+		if (ether_addr_equal_64bits(entry->addr, addr) &&
+		    entry->vlan == vlan)
+			return entry;
+	}
+	return NULL;
+}
+
+static int macvlan_hash_add_source(struct macvlan_dev *vlan,
+				   const unsigned char *addr)
+{
+	struct macvlan_port *port = vlan->port;
+	struct macvlan_source_entry *entry;
+	struct hlist_head *h;
+
+	entry = macvlan_hash_lookup_source(vlan, addr);
+	if (entry)
+		return 0;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	ether_addr_copy(entry->addr, addr);
+	entry->vlan = vlan;
+	h = &port->vlan_source_hash[macvlan_eth_hash(addr)];
+	hlist_add_head_rcu(&entry->hlist, h);
+	vlan->macaddr_count++;
+
+	return 0;
+}
+
 static void macvlan_hash_add(struct macvlan_dev *vlan)
 {
 	struct macvlan_port *port = vlan->port;
 	const unsigned char *addr = vlan->dev->dev_addr;
+	u32 idx = macvlan_eth_hash(addr);
 
-	hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[addr[5]]);
+	hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[idx]);
+}
+
+static void macvlan_hash_del_source(struct macvlan_source_entry *entry)
+{
+	hlist_del_rcu(&entry->hlist);
+	kfree_rcu(entry, rcu);
 }
 
 static void macvlan_hash_del(struct macvlan_dev *vlan, bool sync)
@@ -267,6 +338,65 @@ err:
 	atomic_long_inc(&skb->dev->rx_dropped);
 }
 
+static void macvlan_flush_sources(struct macvlan_port *port,
+				  struct macvlan_dev *vlan)
+{
+	int i;
+
+	for (i = 0; i < MACVLAN_HASH_SIZE; i++) {
+		struct hlist_node *h, *n;
+
+		hlist_for_each_safe(h, n, &port->vlan_source_hash[i]) {
+			struct macvlan_source_entry *entry;
+
+			entry = hlist_entry(h, struct macvlan_source_entry,
+					    hlist);
+			if (entry->vlan == vlan)
+				macvlan_hash_del_source(entry);
+		}
+	}
+	vlan->macaddr_count = 0;
+}
+
+static void macvlan_forward_source_one(struct sk_buff *skb,
+				       struct macvlan_dev *vlan)
+{
+	struct sk_buff *nskb;
+	struct net_device *dev;
+	int len;
+	int ret;
+
+	dev = vlan->dev;
+	if (unlikely(!(dev->flags & IFF_UP)))
+		return;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	len = nskb->len + ETH_HLEN;
+	nskb->dev = dev;
+	nskb->pkt_type = PACKET_HOST;
+
+	ret = netif_rx(nskb);
+	macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, 0);
+}
+
+static void macvlan_forward_source(struct sk_buff *skb,
+				   struct macvlan_port *port,
+				   const unsigned char *addr)
+{
+	struct macvlan_source_entry *entry;
+	u32 idx = macvlan_eth_hash(addr);
+	struct hlist_head *h = &port->vlan_source_hash[idx];
+
+	hlist_for_each_entry_rcu(entry, h, hlist) {
+		if (ether_addr_equal_64bits(entry->addr, addr))
+			if (entry->vlan->dev->flags & IFF_UP)
+				macvlan_forward_source_one(skb, entry->vlan);
+	}
+}
+
 /* called under rcu_read_lock() from netif_receive_skb */
 static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 {
@@ -285,6 +415,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 		if (!skb)
 			return RX_HANDLER_CONSUMED;
 		eth = eth_hdr(skb);
+		macvlan_forward_source(skb, port, eth->h_source);
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (src && src->mode != MACVLAN_MODE_VEPA &&
 		    src->mode != MACVLAN_MODE_BRIDGE) {
@@ -301,6 +432,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 		return RX_HANDLER_PASS;
 	}
 
+	macvlan_forward_source(skb, port, eth->h_source);
 	if (port->passthru)
 		vlan = list_first_or_null_rcu(&port->vlans,
 					      struct macvlan_dev, list);
@@ -667,6 +799,7 @@ static void macvlan_uninit(struct net_device *dev)
 
 	free_percpu(vlan->pcpu_stats);
 
+	macvlan_flush_sources(port, vlan);
 	port->count -= 1;
 	if (!port->count)
 		macvlan_port_destroy(port->dev);
@@ -925,6 +1058,8 @@ static int macvlan_port_create(struct net_device *dev)
 	INIT_LIST_HEAD(&port->vlans);
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
+	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&port->vlan_source_hash[i]);
 
 	skb_queue_head_init(&port->bc_queue);
 	INIT_WORK(&port->bc_work, macvlan_process_broadcast);
@@ -966,11 +1101,102 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 		case MACVLAN_MODE_VEPA:
 		case MACVLAN_MODE_BRIDGE:
 		case MACVLAN_MODE_PASSTHRU:
+		case MACVLAN_MODE_SOURCE:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
+		switch (nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE])) {
+		case MACVLAN_MACADDR_ADD:
+		case MACVLAN_MACADDR_DEL:
+		case MACVLAN_MACADDR_FLUSH:
+		case MACVLAN_MACADDR_SET:
 			break;
 		default:
 			return -EINVAL;
 		}
 	}
+
+	if (data && data[IFLA_MACVLAN_MACADDR]) {
+		if (nla_len(data[IFLA_MACVLAN_MACADDR]) != ETH_ALEN)
+			return -EINVAL;
+
+		if (!is_valid_ether_addr(nla_data(data[IFLA_MACVLAN_MACADDR])))
+			return -EADDRNOTAVAIL;
+	}
+
+	if (data && data[IFLA_MACVLAN_MACADDR_COUNT])
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * reconfigure list of remote source mac address
+ * (only for macvlan devices in source mode)
+ * Note regarding alignment: all netlink data is aligned to 4 Byte, which
+ * suffices for both ether_addr_copy and ether_addr_equal_64bits usage.
+ */
+static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode,
+				      struct nlattr *data[])
+{
+	char *addr = NULL;
+	int ret, rem, len;
+	struct nlattr *nla, *head;
+	struct macvlan_source_entry *entry;
+
+	if (data[IFLA_MACVLAN_MACADDR])
+		addr = nla_data(data[IFLA_MACVLAN_MACADDR]);
+
+	if (mode == MACVLAN_MACADDR_ADD) {
+		if (!addr)
+			return -EINVAL;
+
+		return macvlan_hash_add_source(vlan, addr);
+
+	} else if (mode == MACVLAN_MACADDR_DEL) {
+		if (!addr)
+			return -EINVAL;
+
+		entry = macvlan_hash_lookup_source(vlan, addr);
+		if (entry) {
+			macvlan_hash_del_source(entry);
+			vlan->macaddr_count--;
+		}
+	} else if (mode == MACVLAN_MACADDR_FLUSH) {
+		macvlan_flush_sources(vlan->port, vlan);
+	} else if (mode == MACVLAN_MACADDR_SET) {
+		macvlan_flush_sources(vlan->port, vlan);
+
+		if (addr) {
+			ret = macvlan_hash_add_source(vlan, addr);
+			if (ret)
+				return ret;
+		}
+
+		if (!data || !data[IFLA_MACVLAN_MACADDR_DATA])
+			return 0;
+
+		head = nla_data(data[IFLA_MACVLAN_MACADDR_DATA]);
+		len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]);
+
+		nla_for_each_attr(nla, head, len, rem) {
+			if (nla_type(nla) != IFLA_MACVLAN_MACADDR ||
+			    nla_len(nla) != ETH_ALEN)
+				continue;
+
+			addr = nla_data(nla);
+			ret = macvlan_hash_add_source(vlan, addr);
+			if (ret)
+				return ret;
+		}
+	} else {
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -981,6 +1207,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	struct macvlan_port *port;
 	struct net_device *lowerdev;
 	int err;
+	int macmode;
 
 	if (!tb[IFLA_LINK])
 		return -EINVAL;
@@ -1034,6 +1261,15 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		eth_hw_addr_inherit(dev, lowerdev);
 	}
 
+	if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
+		if (vlan->mode != MACVLAN_MODE_SOURCE)
+			return -EINVAL;
+		macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]);
+		err = macvlan_changelink_sources(vlan, macmode, data);
+		if (err)
+			return err;
+	}
+
 	port->count += 1;
 	err = register_netdevice(dev);
 	if (err < 0)
@@ -1070,6 +1306,8 @@ void macvlan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
+	if (vlan->mode == MACVLAN_MODE_SOURCE)
+		macvlan_flush_sources(vlan->port, vlan);
 	list_del_rcu(&vlan->list);
 	unregister_netdevice_queue(dev, head);
 	netdev_upper_dev_unlink(vlan->lowerdev, dev);
@@ -1082,6 +1320,8 @@ static int macvlan_changelink(struct net_device *dev,
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	enum macvlan_mode mode;
 	bool set_mode = false;
+	enum macvlan_macaddr_mode macmode;
+	int ret;
 
 	/* Validate mode, but don't set yet: setting flags may fail. */
 	if (data && data[IFLA_MACVLAN_MODE]) {
@@ -1091,6 +1331,9 @@ static int macvlan_changelink(struct net_device *dev,
 		if ((mode == MACVLAN_MODE_PASSTHRU) !=
 		    (vlan->mode == MACVLAN_MODE_PASSTHRU))
 			return -EINVAL;
+		if (vlan->mode == MACVLAN_MODE_SOURCE &&
+		    vlan->mode != mode)
+			macvlan_flush_sources(vlan->port, vlan);
 	}
 
 	if (data && data[IFLA_MACVLAN_FLAGS]) {
@@ -1110,26 +1353,77 @@ static int macvlan_changelink(struct net_device *dev,
 	}
 	if (set_mode)
 		vlan->mode = mode;
+	if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
+		if (vlan->mode != MACVLAN_MODE_SOURCE)
+			return -EINVAL;
+		macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]);
+		ret = macvlan_changelink_sources(vlan, macmode, data);
+		if (ret)
+			return ret;
+	}
 	return 0;
 }
 
+static size_t macvlan_get_size_mac(const struct macvlan_dev *vlan)
+{
+	if (vlan->macaddr_count == 0)
+		return 0;
+	return nla_total_size(0) /* IFLA_MACVLAN_MACADDR_DATA */
+		+ vlan->macaddr_count * nla_total_size(sizeof(u8) * ETH_ALEN);
+}
+
 static size_t macvlan_get_size(const struct net_device *dev)
 {
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
 	return (0
 		+ nla_total_size(4) /* IFLA_MACVLAN_MODE */
 		+ nla_total_size(2) /* IFLA_MACVLAN_FLAGS */
+		+ nla_total_size(4) /* IFLA_MACVLAN_MACADDR_COUNT */
+		+ macvlan_get_size_mac(vlan) /* IFLA_MACVLAN_MACADDR */
 		);
 }
 
+static int macvlan_fill_info_macaddr(struct sk_buff *skb,
+				     const struct macvlan_dev *vlan,
+				     const int i)
+{
+	struct hlist_head *h = &vlan->port->vlan_source_hash[i];
+	struct macvlan_source_entry *entry;
+
+	hlist_for_each_entry_rcu(entry, h, hlist) {
+		if (entry->vlan != vlan)
+			continue;
+		if (nla_put(skb, IFLA_MACVLAN_MACADDR, ETH_ALEN, entry->addr))
+			return 1;
+	}
+	return 0;
+}
+
 static int macvlan_fill_info(struct sk_buff *skb,
 				const struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
+	int i;
+	struct nlattr *nest;
 
 	if (nla_put_u32(skb, IFLA_MACVLAN_MODE, vlan->mode))
 		goto nla_put_failure;
 	if (nla_put_u16(skb, IFLA_MACVLAN_FLAGS, vlan->flags))
 		goto nla_put_failure;
+	if (nla_put_u32(skb, IFLA_MACVLAN_MACADDR_COUNT, vlan->macaddr_count))
+		goto nla_put_failure;
+	if (vlan->macaddr_count > 0) {
+		nest = nla_nest_start(skb, IFLA_MACVLAN_MACADDR_DATA);
+		if (nest == NULL)
+			goto nla_put_failure;
+
+		for (i = 0; i < MACVLAN_HASH_SIZE; i++) {
+			if (macvlan_fill_info_macaddr(skb, vlan, i))
+				goto nla_put_failure;
+		}
+		nla_nest_end(skb, nest);
+	}
 	return 0;
 
 nla_put_failure:
@@ -1139,6 +1433,10 @@ nla_put_failure:
 static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
 	[IFLA_MACVLAN_MODE]  = { .type = NLA_U32 },
 	[IFLA_MACVLAN_FLAGS] = { .type = NLA_U16 },
+	[IFLA_MACVLAN_MACADDR_MODE] = { .type = NLA_U32 },
+	[IFLA_MACVLAN_MACADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+	[IFLA_MACVLAN_MACADDR_DATA] = { .type = NLA_NESTED },
+	[IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 },
 };
 
 int macvlan_link_register(struct rtnl_link_ops *ops)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 6b2c7cf352a5..6f6929ea8a0c 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -60,6 +60,7 @@ struct macvlan_dev {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll		*netpoll;
 #endif
+	unsigned int		macaddr_count;
 };
 
 static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c80f95f6ee78..0bdb77e16875 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -303,6 +303,10 @@ enum {
 	IFLA_MACVLAN_UNSPEC,
 	IFLA_MACVLAN_MODE,
 	IFLA_MACVLAN_FLAGS,
+	IFLA_MACVLAN_MACADDR_MODE,
+	IFLA_MACVLAN_MACADDR,
+	IFLA_MACVLAN_MACADDR_DATA,
+	IFLA_MACVLAN_MACADDR_COUNT,
 	__IFLA_MACVLAN_MAX,
 };
 
@@ -313,6 +317,14 @@ enum macvlan_mode {
 	MACVLAN_MODE_VEPA    = 2, /* talk to other ports through ext bridge */
 	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
 	MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
+	MACVLAN_MODE_SOURCE  = 16,/* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+	MACVLAN_MACADDR_ADD,
+	MACVLAN_MACADDR_DEL,
+	MACVLAN_MACADDR_FLUSH,
+	MACVLAN_MACADDR_SET,
 };
 
 #define MACVLAN_FLAG_NOPROMISC	1
-- 
cgit v1.2.3


From 4bcfda09936da647b0a3b49d5dcb3c6c6ebb0395 Mon Sep 17 00:00:00 2001
From: "Tan, Raymond" <raymond.tan@intel.com>
Date: Wed, 3 Sep 2014 10:41:38 +0800
Subject: i2c: designware: add support of platform data to set I2C mode

Use the platform data to set the clk_freq when there is no DT configuration
available. The clk_freq in turn will determine the I2C speed mode.

In Quark, there is currently no other configuration mechanism other than
board files.

Signed-off-by: Raymond Tan <raymond.tan@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Hock Leong Kweh <hock.leong.kweh@intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-platdrv.c  |  6 ++++++
 include/linux/platform_data/i2c-designware.h | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 include/linux/platform_data/i2c-designware.h

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 8193e8eea764..4d6a6b94e2fa 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -41,6 +41,7 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
+#include <linux/platform_data/i2c-designware.h>
 #include "i2c-designware-core.h"
 
 static struct i2c_algorithm i2c_dw_algo = {
@@ -122,6 +123,7 @@ static int dw_i2c_probe(struct platform_device *pdev)
 	struct dw_i2c_dev *dev;
 	struct i2c_adapter *adap;
 	struct resource *mem;
+	struct dw_i2c_platform_data *pdata;
 	int irq, r;
 	u32 clk_freq;
 
@@ -182,6 +184,10 @@ static int dw_i2c_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "Only 100kHz and 400kHz supported");
 			return -EINVAL;
 		}
+	} else {
+		pdata = dev_get_platdata(&pdev->dev);
+		if (pdata)
+			clk_freq = pdata->i2c_scl_freq;
 	}
 
 	dev->functionality =
diff --git a/include/linux/platform_data/i2c-designware.h b/include/linux/platform_data/i2c-designware.h
new file mode 100644
index 000000000000..7a61fb27c25b
--- /dev/null
+++ b/include/linux/platform_data/i2c-designware.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef I2C_DESIGNWARE_H
+#define I2C_DESIGNWARE_H
+
+struct dw_i2c_platform_data {
+	unsigned int i2c_scl_freq;
+};
+
+#endif
-- 
cgit v1.2.3


From f48c767ce8951e30eb716b8ef69142d21aacbd1d Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 29 Sep 2014 13:58:47 +0200
Subject: PM / Domains: Move dev_pm_domain_attach|detach() to pm_domain.h

The commit 46420dd73b80 (PM / Domains: Add APIs to attach/detach a PM
domain for a device) started using errno values in pm.h header file.
It also failed to include the header for these, thus it caused
compiler errors.

Instead of including the errno header to pm.h, let's move the functions
to pm_domain.h, since it's a better match.

Fixes: 46420dd73b80 (PM / Domains: Add APIs to attach/detach a PM domain for a device)
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/amba/bus.c          |  1 +
 drivers/base/platform.c     |  1 +
 drivers/i2c/i2c-core.c      |  1 +
 drivers/mmc/core/sdio_bus.c |  1 +
 drivers/spi/spi.c           |  1 +
 include/linux/pm.h          | 11 -----------
 include/linux/pm_domain.h   | 11 +++++++++++
 7 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 8f5239377f91..47bbdc1b5be3 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -15,6 +15,7 @@
 #include <linux/io.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_domain.h>
 #include <linux/amba/bus.h>
 #include <linux/sizes.h>
 
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 904be3dc0908..b2afc29403f9 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -21,6 +21,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_domain.h>
 #include <linux/idr.h>
 #include <linux/acpi.h>
 #include <linux/clk/clk-conf.h>
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 3cd8f11f1b5f..e61a6c5c3372 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -48,6 +48,7 @@
 #include <linux/irqflags.h>
 #include <linux/rwsem.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_domain.h>
 #include <linux/acpi.h>
 #include <linux/jump_label.h>
 #include <asm/uaccess.h>
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 1df0fc63c17c..65cf7a7e05ea 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -16,6 +16,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_domain.h>
 #include <linux/acpi.h>
 
 #include <linux/mmc/card.h>
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 72a0beb1fafa..3907f1493e7d 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -35,6 +35,7 @@
 #include <linux/spi/spi.h>
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_domain.h>
 #include <linux/export.h>
 #include <linux/sched/rt.h>
 #include <linux/delay.h>
diff --git a/include/linux/pm.h b/include/linux/pm.h
index c4cbf485a5d6..1022ba1eb4de 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -622,17 +622,6 @@ struct dev_pm_domain {
 	void (*detach)(struct device *dev, bool power_off);
 };
 
-#ifdef CONFIG_PM
-extern int dev_pm_domain_attach(struct device *dev, bool power_on);
-extern void dev_pm_domain_detach(struct device *dev, bool power_off);
-#else
-static inline int dev_pm_domain_attach(struct device *dev, bool power_on)
-{
-	return -ENODEV;
-}
-static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
-#endif
-
 /*
  * The PM_EVENT_ messages are also used by drivers implementing the legacy
  * suspend framework, based on the ->suspend() and ->resume() callbacks common
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index ed4f4a79c528..900474317afc 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -301,4 +301,15 @@ static inline int of_genpd_add_provider_onecell(struct device_node *np,
 	return __of_genpd_add_provider(np, __of_genpd_xlate_onecell, data);
 }
 
+#ifdef CONFIG_PM
+extern int dev_pm_domain_attach(struct device *dev, bool power_on);
+extern void dev_pm_domain_detach(struct device *dev, bool power_off);
+#else
+static inline int dev_pm_domain_attach(struct device *dev, bool power_on)
+{
+	return -ENODEV;
+}
+static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
+#endif
+
 #endif /* _LINUX_PM_DOMAIN_H */
-- 
cgit v1.2.3


From baeb7ef34952f523a129e5d1369aa42ecbe7f8c9 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 29 Sep 2014 10:21:03 +0200
Subject: tty: serial: 8250: use 32bit variable for rpm_tx_active

The kbuild test robot wrote me:
|  make.cross ARCH=powerpc
|>> ERROR: ".__xchg_called_with_bad_pointer" [drivers/tty/serial/8250/8250.ko] undefined!

The generic implementation of xchg() on arm and x86 works for variables of
size one bye (char). According to the report powerpc does not support
xchg() for one byte sized variables and looking further it seems also to
be the same case for sparc and tile (or for 10 out of 26 architectures
which provide a custom implementation).
For that reason I increase the size of the variable from one to four
bytes to get it work on powerpc (and the others).

Reported-By: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_8250.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index c267412a3ef4..3df10d5f154b 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -84,7 +84,7 @@ struct uart_8250_port {
 	unsigned char		mcr_mask;	/* mask of user bits */
 	unsigned char		mcr_force;	/* mask of forced bits */
 	unsigned char		cur_iotype;	/* Running I/O type */
-	unsigned char		rpm_tx_active;
+	unsigned int		rpm_tx_active;
 
 	/*
 	 * Some bits in registers are cleared on a read, so they must
-- 
cgit v1.2.3


From d8e0a86f9713689e35dc14f7184e85a13a2a9f4e Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Mon, 23 Jun 2014 14:20:06 -0700
Subject: i2c: cros_ec: Remove EC_I2C_FLAG_10BIT

In <https://lkml.org/lkml/2014/6/10/265> pointed out that the 10-bit
flag in the cros_ec_tunnel was useless.  It went into a 16-bit flags
field but was defined at (1 << 16).

Since we have no 10-bit i2c devices on the other side of the tunnel on
any known devices this was never a problem.  Until we do it makes
sense to remove this code.  On the EC side the code to handle this
flag was removed in <https://chromium-review.googlesource.com/204162>.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-cros-ec-tunnel.c | 6 ++++--
 include/linux/mfd/cros_ec_commands.h    | 3 ---
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
index 0403ec1dc3b5..fc89c13b1632 100644
--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
@@ -94,7 +94,7 @@ static int ec_i2c_construct_message(u8 *buf, const struct i2c_msg i2c_msgs[],
 		msg->addr_flags = i2c_msg->addr;
 
 		if (i2c_msg->flags & I2C_M_TEN)
-			msg->addr_flags |= EC_I2C_FLAG_10BIT;
+			return -EINVAL;
 
 		if (i2c_msg->flags & I2C_M_RD) {
 			msg->addr_flags |= EC_I2C_FLAG_READ;
@@ -218,7 +218,9 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
 		}
 	}
 
-	ec_i2c_construct_message(request, i2c_msgs, num, bus_num);
+	result = ec_i2c_construct_message(request, i2c_msgs, num, bus_num);
+	if (result)
+		goto exit;
 
 	msg.version = 0;
 	msg.command = EC_CMD_I2C_PASSTHRU;
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 7853a6410d14..a49cd41feea7 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -1928,9 +1928,6 @@ struct ec_response_power_info {
 
 #define EC_CMD_I2C_PASSTHRU 0x9e
 
-/* Slave address is 10 (not 7) bit */
-#define EC_I2C_FLAG_10BIT	(1 << 16)
-
 /* Read data; if not present, message is a write */
 #define EC_I2C_FLAG_READ	(1 << 15)
 
-- 
cgit v1.2.3


From 41f8bba7f5552d033583777dede2df7c36e7853d Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Mon, 29 Sep 2014 15:29:21 +0100
Subject: of/pci: Add pci_register_io_range() and pci_pio_to_address()

Some architectures do not have a simple view of the PCI I/O space and
instead use a range of CPU addresses that map to bus addresses.  For some
architectures these ranges will be expressed by OF bindings in a device
tree file.

This patch introduces a pci_register_io_range() helper function with a
generic implementation that can be used by such architectures to keep track
of the I/O ranges described by the PCI bindings.  If the PCI_IOBASE macro
is not defined, that signals lack of support for PCI and we return an
error.

In order to retrieve the CPU address associated with an I/O port, a new
helper function pci_pio_to_address() is introduced.  This will search in
the list of ranges registered with pci_register_io_range() and return the
CPU address that corresponds to the given port.

[arnd: add dummy !CONFIG_OF pci_pio_to_address() to fix build errors]
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
CC: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/address.c       | 109 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_address.h |   7 +++
 2 files changed, 116 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/address.c b/drivers/of/address.c
index e3718250d66e..758d4f04d4aa 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -5,6 +5,8 @@
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/pci_regs.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 
 /* Max address size we deal with */
@@ -601,12 +603,119 @@ const __be32 *of_get_address(struct device_node *dev, int index, u64 *size,
 }
 EXPORT_SYMBOL(of_get_address);
 
+#ifdef PCI_IOBASE
+struct io_range {
+	struct list_head list;
+	phys_addr_t start;
+	resource_size_t size;
+};
+
+static LIST_HEAD(io_range_list);
+static DEFINE_SPINLOCK(io_range_lock);
+#endif
+
+/*
+ * Record the PCI IO range (expressed as CPU physical address + size).
+ * Return a negative value if an error has occured, zero otherwise
+ */
+int __weak pci_register_io_range(phys_addr_t addr, resource_size_t size)
+{
+	int err = 0;
+
+#ifdef PCI_IOBASE
+	struct io_range *range;
+	resource_size_t allocated_size = 0;
+
+	/* check if the range hasn't been previously recorded */
+	spin_lock(&io_range_lock);
+	list_for_each_entry(range, &io_range_list, list) {
+		if (addr >= range->start && addr + size <= range->start + size) {
+			/* range already registered, bail out */
+			goto end_register;
+		}
+		allocated_size += range->size;
+	}
+
+	/* range not registed yet, check for available space */
+	if (allocated_size + size - 1 > IO_SPACE_LIMIT) {
+		/* if it's too big check if 64K space can be reserved */
+		if (allocated_size + SZ_64K - 1 > IO_SPACE_LIMIT) {
+			err = -E2BIG;
+			goto end_register;
+		}
+
+		size = SZ_64K;
+		pr_warn("Requested IO range too big, new size set to 64K\n");
+	}
+
+	/* add the range to the list */
+	range = kzalloc(sizeof(*range), GFP_KERNEL);
+	if (!range) {
+		err = -ENOMEM;
+		goto end_register;
+	}
+
+	range->start = addr;
+	range->size = size;
+
+	list_add_tail(&range->list, &io_range_list);
+
+end_register:
+	spin_unlock(&io_range_lock);
+#endif
+
+	return err;
+}
+
+phys_addr_t pci_pio_to_address(unsigned long pio)
+{
+	phys_addr_t address = (phys_addr_t)OF_BAD_ADDR;
+
+#ifdef PCI_IOBASE
+	struct io_range *range;
+	resource_size_t allocated_size = 0;
+
+	if (pio > IO_SPACE_LIMIT)
+		return address;
+
+	spin_lock(&io_range_lock);
+	list_for_each_entry(range, &io_range_list, list) {
+		if (pio >= allocated_size && pio < allocated_size + range->size) {
+			address = range->start + pio - allocated_size;
+			break;
+		}
+		allocated_size += range->size;
+	}
+	spin_unlock(&io_range_lock);
+#endif
+
+	return address;
+}
+
 unsigned long __weak pci_address_to_pio(phys_addr_t address)
 {
+#ifdef PCI_IOBASE
+	struct io_range *res;
+	resource_size_t offset = 0;
+	unsigned long addr = -1;
+
+	spin_lock(&io_range_lock);
+	list_for_each_entry(res, &io_range_list, list) {
+		if (address >= res->start && address < res->start + res->size) {
+			addr = res->start - address + offset;
+			break;
+		}
+		offset += res->size;
+	}
+	spin_unlock(&io_range_lock);
+
+	return addr;
+#else
 	if (address > IO_SPACE_LIMIT)
 		return (unsigned long)-1;
 
 	return (unsigned long) address;
+#endif
 }
 
 static int __of_address_to_resource(struct device_node *dev,
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index fb7b7221e063..497a04356ff8 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -55,7 +55,9 @@ extern void __iomem *of_iomap(struct device_node *device, int index);
 extern const __be32 *of_get_address(struct device_node *dev, int index,
 			   u64 *size, unsigned int *flags);
 
+extern int pci_register_io_range(phys_addr_t addr, resource_size_t size);
 extern unsigned long pci_address_to_pio(phys_addr_t addr);
+extern phys_addr_t pci_pio_to_address(unsigned long pio);
 
 extern int of_pci_range_parser_init(struct of_pci_range_parser *parser,
 			struct device_node *node);
@@ -80,6 +82,11 @@ static inline const __be32 *of_get_address(struct device_node *dev, int index,
 	return NULL;
 }
 
+static inline phys_addr_t pci_pio_to_address(unsigned long pio)
+{
+	return 0;
+}
+
 static inline int of_pci_range_parser_init(struct of_pci_range_parser *parser,
 			struct device_node *node)
 {
-- 
cgit v1.2.3


From 2101e533f41a90b25bee17ce969734e26eb0eb55 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Fri, 26 Sep 2014 00:09:19 +0200
Subject: bcma: register bcma as device tree driver

This driver is used by the bcm53xx ARM SoC code. Now it is possible to
give the address of the chipcommon core in device tree and bcma will
search for all the other cores.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/devicetree/bindings/bus/bcma.txt | 20 +++++++
 drivers/bcma/bcma_private.h                    | 14 +++++
 drivers/bcma/host_soc.c                        | 81 ++++++++++++++++++++++++++
 drivers/bcma/main.c                            | 52 ++++++++++++++++-
 include/linux/bcma/bcma.h                      |  2 +
 5 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/bus/bcma.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/bus/bcma.txt b/Documentation/devicetree/bindings/bus/bcma.txt
new file mode 100644
index 000000000000..e9070c161172
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/bcma.txt
@@ -0,0 +1,20 @@
+Driver for ARM AXI Bus with Broadcom Plugins (bcma)
+
+Required properties:
+
+- compatible : brcm,bus-axi
+
+- reg : iomem address range of chipcommon core
+
+The cores on the AXI bus are automatically detected by bcma with the
+memory ranges they are using and they get registered afterwards.
+
+Example:
+
+	axi@18000000 {
+		compatible = "brcm,bus-axi";
+		reg = <0x18000000 0x1000>;
+		ranges = <0x00000000 0x18000000 0x00100000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+	};
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index b40be43c6f31..b6412b2d748d 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -88,6 +88,20 @@ extern int __init bcma_host_pci_init(void);
 extern void __exit bcma_host_pci_exit(void);
 #endif /* CONFIG_BCMA_HOST_PCI */
 
+/* host_soc.c */
+#if defined(CONFIG_BCMA_HOST_SOC) && defined(CONFIG_OF)
+extern int __init bcma_host_soc_register_driver(void);
+extern void __exit bcma_host_soc_unregister_driver(void);
+#else
+static inline int __init bcma_host_soc_register_driver(void)
+{
+	return 0;
+}
+static inline void __exit bcma_host_soc_unregister_driver(void)
+{
+}
+#endif /* CONFIG_BCMA_HOST_SOC && CONFIG_OF */
+
 /* driver_pci.c */
 u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address);
 
diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c
index 718e054dd727..335cbcfd945b 100644
--- a/drivers/bcma/host_soc.c
+++ b/drivers/bcma/host_soc.c
@@ -7,6 +7,9 @@
 
 #include "bcma_private.h"
 #include "scan.h"
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
 #include <linux/bcma/bcma.h>
 #include <linux/bcma/bcma_soc.h>
 
@@ -176,6 +179,7 @@ int __init bcma_host_soc_register(struct bcma_soc *soc)
 	/* Host specific */
 	bus->hosttype = BCMA_HOSTTYPE_SOC;
 	bus->ops = &bcma_host_soc_ops;
+	bus->host_pdev = NULL;
 
 	/* Initialize struct, detect chip */
 	bcma_init_bus(bus);
@@ -195,3 +199,80 @@ int __init bcma_host_soc_init(struct bcma_soc *soc)
 
 	return err;
 }
+
+#ifdef CONFIG_OF
+static int bcma_host_soc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct bcma_bus *bus;
+	int err;
+
+	/* Alloc */
+	bus = devm_kzalloc(dev, sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return -ENOMEM;
+
+	/* Map MMIO */
+	bus->mmio = of_iomap(np, 0);
+	if (!bus->mmio)
+		return -ENOMEM;
+
+	/* Host specific */
+	bus->hosttype = BCMA_HOSTTYPE_SOC;
+	bus->ops = &bcma_host_soc_ops;
+	bus->host_pdev = pdev;
+
+	/* Initialize struct, detect chip */
+	bcma_init_bus(bus);
+
+	/* Register */
+	err = bcma_bus_register(bus);
+	if (err)
+		goto err_unmap_mmio;
+
+	platform_set_drvdata(pdev, bus);
+
+	return err;
+
+err_unmap_mmio:
+	iounmap(bus->mmio);
+	return err;
+}
+
+static int bcma_host_soc_remove(struct platform_device *pdev)
+{
+	struct bcma_bus *bus = platform_get_drvdata(pdev);
+
+	bcma_bus_unregister(bus);
+	iounmap(bus->mmio);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static const struct of_device_id bcma_host_soc_of_match[] = {
+	{ .compatible = "brcm,bus-axi", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcma_host_soc_of_match);
+
+static struct platform_driver bcma_host_soc_driver = {
+	.driver = {
+		.name = "bcma-host-soc",
+		.of_match_table = bcma_host_soc_of_match,
+	},
+	.probe		= bcma_host_soc_probe,
+	.remove		= bcma_host_soc_remove,
+};
+
+int __init bcma_host_soc_register_driver(void)
+{
+	return platform_driver_register(&bcma_host_soc_driver);
+}
+
+void __exit bcma_host_soc_unregister_driver(void)
+{
+	platform_driver_unregister(&bcma_host_soc_driver);
+}
+#endif /* CONFIG_OF */
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index c421403cab43..d1656c2f70af 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -10,6 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
 #include <linux/slab.h>
+#include <linux/of_address.h>
 
 MODULE_DESCRIPTION("Broadcom's specific AMBA driver");
 MODULE_LICENSE("GPL");
@@ -131,6 +132,43 @@ static bool bcma_is_core_needed_early(u16 core_id)
 	return false;
 }
 
+#ifdef CONFIG_OF
+static struct device_node *bcma_of_find_child_device(struct platform_device *parent,
+						     struct bcma_device *core)
+{
+	struct device_node *node;
+	u64 size;
+	const __be32 *reg;
+
+	if (!parent || !parent->dev.of_node)
+		return NULL;
+
+	for_each_child_of_node(parent->dev.of_node, node) {
+		reg = of_get_address(node, 0, &size, NULL);
+		if (!reg)
+			continue;
+		if (of_translate_address(node, reg) == core->addr)
+			return node;
+	}
+	return NULL;
+}
+
+static void bcma_of_fill_device(struct platform_device *parent,
+				struct bcma_device *core)
+{
+	struct device_node *node;
+
+	node = bcma_of_find_child_device(parent, core);
+	if (node)
+		core->dev.of_node = node;
+}
+#else
+static void bcma_of_fill_device(struct platform_device *parent,
+				struct bcma_device *core)
+{
+}
+#endif /* CONFIG_OF */
+
 static void bcma_register_core(struct bcma_bus *bus, struct bcma_device *core)
 {
 	int err;
@@ -147,7 +185,13 @@ static void bcma_register_core(struct bcma_bus *bus, struct bcma_device *core)
 		break;
 	case BCMA_HOSTTYPE_SOC:
 		core->dev.dma_mask = &core->dev.coherent_dma_mask;
-		core->dma_dev = &core->dev;
+		if (bus->host_pdev) {
+			core->dma_dev = &bus->host_pdev->dev;
+			core->dev.parent = &bus->host_pdev->dev;
+			bcma_of_fill_device(bus->host_pdev, core);
+		} else {
+			core->dma_dev = &core->dev;
+		}
 		break;
 	case BCMA_HOSTTYPE_SDIO:
 		break;
@@ -528,6 +572,11 @@ static int __init bcma_modinit(void)
 	if (err)
 		return err;
 
+	err = bcma_host_soc_register_driver();
+	if (err) {
+		pr_err("SoC host initialization failed\n");
+		err = 0;
+	}
 #ifdef CONFIG_BCMA_HOST_PCI
 	err = bcma_host_pci_init();
 	if (err) {
@@ -545,6 +594,7 @@ static void __exit bcma_modexit(void)
 #ifdef CONFIG_BCMA_HOST_PCI
 	bcma_host_pci_exit();
 #endif
+	bcma_host_soc_unregister_driver();
 	bus_unregister(&bcma_bus_type);
 }
 module_exit(bcma_modexit)
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 634597917670..729f48e6b20b 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -323,6 +323,8 @@ struct bcma_bus {
 		struct pci_dev *host_pci;
 		/* Pointer to the SDIO device (only for BCMA_HOSTTYPE_SDIO) */
 		struct sdio_func *host_sdio;
+		/* Pointer to platform device (only for BCMA_HOSTTYPE_SOC) */
+		struct platform_device *host_pdev;
 	};
 
 	struct bcma_chipinfo chipinfo;
-- 
cgit v1.2.3


From 2a8a8ce651d3a88fdf83e2ed15633c8d19292108 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 30 Sep 2014 02:21:34 +0200
Subject: PM / sleep: Export dpm_suspend_late/noirq() and
 dpm_resume_early/noirq()

Subsequent change sets will add platform-related operations between
dpm_suspend_late() and dpm_suspend_noirq() as well as between
dpm_resume_noirq() and dpm_resume_early() in suspend_enter(), so
export these functions for suspend_enter() to be able to call them
separately and split the invocations of dpm_suspend_end() and
dpm_resume_start() in there accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/main.c |  8 ++++----
 include/linux/pm.h        |  4 ++++
 kernel/power/suspend.c    | 14 +++++++++++---
 3 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index b67d9aef9fe4..44973196d3fd 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -540,7 +540,7 @@ static void async_resume_noirq(void *data, async_cookie_t cookie)
  * Call the "noirq" resume handlers for all devices in dpm_noirq_list and
  * enable device drivers to receive interrupts.
  */
-static void dpm_resume_noirq(pm_message_t state)
+void dpm_resume_noirq(pm_message_t state)
 {
 	struct device *dev;
 	ktime_t starttime = ktime_get();
@@ -662,7 +662,7 @@ static void async_resume_early(void *data, async_cookie_t cookie)
  * dpm_resume_early - Execute "early resume" callbacks for all devices.
  * @state: PM transition of the system being carried out.
  */
-static void dpm_resume_early(pm_message_t state)
+void dpm_resume_early(pm_message_t state)
 {
 	struct device *dev;
 	ktime_t starttime = ktime_get();
@@ -1093,7 +1093,7 @@ static int device_suspend_noirq(struct device *dev)
  * Prevent device drivers from receiving interrupts and call the "noirq" suspend
  * handlers for all non-sysdev devices.
  */
-static int dpm_suspend_noirq(pm_message_t state)
+int dpm_suspend_noirq(pm_message_t state)
 {
 	ktime_t starttime = ktime_get();
 	int error = 0;
@@ -1232,7 +1232,7 @@ static int device_suspend_late(struct device *dev)
  * dpm_suspend_late - Execute "late suspend" callbacks for all devices.
  * @state: PM transition of the system being carried out.
  */
-static int dpm_suspend_late(pm_message_t state)
+int dpm_suspend_late(pm_message_t state)
 {
 	ktime_t starttime = ktime_get();
 	int error = 0;
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 72c0fe098a27..e1c00b7ee913 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -679,12 +679,16 @@ struct dev_pm_domain {
 extern void device_pm_lock(void);
 extern void dpm_resume_start(pm_message_t state);
 extern void dpm_resume_end(pm_message_t state);
+extern void dpm_resume_noirq(pm_message_t state);
+extern void dpm_resume_early(pm_message_t state);
 extern void dpm_resume(pm_message_t state);
 extern void dpm_complete(pm_message_t state);
 
 extern void device_pm_unlock(void);
 extern int dpm_suspend_end(pm_message_t state);
 extern int dpm_suspend_start(pm_message_t state);
+extern int dpm_suspend_noirq(pm_message_t state);
+extern int dpm_suspend_late(pm_message_t state);
 extern int dpm_suspend(pm_message_t state);
 extern int dpm_prepare(pm_message_t state);
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index e837dd6783c6..58ae98b7dc2b 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -265,11 +265,16 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	if (error)
 		goto Platform_finish;
 
-	error = dpm_suspend_end(PMSG_SUSPEND);
+	error = dpm_suspend_late(PMSG_SUSPEND);
 	if (error) {
-		printk(KERN_ERR "PM: Some devices failed to power down\n");
+		printk(KERN_ERR "PM: late suspend of devices failed\n");
 		goto Platform_finish;
 	}
+	error = dpm_suspend_noirq(PMSG_SUSPEND);
+	if (error) {
+		printk(KERN_ERR "PM: noirq suspend of devices failed\n");
+		goto Devices_early_resume;
+	}
 	error = platform_suspend_prepare_late(state);
 	if (error)
 		goto Platform_wake;
@@ -319,7 +324,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 
  Platform_wake:
 	platform_suspend_wake(state);
-	dpm_resume_start(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
+
+ Devices_early_resume:
+	dpm_resume_early(PMSG_RESUME);
 
  Platform_finish:
 	platform_suspend_finish(state);
-- 
cgit v1.2.3


From a8d46b9e4e487301affe84fa53de40b890898604 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 30 Sep 2014 02:29:01 +0200
Subject: ACPI / sleep: Rework the handling of ACPI GPE wakeup from
 suspend-to-idle

The ACPI GPE wakeup from suspend-to-idle is currently based on using
the IRQF_NO_SUSPEND flag for the ACPI SCI, but that is problematic
for a couple of reasons.  First, in principle the ACPI SCI may be
shared and IRQF_NO_SUSPEND does not really work well with shared
interrupts.  Second, it may require the ACPI subsystem to special-case
the handling of device notifications depending on whether or not
they are received during suspend-to-idle in some places which would
lead to fragile code.  Finally, it's better the handle ACPI wakeup
interrupts consistently with wakeup interrupts from other sources.

For this reason, remove the IRQF_NO_SUSPEND flag from the ACPI SCI
and use enable_irq_wake()/disable_irq_wake() with it instead, which
requires two additional platform hooks to be added to struct
platform_freeze_ops.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/osl.c      |  2 +-
 drivers/acpi/sleep.c    | 16 ++++++++++++++++
 include/linux/suspend.h |  2 ++
 kernel/power/suspend.c  | 21 ++++++++++++++++++++-
 4 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 3abe9b223ba7..5ca29b5af8d1 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -825,7 +825,7 @@ acpi_os_install_interrupt_handler(u32 gsi, acpi_osd_handler handler,
 
 	acpi_irq_handler = handler;
 	acpi_irq_context = context;
-	if (request_irq(irq, acpi_irq, IRQF_SHARED | IRQF_NO_SUSPEND, "acpi", acpi_irq)) {
+	if (request_irq(irq, acpi_irq, IRQF_SHARED, "acpi", acpi_irq)) {
 		printk(KERN_ERR PREFIX "SCI (IRQ%d) allocation failed\n", irq);
 		acpi_irq_handler = NULL;
 		return AE_NOT_ACQUIRED;
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 54da4a3fe65e..05a31b573fc3 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -14,6 +14,7 @@
 #include <linux/irq.h>
 #include <linux/dmi.h>
 #include <linux/device.h>
+#include <linux/interrupt.h>
 #include <linux/suspend.h>
 #include <linux/reboot.h>
 #include <linux/acpi.h>
@@ -626,6 +627,19 @@ static int acpi_freeze_begin(void)
 	return 0;
 }
 
+static int acpi_freeze_prepare(void)
+{
+	acpi_enable_all_wakeup_gpes();
+	enable_irq_wake(acpi_gbl_FADT.sci_interrupt);
+	return 0;
+}
+
+static void acpi_freeze_restore(void)
+{
+	disable_irq_wake(acpi_gbl_FADT.sci_interrupt);
+	acpi_enable_all_runtime_gpes();
+}
+
 static void acpi_freeze_end(void)
 {
 	acpi_scan_lock_release();
@@ -633,6 +647,8 @@ static void acpi_freeze_end(void)
 
 static const struct platform_freeze_ops acpi_freeze_ops = {
 	.begin = acpi_freeze_begin,
+	.prepare = acpi_freeze_prepare,
+	.restore = acpi_freeze_restore,
 	.end = acpi_freeze_end,
 };
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 06a9910827c2..3388c1b6f7d8 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -189,6 +189,8 @@ struct platform_suspend_ops {
 
 struct platform_freeze_ops {
 	int (*begin)(void);
+	int (*prepare)(void);
+	void (*restore)(void);
 	void (*end)(void);
 };
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index a25e768d92b5..4ca9a33ff620 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -144,6 +144,12 @@ static int platform_suspend_prepare(suspend_state_t state)
 		suspend_ops->prepare() : 0;
 }
 
+static int platform_suspend_prepare_late(suspend_state_t state)
+{
+	return state == PM_SUSPEND_FREEZE && freeze_ops->prepare ?
+		freeze_ops->prepare() : 0;
+}
+
 static int platform_suspend_prepare_noirq(suspend_state_t state)
 {
 	return state != PM_SUSPEND_FREEZE && suspend_ops->prepare_late ?
@@ -156,6 +162,12 @@ static void platform_resume_noirq(suspend_state_t state)
 		suspend_ops->wake();
 }
 
+static void platform_resume_early(suspend_state_t state)
+{
+	if (state == PM_SUSPEND_FREEZE && freeze_ops->restore)
+		freeze_ops->restore();
+}
+
 static void platform_resume_finish(suspend_state_t state)
 {
 	if (state != PM_SUSPEND_FREEZE && suspend_ops->finish)
@@ -270,10 +282,14 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 		printk(KERN_ERR "PM: late suspend of devices failed\n");
 		goto Platform_finish;
 	}
+	error = platform_suspend_prepare_late(state);
+	if (error)
+		goto Devices_early_resume;
+
 	error = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: noirq suspend of devices failed\n");
-		goto Devices_early_resume;
+		goto Platform_early_resume;
 	}
 	error = platform_suspend_prepare_noirq(state);
 	if (error)
@@ -326,6 +342,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	platform_resume_noirq(state);
 	dpm_resume_noirq(PMSG_RESUME);
 
+ Platform_early_resume:
+	platform_resume_early(state);
+
  Devices_early_resume:
 	dpm_resume_early(PMSG_RESUME);
 
-- 
cgit v1.2.3


From 83bbde1cc0ec9d156b9271e29ffe0dc89c687feb Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Mon, 29 Sep 2014 15:29:24 +0100
Subject: of/pci: Move of_pci_range_to_resource() to of/address.c

We need to enhance of_pci_range_to_resources() enough that it won't make
sense for it to be inline anymore.  Move it to drivers/of/address.c, under
#ifdef CONFIG_PCI.

of_address.h previously implemented of_pci_range_to_resources()
unconditionally, regardless of any config options.  The implementation in
address.c is defined only when CONFIG_OF_ADDRESS=y and CONFIG_PCI=y,
so add a dummy version to avoid build errors when CONFIG_OF or
CONFIG_OF_ADDRESS is not defined.

[bhelgaas: drop extra detail from changelog, move def under CONFIG_PCI,
add dummy of_pci_range_to_resource() for build errors (from Arnd)]
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Grant Likely <grant.likely@linaro.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/of/address.c       |  9 +++++++++
 include/linux/of_address.h | 20 +++++++++-----------
 2 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 758d4f04d4aa..327a57410797 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -295,6 +295,15 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser,
 }
 EXPORT_SYMBOL_GPL(of_pci_range_parser_one);
 
+void of_pci_range_to_resource(struct of_pci_range *range,
+			      struct device_node *np, struct resource *res)
+{
+	res->flags = range->flags;
+	res->start = range->cpu_addr;
+	res->end = range->cpu_addr + range->size - 1;
+	res->parent = res->child = res->sibling = NULL;
+	res->name = np->full_name;
+}
 #endif /* CONFIG_PCI */
 
 /*
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 497a04356ff8..a38e1c846c23 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -23,17 +23,6 @@ struct of_pci_range {
 #define for_each_of_pci_range(parser, range) \
 	for (; of_pci_range_parser_one(parser, range);)
 
-static inline void of_pci_range_to_resource(struct of_pci_range *range,
-					    struct device_node *np,
-					    struct resource *res)
-{
-	res->flags = range->flags;
-	res->start = range->cpu_addr;
-	res->end = range->cpu_addr + range->size - 1;
-	res->parent = res->child = res->sibling = NULL;
-	res->name = np->full_name;
-}
-
 /* Translate a DMA address from device space to CPU space */
 extern u64 of_translate_dma_address(struct device_node *dev,
 				    const __be32 *in_addr);
@@ -145,6 +134,9 @@ extern const __be32 *of_get_pci_address(struct device_node *dev, int bar_no,
 			       u64 *size, unsigned int *flags);
 extern int of_pci_address_to_resource(struct device_node *dev, int bar,
 				      struct resource *r);
+extern void of_pci_range_to_resource(struct of_pci_range *range,
+				     struct device_node *np,
+				     struct resource *res);
 #else /* CONFIG_OF_ADDRESS && CONFIG_PCI */
 static inline int of_pci_address_to_resource(struct device_node *dev, int bar,
 				             struct resource *r)
@@ -157,6 +149,12 @@ static inline const __be32 *of_get_pci_address(struct device_node *dev,
 {
 	return NULL;
 }
+static inline void of_pci_range_to_resource(struct of_pci_range *range,
+					    struct device_node *np,
+					    struct resource *res)
+{
+	return -ENOSYS;
+}
 #endif /* CONFIG_OF_ADDRESS && CONFIG_PCI */
 
 #endif /* __OF_ADDRESS_H */
-- 
cgit v1.2.3


From 1c6dcbe5ceff81c2cf8d929646af675cd59fe7c0 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Fri, 26 Sep 2014 13:58:48 -0400
Subject: NFS: Implement SEEK

The SEEK operation is used when an application makes an lseek call with
either the SEEK_HOLE or SEEK_DATA flags set.  I fall back on
nfs_file_llseek() if the server does not have SEEK support.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/Makefile           |  1 +
 fs/nfs/inode.c            |  2 +
 fs/nfs/nfs42.h            | 14 +++++++
 fs/nfs/nfs42proc.c        | 69 +++++++++++++++++++++++++++++++++
 fs/nfs/nfs42xdr.c         | 98 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4_fs.h          |  3 ++
 fs/nfs/nfs4file.c         | 25 ++++++++++++
 fs/nfs/nfs4proc.c         |  4 +-
 fs/nfs/nfs4xdr.c          |  7 ++++
 include/linux/nfs4.h      |  3 ++
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   | 19 +++++++++
 12 files changed, 244 insertions(+), 2 deletions(-)
 create mode 100644 fs/nfs/nfs42.h
 create mode 100644 fs/nfs/nfs42proc.c
 create mode 100644 fs/nfs/nfs42xdr.c

(limited to 'include/linux')

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 4782e0840dcc..04cb830fa09f 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -28,6 +28,7 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o
 nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
 nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o
 nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o
+nfsv4-$(CONFIG_NFS_V4_2)	+= nfs42proc.o
 
 obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
 obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 577a36f0a510..56d073ede3c7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -716,6 +716,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
 	kfree(new);
 	return res;
 }
+EXPORT_SYMBOL_GPL(nfs_get_lock_context);
 
 void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
 {
@@ -728,6 +729,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
 	spin_unlock(&inode->i_lock);
 	kfree(l_ctx);
 }
+EXPORT_SYMBOL_GPL(nfs_put_lock_context);
 
 /**
  * nfs_close_context - Common close_context() routine NFSv2/v3
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
new file mode 100644
index 000000000000..d10333a197bf
--- /dev/null
+++ b/fs/nfs/nfs42.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
+ */
+
+#ifndef __LINUX_FS_NFS_NFS4_2_H
+#define __LINUX_FS_NFS_NFS4_2_H
+
+/* nfs4.2proc.c */
+loff_t nfs42_proc_llseek(struct file *, loff_t, int);
+
+/* nfs4.2xdr.h */
+extern struct rpc_procinfo nfs4_2_procedures[];
+
+#endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
new file mode 100644
index 000000000000..0886f1db5917
--- /dev/null
+++ b/fs/nfs/nfs42proc.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
+ */
+#include <linux/fs.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_xdr.h>
+#include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
+#include "nfs42.h"
+
+static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
+				fmode_t fmode)
+{
+	struct nfs_open_context *open;
+	struct nfs_lock_context *lock;
+	int ret;
+
+	open = get_nfs_open_context(nfs_file_open_context(file));
+	lock = nfs_get_lock_context(open);
+	if (IS_ERR(lock)) {
+		put_nfs_open_context(open);
+		return PTR_ERR(lock);
+	}
+
+	ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
+
+	nfs_put_lock_context(lock);
+	put_nfs_open_context(open);
+	return ret;
+}
+
+loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+{
+	struct inode *inode = file_inode(filep);
+	struct nfs42_seek_args args = {
+		.sa_fh		= NFS_FH(inode),
+		.sa_offset	= offset,
+		.sa_what	= (whence == SEEK_HOLE) ?
+					NFS4_CONTENT_HOLE : NFS4_CONTENT_DATA,
+	};
+	struct nfs42_seek_res res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEEK],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	struct nfs_server *server = NFS_SERVER(inode);
+	int status;
+
+	if (!(server->caps & NFS_CAP_SEEK))
+		return -ENOTSUPP;
+
+	status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
+	if (status)
+		return status;
+
+	nfs_wb_all(inode);
+	status = nfs4_call_sync(server->client, server, &msg,
+				&args.seq_args, &res.seq_res, 0);
+	if (status == -ENOTSUPP)
+		server->caps &= ~NFS_CAP_SEEK;
+	if (status)
+		return status;
+
+	return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
new file mode 100644
index 000000000000..c90469b604b8
--- /dev/null
+++ b/fs/nfs/nfs42xdr.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com>
+ */
+#ifndef __LINUX_FS_NFS_NFS4_2XDR_H
+#define __LINUX_FS_NFS_NFS4_2XDR_H
+
+#define encode_seek_maxsz		(op_encode_hdr_maxsz + \
+					 encode_stateid_maxsz + \
+					 2 /* offset */ + \
+					 1 /* whence */)
+#define decode_seek_maxsz		(op_decode_hdr_maxsz + \
+					 1 /* eof */ + \
+					 1 /* whence */ + \
+					 2 /* offset */ + \
+					 2 /* length */)
+
+#define NFS4_enc_seek_sz		(compound_encode_hdr_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_seek_maxsz)
+#define NFS4_dec_seek_sz		(compound_decode_hdr_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_seek_maxsz)
+
+
+static void encode_seek(struct xdr_stream *xdr,
+			struct nfs42_seek_args *args,
+			struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_SEEK, decode_seek_maxsz, hdr);
+	encode_nfs4_stateid(xdr, &args->sa_stateid);
+	encode_uint64(xdr, args->sa_offset);
+	encode_uint32(xdr, args->sa_what);
+}
+
+/*
+ * Encode SEEK request
+ */
+static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
+			      struct xdr_stream *xdr,
+			      struct nfs42_seek_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->sa_fh, &hdr);
+	encode_seek(xdr, args, &hdr);
+	encode_nops(&hdr);
+}
+
+static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
+{
+	int status;
+	__be32 *p;
+
+	status = decode_op_hdr(xdr, OP_SEEK);
+	if (status)
+		return status;
+
+	p = xdr_inline_decode(xdr, 4 + 8);
+	if (unlikely(!p))
+		goto out_overflow;
+
+	res->sr_eof = be32_to_cpup(p++);
+	p = xdr_decode_hyper(p, &res->sr_offset);
+	return 0;
+
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * Decode SEEK request
+ */
+static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
+			     struct xdr_stream *xdr,
+			     struct nfs42_seek_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_seek(xdr, res);
+out:
+	return status;
+}
+#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 92193eddb41d..b6b518d18aa7 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -227,6 +227,9 @@ int nfs4_replace_transport(struct nfs_server *server,
 				const struct nfs4_fs_locations *locations);
 
 /* nfs4proc.c */
+extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
+			  struct rpc_message *, struct nfs4_sequence_args *,
+			  struct nfs4_sequence_res *, int);
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
 extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index a816f0627a6c..4dffa3a64731 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -8,6 +8,10 @@
 #include "fscache.h"
 #include "pnfs.h"
 
+#ifdef CONFIG_NFS_V4_2
+#include "nfs42.h"
+#endif
+
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
 static int
@@ -115,8 +119,29 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
+#ifdef CONFIG_NFS_V4_2
+static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
+{
+	loff_t ret;
+
+	switch (whence) {
+	case SEEK_HOLE:
+	case SEEK_DATA:
+		ret = nfs42_proc_llseek(filep, offset, whence);
+		if (ret != -ENOTSUPP)
+			return ret;
+	default:
+		return nfs_file_llseek(filep, offset, whence);
+	}
+}
+#endif /* CONFIG_NFS_V4_2 */
+
 const struct file_operations nfs4_file_operations = {
+#ifdef CONFIG_NFS_V4_2
+	.llseek		= nfs4_file_llseek,
+#else
 	.llseek		= nfs_file_llseek,
+#endif
 	.read		= new_sync_read,
 	.write		= new_sync_write,
 	.read_iter	= nfs_file_read,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7dd8aca31c29..f3a59f1fb498 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -875,7 +875,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 	return ret;
 }
 
-static
 int nfs4_call_sync(struct rpc_clnt *clnt,
 		   struct nfs_server *server,
 		   struct rpc_message *msg,
@@ -8429,7 +8428,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
 		| NFS_CAP_CHANGE_ATTR
 		| NFS_CAP_POSIX_LOCK
 		| NFS_CAP_STATEID_NFSV41
-		| NFS_CAP_ATOMIC_OPEN_V1,
+		| NFS_CAP_ATOMIC_OPEN_V1
+		| NFS_CAP_SEEK,
 	.init_client = nfs41_init_client,
 	.shutdown_client = nfs41_shutdown_client,
 	.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e13b59d8d9aa..949e8717118c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7427,6 +7427,10 @@ nfs4_stat_to_errno(int stat)
 	return -stat;
 }
 
+#ifdef CONFIG_NFS_V4_2
+#include "nfs42xdr.c"
+#endif /* CONFIG_NFS_V4_2 */
+
 #define PROC(proc, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
@@ -7495,6 +7499,9 @@ struct rpc_procinfo	nfs4_procedures[] = {
 			enc_bind_conn_to_session, dec_bind_conn_to_session),
 	PROC(DESTROY_CLIENTID,	enc_destroy_clientid,	dec_destroy_clientid),
 #endif /* CONFIG_NFS_V4_1 */
+#ifdef CONFIG_NFS_V4_2
+	PROC(SEEK,		enc_seek,		dec_seek),
+#endif /* CONFIG_NFS_V4_2 */
 };
 
 const struct rpc_version nfs_version4 = {
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 026b0c042c40..356acc2846fd 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -487,6 +487,9 @@ enum {
 	NFSPROC4_CLNT_GETDEVICELIST,
 	NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
 	NFSPROC4_CLNT_DESTROY_CLIENTID,
+
+	/* nfs42 */
+	NFSPROC4_CLNT_SEEK,
 };
 
 /* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 922be2e050f5..a32ba0d7a98f 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -230,5 +230,6 @@ struct nfs_server {
 #define NFS_CAP_STATEID_NFSV41	(1U << 16)
 #define NFS_CAP_ATOMIC_OPEN_V1	(1U << 17)
 #define NFS_CAP_SECURITY_LABEL	(1U << 18)
+#define NFS_CAP_SEEK		(1U << 19)
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0040629894df..0051b1ad2b37 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1239,6 +1239,25 @@ struct pnfs_ds_commit_info {
 
 #endif /* CONFIG_NFS_V4_1 */
 
+#ifdef CONFIG_NFS_V4_2
+struct nfs42_seek_args {
+	struct nfs4_sequence_args	seq_args;
+
+	struct nfs_fh			*sa_fh;
+	nfs4_stateid			sa_stateid;
+	u64				sa_offset;
+	u32				sa_what;
+};
+
+struct nfs42_seek_res {
+	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
+
+	u32	sr_eof;
+	u64	sr_offset;
+};
+#endif
+
 struct nfs_page;
 
 #define NFS_PAGEVEC_SIZE	(8U)
-- 
cgit v1.2.3


From e1c00e10e92c04aa637126db2e59b092bd4878f8 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Tue, 30 Sep 2014 12:03:48 +0300
Subject: net/mlx4_core: New init and exit flow for mlx4_core

In the new flow, we separate the pci initialization and teardown
from the initialization and teardown of the other resources.

__mlx4_init_one handles the pci resources initialization. It then
calls mlx4_load_one to initialize the remainder of the resources.

When removing a device, mlx4_remove_one is invoked. However, now
mlx4_remove_one calls mlx4_unload_one to free all the resources except the pci
resources. When mlx4_unload_one returns, mlx4_remove_one then frees the
pci resources.

The above separation will allow us to implement 'reset flow' in the future.
It will also enable more EQs for VFs and is a pre-step to the modern API to
enable/disable SRIOV.

Also added nvfs; an integer array of size MLX4_MAX_PORTS + 1; to the mlx4_dev
struct. This new field is used to avoid parsing the num_vfs module parameter
each time the mlx4_restart_one is called.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 393 +++++++++++++++++-------------
 include/linux/mlx4/device.h               |   1 +
 2 files changed, 220 insertions(+), 174 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 4e9857b409f3..f6c32a947185 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2259,116 +2259,18 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
 	iounmap(owner);
 }
 
-static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
+static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
+			 int total_vfs, int *nvfs, struct mlx4_priv *priv)
 {
-	struct mlx4_priv *priv;
 	struct mlx4_dev *dev;
+	unsigned sum = 0;
 	int err;
 	int port;
-	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
-	int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
-	const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
-		{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
-	unsigned total_vfs = 0;
-	int sriov_initialized = 0;
-	unsigned int i;
+	int i;
 	int existing_vfs = 0;
 
-	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
-
-	err = pci_enable_device(pdev);
-	if (err) {
-		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
-		return err;
-	}
-
-	/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
-	 * per port, we must limit the number of VFs to 63 (since their are
-	 * 128 MACs)
-	 */
-	for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
-	     total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
-		nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
-		if (nvfs[i] < 0) {
-			dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
-			return -EINVAL;
-		}
-	}
-	for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
-	     i++) {
-		prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
-		if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
-			dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
-			return -EINVAL;
-		}
-	}
-	if (total_vfs >= MLX4_MAX_NUM_VF) {
-		dev_err(&pdev->dev,
-			"Requested more VF's (%d) than allowed (%d)\n",
-			total_vfs, MLX4_MAX_NUM_VF - 1);
-		return -EINVAL;
-	}
-
-	for (i = 0; i < MLX4_MAX_PORTS; i++) {
-		if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
-			dev_err(&pdev->dev,
-				"Requested more VF's (%d) for port (%d) than allowed (%d)\n",
-				nvfs[i] + nvfs[2], i + 1,
-				MLX4_MAX_NUM_VF_P_PORT - 1);
-			return -EINVAL;
-		}
-	}
-
-
-	/*
-	 * Check for BARs.
-	 */
-	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
-	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
-		dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
-			pci_dev_data, pci_resource_flags(pdev, 0));
-		err = -ENODEV;
-		goto err_disable_pdev;
-	}
-	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
-		dev_err(&pdev->dev, "Missing UAR, aborting\n");
-		err = -ENODEV;
-		goto err_disable_pdev;
-	}
-
-	err = pci_request_regions(pdev, DRV_NAME);
-	if (err) {
-		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
-		goto err_disable_pdev;
-	}
-
-	pci_set_master(pdev);
+	dev = &priv->dev;
 
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (err) {
-		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
-		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (err) {
-			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
-			goto err_release_regions;
-		}
-	}
-	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (err) {
-		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (err) {
-			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
-			goto err_release_regions;
-		}
-	}
-
-	/* Allow large DMA segments, up to the firmware limit of 1 GB */
-	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
-
-	dev       = pci_get_drvdata(pdev);
-	priv      = mlx4_priv(dev);
-	dev->pdev = pdev;
 	INIT_LIST_HEAD(&priv->ctx_list);
 	spin_lock_init(&priv->ctx_lock);
 
@@ -2382,28 +2284,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 
 	dev->rev_id = pdev->revision;
 	dev->numa_node = dev_to_node(&pdev->dev);
+
 	/* Detect if this device is a virtual function */
 	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
-		/* When acting as pf, we normally skip vfs unless explicitly
-		 * requested to probe them. */
-		if (total_vfs) {
-			unsigned vfs_offset = 0;
-			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
-				     vfs_offset + nvfs[i] < extended_func_num(pdev);
-			     vfs_offset += nvfs[i], i++)
-				;
-			if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
-				err = -ENODEV;
-				goto err_free_dev;
-			}
-			if ((extended_func_num(pdev) - vfs_offset)
-			    > prb_vf[i]) {
-				mlx4_warn(dev, "Skipping virtual function:%d\n",
-					  extended_func_num(pdev));
-				err = -ENODEV;
-				goto err_free_dev;
-			}
-		}
 		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
 		dev->flags |= MLX4_FLAG_SLAVE;
 	} else {
@@ -2413,11 +2296,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 		err = mlx4_get_ownership(dev);
 		if (err) {
 			if (err < 0)
-				goto err_free_dev;
+				return err;
 			else {
 				mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
-				err = -EINVAL;
-				goto err_free_dev;
+				return -EINVAL;
 			}
 		}
 
@@ -2429,7 +2311,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 				GFP_KERNEL);
 			if (NULL == dev->dev_vfs) {
 				mlx4_err(dev, "Failed to allocate memory for VFs\n");
-				err = 0;
+				err = -ENOMEM;
+				goto err_free_own;
 			} else {
 				atomic_inc(&pf_loading);
 				existing_vfs = pci_num_vf(pdev);
@@ -2445,13 +2328,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 					mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
 						 err);
 					atomic_dec(&pf_loading);
-					err = 0;
 				} else {
 					mlx4_warn(dev, "Running in master mode\n");
 					dev->flags |= MLX4_FLAG_SRIOV |
 						MLX4_FLAG_MASTER;
 					dev->num_vfs = total_vfs;
-					sriov_initialized = 1;
 				}
 			}
 		}
@@ -2467,7 +2348,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 		err = mlx4_reset(dev);
 		if (err) {
 			mlx4_err(dev, "Failed to reset HCA, aborting\n");
-			goto err_rel_own;
+			goto err_sriov;
 		}
 	}
 
@@ -2517,34 +2398,46 @@ slave_start:
 	/* In master functions, the communication channel must be initialized
 	 * after obtaining its address from fw */
 	if (mlx4_is_master(dev)) {
-		unsigned sum = 0;
-		err = mlx4_multi_func_init(dev);
-		if (err) {
-			mlx4_err(dev, "Failed to init master mfunc interface, aborting\n");
+		int ib_ports = 0;
+
+		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+			ib_ports++;
+
+		if (ib_ports &&
+		    (num_vfs_argc > 1 || probe_vfs_argc > 1)) {
+			mlx4_err(dev,
+				 "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
+			err = -EINVAL;
+			goto err_close;
+		}
+		if (dev->caps.num_ports < 2 &&
+		    num_vfs_argc > 1) {
+			err = -EINVAL;
+			mlx4_err(dev,
+				 "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
+				 dev->caps.num_ports);
 			goto err_close;
 		}
-		if (sriov_initialized) {
-			int ib_ports = 0;
-			mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
-				ib_ports++;
+		memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs));
 
-			if (ib_ports &&
-			    (num_vfs_argc > 1 || probe_vfs_argc > 1)) {
-				mlx4_err(dev,
-					 "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
-				err = -EINVAL;
-				goto err_master_mfunc;
-			}
-			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]); i++) {
-				unsigned j;
-				for (j = 0; j < nvfs[i]; ++sum, ++j) {
-					dev->dev_vfs[sum].min_port =
-						i < 2 ? i + 1 : 1;
-					dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
-						dev->caps.num_ports;
-				}
+		for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) {
+			unsigned j;
+
+			for (j = 0; j < dev->nvfs[i]; ++sum, ++j) {
+				dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
+				dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
+					dev->caps.num_ports;
 			}
 		}
+
+		/* In master functions, the communication channel
+		 * must be initialized after obtaining its address from fw
+		 */
+		err = mlx4_multi_func_init(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
+			goto err_close;
+		}
 	}
 
 	err = mlx4_alloc_eq_table(dev);
@@ -2565,7 +2458,7 @@ slave_start:
 	if (!mlx4_is_slave(dev)) {
 		err = mlx4_init_steering(dev);
 		if (err)
-			goto err_free_eq;
+			goto err_disable_msix;
 	}
 
 	err = mlx4_setup_hca(dev);
@@ -2625,6 +2518,10 @@ err_steer:
 	if (!mlx4_is_slave(dev))
 		mlx4_clear_steering(dev);
 
+err_disable_msix:
+	if (dev->flags & MLX4_FLAG_MSI_X)
+		pci_disable_msix(pdev);
+
 err_free_eq:
 	mlx4_free_eq_table(dev);
 
@@ -2641,9 +2538,6 @@ err_master_mfunc:
 	}
 
 err_close:
-	if (dev->flags & MLX4_FLAG_MSI_X)
-		pci_disable_msix(pdev);
-
 	mlx4_close_hca(dev);
 
 err_mfunc:
@@ -2657,17 +2551,151 @@ err_sriov:
 	if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
 		pci_disable_sriov(pdev);
 
-err_rel_own:
-	if (!mlx4_is_slave(dev))
-		mlx4_free_ownership(dev);
-
 	if (mlx4_is_master(dev) && dev->num_vfs)
 		atomic_dec(&pf_loading);
 
 	kfree(priv->dev.dev_vfs);
 
-err_free_dev:
-	kfree(priv);
+err_free_own:
+	if (!mlx4_is_slave(dev))
+		mlx4_free_ownership(dev);
+
+	return err;
+}
+
+static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
+			   struct mlx4_priv *priv)
+{
+	int err;
+	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+	int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+	const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
+		{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
+	unsigned total_vfs = 0;
+	unsigned int i;
+
+	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
+		return err;
+	}
+
+	/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
+	 * per port, we must limit the number of VFs to 63 (since their are
+	 * 128 MACs)
+	 */
+	for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
+	     total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
+		nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
+		if (nvfs[i] < 0) {
+			dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
+			err = -EINVAL;
+			goto err_disable_pdev;
+		}
+	}
+	for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
+	     i++) {
+		prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
+		if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
+			dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
+			err = -EINVAL;
+			goto err_disable_pdev;
+		}
+	}
+	if (total_vfs >= MLX4_MAX_NUM_VF) {
+		dev_err(&pdev->dev,
+			"Requested more VF's (%d) than allowed (%d)\n",
+			total_vfs, MLX4_MAX_NUM_VF - 1);
+		err = -EINVAL;
+		goto err_disable_pdev;
+	}
+
+	for (i = 0; i < MLX4_MAX_PORTS; i++) {
+		if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
+			dev_err(&pdev->dev,
+				"Requested more VF's (%d) for port (%d) than allowed (%d)\n",
+				nvfs[i] + nvfs[2], i + 1,
+				MLX4_MAX_NUM_VF_P_PORT - 1);
+			err = -EINVAL;
+			goto err_disable_pdev;
+		}
+	}
+
+	/* Check for BARs. */
+	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
+	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+		dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
+			pci_dev_data, pci_resource_flags(pdev, 0));
+		err = -ENODEV;
+		goto err_disable_pdev;
+	}
+	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
+		dev_err(&pdev->dev, "Missing UAR, aborting\n");
+		err = -ENODEV;
+		goto err_disable_pdev;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
+		goto err_disable_pdev;
+	}
+
+	pci_set_master(pdev);
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
+		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
+			goto err_release_regions;
+		}
+	}
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
+			goto err_release_regions;
+		}
+	}
+
+	/* Allow large DMA segments, up to the firmware limit of 1 GB */
+	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
+	/* Detect if this device is a virtual function */
+	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
+		/* When acting as pf, we normally skip vfs unless explicitly
+		 * requested to probe them.
+		 */
+		if (total_vfs) {
+			unsigned vfs_offset = 0;
+
+			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
+			     vfs_offset + nvfs[i] < extended_func_num(pdev);
+			     vfs_offset += nvfs[i], i++)
+				;
+			if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
+				err = -ENODEV;
+				goto err_release_regions;
+			}
+			if ((extended_func_num(pdev) - vfs_offset)
+			    > prb_vf[i]) {
+				dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
+					 extended_func_num(pdev));
+				err = -ENODEV;
+				goto err_release_regions;
+			}
+		}
+	}
+
+	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+	if (err)
+		goto err_release_regions;
+	return 0;
 
 err_release_regions:
 	pci_release_regions(pdev);
@@ -2682,6 +2710,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct mlx4_priv *priv;
 	struct mlx4_dev *dev;
+	int ret;
 
 	printk_once(KERN_INFO "%s", mlx4_version);
 
@@ -2690,13 +2719,18 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENOMEM;
 
 	dev       = &priv->dev;
+	dev->pdev = pdev;
 	pci_set_drvdata(pdev, dev);
 	priv->pci_dev_data = id->driver_data;
 
-	return __mlx4_init_one(pdev, id->driver_data);
+	ret =  __mlx4_init_one(pdev, id->driver_data, priv);
+	if (ret)
+		kfree(priv);
+
+	return ret;
 }
 
-static void __mlx4_remove_one(struct pci_dev *pdev)
+static void mlx4_unload_one(struct pci_dev *pdev)
 {
 	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2775,8 +2809,6 @@ static void __mlx4_remove_one(struct pci_dev *pdev)
 	kfree(dev->caps.qp1_proxy);
 	kfree(dev->dev_vfs);
 
-	pci_release_regions(pdev);
-	pci_disable_device(pdev);
 	memset(priv, 0, sizeof(*priv));
 	priv->pci_dev_data = pci_dev_data;
 	priv->removed = 1;
@@ -2787,7 +2819,9 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
-	__mlx4_remove_one(pdev);
+	mlx4_unload_one(pdev);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
 	kfree(priv);
 	pci_set_drvdata(pdev, NULL);
 }
@@ -2796,11 +2830,22 @@ int mlx4_restart_one(struct pci_dev *pdev)
 {
 	struct mlx4_dev	 *dev  = pci_get_drvdata(pdev);
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	int		  pci_dev_data;
+	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+	int pci_dev_data, err, total_vfs;
 
 	pci_dev_data = priv->pci_dev_data;
-	__mlx4_remove_one(pdev);
-	return __mlx4_init_one(pdev, pci_dev_data);
+	total_vfs = dev->num_vfs;
+	memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs));
+
+	mlx4_unload_one(pdev);
+	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+	if (err) {
+		mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
+			 __func__, pci_name(pdev), err);
+		return err;
+	}
+
+	return err;
 }
 
 static const struct pci_device_id mlx4_pci_table[] = {
@@ -2854,7 +2899,7 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
 					      pci_channel_state_t state)
 {
-	__mlx4_remove_one(pdev);
+	mlx4_unload_one(pdev);
 
 	return state == pci_channel_io_perm_failure ?
 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
@@ -2866,7 +2911,7 @@ static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int               ret;
 
-	ret = __mlx4_init_one(pdev, priv->pci_dev_data);
+	ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv);
 
 	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
 }
@@ -2880,7 +2925,7 @@ static struct pci_driver mlx4_driver = {
 	.name		= DRV_NAME,
 	.id_table	= mlx4_pci_table,
 	.probe		= mlx4_init_one,
-	.shutdown	= __mlx4_remove_one,
+	.shutdown	= mlx4_unload_one,
 	.remove		= mlx4_remove_one,
 	.err_handler    = &mlx4_err_handler,
 };
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 03b5608a4329..b2f8ab9a57c4 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -707,6 +707,7 @@ struct mlx4_dev {
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
 	struct mlx4_vf_dev     *dev_vfs;
+	int                     nvfs[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_eqe {
-- 
cgit v1.2.3


From c611529e7cd3465ec0eada0f44200e8420c38908 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Sep 2014 19:20:08 -0400
Subject: sd: Honor block layer integrity handling flags

A set of flags introduced in the block layer enable better control over
how protection information is handled. These flags are useful for both
error injection and data recovery purposes. Checking can be enabled and
disabled for controller and disk, and the guard tag format is now a
per-I/O property.

Update sd_protect_op to communicate the relevant information to the
low-level device driver via a set of flags in scsi_cmnd.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/scsi/sd.c        | 73 ++++++++++++++++++++++++++++--------------------
 drivers/scsi/sd.h        | 66 +++++++++++++++++++++++++++++++++++++++++--
 drivers/scsi/sd_dif.c    | 23 ++++++---------
 include/linux/bio.h      | 33 ++++++++++++++++------
 include/scsi/scsi_cmnd.h | 36 +++++++++++++++++-------
 5 files changed, 166 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 2c2041ca4b70..9f7099f4b537 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -610,29 +610,44 @@ static void scsi_disk_put(struct scsi_disk *sdkp)
 	mutex_unlock(&sd_ref_mutex);
 }
 
-static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif)
-{
-	unsigned int prot_op = SCSI_PROT_NORMAL;
-	unsigned int dix = scsi_prot_sg_count(scmd);
-
-	if (scmd->sc_data_direction == DMA_FROM_DEVICE) {
-		if (dif && dix)
-			prot_op = SCSI_PROT_READ_PASS;
-		else if (dif && !dix)
-			prot_op = SCSI_PROT_READ_STRIP;
-		else if (!dif && dix)
-			prot_op = SCSI_PROT_READ_INSERT;
-	} else {
-		if (dif && dix)
-			prot_op = SCSI_PROT_WRITE_PASS;
-		else if (dif && !dix)
-			prot_op = SCSI_PROT_WRITE_INSERT;
-		else if (!dif && dix)
-			prot_op = SCSI_PROT_WRITE_STRIP;
+
+
+static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd,
+					   unsigned int dix, unsigned int dif)
+{
+	struct bio *bio = scmd->request->bio;
+	unsigned int prot_op = sd_prot_op(rq_data_dir(scmd->request), dix, dif);
+	unsigned int protect = 0;
+
+	if (dix) {				/* DIX Type 0, 1, 2, 3 */
+		if (bio_integrity_flagged(bio, BIP_IP_CHECKSUM))
+			scmd->prot_flags |= SCSI_PROT_IP_CHECKSUM;
+
+		if (bio_integrity_flagged(bio, BIP_CTRL_NOCHECK) == false)
+			scmd->prot_flags |= SCSI_PROT_GUARD_CHECK;
+	}
+
+	if (dif != SD_DIF_TYPE3_PROTECTION) {	/* DIX/DIF Type 0, 1, 2 */
+		scmd->prot_flags |= SCSI_PROT_REF_INCREMENT;
+
+		if (bio_integrity_flagged(bio, BIP_CTRL_NOCHECK) == false)
+			scmd->prot_flags |= SCSI_PROT_REF_CHECK;
+	}
+
+	if (dif) {				/* DIX/DIF Type 1, 2, 3 */
+		scmd->prot_flags |= SCSI_PROT_TRANSFER_PI;
+
+		if (bio_integrity_flagged(bio, BIP_DISK_NOCHECK))
+			protect = 3 << 5;	/* Disable target PI checking */
+		else
+			protect = 1 << 5;	/* Enable target PI checking */
 	}
 
 	scsi_set_prot_op(scmd, prot_op);
 	scsi_set_prot_type(scmd, dif);
+	scmd->prot_flags &= sd_prot_flag_mask(prot_op);
+
+	return protect;
 }
 
 static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
@@ -893,7 +908,8 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 	sector_t block = blk_rq_pos(rq);
 	sector_t threshold;
 	unsigned int this_count = blk_rq_sectors(rq);
-	int ret, host_dif;
+	unsigned int dif, dix;
+	int ret;
 	unsigned char protect;
 
 	ret = scsi_init_io(SCpnt, GFP_ATOMIC);
@@ -995,7 +1011,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 		SCpnt->cmnd[0] = WRITE_6;
 
 		if (blk_integrity_rq(rq))
-			sd_dif_prepare(rq, block, sdp->sector_size);
+			sd_dif_prepare(SCpnt);
 
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_6;
@@ -1010,14 +1026,15 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 					"writing" : "reading", this_count,
 					blk_rq_sectors(rq)));
 
-	/* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */
-	host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
-	if (host_dif)
-		protect = 1 << 5;
+	dix = scsi_prot_sg_count(SCpnt);
+	dif = scsi_host_dif_capable(SCpnt->device->host, sdkp->protection_type);
+
+	if (dif || dix)
+		protect = sd_setup_protect_cmnd(SCpnt, dix, dif);
 	else
 		protect = 0;
 
-	if (host_dif == SD_DIF_TYPE2_PROTECTION) {
+	if (protect && sdkp->protection_type == SD_DIF_TYPE2_PROTECTION) {
 		SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC);
 
 		if (unlikely(SCpnt->cmnd == NULL)) {
@@ -1102,10 +1119,6 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 	}
 	SCpnt->sdb.length = this_count * sdp->sector_size;
 
-	/* If DIF or DIX is enabled, tell HBA how to handle request */
-	if (host_dif || scsi_prot_sg_count(SCpnt))
-		sd_prot_op(SCpnt, host_dif);
-
 	/*
 	 * We shouldn't disconnect in the middle of a sector, so with a dumb
 	 * host adapter, it's safe to assume that we can at least transfer
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4c3ab8377fd3..467377884b63 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -166,6 +166,68 @@ enum sd_dif_target_protection_types {
 	SD_DIF_TYPE3_PROTECTION = 0x3,
 };
 
+/*
+ * Look up the DIX operation based on whether the command is read or
+ * write and whether dix and dif are enabled.
+ */
+static inline unsigned int sd_prot_op(bool write, bool dix, bool dif)
+{
+	/* Lookup table: bit 2 (write), bit 1 (dix), bit 0 (dif) */
+	const unsigned int ops[] = {	/* wrt dix dif */
+		SCSI_PROT_NORMAL,	/*  0	0   0  */
+		SCSI_PROT_READ_STRIP,	/*  0	0   1  */
+		SCSI_PROT_READ_INSERT,	/*  0	1   0  */
+		SCSI_PROT_READ_PASS,	/*  0	1   1  */
+		SCSI_PROT_NORMAL,	/*  1	0   0  */
+		SCSI_PROT_WRITE_INSERT, /*  1	0   1  */
+		SCSI_PROT_WRITE_STRIP,	/*  1	1   0  */
+		SCSI_PROT_WRITE_PASS,	/*  1	1   1  */
+	};
+
+	return ops[write << 2 | dix << 1 | dif];
+}
+
+/*
+ * Returns a mask of the protection flags that are valid for a given DIX
+ * operation.
+ */
+static inline unsigned int sd_prot_flag_mask(unsigned int prot_op)
+{
+	const unsigned int flag_mask[] = {
+		[SCSI_PROT_NORMAL]		= 0,
+
+		[SCSI_PROT_READ_STRIP]		= SCSI_PROT_TRANSFER_PI |
+						  SCSI_PROT_GUARD_CHECK |
+						  SCSI_PROT_REF_CHECK |
+						  SCSI_PROT_REF_INCREMENT,
+
+		[SCSI_PROT_READ_INSERT]		= SCSI_PROT_REF_INCREMENT |
+						  SCSI_PROT_IP_CHECKSUM,
+
+		[SCSI_PROT_READ_PASS]		= SCSI_PROT_TRANSFER_PI |
+						  SCSI_PROT_GUARD_CHECK |
+						  SCSI_PROT_REF_CHECK |
+						  SCSI_PROT_REF_INCREMENT |
+						  SCSI_PROT_IP_CHECKSUM,
+
+		[SCSI_PROT_WRITE_INSERT]	= SCSI_PROT_TRANSFER_PI |
+						  SCSI_PROT_REF_INCREMENT,
+
+		[SCSI_PROT_WRITE_STRIP]		= SCSI_PROT_GUARD_CHECK |
+						  SCSI_PROT_REF_CHECK |
+						  SCSI_PROT_REF_INCREMENT |
+						  SCSI_PROT_IP_CHECKSUM,
+
+		[SCSI_PROT_WRITE_PASS]		= SCSI_PROT_TRANSFER_PI |
+						  SCSI_PROT_GUARD_CHECK |
+						  SCSI_PROT_REF_CHECK |
+						  SCSI_PROT_REF_INCREMENT |
+						  SCSI_PROT_IP_CHECKSUM,
+	};
+
+	return flag_mask[prot_op];
+}
+
 /*
  * Data Integrity Field tuple.
  */
@@ -178,7 +240,7 @@ struct sd_dif_tuple {
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 
 extern void sd_dif_config_host(struct scsi_disk *);
-extern void sd_dif_prepare(struct request *rq, sector_t, unsigned int);
+extern void sd_dif_prepare(struct scsi_cmnd *scmd);
 extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
@@ -186,7 +248,7 @@ extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
 static inline void sd_dif_config_host(struct scsi_disk *disk)
 {
 }
-static inline int sd_dif_prepare(struct request *rq, sector_t s, unsigned int a)
+static inline int sd_dif_prepare(struct scsi_cmnd *scmd)
 {
 	return 0;
 }
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index b7eaeadc18f9..14c7d42a11c2 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -106,8 +106,7 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
  *
  * Type 3 does not have a reference tag so no remapping is required.
  */
-void sd_dif_prepare(struct request *rq, sector_t hw_sector,
-		    unsigned int sector_sz)
+void sd_dif_prepare(struct scsi_cmnd *scmd)
 {
 	const int tuple_sz = sizeof(struct t10_pi_tuple);
 	struct bio *bio;
@@ -115,14 +114,14 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector,
 	struct t10_pi_tuple *pi;
 	u32 phys, virt;
 
-	sdkp = rq->bio->bi_bdev->bd_disk->private_data;
+	sdkp = scsi_disk(scmd->request->rq_disk);
 
 	if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION)
 		return;
 
-	phys = hw_sector & 0xffffffff;
+	phys = scsi_prot_ref_tag(scmd);
 
-	__rq_for_each_bio(bio, rq) {
+	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
 		struct bio_vec iv;
 		struct bvec_iter iter;
@@ -163,7 +162,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 	struct scsi_disk *sdkp;
 	struct bio *bio;
 	struct t10_pi_tuple *pi;
-	unsigned int j, sectors, sector_sz;
+	unsigned int j, intervals;
 	u32 phys, virt;
 
 	sdkp = scsi_disk(scmd->request->rq_disk);
@@ -171,12 +170,8 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 	if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION || good_bytes == 0)
 		return;
 
-	sector_sz = scmd->device->sector_size;
-	sectors = good_bytes / sector_sz;
-
-	phys = blk_rq_pos(scmd->request) & 0xffffffff;
-	if (sector_sz == 4096)
-		phys >>= 3;
+	intervals = good_bytes / scsi_prot_interval(scmd);
+	phys = scsi_prot_ref_tag(scmd);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
@@ -190,7 +185,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 
 			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
 
-				if (sectors == 0) {
+				if (intervals == 0) {
 					kunmap_atomic(pi);
 					return;
 				}
@@ -200,7 +195,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 
 				virt++;
 				phys++;
-				sectors--;
+				intervals--;
 			}
 
 			kunmap_atomic(pi);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 14bff3fe56d4..ce6b75964b71 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -292,6 +292,14 @@ static inline unsigned bio_segments(struct bio *bio)
  */
 #define bio_get(bio)	atomic_inc(&(bio)->bi_cnt)
 
+enum bip_flags {
+	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
+	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
+	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
+	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
+	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
+};
+
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
 static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
@@ -323,13 +331,15 @@ struct bio_integrity_payload {
 	struct bio_vec		bip_inline_vecs[0];/* embedded bvec array */
 };
 
-enum bip_flags {
-	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
-	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
-	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
-	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
-	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
-};
+static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
+{
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+
+	if (bip)
+		return bip->bip_flags & flag;
+
+	return false;
+}
 
 static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
 {
@@ -701,9 +711,9 @@ extern void bio_integrity_init(void);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
-static inline int bio_integrity(struct bio *bio)
+static inline void *bio_integrity(struct bio *bio)
 {
-	return 0;
+	return NULL;
 }
 
 static inline bool bio_integrity_enabled(struct bio *bio)
@@ -754,6 +764,11 @@ static inline void bio_integrity_init(void)
 	return;
 }
 
+static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
+{
+	return false;
+}
+
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 #endif /* CONFIG_BLOCK */
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 73f349044941..522a5f27f553 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -10,9 +10,10 @@
 #include <scsi/scsi_device.h>
 
 struct Scsi_Host;
-struct scsi_device;
 struct scsi_driver;
 
+#include <scsi/scsi_device.h>
+
 /*
  * MAX_COMMAND_SIZE is:
  * The longest fixed-length SCSI CDB as per the SCSI standard.
@@ -81,6 +82,7 @@ struct scsi_cmnd {
 
 	unsigned char prot_op;
 	unsigned char prot_type;
+	unsigned char prot_flags;
 
 	unsigned short cmd_len;
 	enum dma_data_direction sc_data_direction;
@@ -252,6 +254,14 @@ static inline unsigned char scsi_get_prot_op(struct scsi_cmnd *scmd)
 	return scmd->prot_op;
 }
 
+enum scsi_prot_flags {
+	SCSI_PROT_TRANSFER_PI		= 1 << 0,
+	SCSI_PROT_GUARD_CHECK		= 1 << 1,
+	SCSI_PROT_REF_CHECK		= 1 << 2,
+	SCSI_PROT_REF_INCREMENT		= 1 << 3,
+	SCSI_PROT_IP_CHECKSUM		= 1 << 4,
+};
+
 /*
  * The controller usually does not know anything about the target it
  * is communicating with.  However, when DIX is enabled the controller
@@ -280,6 +290,17 @@ static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd)
 	return blk_rq_pos(scmd->request);
 }
 
+static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
+{
+	return scmd->device->sector_size;
+}
+
+static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
+{
+	return blk_rq_pos(scmd->request) >>
+		(ilog2(scsi_prot_interval(scmd)) - 9) & 0xffffffff;
+}
+
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
 {
 	return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0;
@@ -316,17 +337,12 @@ static inline void set_driver_byte(struct scsi_cmnd *cmd, char status)
 static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
 {
 	unsigned int xfer_len = scsi_out(scmd)->length;
-	unsigned int prot_op = scsi_get_prot_op(scmd);
-	unsigned int sector_size = scmd->device->sector_size;
+	unsigned int prot_interval = scsi_prot_interval(scmd);
 
-	switch (prot_op) {
-	case SCSI_PROT_NORMAL:
-	case SCSI_PROT_WRITE_STRIP:
-	case SCSI_PROT_READ_INSERT:
-		return xfer_len;
-	}
+	if (scmd->prot_flags & SCSI_PROT_TRANSFER_PI)
+		xfer_len += (xfer_len >> ilog2(prot_interval)) * 8;
 
-	return xfer_len + (xfer_len >> ilog2(sector_size)) * 8;
+	return xfer_len;
 }
 
 #endif /* _SCSI_SCSI_CMND_H */
-- 
cgit v1.2.3


From 0b0b0893d49b34201a6c4416b1a707b580b91e3d Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Mon, 29 Sep 2014 15:29:25 +0100
Subject: of/pci: Fix the conversion of IO ranges into IO resources

The ranges property for a host bridge controller in DT describes the
mapping between the PCI bus address and the CPU physical address.  The
resources framework however expects that the IO resources start at a pseudo
"port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.  The
conversion from PCI ranges to resources failed to take that into account,
returning a CPU physical address instead of a port number.

Also fix all the drivers that depend on the old behaviour by fetching the
CPU physical address based on the port number where it is being needed.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
CC: Grant Likely <grant.likely@linaro.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Thierry Reding <thierry.reding@gmail.com>
CC: Simon Horman <horms@verge.net.au>
CC: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/mach-integrator/pci_v3.c | 23 ++++++++++----------
 drivers/of/address.c              | 44 +++++++++++++++++++++++++++++++++++----
 drivers/pci/host/pci-tegra.c      | 10 ++++++---
 drivers/pci/host/pcie-rcar.c      | 21 +++++++++++++------
 include/linux/of_address.h        | 12 +++++------
 5 files changed, 80 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c
index 05e1f73a1e8d..c186a17c2cff 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -660,6 +660,7 @@ static void __init pci_v3_preinit(void)
 {
 	unsigned long flags;
 	unsigned int temp;
+	phys_addr_t io_address = pci_pio_to_address(io_mem.start);
 
 	pcibios_min_mem = 0x00100000;
 
@@ -701,7 +702,7 @@ static void __init pci_v3_preinit(void)
 	/*
 	 * Setup window 2 - PCI IO
 	 */
-	v3_writel(V3_LB_BASE2, v3_addr_to_lb_base2(io_mem.start) |
+	v3_writel(V3_LB_BASE2, v3_addr_to_lb_base2(io_address) |
 			V3_LB_BASE_ENABLE);
 	v3_writew(V3_LB_MAP2, v3_addr_to_lb_map2(0));
 
@@ -742,6 +743,7 @@ static void __init pci_v3_preinit(void)
 static void __init pci_v3_postinit(void)
 {
 	unsigned int pci_cmd;
+	phys_addr_t io_address = pci_pio_to_address(io_mem.start);
 
 	pci_cmd = PCI_COMMAND_MEMORY |
 		  PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE;
@@ -758,7 +760,7 @@ static void __init pci_v3_postinit(void)
 		       "interrupt: %d\n", ret);
 #endif
 
-	register_isa_ports(non_mem.start, io_mem.start, 0);
+	register_isa_ports(non_mem.start, io_address, 0);
 }
 
 /*
@@ -867,33 +869,32 @@ static int __init pci_v3_probe(struct platform_device *pdev)
 
 	for_each_of_pci_range(&parser, &range) {
 		if (!range.flags) {
-			of_pci_range_to_resource(&range, np, &conf_mem);
+			ret = of_pci_range_to_resource(&range, np, &conf_mem);
 			conf_mem.name = "PCIv3 config";
 		}
 		if (range.flags & IORESOURCE_IO) {
-			of_pci_range_to_resource(&range, np, &io_mem);
+			ret = of_pci_range_to_resource(&range, np, &io_mem);
 			io_mem.name = "PCIv3 I/O";
 		}
 		if ((range.flags & IORESOURCE_MEM) &&
 			!(range.flags & IORESOURCE_PREFETCH)) {
 			non_mem_pci = range.pci_addr;
 			non_mem_pci_sz = range.size;
-			of_pci_range_to_resource(&range, np, &non_mem);
+			ret = of_pci_range_to_resource(&range, np, &non_mem);
 			non_mem.name = "PCIv3 non-prefetched mem";
 		}
 		if ((range.flags & IORESOURCE_MEM) &&
 			(range.flags & IORESOURCE_PREFETCH)) {
 			pre_mem_pci = range.pci_addr;
 			pre_mem_pci_sz = range.size;
-			of_pci_range_to_resource(&range, np, &pre_mem);
+			ret = of_pci_range_to_resource(&range, np, &pre_mem);
 			pre_mem.name = "PCIv3 prefetched mem";
 		}
-	}
 
-	if (!conf_mem.start || !io_mem.start ||
-	    !non_mem.start || !pre_mem.start) {
-		dev_err(&pdev->dev, "missing ranges in device node\n");
-		return -EINVAL;
+		if (ret < 0) {
+			dev_err(&pdev->dev, "missing ranges in device node\n");
+			return ret;
+		}
 	}
 
 	pci_v3.map_irq = of_irq_parse_and_map_pci;
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 327a57410797..afdb78299f61 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -295,14 +295,50 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser,
 }
 EXPORT_SYMBOL_GPL(of_pci_range_parser_one);
 
-void of_pci_range_to_resource(struct of_pci_range *range,
-			      struct device_node *np, struct resource *res)
+/*
+ * of_pci_range_to_resource - Create a resource from an of_pci_range
+ * @range:	the PCI range that describes the resource
+ * @np:		device node where the range belongs to
+ * @res:	pointer to a valid resource that will be updated to
+ *              reflect the values contained in the range.
+ *
+ * Returns EINVAL if the range cannot be converted to resource.
+ *
+ * Note that if the range is an IO range, the resource will be converted
+ * using pci_address_to_pio() which can fail if it is called too early or
+ * if the range cannot be matched to any host bridge IO space (our case here).
+ * To guard against that we try to register the IO range first.
+ * If that fails we know that pci_address_to_pio() will do too.
+ */
+int of_pci_range_to_resource(struct of_pci_range *range,
+			     struct device_node *np, struct resource *res)
 {
+	int err;
 	res->flags = range->flags;
-	res->start = range->cpu_addr;
-	res->end = range->cpu_addr + range->size - 1;
 	res->parent = res->child = res->sibling = NULL;
 	res->name = np->full_name;
+
+	if (res->flags & IORESOURCE_IO) {
+		unsigned long port;
+		err = pci_register_io_range(range->cpu_addr, range->size);
+		if (err)
+			goto invalid_range;
+		port = pci_address_to_pio(range->cpu_addr);
+		if (port == (unsigned long)-1) {
+			err = -EINVAL;
+			goto invalid_range;
+		}
+		res->start = port;
+	} else {
+		res->start = range->cpu_addr;
+	}
+	res->end = res->start + range->size - 1;
+	return 0;
+
+invalid_range:
+	res->start = (resource_size_t)OF_BAD_ADDR;
+	res->end = (resource_size_t)OF_BAD_ADDR;
+	return err;
 }
 #endif /* CONFIG_PCI */
 
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index 0fb0fdb223d5..946935db62b6 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -626,13 +626,14 @@ DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, tegra_pcie_relax_enable);
 static int tegra_pcie_setup(int nr, struct pci_sys_data *sys)
 {
 	struct tegra_pcie *pcie = sys_to_pcie(sys);
+	phys_addr_t io_start = pci_pio_to_address(pcie->io.start);
 
 	pci_add_resource_offset(&sys->resources, &pcie->mem, sys->mem_offset);
 	pci_add_resource_offset(&sys->resources, &pcie->prefetch,
 				sys->mem_offset);
 	pci_add_resource(&sys->resources, &pcie->busn);
 
-	pci_ioremap_io(nr * SZ_64K, pcie->io.start);
+	pci_ioremap_io(nr * SZ_64K, io_start);
 
 	return 1;
 }
@@ -737,6 +738,7 @@ static irqreturn_t tegra_pcie_isr(int irq, void *arg)
 static void tegra_pcie_setup_translations(struct tegra_pcie *pcie)
 {
 	u32 fpci_bar, size, axi_address;
+	phys_addr_t io_start = pci_pio_to_address(pcie->io.start);
 
 	/* Bar 0: type 1 extended configuration space */
 	fpci_bar = 0xfe100000;
@@ -749,7 +751,7 @@ static void tegra_pcie_setup_translations(struct tegra_pcie *pcie)
 	/* Bar 1: downstream IO bar */
 	fpci_bar = 0xfdfc0000;
 	size = resource_size(&pcie->io);
-	axi_address = pcie->io.start;
+	axi_address = io_start;
 	afi_writel(pcie, axi_address, AFI_AXI_BAR1_START);
 	afi_writel(pcie, size >> 12, AFI_AXI_BAR1_SZ);
 	afi_writel(pcie, fpci_bar, AFI_FPCI_BAR1);
@@ -1520,7 +1522,9 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
 	}
 
 	for_each_of_pci_range(&parser, &range) {
-		of_pci_range_to_resource(&range, np, &res);
+		err = of_pci_range_to_resource(&range, np, &res);
+		if (err < 0)
+			return err;
 
 		switch (res.flags & IORESOURCE_TYPE_BITS) {
 		case IORESOURCE_IO:
diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
index 4884ee5e07d4..61158e03ab5f 100644
--- a/drivers/pci/host/pcie-rcar.c
+++ b/drivers/pci/host/pcie-rcar.c
@@ -323,6 +323,7 @@ static void rcar_pcie_setup_window(int win, struct rcar_pcie *pcie)
 
 	/* Setup PCIe address space mappings for each resource */
 	resource_size_t size;
+	resource_size_t res_start;
 	u32 mask;
 
 	rcar_pci_write_reg(pcie, 0x00000000, PCIEPTCTLR(win));
@@ -335,8 +336,13 @@ static void rcar_pcie_setup_window(int win, struct rcar_pcie *pcie)
 	mask = (roundup_pow_of_two(size) / SZ_128) - 1;
 	rcar_pci_write_reg(pcie, mask << 7, PCIEPAMR(win));
 
-	rcar_pci_write_reg(pcie, upper_32_bits(res->start), PCIEPARH(win));
-	rcar_pci_write_reg(pcie, lower_32_bits(res->start), PCIEPARL(win));
+	if (res->flags & IORESOURCE_IO)
+		res_start = pci_pio_to_address(res->start);
+	else
+		res_start = res->start;
+
+	rcar_pci_write_reg(pcie, upper_32_bits(res_start), PCIEPARH(win));
+	rcar_pci_write_reg(pcie, lower_32_bits(res_start), PCIEPARL(win));
 
 	/* First resource is for IO */
 	mask = PAR_ENABLE;
@@ -363,9 +369,10 @@ static int rcar_pcie_setup(int nr, struct pci_sys_data *sys)
 
 		rcar_pcie_setup_window(i, pcie);
 
-		if (res->flags & IORESOURCE_IO)
-			pci_ioremap_io(nr * SZ_64K, res->start);
-		else
+		if (res->flags & IORESOURCE_IO) {
+			phys_addr_t io_start = pci_pio_to_address(res->start);
+			pci_ioremap_io(nr * SZ_64K, io_start);
+		} else
 			pci_add_resource(&sys->resources, res);
 	}
 	pci_add_resource(&sys->resources, &pcie->busn);
@@ -935,8 +942,10 @@ static int rcar_pcie_probe(struct platform_device *pdev)
 	}
 
 	for_each_of_pci_range(&parser, &range) {
-		of_pci_range_to_resource(&range, pdev->dev.of_node,
+		err = of_pci_range_to_resource(&range, pdev->dev.of_node,
 						&pcie->res[win++]);
+		if (err < 0)
+			return err;
 
 		if (win > RCAR_PCI_MAX_RESOURCES)
 			break;
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index a38e1c846c23..8cb14eb393d6 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -134,9 +134,9 @@ extern const __be32 *of_get_pci_address(struct device_node *dev, int bar_no,
 			       u64 *size, unsigned int *flags);
 extern int of_pci_address_to_resource(struct device_node *dev, int bar,
 				      struct resource *r);
-extern void of_pci_range_to_resource(struct of_pci_range *range,
-				     struct device_node *np,
-				     struct resource *res);
+extern int of_pci_range_to_resource(struct of_pci_range *range,
+				    struct device_node *np,
+				    struct resource *res);
 #else /* CONFIG_OF_ADDRESS && CONFIG_PCI */
 static inline int of_pci_address_to_resource(struct device_node *dev, int bar,
 				             struct resource *r)
@@ -149,9 +149,9 @@ static inline const __be32 *of_get_pci_address(struct device_node *dev,
 {
 	return NULL;
 }
-static inline void of_pci_range_to_resource(struct of_pci_range *range,
-					    struct device_node *np,
-					    struct resource *res)
+static inline int of_pci_range_to_resource(struct of_pci_range *range,
+					   struct device_node *np,
+					   struct resource *res)
 {
 	return -ENOSYS;
 }
-- 
cgit v1.2.3


From 670ba0c8883b576d0aec28bd7a838358a4be1406 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 29 Sep 2014 15:29:26 +0100
Subject: PCI: Add generic domain handling

The handling of PCI domains (or PCI segments in ACPI speak) is usually a
straightforward affair but its implementation is currently left to the
architectural code, with pci_domain_nr(b) querying the value of the domain
associated with bus b.

This patch introduces CONFIG_PCI_DOMAINS_GENERIC as an option that can be
selected if an architecture wants a simple implementation where the value
of the domain associated with a bus is stored in struct pci_bus.

The architectures that select CONFIG_PCI_DOMAINS_GENERIC will then have to
implement pci_bus_assign_domain_nr() as a way of setting the domain number
associated with a root bus.  All child buses except the root bus will
inherit the domain_nr value from their parent.

Signed-off-by: Catalin Marinas <Catalin.Marinas@arm.com>
[Renamed pci_set_domain_nr() to pci_bus_assign_domain_nr()]
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Arnd Bergmann <arnd@arndb.de>
---
 drivers/pci/probe.c | 11 ++++++++---
 include/linux/pci.h | 21 +++++++++++++++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e3cf8a2e6292..636d1c9156a9 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -485,7 +485,7 @@ void pci_read_bridge_bases(struct pci_bus *child)
 	}
 }
 
-static struct pci_bus *pci_alloc_bus(void)
+static struct pci_bus *pci_alloc_bus(struct pci_bus *parent)
 {
 	struct pci_bus *b;
 
@@ -500,6 +500,10 @@ static struct pci_bus *pci_alloc_bus(void)
 	INIT_LIST_HEAD(&b->resources);
 	b->max_bus_speed = PCI_SPEED_UNKNOWN;
 	b->cur_bus_speed = PCI_SPEED_UNKNOWN;
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	if (parent)
+		b->domain_nr = parent->domain_nr;
+#endif
 	return b;
 }
 
@@ -671,7 +675,7 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 	/*
 	 * Allocate a new bus, and inherit stuff from the parent..
 	 */
-	child = pci_alloc_bus();
+	child = pci_alloc_bus(parent);
 	if (!child)
 		return NULL;
 
@@ -1761,13 +1765,14 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 	char bus_addr[64];
 	char *fmt;
 
-	b = pci_alloc_bus();
+	b = pci_alloc_bus(NULL);
 	if (!b)
 		return NULL;
 
 	b->sysdata = sysdata;
 	b->ops = ops;
 	b->number = b->busn_res.start = bus;
+	pci_bus_assign_domain_nr(b, parent);
 	b2 = pci_find_bus(pci_domain_nr(b), bus);
 	if (b2) {
 		/* If we already got to this bus through a different bridge, ignore it */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 61978a460841..a494e5d775c3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -456,6 +456,9 @@ struct pci_bus {
 	unsigned char	primary;	/* number of primary bridge */
 	unsigned char	max_bus_speed;	/* enum pci_bus_speed */
 	unsigned char	cur_bus_speed;	/* enum pci_bus_speed */
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	int		domain_nr;
+#endif
 
 	char		name[48];
 
@@ -1288,6 +1291,24 @@ static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
 static inline int pci_proc_domain(struct pci_bus *bus) { return 0; }
 #endif /* CONFIG_PCI_DOMAINS */
 
+/*
+ * Generic implementation for PCI domain support. If your
+ * architecture does not need custom management of PCI
+ * domains then this implementation will be used
+ */
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+static inline int pci_domain_nr(struct pci_bus *bus)
+{
+	return bus->domain_nr;
+}
+void pci_bus_assign_domain_nr(struct pci_bus *bus, struct device *parent);
+#else
+static inline void pci_bus_assign_domain_nr(struct pci_bus *bus,
+					struct device *parent)
+{
+}
+#endif
+
 /* some architectures require additional setup to direct VGA traffic */
 typedef int (*arch_set_vga_state_t)(struct pci_dev *pdev, bool decode,
 		      unsigned int command_bits, u32 flags);
-- 
cgit v1.2.3


From 41e5c0f81d3e676d671d96a0a1fafb27abfbd9d7 Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Mon, 29 Sep 2014 15:29:27 +0100
Subject: of/pci: Add pci_get_new_domain_nr() and of_get_pci_domain_nr()

Add pci_get_new_domain_nr() to allocate a new domain number and
of_get_pci_domain_nr() to retrieve the PCI domain number of a given device
from DT.  Host bridge drivers or architecture-specific code can choose to
implement their PCI domain number policy using these two functions.

Using of_get_pci_domain_nr() guarantees a stable PCI domain number on every
boot provided that all host bridge controllers are assigned a number in the
device tree using "linux,pci-domain" property.  Mixing use of
pci_get_new_domain_nr() and of_get_pci_domain_nr() is not recommended as it
can lead to potentially conflicting domain numbers being assigned to root
buses behind different host bridges.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Grant Likely <grant.likely@linaro.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/of/of_pci.c    | 25 +++++++++++++++++++++++++
 drivers/pci/pci.c      |  9 +++++++++
 include/linux/of_pci.h |  7 +++++++
 include/linux/pci.h    |  3 +++
 4 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index 848199633798..82d172fa145a 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -89,6 +89,31 @@ int of_pci_parse_bus_range(struct device_node *node, struct resource *res)
 }
 EXPORT_SYMBOL_GPL(of_pci_parse_bus_range);
 
+/**
+ * This function will try to obtain the host bridge domain number by
+ * finding a property called "linux,pci-domain" of the given device node.
+ *
+ * @node: device tree node with the domain information
+ *
+ * Returns the associated domain number from DT in the range [0-0xffff], or
+ * a negative value if the required property is not found.
+ */
+int of_get_pci_domain_nr(struct device_node *node)
+{
+	const __be32 *value;
+	int len;
+	u16 domain;
+
+	value = of_get_property(node, "linux,pci-domain", &len);
+	if (!value || len < sizeof(*value))
+		return -EINVAL;
+
+	domain = (u16)be32_to_cpup(value);
+
+	return domain;
+}
+EXPORT_SYMBOL_GPL(of_get_pci_domain_nr);
+
 #ifdef CONFIG_PCI_MSI
 
 static LIST_HEAD(of_pci_msi_chip_list);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2c9ac70254e2..d36f35ff6c5d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4406,6 +4406,15 @@ static void pci_no_domains(void)
 #endif
 }
 
+#ifdef CONFIG_PCI_DOMAINS
+static atomic_t __domain_nr = ATOMIC_INIT(-1);
+
+int pci_get_new_domain_nr(void)
+{
+	return atomic_inc_return(&__domain_nr);
+}
+#endif
+
 /**
  * pci_ext_cfg_avail - can we access extended PCI config space?
  *
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index dde3a4a0fa5d..71062e9602f5 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -15,6 +15,7 @@ struct device_node *of_pci_find_child_device(struct device_node *parent,
 int of_pci_get_devfn(struct device_node *np);
 int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
+int of_get_pci_domain_nr(struct device_node *node);
 #else
 static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
 {
@@ -43,6 +44,12 @@ of_pci_parse_bus_range(struct device_node *node, struct resource *res)
 {
 	return -EINVAL;
 }
+
+static inline int
+of_get_pci_domain_nr(struct device_node *node)
+{
+	return -1;
+}
 #endif
 
 #if defined(CONFIG_OF) && defined(CONFIG_PCI_MSI)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a494e5d775c3..150da2d644e7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1285,10 +1285,12 @@ void pci_cfg_access_unlock(struct pci_dev *dev);
  */
 #ifdef CONFIG_PCI_DOMAINS
 extern int pci_domains_supported;
+int pci_get_new_domain_nr(void);
 #else
 enum { pci_domains_supported = 0 };
 static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
 static inline int pci_proc_domain(struct pci_bus *bus) { return 0; }
+static inline int pci_get_new_domain_nr(void) { return -ENOSYS; }
 #endif /* CONFIG_PCI_DOMAINS */
 
 /*
@@ -1417,6 +1419,7 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
 
 static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
 static inline struct pci_dev *pci_dev_get(struct pci_dev *dev) { return NULL; }
+static inline int pci_get_new_domain_nr(void) { return -ENOSYS; }
 
 #define dev_is_pci(d) (false)
 #define dev_is_pf(d) (false)
-- 
cgit v1.2.3


From cbe4097f8ae699ebbdaf8c95ecab38d47e0bd5da Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Mon, 29 Sep 2014 15:29:28 +0100
Subject: of/pci: Add support for parsing PCI host bridge resources from DT

Provide a function to parse the PCI DT ranges that can be used to create a
pci_host_bridge structure together with its associated bus.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
[make io_base parameter optional]
Signed-off-by: Robert Richter <rrichter@cavium.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Grant Likely <grant.likely@linaro.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/of/of_pci.c    | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_pci.h |   6 +++
 2 files changed, 123 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index 82d172fa145a..8882b467be95 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -1,7 +1,9 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_pci.h>
+#include <linux/slab.h>
 
 static inline int __of_pci_pci_compare(struct device_node *node,
 				       unsigned int data)
@@ -114,6 +116,121 @@ int of_get_pci_domain_nr(struct device_node *node)
 }
 EXPORT_SYMBOL_GPL(of_get_pci_domain_nr);
 
+#if defined(CONFIG_OF_ADDRESS)
+/**
+ * of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT
+ * @dev: device node of the host bridge having the range property
+ * @busno: bus number associated with the bridge root bus
+ * @bus_max: maximum number of buses for this bridge
+ * @resources: list where the range of resources will be added after DT parsing
+ * @io_base: pointer to a variable that will contain on return the physical
+ * address for the start of the I/O range. Can be NULL if the caller doesn't
+ * expect IO ranges to be present in the device tree.
+ *
+ * It is the caller's job to free the @resources list.
+ *
+ * This function will parse the "ranges" property of a PCI host bridge device
+ * node and setup the resource mapping based on its content. It is expected
+ * that the property conforms with the Power ePAPR document.
+ *
+ * It returns zero if the range parsing has been successful or a standard error
+ * value if it failed.
+ */
+int of_pci_get_host_bridge_resources(struct device_node *dev,
+			unsigned char busno, unsigned char bus_max,
+			struct list_head *resources, resource_size_t *io_base)
+{
+	struct resource *res;
+	struct resource *bus_range;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
+	char range_type[4];
+	int err;
+
+	if (io_base)
+		*io_base = (resource_size_t)OF_BAD_ADDR;
+
+	bus_range = kzalloc(sizeof(*bus_range), GFP_KERNEL);
+	if (!bus_range)
+		return -ENOMEM;
+
+	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
+
+	err = of_pci_parse_bus_range(dev, bus_range);
+	if (err) {
+		bus_range->start = busno;
+		bus_range->end = bus_max;
+		bus_range->flags = IORESOURCE_BUS;
+		pr_info("  No bus range found for %s, using %pR\n",
+			dev->full_name, bus_range);
+	} else {
+		if (bus_range->end > bus_range->start + bus_max)
+			bus_range->end = bus_range->start + bus_max;
+	}
+	pci_add_resource(resources, bus_range);
+
+	/* Check for ranges property */
+	err = of_pci_range_parser_init(&parser, dev);
+	if (err)
+		goto parse_failed;
+
+	pr_debug("Parsing ranges property...\n");
+	for_each_of_pci_range(&parser, &range) {
+		/* Read next ranges element */
+		if ((range.flags & IORESOURCE_TYPE_BITS) == IORESOURCE_IO)
+			snprintf(range_type, 4, " IO");
+		else if ((range.flags & IORESOURCE_TYPE_BITS) == IORESOURCE_MEM)
+			snprintf(range_type, 4, "MEM");
+		else
+			snprintf(range_type, 4, "err");
+		pr_info("  %s %#010llx..%#010llx -> %#010llx\n", range_type,
+			range.cpu_addr, range.cpu_addr + range.size - 1,
+			range.pci_addr);
+
+		/*
+		 * If we failed translation or got a zero-sized region
+		 * then skip this range
+		 */
+		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+			continue;
+
+		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+		if (!res) {
+			err = -ENOMEM;
+			goto parse_failed;
+		}
+
+		err = of_pci_range_to_resource(&range, dev, res);
+		if (err)
+			goto conversion_failed;
+
+		if (resource_type(res) == IORESOURCE_IO) {
+			if (!io_base) {
+				pr_err("I/O range found for %s. Please provide an io_base pointer to save CPU base address\n",
+					dev->full_name);
+				err = -EINVAL;
+				goto conversion_failed;
+			}
+			if (*io_base != (resource_size_t)OF_BAD_ADDR)
+				pr_warn("More than one I/O resource converted for %s. CPU base address for old range lost!\n",
+					dev->full_name);
+			*io_base = range.cpu_addr;
+		}
+
+		pci_add_resource_offset(resources, res,	res->start - range.pci_addr);
+	}
+
+	return 0;
+
+conversion_failed:
+	kfree(res);
+parse_failed:
+	pci_free_resource_list(resources);
+	return err;
+}
+EXPORT_SYMBOL_GPL(of_pci_get_host_bridge_resources);
+#endif /* CONFIG_OF_ADDRESS */
+
 #ifdef CONFIG_PCI_MSI
 
 static LIST_HEAD(of_pci_msi_chip_list);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 71062e9602f5..1fd207e7a847 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -52,6 +52,12 @@ of_get_pci_domain_nr(struct device_node *node)
 }
 #endif
 
+#if defined(CONFIG_OF_ADDRESS)
+int of_pci_get_host_bridge_resources(struct device_node *dev,
+			unsigned char busno, unsigned char bus_max,
+			struct list_head *resources, resource_size_t *io_base);
+#endif
+
 #if defined(CONFIG_OF) && defined(CONFIG_PCI_MSI)
 int of_pci_msi_chip_add(struct msi_chip *chip);
 void of_pci_msi_chip_remove(struct msi_chip *chip);
-- 
cgit v1.2.3


From 8b921acfeffdb0b45085da862fc301a2d25ed2cf Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Mon, 29 Sep 2014 15:29:30 +0100
Subject: PCI: Add pci_remap_iospace() to map bus I/O resources

Add pci_remap_iospace() to map bus I/O resources into the CPU virtual
address space.  Architectures with special needs may provide their own
version, but most should be able to use this one.

This function is useful for PCI host bridge drivers that need to map the
PCI I/O resources into virtual memory space.

[bhelgaas: phys_addr description, drop temporary "err" variable]
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
CC: Arnd Bergmann <arnd@arndb.de>
---
 drivers/pci/pci.c             | 31 +++++++++++++++++++++++++++++++
 include/asm-generic/pgtable.h |  4 ++++
 include/linux/pci.h           |  3 +++
 3 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d36f35ff6c5d..6e994fc077f4 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2704,6 +2704,37 @@ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name)
 }
 EXPORT_SYMBOL(pci_request_regions_exclusive);
 
+/**
+ *	pci_remap_iospace - Remap the memory mapped I/O space
+ *	@res: Resource describing the I/O space
+ *	@phys_addr: physical address of range to be mapped
+ *
+ *	Remap the memory mapped I/O space described by the @res
+ *	and the CPU physical address @phys_addr into virtual address space.
+ *	Only architectures that have memory mapped IO functions defined
+ *	(and the PCI_IOBASE value defined) should call this function.
+ */
+int __weak pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
+{
+#if defined(PCI_IOBASE) && defined(CONFIG_MMU)
+	unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
+
+	if (!(res->flags & IORESOURCE_IO))
+		return -EINVAL;
+
+	if (res->end > IO_SPACE_LIMIT)
+		return -EINVAL;
+
+	return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
+				  pgprot_device(PAGE_KERNEL));
+#else
+	/* this architecture does not have memory mapped I/O space,
+	   so this function should never be called */
+	WARN_ONCE(1, "This architecture does not support memory mapped I/O\n");
+	return -ENODEV;
+#endif
+}
+
 static void __pci_set_master(struct pci_dev *dev, bool enable)
 {
 	u16 old_cmd, cmd;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 53b2acc38213..977e545a64c3 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -249,6 +249,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #define pgprot_writecombine pgprot_noncached
 #endif
 
+#ifndef pgprot_device
+#define pgprot_device pgprot_noncached
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 150da2d644e7..b4995fdfffb1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1100,6 +1100,9 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
 						  resource_size_t),
 			void *alignf_data);
 
+
+int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);
+
 static inline dma_addr_t pci_bus_address(struct pci_dev *pdev, int bar)
 {
 	struct pci_bus_region region;
-- 
cgit v1.2.3


From 7704ac937345d4b502062952657027234aa86a37 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 23 Sep 2014 12:08:08 -0400
Subject: HID: wacom: implement generic HID handling for pen generic devices

ISDv4 and v5 are plain HID devices. We can directly implement a generic
HID parsing/handling and remove the need to manually add those PID in
the list of supported devices.

This patch implements the pen support only. The finger part will come in
a later patch.

To be properly notified of an .event() and a .report(), we need to force
hid-core to go through the HID parsing. By default, wacom.ko binds only
hidraw, so the hid parsing is not done by hid-core. When a true HID device
is there, we add the flag HID_CLAIMED_DRIVER to hid->claimed which will
force hid-core to parse the incoming reports.
(Note that this can be easily backported by directly setting the .claimed
flag to HID_CLAIMED_DRIVER even if hid-core does not support
HID_CONNECT_DRIVER)

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Jason Gerecke <killertofu@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c  |   3 +
 drivers/hid/wacom.h     |   6 ++
 drivers/hid/wacom_sys.c |  12 +++-
 drivers/hid/wacom_wac.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/hid/wacom_wac.h |   8 +++
 include/linux/hid.h     |   2 +
 6 files changed, 208 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 12b6e67d9de0..583344dba3c6 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1591,6 +1591,9 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
 	if ((connect_mask & HID_CONNECT_HIDRAW) && !hidraw_connect(hdev))
 		hdev->claimed |= HID_CLAIMED_HIDRAW;
 
+	if (connect_mask & HID_CONNECT_DRIVER)
+		hdev->claimed |= HID_CLAIMED_DRIVER;
+
 	/* Drivers with the ->raw_event callback set are not required to connect
 	 * to any other listener. */
 	if (!hdev->claimed && !hdev->driver->raw_event) {
diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h
index 64bc1b296d91..0cc53440543a 100644
--- a/drivers/hid/wacom.h
+++ b/drivers/hid/wacom.h
@@ -89,6 +89,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
+#include <linux/hid.h>
 #include <linux/usb/input.h>
 #include <linux/power_supply.h>
 #include <asm/unaligned.h>
@@ -143,4 +144,9 @@ int wacom_setup_input_capabilities(struct input_dev *input_dev,
 				   struct wacom_wac *wacom_wac);
 int wacom_setup_pad_input_capabilities(struct input_dev *input_dev,
 				       struct wacom_wac *wacom_wac);
+void wacom_wac_usage_mapping(struct hid_device *hdev,
+		struct hid_field *field, struct hid_usage *usage);
+int wacom_wac_event(struct hid_device *hdev, struct hid_field *field,
+		struct hid_usage *usage, __s32 value);
+void wacom_wac_report(struct hid_device *hdev, struct hid_report *report);
 #endif
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 21ac2baa21be..dd288b2fbfe8 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -13,7 +13,6 @@
 
 #include "wacom_wac.h"
 #include "wacom.h"
-#include <linux/hid.h>
 
 #define WAC_MSG_RETRIES		5
 
@@ -215,6 +214,9 @@ static void wacom_usage_mapping(struct hid_device *hdev,
 			features->pressure_max = field->logical_maximum;
 		break;
 	}
+
+	if (features->type == HID_GENERIC)
+		wacom_wac_usage_mapping(hdev, field, usage);
 }
 
 static void wacom_parse_hid(struct hid_device *hdev,
@@ -1318,6 +1320,7 @@ static int wacom_probe(struct hid_device *hdev,
 	struct wacom_wac *wacom_wac;
 	struct wacom_features *features;
 	int error;
+	unsigned int connect_mask = HID_CONNECT_HIDRAW;
 
 	if (!id->driver_data)
 		return -EINVAL;
@@ -1451,8 +1454,11 @@ static int wacom_probe(struct hid_device *hdev,
 	/* Note that if query fails it is not a hard failure */
 	wacom_query_tablet_data(hdev, features);
 
+	if (features->type == HID_GENERIC)
+		connect_mask |= HID_CONNECT_DRIVER;
+
 	/* Regular HID work starts now */
-	error = hid_hw_start(hdev, HID_CONNECT_HIDRAW);
+	error = hid_hw_start(hdev, connect_mask);
 	if (error) {
 		hid_err(hdev, "hw start failed\n");
 		goto fail_hw_start;
@@ -1532,6 +1538,8 @@ static struct hid_driver wacom_driver = {
 	.id_table =	wacom_ids,
 	.probe =	wacom_probe,
 	.remove =	wacom_remove,
+	.event =	wacom_wac_event,
+	.report =	wacom_wac_report,
 #ifdef CONFIG_PM
 	.resume =	wacom_resume,
 	.reset_resume =	wacom_reset_resume,
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index b8180e40534d..e77d46d85a11 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1248,6 +1248,176 @@ static int wacom_tpc_irq(struct wacom_wac *wacom, size_t len)
 	return 0;
 }
 
+static void wacom_map_usage(struct wacom *wacom, struct hid_usage *usage,
+		struct hid_field *field, __u8 type, __u16 code, int fuzz)
+{
+	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	struct input_dev *input = wacom_wac->input;
+	int fmin = field->logical_minimum;
+	int fmax = field->logical_maximum;
+
+	usage->type = type;
+	usage->code = code;
+
+	set_bit(type, input->evbit);
+
+	switch (type) {
+	case EV_ABS:
+		input_set_abs_params(input, code, fmin, fmax, fuzz, 0);
+		input_abs_set_res(input, code,
+				  hidinput_calc_abs_res(field, code));
+		break;
+	case EV_KEY:
+		input_set_capability(input, EV_KEY, code);
+		break;
+	case EV_MSC:
+		input_set_capability(input, EV_MSC, code);
+		break;
+	}
+}
+
+static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
+		struct hid_field *field, struct hid_usage *usage)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+
+	switch (usage->hid) {
+	case HID_GD_X:
+		wacom_map_usage(wacom, usage, field, EV_ABS, ABS_X, 4);
+		break;
+	case HID_GD_Y:
+		wacom_map_usage(wacom, usage, field, EV_ABS, ABS_Y, 4);
+		break;
+	case HID_DG_TIPPRESSURE:
+		wacom_map_usage(wacom, usage, field, EV_ABS, ABS_PRESSURE, 0);
+		break;
+	case HID_DG_INRANGE:
+		wacom_map_usage(wacom, usage, field, EV_KEY, BTN_TOOL_PEN, 0);
+		break;
+	case HID_DG_INVERT:
+		wacom_map_usage(wacom, usage, field, EV_KEY,
+				BTN_TOOL_RUBBER, 0);
+		break;
+	case HID_DG_ERASER:
+	case HID_DG_TIPSWITCH:
+		wacom_map_usage(wacom, usage, field, EV_KEY, BTN_TOUCH, 0);
+		break;
+	case HID_DG_BARRELSWITCH:
+		wacom_map_usage(wacom, usage, field, EV_KEY, BTN_STYLUS, 0);
+		break;
+	case HID_DG_BARRELSWITCH2:
+		wacom_map_usage(wacom, usage, field, EV_KEY, BTN_STYLUS2, 0);
+		break;
+	case HID_DG_TOOLSERIALNUMBER:
+		wacom_map_usage(wacom, usage, field, EV_MSC, MSC_SERIAL, 0);
+		break;
+	}
+}
+
+static int wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field,
+		struct hid_usage *usage, __s32 value)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	struct input_dev *input = wacom_wac->input;
+
+	/* checking which Tool / tip switch to send */
+	switch (usage->hid) {
+	case HID_DG_INRANGE:
+		wacom_wac->hid_data.inrange_state = value;
+		return 0;
+	case HID_DG_INVERT:
+		wacom_wac->hid_data.invert_state = value;
+		return 0;
+	case HID_DG_ERASER:
+	case HID_DG_TIPSWITCH:
+		wacom_wac->hid_data.tipswitch |= value;
+		return 0;
+	}
+
+	/* send pen events only when touch is up or forced out */
+	if (!usage->type || wacom_wac->shared->touch_down)
+		return 0;
+
+	input_event(input, usage->type, usage->code, value);
+
+	return 0;
+}
+
+static void wacom_wac_pen_report(struct hid_device *hdev,
+		struct hid_report *report)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	struct input_dev *input = wacom_wac->input;
+	bool prox = wacom_wac->hid_data.inrange_state;
+
+	if (!wacom_wac->shared->stylus_in_proximity) /* first in prox */
+		/* Going into proximity select tool */
+		wacom_wac->tool[0] = wacom_wac->hid_data.invert_state ?
+						BTN_TOOL_RUBBER : BTN_TOOL_PEN;
+
+	/* keep pen state for touch events */
+	wacom_wac->shared->stylus_in_proximity = prox;
+
+	/* send pen events only when touch is up or forced out */
+	if (!wacom_wac->shared->touch_down) {
+		input_report_key(input, BTN_TOUCH,
+				wacom_wac->hid_data.tipswitch);
+		input_report_key(input, wacom_wac->tool[0], prox);
+
+		wacom_wac->hid_data.tipswitch = false;
+
+		input_sync(input);
+	}
+}
+
+#define WACOM_PEN_FIELD(f)	(((f)->logical == HID_DG_STYLUS) || \
+				 ((f)->physical == HID_DG_STYLUS))
+#define WACOM_FINGER_FIELD(f)	(((f)->logical == HID_DG_FINGER) || \
+				 ((f)->physical == HID_DG_FINGER))
+
+void wacom_wac_usage_mapping(struct hid_device *hdev,
+		struct hid_field *field, struct hid_usage *usage)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	struct input_dev *input = wacom_wac->input;
+
+	/* currently, only direct devices have proper hid report descriptors */
+	__set_bit(INPUT_PROP_DIRECT, input->propbit);
+
+	if (WACOM_PEN_FIELD(field))
+		return wacom_wac_pen_usage_mapping(hdev, field, usage);
+}
+
+int wacom_wac_event(struct hid_device *hdev, struct hid_field *field,
+		struct hid_usage *usage, __s32 value)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+
+	if (wacom->wacom_wac.features.type != HID_GENERIC)
+		return 0;
+
+	if (WACOM_PEN_FIELD(field))
+		return wacom_wac_pen_event(hdev, field, usage, value);
+
+	return 0;
+}
+
+void wacom_wac_report(struct hid_device *hdev, struct hid_report *report)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	struct hid_field *field = report->field[0];
+
+	if (wacom_wac->features.type != HID_GENERIC)
+		return;
+
+	if (WACOM_PEN_FIELD(field))
+		return wacom_wac_pen_report(hdev, report);
+}
+
 static int wacom_bpt_touch(struct wacom_wac *wacom)
 {
 	struct wacom_features *features = &wacom->features;
@@ -1746,6 +1916,10 @@ int wacom_setup_input_capabilities(struct input_dev *input_dev,
 
 	input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
+	if (features->type == HID_GENERIC)
+		/* setup has already been done */
+		return 0;
+
 	__set_bit(BTN_TOUCH, input_dev->keybit);
 	__set_bit(ABS_MISC, input_dev->absbit);
 
@@ -2585,6 +2759,9 @@ static const struct wacom_features wacom_features_0x30C =
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x30A, .touch_max = 10,
 	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 
+static const struct wacom_features wacom_features_HID_ANY_ID =
+	{ "Wacom HID", .type = HID_GENERIC };
+
 #define USB_DEVICE_WACOM(prod)						\
 	HID_DEVICE(BUS_USB, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\
 	.driver_data = (kernel_ulong_t)&wacom_features_##prod
@@ -2729,6 +2906,8 @@ const struct hid_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0x4004) },
 	{ USB_DEVICE_WACOM(0x5000) },
 	{ USB_DEVICE_WACOM(0x5002) },
+
+	{ USB_DEVICE_WACOM(HID_ANY_ID) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, wacom_ids);
diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index 72f9ca8e5cd4..f472eac292d5 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h
@@ -113,6 +113,7 @@ enum {
 	MTSCREEN,
 	MTTPC,
 	MTTPC_B,
+	HID_GENERIC,
 	MAX_TYPE
 };
 
@@ -154,6 +155,12 @@ struct wacom_shared {
 	struct input_dev *touch_input;
 };
 
+struct hid_data {
+	bool inrange_state;
+	bool invert_state;
+	bool tipswitch;
+};
+
 struct wacom_wac {
 	char name[WACOM_NAME_MAX];
 	char pad_name[WACOM_NAME_MAX];
@@ -175,6 +182,7 @@ struct wacom_wac {
 	int ps_connected;
 	u8 bt_features;
 	u8 bt_high_speed;
+	struct hid_data hid_data;
 };
 
 #endif
diff --git a/include/linux/hid.h b/include/linux/hid.h
index f53c4a9cca1d..3dcd00496064 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -265,6 +265,7 @@ struct hid_item {
 #define HID_CONNECT_HIDDEV		0x08
 #define HID_CONNECT_HIDDEV_FORCE	0x10
 #define HID_CONNECT_FF			0x20
+#define HID_CONNECT_DRIVER		0x40
 #define HID_CONNECT_DEFAULT	(HID_CONNECT_HIDINPUT|HID_CONNECT_HIDRAW| \
 		HID_CONNECT_HIDDEV|HID_CONNECT_FF)
 
@@ -440,6 +441,7 @@ struct hid_output_fifo {
 #define HID_CLAIMED_INPUT	1
 #define HID_CLAIMED_HIDDEV	2
 #define HID_CLAIMED_HIDRAW	4
+#define HID_CLAIMED_DRIVER	8
 
 #define HID_STAT_ADDED		1
 #define HID_STAT_PARSED		2
-- 
cgit v1.2.3


From ad975ebad4c3ce8dcc7d0bb4db26ea5aca4cfc99 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Tue, 23 Sep 2014 12:39:54 -0600
Subject: PCI/MSI: Remove arch_msi_check_device()

No architectures implement arch_msi_check_device() or the struct msi_chip
.check_device() method, so remove them.

Remove the "type" parameter to pci_msi_check_device() because it was only
used to call arch_msi_check_device() and is no longer needed.

[bhelgaas: changelog, split to separate patch]
Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/msi.c   | 21 +++------------------
 include/linux/msi.h |  3 ---
 2 files changed, 3 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5a40516444f3..db21b77a03a5 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -56,16 +56,6 @@ void __weak arch_teardown_msi_irq(unsigned int irq)
 	chip->teardown_irq(chip, irq);
 }
 
-int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
-{
-	struct msi_chip *chip = dev->bus->msi;
-
-	if (!chip || !chip->check_device)
-		return 0;
-
-	return chip->check_device(chip, dev, nvec, type);
-}
-
 int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	struct msi_desc *entry;
@@ -815,10 +805,9 @@ out_free:
  * to determine if MSI/-X are supported for the device. If MSI/-X is
  * supported return 0, else return an error code.
  **/
-static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
+static int pci_msi_check_device(struct pci_dev *dev, int nvec)
 {
 	struct pci_bus *bus;
-	int ret;
 
 	/* MSI must be globally enabled and supported by the device */
 	if (!pci_msi_enable || !dev || dev->no_msi)
@@ -843,10 +832,6 @@ static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
 		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
 			return -EINVAL;
 
-	ret = arch_msi_check_device(dev, nvec, type);
-	if (ret)
-		return ret;
-
 	return 0;
 }
 
@@ -952,7 +937,7 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 	if (!entries || !dev->msix_cap || dev->current_state != PCI_D0)
 		return -EINVAL;
 
-	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
+	status = pci_msi_check_device(dev, nvec);
 	if (status)
 		return status;
 
@@ -1086,7 +1071,7 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
 		nvec = maxvec;
 
 	do {
-		rc = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
+		rc = pci_msi_check_device(dev, nvec);
 		if (rc < 0) {
 			return rc;
 		} else if (rc > 0) {
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 8103f32f6d87..dbf7cc932444 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -60,7 +60,6 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
 void arch_teardown_msi_irq(unsigned int irq);
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void arch_teardown_msi_irqs(struct pci_dev *dev);
-int arch_msi_check_device(struct pci_dev* dev, int nvec, int type);
 void arch_restore_msi_irqs(struct pci_dev *dev);
 
 void default_teardown_msi_irqs(struct pci_dev *dev);
@@ -77,8 +76,6 @@ struct msi_chip {
 	int (*setup_irq)(struct msi_chip *chip, struct pci_dev *dev,
 			 struct msi_desc *desc);
 	void (*teardown_irq)(struct msi_chip *chip, unsigned int irq);
-	int (*check_device)(struct msi_chip *chip, struct pci_dev *dev,
-			    int nvec, int type);
 };
 
 #endif /* LINUX_MSI_H */
-- 
cgit v1.2.3


From 81052769e48609525c452d8f078a5786b673e178 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Tue, 23 Sep 2014 13:27:22 +0800
Subject: PCI/MSI: Remove unused kobject from struct msi_desc

After commit 1c51b50c2995 ("PCI/MSI: Export MSI mode using attributes, not
kobjects"), the kobject in struct msi_desc is unused.

Remove the unused struct kobject from struct msi_desc.

[bhelgaas: changelog]
Fixes: 1c51b50c2995 ("PCI/MSI: Export MSI mode using attributes, not kobjects")
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/msi.c   | 11 -----------
 include/linux/msi.h |  2 --
 2 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5f1e5dc994cf..97d6ef67a3c8 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -374,17 +374,6 @@ static void free_msi_irqs(struct pci_dev *dev)
 				iounmap(entry->mask_base);
 		}
 
-		/*
-		 * Its possible that we get into this path
-		 * When populate_msi_sysfs fails, which means the entries
-		 * were not registered with sysfs.  In that case don't
-		 * unregister them.
-		 */
-		if (entry->kobj.parent) {
-			kobject_del(&entry->kobj);
-			kobject_put(&entry->kobj);
-		}
-
 		list_del(&entry->list);
 		kfree(entry);
 	}
diff --git a/include/linux/msi.h b/include/linux/msi.h
index dbf7cc932444..5a91c2d58ffd 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -47,8 +47,6 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
-
-	struct kobject kobj;
 };
 
 /*
-- 
cgit v1.2.3


From 48c3c38f003c25d50a09d3da558667c5ecd530aa Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Tue, 23 Sep 2014 11:02:42 -0600
Subject: PCI/MSI: Remove "pos" from the struct msi_desc msi_attrib

"msi_attrib.pos" is only used for MSI (not MSI-X), and we already cache the
MSI capability offset in "dev->msi_cap".

Remove "pos" from the struct msi_attrib and use "dev->msi_cap" directly.

[bhelgaas: changelog, fix whitespace]
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 arch/mips/pci/msi-octeon.c         | 6 ++----
 drivers/pci/host/pcie-designware.c | 5 ++---
 drivers/pci/msi.c                  | 2 --
 include/linux/msi.h                | 1 -
 4 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c
index ab0c5d14c6f7..63bbe07a1ccd 100644
--- a/arch/mips/pci/msi-octeon.c
+++ b/arch/mips/pci/msi-octeon.c
@@ -73,8 +73,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 	 * wants.  Most devices only want 1, which will give
 	 * configured_private_bits and request_private_bits equal 0.
 	 */
-	pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS,
-			     &control);
+	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
 
 	/*
 	 * If the number of private bits has been configured then use
@@ -176,8 +175,7 @@ msi_irq_allocated:
 	/* Update the number of IRQs the device has available to it */
 	control &= ~PCI_MSI_FLAGS_QSIZE;
 	control |= request_private_bits << 4;
-	pci_write_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS,
-			      control);
+	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
 
 	irq_set_msi_desc(irq, desc);
 	write_msi_msg(irq, &msg);
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index 52bd3a143563..fa2fa459ed90 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -355,9 +355,8 @@ static int dw_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
 		return -EINVAL;
 	}
 
-	pci_read_config_word(pdev, desc->msi_attrib.pos+PCI_MSI_FLAGS,
-				&msg_ctr);
-	msgvec = (msg_ctr&PCI_MSI_FLAGS_QSIZE) >> 4;
+	pci_read_config_word(pdev, pdev->msi_cap + PCI_MSI_FLAGS, &msg_ctr);
+	msgvec = (msg_ctr & PCI_MSI_FLAGS_QSIZE) >> 4;
 	if (msgvec == 0)
 		msgvec = (msg_ctr & PCI_MSI_FLAGS_QMASK) >> 1;
 	if (msgvec > 5)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 97d6ef67a3c8..40699a2041b5 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -574,7 +574,6 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev)
 	entry->msi_attrib.entry_nr	= 0;
 	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
 	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
-	entry->msi_attrib.pos		= dev->msi_cap;
 	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
 
 	if (control & PCI_MSI_FLAGS_64BIT)
@@ -678,7 +677,6 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 		entry->msi_attrib.is_64		= 1;
 		entry->msi_attrib.entry_nr	= entries[i].entry;
 		entry->msi_attrib.default_irq	= dev->irq;
-		entry->msi_attrib.pos		= dev->msix_cap;
 		entry->mask_base		= base;
 
 		list_add_tail(&entry->list, &dev->msi_list);
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 5a91c2d58ffd..44f4746d033b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -29,7 +29,6 @@ struct msi_desc {
 		__u8	multi_cap : 3;	/* log2 num of messages supported */
 		__u8	maskbit	: 1;	/* mask-pending bit supported ? */
 		__u8	is_64	: 1;	/* Address size: 0=32bit 1=64bit */
-		__u8	pos;		/* Location of the msi capability */
 		__u16	entry_nr;	/* specific enabled entry */
 		unsigned default_irq;	/* default pre-assigned irq */
 	} msi_attrib;
-- 
cgit v1.2.3


From d0bf4a9e92b9a93ffeeacbd7b6cb83e0ee3dc2ef Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 29 Sep 2014 13:29:15 -0700
Subject: net: cleanup and document skb fclone layout

Lets use a proper structure to clearly document and implement
skb fast clones.

Then, we might experiment more easily alternative layouts.

This patch adds a new skb_fclone_busy() helper, used by tcp and xfrm,
to stop leaking of implementation details.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 25 +++++++++++++++++++++++++
 net/core/skbuff.c      | 41 ++++++++++++++++++++---------------------
 net/ipv4/tcp_output.c  |  5 +----
 net/xfrm/xfrm_policy.c |  4 +---
 4 files changed, 47 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 262efdbc346b..d8f7d74d5a4d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -781,6 +781,31 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 				     int *errcode,
 				     gfp_t gfp_mask);
 
+/* Layout of fast clones : [skb1][skb2][fclone_ref] */
+struct sk_buff_fclones {
+	struct sk_buff	skb1;
+
+	struct sk_buff	skb2;
+
+	atomic_t	fclone_ref;
+};
+
+/**
+ *	skb_fclone_busy - check if fclone is busy
+ *	@skb: buffer
+ *
+ * Returns true is skb is a fast clone, and its clone is not freed.
+ */
+static inline bool skb_fclone_busy(const struct sk_buff *skb)
+{
+	const struct sk_buff_fclones *fclones;
+
+	fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+	return skb->fclone == SKB_FCLONE_ORIG &&
+	       fclones->skb2.fclone == SKB_FCLONE_CLONE;
+}
+
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4be570a4ab21..a8cebb40699c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -257,15 +257,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (flags & SKB_ALLOC_FCLONE) {
-		struct sk_buff *child = skb + 1;
-		atomic_t *fclone_ref = (atomic_t *) (child + 1);
+		struct sk_buff_fclones *fclones;
 
-		kmemcheck_annotate_bitfield(child, flags1);
+		fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
 		skb->fclone = SKB_FCLONE_ORIG;
-		atomic_set(fclone_ref, 1);
+		atomic_set(&fclones->fclone_ref, 1);
 
-		child->fclone = SKB_FCLONE_UNAVAILABLE;
-		child->pfmemalloc = pfmemalloc;
+		fclones->skb2.fclone = SKB_FCLONE_UNAVAILABLE;
+		fclones->skb2.pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
@@ -524,8 +525,7 @@ static void skb_release_data(struct sk_buff *skb)
  */
 static void kfree_skbmem(struct sk_buff *skb)
 {
-	struct sk_buff *other;
-	atomic_t *fclone_ref;
+	struct sk_buff_fclones *fclones;
 
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
@@ -533,22 +533,21 @@ static void kfree_skbmem(struct sk_buff *skb)
 		break;
 
 	case SKB_FCLONE_ORIG:
-		fclone_ref = (atomic_t *) (skb + 2);
-		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, skb);
+		fclones = container_of(skb, struct sk_buff_fclones, skb1);
+		if (atomic_dec_and_test(&fclones->fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, fclones);
 		break;
 
 	case SKB_FCLONE_CLONE:
-		fclone_ref = (atomic_t *) (skb + 1);
-		other = skb - 1;
+		fclones = container_of(skb, struct sk_buff_fclones, skb2);
 
 		/* The clone portion is available for
 		 * fast-cloning again.
 		 */
 		skb->fclone = SKB_FCLONE_UNAVAILABLE;
 
-		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, other);
+		if (atomic_dec_and_test(&fclones->fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, fclones);
 		break;
 	}
 }
@@ -859,17 +858,18 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs);
 
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	struct sk_buff *n;
+	struct sk_buff_fclones *fclones = container_of(skb,
+						       struct sk_buff_fclones,
+						       skb1);
+	struct sk_buff *n = &fclones->skb2;
 
 	if (skb_orphan_frags(skb, gfp_mask))
 		return NULL;
 
-	n = skb + 1;
 	if (skb->fclone == SKB_FCLONE_ORIG &&
 	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
-		atomic_t *fclone_ref = (atomic_t *) (n + 1);
 		n->fclone = SKB_FCLONE_CLONE;
-		atomic_inc(fclone_ref);
+		atomic_inc(&fclones->fclone_ref);
 	} else {
 		if (skb_pfmemalloc(skb))
 			gfp_mask |= __GFP_MEMALLOC;
@@ -3240,8 +3240,7 @@ void __init skb_init(void)
 					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					      NULL);
 	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
-						(2*sizeof(struct sk_buff)) +
-						sizeof(atomic_t),
+						sizeof(struct sk_buff_fclones),
 						0,
 						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ee567e9e98c3..8d4eac793700 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2110,10 +2110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 static bool skb_still_in_host_queue(const struct sock *sk,
 				    const struct sk_buff *skb)
 {
-	const struct sk_buff *fclone = skb + 1;
-
-	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-		     fclone->fclone == SKB_FCLONE_CLONE)) {
+	if (unlikely(skb_fclone_busy(skb))) {
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 		return true;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f623dca6ce30..4c4e457e7888 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1961,10 +1961,8 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
 	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
 	struct xfrm_policy *pol = xdst->pols[0];
 	struct xfrm_policy_queue *pq = &pol->polq;
-	const struct sk_buff *fclone = skb + 1;
 
-	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-		     fclone->fclone == SKB_FCLONE_CLONE)) {
+	if (unlikely(skb_fclone_busy(skb))) {
 		kfree_skb(skb);
 		return 0;
 	}
-- 
cgit v1.2.3


From 8bce6d7d0d1ede22af334ee241841e9278365278 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 29 Sep 2014 20:22:29 -0700
Subject: udp: Generalize skb_udp_segment

skb_udp_segment is the function called from udp4_ufo_fragment to
segment a UDP tunnel packet. This function currently assumes
segmentation is transparent Ethernet bridging (i.e. VXLAN
encapsulation). This patch generalizes the function to
operate on either Ethertype or IP protocol.

The inner_protocol field must be set to the protocol of the inner
header. This can now be either an Ethertype or an IP protocol
(in a union). A new flag in the skbuff indicates which type is
effective. skb_set_inner_protocol and skb_set_inner_ipproto
helper functions were added to set the inner_protocol. These
functions are called from the point where the tunnel encapsulation
is occuring.

When skb_udp_tunnel_segment is called, the function to segment the
inner packet is selected based on the inner IP or Ethertype. In the
case of an IP protocol encapsulation, the function is derived from
inet[6]_offloads. In the case of Ethertype, skb->protocol is
set to the inner_protocol and skb_mac_gso_segment is called. (GRE
currently does this, but it might be possible to lookup the protocol
in offload_base and call the appropriate segmenation function
directly).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 26 +++++++++++++++++++++++--
 include/net/udp.h      |  3 ++-
 net/ipv4/udp_offload.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-----
 net/ipv6/udp_offload.c |  2 +-
 4 files changed, 73 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d8f7d74d5a4d..7c5036d11feb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -596,7 +596,8 @@ struct sk_buff {
 	__u8			ndisc_nodetype:2;
 #endif
 	__u8			ipvs_property:1;
-	/* 5 or 7 bit hole */
+	__u8			inner_protocol_type:1;
+	/* 4 or 6 bit hole */
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
@@ -632,7 +633,11 @@ struct sk_buff {
 		__u32		reserved_tailroom;
 	};
 
-	__be16			inner_protocol;
+	union {
+		__be16		inner_protocol;
+		__u8		inner_ipproto;
+	};
+
 	__u16			inner_transport_header;
 	__u16			inner_network_header;
 	__u16			inner_mac_header;
@@ -1762,6 +1767,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+#define ENCAP_TYPE_ETHER	0
+#define ENCAP_TYPE_IPPROTO	1
+
+static inline void skb_set_inner_protocol(struct sk_buff *skb,
+					  __be16 protocol)
+{
+	skb->inner_protocol = protocol;
+	skb->inner_protocol_type = ENCAP_TYPE_ETHER;
+}
+
+static inline void skb_set_inner_ipproto(struct sk_buff *skb,
+					 __u8 ipproto)
+{
+	skb->inner_ipproto = ipproto;
+	skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
+}
+
 static inline void skb_reset_inner_headers(struct sk_buff *skb)
 {
 	skb->inner_mac_header = skb->mac_header;
diff --git a/include/net/udp.h b/include/net/udp.h
index 16f4e80f0519..07f9b70962f6 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -239,7 +239,8 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int udp_disconnect(struct sock *sk, int flags);
 unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait);
 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
-				       netdev_features_t features);
+				       netdev_features_t features,
+				       bool is_ipv6);
 int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		       char __user *optval, int __user *optlen);
 int udp_lib_setsockopt(struct sock *sk, int level, int optname,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 19ebe6a39ddc..8c35f2c939ee 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -25,8 +25,11 @@ struct udp_offload_priv {
 	struct udp_offload_priv __rcu *next;
 };
 
-struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
-				       netdev_features_t features)
+static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
+	netdev_features_t features,
+	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+					     netdev_features_t features),
+	__be16 new_protocol)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	u16 mac_offset = skb->mac_header;
@@ -48,7 +51,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, skb_inner_network_offset(skb));
 	skb->mac_len = skb_inner_network_offset(skb);
-	skb->protocol = htons(ETH_P_TEB);
+	skb->protocol = new_protocol;
 
 	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
 	if (need_csum)
@@ -56,7 +59,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 
 	/* segment inner packet. */
 	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
-	segs = skb_mac_gso_segment(skb, enc_features);
+	segs = gso_inner_segment(skb, enc_features);
 	if (IS_ERR_OR_NULL(segs)) {
 		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
 				     mac_len);
@@ -101,6 +104,44 @@ out:
 	return segs;
 }
 
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+				       netdev_features_t features,
+				       bool is_ipv6)
+{
+	__be16 protocol = skb->protocol;
+	const struct net_offload **offloads;
+	const struct net_offload *ops;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+					     netdev_features_t features);
+
+	rcu_read_lock();
+
+	switch (skb->inner_protocol_type) {
+	case ENCAP_TYPE_ETHER:
+		protocol = skb->inner_protocol;
+		gso_inner_segment = skb_mac_gso_segment;
+		break;
+	case ENCAP_TYPE_IPPROTO:
+		offloads = is_ipv6 ? inet6_offloads : inet_offloads;
+		ops = rcu_dereference(offloads[skb->inner_ipproto]);
+		if (!ops || !ops->callbacks.gso_segment)
+			goto out_unlock;
+		gso_inner_segment = ops->callbacks.gso_segment;
+		break;
+	default:
+		goto out_unlock;
+	}
+
+	segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
+					protocol);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return segs;
+}
+
 static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 					 netdev_features_t features)
 {
@@ -113,7 +154,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	if (skb->encapsulation &&
 	    (skb_shinfo(skb)->gso_type &
 	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
-		segs = skb_udp_tunnel_segment(skb, features);
+		segs = skb_udp_tunnel_segment(skb, features, false);
 		goto out;
 	}
 
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 212ebfc7973f..8f96988c1db2 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -58,7 +58,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 
 	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
 	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
-		segs = skb_udp_tunnel_segment(skb, features);
+		segs = skb_udp_tunnel_segment(skb, features, true);
 	else {
 		const struct ipv6hdr *ipv6h;
 		struct udphdr *uh;
-- 
cgit v1.2.3


From 90a8020278c1598fafd071736a0846b38510309c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 1 Oct 2014 21:49:18 -0400
Subject: vfs: fix data corruption when blocksize < pagesize for mmaped data

->page_mkwrite() is used by filesystems to allocate blocks under a page
which is becoming writeably mmapped in some process' address space. This
allows a filesystem to return a page fault if there is not enough space
available, user exceeds quota or similar problem happens, rather than
silently discarding data later when writepage is called.

However VFS fails to call ->page_mkwrite() in all the cases where
filesystems need it when blocksize < pagesize. For example when
blocksize = 1024, pagesize = 4096 the following is problematic:
  ftruncate(fd, 0);
  pwrite(fd, buf, 1024, 0);
  map = mmap(NULL, 1024, PROT_WRITE, MAP_SHARED, fd, 0);
  map[0] = 'a';       ----> page_mkwrite() for index 0 is called
  ftruncate(fd, 10000); /* or even pwrite(fd, buf, 1, 10000) */
  mremap(map, 1024, 10000, 0);
  map[4095] = 'a';    ----> no page_mkwrite() called

At the moment ->page_mkwrite() is called, filesystem can allocate only
one block for the page because i_size == 1024. Otherwise it would create
blocks beyond i_size which is generally undesirable. But later at
->writepage() time, we also need to store data at offset 4095 but we
don't have block allocated for it.

This patch introduces a helper function filesystems can use to have
->page_mkwrite() called at all the necessary moments.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/buffer.c        |  3 +++
 include/linux/mm.h |  1 +
 mm/truncate.c      | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 9a6029e0dd71..6dc1475dcb2d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2087,6 +2087,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 			struct page *page, void *fsdata)
 {
 	struct inode *inode = mapping->host;
+	loff_t old_size = inode->i_size;
 	int i_size_changed = 0;
 
 	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2106,6 +2107,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 	unlock_page(page);
 	page_cache_release(page);
 
+	if (old_size < pos)
+		pagecache_isize_extended(inode, old_size, pos);
 	/*
 	 * Don't mark the inode dirty under page lock. First, it unnecessarily
 	 * makes the holding time of page lock longer. Second, it forces lock
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8981cc882ed2..5005464fe012 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1155,6 +1155,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/mm/truncate.c b/mm/truncate.c
index 96d167372d89..261eaf6e5a19 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
 				   do_invalidatepage */
 #include <linux/cleancache.h>
+#include <linux/rmap.h>
 #include "internal.h"
 
 static void clear_exceptional_entry(struct address_space *mapping,
@@ -719,11 +720,67 @@ EXPORT_SYMBOL(truncate_pagecache);
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
+	loff_t oldsize = inode->i_size;
+
 	i_size_write(inode, newsize);
+	if (newsize > oldsize)
+		pagecache_isize_extended(inode, oldsize, newsize);
 	truncate_pagecache(inode, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
 
+/**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode:	inode for which i_size was extended
+ * @from:	original inode size
+ * @to:		new inode size
+ *
+ * Handle extension of inode size either caused by extending truncate or by
+ * write starting after current i_size. We mark the page straddling current
+ * i_size RO so that page_mkwrite() is called on the nearest write access to
+ * the page.  This way filesystem can be sure that page_mkwrite() is called on
+ * the page before user writes to the page via mmap after the i_size has been
+ * changed.
+ *
+ * The function must be called after i_size is updated so that page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also that userspace cannot observe new
+ * i_size value before we are prepared to store mmap writes at new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+	int bsize = 1 << inode->i_blkbits;
+	loff_t rounded_from;
+	struct page *page;
+	pgoff_t index;
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	WARN_ON(to > inode->i_size);
+
+	if (from >= to || bsize == PAGE_CACHE_SIZE)
+		return;
+	/* Page straddling @from will not have any hole block created? */
+	rounded_from = round_up(from, bsize);
+	if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+		return;
+
+	index = from >> PAGE_CACHE_SHIFT;
+	page = find_lock_page(inode->i_mapping, index);
+	/* Page not cached? Nothing to do */
+	if (!page)
+		return;
+	/*
+	 * See clear_page_dirty_for_io() for details why set_page_dirty()
+	 * is needed.
+	 */
+	if (page_mkclean(page))
+		set_page_dirty(page);
+	unlock_page(page);
+	page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
 /**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode
-- 
cgit v1.2.3


From d068b02cfdfc27f5962ec82ec5568b706f599edc Mon Sep 17 00:00:00 2001
From: Petri Gynther <pgynther@google.com>
Date: Wed, 1 Oct 2014 11:58:02 -0700
Subject: net: phy: add BCM7425 and BCM7429 PHYs

Signed-off-by: Petri Gynther <pgynther@google.com>
Acked-by: Florian Fainelli <f.fainelli@gmai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm7xxx.c | 28 ++++++++++++++++++++++++++++
 include/linux/brcmphy.h   |  2 ++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index daae69950925..1d211d369039 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -350,6 +350,32 @@ static struct phy_driver bcm7xxx_driver[] = {
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"),
 {
+	.phy_id         = PHY_ID_BCM7425,
+	.phy_id_mask    = 0xfffffff0,
+	.name           = "Broadcom BCM7425",
+	.features       = PHY_GBIT_FEATURES |
+			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+	.flags          = 0,
+	.config_init    = bcm7xxx_config_init,
+	.config_aneg    = genphy_config_aneg,
+	.read_status    = genphy_read_status,
+	.suspend        = bcm7xxx_suspend,
+	.resume         = bcm7xxx_config_init,
+	.driver         = { .owner = THIS_MODULE },
+}, {
+	.phy_id         = PHY_ID_BCM7429,
+	.phy_id_mask    = 0xfffffff0,
+	.name           = "Broadcom BCM7429",
+	.features       = PHY_GBIT_FEATURES |
+			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+	.flags          = PHY_IS_INTERNAL,
+	.config_init    = bcm7xxx_config_init,
+	.config_aneg    = genphy_config_aneg,
+	.read_status    = genphy_read_status,
+	.suspend        = bcm7xxx_suspend,
+	.resume         = bcm7xxx_config_init,
+	.driver         = { .owner = THIS_MODULE },
+}, {
 	.phy_id		= PHY_BCM_OUI_4,
 	.phy_id_mask	= 0xffff0000,
 	.name		= "Broadcom BCM7XXX 40nm",
@@ -381,6 +407,8 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
 	{ PHY_ID_BCM7250, 0xfffffff0, },
 	{ PHY_ID_BCM7364, 0xfffffff0, },
 	{ PHY_ID_BCM7366, 0xfffffff0, },
+	{ PHY_ID_BCM7425, 0xfffffff0, },
+	{ PHY_ID_BCM7429, 0xfffffff0, },
 	{ PHY_ID_BCM7439, 0xfffffff0, },
 	{ PHY_ID_BCM7445, 0xfffffff0, },
 	{ PHY_BCM_OUI_4, 0xffff0000 },
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 3f626fe48c9a..7ccd928cc1f2 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -16,6 +16,8 @@
 #define PHY_ID_BCM7250			0xae025280
 #define PHY_ID_BCM7364			0xae025260
 #define PHY_ID_BCM7366			0x600d8490
+#define PHY_ID_BCM7425			0x03625e60
+#define PHY_ID_BCM7429			0x600d8730
 #define PHY_ID_BCM7439			0x600d8480
 #define PHY_ID_BCM7445			0x600d8510
 
-- 
cgit v1.2.3


From 599bad38cf7163123af7c9efea0fcf228bc74fe1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 30 Sep 2014 13:02:02 +0200
Subject: driver core: Add BUS_NOTIFY_REMOVED_DEVICE event

This event closes an important gap in the bus notifiers.
There is already the BUS_NOTIFY_DEL_DEVICE event, but that
is sent when the device is still bound to its device driver.

This is too early for the IOMMU code to destroy any mappings
for the device, as they might still be in use by the driver.

The new BUS_NOTIFY_REMOVED_DEVICE event introduced with this
patch closes this gap as it is sent when the device is
already unbound from its device driver and almost completly
removed from the driver core.

With this event the IOMMU code can safely destroy any
mappings and other data structures when a device is removed.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Jerry Hoemann <jerry.hoemann@hp.com>
---
 drivers/base/core.c    |  3 +++
 include/linux/device.h | 11 ++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 20da3ad1696b..7b270a2e6ed5 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1211,6 +1211,9 @@ void device_del(struct device *dev)
 	 */
 	if (platform_notify_remove)
 		platform_notify_remove(dev);
+	if (dev->bus)
+		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
+					     BUS_NOTIFY_REMOVED_DEVICE, dev);
 	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
 	cleanup_device_parent(dev);
 	kobject_del(&dev->kobj);
diff --git a/include/linux/device.h b/include/linux/device.h
index 43d183aeb25b..d0d5c5db509d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -181,13 +181,14 @@ extern int bus_unregister_notifier(struct bus_type *bus,
  * with the device lock held in the core, so be careful.
  */
 #define BUS_NOTIFY_ADD_DEVICE		0x00000001 /* device added */
-#define BUS_NOTIFY_DEL_DEVICE		0x00000002 /* device removed */
-#define BUS_NOTIFY_BIND_DRIVER		0x00000003 /* driver about to be
+#define BUS_NOTIFY_DEL_DEVICE		0x00000002 /* device to be removed */
+#define BUS_NOTIFY_REMOVED_DEVICE	0x00000003 /* device removed */
+#define BUS_NOTIFY_BIND_DRIVER		0x00000004 /* driver about to be
 						      bound */
-#define BUS_NOTIFY_BOUND_DRIVER		0x00000004 /* driver bound to device */
-#define BUS_NOTIFY_UNBIND_DRIVER	0x00000005 /* driver about to be
+#define BUS_NOTIFY_BOUND_DRIVER		0x00000005 /* driver bound to device */
+#define BUS_NOTIFY_UNBIND_DRIVER	0x00000006 /* driver about to be
 						      unbound */
-#define BUS_NOTIFY_UNBOUND_DRIVER	0x00000006 /* driver is unbound
+#define BUS_NOTIFY_UNBOUND_DRIVER	0x00000007 /* driver is unbound
 						      from the device */
 
 extern struct kset *bus_get_kset(struct bus_type *bus);
-- 
cgit v1.2.3


From 89168b48991537bec2573b3b6a8841df74465b12 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 2 Oct 2014 09:08:46 +0200
Subject: mmc: core: restore detect line inversion semantics

commit 98e90de99a0c43bd434da814c882c4332441871e
"mmc: host: switch OF parser to use gpio descriptors"
switched the semantic behaviour of card detect and read
only flags such that the inversion capability flag would
only be set if inversion was explicitly specified in the
device tree, in the hopes that no-one was using double
inversion.

It turns out that the XOR:ing between the explicit
inversion was indeed in use, so we need to restore the
old semantics where both ways of inversion are checked
and the end result XOR:ed.

Reported-by: Javier Martinez Canillas <javier@dowhile0.org>
Tested-by: Javier Martinez Canillas <javier@dowhile0.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/host.c       | 32 ++++++++++++++++++++++++++++----
 drivers/mmc/core/slot-gpio.c  | 14 ++++++++++++--
 drivers/mmc/host/mmci.c       |  4 ++--
 drivers/mmc/host/sdhci-acpi.c |  2 +-
 include/linux/mmc/slot-gpio.h |  4 ++--
 5 files changed, 45 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 31969436d77c..03c53b72a2d6 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -311,6 +311,7 @@ int mmc_of_parse(struct mmc_host *host)
 	struct device_node *np;
 	u32 bus_width;
 	int len, ret;
+	bool cap_invert, gpio_invert;
 
 	if (!host->parent || !host->parent->of_node)
 		return 0;
@@ -359,12 +360,15 @@ int mmc_of_parse(struct mmc_host *host)
 		host->caps |= MMC_CAP_NONREMOVABLE;
 	} else {
 		if (of_property_read_bool(np, "cd-inverted"))
-			host->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
+			cap_invert = true;
+		else
+			cap_invert = false;
 
 		if (of_find_property(np, "broken-cd", &len))
 			host->caps |= MMC_CAP_NEEDS_POLL;
 
-		ret = mmc_gpiod_request_cd(host, "cd", 0, false, 0);
+		ret = mmc_gpiod_request_cd(host, "cd", 0, true,
+					   0, &gpio_invert);
 		if (ret) {
 			if (ret == -EPROBE_DEFER)
 				return ret;
@@ -375,13 +379,29 @@ int mmc_of_parse(struct mmc_host *host)
 			}
 		} else
 			dev_info(host->parent, "Got CD GPIO\n");
+
+		/*
+		 * There are two ways to flag that the CD line is inverted:
+		 * through the cd-inverted flag and by the GPIO line itself
+		 * being inverted from the GPIO subsystem. This is a leftover
+		 * from the times when the GPIO subsystem did not make it
+		 * possible to flag a line as inverted.
+		 *
+		 * If the capability on the host AND the GPIO line are
+		 * both inverted, the end result is that the CD line is
+		 * not inverted.
+		 */
+		if (cap_invert ^ gpio_invert)
+			host->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
 	}
 
 	/* Parse Write Protection */
 	if (of_property_read_bool(np, "wp-inverted"))
-		host->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
+		cap_invert = true;
+	else
+		cap_invert = false;
 
-	ret = mmc_gpiod_request_ro(host, "wp", 0, false, 0);
+	ret = mmc_gpiod_request_ro(host, "wp", 0, false, 0, &gpio_invert);
 	if (ret) {
 		if (ret == -EPROBE_DEFER)
 			goto out;
@@ -393,6 +413,10 @@ int mmc_of_parse(struct mmc_host *host)
 	} else
 		dev_info(host->parent, "Got WP GPIO\n");
 
+	/* See the comment on CD inversion above */
+	if (cap_invert ^ gpio_invert)
+		host->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
+
 	if (of_find_property(np, "cap-sd-highspeed", &len))
 		host->caps |= MMC_CAP_SD_HIGHSPEED;
 	if (of_find_property(np, "cap-mmc-highspeed", &len))
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 38f76555d4bf..69bbf2adb329 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -281,6 +281,8 @@ EXPORT_SYMBOL(mmc_gpio_free_cd);
  * @idx: index of the GPIO to obtain in the consumer
  * @override_active_level: ignore %GPIO_ACTIVE_LOW flag
  * @debounce: debounce time in microseconds
+ * @gpio_invert: will return whether the GPIO line is inverted or not, set
+ * to NULL to ignore
  *
  * Use this function in place of mmc_gpio_request_cd() to use the GPIO
  * descriptor API.  Note that it is paired with mmc_gpiod_free_cd() not
@@ -291,7 +293,7 @@ EXPORT_SYMBOL(mmc_gpio_free_cd);
  */
 int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id,
 			 unsigned int idx, bool override_active_level,
-			 unsigned int debounce)
+			 unsigned int debounce, bool *gpio_invert)
 {
 	struct mmc_gpio *ctx;
 	struct gpio_desc *desc;
@@ -316,6 +318,9 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id,
 			return ret;
 	}
 
+	if (gpio_invert)
+		*gpio_invert = !gpiod_is_active_low(desc);
+
 	ctx->override_cd_active_level = override_active_level;
 	ctx->cd_gpio = desc;
 
@@ -330,6 +335,8 @@ EXPORT_SYMBOL(mmc_gpiod_request_cd);
  * @idx: index of the GPIO to obtain in the consumer
  * @override_active_level: ignore %GPIO_ACTIVE_LOW flag
  * @debounce: debounce time in microseconds
+ * @gpio_invert: will return whether the GPIO line is inverted or not,
+ * set to NULL to ignore
  *
  * Use this function in place of mmc_gpio_request_ro() to use the GPIO
  * descriptor API.  Note that it is paired with mmc_gpiod_free_ro() not
@@ -339,7 +346,7 @@ EXPORT_SYMBOL(mmc_gpiod_request_cd);
  */
 int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
 			 unsigned int idx, bool override_active_level,
-			 unsigned int debounce)
+			 unsigned int debounce, bool *gpio_invert)
 {
 	struct mmc_gpio *ctx;
 	struct gpio_desc *desc;
@@ -364,6 +371,9 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
 			return ret;
 	}
 
+	if (gpio_invert)
+		*gpio_invert = !gpiod_is_active_low(desc);
+
 	ctx->override_ro_active_level = override_active_level;
 	ctx->ro_gpio = desc;
 
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index c9dafed550f2..43af791e2e45 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1682,7 +1682,7 @@ static int mmci_probe(struct amba_device *dev,
 	 * silently of these do not exist and proceed to try platform data
 	 */
 	if (!np) {
-		ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0);
+		ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL);
 		if (ret < 0) {
 			if (ret == -EPROBE_DEFER)
 				goto clk_disable;
@@ -1693,7 +1693,7 @@ static int mmci_probe(struct amba_device *dev,
 			}
 		}
 
-		ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0);
+		ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL);
 		if (ret < 0) {
 			if (ret == -EPROBE_DEFER)
 				goto clk_disable;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 3483c089baa7..327bc24ec8ce 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -352,7 +352,7 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
 	if (sdhci_acpi_flag(c, SDHCI_ACPI_SD_CD)) {
 		bool v = sdhci_acpi_flag(c, SDHCI_ACPI_SD_CD_OVERRIDE_LEVEL);
 
-		if (mmc_gpiod_request_cd(host->mmc, NULL, 0, v, 0)) {
+		if (mmc_gpiod_request_cd(host->mmc, NULL, 0, v, 0, NULL)) {
 			dev_warn(dev, "failed to setup card detect gpio\n");
 			c->use_runtime_pm = false;
 		}
diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index a0d0442c15bf..e56fa24c9322 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -24,10 +24,10 @@ void mmc_gpio_free_cd(struct mmc_host *host);
 
 int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id,
 			 unsigned int idx, bool override_active_level,
-			 unsigned int debounce);
+			 unsigned int debounce, bool *gpio_invert);
 int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
 			 unsigned int idx, bool override_active_level,
-			 unsigned int debounce);
+			 unsigned int debounce, bool *gpio_invert);
 void mmc_gpiod_free_cd(struct mmc_host *host);
 void mmc_gpiod_request_cd_irq(struct mmc_host *host);
 
-- 
cgit v1.2.3


From 57384592c43375d2c9a14d82aebbdc95fdda9e9d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 2 Oct 2014 11:50:25 +0200
Subject: iommu/vt-d: Store bus information in RMRR PCI device path

This will be used later to match broken RMRR entries.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 1 +
 include/linux/dmar.h | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 8ed55b0a1ce4..68da1ab0f2cd 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -155,6 +155,7 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
 	if (event == BUS_NOTIFY_ADD_DEVICE) {
 		for (tmp = dev; tmp; tmp = tmp->bus->self) {
 			level--;
+			info->path[level].bus = tmp->bus->number;
 			info->path[level].device = PCI_SLOT(tmp->devfn);
 			info->path[level].function = PCI_FUNC(tmp->devfn);
 			if (pci_is_root_bus(tmp->bus))
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 1deece46a0ca..593fff99e6bf 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -56,13 +56,19 @@ struct dmar_drhd_unit {
 	struct intel_iommu *iommu;
 };
 
+struct dmar_pci_path {
+	u8 bus;
+	u8 device;
+	u8 function;
+};
+
 struct dmar_pci_notify_info {
 	struct pci_dev			*dev;
 	unsigned long			event;
 	int				bus;
 	u16				seg;
 	u16				level;
-	struct acpi_dmar_pci_path	path[];
+	struct dmar_pci_path		path[];
 }  __attribute__((packed));
 
 extern struct rw_semaphore dmar_global_lock;
-- 
cgit v1.2.3


From 8acd91e8620836a56ff62028ed28ba629f2881a0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 30 Sep 2014 15:26:00 +0200
Subject: locking/lockdep: Revert qrwlock recusive stuff

Commit f0bab73cb539 ("locking/lockdep: Restrict the use of recursive
read_lock() with qrwlock") changed lockdep to try and conform to the
qrwlock semantics which differ from the traditional rwlock semantics.

In particular qrwlock is fair outside of interrupt context, but in
interrupt context readers will ignore all fairness.

The problem modeling this is that read and write side have different
lock state (interrupts) semantics but we only have a single
representation of these. Therefore lockdep will get confused, thinking
the lock can cause interrupt lock inversions.

So revert it for now; the old rwlock semantics were already imperfectly
modeled and the qrwlock extra won't fit either.

If we want to properly fix this, I think we need to resurrect the work
by Gautham did a few years ago that split the read and write state of
locks:

   http://lwn.net/Articles/332801/

FWIW the locking selftest that would've failed (and was reported by
Borislav earlier) is something like:

  RL(X1);	/* IRQ-ON */
  LOCK(A);
  UNLOCK(A);
  RU(X1);

  IRQ_ENTER();
  RL(X1);	/* IN-IRQ */
  RU(X1);
  IRQ_EXIT();

At which point it would report that because A is an IRQ-unsafe lock we
can suffer the following inversion:

	CPU0		CPU1

	lock(A)
			lock(X1)
			lock(A)
	<IRQ>
	 lock(X1)

And this is 'wrong' because X1 can recurse (assuming the above lock are
in fact read-lock) but lockdep doesn't know about this.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: ego@linux.vnet.ibm.com
Cc: bp@alien8.de
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20140930132600.GA7444@worktop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h  | 10 +--------
 kernel/locking/lockdep.c |  6 ------
 lib/locking-selftest.c   | 56 ++++++------------------------------------------
 3 files changed, 8 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b5a84b62fb84..f388481201cd 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -478,24 +478,16 @@ static inline void print_irqtrace_events(struct task_struct *curr)
  * on the per lock-class debug mode:
  */
 
-/*
- * Read states in the 2-bit held_lock:read field:
- *  0: Exclusive lock
- *  1: Shareable lock, cannot be recursively called
- *  2: Shareable lock, can be recursively called
- *  3: Shareable lock, cannot be recursively called except in interrupt context
- */
 #define lock_acquire_exclusive(l, s, t, n, i)		lock_acquire(l, s, t, 0, 1, n, i)
 #define lock_acquire_shared(l, s, t, n, i)		lock_acquire(l, s, t, 1, 1, n, i)
 #define lock_acquire_shared_recursive(l, s, t, n, i)	lock_acquire(l, s, t, 2, 1, n, i)
-#define lock_acquire_shared_irecursive(l, s, t, n, i)	lock_acquire(l, s, t, 3, 1, n, i)
 
 #define spin_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
 #define spin_acquire_nest(l, s, t, n, i)	lock_acquire_exclusive(l, s, t, n, i)
 #define spin_release(l, n, i)			lock_release(l, n, i)
 
 #define rwlock_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
-#define rwlock_acquire_read(l, s, t, i)		lock_acquire_shared_irecursive(l, s, t, NULL, i)
+#define rwlock_acquire_read(l, s, t, i)		lock_acquire_shared_recursive(l, s, t, NULL, i)
 #define rwlock_release(l, n, i)			lock_release(l, n, i)
 
 #define seqcount_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 420ba685c4e5..88d0d4420ad2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3597,12 +3597,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	raw_local_irq_save(flags);
 	check_flags(flags);
 
-	/*
-	 * An interrupt recursive read in interrupt context can be considered
-	 * to be the same as a recursive read from checking perspective.
-	 */
-	if ((read == 3) && in_interrupt())
-		read = 2;
 	current->lockdep_recursion = 1;
 	trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
 	__lock_acquire(lock, subclass, trylock, read, check,
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 62af709b2083..872a15a2a637 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -267,46 +267,19 @@ GENERATE_TESTCASE(AA_rsem)
 #undef E
 
 /*
- * Special-case for read-locking, they are not allowed to
- * recurse on the same lock class except under interrupt context:
+ * Special-case for read-locking, they are
+ * allowed to recurse on the same lock class:
  */
 static void rlock_AA1(void)
 {
 	RL(X1);
-	RL(X1); // this one should fail
+	RL(X1); // this one should NOT fail
 }
 
 static void rlock_AA1B(void)
 {
 	RL(X1);
-	RL(X2); // this one should fail
-}
-
-static void rlock_AHA1(void)
-{
-	RL(X1);
-	HARDIRQ_ENTER();
-	RL(X1);	// this one should NOT fail
-	HARDIRQ_EXIT();
-}
-
-static void rlock_AHA1B(void)
-{
-	RL(X1);
-	HARDIRQ_ENTER();
-	RL(X2);	// this one should NOT fail
-	HARDIRQ_EXIT();
-}
-
-static void rlock_ASAHA1(void)
-{
-	RL(X1);
-	SOFTIRQ_ENTER();
-	RL(X1);	// this one should NOT fail
-	HARDIRQ_ENTER();
-	RL(X1);	// this one should NOT fail
-	HARDIRQ_EXIT();
-	SOFTIRQ_EXIT();
+	RL(X2); // this one should NOT fail
 }
 
 static void rsem_AA1(void)
@@ -1096,7 +1069,7 @@ static inline void print_testname(const char *testname)
 	print_testname(desc);					\
 	dotest(name##_spin, FAILURE, LOCKTYPE_SPIN);		\
 	dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK);		\
-	dotest(name##_rlock, FAILURE, LOCKTYPE_RWLOCK);		\
+	dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK);		\
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
@@ -1857,14 +1830,14 @@ void locking_selftest(void)
 	printk("  --------------------------------------------------------------------------\n");
 	print_testname("recursive read-lock");
 	printk("             |");
-	dotest(rlock_AA1, FAILURE, LOCKTYPE_RWLOCK);
+	dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
 	printk("             |");
 	dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
 	printk("\n");
 
 	print_testname("recursive read-lock #2");
 	printk("             |");
-	dotest(rlock_AA1B, FAILURE, LOCKTYPE_RWLOCK);
+	dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
 	printk("             |");
 	dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
 	printk("\n");
@@ -1883,21 +1856,6 @@ void locking_selftest(void)
 	dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
 	printk("\n");
 
-	print_testname("recursive rlock with interrupt");
-	printk("             |");
-	dotest(rlock_AHA1, SUCCESS, LOCKTYPE_RWLOCK);
-	printk("\n");
-
-	print_testname("recursive rlock with interrupt #2");
-	printk("             |");
-	dotest(rlock_AHA1B, SUCCESS, LOCKTYPE_RWLOCK);
-	printk("\n");
-
-	print_testname("recursive rlock with interrupt #3");
-	printk("             |");
-	dotest(rlock_ASAHA1, SUCCESS, LOCKTYPE_RWLOCK);
-	printk("\n");
-
 	printk("  --------------------------------------------------------------------------\n");
 
 	/*
-- 
cgit v1.2.3


From f14bb039a4e8206439d3e9abd92bc76bd142f243 Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Wed, 1 Oct 2014 16:07:03 -0700
Subject: uio: Export definition of struct uio_device

In order to prevent a O(n) search of the filesystem to link up its uio
node with its target configuration, TCMU needs to know the minor number
that UIO assigned. Expose the definition of this struct so TCMU can
access this field.

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/uio/uio.c          | 12 ------------
 include/linux/uio_driver.h | 12 +++++++++++-
 2 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index a673e5b6a2e0..60fa6278fbce 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -28,18 +28,6 @@
 
 #define UIO_MAX_DEVICES		(1U << MINORBITS)
 
-struct uio_device {
-	struct module		*owner;
-	struct device		*dev;
-	int			minor;
-	atomic_t		event;
-	struct fasync_struct	*async_queue;
-	wait_queue_head_t	wait;
-	struct uio_info		*info;
-	struct kobject		*map_dir;
-	struct kobject		*portio_dir;
-};
-
 static int uio_major;
 static struct cdev *uio_cdev;
 static DEFINE_IDR(uio_idr);
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 1ad4724458de..baa81718d985 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -63,7 +63,17 @@ struct uio_port {
 
 #define MAX_UIO_PORT_REGIONS	5
 
-struct uio_device;
+struct uio_device {
+        struct module           *owner;
+        struct device           *dev;
+        int                     minor;
+        atomic_t                event;
+        struct fasync_struct    *async_queue;
+        wait_queue_head_t       wait;
+        struct uio_info         *info;
+        struct kobject          *map_dir;
+        struct kobject          *portio_dir;
+};
 
 /**
  * struct uio_info - UIO device capabilities
-- 
cgit v1.2.3


From 615413979487a1e25a3b76abbaa316280ca19d26 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 24 Sep 2014 10:27:27 +0300
Subject: mmc: sdhci: Add quirk for always getting TC with stop cmd

Add a quirk for a host controller that always sets
a Transfer Complete interrupt status for the stop
command even when a busy response is not indicated.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci.c  | 8 ++++++--
 include/linux/mmc/sdhci.h | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index cbd433712756..a932f70bcc51 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2225,7 +2225,7 @@ static void sdhci_tuning_timer(unsigned long data)
  *                                                                           *
 \*****************************************************************************/
 
-static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
+static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *mask)
 {
 	BUG_ON(intmask == 0);
 
@@ -2272,6 +2272,9 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
 
 		/* The controller does not support the end-of-busy IRQ,
 		 * fall through and take the SDHCI_INT_RESPONSE */
+	} else if ((host->quirks2 & SDHCI_QUIRK2_STOP_WITH_TC) &&
+		   host->cmd->opcode == MMC_STOP_TRANSMISSION && !host->data) {
+		*mask &= ~SDHCI_INT_DATA_END;
 	}
 
 	if (intmask & SDHCI_INT_RESPONSE)
@@ -2481,7 +2484,8 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 		}
 
 		if (intmask & SDHCI_INT_CMD_MASK)
-			sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK);
+			sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK,
+				      &intmask);
 
 		if (intmask & SDHCI_INT_DATA_MASK)
 			sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK);
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 0aa85ca0c788..dba793e3a331 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -98,6 +98,8 @@ struct sdhci_host {
 #define SDHCI_QUIRK2_BROKEN_HS200			(1<<6)
 /* Controller does not support DDR50 */
 #define SDHCI_QUIRK2_BROKEN_DDR50			(1<<7)
+/* Stop command (CMD12) can set Transfer Complete when not using MMC_RSP_BUSY */
+#define SDHCI_QUIRK2_STOP_WITH_TC			(1<<8)
 
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
-- 
cgit v1.2.3


From f39cb1797ec1094b196d3dab44a7ca6060813d38 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 2 Oct 2014 21:12:34 +0200
Subject: PM / Domains: Rename cpu_data to cpuidle_data

The "cpu_data" are defined for some archs and thus conflicting with the
"cpu_data" member in the struct gpd_cpu_data. This causes a compiler
error for those archs.

Let's fix it by rename the member to cpuidle_data. In this context it
also seems appropriate to rename the struct to gpd_cpuidle_data to
better reflect its use.

Fixes: f48c767ce895 (PM / Domains: Move dev_pm_domain_attach|detach() to pm_domain.h)
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 54 ++++++++++++++++++++++-----------------------
 include/linux/pm_domain.h   |  4 ++--
 2 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 18cc68d058d6..40bc2f4072cc 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -142,13 +142,13 @@ static void genpd_recalc_cpu_exit_latency(struct generic_pm_domain *genpd)
 {
 	s64 usecs64;
 
-	if (!genpd->cpu_data)
+	if (!genpd->cpuidle_data)
 		return;
 
 	usecs64 = genpd->power_on_latency_ns;
 	do_div(usecs64, NSEC_PER_USEC);
-	usecs64 += genpd->cpu_data->saved_exit_latency;
-	genpd->cpu_data->idle_state->exit_latency = usecs64;
+	usecs64 += genpd->cpuidle_data->saved_exit_latency;
+	genpd->cpuidle_data->idle_state->exit_latency = usecs64;
 }
 
 /**
@@ -188,9 +188,9 @@ static int __pm_genpd_poweron(struct generic_pm_domain *genpd)
 		return 0;
 	}
 
-	if (genpd->cpu_data) {
+	if (genpd->cpuidle_data) {
 		cpuidle_pause_and_lock();
-		genpd->cpu_data->idle_state->disabled = true;
+		genpd->cpuidle_data->idle_state->disabled = true;
 		cpuidle_resume_and_unlock();
 		goto out;
 	}
@@ -513,17 +513,17 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd)
 		}
 	}
 
-	if (genpd->cpu_data) {
+	if (genpd->cpuidle_data) {
 		/*
-		 * If cpu_data is set, cpuidle should turn the domain off when
-		 * the CPU in it is idle.  In that case we don't decrement the
-		 * subdomain counts of the master domains, so that power is not
-		 * removed from the current domain prematurely as a result of
-		 * cutting off the masters' power.
+		 * If cpuidle_data is set, cpuidle should turn the domain off
+		 * when the CPU in it is idle.  In that case we don't decrement
+		 * the subdomain counts of the master domains, so that power is
+		 * not removed from the current domain prematurely as a result
+		 * of cutting off the masters' power.
 		 */
 		genpd->status = GPD_STATE_POWER_OFF;
 		cpuidle_pause_and_lock();
-		genpd->cpu_data->idle_state->disabled = false;
+		genpd->cpuidle_data->idle_state->disabled = false;
 		cpuidle_resume_and_unlock();
 		goto out;
 	}
@@ -1698,7 +1698,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state)
 {
 	struct cpuidle_driver *cpuidle_drv;
-	struct gpd_cpu_data *cpu_data;
+	struct gpd_cpuidle_data *cpuidle_data;
 	struct cpuidle_state *idle_state;
 	int ret = 0;
 
@@ -1707,12 +1707,12 @@ int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state)
 
 	genpd_acquire_lock(genpd);
 
-	if (genpd->cpu_data) {
+	if (genpd->cpuidle_data) {
 		ret = -EEXIST;
 		goto out;
 	}
-	cpu_data = kzalloc(sizeof(*cpu_data), GFP_KERNEL);
-	if (!cpu_data) {
+	cpuidle_data = kzalloc(sizeof(*cpuidle_data), GFP_KERNEL);
+	if (!cpuidle_data) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1730,9 +1730,9 @@ int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state)
 		ret = -EAGAIN;
 		goto err;
 	}
-	cpu_data->idle_state = idle_state;
-	cpu_data->saved_exit_latency = idle_state->exit_latency;
-	genpd->cpu_data = cpu_data;
+	cpuidle_data->idle_state = idle_state;
+	cpuidle_data->saved_exit_latency = idle_state->exit_latency;
+	genpd->cpuidle_data = cpuidle_data;
 	genpd_recalc_cpu_exit_latency(genpd);
 
  out:
@@ -1743,7 +1743,7 @@ int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state)
 	cpuidle_driver_unref();
 
  err_drv:
-	kfree(cpu_data);
+	kfree(cpuidle_data);
 	goto out;
 }
 
@@ -1766,7 +1766,7 @@ int pm_genpd_name_attach_cpuidle(const char *name, int state)
  */
 int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd)
 {
-	struct gpd_cpu_data *cpu_data;
+	struct gpd_cpuidle_data *cpuidle_data;
 	struct cpuidle_state *idle_state;
 	int ret = 0;
 
@@ -1775,20 +1775,20 @@ int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd)
 
 	genpd_acquire_lock(genpd);
 
-	cpu_data = genpd->cpu_data;
-	if (!cpu_data) {
+	cpuidle_data = genpd->cpuidle_data;
+	if (!cpuidle_data) {
 		ret = -ENODEV;
 		goto out;
 	}
-	idle_state = cpu_data->idle_state;
+	idle_state = cpuidle_data->idle_state;
 	if (!idle_state->disabled) {
 		ret = -EAGAIN;
 		goto out;
 	}
-	idle_state->exit_latency = cpu_data->saved_exit_latency;
+	idle_state->exit_latency = cpuidle_data->saved_exit_latency;
 	cpuidle_driver_unref();
-	genpd->cpu_data = NULL;
-	kfree(cpu_data);
+	genpd->cpuidle_data = NULL;
+	kfree(cpuidle_data);
 
  out:
 	genpd_release_lock(genpd);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 900474317afc..73e938b7e937 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -38,7 +38,7 @@ struct gpd_dev_ops {
 	bool (*active_wakeup)(struct device *dev);
 };
 
-struct gpd_cpu_data {
+struct gpd_cpuidle_data {
 	unsigned int saved_exit_latency;
 	struct cpuidle_state *idle_state;
 };
@@ -71,7 +71,7 @@ struct generic_pm_domain {
 	s64 max_off_time_ns;	/* Maximum allowed "suspended" time. */
 	bool max_off_time_changed;
 	bool cached_power_down_ok;
-	struct gpd_cpu_data *cpu_data;
+	struct gpd_cpuidle_data *cpuidle_data;
 	void (*attach_dev)(struct device *dev);
 	void (*detach_dev)(struct device *dev);
 };
-- 
cgit v1.2.3


From 5a17dae422d7de4b776a9753cd4673a343a25b4b Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 5 Aug 2014 11:52:11 +0100
Subject: efi: Add efi= parameter parsing to the EFI boot stub

We need a way to customize the behaviour of the EFI boot stub, in
particular, we need a way to disable the "chunking" workaround, used
when reading files from the EFI System Partition.

One of my machines doesn't cope well when reading files in 1MB chunks to
a buffer above the 4GB mark - it appears that the "chunking" bug
workaround triggers another firmware bug. This was only discovered with
commit 4bf7111f5016 ("x86/efi: Support initrd loaded above 4G"), and
that commit is perfectly valid. The symptom I observed was a corrupt
initrd rather than any kind of crash.

efi= is now used to specify EFI parameters in two very different
execution environments, the EFI boot stub and during kernel boot.

There is also a slight performance optimization by enabling efi=nochunk,
but that's offset by the fact that you're more likely to run into
firmware issues, at least on x86. This is the rationale behind leaving
the workaround enabled by default.

Also provide some documentation for EFI_READ_CHUNK_SIZE and why we're
using the current value of 1MB.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Roy Franz <roy.franz@linaro.org>
Cc: Maarten Lankhorst <m.b.lankhorst@gmail.com>
Cc: Leif Lindholm <leif.lindholm@linaro.org>
Cc: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 Documentation/kernel-parameters.txt            |  5 ++-
 arch/x86/boot/compressed/eboot.c               |  4 ++
 arch/x86/platform/efi/efi.c                    | 19 +++++++-
 drivers/firmware/efi/libstub/arm-stub.c        |  4 ++
 drivers/firmware/efi/libstub/efi-stub-helper.c | 62 +++++++++++++++++++++++++-
 include/linux/efi.h                            |  2 +
 6 files changed, 91 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5ae8608ca9f5..67d79597d9d6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -992,10 +992,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Format: {"off" | "on" | "skip[mbr]"}
 
 	efi=		[EFI]
-			Format: { "old_map" }
+			Format: { "old_map", "nochunk" }
 			old_map [X86-64]: switch to the old ioremap-based EFI
 			runtime services mapping. 32-bit still uses this one by
 			default.
+			nochunk: disable reading files in "chunks" in the EFI
+			boot stub, as chunking can cause problems with some
+			firmware implementations.
 
 	efi_no_storage_paranoia [EFI; X86]
 			Using this parameter you can use more than 50% of
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index f277184e2ac1..f4bdab1dbf66 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1100,6 +1100,10 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	else
 		initrd_addr_max = hdr->initrd_addr_max;
 
+	status = efi_parse_options(cmdline_ptr);
+	if (status != EFI_SUCCESS)
+		goto fail2;
+
 	status = handle_cmdline_files(sys_table, image,
 				      (char *)(unsigned long)hdr->cmd_line_ptr,
 				      "initrd=", initrd_addr_max,
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 850da94fef30..a1f745b0bf1d 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -943,8 +943,23 @@ static int __init parse_efi_cmdline(char *str)
 	if (*str == '=')
 		str++;
 
-	if (!strncmp(str, "old_map", 7))
-		set_bit(EFI_OLD_MEMMAP, &efi.flags);
+	while (*str) {
+		if (!strncmp(str, "old_map", 7)) {
+			set_bit(EFI_OLD_MEMMAP, &efi.flags);
+			str += strlen("old_map");
+		}
+
+		/*
+		 * Skip any options we don't understand. Presumably
+		 * they apply to the EFI boot stub.
+		 */
+		while (*str && *str != ',')
+			str++;
+
+		/* If we hit a delimiter, skip it */
+		if (*str == ',')
+			str++;
+	}
 
 	return 0;
 }
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 480339b6b110..75ee05964cbc 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -226,6 +226,10 @@ unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table,
 		goto fail_free_image;
 	}
 
+	status = efi_parse_options(cmdline_ptr);
+	if (status != EFI_SUCCESS)
+		pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n");
+
 	/*
 	 * Unauthenticated device tree data is a security hazard, so
 	 * ignore 'dtb=' unless UEFI Secure Boot is disabled.
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 32d5cca30f49..a920fec8fe88 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -15,8 +15,23 @@
 
 #include "efistub.h"
 
+/*
+ * Some firmware implementations have problems reading files in one go.
+ * A read chunk size of 1MB seems to work for most platforms.
+ *
+ * Unfortunately, reading files in chunks triggers *other* bugs on some
+ * platforms, so we provide a way to disable this workaround, which can
+ * be done by passing "efi=nochunk" on the EFI boot stub command line.
+ *
+ * If you experience issues with initrd images being corrupt it's worth
+ * trying efi=nochunk, but chunking is enabled by default because there
+ * are far more machines that require the workaround than those that
+ * break with it enabled.
+ */
 #define EFI_READ_CHUNK_SIZE	(1024 * 1024)
 
+static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
+
 struct file_info {
 	efi_file_handle_t *handle;
 	u64 size;
@@ -281,6 +296,49 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size,
 	efi_call_early(free_pages, addr, nr_pages);
 }
 
+/*
+ * Parse the ASCII string 'cmdline' for EFI options, denoted by the efi=
+ * option, e.g. efi=nochunk.
+ *
+ * It should be noted that efi= is parsed in two very different
+ * environments, first in the early boot environment of the EFI boot
+ * stub, and subsequently during the kernel boot.
+ */
+efi_status_t efi_parse_options(char *cmdline)
+{
+	char *str;
+
+	/*
+	 * If no EFI parameters were specified on the cmdline we've got
+	 * nothing to do.
+	 */
+	str = strstr(cmdline, "efi=");
+	if (!str)
+		return EFI_SUCCESS;
+
+	/* Skip ahead to first argument */
+	str += strlen("efi=");
+
+	/*
+	 * Remember, because efi= is also used by the kernel we need to
+	 * skip over arguments we don't understand.
+	 */
+	while (*str) {
+		if (!strncmp(str, "nochunk", 7)) {
+			str += strlen("nochunk");
+			__chunk_size = -1UL;
+		}
+
+		/* Group words together, delimited by "," */
+		while (*str && *str != ',')
+			str++;
+
+		if (*str == ',')
+			str++;
+	}
+
+	return EFI_SUCCESS;
+}
 
 /*
  * Check the cmdline for a LILO-style file= arguments.
@@ -423,8 +481,8 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 			size = files[j].size;
 			while (size) {
 				unsigned long chunksize;
-				if (size > EFI_READ_CHUNK_SIZE)
-					chunksize = EFI_READ_CHUNK_SIZE;
+				if (size > __chunk_size)
+					chunksize = __chunk_size;
 				else
 					chunksize = size;
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 45cb4ffdea62..518779fb5e90 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1227,4 +1227,6 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 				  unsigned long *load_addr,
 				  unsigned long *load_size);
 
+efi_status_t efi_parse_options(char *cmdline);
+
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From b2e0a54a1296a91b800f316df7bef7d1905e4fd0 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Thu, 14 Aug 2014 17:15:26 +0800
Subject: efi: Move noefi early param code out of x86 arch code

noefi param can be used for arches other than X86 later, thus move it
out of x86 platform code.

Signed-off-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 Documentation/kernel-parameters.txt |  2 +-
 arch/x86/platform/efi/efi.c         | 10 +---------
 drivers/firmware/efi/efi.c          | 13 +++++++++++++
 include/linux/efi.h                 |  1 +
 4 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 67d79597d9d6..08df275eee2b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2169,7 +2169,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nodsp		[SH] Disable hardware DSP at boot time.
 
-	noefi		[X86] Disable EFI runtime services support.
+	noefi		Disable EFI runtime services support.
 
 	noexec		[IA-64]
 
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index a1f745b0bf1d..c90d3cd2728c 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,14 +70,6 @@ static efi_config_table_type_t arch_tables[] __initdata = {
 
 u64 efi_setup;		/* efi setup_data physical address */
 
-static bool disable_runtime __initdata = false;
-static int __init setup_noefi(char *arg)
-{
-	disable_runtime = true;
-	return 0;
-}
-early_param("noefi", setup_noefi);
-
 int add_efi_memmap;
 EXPORT_SYMBOL(add_efi_memmap);
 
@@ -492,7 +484,7 @@ void __init efi_init(void)
 	if (!efi_runtime_supported())
 		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
 	else {
-		if (disable_runtime || efi_runtime_init())
+		if (efi_runtime_disabled() || efi_runtime_init())
 			return;
 	}
 	if (efi_memmap_init())
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 64ecbb501c50..c8f01a73edb5 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -41,6 +41,19 @@ struct efi __read_mostly efi = {
 };
 EXPORT_SYMBOL(efi);
 
+static bool disable_runtime;
+static int __init setup_noefi(char *arg)
+{
+	disable_runtime = true;
+	return 0;
+}
+early_param("noefi", setup_noefi);
+
+bool efi_runtime_disabled(void)
+{
+	return disable_runtime;
+}
+
 static struct kobject *efi_kobj;
 static struct kobject *efivars_kobj;
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 518779fb5e90..4812ed0b0374 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1229,4 +1229,5 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 
 efi_status_t efi_parse_options(char *cmdline);
 
+bool efi_runtime_disabled(void);
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From 6ccc72b87b83ece31c2a75bbe07f440b0378f7a9 Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Thu, 14 Aug 2014 17:15:27 +0800
Subject: lib: Add a generic cmdline parse function parse_option_str

There should be a generic function to parse params like a=b,c
Adding parse_option_str in lib/cmdline.c which will return true
if there's specified option set in the params.

Also updated efi=old_map parsing code to use the new function

Signed-off-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 22 ++--------------------
 include/linux/kernel.h      |  1 +
 lib/cmdline.c               | 29 +++++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index c90d3cd2728c..c73a7df5d37f 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -932,26 +932,8 @@ u64 efi_mem_attributes(unsigned long phys_addr)
 
 static int __init parse_efi_cmdline(char *str)
 {
-	if (*str == '=')
-		str++;
-
-	while (*str) {
-		if (!strncmp(str, "old_map", 7)) {
-			set_bit(EFI_OLD_MEMMAP, &efi.flags);
-			str += strlen("old_map");
-		}
-
-		/*
-		 * Skip any options we don't understand. Presumably
-		 * they apply to the EFI boot stub.
-		 */
-		while (*str && *str != ',')
-			str++;
-
-		/* If we hit a delimiter, skip it */
-		if (*str == ',')
-			str++;
-	}
+	if (parse_option_str(str, "old_map"))
+		set_bit(EFI_OLD_MEMMAP, &efi.flags);
 
 	return 0;
 }
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 95624bed87ef..f66427ef0628 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -407,6 +407,7 @@ int vsscanf(const char *, const char *, va_list);
 extern int get_option(char **str, int *pint);
 extern char *get_options(const char *str, int nints, int *ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
+extern bool parse_option_str(const char *str, const char *option);
 
 extern int core_kernel_text(unsigned long addr);
 extern int core_kernel_data(unsigned long addr);
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 76a712e6e20e..8f13cf73c2ec 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -160,3 +160,32 @@ unsigned long long memparse(const char *ptr, char **retptr)
 	return ret;
 }
 EXPORT_SYMBOL(memparse);
+
+/**
+ *	parse_option_str - Parse a string and check an option is set or not
+ *	@str: String to be parsed
+ *	@option: option name
+ *
+ *	This function parses a string containing a comma-separated list of
+ *	strings like a=b,c.
+ *
+ *	Return true if there's such option in the string, or return false.
+ */
+bool parse_option_str(const char *str, const char *option)
+{
+	while (*str) {
+		if (!strncmp(str, option, strlen(option))) {
+			str += strlen(option);
+			if (!*str || *str == ',')
+				return true;
+		}
+
+		while (*str && *str != ',')
+			str++;
+
+		if (*str == ',')
+			str++;
+	}
+
+	return false;
+}
-- 
cgit v1.2.3


From 9c97e0bdd4b4ae44577a1b1ec949e782084e9a78 Mon Sep 17 00:00:00 2001
From: Laszlo Ersek <lersek@redhat.com>
Date: Wed, 3 Sep 2014 13:32:19 +0200
Subject: efi: Add macro for EFI_MEMORY_UCE memory attribute

Add the following macro from the UEFI spec, for completeness:

  EFI_MEMORY_UCE  Memory cacheability attribute: The memory region
                  supports being configured as not cacheable, exported,
                  and supports the "fetch and add" semaphore mechanism.

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 include/linux/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 4812ed0b0374..7464032ae00a 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -92,6 +92,7 @@ typedef	struct {
 #define EFI_MEMORY_WC		((u64)0x0000000000000002ULL)	/* write-coalescing */
 #define EFI_MEMORY_WT		((u64)0x0000000000000004ULL)	/* write-through */
 #define EFI_MEMORY_WB		((u64)0x0000000000000008ULL)	/* write-back */
+#define EFI_MEMORY_UCE		((u64)0x0000000000000010ULL)	/* uncached, exported */
 #define EFI_MEMORY_WP		((u64)0x0000000000001000ULL)	/* write-protect */
 #define EFI_MEMORY_RP		((u64)0x0000000000002000ULL)	/* read-protect */
 #define EFI_MEMORY_XP		((u64)0x0000000000004000ULL)	/* execute-protect */
-- 
cgit v1.2.3


From 98d2a6ca14520904a47c46258d3bad02ffcd3f96 Mon Sep 17 00:00:00 2001
From: Laszlo Ersek <lersek@redhat.com>
Date: Wed, 3 Sep 2014 13:32:20 +0200
Subject: efi: Introduce efi_md_typeattr_format()

At the moment, there are three architectures debug-printing the EFI memory
map at initialization: x86, ia64, and arm64. They all use different format
strings, plus the EFI memory type and the EFI memory attributes are
similarly hard to decode for a human reader.

Introduce a helper __init function that formats the memory type and the
memory attributes in a unified way, to a user-provided character buffer.

The array "memory_type_name" is copied from the arm64 code, temporarily
duplicating it. The (otherwise optional) braces around each string literal
in the initializer list are dropped in order to match the kernel coding
style more closely. The element size is tightened from 32 to 20 bytes
(maximum actual string length + 1) so that we can derive the field width
from the element size.

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[ Dropped useless 'register' keyword, which compiler will ignore ]
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efi/efi.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/efi.h        |  7 ++++++
 2 files changed, 64 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index cebfa36a27ae..8590099ac148 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -445,3 +445,60 @@ int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose)
 	return ret;
 }
 #endif /* CONFIG_EFI_PARAMS_FROM_FDT */
+
+static __initdata char memory_type_name[][20] = {
+	"Reserved",
+	"Loader Code",
+	"Loader Data",
+	"Boot Code",
+	"Boot Data",
+	"Runtime Code",
+	"Runtime Data",
+	"Conventional Memory",
+	"Unusable Memory",
+	"ACPI Reclaim Memory",
+	"ACPI Memory NVS",
+	"Memory Mapped I/O",
+	"MMIO Port Space",
+	"PAL Code"
+};
+
+char * __init efi_md_typeattr_format(char *buf, size_t size,
+				     const efi_memory_desc_t *md)
+{
+	char *pos;
+	int type_len;
+	u64 attr;
+
+	pos = buf;
+	if (md->type >= ARRAY_SIZE(memory_type_name))
+		type_len = snprintf(pos, size, "[type=%u", md->type);
+	else
+		type_len = snprintf(pos, size, "[%-*s",
+				    (int)(sizeof(memory_type_name[0]) - 1),
+				    memory_type_name[md->type]);
+	if (type_len >= size)
+		return buf;
+
+	pos += type_len;
+	size -= type_len;
+
+	attr = md->attribute;
+	if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
+		     EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_WP |
+		     EFI_MEMORY_RP | EFI_MEMORY_XP | EFI_MEMORY_RUNTIME))
+		snprintf(pos, size, "|attr=0x%016llx]",
+			 (unsigned long long)attr);
+	else
+		snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+			 attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
+			 attr & EFI_MEMORY_XP      ? "XP"  : "",
+			 attr & EFI_MEMORY_RP      ? "RP"  : "",
+			 attr & EFI_MEMORY_WP      ? "WP"  : "",
+			 attr & EFI_MEMORY_UCE     ? "UCE" : "",
+			 attr & EFI_MEMORY_WB      ? "WB"  : "",
+			 attr & EFI_MEMORY_WT      ? "WT"  : "",
+			 attr & EFI_MEMORY_WC      ? "WC"  : "",
+			 attr & EFI_MEMORY_UC      ? "UC"  : "");
+	return buf;
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 7464032ae00a..78b29b133e14 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -887,6 +887,13 @@ extern bool efi_poweroff_required(void);
 	     (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
 	     (md) = (void *)(md) + (m)->desc_size)
 
+/*
+ * Format an EFI memory descriptor's type and attributes to a user-provided
+ * character buffer, as per snprintf(), and return the buffer.
+ */
+char * __init efi_md_typeattr_format(char *buf, size_t size,
+				     const efi_memory_desc_t *md);
+
 /**
  * efi_range_is_wc - check the WC bit on an address range
  * @start: starting kvirt address
-- 
cgit v1.2.3


From 6d80dba1c9fe4316ef626980102b92fa30c7845a Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 30 Sep 2014 21:58:52 +0100
Subject: efi: Provide a non-blocking SetVariable() operation

There are some circumstances that call for trying to write an EFI
variable in a non-blocking way. One such scenario is when writing pstore
data in efi_pstore_write() via the pstore_dump() kdump callback.

Now that we have an EFI runtime spinlock we need a way of aborting if
there is contention instead of spinning, since when writing pstore data
from the kdump callback, the runtime lock may already be held by the CPU
that's running the callback if we crashed in the middle of an EFI
variable operation.

The situation is sufficiently special that a new EFI variable operation
is warranted.

Introduce ->set_variable_nonblocking() for this use case. It is an
optional EFI backend operation, and need only be implemented by those
backends that usually acquire locks to serialize access to EFI
variables, as is the case for virt_efi_set_variable() where we now grab
the EFI runtime spinlock.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efi/runtime-wrappers.c | 19 +++++++++++++
 drivers/firmware/efi/vars.c             | 47 +++++++++++++++++++++++++++++++++
 include/linux/efi.h                     |  6 +++++
 3 files changed, 72 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 9694cba665c4..4349206198b2 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -200,6 +200,24 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 	return status;
 }
 
+static efi_status_t
+virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
+				  u32 attr, unsigned long data_size,
+				  void *data)
+{
+	unsigned long flags;
+	efi_status_t status;
+
+	if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+		return EFI_NOT_READY;
+
+	status = efi_call_virt(set_variable, name, vendor, attr, data_size,
+			       data);
+	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	return status;
+}
+
+
 static efi_status_t virt_efi_query_variable_info(u32 attr,
 						 u64 *storage_space,
 						 u64 *remaining_space,
@@ -287,6 +305,7 @@ void efi_native_runtime_setup(void)
 	efi.get_variable = virt_efi_get_variable;
 	efi.get_next_variable = virt_efi_get_next_variable;
 	efi.set_variable = virt_efi_set_variable;
+	efi.set_variable_nonblocking = virt_efi_set_variable_nonblocking;
 	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
 	efi.reset_system = virt_efi_reset_system;
 	efi.query_variable_info = virt_efi_query_variable_info;
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 1fa724f31b0e..fa3c66bdc1e5 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -595,6 +595,39 @@ int efivar_entry_set(struct efivar_entry *entry, u32 attributes,
 }
 EXPORT_SYMBOL_GPL(efivar_entry_set);
 
+/*
+ * efivar_entry_set_nonblocking - call set_variable_nonblocking()
+ *
+ * This function is guaranteed to not block and is suitable for calling
+ * from crash/panic handlers.
+ *
+ * Crucially, this function will not block if it cannot acquire
+ * __efivars->lock. Instead, it returns -EBUSY.
+ */
+static int
+efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor,
+			     u32 attributes, unsigned long size, void *data)
+{
+	const struct efivar_operations *ops = __efivars->ops;
+	unsigned long flags;
+	efi_status_t status;
+
+	if (!spin_trylock_irqsave(&__efivars->lock, flags))
+		return -EBUSY;
+
+	status = check_var_size(attributes, size + ucs2_strsize(name, 1024));
+	if (status != EFI_SUCCESS) {
+		spin_unlock_irqrestore(&__efivars->lock, flags);
+		return -ENOSPC;
+	}
+
+	status = ops->set_variable_nonblocking(name, &vendor, attributes,
+					       size, data);
+
+	spin_unlock_irqrestore(&__efivars->lock, flags);
+	return efi_status_to_err(status);
+}
+
 /**
  * efivar_entry_set_safe - call set_variable() if enough space in firmware
  * @name: buffer containing the variable name
@@ -622,6 +655,20 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
 	if (!ops->query_variable_store)
 		return -ENOSYS;
 
+	/*
+	 * If the EFI variable backend provides a non-blocking
+	 * ->set_variable() operation and we're in a context where we
+	 * cannot block, then we need to use it to avoid live-locks,
+	 * since the implication is that the regular ->set_variable()
+	 * will block.
+	 *
+	 * If no ->set_variable_nonblocking() is provided then
+	 * ->set_variable() is assumed to be non-blocking.
+	 */
+	if (!block && ops->set_variable_nonblocking)
+		return efivar_entry_set_nonblocking(name, vendor, attributes,
+						    size, data);
+
 	if (!block) {
 		if (!spin_trylock_irqsave(&__efivars->lock, flags))
 			return -EBUSY;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 78b29b133e14..0949f9c7e872 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -503,6 +503,10 @@ typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char
 typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor, 
 					 u32 attr, unsigned long data_size,
 					 void *data);
+typedef efi_status_t
+efi_set_variable_nonblocking_t(efi_char16_t *name, efi_guid_t *vendor,
+			       u32 attr, unsigned long data_size, void *data);
+
 typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count);
 typedef void efi_reset_system_t (int reset_type, efi_status_t status,
 				 unsigned long data_size, efi_char16_t *data);
@@ -822,6 +826,7 @@ extern struct efi {
 	efi_get_variable_t *get_variable;
 	efi_get_next_variable_t *get_next_variable;
 	efi_set_variable_t *set_variable;
+	efi_set_variable_nonblocking_t *set_variable_nonblocking;
 	efi_query_variable_info_t *query_variable_info;
 	efi_update_capsule_t *update_capsule;
 	efi_query_capsule_caps_t *query_capsule_caps;
@@ -1042,6 +1047,7 @@ struct efivar_operations {
 	efi_get_variable_t *get_variable;
 	efi_get_next_variable_t *get_next_variable;
 	efi_set_variable_t *set_variable;
+	efi_set_variable_nonblocking_t *set_variable_nonblocking;
 	efi_query_variable_store_t *query_variable_store;
 };
 
-- 
cgit v1.2.3


From d8f429e1669b9709f5b669aac9d734dbe0640891 Mon Sep 17 00:00:00 2001
From: Junichi Nomura <j-nomura@ce.jp.nec.com>
Date: Fri, 3 Oct 2014 17:27:12 -0400
Subject: block: add bioset_create_nobvec()

Users of bio_clone_fast() do not want bios with their own bvecs.
Allocating a bvec mempool as part of the bioset intended for such users
is a waste of memory.

bioset_create_nobvec() creates a bioset that doesn't have the bvec
mempool.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c         | 61 ++++++++++++++++++++++++++++++++++++++---------------
 include/linux/bio.h |  1 +
 2 files changed, 45 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 3e6331d25d90..3e6e1986a5b2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -428,6 +428,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		front_pad = 0;
 		inline_vecs = nr_iovecs;
 	} else {
+		/* should not use nobvec bioset for nr_iovecs > 0 */
+		if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
+			return NULL;
 		/*
 		 * generic_make_request() converts recursion to iteration; this
 		 * means if we're running beneath it, any bios we allocate and
@@ -1900,20 +1903,9 @@ void bioset_free(struct bio_set *bs)
 }
 EXPORT_SYMBOL(bioset_free);
 
-/**
- * bioset_create  - Create a bio_set
- * @pool_size:	Number of bio and bio_vecs to cache in the mempool
- * @front_pad:	Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
- *    to ask for a number of bytes to be allocated in front of the bio.
- *    Front pad allocation is useful for embedding the bio inside
- *    another structure, to avoid allocating extra data to go with the bio.
- *    Note that the bio must be embedded at the END of that structure always,
- *    or things will break badly.
- */
-struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
+static struct bio_set *__bioset_create(unsigned int pool_size,
+				       unsigned int front_pad,
+				       bool create_bvec_pool)
 {
 	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
 	struct bio_set *bs;
@@ -1938,9 +1930,11 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
-	bs->bvec_pool = biovec_create_pool(pool_size);
-	if (!bs->bvec_pool)
-		goto bad;
+	if (create_bvec_pool) {
+		bs->bvec_pool = biovec_create_pool(pool_size);
+		if (!bs->bvec_pool)
+			goto bad;
+	}
 
 	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
 	if (!bs->rescue_workqueue)
@@ -1951,8 +1945,41 @@ bad:
 	bioset_free(bs);
 	return NULL;
 }
+
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size:	Number of bio and bio_vecs to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ */
+struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
+{
+	return __bioset_create(pool_size, front_pad, true);
+}
 EXPORT_SYMBOL(bioset_create);
 
+/**
+ * bioset_create_nobvec  - Create a bio_set without bio_vec mempool
+ * @pool_size:	Number of bio to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ *
+ * Description:
+ *    Same functionality as bioset_create() except that mempool is not
+ *    created for bio_vecs. Saving some memory for bio_clone_fast() users.
+ */
+struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
+{
+	return __bioset_create(pool_size, front_pad, false);
+}
+EXPORT_SYMBOL(bioset_create_nobvec);
+
 #ifdef CONFIG_BLK_CGROUP
 /**
  * bio_associate_current - associate a bio with %current
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ce6b75964b71..7347f486ceca 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -378,6 +378,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 }
 
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
+extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
 extern mempool_t *biovec_create_pool(int pool_entries);
 
-- 
cgit v1.2.3


From 906d201530f2c52aeb4eee31895c71cdccf1e9a0 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 24 Sep 2014 11:17:56 -0700
Subject: dynamic_debug: change __dynamic_<foo>_dbg return types to void

The return value is not used by callers of these functions
so change the functions to return void.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/dynamic_debug.h | 12 +++++------
 lib/dynamic_debug.c           | 50 +++++++++++++++++--------------------------
 2 files changed, 26 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 2fe93b26b42f..4f1bbc68cd1b 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -42,7 +42,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
 #if defined(CONFIG_DYNAMIC_DEBUG)
 extern int ddebug_remove_module(const char *mod_name);
 extern __printf(2, 3)
-int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...);
+void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...);
 
 extern int ddebug_dyndbg_module_param_cb(char *param, char *val,
 					const char *modname);
@@ -50,15 +50,15 @@ extern int ddebug_dyndbg_module_param_cb(char *param, char *val,
 struct device;
 
 extern __printf(3, 4)
-int __dynamic_dev_dbg(struct _ddebug *descriptor, const struct device *dev,
-		      const char *fmt, ...);
+void __dynamic_dev_dbg(struct _ddebug *descriptor, const struct device *dev,
+		       const char *fmt, ...);
 
 struct net_device;
 
 extern __printf(3, 4)
-int __dynamic_netdev_dbg(struct _ddebug *descriptor,
-			 const struct net_device *dev,
-			 const char *fmt, ...);
+void __dynamic_netdev_dbg(struct _ddebug *descriptor,
+			  const struct net_device *dev,
+			  const char *fmt, ...);
 
 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)		\
 	static struct _ddebug  __aligned(8)			\
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index c9afbe2c445a..31fe79e31ab8 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -537,10 +537,9 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
 	return buf;
 }
 
-int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
+void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
 {
 	va_list args;
-	int res;
 	struct va_format vaf;
 	char buf[PREFIX_SIZE];
 
@@ -552,21 +551,17 @@ int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	res = printk(KERN_DEBUG "%s%pV",
-		     dynamic_emit_prefix(descriptor, buf), &vaf);
+	printk(KERN_DEBUG "%s%pV", dynamic_emit_prefix(descriptor, buf), &vaf);
 
 	va_end(args);
-
-	return res;
 }
 EXPORT_SYMBOL(__dynamic_pr_debug);
 
-int __dynamic_dev_dbg(struct _ddebug *descriptor,
+void __dynamic_dev_dbg(struct _ddebug *descriptor,
 		      const struct device *dev, const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	int res;
 
 	BUG_ON(!descriptor);
 	BUG_ON(!fmt);
@@ -577,30 +572,27 @@ int __dynamic_dev_dbg(struct _ddebug *descriptor,
 	vaf.va = &args;
 
 	if (!dev) {
-		res = printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
+		printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
 	} else {
 		char buf[PREFIX_SIZE];
 
-		res = dev_printk_emit(7, dev, "%s%s %s: %pV",
-				      dynamic_emit_prefix(descriptor, buf),
-				      dev_driver_string(dev), dev_name(dev),
-				      &vaf);
+		dev_printk_emit(7, dev, "%s%s %s: %pV",
+				dynamic_emit_prefix(descriptor, buf),
+				dev_driver_string(dev), dev_name(dev),
+				&vaf);
 	}
 
 	va_end(args);
-
-	return res;
 }
 EXPORT_SYMBOL(__dynamic_dev_dbg);
 
 #ifdef CONFIG_NET
 
-int __dynamic_netdev_dbg(struct _ddebug *descriptor,
-			 const struct net_device *dev, const char *fmt, ...)
+void __dynamic_netdev_dbg(struct _ddebug *descriptor,
+			  const struct net_device *dev, const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	int res;
 
 	BUG_ON(!descriptor);
 	BUG_ON(!fmt);
@@ -613,23 +605,21 @@ int __dynamic_netdev_dbg(struct _ddebug *descriptor,
 	if (dev && dev->dev.parent) {
 		char buf[PREFIX_SIZE];
 
-		res = dev_printk_emit(7, dev->dev.parent,
-				      "%s%s %s %s%s: %pV",
-				      dynamic_emit_prefix(descriptor, buf),
-				      dev_driver_string(dev->dev.parent),
-				      dev_name(dev->dev.parent),
-				      netdev_name(dev), netdev_reg_state(dev),
-				      &vaf);
+		dev_printk_emit(7, dev->dev.parent,
+				"%s%s %s %s%s: %pV",
+				dynamic_emit_prefix(descriptor, buf),
+				dev_driver_string(dev->dev.parent),
+				dev_name(dev->dev.parent),
+				netdev_name(dev), netdev_reg_state(dev),
+				&vaf);
 	} else if (dev) {
-		res = printk(KERN_DEBUG "%s%s: %pV", netdev_name(dev),
-			     netdev_reg_state(dev), &vaf);
+		printk(KERN_DEBUG "%s%s: %pV", netdev_name(dev),
+		       netdev_reg_state(dev), &vaf);
 	} else {
-		res = printk(KERN_DEBUG "(NULL net_device): %pV", &vaf);
+		printk(KERN_DEBUG "(NULL net_device): %pV", &vaf);
 	}
 
 	va_end(args);
-
-	return res;
 }
 EXPORT_SYMBOL(__dynamic_netdev_dbg);
 
-- 
cgit v1.2.3


From 55a93b3ea780908b7d1b3a8cf1976223a9268d78 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Oct 2014 15:31:07 -0700
Subject: qdisc: validate skb without holding lock

Validation of skb can be pretty expensive :

GSO segmentation and/or checksum computations.

We can do this without holding qdisc lock, so that other cpus
can queue additional packets.

Trick is that requeued packets were already validated, so we carry
a boolean so that sch_direct_xmit() can validate a fresh skb list,
or directly use an old one.

Tested on 40Gb NIC (8 TX queues) and 200 concurrent flows, 48 threads
host.

Turning TSO on or off had no effect on throughput, only few more cpu
cycles. Lock contention on qdisc lock disappeared.

Same if disabling TX checksum offload.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 include/net/pkt_sched.h   |  2 +-
 net/core/dev.c            | 29 +++++++++++++++++++---
 net/sched/sch_generic.c   | 61 ++++++++++++++++++++++-------------------------
 4 files changed, 56 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9b7fbacb6296..910fb17ad148 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2821,7 +2821,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
-struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 8bbe626e9ece..e4b3c828c1c2 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -99,7 +99,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab);
 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
 int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		    struct net_device *dev, struct netdev_queue *txq,
-		    spinlock_t *root_lock);
+		    spinlock_t *root_lock, bool validate);
 
 void __qdisc_run(struct Qdisc *q);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e55c546717d4..1a90530f83ff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2655,7 +2655,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur
 	return skb;
 }
 
-struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
 {
 	netdev_features_t features;
 
@@ -2720,6 +2720,30 @@ out_null:
 	return NULL;
 }
 
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+{
+	struct sk_buff *next, *head = NULL, *tail;
+
+	while (skb) {
+		next = skb->next;
+		skb->next = NULL;
+		skb = validate_xmit_skb(skb, dev);
+		if (skb) {
+			struct sk_buff *end = skb;
+
+			while (end->next)
+				end = end->next;
+			if (!head)
+				head = skb;
+			else
+				tail->next = skb;
+			tail = end;
+		}
+		skb = next;
+	}
+	return head;
+}
+
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
 	const struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -2786,8 +2810,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		qdisc_bstats_update(q, skb);
 
-		skb = validate_xmit_skb(skb, dev);
-		if (skb && sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);
 				contended = false;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 797ebef73642..2b349a4de3c8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -56,40 +56,34 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 	return 0;
 }
 
-static struct sk_buff *try_bulk_dequeue_skb(struct Qdisc *q,
-					    struct sk_buff *head_skb,
-					    int bytelimit)
+static void try_bulk_dequeue_skb(struct Qdisc *q,
+				 struct sk_buff *skb,
+				 const struct netdev_queue *txq)
 {
-	struct sk_buff *skb, *tail_skb = head_skb;
+	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
 
 	while (bytelimit > 0) {
-		skb = q->dequeue(q);
-		if (!skb)
-			break;
+		struct sk_buff *nskb = q->dequeue(q);
 
-		bytelimit -= skb->len; /* covers GSO len */
-		skb = validate_xmit_skb(skb, qdisc_dev(q));
-		if (!skb)
+		if (!nskb)
 			break;
 
-		while (tail_skb->next) /* GSO list goto tail */
-			tail_skb = tail_skb->next;
-
-		tail_skb->next = skb;
-		tail_skb = skb;
+		bytelimit -= nskb->len; /* covers GSO len */
+		skb->next = nskb;
+		skb = nskb;
 	}
-
-	return head_skb;
+	skb->next = NULL;
 }
 
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
-static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
 {
 	struct sk_buff *skb = q->gso_skb;
 	const struct netdev_queue *txq = q->dev_queue;
 
+	*validate = true;
 	if (unlikely(skb)) {
 		/* check the reason of requeuing without tx lock first */
 		txq = skb_get_tx_queue(txq->dev, skb);
@@ -98,21 +92,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 			q->q.qlen--;
 		} else
 			skb = NULL;
+		/* skb in gso_skb were already validated */
+		*validate = false;
 	} else {
 		if (!(q->flags & TCQ_F_ONETXQUEUE) ||
 		    !netif_xmit_frozen_or_stopped(txq)) {
-			int bytelimit = qdisc_avail_bulklimit(txq);
-
 			skb = q->dequeue(q);
-			if (skb) {
-				bytelimit -= skb->len;
-				skb = validate_xmit_skb(skb, qdisc_dev(q));
-			}
 			if (skb && qdisc_may_bulk(q))
-				skb = try_bulk_dequeue_skb(q, skb, bytelimit);
+				try_bulk_dequeue_skb(q, skb, txq);
 		}
 	}
-
 	return skb;
 }
 
@@ -156,19 +145,24 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
  */
 int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		    struct net_device *dev, struct netdev_queue *txq,
-		    spinlock_t *root_lock)
+		    spinlock_t *root_lock, bool validate)
 {
 	int ret = NETDEV_TX_BUSY;
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_xmit_frozen_or_stopped(txq))
-		skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
+	if (validate)
+		skb = validate_xmit_skb_list(skb, dev);
 
-	HARD_TX_UNLOCK(dev, txq);
+	if (skb) {
+		HARD_TX_LOCK(dev, txq, smp_processor_id());
+		if (!netif_xmit_frozen_or_stopped(txq))
+			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
 
+		HARD_TX_UNLOCK(dev, txq);
+	}
 	spin_lock(root_lock);
 
 	if (dev_xmit_complete(ret)) {
@@ -217,9 +211,10 @@ static inline int qdisc_restart(struct Qdisc *q)
 	struct net_device *dev;
 	spinlock_t *root_lock;
 	struct sk_buff *skb;
+	bool validate;
 
 	/* Dequeue packet */
-	skb = dequeue_skb(q);
+	skb = dequeue_skb(q, &validate);
 	if (unlikely(!skb))
 		return 0;
 
@@ -229,7 +224,7 @@ static inline int qdisc_restart(struct Qdisc *q)
 	dev = qdisc_dev(q);
 	txq = skb_get_tx_queue(dev, skb);
 
-	return sch_direct_xmit(skb, q, dev, txq, root_lock);
+	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
 }
 
 void __qdisc_run(struct Qdisc *q)
-- 
cgit v1.2.3


From c7a08ac7ee68b9af0d5af99c7b34b574cac4d144 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 2 Oct 2014 12:19:42 +0300
Subject: net/mlx5_core: Update device capabilities handling

Rearrange struct mlx5_caps so it has a "gen" field to represent the current
capabilities configured for the device. Max capabilities can also be queried
from the device. Also update capabilities struct to contain more fields as per
the latest revision if firmware specification.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx5/cq.c                |   8 +-
 drivers/infiniband/hw/mlx5/mad.c               |   2 +-
 drivers/infiniband/hw/mlx5/main.c              |  83 ++++++----
 drivers/infiniband/hw/mlx5/qp.c                |  72 +++++---
 drivers/infiniband/hw/mlx5/srq.c               |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |  24 ++-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c   |  81 +--------
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 219 +++++++++++++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/uar.c  |   4 +-
 include/linux/mlx5/device.h                    |  24 ++-
 include/linux/mlx5/driver.h                    |  28 +++-
 12 files changed, 331 insertions(+), 222 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index e4056279166d..10cfce5119a9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -752,7 +752,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
 		return ERR_PTR(-EINVAL);
 
 	entries = roundup_pow_of_two(entries + 1);
-	if (entries > dev->mdev->caps.max_cqes)
+	if (entries > dev->mdev->caps.gen.max_cqes)
 		return ERR_PTR(-EINVAL);
 
 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -919,7 +919,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 	int err;
 	u32 fsel;
 
-	if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
+	if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
 		return -ENOSYS;
 
 	in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -1074,7 +1074,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 	int uninitialized_var(cqe_size);
 	unsigned long flags;
 
-	if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
+	if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
 		pr_info("Firmware does not support resize CQ\n");
 		return -ENOSYS;
 	}
@@ -1083,7 +1083,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		return -EINVAL;
 
 	entries = roundup_pow_of_two(entries + 1);
-	if (entries > dev->mdev->caps.max_cqes + 1)
+	if (entries > dev->mdev->caps.gen.max_cqes + 1)
 		return -EINVAL;
 
 	if (entries == ibcq->cqe + 1)
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index b514bbb5610f..657af9a1167c 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -129,7 +129,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
 
 	packet_error = be16_to_cpu(out_mad->status);
 
-	dev->mdev->caps.ext_port_cap[port - 1] = (!err && !packet_error) ?
+	dev->mdev->caps.gen.ext_port_cap[port - 1] = (!err && !packet_error) ?
 		MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
 
 out:
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d8907b20522a..f3114d1132fb 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -157,11 +157,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct ib_smp *in_mad  = NULL;
 	struct ib_smp *out_mad = NULL;
+	struct mlx5_general_caps *gen;
 	int err = -ENOMEM;
 	int max_rq_sg;
 	int max_sq_sg;
 	u64 flags;
 
+	gen = &dev->mdev->caps.gen;
 	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
 	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
 	if (!in_mad || !out_mad)
@@ -183,7 +185,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		IB_DEVICE_PORT_ACTIVE_EVENT		|
 		IB_DEVICE_SYS_IMAGE_GUID		|
 		IB_DEVICE_RC_RNR_NAK_GEN;
-	flags = dev->mdev->caps.flags;
+	flags = gen->flags;
 	if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
 		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
 	if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -213,30 +215,31 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	memcpy(&props->sys_image_guid, out_mad->data +	4, 8);
 
 	props->max_mr_size	   = ~0ull;
-	props->page_size_cap	   = dev->mdev->caps.min_page_sz;
-	props->max_qp		   = 1 << dev->mdev->caps.log_max_qp;
-	props->max_qp_wr	   = dev->mdev->caps.max_wqes;
-	max_rq_sg = dev->mdev->caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
-	max_sq_sg = (dev->mdev->caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
+	props->page_size_cap	   = gen->min_page_sz;
+	props->max_qp		   = 1 << gen->log_max_qp;
+	props->max_qp_wr	   = gen->max_wqes;
+	max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
+	max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
 		sizeof(struct mlx5_wqe_data_seg);
 	props->max_sge = min(max_rq_sg, max_sq_sg);
-	props->max_cq		   = 1 << dev->mdev->caps.log_max_cq;
-	props->max_cqe		   = dev->mdev->caps.max_cqes - 1;
-	props->max_mr		   = 1 << dev->mdev->caps.log_max_mkey;
-	props->max_pd		   = 1 << dev->mdev->caps.log_max_pd;
-	props->max_qp_rd_atom	   = dev->mdev->caps.max_ra_req_qp;
-	props->max_qp_init_rd_atom = dev->mdev->caps.max_ra_res_qp;
+	props->max_cq		   = 1 << gen->log_max_cq;
+	props->max_cqe		   = gen->max_cqes - 1;
+	props->max_mr		   = 1 << gen->log_max_mkey;
+	props->max_pd		   = 1 << gen->log_max_pd;
+	props->max_qp_rd_atom	   = 1 << gen->log_max_ra_req_qp;
+	props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp;
+	props->max_srq		   = 1 << gen->log_max_srq;
+	props->max_srq_wr	   = gen->max_srq_wqes - 1;
+	props->local_ca_ack_delay  = gen->local_ca_ack_delay;
 	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
-	props->max_srq		   = 1 << dev->mdev->caps.log_max_srq;
-	props->max_srq_wr	   = dev->mdev->caps.max_srq_wqes - 1;
 	props->max_srq_sge	   = max_rq_sg - 1;
 	props->max_fast_reg_page_list_len = (unsigned int)-1;
-	props->local_ca_ack_delay  = dev->mdev->caps.local_ca_ack_delay;
+	props->local_ca_ack_delay  = gen->local_ca_ack_delay;
 	props->atomic_cap	   = IB_ATOMIC_NONE;
 	props->masked_atomic_cap   = IB_ATOMIC_NONE;
 	props->max_pkeys	   = be16_to_cpup((__be16 *)(out_mad->data + 28));
-	props->max_mcast_grp	   = 1 << dev->mdev->caps.log_max_mcg;
-	props->max_mcast_qp_attach = dev->mdev->caps.max_qp_mcg;
+	props->max_mcast_grp	   = 1 << gen->log_max_mcg;
+	props->max_mcast_qp_attach = gen->max_qp_mcg;
 	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 					   props->max_mcast_grp;
 	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
@@ -254,10 +257,12 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct ib_smp *in_mad  = NULL;
 	struct ib_smp *out_mad = NULL;
+	struct mlx5_general_caps *gen;
 	int ext_active_speed;
 	int err = -ENOMEM;
 
-	if (port < 1 || port > dev->mdev->caps.num_ports) {
+	gen = &dev->mdev->caps.gen;
+	if (port < 1 || port > gen->num_ports) {
 		mlx5_ib_warn(dev, "invalid port number %d\n", port);
 		return -EINVAL;
 	}
@@ -288,8 +293,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
 	props->phys_state	= out_mad->data[33] >> 4;
 	props->port_cap_flags	= be32_to_cpup((__be32 *)(out_mad->data + 20));
 	props->gid_tbl_len	= out_mad->data[50];
-	props->max_msg_sz	= 1 << to_mdev(ibdev)->mdev->caps.log_max_msg;
-	props->pkey_tbl_len	= to_mdev(ibdev)->mdev->caps.port[port - 1].pkey_table_len;
+	props->max_msg_sz	= 1 << gen->log_max_msg;
+	props->pkey_tbl_len	= gen->port[port - 1].pkey_table_len;
 	props->bad_pkey_cntr	= be16_to_cpup((__be16 *)(out_mad->data + 46));
 	props->qkey_viol_cntr	= be16_to_cpup((__be16 *)(out_mad->data + 48));
 	props->active_width	= out_mad->data[31] & 0xf;
@@ -316,7 +321,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
 
 	/* If reported active speed is QDR, check if is FDR-10 */
 	if (props->active_speed == 4) {
-		if (dev->mdev->caps.ext_port_cap[port - 1] &
+		if (gen->ext_port_cap[port - 1] &
 		    MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
 			init_query_mad(in_mad);
 			in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
@@ -470,6 +475,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	struct mlx5_ib_alloc_ucontext_req_v2 req;
 	struct mlx5_ib_alloc_ucontext_resp resp;
 	struct mlx5_ib_ucontext *context;
+	struct mlx5_general_caps *gen;
 	struct mlx5_uuar_info *uuari;
 	struct mlx5_uar *uars;
 	int gross_uuars;
@@ -480,6 +486,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	int i;
 	size_t reqlen;
 
+	gen = &dev->mdev->caps.gen;
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
 
@@ -512,14 +519,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 
 	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
 	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
-	resp.qp_tab_size      = 1 << dev->mdev->caps.log_max_qp;
-	resp.bf_reg_size      = dev->mdev->caps.bf_reg_size;
+	resp.qp_tab_size      = 1 << gen->log_max_qp;
+	resp.bf_reg_size      = gen->bf_reg_size;
 	resp.cache_line_size  = L1_CACHE_BYTES;
-	resp.max_sq_desc_sz = dev->mdev->caps.max_sq_desc_sz;
-	resp.max_rq_desc_sz = dev->mdev->caps.max_rq_desc_sz;
-	resp.max_send_wqebb = dev->mdev->caps.max_wqes;
-	resp.max_recv_wr = dev->mdev->caps.max_wqes;
-	resp.max_srq_recv_wr = dev->mdev->caps.max_srq_wqes;
+	resp.max_sq_desc_sz = gen->max_sq_desc_sz;
+	resp.max_rq_desc_sz = gen->max_rq_desc_sz;
+	resp.max_send_wqebb = gen->max_wqes;
+	resp.max_recv_wr = gen->max_wqes;
+	resp.max_srq_recv_wr = gen->max_srq_wqes;
 
 	context = kzalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
@@ -565,7 +572,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	mutex_init(&context->db_page_mutex);
 
 	resp.tot_uuars = req.total_num_uuars;
-	resp.num_ports = dev->mdev->caps.num_ports;
+	resp.num_ports = gen->num_ports;
 	err = ib_copy_to_udata(udata, &resp,
 			       sizeof(resp) - sizeof(resp.reserved));
 	if (err)
@@ -967,9 +974,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 
 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
 {
+	struct mlx5_general_caps *gen;
 	int port;
 
-	for (port = 1; port <= dev->mdev->caps.num_ports; port++)
+	gen = &dev->mdev->caps.gen;
+	for (port = 1; port <= gen->num_ports; port++)
 		mlx5_query_ext_port_caps(dev, port);
 }
 
@@ -977,9 +986,11 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
 {
 	struct ib_device_attr *dprops = NULL;
 	struct ib_port_attr *pprops = NULL;
+	struct mlx5_general_caps *gen;
 	int err = 0;
 	int port;
 
+	gen = &dev->mdev->caps.gen;
 	pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
 	if (!pprops)
 		goto out;
@@ -994,14 +1005,14 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
 		goto out;
 	}
 
-	for (port = 1; port <= dev->mdev->caps.num_ports; port++) {
+	for (port = 1; port <= gen->num_ports; port++) {
 		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
 		if (err) {
 			mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
 			break;
 		}
-		dev->mdev->caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
-		dev->mdev->caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
+		gen->port[port - 1].pkey_table_len = dprops->max_pkeys;
+		gen->port[port - 1].gid_table_len = pprops->gid_tbl_len;
 		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
 			    dprops->max_pkeys, pprops->gid_tbl_len);
 	}
@@ -1279,8 +1290,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
 	dev->ib_dev.owner		= THIS_MODULE;
 	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
-	dev->ib_dev.local_dma_lkey	= mdev->caps.reserved_lkey;
-	dev->num_ports		= mdev->caps.num_ports;
+	dev->ib_dev.local_dma_lkey	= mdev->caps.gen.reserved_lkey;
+	dev->num_ports		= mdev->caps.gen.num_ports;
 	dev->ib_dev.phys_port_cnt     = dev->num_ports;
 	dev->ib_dev.num_comp_vectors	= dev->num_comp_vectors;
 	dev->ib_dev.dma_device	= &mdev->pdev->dev;
@@ -1355,7 +1366,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 
-	if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
+	if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) {
 		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
 		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
 		dev->ib_dev.uverbs_cmd_mask |=
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8c574b63d77b..dbfe498870c1 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -158,11 +158,13 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
 static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
 		       int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
 {
+	struct mlx5_general_caps *gen;
 	int wqe_size;
 	int wq_size;
 
+	gen = &dev->mdev->caps.gen;
 	/* Sanity check RQ size before proceeding */
-	if (cap->max_recv_wr  > dev->mdev->caps.max_wqes)
+	if (cap->max_recv_wr  > gen->max_wqes)
 		return -EINVAL;
 
 	if (!has_rq) {
@@ -182,10 +184,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
 			wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
 			wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
 			qp->rq.wqe_cnt = wq_size / wqe_size;
-			if (wqe_size > dev->mdev->caps.max_rq_desc_sz) {
+			if (wqe_size > gen->max_rq_desc_sz) {
 				mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
 					    wqe_size,
-					    dev->mdev->caps.max_rq_desc_sz);
+					    gen->max_rq_desc_sz);
 				return -EINVAL;
 			}
 			qp->rq.wqe_shift = ilog2(wqe_size);
@@ -266,9 +268,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
 static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 			struct mlx5_ib_qp *qp)
 {
+	struct mlx5_general_caps *gen;
 	int wqe_size;
 	int wq_size;
 
+	gen = &dev->mdev->caps.gen;
 	if (!attr->cap.max_send_wr)
 		return 0;
 
@@ -277,9 +281,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 	if (wqe_size < 0)
 		return wqe_size;
 
-	if (wqe_size > dev->mdev->caps.max_sq_desc_sz) {
+	if (wqe_size > gen->max_sq_desc_sz) {
 		mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
-			    wqe_size, dev->mdev->caps.max_sq_desc_sz);
+			    wqe_size, gen->max_sq_desc_sz);
 		return -EINVAL;
 	}
 
@@ -292,9 +296,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 
 	wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
 	qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
-	if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) {
+	if (qp->sq.wqe_cnt > gen->max_wqes) {
 		mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
-			    qp->sq.wqe_cnt, dev->mdev->caps.max_wqes);
+			    qp->sq.wqe_cnt, gen->max_wqes);
 		return -ENOMEM;
 	}
 	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
@@ -309,11 +313,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
 			    struct mlx5_ib_qp *qp,
 			    struct mlx5_ib_create_qp *ucmd)
 {
+	struct mlx5_general_caps *gen;
 	int desc_sz = 1 << qp->sq.wqe_shift;
 
-	if (desc_sz > dev->mdev->caps.max_sq_desc_sz) {
+	gen = &dev->mdev->caps.gen;
+	if (desc_sz > gen->max_sq_desc_sz) {
 		mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
-			     desc_sz, dev->mdev->caps.max_sq_desc_sz);
+			     desc_sz, gen->max_sq_desc_sz);
 		return -EINVAL;
 	}
 
@@ -325,9 +331,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
 
 	qp->sq.wqe_cnt = ucmd->sq_wqe_count;
 
-	if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) {
+	if (qp->sq.wqe_cnt > gen->max_wqes) {
 		mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
-			     qp->sq.wqe_cnt, dev->mdev->caps.max_wqes);
+			     qp->sq.wqe_cnt, gen->max_wqes);
 		return -EINVAL;
 	}
 
@@ -803,16 +809,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	struct mlx5_ib_resources *devr = &dev->devr;
 	struct mlx5_ib_create_qp_resp resp;
 	struct mlx5_create_qp_mbox_in *in;
+	struct mlx5_general_caps *gen;
 	struct mlx5_ib_create_qp ucmd;
 	int inlen = sizeof(*in);
 	int err;
 
+	gen = &dev->mdev->caps.gen;
 	mutex_init(&qp->mutex);
 	spin_lock_init(&qp->sq.lock);
 	spin_lock_init(&qp->rq.lock);
 
 	if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
-		if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
+		if (!(gen->flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
 			mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
 			return -EINVAL;
 		} else {
@@ -851,9 +859,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 				mlx5_ib_dbg(dev, "invalid rq params\n");
 				return -EINVAL;
 			}
-			if (ucmd.sq_wqe_count > dev->mdev->caps.max_wqes) {
+			if (ucmd.sq_wqe_count > gen->max_wqes) {
 				mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
-					    ucmd.sq_wqe_count, dev->mdev->caps.max_wqes);
+					    ucmd.sq_wqe_count, gen->max_wqes);
 				return -EINVAL;
 			}
 			err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
@@ -1144,6 +1152,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *init_attr,
 				struct ib_udata *udata)
 {
+	struct mlx5_general_caps *gen;
 	struct mlx5_ib_dev *dev;
 	struct mlx5_ib_qp *qp;
 	u16 xrcdn = 0;
@@ -1161,11 +1170,12 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 		}
 		dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
 	}
+	gen = &dev->mdev->caps.gen;
 
 	switch (init_attr->qp_type) {
 	case IB_QPT_XRC_TGT:
 	case IB_QPT_XRC_INI:
-		if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
+		if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) {
 			mlx5_ib_dbg(dev, "XRC not supported\n");
 			return ERR_PTR(-ENOSYS);
 		}
@@ -1272,6 +1282,9 @@ enum {
 
 static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
 {
+	struct mlx5_general_caps *gen;
+
+	gen = &dev->mdev->caps.gen;
 	if (rate == IB_RATE_PORT_CURRENT) {
 		return 0;
 	} else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
@@ -1279,7 +1292,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
 	} else {
 		while (rate != IB_RATE_2_5_GBPS &&
 		       !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
-			 dev->mdev->caps.stat_rate_support))
+			 gen->stat_rate_support))
 			--rate;
 	}
 
@@ -1290,8 +1303,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
 			 struct mlx5_qp_path *path, u8 port, int attr_mask,
 			 u32 path_flags, const struct ib_qp_attr *attr)
 {
+	struct mlx5_general_caps *gen;
 	int err;
 
+	gen = &dev->mdev->caps.gen;
 	path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
 	path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
 
@@ -1318,9 +1333,9 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
 	path->port = port;
 
 	if (ah->ah_flags & IB_AH_GRH) {
-		if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) {
+		if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) {
 			pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n",
-			       ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len);
+			       ah->grh.sgid_index, gen->port[port - 1].gid_table_len);
 			return -EINVAL;
 		}
 
@@ -1492,6 +1507,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
 	struct mlx5_ib_cq *send_cq, *recv_cq;
 	struct mlx5_qp_context *context;
+	struct mlx5_general_caps *gen;
 	struct mlx5_modify_qp_mbox_in *in;
 	struct mlx5_ib_pd *pd;
 	enum mlx5_qp_state mlx5_cur, mlx5_new;
@@ -1500,6 +1516,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	int mlx5_st;
 	int err;
 
+	gen = &dev->mdev->caps.gen;
 	in = kzalloc(sizeof(*in), GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -1539,7 +1556,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 			err = -EINVAL;
 			goto out;
 		}
-		context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev->caps.log_max_msg;
+		context->mtu_msgmax = (attr->path_mtu << 5) | gen->log_max_msg;
 	}
 
 	if (attr_mask & IB_QP_DEST_QPN)
@@ -1685,9 +1702,11 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
 	enum ib_qp_state cur_state, new_state;
+	struct mlx5_general_caps *gen;
 	int err = -EINVAL;
 	int port;
 
+	gen = &dev->mdev->caps.gen;
 	mutex_lock(&qp->mutex);
 
 	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
@@ -1699,21 +1718,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		goto out;
 
 	if ((attr_mask & IB_QP_PORT) &&
-	    (attr->port_num == 0 || attr->port_num > dev->mdev->caps.num_ports))
+	    (attr->port_num == 0 || attr->port_num > gen->num_ports))
 		goto out;
 
 	if (attr_mask & IB_QP_PKEY_INDEX) {
 		port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
-		if (attr->pkey_index >= dev->mdev->caps.port[port - 1].pkey_table_len)
+		if (attr->pkey_index >= gen->port[port - 1].pkey_table_len)
 			goto out;
 	}
 
 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
-	    attr->max_rd_atomic > dev->mdev->caps.max_ra_res_qp)
+	    attr->max_rd_atomic > (1 << gen->log_max_ra_res_qp))
 		goto out;
 
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
-	    attr->max_dest_rd_atomic > dev->mdev->caps.max_ra_req_qp)
+	    attr->max_dest_rd_atomic > (1 << gen->log_max_ra_req_qp))
 		goto out;
 
 	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
@@ -2893,7 +2912,8 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
 	memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
 	ib_ah_attr->port_num	  = path->port;
 
-	if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+	if (ib_ah_attr->port_num == 0 ||
+	    ib_ah_attr->port_num > dev->caps.gen.num_ports)
 		return;
 
 	ib_ah_attr->sl = path->sl & 0xf;
@@ -3011,10 +3031,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
 					  struct ib_udata *udata)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_general_caps *gen;
 	struct mlx5_ib_xrcd *xrcd;
 	int err;
 
-	if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC))
+	gen = &dev->mdev->caps.gen;
+	if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC))
 		return ERR_PTR(-ENOSYS);
 
 	xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 70bd131ba646..97cc1baaa8e3 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -238,6 +238,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 				  struct ib_udata *udata)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_general_caps *gen;
 	struct mlx5_ib_srq *srq;
 	int desc_size;
 	int buf_size;
@@ -247,11 +248,12 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 	int is_xrc;
 	u32 flgs, xrcdn;
 
+	gen = &dev->mdev->caps.gen;
 	/* Sanity check SRQ size before proceeding */
-	if (init_attr->attr.max_wr >= dev->mdev->caps.max_srq_wqes) {
+	if (init_attr->attr.max_wr >= gen->max_srq_wqes) {
 		mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
 			    init_attr->attr.max_wr,
-			    dev->mdev->caps.max_srq_wqes);
+			    gen->max_srq_wqes);
 		return ERR_PTR(-EINVAL);
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 65a7da69e2ac..6eb0f85cf872 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1538,16 +1538,9 @@ static const char *cmd_status_str(u8 status)
 	}
 }
 
-int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
+static int cmd_status_to_err(u8 status)
 {
-	if (!hdr->status)
-		return 0;
-
-	pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
-		cmd_status_str(hdr->status), hdr->status,
-		be32_to_cpu(hdr->syndrome));
-
-	switch (hdr->status) {
+	switch (status) {
 	case MLX5_CMD_STAT_OK:				return 0;
 	case MLX5_CMD_STAT_INT_ERR:			return -EIO;
 	case MLX5_CMD_STAT_BAD_OP_ERR:			return -EINVAL;
@@ -1567,3 +1560,16 @@ int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
 	default:					return -EIO;
 	}
 }
+
+/* this will be available till all the commands use set/get macros */
+int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
+{
+	if (!hdr->status)
+		return 0;
+
+	pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
+		cmd_status_str(hdr->status), hdr->status,
+		be32_to_cpu(hdr->syndrome));
+
+	return cmd_status_to_err(hdr->status);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 4e8bd0b34bb0..11b9b840ad4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -468,7 +468,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 
 	err = mlx5_create_map_eq(dev, &table->pages_eq,
 				 MLX5_EQ_VEC_PAGES,
-				 dev->caps.max_vf + 1,
+				 dev->caps.gen.max_vf + 1,
 				 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
 				 &dev->priv.uuari.uars[0]);
 	if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index f012658b6a92..087c4c797deb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -64,86 +64,9 @@ out_out:
 	return err;
 }
 
-int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
-			   struct mlx5_caps *caps)
+int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, struct mlx5_caps *caps)
 {
-	struct mlx5_cmd_query_hca_cap_mbox_out *out;
-	struct mlx5_cmd_query_hca_cap_mbox_in in;
-	struct mlx5_query_special_ctxs_mbox_out ctx_out;
-	struct mlx5_query_special_ctxs_mbox_in ctx_in;
-	int err;
-	u16 t16;
-
-	out = kzalloc(sizeof(*out), GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
-
-	memset(&in, 0, sizeof(in));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
-	in.hdr.opmod  = cpu_to_be16(0x1);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
-	if (err)
-		goto out_out;
-
-	if (out->hdr.status) {
-		err = mlx5_cmd_status_to_err(&out->hdr);
-		goto out_out;
-	}
-
-
-	caps->log_max_eq = out->hca_cap.log_max_eq & 0xf;
-	caps->max_cqes = 1 << out->hca_cap.log_max_cq_sz;
-	caps->max_wqes = 1 << out->hca_cap.log_max_qp_sz;
-	caps->max_sq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_sq);
-	caps->max_rq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_rq);
-	caps->flags = be64_to_cpu(out->hca_cap.flags);
-	caps->stat_rate_support = be16_to_cpu(out->hca_cap.stat_rate_support);
-	caps->log_max_msg = out->hca_cap.log_max_msg & 0x1f;
-	caps->num_ports = out->hca_cap.num_ports & 0xf;
-	caps->log_max_cq = out->hca_cap.log_max_cq & 0x1f;
-	if (caps->num_ports > MLX5_MAX_PORTS) {
-		mlx5_core_err(dev, "device has %d ports while the driver supports max %d ports\n",
-			      caps->num_ports, MLX5_MAX_PORTS);
-		err = -EINVAL;
-		goto out_out;
-	}
-	caps->log_max_qp = out->hca_cap.log_max_qp & 0x1f;
-	caps->log_max_mkey = out->hca_cap.log_max_mkey & 0x3f;
-	caps->log_max_pd = out->hca_cap.log_max_pd & 0x1f;
-	caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
-	caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
-	caps->log_max_mcg = out->hca_cap.log_max_mcg;
-	caps->max_qp_mcg = be32_to_cpu(out->hca_cap.max_qp_mcg) & 0xffffff;
-	caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f);
-	caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f);
-	caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
-	t16 = be16_to_cpu(out->hca_cap.bf_log_bf_reg_size);
-	if (t16 & 0x8000) {
-		caps->bf_reg_size = 1 << (t16 & 0x1f);
-		caps->bf_regs_per_page = MLX5_BF_REGS_PER_PAGE;
-	} else {
-		caps->bf_reg_size = 0;
-		caps->bf_regs_per_page = 0;
-	}
-	caps->min_page_sz = ~(u32)((1 << out->hca_cap.log_pg_sz) - 1);
-
-	memset(&ctx_in, 0, sizeof(ctx_in));
-	memset(&ctx_out, 0, sizeof(ctx_out));
-	ctx_in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
-	err = mlx5_cmd_exec(dev, &ctx_in, sizeof(ctx_in),
-				 &ctx_out, sizeof(ctx_out));
-	if (err)
-		goto out_out;
-
-	if (ctx_out.hdr.status)
-		err = mlx5_cmd_status_to_err(&ctx_out.hdr);
-
-	caps->reserved_lkey = be32_to_cpu(ctx_out.reserved_lkey);
-
-out_out:
-	kfree(out);
-
-	return err;
+	return mlx5_core_get_caps(dev, caps, HCA_CAP_OPMOD_GET_CUR);
 }
 
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f2716cc1f51d..d9f74618befa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -207,11 +207,11 @@ static void release_bar(struct pci_dev *pdev)
 static int mlx5_enable_msix(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	int num_eqs = 1 << dev->caps.log_max_eq;
+	int num_eqs = 1 << dev->caps.gen.log_max_eq;
 	int nvec;
 	int i;
 
-	nvec = dev->caps.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE;
+	nvec = dev->caps.gen.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE;
 	nvec = min_t(int, nvec, num_eqs);
 	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
 		return -ENOMEM;
@@ -250,13 +250,34 @@ struct mlx5_reg_host_endianess {
 #define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
 
 enum {
-	MLX5_CAP_BITS_RW_MASK	= CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
-				  CAP_MASK(MLX5_CAP_OFF_DCT, 1),
+	MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
+				MLX5_DEV_CAP_FLAG_DCT,
 };
 
+static u16 to_fw_pkey_sz(u32 size)
+{
+	switch (size) {
+	case 128:
+		return 0;
+	case 256:
+		return 1;
+	case 512:
+		return 2;
+	case 1024:
+		return 3;
+	case 2048:
+		return 4;
+	case 4096:
+		return 5;
+	default:
+		pr_warn("invalid pkey table size %d\n", size);
+		return 0;
+	}
+}
+
 /* selectively copy writable fields clearing any reserved area
  */
-static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from)
+static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_general_caps *from)
 {
 	u64 v64;
 
@@ -265,76 +286,172 @@ static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from)
 	to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f;
 	to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f;
 	to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f;
-	to->log_max_atomic_size_qp = from->log_max_atomic_size_qp;
-	to->log_max_atomic_size_dc = from->log_max_atomic_size_dc;
-	v64 = be64_to_cpu(from->flags) & MLX5_CAP_BITS_RW_MASK;
+	to->pkey_table_size = cpu_to_be16(to_fw_pkey_sz(from->pkey_table_size));
+	v64 = from->flags & MLX5_CAP_BITS_RW_MASK;
 	to->flags = cpu_to_be64(v64);
 }
 
-enum {
-	HCA_CAP_OPMOD_GET_MAX	= 0,
-	HCA_CAP_OPMOD_GET_CUR	= 1,
-};
+static u16 get_pkey_table_size(int pkey)
+{
+	if (pkey > MLX5_MAX_LOG_PKEY_TABLE)
+		return 0;
 
-static int handle_hca_cap(struct mlx5_core_dev *dev)
+	return MLX5_MIN_PKEY_TABLE_SIZE << pkey;
+}
+
+static void fw2drv_caps(struct mlx5_caps *caps,
+			struct mlx5_cmd_query_hca_cap_mbox_out *out)
 {
-	struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
-	struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL;
-	struct mlx5_cmd_query_hca_cap_mbox_in query_ctx;
-	struct mlx5_cmd_set_hca_cap_mbox_out set_out;
-	u64 flags;
+	struct mlx5_general_caps *gen = &caps->gen;
+	u16 t16;
+
+	gen->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
+	gen->max_wqes = 1 << out->hca_cap.log_max_qp_sz;
+	gen->log_max_qp = out->hca_cap.log_max_qp & 0x1f;
+	gen->log_max_strq = out->hca_cap.log_max_strq_sz;
+	gen->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
+	gen->max_cqes = 1 << out->hca_cap.log_max_cq_sz;
+	gen->log_max_cq = out->hca_cap.log_max_cq & 0x1f;
+	gen->max_eqes = out->hca_cap.log_max_eq_sz;
+	gen->log_max_mkey = out->hca_cap.log_max_mkey & 0x3f;
+	gen->log_max_eq = out->hca_cap.log_max_eq & 0xf;
+	gen->max_indirection = out->hca_cap.max_indirection;
+	gen->log_max_mrw_sz = out->hca_cap.log_max_mrw_sz;
+	gen->log_max_bsf_list_size = 0;
+	gen->log_max_klm_list_size = 0;
+	gen->log_max_ra_req_dc = out->hca_cap.log_max_ra_req_dc;
+	gen->log_max_ra_res_dc = out->hca_cap.log_max_ra_res_dc;
+	gen->log_max_ra_req_qp = out->hca_cap.log_max_ra_req_qp;
+	gen->log_max_ra_res_qp = out->hca_cap.log_max_ra_res_qp;
+	gen->max_qp_counters = be16_to_cpu(out->hca_cap.max_qp_count);
+	gen->pkey_table_size = get_pkey_table_size(be16_to_cpu(out->hca_cap.pkey_table_size));
+	gen->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
+	gen->num_ports = out->hca_cap.num_ports & 0xf;
+	gen->log_max_msg = out->hca_cap.log_max_msg & 0x1f;
+	gen->stat_rate_support = be16_to_cpu(out->hca_cap.stat_rate_support);
+	gen->flags = be64_to_cpu(out->hca_cap.flags);
+	pr_debug("flags = 0x%llx\n", gen->flags);
+	gen->uar_sz = out->hca_cap.uar_sz;
+	gen->min_log_pg_sz = out->hca_cap.log_pg_sz;
+
+	t16 = be16_to_cpu(out->hca_cap.bf_log_bf_reg_size);
+	if (t16 & 0x8000) {
+		gen->bf_reg_size = 1 << (t16 & 0x1f);
+		gen->bf_regs_per_page = MLX5_BF_REGS_PER_PAGE;
+	} else {
+		gen->bf_reg_size = 0;
+		gen->bf_regs_per_page = 0;
+	}
+	gen->max_sq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_sq);
+	gen->max_rq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_rq);
+	gen->max_qp_mcg = be32_to_cpu(out->hca_cap.max_qp_mcg) & 0xffffff;
+	gen->log_max_pd = out->hca_cap.log_max_pd & 0x1f;
+	gen->log_max_xrcd = out->hca_cap.log_max_xrcd;
+	gen->log_uar_page_sz = be16_to_cpu(out->hca_cap.log_uar_page_sz);
+}
+
+static const char *caps_opmod_str(u16 opmod)
+{
+	switch (opmod) {
+	case HCA_CAP_OPMOD_GET_MAX:
+		return "GET_MAX";
+	case HCA_CAP_OPMOD_GET_CUR:
+		return "GET_CUR";
+	default:
+		return "Invalid";
+	}
+}
+
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
+		       u16 opmod)
+{
+	struct mlx5_cmd_query_hca_cap_mbox_out *out;
+	struct mlx5_cmd_query_hca_cap_mbox_in in;
 	int err;
 
-	memset(&query_ctx, 0, sizeof(query_ctx));
-	query_out = kzalloc(sizeof(*query_out), GFP_KERNEL);
-	if (!query_out)
+	memset(&in, 0, sizeof(in));
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
 		return -ENOMEM;
 
-	set_ctx = kzalloc(sizeof(*set_ctx), GFP_KERNEL);
-	if (!set_ctx) {
-		err = -ENOMEM;
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
+	in.hdr.opmod  = cpu_to_be16(opmod);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
+
+	err = mlx5_cmd_status_to_err(&out->hdr);
+	if (err) {
+		mlx5_core_warn(dev, "query max hca cap failed, %d\n", err);
 		goto query_ex;
 	}
+	mlx5_core_dbg(dev, "%s\n", caps_opmod_str(opmod));
+	fw2drv_caps(caps, out);
 
-	query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
-	query_ctx.hdr.opmod  = cpu_to_be16(HCA_CAP_OPMOD_GET_CUR);
-	err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
-				 query_out, sizeof(*query_out));
+query_ex:
+	kfree(out);
+	return err;
+}
+
+static int set_caps(struct mlx5_core_dev *dev,
+		    struct mlx5_cmd_set_hca_cap_mbox_in *in)
+{
+	struct mlx5_cmd_set_hca_cap_mbox_out out;
+	int err;
+
+	memset(&out, 0, sizeof(out));
+
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP);
+	err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
 	if (err)
-		goto query_ex;
+		return err;
 
-	err = mlx5_cmd_status_to_err(&query_out->hdr);
-	if (err) {
-		mlx5_core_warn(dev, "query hca cap failed, %d\n", err);
+	err = mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+
+static int handle_hca_cap(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL;
+	struct mlx5_profile *prof = dev->profile;
+	struct mlx5_caps *cur_caps = NULL;
+	struct mlx5_caps *max_caps = NULL;
+	int err = -ENOMEM;
+
+	set_ctx = kzalloc(sizeof(*set_ctx), GFP_KERNEL);
+	if (!set_ctx)
 		goto query_ex;
-	}
 
-	copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap);
+	max_caps = kzalloc(sizeof(*max_caps), GFP_KERNEL);
+	if (!max_caps)
+		goto query_ex;
 
-	if (dev->profile && dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
-		set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
+	cur_caps = kzalloc(sizeof(*cur_caps), GFP_KERNEL);
+	if (!cur_caps)
+		goto query_ex;
 
-	flags = be64_to_cpu(query_out->hca_cap.flags);
-	/* disable checksum */
-	flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
-
-	set_ctx->hca_cap.flags = cpu_to_be64(flags);
-	memset(&set_out, 0, sizeof(set_out));
-	set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12);
-	set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP);
-	err = mlx5_cmd_exec(dev, set_ctx, sizeof(*set_ctx),
-				 &set_out, sizeof(set_out));
-	if (err) {
-		mlx5_core_warn(dev, "set hca cap failed, %d\n", err);
+	err = mlx5_core_get_caps(dev, max_caps, HCA_CAP_OPMOD_GET_MAX);
+	if (err)
 		goto query_ex;
-	}
 
-	err = mlx5_cmd_status_to_err(&set_out.hdr);
+	err = mlx5_core_get_caps(dev, cur_caps, HCA_CAP_OPMOD_GET_CUR);
 	if (err)
 		goto query_ex;
 
+	/* we limit the size of the pkey table to 128 entries for now */
+	cur_caps->gen.pkey_table_size = 128;
+
+	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
+		cur_caps->gen.log_max_qp = prof->log_max_qp;
+
+	/* disable checksum */
+	cur_caps->gen.flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
+
+	copy_rw_fields(&set_ctx->hca_cap, &cur_caps->gen);
+	err = set_caps(dev, set_ctx);
+
 query_ex:
-	kfree(query_out);
+	kfree(cur_caps);
+	kfree(max_caps);
 	kfree(set_ctx);
 
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 68f5d9c77c7b..0a6348cefc01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -174,11 +174,11 @@ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
 	for (i = 0; i < tot_uuars; i++) {
 		bf = &uuari->bfs[i];
 
-		bf->buf_size = dev->caps.bf_reg_size / 2;
+		bf->buf_size = dev->caps.gen.bf_reg_size / 2;
 		bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE];
 		bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map;
 		bf->reg = NULL; /* Add WC support */
-		bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.bf_reg_size +
+		bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.gen.bf_reg_size +
 			MLX5_BF_OFFSET;
 		bf->need_lock = need_uuar_lock(i);
 		spin_lock_init(&bf->lock);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 334947151dfc..dce01fd854a8 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -70,6 +70,11 @@ enum {
 	MLX5_INLINE_SEG = 0x80000000,
 };
 
+enum {
+	MLX5_MIN_PKEY_TABLE_SIZE = 128,
+	MLX5_MAX_LOG_PKEY_TABLE  = 5,
+};
+
 enum {
 	MLX5_PERM_LOCAL_READ	= 1 << 2,
 	MLX5_PERM_LOCAL_WRITE	= 1 << 3,
@@ -184,10 +189,10 @@ enum {
 	MLX5_DEV_CAP_FLAG_CQ_MODER	= 1LL << 29,
 	MLX5_DEV_CAP_FLAG_RESIZE_CQ	= 1LL << 30,
 	MLX5_DEV_CAP_FLAG_RESIZE_SRQ	= 1LL << 32,
+	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 37,
 	MLX5_DEV_CAP_FLAG_REMOTE_FENCE	= 1LL << 38,
 	MLX5_DEV_CAP_FLAG_TLP_HINTS	= 1LL << 39,
 	MLX5_DEV_CAP_FLAG_SIG_HAND_OVER	= 1LL << 40,
-	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 41,
 	MLX5_DEV_CAP_FLAG_CMDIF_CSUM	= 3LL << 46,
 };
 
@@ -243,10 +248,14 @@ enum {
 };
 
 enum {
-	MLX5_CAP_OFF_DCT		= 41,
 	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
 };
 
+enum {
+	HCA_CAP_OPMOD_GET_MAX	= 0,
+	HCA_CAP_OPMOD_GET_CUR	= 1,
+};
+
 struct mlx5_inbox_hdr {
 	__be16		opcode;
 	u8		rsvd[4];
@@ -303,9 +312,10 @@ struct mlx5_hca_cap {
 	u8	log_max_ra_req_qp;
 	u8	rsvd10;
 	u8	log_max_ra_res_qp;
-	u8	rsvd11[4];
+	u8	pad_cap;
+	u8	rsvd11[3];
 	__be16	max_qp_count;
-	__be16	rsvd12;
+	__be16	pkey_table_size;
 	u8	rsvd13;
 	u8	local_ca_ack_delay;
 	u8	rsvd14;
@@ -335,11 +345,7 @@ struct mlx5_hca_cap {
 	u8	log_max_xrcd;
 	u8	rsvd25[42];
 	__be16  log_uar_page_sz;
-	u8	rsvd26[28];
-	u8	log_max_atomic_size_qp;
-	u8	rsvd27[2];
-	u8	log_max_atomic_size_dc;
-	u8	rsvd28[76];
+	u8	rsvd26[108];
 };
 
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b88e9b46d957..45a2add747e0 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -335,23 +335,30 @@ struct mlx5_port_caps {
 	int	pkey_table_len;
 };
 
-struct mlx5_caps {
+struct mlx5_general_caps {
 	u8	log_max_eq;
 	u8	log_max_cq;
 	u8	log_max_qp;
 	u8	log_max_mkey;
 	u8	log_max_pd;
 	u8	log_max_srq;
+	u8	log_max_strq;
+	u8	log_max_mrw_sz;
+	u8	log_max_bsf_list_size;
+	u8	log_max_klm_list_size;
 	u32	max_cqes;
 	int	max_wqes;
+	u32	max_eqes;
+	u32	max_indirection;
 	int	max_sq_desc_sz;
 	int	max_rq_desc_sz;
+	int	max_dc_sq_desc_sz;
 	u64	flags;
 	u16	stat_rate_support;
 	int	log_max_msg;
 	int	num_ports;
-	int	max_ra_res_qp;
-	int	max_ra_req_qp;
+	u8	log_max_ra_res_qp;
+	u8	log_max_ra_req_qp;
 	int	max_srq_wqes;
 	int	bf_reg_size;
 	int	bf_regs_per_page;
@@ -363,6 +370,19 @@ struct mlx5_caps {
 	u8	log_max_mcg;
 	u32	max_qp_mcg;
 	int	min_page_sz;
+	int	pd_cap;
+	u32	max_qp_counters;
+	u32	pkey_table_size;
+	u8	log_max_ra_req_dc;
+	u8	log_max_ra_res_dc;
+	u32	uar_sz;
+	u8	min_log_pg_sz;
+	u8	log_max_xrcd;
+	u16	log_uar_page_sz;
+};
+
+struct mlx5_caps {
+	struct mlx5_general_caps gen;
 };
 
 struct mlx5_cmd_mailbox {
@@ -695,6 +715,8 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
+		       u16 opmod);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
 int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-- 
cgit v1.2.3


From d29b796adada8780db3512c4a34b339f9aeef1ae Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 2 Oct 2014 12:19:43 +0300
Subject: net/mlx5_core: Use hardware registers description header file

Add an auto generated header file that describes hardware registers along with
set of macros that set/get values. The macros do static checks to avoid
overflow, handle endianess, and overall provide a clean way to code commands.
Currently the header file is small and we will add structs as we make use of
the macros.
A few commands were removed from the commands enum since they are not supported
currently and will be added when support is available.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |  36 -------
 drivers/net/ethernet/mellanox/mlx5/core/qp.c  |   3 -
 include/linux/mlx5/device.h                   |  44 ++++++++
 include/linux/mlx5/driver.h                   |  76 +-------------
 include/linux/mlx5/mlx5_ifc.h                 | 143 ++++++++++++++++++++++++++
 5 files changed, 188 insertions(+), 114 deletions(-)
 create mode 100644 include/linux/mlx5/mlx5_ifc.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 6eb0f85cf872..3ecef1310bae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -357,60 +357,24 @@ const char *mlx5_command_str(int command)
 	case MLX5_CMD_OP_2ERR_QP:
 		return "2ERR_QP";
 
-	case MLX5_CMD_OP_RTS2SQD_QP:
-		return "RTS2SQD_QP";
-
-	case MLX5_CMD_OP_SQD2RTS_QP:
-		return "SQD2RTS_QP";
-
 	case MLX5_CMD_OP_2RST_QP:
 		return "2RST_QP";
 
 	case MLX5_CMD_OP_QUERY_QP:
 		return "QUERY_QP";
 
-	case MLX5_CMD_OP_CONF_SQP:
-		return "CONF_SQP";
-
 	case MLX5_CMD_OP_MAD_IFC:
 		return "MAD_IFC";
 
 	case MLX5_CMD_OP_INIT2INIT_QP:
 		return "INIT2INIT_QP";
 
-	case MLX5_CMD_OP_SUSPEND_QP:
-		return "SUSPEND_QP";
-
-	case MLX5_CMD_OP_UNSUSPEND_QP:
-		return "UNSUSPEND_QP";
-
-	case MLX5_CMD_OP_SQD2SQD_QP:
-		return "SQD2SQD_QP";
-
-	case MLX5_CMD_OP_ALLOC_QP_COUNTER_SET:
-		return "ALLOC_QP_COUNTER_SET";
-
-	case MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET:
-		return "DEALLOC_QP_COUNTER_SET";
-
-	case MLX5_CMD_OP_QUERY_QP_COUNTER_SET:
-		return "QUERY_QP_COUNTER_SET";
-
 	case MLX5_CMD_OP_CREATE_PSV:
 		return "CREATE_PSV";
 
 	case MLX5_CMD_OP_DESTROY_PSV:
 		return "DESTROY_PSV";
 
-	case MLX5_CMD_OP_QUERY_PSV:
-		return "QUERY_PSV";
-
-	case MLX5_CMD_OP_QUERY_SIG_RULE_TABLE:
-		return "QUERY_SIG_RULE_TABLE";
-
-	case MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE:
-		return "QUERY_BLOCK_SIZE_TABLE";
-
 	case MLX5_CMD_OP_CREATE_SRQ:
 		return "CREATE_SRQ";
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 8145b4668229..415b67ce379e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -184,13 +184,10 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
 			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
 			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
 			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_RTS2RTS_QP,
-			[MLX5_QP_STATE_SQD]	= MLX5_CMD_OP_RTS2SQD_QP,
 		},
 		[MLX5_QP_STATE_SQD] = {
 			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
 			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
-			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_SQD2RTS_QP,
-			[MLX5_QP_STATE_SQD]	= MLX5_CMD_OP_SQD2SQD_QP,
 		},
 		[MLX5_QP_STATE_SQER] = {
 			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dce01fd854a8..0032687f58c7 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -44,6 +44,50 @@
 #error Host endianness not defined
 #endif
 
+/* helper macros */
+#define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0)
+#define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld)
+#define __mlx5_bit_off(typ, fld) ((unsigned)(unsigned long)(&(__mlx5_nullp(typ)->fld)))
+#define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
+#define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64)
+#define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f))
+#define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << __mlx5_dw_bit_off(typ, fld))
+#define __mlx5_st_sz_bits(typ) sizeof(struct mlx5_ifc_##typ##_bits)
+
+#define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
+#define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
+#define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
+#define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
+#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld))
+
+/* insert a value to a struct */
+#define MLX5_SET(typ, p, fld, v) do { \
+	BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32);             \
+	*((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
+	cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \
+		     (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \
+		     << __mlx5_dw_bit_off(typ, fld))); \
+} while (0)
+
+#define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\
+__mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \
+__mlx5_mask(typ, fld))
+
+#define MLX5_GET_PR(typ, p, fld) ({ \
+	u32 ___t = MLX5_GET(typ, p, fld); \
+	pr_debug(#fld " = 0x%x\n", ___t); \
+	___t; \
+})
+
+#define MLX5_SET64(typ, p, fld, v) do { \
+	BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) != 64); \
+	BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \
+	*((__be64 *)(p) + __mlx5_64_off(typ, fld)) = cpu_to_be64(v); \
+} while (0)
+
+#define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld)))
+
 enum {
 	MLX5_MAX_COMMANDS		= 32,
 	MLX5_CMD_DATA_BLOCK_SIZE	= 512,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 45a2add747e0..6f48dc793b9f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -44,6 +44,7 @@
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
+#include <linux/mlx5/mlx5_ifc.h>
 
 enum {
 	MLX5_BOARD_ID_LEN = 64,
@@ -98,81 +99,6 @@ enum {
 	MLX5_ATOMIC_MODE_256B		= 8 << 16,
 };
 
-enum {
-	MLX5_CMD_OP_QUERY_HCA_CAP		= 0x100,
-	MLX5_CMD_OP_QUERY_ADAPTER		= 0x101,
-	MLX5_CMD_OP_INIT_HCA			= 0x102,
-	MLX5_CMD_OP_TEARDOWN_HCA		= 0x103,
-	MLX5_CMD_OP_ENABLE_HCA			= 0x104,
-	MLX5_CMD_OP_DISABLE_HCA			= 0x105,
-	MLX5_CMD_OP_QUERY_PAGES			= 0x107,
-	MLX5_CMD_OP_MANAGE_PAGES		= 0x108,
-	MLX5_CMD_OP_SET_HCA_CAP			= 0x109,
-
-	MLX5_CMD_OP_CREATE_MKEY			= 0x200,
-	MLX5_CMD_OP_QUERY_MKEY			= 0x201,
-	MLX5_CMD_OP_DESTROY_MKEY		= 0x202,
-	MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS	= 0x203,
-
-	MLX5_CMD_OP_CREATE_EQ			= 0x301,
-	MLX5_CMD_OP_DESTROY_EQ			= 0x302,
-	MLX5_CMD_OP_QUERY_EQ			= 0x303,
-
-	MLX5_CMD_OP_CREATE_CQ			= 0x400,
-	MLX5_CMD_OP_DESTROY_CQ			= 0x401,
-	MLX5_CMD_OP_QUERY_CQ			= 0x402,
-	MLX5_CMD_OP_MODIFY_CQ			= 0x403,
-
-	MLX5_CMD_OP_CREATE_QP			= 0x500,
-	MLX5_CMD_OP_DESTROY_QP			= 0x501,
-	MLX5_CMD_OP_RST2INIT_QP			= 0x502,
-	MLX5_CMD_OP_INIT2RTR_QP			= 0x503,
-	MLX5_CMD_OP_RTR2RTS_QP			= 0x504,
-	MLX5_CMD_OP_RTS2RTS_QP			= 0x505,
-	MLX5_CMD_OP_SQERR2RTS_QP		= 0x506,
-	MLX5_CMD_OP_2ERR_QP			= 0x507,
-	MLX5_CMD_OP_RTS2SQD_QP			= 0x508,
-	MLX5_CMD_OP_SQD2RTS_QP			= 0x509,
-	MLX5_CMD_OP_2RST_QP			= 0x50a,
-	MLX5_CMD_OP_QUERY_QP			= 0x50b,
-	MLX5_CMD_OP_CONF_SQP			= 0x50c,
-	MLX5_CMD_OP_MAD_IFC			= 0x50d,
-	MLX5_CMD_OP_INIT2INIT_QP		= 0x50e,
-	MLX5_CMD_OP_SUSPEND_QP			= 0x50f,
-	MLX5_CMD_OP_UNSUSPEND_QP		= 0x510,
-	MLX5_CMD_OP_SQD2SQD_QP			= 0x511,
-	MLX5_CMD_OP_ALLOC_QP_COUNTER_SET	= 0x512,
-	MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET	= 0x513,
-	MLX5_CMD_OP_QUERY_QP_COUNTER_SET	= 0x514,
-
-	MLX5_CMD_OP_CREATE_PSV			= 0x600,
-	MLX5_CMD_OP_DESTROY_PSV			= 0x601,
-	MLX5_CMD_OP_QUERY_PSV			= 0x602,
-	MLX5_CMD_OP_QUERY_SIG_RULE_TABLE	= 0x603,
-	MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE	= 0x604,
-
-	MLX5_CMD_OP_CREATE_SRQ			= 0x700,
-	MLX5_CMD_OP_DESTROY_SRQ			= 0x701,
-	MLX5_CMD_OP_QUERY_SRQ			= 0x702,
-	MLX5_CMD_OP_ARM_RQ			= 0x703,
-	MLX5_CMD_OP_RESIZE_SRQ			= 0x704,
-
-	MLX5_CMD_OP_ALLOC_PD			= 0x800,
-	MLX5_CMD_OP_DEALLOC_PD			= 0x801,
-	MLX5_CMD_OP_ALLOC_UAR			= 0x802,
-	MLX5_CMD_OP_DEALLOC_UAR			= 0x803,
-
-	MLX5_CMD_OP_ATTACH_TO_MCG		= 0x806,
-	MLX5_CMD_OP_DETACH_FROM_MCG		= 0x807,
-
-
-	MLX5_CMD_OP_ALLOC_XRCD			= 0x80e,
-	MLX5_CMD_OP_DEALLOC_XRCD		= 0x80f,
-
-	MLX5_CMD_OP_ACCESS_REG			= 0x805,
-	MLX5_CMD_OP_MAX				= 0x810,
-};
-
 enum {
 	MLX5_REG_PCAP		 = 0x5001,
 	MLX5_REG_PMTU		 = 0x5003,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
new file mode 100644
index 000000000000..df3bd9b5fbcf
--- /dev/null
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2014, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IFC_H
+#define MLX5_IFC_H
+
+enum {
+	MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
+	MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
+	MLX5_CMD_OP_INIT_HCA                      = 0x102,
+	MLX5_CMD_OP_TEARDOWN_HCA                  = 0x103,
+	MLX5_CMD_OP_ENABLE_HCA                    = 0x104,
+	MLX5_CMD_OP_DISABLE_HCA                   = 0x105,
+	MLX5_CMD_OP_QUERY_PAGES                   = 0x107,
+	MLX5_CMD_OP_MANAGE_PAGES                  = 0x108,
+	MLX5_CMD_OP_SET_HCA_CAP                   = 0x109,
+	MLX5_CMD_OP_CREATE_MKEY                   = 0x200,
+	MLX5_CMD_OP_QUERY_MKEY                    = 0x201,
+	MLX5_CMD_OP_DESTROY_MKEY                  = 0x202,
+	MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS        = 0x203,
+	MLX5_CMD_OP_PAGE_FAULT_RESUME             = 0x204,
+	MLX5_CMD_OP_CREATE_EQ                     = 0x301,
+	MLX5_CMD_OP_DESTROY_EQ                    = 0x302,
+	MLX5_CMD_OP_QUERY_EQ                      = 0x303,
+	MLX5_CMD_OP_GEN_EQE                       = 0x304,
+	MLX5_CMD_OP_CREATE_CQ                     = 0x400,
+	MLX5_CMD_OP_DESTROY_CQ                    = 0x401,
+	MLX5_CMD_OP_QUERY_CQ                      = 0x402,
+	MLX5_CMD_OP_MODIFY_CQ                     = 0x403,
+	MLX5_CMD_OP_CREATE_QP                     = 0x500,
+	MLX5_CMD_OP_DESTROY_QP                    = 0x501,
+	MLX5_CMD_OP_RST2INIT_QP                   = 0x502,
+	MLX5_CMD_OP_INIT2RTR_QP                   = 0x503,
+	MLX5_CMD_OP_RTR2RTS_QP                    = 0x504,
+	MLX5_CMD_OP_RTS2RTS_QP                    = 0x505,
+	MLX5_CMD_OP_SQERR2RTS_QP                  = 0x506,
+	MLX5_CMD_OP_2ERR_QP                       = 0x507,
+	MLX5_CMD_OP_2RST_QP                       = 0x50a,
+	MLX5_CMD_OP_QUERY_QP                      = 0x50b,
+	MLX5_CMD_OP_INIT2INIT_QP                  = 0x50e,
+	MLX5_CMD_OP_CREATE_PSV                    = 0x600,
+	MLX5_CMD_OP_DESTROY_PSV                   = 0x601,
+	MLX5_CMD_OP_CREATE_SRQ                    = 0x700,
+	MLX5_CMD_OP_DESTROY_SRQ                   = 0x701,
+	MLX5_CMD_OP_QUERY_SRQ                     = 0x702,
+	MLX5_CMD_OP_ARM_RQ                        = 0x703,
+	MLX5_CMD_OP_RESIZE_SRQ                    = 0x704,
+	MLX5_CMD_OP_CREATE_DCT                    = 0x710,
+	MLX5_CMD_OP_DESTROY_DCT                   = 0x711,
+	MLX5_CMD_OP_DRAIN_DCT                     = 0x712,
+	MLX5_CMD_OP_QUERY_DCT                     = 0x713,
+	MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION     = 0x714,
+	MLX5_CMD_OP_QUERY_VPORT_STATE             = 0x750,
+	MLX5_CMD_OP_MODIFY_VPORT_STATE            = 0x751,
+	MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT       = 0x752,
+	MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT      = 0x753,
+	MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT       = 0x754,
+	MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT      = 0x755,
+	MLX5_CMD_OP_QUERY_RCOE_ADDRESS            = 0x760,
+	MLX5_CMD_OP_SET_ROCE_ADDRESS              = 0x761,
+	MLX5_CMD_OP_QUERY_VPORT_COUNTER           = 0x770,
+	MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
+	MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
+	MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
+	MLX5_CMD_OP_ALLOC_PD                      = 0x800,
+	MLX5_CMD_OP_DEALLOC_PD                    = 0x801,
+	MLX5_CMD_OP_ALLOC_UAR                     = 0x802,
+	MLX5_CMD_OP_DEALLOC_UAR                   = 0x803,
+	MLX5_CMD_OP_CONFIG_INT_MODERATION         = 0x804,
+	MLX5_CMD_OP_ACCESS_REG                    = 0x805,
+	MLX5_CMD_OP_ATTACH_TO_MCG                 = 0x806,
+	MLX5_CMD_OP_DETACH_FROM_MCG               = 0x807,
+	MLX5_CMD_OP_GET_DROPPED_PACKET_LOG        = 0x80a,
+	MLX5_CMD_OP_MAD_IFC                       = 0x50d,
+	MLX5_CMD_OP_QUERY_MAD_DEMUX               = 0x80b,
+	MLX5_CMD_OP_SET_MAD_DEMUX                 = 0x80c,
+	MLX5_CMD_OP_NOP                           = 0x80d,
+	MLX5_CMD_OP_ALLOC_XRCD                    = 0x80e,
+	MLX5_CMD_OP_DEALLOC_XRCD                  = 0x80f,
+	MLX5_CMD_OP_SET_BURST_SIZE                = 0x812,
+	MLX5_CMD_OP_QUERY_BURST_SZIE              = 0x813,
+	MLX5_CMD_OP_ACTIVATE_TRACER               = 0x814,
+	MLX5_CMD_OP_DEACTIVATE_TRACER             = 0x815,
+	MLX5_CMD_OP_CREATE_SNIFFER_RULE           = 0x820,
+	MLX5_CMD_OP_DESTROY_SNIFFER_RULE          = 0x821,
+	MLX5_CMD_OP_QUERY_CONG_PARAMS             = 0x822,
+	MLX5_CMD_OP_MODIFY_CONG_PARAMS            = 0x823,
+	MLX5_CMD_OP_QUERY_CONG_STATISTICS         = 0x824,
+	MLX5_CMD_OP_CREATE_TIR                    = 0x900,
+	MLX5_CMD_OP_MODIFY_TIR                    = 0x901,
+	MLX5_CMD_OP_DESTROY_TIR                   = 0x902,
+	MLX5_CMD_OP_QUERY_TIR                     = 0x903,
+	MLX5_CMD_OP_CREATE_TIS                    = 0x912,
+	MLX5_CMD_OP_MODIFY_TIS                    = 0x913,
+	MLX5_CMD_OP_DESTROY_TIS                   = 0x914,
+	MLX5_CMD_OP_QUERY_TIS                     = 0x915,
+	MLX5_CMD_OP_CREATE_SQ                     = 0x904,
+	MLX5_CMD_OP_MODIFY_SQ                     = 0x905,
+	MLX5_CMD_OP_DESTROY_SQ                    = 0x906,
+	MLX5_CMD_OP_QUERY_SQ                      = 0x907,
+	MLX5_CMD_OP_CREATE_RQ                     = 0x908,
+	MLX5_CMD_OP_MODIFY_RQ                     = 0x909,
+	MLX5_CMD_OP_DESTROY_RQ                    = 0x90a,
+	MLX5_CMD_OP_QUERY_RQ                      = 0x90b,
+	MLX5_CMD_OP_CREATE_RMP                    = 0x90c,
+	MLX5_CMD_OP_MODIFY_RMP                    = 0x90d,
+	MLX5_CMD_OP_DESTROY_RMP                   = 0x90e,
+	MLX5_CMD_OP_QUERY_RMP                     = 0x90f,
+	MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY          = 0x910,
+	MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY        = 0x911,
+	MLX5_CMD_OP_MAX				  = 0x911
+};
+
+#endif /* MLX5_IFC_H */
-- 
cgit v1.2.3


From b775516b042f9e35f856bd2914afefd9d23021d7 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 2 Oct 2014 12:19:44 +0300
Subject: net/mlx5_core: use set/get macros in device caps

Transform device capabilities related commands to use set/get macros to
manipulate command mailboxes.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |  17 ++
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 150 +++++++++---------
 include/linux/mlx5/device.h                    |  92 -----------
 include/linux/mlx5/driver.h                    |   1 +
 include/linux/mlx5/mlx5_ifc.h                  | 206 +++++++++++++++++++++++++
 5 files changed, 298 insertions(+), 168 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 3ecef1310bae..368c6c5ea014 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1537,3 +1537,20 @@ int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
 
 	return cmd_status_to_err(hdr->status);
 }
+
+int mlx5_cmd_status_to_err_v2(void *ptr)
+{
+	u32	syndrome;
+	u8	status;
+
+	status = be32_to_cpu(*(__be32 *)ptr) >> 24;
+	if (!status)
+		return 0;
+
+	syndrome = be32_to_cpu(*(__be32 *)(ptr + 4));
+
+	pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
+		cmd_status_str(status), status, syndrome);
+
+	return cmd_status_to_err(status);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index d9f74618befa..b9e3259e415f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -43,6 +43,7 @@
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/srq.h>
 #include <linux/debugfs.h>
+#include <linux/mlx5/mlx5_ifc.h>
 #include "mlx5_core.h"
 
 #define DRIVER_NAME "mlx5_core"
@@ -277,18 +278,20 @@ static u16 to_fw_pkey_sz(u32 size)
 
 /* selectively copy writable fields clearing any reserved area
  */
-static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_general_caps *from)
+static void copy_rw_fields(void *to, struct mlx5_caps *from)
 {
+	__be64 *flags_off = (__be64 *)MLX5_ADDR_OF(cmd_hca_cap, to, reserved_22);
 	u64 v64;
 
-	to->log_max_qp = from->log_max_qp & 0x1f;
-	to->log_max_ra_req_dc = from->log_max_ra_req_dc & 0x3f;
-	to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f;
-	to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f;
-	to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f;
-	to->pkey_table_size = cpu_to_be16(to_fw_pkey_sz(from->pkey_table_size));
-	v64 = from->flags & MLX5_CAP_BITS_RW_MASK;
-	to->flags = cpu_to_be64(v64);
+	MLX5_SET(cmd_hca_cap, to, log_max_qp, from->gen.log_max_qp);
+	MLX5_SET(cmd_hca_cap, to, log_max_ra_req_qp, from->gen.log_max_ra_req_qp);
+	MLX5_SET(cmd_hca_cap, to, log_max_ra_res_qp, from->gen.log_max_ra_res_qp);
+	MLX5_SET(cmd_hca_cap, to, pkey_table_size, from->gen.pkey_table_size);
+	MLX5_SET(cmd_hca_cap, to, log_max_ra_req_dc, from->gen.log_max_ra_req_dc);
+	MLX5_SET(cmd_hca_cap, to, log_max_ra_res_dc, from->gen.log_max_ra_res_dc);
+	MLX5_SET(cmd_hca_cap, to, pkey_table_size, to_fw_pkey_sz(from->gen.pkey_table_size));
+	v64 = from->gen.flags & MLX5_CAP_BITS_RW_MASK;
+	*flags_off = cpu_to_be64(v64);
 }
 
 static u16 get_pkey_table_size(int pkey)
@@ -299,55 +302,47 @@ static u16 get_pkey_table_size(int pkey)
 	return MLX5_MIN_PKEY_TABLE_SIZE << pkey;
 }
 
-static void fw2drv_caps(struct mlx5_caps *caps,
-			struct mlx5_cmd_query_hca_cap_mbox_out *out)
+static void fw2drv_caps(struct mlx5_caps *caps, void *out)
 {
 	struct mlx5_general_caps *gen = &caps->gen;
-	u16 t16;
-
-	gen->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
-	gen->max_wqes = 1 << out->hca_cap.log_max_qp_sz;
-	gen->log_max_qp = out->hca_cap.log_max_qp & 0x1f;
-	gen->log_max_strq = out->hca_cap.log_max_strq_sz;
-	gen->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
-	gen->max_cqes = 1 << out->hca_cap.log_max_cq_sz;
-	gen->log_max_cq = out->hca_cap.log_max_cq & 0x1f;
-	gen->max_eqes = out->hca_cap.log_max_eq_sz;
-	gen->log_max_mkey = out->hca_cap.log_max_mkey & 0x3f;
-	gen->log_max_eq = out->hca_cap.log_max_eq & 0xf;
-	gen->max_indirection = out->hca_cap.max_indirection;
-	gen->log_max_mrw_sz = out->hca_cap.log_max_mrw_sz;
-	gen->log_max_bsf_list_size = 0;
-	gen->log_max_klm_list_size = 0;
-	gen->log_max_ra_req_dc = out->hca_cap.log_max_ra_req_dc;
-	gen->log_max_ra_res_dc = out->hca_cap.log_max_ra_res_dc;
-	gen->log_max_ra_req_qp = out->hca_cap.log_max_ra_req_qp;
-	gen->log_max_ra_res_qp = out->hca_cap.log_max_ra_res_qp;
-	gen->max_qp_counters = be16_to_cpu(out->hca_cap.max_qp_count);
-	gen->pkey_table_size = get_pkey_table_size(be16_to_cpu(out->hca_cap.pkey_table_size));
-	gen->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
-	gen->num_ports = out->hca_cap.num_ports & 0xf;
-	gen->log_max_msg = out->hca_cap.log_max_msg & 0x1f;
-	gen->stat_rate_support = be16_to_cpu(out->hca_cap.stat_rate_support);
-	gen->flags = be64_to_cpu(out->hca_cap.flags);
-	pr_debug("flags = 0x%llx\n", gen->flags);
-	gen->uar_sz = out->hca_cap.uar_sz;
-	gen->min_log_pg_sz = out->hca_cap.log_pg_sz;
 
-	t16 = be16_to_cpu(out->hca_cap.bf_log_bf_reg_size);
-	if (t16 & 0x8000) {
-		gen->bf_reg_size = 1 << (t16 & 0x1f);
-		gen->bf_regs_per_page = MLX5_BF_REGS_PER_PAGE;
-	} else {
-		gen->bf_reg_size = 0;
-		gen->bf_regs_per_page = 0;
-	}
-	gen->max_sq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_sq);
-	gen->max_rq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_rq);
-	gen->max_qp_mcg = be32_to_cpu(out->hca_cap.max_qp_mcg) & 0xffffff;
-	gen->log_max_pd = out->hca_cap.log_max_pd & 0x1f;
-	gen->log_max_xrcd = out->hca_cap.log_max_xrcd;
-	gen->log_uar_page_sz = be16_to_cpu(out->hca_cap.log_uar_page_sz);
+	gen->max_srq_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_srq_sz);
+	gen->max_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_qp_sz);
+	gen->log_max_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_qp);
+	gen->log_max_strq = MLX5_GET_PR(cmd_hca_cap, out, log_max_strq_sz);
+	gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srqs);
+	gen->max_cqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_cq_sz);
+	gen->log_max_cq = MLX5_GET_PR(cmd_hca_cap, out, log_max_cq);
+	gen->max_eqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_eq_sz);
+	gen->log_max_mkey = MLX5_GET_PR(cmd_hca_cap, out, log_max_mkey);
+	gen->log_max_eq = MLX5_GET_PR(cmd_hca_cap, out, log_max_eq);
+	gen->max_indirection = MLX5_GET_PR(cmd_hca_cap, out, max_indirection);
+	gen->log_max_mrw_sz = MLX5_GET_PR(cmd_hca_cap, out, log_max_mrw_sz);
+	gen->log_max_bsf_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_bsf_list_size);
+	gen->log_max_klm_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_klm_list_size);
+	gen->log_max_ra_req_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_dc);
+	gen->log_max_ra_res_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_dc);
+	gen->log_max_ra_req_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_qp);
+	gen->log_max_ra_res_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_qp);
+	gen->max_qp_counters = MLX5_GET_PR(cmd_hca_cap, out, max_qp_cnt);
+	gen->pkey_table_size = get_pkey_table_size(MLX5_GET_PR(cmd_hca_cap, out, pkey_table_size));
+	gen->local_ca_ack_delay = MLX5_GET_PR(cmd_hca_cap, out, local_ca_ack_delay);
+	gen->num_ports = MLX5_GET_PR(cmd_hca_cap, out, num_ports);
+	gen->log_max_msg = MLX5_GET_PR(cmd_hca_cap, out, log_max_msg);
+	gen->stat_rate_support = MLX5_GET_PR(cmd_hca_cap, out, stat_rate_support);
+	gen->flags = be64_to_cpu(*(__be64 *)MLX5_ADDR_OF(cmd_hca_cap, out, reserved_22));
+	pr_debug("flags = 0x%llx\n", gen->flags);
+	gen->uar_sz = MLX5_GET_PR(cmd_hca_cap, out, uar_sz);
+	gen->min_log_pg_sz = MLX5_GET_PR(cmd_hca_cap, out, log_pg_sz);
+	gen->bf_reg_size = MLX5_GET_PR(cmd_hca_cap, out, bf);
+	gen->bf_reg_size = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_bf_reg_size);
+	gen->max_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq);
+	gen->max_rq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_rq);
+	gen->max_dc_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq_dc);
+	gen->max_qp_mcg = MLX5_GET_PR(cmd_hca_cap, out, max_qp_mcg);
+	gen->log_max_pd = MLX5_GET_PR(cmd_hca_cap, out, log_max_pd);
+	gen->log_max_xrcd = MLX5_GET_PR(cmd_hca_cap, out, log_max_xrcd);
+	gen->log_uar_page_sz = MLX5_GET_PR(cmd_hca_cap, out, log_uar_page_sz);
 }
 
 static const char *caps_opmod_str(u16 opmod)
@@ -365,59 +360,61 @@ static const char *caps_opmod_str(u16 opmod)
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
 		       u16 opmod)
 {
-	struct mlx5_cmd_query_hca_cap_mbox_out *out;
-	struct mlx5_cmd_query_hca_cap_mbox_in in;
+	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
+	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *out;
 	int err;
 
-	memset(&in, 0, sizeof(in));
-	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	memset(in, 0, sizeof(in));
+	out = kzalloc(out_sz, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+	if (err)
+		goto query_ex;
 
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
-	in.hdr.opmod  = cpu_to_be16(opmod);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
-
-	err = mlx5_cmd_status_to_err(&out->hdr);
+	err = mlx5_cmd_status_to_err_v2(out);
 	if (err) {
 		mlx5_core_warn(dev, "query max hca cap failed, %d\n", err);
 		goto query_ex;
 	}
 	mlx5_core_dbg(dev, "%s\n", caps_opmod_str(opmod));
-	fw2drv_caps(caps, out);
+	fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct));
 
 query_ex:
 	kfree(out);
 	return err;
 }
 
-static int set_caps(struct mlx5_core_dev *dev,
-		    struct mlx5_cmd_set_hca_cap_mbox_in *in)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
 {
-	struct mlx5_cmd_set_hca_cap_mbox_out out;
+	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
 	int err;
 
-	memset(&out, 0, sizeof(out));
+	memset(out, 0, sizeof(out));
 
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP);
-	err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
+	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
 	if (err)
 		return err;
 
-	err = mlx5_cmd_status_to_err(&out.hdr);
+	err = mlx5_cmd_status_to_err_v2(out);
 
 	return err;
 }
 
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
-	struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL;
+	void *set_ctx = NULL;
 	struct mlx5_profile *prof = dev->profile;
 	struct mlx5_caps *cur_caps = NULL;
 	struct mlx5_caps *max_caps = NULL;
 	int err = -ENOMEM;
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
 
-	set_ctx = kzalloc(sizeof(*set_ctx), GFP_KERNEL);
+	set_ctx = kzalloc(set_sz, GFP_KERNEL);
 	if (!set_ctx)
 		goto query_ex;
 
@@ -446,8 +443,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	/* disable checksum */
 	cur_caps->gen.flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
 
-	copy_rw_fields(&set_ctx->hca_cap, &cur_caps->gen);
-	err = set_caps(dev, set_ctx);
+	copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, hca_capability_struct),
+		       cur_caps);
+	err = set_caps(dev, set_ctx, set_sz);
 
 query_ex:
 	kfree(cur_caps);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0032687f58c7..1d67fd32e71c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -327,98 +327,6 @@ struct mlx5_cmd_query_adapter_mbox_out {
 	u8			vsd_psid[16];
 };
 
-struct mlx5_hca_cap {
-	u8	rsvd1[16];
-	u8	log_max_srq_sz;
-	u8	log_max_qp_sz;
-	u8	rsvd2;
-	u8	log_max_qp;
-	u8	log_max_strq_sz;
-	u8	log_max_srqs;
-	u8	rsvd4[2];
-	u8	rsvd5;
-	u8	log_max_cq_sz;
-	u8	rsvd6;
-	u8	log_max_cq;
-	u8	log_max_eq_sz;
-	u8	log_max_mkey;
-	u8	rsvd7;
-	u8	log_max_eq;
-	u8	max_indirection;
-	u8	log_max_mrw_sz;
-	u8	log_max_bsf_list_sz;
-	u8	log_max_klm_list_sz;
-	u8	rsvd_8_0;
-	u8	log_max_ra_req_dc;
-	u8	rsvd_8_1;
-	u8	log_max_ra_res_dc;
-	u8	rsvd9;
-	u8	log_max_ra_req_qp;
-	u8	rsvd10;
-	u8	log_max_ra_res_qp;
-	u8	pad_cap;
-	u8	rsvd11[3];
-	__be16	max_qp_count;
-	__be16	pkey_table_size;
-	u8	rsvd13;
-	u8	local_ca_ack_delay;
-	u8	rsvd14;
-	u8	num_ports;
-	u8	log_max_msg;
-	u8	rsvd15[3];
-	__be16	stat_rate_support;
-	u8	rsvd16[2];
-	__be64	flags;
-	u8	rsvd17;
-	u8	uar_sz;
-	u8	rsvd18;
-	u8	log_pg_sz;
-	__be16	bf_log_bf_reg_size;
-	u8	rsvd19[4];
-	__be16	max_desc_sz_sq;
-	u8	rsvd20[2];
-	__be16	max_desc_sz_rq;
-	u8	rsvd21[2];
-	__be16	max_desc_sz_sq_dc;
-	__be32	max_qp_mcg;
-	u8	rsvd22[3];
-	u8	log_max_mcg;
-	u8	rsvd23;
-	u8	log_max_pd;
-	u8	rsvd24;
-	u8	log_max_xrcd;
-	u8	rsvd25[42];
-	__be16  log_uar_page_sz;
-	u8	rsvd26[108];
-};
-
-
-struct mlx5_cmd_query_hca_cap_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-
-struct mlx5_cmd_query_hca_cap_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-	struct mlx5_hca_cap     hca_cap;
-};
-
-
-struct mlx5_cmd_set_hca_cap_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-	struct mlx5_hca_cap     hca_cap;
-};
-
-
-struct mlx5_cmd_set_hca_cap_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-};
-
-
 struct mlx5_cmd_init_hca_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	u8			rsvd0[2];
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6f48dc793b9f..c439f9c59b93 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -641,6 +641,7 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
+int mlx5_cmd_status_to_err_v2(void *ptr);
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
 		       u16 opmod);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index df3bd9b5fbcf..5f48b8f592c5 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -140,4 +140,210 @@ enum {
 	MLX5_CMD_OP_MAX				  = 0x911
 };
 
+struct mlx5_ifc_cmd_hca_cap_bits {
+	u8         reserved_0[0x80];
+
+	u8         log_max_srq_sz[0x8];
+	u8         log_max_qp_sz[0x8];
+	u8         reserved_1[0xb];
+	u8         log_max_qp[0x5];
+
+	u8         log_max_strq_sz[0x8];
+	u8         reserved_2[0x3];
+	u8         log_max_srqs[0x5];
+	u8         reserved_3[0x10];
+
+	u8         reserved_4[0x8];
+	u8         log_max_cq_sz[0x8];
+	u8         reserved_5[0xb];
+	u8         log_max_cq[0x5];
+
+	u8         log_max_eq_sz[0x8];
+	u8         reserved_6[0x2];
+	u8         log_max_mkey[0x6];
+	u8         reserved_7[0xc];
+	u8         log_max_eq[0x4];
+
+	u8         max_indirection[0x8];
+	u8         reserved_8[0x1];
+	u8         log_max_mrw_sz[0x7];
+	u8         reserved_9[0x2];
+	u8         log_max_bsf_list_size[0x6];
+	u8         reserved_10[0x2];
+	u8         log_max_klm_list_size[0x6];
+
+	u8         reserved_11[0xa];
+	u8         log_max_ra_req_dc[0x6];
+	u8         reserved_12[0xa];
+	u8         log_max_ra_res_dc[0x6];
+
+	u8         reserved_13[0xa];
+	u8         log_max_ra_req_qp[0x6];
+	u8         reserved_14[0xa];
+	u8         log_max_ra_res_qp[0x6];
+
+	u8         pad_cap[0x1];
+	u8         cc_query_allowed[0x1];
+	u8         cc_modify_allowed[0x1];
+	u8         reserved_15[0x1d];
+
+	u8         reserved_16[0x6];
+	u8         max_qp_cnt[0xa];
+	u8         pkey_table_size[0x10];
+
+	u8         eswitch_owner[0x1];
+	u8         reserved_17[0xa];
+	u8         local_ca_ack_delay[0x5];
+	u8         reserved_18[0x8];
+	u8         num_ports[0x8];
+
+	u8         reserved_19[0x3];
+	u8         log_max_msg[0x5];
+	u8         reserved_20[0x18];
+
+	u8         stat_rate_support[0x10];
+	u8         reserved_21[0x10];
+
+	u8         reserved_22[0x10];
+	u8         cmdif_checksum[0x2];
+	u8         sigerr_cqe[0x1];
+	u8         reserved_23[0x1];
+	u8         wq_signature[0x1];
+	u8         sctr_data_cqe[0x1];
+	u8         reserved_24[0x1];
+	u8         sho[0x1];
+	u8         tph[0x1];
+	u8         rf[0x1];
+	u8         dc[0x1];
+	u8         reserved_25[0x2];
+	u8         roce[0x1];
+	u8         atomic[0x1];
+	u8         rsz_srq[0x1];
+
+	u8         cq_oi[0x1];
+	u8         cq_resize[0x1];
+	u8         cq_moderation[0x1];
+	u8         sniffer_rule_flow[0x1];
+	u8         sniffer_rule_vport[0x1];
+	u8         sniffer_rule_phy[0x1];
+	u8         reserved_26[0x1];
+	u8         pg[0x1];
+	u8         block_lb_mc[0x1];
+	u8         reserved_27[0x3];
+	u8         cd[0x1];
+	u8         reserved_28[0x1];
+	u8         apm[0x1];
+	u8         reserved_29[0x7];
+	u8         qkv[0x1];
+	u8         pkv[0x1];
+	u8         reserved_30[0x4];
+	u8         xrc[0x1];
+	u8         ud[0x1];
+	u8         uc[0x1];
+	u8         rc[0x1];
+
+	u8         reserved_31[0xa];
+	u8         uar_sz[0x6];
+	u8         reserved_32[0x8];
+	u8         log_pg_sz[0x8];
+
+	u8         bf[0x1];
+	u8         reserved_33[0xa];
+	u8         log_bf_reg_size[0x5];
+	u8         reserved_34[0x10];
+
+	u8         reserved_35[0x10];
+	u8         max_wqe_sz_sq[0x10];
+
+	u8         reserved_36[0x10];
+	u8         max_wqe_sz_rq[0x10];
+
+	u8         reserved_37[0x10];
+	u8         max_wqe_sz_sq_dc[0x10];
+
+	u8         reserved_38[0x7];
+	u8         max_qp_mcg[0x19];
+
+	u8         reserved_39[0x18];
+	u8         log_max_mcg[0x8];
+
+	u8         reserved_40[0xb];
+	u8         log_max_pd[0x5];
+	u8         reserved_41[0xb];
+	u8         log_max_xrcd[0x5];
+
+	u8         reserved_42[0x20];
+
+	u8         reserved_43[0x3];
+	u8         log_max_rq[0x5];
+	u8         reserved_44[0x3];
+	u8         log_max_sq[0x5];
+	u8         reserved_45[0x3];
+	u8         log_max_tir[0x5];
+	u8         reserved_46[0x3];
+	u8         log_max_tis[0x5];
+
+	u8         reserved_47[0x13];
+	u8         log_max_rq_per_tir[0x5];
+	u8         reserved_48[0x3];
+	u8         log_max_tis_per_sq[0x5];
+
+	u8         reserved_49[0xe0];
+
+	u8         reserved_50[0x10];
+	u8         log_uar_page_sz[0x10];
+
+	u8         reserved_51[0x100];
+
+	u8         reserved_52[0x1f];
+	u8         cqe_zip[0x1];
+
+	u8         cqe_zip_timeout[0x10];
+	u8         cqe_zip_max_num[0x10];
+
+	u8         reserved_53[0x220];
+};
+
+struct mlx5_ifc_set_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	struct mlx5_ifc_cmd_hca_cap_bits hca_capability_struct;
+};
+
+struct mlx5_ifc_query_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_query_hca_cap_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         capability_struct[256][0x8];
+};
+
+struct mlx5_ifc_set_hca_cap_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
 #endif /* MLX5_IFC_H */
-- 
cgit v1.2.3


From 5903325a64834211daf63a62db3b35ee580cb8bf Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 2 Oct 2014 12:19:45 +0300
Subject: net/mlx5_core: Identify resources by their type

This patch puts a common part as the first field of mlx5_core_qp. This field is
used to identify which resource generated an event. This is required since upcoming
new resource types such as DC targets are allocated for the same numerical space
as regular QPs and may generate the same events. By searching the resource in the
same table we can then look at the common field to identify the resource.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 12 +++---
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 57 ++++++++++++++++++++--------
 include/linux/mlx5/driver.h                  | 13 ++++++-
 include/linux/mlx5/qp.h                      |  3 +-
 4 files changed, 60 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 11b9b840ad4d..ed53291468f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -198,7 +198,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 	int eqes_found = 0;
 	int set_ci = 0;
 	u32 cqn;
-	u32 srqn;
+	u32 rsn;
 	u8 port;
 
 	while ((eqe = next_eqe_sw(eq))) {
@@ -224,18 +224,18 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
 		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
 		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
 			mlx5_core_dbg(dev, "event %s(%d) arrived\n",
 				      eqe_type_str(eqe->type), eqe->type);
-			mlx5_qp_event(dev, be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff,
-				      eqe->type);
+			mlx5_rsc_event(dev, rsn, eqe->type);
 			break;
 
 		case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
 		case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-			srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
 			mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
-				      eqe_type_str(eqe->type), eqe->type, srqn);
-			mlx5_srq_event(dev, srqn, eqe->type);
+				      eqe_type_str(eqe->type), eqe->type, rsn);
+			mlx5_srq_event(dev, rsn, eqe->type);
 			break;
 
 		case MLX5_EVENT_TYPE_CMD:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 415b67ce379e..5261a2b0da43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -39,28 +39,53 @@
 
 #include "mlx5_core.h"
 
-void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type)
+static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
+						 u32 rsn)
 {
 	struct mlx5_qp_table *table = &dev->priv.qp_table;
-	struct mlx5_core_qp *qp;
+	struct mlx5_core_rsc_common *common;
 
 	spin_lock(&table->lock);
 
-	qp = radix_tree_lookup(&table->tree, qpn);
-	if (qp)
-		atomic_inc(&qp->refcount);
+	common = radix_tree_lookup(&table->tree, rsn);
+	if (common)
+		atomic_inc(&common->refcount);
 
 	spin_unlock(&table->lock);
 
-	if (!qp) {
-		mlx5_core_warn(dev, "Async event for bogus QP 0x%x\n", qpn);
-		return;
+	if (!common) {
+		mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
+			       rsn);
+		return NULL;
 	}
+	return common;
+}
 
-	qp->event(qp, event_type);
+void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
+{
+	if (atomic_dec_and_test(&common->refcount))
+		complete(&common->free);
+}
+
+void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+{
+	struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+	struct mlx5_core_qp *qp;
+
+	if (!common)
+		return;
+
+	switch (common->res) {
+	case MLX5_RES_QP:
+		qp = (struct mlx5_core_qp *)common;
+		qp->event(qp, event_type);
+		break;
+
+	default:
+		mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
+	}
 
-	if (atomic_dec_and_test(&qp->refcount))
-		complete(&qp->free);
+	mlx5_core_put_rsc(common);
 }
 
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
@@ -92,6 +117,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 	qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
 	mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
+	qp->common.res = MLX5_RES_QP;
 	spin_lock_irq(&table->lock);
 	err = radix_tree_insert(&table->tree, qp->qpn, qp);
 	spin_unlock_irq(&table->lock);
@@ -106,9 +132,9 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			      qp->qpn);
 
 	qp->pid = current->pid;
-	atomic_set(&qp->refcount, 1);
+	atomic_set(&qp->common.refcount, 1);
 	atomic_inc(&dev->num_qps);
-	init_completion(&qp->free);
+	init_completion(&qp->common.free);
 
 	return 0;
 
@@ -138,9 +164,8 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 	radix_tree_delete(&table->tree, qp->qpn);
 	spin_unlock_irqrestore(&table->lock, flags);
 
-	if (atomic_dec_and_test(&qp->refcount))
-		complete(&qp->free);
-	wait_for_completion(&qp->free);
+	mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+	wait_for_completion(&qp->common.free);
 
 	memset(&in, 0, sizeof(in));
 	memset(&out, 0, sizeof(out));
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c439f9c59b93..246310dc8bef 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -375,6 +375,16 @@ struct mlx5_core_mr {
 	u32			pd;
 };
 
+enum mlx5_res_type {
+	MLX5_RES_QP,
+};
+
+struct mlx5_core_rsc_common {
+	enum mlx5_res_type	res;
+	atomic_t		refcount;
+	struct completion	free;
+};
+
 struct mlx5_core_srq {
 	u32		srqn;
 	int		max;
@@ -700,7 +710,7 @@ int mlx5_eq_init(struct mlx5_core_dev *dev);
 void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
-void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type);
+void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -737,6 +747,7 @@ void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 			 int npsvs, u32 *sig_index);
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
+void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
 
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 9709b30e2d69..7c4c0f1f5805 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -342,10 +342,9 @@ struct mlx5_stride_block_ctrl_seg {
 };
 
 struct mlx5_core_qp {
+	struct mlx5_core_rsc_common	common; /* must be first */
 	void (*event)		(struct mlx5_core_qp *, int);
 	int			qpn;
-	atomic_t		refcount;
-	struct completion	free;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
 };
-- 
cgit v1.2.3


From efc98d08e1ec4fd131f794370b274dceaf32c958 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 3 Oct 2014 15:48:08 -0700
Subject: fou: eliminate IPv4,v6 specific GRO functions

This patch removes fou[46]_gro_receive and fou[46]_gro_complete
functions. The v4 or v6 variants were chosen for the UDP offloads
based on the address family of the socket this is not necessary
or correct. Alternatively, this patch adds is_ipv6 to napi_gro_skb.
This is set in udp6_gro_receive and unset in udp4_gro_receive. In
fou_gro_receive the value is used to select the correct inet_offloads
for the protocol of the outer IP header.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +++
 net/ipv4/fou.c            | 48 ++++++++---------------------------------------
 net/ipv4/udp_offload.c    |  1 +
 net/ipv6/udp_offload.c    |  1 +
 4 files changed, 13 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 910fb17ad148..22d54b9b700d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1886,6 +1886,9 @@ struct napi_gro_cb {
 	/* Number of checksums via CHECKSUM_UNNECESSARY */
 	u8	csum_cnt:3;
 
+	/* Used in foo-over-udp, set in udp[46]_gro_receive */
+	u8	is_ipv6:1;
+
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
 
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index dced89fbe480..7e2126a31f2e 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -65,14 +65,15 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
 }
 
 static struct sk_buff **fou_gro_receive(struct sk_buff **head,
-					struct sk_buff *skb,
-					const struct net_offload **offloads)
+					struct sk_buff *skb)
 {
 	const struct net_offload *ops;
 	struct sk_buff **pp = NULL;
 	u8 proto = NAPI_GRO_CB(skb)->proto;
+	const struct net_offload **offloads;
 
 	rcu_read_lock();
+	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 	ops = rcu_dereference(offloads[proto]);
 	if (!ops || !ops->callbacks.gro_receive)
 		goto out_unlock;
@@ -85,14 +86,15 @@ out_unlock:
 	return pp;
 }
 
-static int fou_gro_complete(struct sk_buff *skb, int nhoff,
-			    const struct net_offload **offloads)
+static int fou_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct net_offload *ops;
 	u8 proto = NAPI_GRO_CB(skb)->proto;
 	int err = -ENOSYS;
+	const struct net_offload **offloads;
 
 	rcu_read_lock();
+	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 	ops = rcu_dereference(offloads[proto]);
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out_unlock;
@@ -105,28 +107,6 @@ out_unlock:
 	return err;
 }
 
-static struct sk_buff **fou4_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
-{
-	return fou_gro_receive(head, skb, inet_offloads);
-}
-
-static int fou4_gro_complete(struct sk_buff *skb, int nhoff)
-{
-	return fou_gro_complete(skb, nhoff, inet_offloads);
-}
-
-static struct sk_buff **fou6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
-{
-	return fou_gro_receive(head, skb, inet6_offloads);
-}
-
-static int fou6_gro_complete(struct sk_buff *skb, int nhoff)
-{
-	return fou_gro_complete(skb, nhoff, inet6_offloads);
-}
-
 static int fou_add_to_port_list(struct fou *fou)
 {
 	struct fou *fout;
@@ -199,20 +179,8 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
 
 	sk->sk_allocation = GFP_ATOMIC;
 
-	switch (cfg->udp_config.family) {
-	case AF_INET:
-		fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive;
-		fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete;
-		break;
-	case AF_INET6:
-		fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive;
-		fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete;
-		break;
-	default:
-		err = -EPFNOSUPPORT;
-		goto error;
-	}
-
+	fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
+	fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
 	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
 	fou->udp_offloads.ipproto = cfg->protocol;
 
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 8c35f2c939ee..507310ef4b56 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -334,6 +334,7 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
 		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
 					     inet_gro_compute_pseudo);
 skip:
+	NAPI_GRO_CB(skb)->is_ipv6 = 0;
 	return udp_gro_receive(head, skb, uh);
 
 flush:
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 8f96988c1db2..6b8f543f6ac6 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -140,6 +140,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
 					     ip6_gro_compute_pseudo);
 
 skip:
+	NAPI_GRO_CB(skb)->is_ipv6 = 1;
 	return udp_gro_receive(head, skb, uh);
 
 flush:
-- 
cgit v1.2.3


From 7941b27b16e3282f6ec8817e36492f1deec753a7 Mon Sep 17 00:00:00 2001
From: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
Date: Fri, 4 Jul 2014 19:59:20 +0300
Subject: of: Introduce Device Tree resolve support.

Introduce support for dynamic device tree resolution.
Using it, it is possible to prepare a device tree that's
been loaded on runtime to be modified and inserted at the kernel
live tree.

Export of of_resolve and bug fix of double free by
	Guenter Roeck <groeck@juniper.net>

Signed-off-by: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
[grant.likely: Don't need to select CONFIG_OF_DYNAMIC and CONFIG_OF_DEVICE]
[grant.likely: Don't need to depend on OF or !SPARC]
[grant.likely: Factor out duplicate code blocks into single function]
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 .../devicetree/dynamic-resolution-notes.txt        |  25 ++
 drivers/of/Kconfig                                 |   3 +
 drivers/of/Makefile                                |   1 +
 drivers/of/resolver.c                              | 336 +++++++++++++++++++++
 include/linux/of.h                                 |   3 +
 5 files changed, 368 insertions(+)
 create mode 100644 Documentation/devicetree/dynamic-resolution-notes.txt
 create mode 100644 drivers/of/resolver.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/dynamic-resolution-notes.txt b/Documentation/devicetree/dynamic-resolution-notes.txt
new file mode 100644
index 000000000000..083d23262abe
--- /dev/null
+++ b/Documentation/devicetree/dynamic-resolution-notes.txt
@@ -0,0 +1,25 @@
+Device Tree Dynamic Resolver Notes
+----------------------------------
+
+This document describes the implementation of the in-kernel
+Device Tree resolver, residing in drivers/of/resolver.c and is a
+companion document to Documentation/devicetree/dt-object-internal.txt[1]
+
+How the resolver works
+----------------------
+
+The resolver is given as an input an arbitrary tree compiled with the
+proper dtc option and having a /plugin/ tag. This generates the
+appropriate __fixups__ & __local_fixups__ nodes as described in [1].
+
+In sequence the resolver works by the following steps:
+
+1. Get the maximum device tree phandle value from the live tree + 1.
+2. Adjust all the local phandles of the tree to resolve by that amount.
+3. Using the __local__fixups__ node information adjust all local references
+   by the same amount.
+4. For each property in the __fixups__ node locate the node it references
+   in the live tree. This is the label used to tag the node.
+5. Retrieve the phandle of the target of the fixup.
+6. For each fixup in the property locate the node:property:offset location
+   and replace it with the phandle value.
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 5160c4eb73c2..6b81a36f6420 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -79,4 +79,7 @@ config OF_RESERVED_MEM
 	help
 	  Helpers to allow for reservation of memory regions
 
+config OF_RESOLVE
+	bool
+
 endmenu # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 2b6a7b129d10..ca9209ce50cd 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_OF_PCI)	+= of_pci.o
 obj-$(CONFIG_OF_PCI_IRQ)  += of_pci_irq.o
 obj-$(CONFIG_OF_MTD)	+= of_mtd.o
 obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
+obj-$(CONFIG_OF_RESOLVE)  += resolver.o
 
 CFLAGS_fdt.o = -I$(src)/../../scripts/dtc/libfdt
 CFLAGS_fdt_address.o = -I$(src)/../../scripts/dtc/libfdt
diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
new file mode 100644
index 000000000000..aed7959f800d
--- /dev/null
+++ b/drivers/of/resolver.c
@@ -0,0 +1,336 @@
+/*
+ * Functions for dealing with DT resolution
+ *
+ * Copyright (C) 2012 Pantelis Antoniou <panto@antoniou-consulting.com>
+ * Copyright (C) 2012 Texas Instruments Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+/* illegal phandle value (set when unresolved) */
+#define OF_PHANDLE_ILLEGAL	0xdeadbeef
+
+/**
+ * Find a node with the give full name by recursively following any of
+ * the child node links.
+ */
+static struct device_node *__of_find_node_by_full_name(struct device_node *node,
+		const char *full_name)
+{
+	struct device_node *child, *found;
+
+	if (node == NULL)
+		return NULL;
+
+	/* check */
+	if (of_node_cmp(node->full_name, full_name) == 0)
+		return node;
+
+	for_each_child_of_node(node, child) {
+		found = __of_find_node_by_full_name(child, full_name);
+		if (found != NULL)
+			return found;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find live tree's maximum phandle value.
+ */
+static phandle of_get_tree_max_phandle(void)
+{
+	struct device_node *node;
+	phandle phandle;
+	unsigned long flags;
+
+	/* now search recursively */
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	phandle = 0;
+	for_each_of_allnodes(node) {
+		if (node->phandle != OF_PHANDLE_ILLEGAL &&
+				node->phandle > phandle)
+			phandle = node->phandle;
+	}
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+	return phandle;
+}
+
+/*
+ * Adjust a subtree's phandle values by a given delta.
+ * Makes sure not to just adjust the device node's phandle value,
+ * but modify the phandle properties values as well.
+ */
+static void __of_adjust_tree_phandles(struct device_node *node,
+		int phandle_delta)
+{
+	struct device_node *child;
+	struct property *prop;
+	phandle phandle;
+
+	/* first adjust the node's phandle direct value */
+	if (node->phandle != 0 && node->phandle != OF_PHANDLE_ILLEGAL)
+		node->phandle += phandle_delta;
+
+	/* now adjust phandle & linux,phandle values */
+	for_each_property_of_node(node, prop) {
+
+		/* only look for these two */
+		if (of_prop_cmp(prop->name, "phandle") != 0 &&
+		    of_prop_cmp(prop->name, "linux,phandle") != 0)
+			continue;
+
+		/* must be big enough */
+		if (prop->length < 4)
+			continue;
+
+		/* read phandle value */
+		phandle = be32_to_cpup(prop->value);
+		if (phandle == OF_PHANDLE_ILLEGAL)	/* unresolved */
+			continue;
+
+		/* adjust */
+		*(uint32_t *)prop->value = cpu_to_be32(node->phandle);
+	}
+
+	/* now do the children recursively */
+	for_each_child_of_node(node, child)
+		__of_adjust_tree_phandles(child, phandle_delta);
+}
+
+static int __of_adjust_phandle_ref(struct device_node *node, struct property *rprop, int value, bool is_delta)
+{
+	phandle phandle;
+	struct device_node *refnode;
+	struct property *sprop;
+	char *propval, *propcur, *propend, *nodestr, *propstr, *s;
+	int offset, propcurlen;
+	int err = 0;
+
+	/* make a copy */
+	propval = kmalloc(rprop->length, GFP_KERNEL);
+	if (!propval) {
+		pr_err("%s: Could not copy value of '%s'\n",
+				__func__, rprop->name);
+		return -ENOMEM;
+	}
+	memcpy(propval, rprop->value, rprop->length);
+
+	propend = propval + rprop->length;
+	for (propcur = propval; propcur < propend; propcur += propcurlen + 1) {
+		propcurlen = strlen(propcur);
+
+		nodestr = propcur;
+		s = strchr(propcur, ':');
+		if (!s) {
+			pr_err("%s: Illegal symbol entry '%s' (1)\n",
+				__func__, propcur);
+			err = -EINVAL;
+			goto err_fail;
+		}
+		*s++ = '\0';
+
+		propstr = s;
+		s = strchr(s, ':');
+		if (!s) {
+			pr_err("%s: Illegal symbol entry '%s' (2)\n",
+				__func__, (char *)rprop->value);
+			err = -EINVAL;
+			goto err_fail;
+		}
+
+		*s++ = '\0';
+		err = kstrtoint(s, 10, &offset);
+		if (err != 0) {
+			pr_err("%s: Could get offset '%s'\n",
+				__func__, (char *)rprop->value);
+			goto err_fail;
+		}
+
+		/* look into the resolve node for the full path */
+		refnode = __of_find_node_by_full_name(node, nodestr);
+		if (!refnode) {
+			pr_warn("%s: Could not find refnode '%s'\n",
+				__func__, (char *)rprop->value);
+			continue;
+		}
+
+		/* now find the property */
+		for_each_property_of_node(refnode, sprop) {
+			if (of_prop_cmp(sprop->name, propstr) == 0)
+				break;
+		}
+
+		if (!sprop) {
+			pr_err("%s: Could not find property '%s'\n",
+				__func__, (char *)rprop->value);
+			err = -ENOENT;
+			goto err_fail;
+		}
+
+		phandle = is_delta ? be32_to_cpup(sprop->value + offset) + value : value;
+		*(__be32 *)(sprop->value + offset) = cpu_to_be32(phandle);
+	}
+
+err_fail:
+	kfree(propval);
+	return err;
+}
+
+/*
+ * Adjust the local phandle references by the given phandle delta.
+ * Assumes the existances of a __local_fixups__ node at the root
+ * of the tree. Does not take any devtree locks so make sure you
+ * call this on a tree which is at the detached state.
+ */
+static int __of_adjust_tree_phandle_references(struct device_node *node,
+		int phandle_delta)
+{
+	struct device_node *child;
+	struct property *rprop;
+	int err;
+
+	/* locate the symbols & fixups nodes on resolve */
+	for_each_child_of_node(node, child)
+		if (of_node_cmp(child->name, "__local_fixups__") == 0)
+			break;
+
+	/* no local fixups */
+	if (!child)
+		return 0;
+
+	/* find the local fixups property */
+	for_each_property_of_node(child, rprop) {
+		/* skip properties added automatically */
+		if (of_prop_cmp(rprop->name, "name") == 0)
+			continue;
+
+		err = __of_adjust_phandle_ref(node, rprop, phandle_delta, true);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * of_resolve	- Resolve the given node against the live tree.
+ *
+ * @resolve:	Node to resolve
+ *
+ * Perform dynamic Device Tree resolution against the live tree
+ * to the given node to resolve. This depends on the live tree
+ * having a __symbols__ node, and the resolve node the __fixups__ &
+ * __local_fixups__ nodes (if needed).
+ * The result of the operation is a resolve node that it's contents
+ * are fit to be inserted or operate upon the live tree.
+ * Returns 0 on success or a negative error value on error.
+ */
+int of_resolve_phandles(struct device_node *resolve)
+{
+	struct device_node *child, *refnode;
+	struct device_node *root_sym, *resolve_sym, *resolve_fix;
+	struct property *rprop;
+	const char *refpath;
+	phandle phandle, phandle_delta;
+	int err;
+
+	/* the resolve node must exist, and be detached */
+	if (!resolve || !of_node_check_flag(resolve, OF_DETACHED))
+		return -EINVAL;
+
+	/* first we need to adjust the phandles */
+	phandle_delta = of_get_tree_max_phandle() + 1;
+	__of_adjust_tree_phandles(resolve, phandle_delta);
+	err = __of_adjust_tree_phandle_references(resolve, phandle_delta);
+	if (err != 0)
+		return err;
+
+	root_sym = NULL;
+	resolve_sym = NULL;
+	resolve_fix = NULL;
+
+	/* this may fail (if no fixups are required) */
+	root_sym = of_find_node_by_path("/__symbols__");
+
+	/* locate the symbols & fixups nodes on resolve */
+	for_each_child_of_node(resolve, child) {
+
+		if (!resolve_sym &&
+				of_node_cmp(child->name, "__symbols__") == 0)
+			resolve_sym = child;
+
+		if (!resolve_fix &&
+				of_node_cmp(child->name, "__fixups__") == 0)
+			resolve_fix = child;
+
+		/* both found, don't bother anymore */
+		if (resolve_sym && resolve_fix)
+			break;
+	}
+
+	/* we do allow for the case where no fixups are needed */
+	if (!resolve_fix) {
+		err = 0;	/* no error */
+		goto out;
+	}
+
+	/* we need to fixup, but no root symbols... */
+	if (!root_sym) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	for_each_property_of_node(resolve_fix, rprop) {
+
+		/* skip properties added automatically */
+		if (of_prop_cmp(rprop->name, "name") == 0)
+			continue;
+
+		err = of_property_read_string(root_sym,
+				rprop->name, &refpath);
+		if (err != 0) {
+			pr_err("%s: Could not find symbol '%s'\n",
+					__func__, rprop->name);
+			goto out;
+		}
+
+		refnode = of_find_node_by_path(refpath);
+		if (!refnode) {
+			pr_err("%s: Could not find node by path '%s'\n",
+					__func__, refpath);
+			err = -ENOENT;
+			goto out;
+		}
+
+		phandle = refnode->phandle;
+		of_node_put(refnode);
+
+		pr_debug("%s: %s phandle is 0x%08x\n",
+				__func__, rprop->name, phandle);
+
+		err = __of_adjust_phandle_ref(resolve, rprop, phandle, false);
+		if (err)
+			break;
+	}
+
+out:
+	/* NULL is handled by of_node_put as NOP */
+	of_node_put(root_sym);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(of_resolve_phandles);
diff --git a/include/linux/of.h b/include/linux/of.h
index 6c4363b8ddc3..6545e7aec7bb 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -863,4 +863,7 @@ static inline int of_changeset_update_property(struct of_changeset *ocs,
 }
 #endif
 
+/* CONFIG_OF_RESOLVE api */
+extern int of_resolve_phandles(struct device_node *tree);
+
 #endif /* _LINUX_OF_H */
-- 
cgit v1.2.3


From c8753d55afb436fd6a25c8bbe8d783f6dcf1c9f8 Mon Sep 17 00:00:00 2001
From: Vijay Subramanian <subramanian.vijay@gmail.com>
Date: Thu, 2 Oct 2014 10:00:43 -0700
Subject: net: Cleanup skb cloning by adding SKB_FCLONE_FREE

SKB_FCLONE_UNAVAILABLE has overloaded meaning depending on type of skb.
1: If skb is allocated from head_cache, it indicates fclone is not available.
2: If skb is a companion fclone skb (allocated from fclone_cache), it indicates
it is available to be used.

To avoid confusion for case 2 above, this patch  replaces
SKB_FCLONE_UNAVAILABLE with SKB_FCLONE_FREE where appropriate. For fclone
companion skbs, this indicates it is free for use.

SKB_FCLONE_UNAVAILABLE will now simply indicate skb is from head_cache and
cannot / will not have a companion fclone.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 ++++---
 net/core/skbuff.c      | 8 ++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7c5036d11feb..3a5ec7638627 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -339,9 +339,10 @@ struct skb_shared_info {
 
 
 enum {
-	SKB_FCLONE_UNAVAILABLE,
-	SKB_FCLONE_ORIG,
-	SKB_FCLONE_CLONE,
+	SKB_FCLONE_UNAVAILABLE,	/* skb has no fclone (from head_cache) */
+	SKB_FCLONE_ORIG,	/* orig skb (from fclone_cache) */
+	SKB_FCLONE_CLONE,	/* companion fclone skb (from fclone_cache) */
+	SKB_FCLONE_FREE,	/* this companion fclone skb is available */
 };
 
 enum {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a0b312fa3047..28916e47f959 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -265,7 +265,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		skb->fclone = SKB_FCLONE_ORIG;
 		atomic_set(&fclones->fclone_ref, 1);
 
-		fclones->skb2.fclone = SKB_FCLONE_UNAVAILABLE;
+		fclones->skb2.fclone = SKB_FCLONE_FREE;
 		fclones->skb2.pfmemalloc = pfmemalloc;
 	}
 out:
@@ -542,7 +542,7 @@ static void kfree_skbmem(struct sk_buff *skb)
 		fclones = container_of(skb, struct sk_buff_fclones, skb2);
 
 		/* Warning : We must perform the atomic_dec_and_test() before
-		 * setting skb->fclone back to SKB_FCLONE_UNAVAILABLE, otherwise
+		 * setting skb->fclone back to SKB_FCLONE_FREE, otherwise
 		 * skb_clone() could set clone_ref to 2 before our decrement.
 		 * Anyway, if we are going to free the structure, no need to
 		 * rewrite skb->fclone.
@@ -553,7 +553,7 @@ static void kfree_skbmem(struct sk_buff *skb)
 			/* The clone portion is available for
 			 * fast-cloning again.
 			 */
-			skb->fclone = SKB_FCLONE_UNAVAILABLE;
+			skb->fclone = SKB_FCLONE_FREE;
 		}
 		break;
 	}
@@ -874,7 +874,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		return NULL;
 
 	if (skb->fclone == SKB_FCLONE_ORIG &&
-	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
+	    n->fclone == SKB_FCLONE_FREE) {
 		n->fclone = SKB_FCLONE_CLONE;
 		/* As our fastclone was free, clone_ref must be 1 at this point.
 		 * We could use atomic_inc() here, but it is faster
-- 
cgit v1.2.3


From 7dfa4b414d4eec8da56e44fb2b4aea3e549b092f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 12:35:09 +0300
Subject: net/mlx4_en: Code cleanups in tx path

- Remove unused variable ring->poll_cnt
- No need to set some fields if using blueflame
- Add missing const's
- Use unlikely
- Remove unneeded new line
- Make some comments more precise
- struct mlx4_bf @offset field reduced to unsigned int to save space

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c   | 49 +++++++++++++++-------------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h |  1 -
 include/linux/mlx4/device.h                  |  2 +-
 3 files changed, 27 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 0c501253fdab..eaf23eb7266a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -191,7 +191,6 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 	ring->prod = 0;
 	ring->cons = 0xffffffff;
 	ring->last_nr_txbb = 1;
-	ring->poll_cnt = 0;
 	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
 	memset(ring->buf, 0, ring->buf_size);
 
@@ -512,7 +511,8 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
 	return ring->buf + index * TXBB_SIZE;
 }
 
-static int is_inline(int inline_thold, struct sk_buff *skb, void **pfrag)
+static bool is_inline(int inline_thold, const struct sk_buff *skb,
+		      void **pfrag)
 {
 	void *ptr;
 
@@ -535,7 +535,7 @@ static int is_inline(int inline_thold, struct sk_buff *skb, void **pfrag)
 	return 0;
 }
 
-static int inline_size(struct sk_buff *skb)
+static int inline_size(const struct sk_buff *skb)
 {
 	if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
 	    <= MLX4_INLINE_ALIGN)
@@ -546,7 +546,8 @@ static int inline_size(struct sk_buff *skb)
 			     sizeof(struct mlx4_wqe_inline_seg), 16);
 }
 
-static int get_real_size(struct sk_buff *skb, struct net_device *dev,
+static int get_real_size(const struct sk_buff *skb,
+			 struct net_device *dev,
 			 int *lso_header_size)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -581,8 +582,10 @@ static int get_real_size(struct sk_buff *skb, struct net_device *dev,
 	return real_size;
 }
 
-static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *skb,
-			     int real_size, u16 *vlan_tag, int tx_ind, void *fragptr)
+static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
+			     const struct sk_buff *skb,
+			     int real_size, u16 *vlan_tag,
+			     int tx_ind, void *fragptr)
 {
 	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
 	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
@@ -642,7 +645,8 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 	return fallback(dev, skb) % rings_p_up + up * rings_p_up;
 }
 
-static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt)
+static void mlx4_bf_copy(void __iomem *dst, const void *src,
+			 unsigned int bytecnt)
 {
 	__iowrite64_copy(dst, src, bytecnt / 8);
 }
@@ -736,11 +740,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_info->skb = skb;
 	tx_info->nr_txbb = nr_txbb;
 
+	data = &tx_desc->data;
 	if (lso_header_size)
 		data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4,
 						      DS_SIZE));
-	else
-		data = &tx_desc->data;
 
 	/* valid only for none inline segments */
 	tx_info->data_offset = (void *)data - (void *)tx_desc;
@@ -753,9 +756,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (is_inline(ring->inline_thold, skb, &fragptr)) {
 		tx_info->inl = 1;
 	} else {
-		/* Map fragments */
+		/* Map fragments if any */
 		for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
-			struct skb_frag_struct *frag;
+			const struct skb_frag_struct *frag;
 			dma_addr_t dma;
 
 			frag = &skb_shinfo(skb)->frags[i];
@@ -772,7 +775,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 			--data;
 		}
 
-		/* Map linear part */
+		/* Map linear part if needed */
 		if (tx_info->linear) {
 			u32 byte_count = skb_headlen(skb) - lso_header_size;
 			dma_addr_t dma;
@@ -795,18 +798,14 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * For timestamping add flag to skb_shinfo and
 	 * set flag for further reference
 	 */
-	if (ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
-	    skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
-		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+	if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
+		     shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
+		shinfo->tx_flags |= SKBTX_IN_PROGRESS;
 		tx_info->ts_requested = 1;
 	}
 
 	/* Prepare ctrl segement apart opcode+ownership, which depends on
 	 * whether LSO is used */
-	tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
-	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN *
-		!!vlan_tx_tag_present(skb);
-	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
 	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
@@ -852,7 +851,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 			 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
 		tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
 		ring->packets++;
-
 	}
 	ring->bytes += tx_info->nr_bytes;
 	netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
@@ -874,7 +872,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	ring->prod += nr_txbb;
 
 	/* If we used a bounce buffer then copy descriptor back into place */
-	if (bounce)
+	if (unlikely(bounce))
 		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
 
 	skb_tx_timestamp(skb);
@@ -894,13 +892,18 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		wmb();
 
-		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, (unsigned long *) &tx_desc->ctrl,
-		     desc_size);
+		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
+			     desc_size);
 
 		wmb();
 
 		ring->bf.offset ^= ring->bf.buf_size;
 	} else {
+		tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
+		tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN *
+			!!vlan_tx_tag_present(skb);
+		tx_desc->ctrl.fence_size = real_size;
+
 		/* Ensure new descriptor hits memory
 		 * before setting ownership of this descriptor to HW
 		 */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 84c9d5dbfa4f..e54b653de3d4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -263,7 +263,6 @@ struct mlx4_en_tx_ring {
 	u32 buf_size;
 	u32 doorbell_qpn;
 	void *buf;
-	u16 poll_cnt;
 	struct mlx4_en_tx_info *tx_info;
 	u8 *bounce_buf;
 	u8 queue_index;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b2f8ab9a57c4..37e4404d0227 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -583,7 +583,7 @@ struct mlx4_uar {
 };
 
 struct mlx4_bf {
-	unsigned long		offset;
+	unsigned int		offset;
 	int			buf_size;
 	struct mlx4_uar	       *uar;
 	void __iomem	       *reg;
-- 
cgit v1.2.3


From a6551a76fff15056fde2342d0f7de41ee605264e Mon Sep 17 00:00:00 2001
From: Andrew Bresticker <abrestic@chromium.org>
Date: Thu, 18 Sep 2014 17:18:56 +0200
Subject: mfd: cros_ec: stop calling ->cmd_xfer() directly

Instead of having users of the ChromeOS EC call the interface-specific
cmd_xfer() callback directly, introduce a central cros_ec_cmd_xfer()
to use instead.  This will allow us to put all the locking and retry
logic in one place instead of duplicating it across the different
drivers.

Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Reviewed-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/i2c/busses/i2c-cros-ec-tunnel.c |  2 +-
 drivers/input/keyboard/cros_ec_keyb.c   |  2 +-
 drivers/mfd/cros_ec.c                   |  7 +++++++
 include/linux/mfd/cros_ec.h             | 24 ++++++++++++++++++------
 4 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
index 05e033c98115..43be23dfb115 100644
--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
@@ -227,7 +227,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
 	msg.indata = response;
 	msg.insize = response_len;
 
-	result = bus->ec->cmd_xfer(bus->ec, &msg);
+	result = cros_ec_cmd_xfer(bus->ec, &msg);
 	if (result < 0)
 		goto exit;
 
diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
index 791781ade4e7..93111d1aa617 100644
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -182,7 +182,7 @@ static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state)
 		.insize = ckdev->cols,
 	};
 
-	return ckdev->ec->cmd_xfer(ckdev->ec, &msg);
+	return cros_ec_cmd_xfer(ckdev->ec, &msg);
 }
 
 static irqreturn_t cros_ec_keyb_irq(int irq, void *data)
diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 4873f9c50452..a9faebdcfa14 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -62,6 +62,13 @@ int cros_ec_check_result(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_check_result);
 
+int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
+		     struct cros_ec_command *msg)
+{
+	return ec_dev->cmd_xfer(ec_dev, msg);
+}
+EXPORT_SYMBOL(cros_ec_cmd_xfer);
+
 static const struct mfd_cell cros_devs[] = {
 	{
 		.name = "cros-ec-keyb",
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index fcbe9d129a9d..0e166b92f5b4 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -62,10 +62,6 @@ struct cros_ec_command {
  * @dev: Device pointer
  * @was_wake_device: true if this device was set to wake the system from
  * sleep at the last suspend
- * @cmd_xfer: send command to EC and get response
- *     Returns the number of bytes received if the communication succeeded, but
- *     that doesn't mean the EC was happy with the command. The caller
- *     should check msg.result for the EC's result code.
  *
  * @priv: Private data
  * @irq: Interrupt to use
@@ -82,6 +78,10 @@ struct cros_ec_command {
  * @dout_size: size of dout buffer to allocate (zero to use static dout)
  * @parent: pointer to parent device (e.g. i2c or spi device)
  * @wake_enabled: true if this device can wake the system from sleep
+ * @cmd_xfer: send command to EC and get response
+ *     Returns the number of bytes received if the communication succeeded, but
+ *     that doesn't mean the EC was happy with the command. The caller
+ *     should check msg.result for the EC's result code.
  * @lock: one transaction at a time
  */
 struct cros_ec_device {
@@ -92,8 +92,6 @@ struct cros_ec_device {
 	struct device *dev;
 	bool was_wake_device;
 	struct class *cros_class;
-	int (*cmd_xfer)(struct cros_ec_device *ec,
-			struct cros_ec_command *msg);
 
 	/* These are used to implement the platform-specific interface */
 	void *priv;
@@ -104,6 +102,8 @@ struct cros_ec_device {
 	int dout_size;
 	struct device *parent;
 	bool wake_enabled;
+	int (*cmd_xfer)(struct cros_ec_device *ec,
+			struct cros_ec_command *msg);
 	struct mutex lock;
 };
 
@@ -152,6 +152,18 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
 int cros_ec_check_result(struct cros_ec_device *ec_dev,
 			 struct cros_ec_command *msg);
 
+/**
+ * cros_ec_cmd_xfer - Send a command to the ChromeOS EC
+ *
+ * Call this to send a command to the ChromeOS EC.  This should be used
+ * instead of calling the EC's cmd_xfer() callback directly.
+ *
+ * @ec_dev: EC device
+ * @msg: Message to write
+ */
+int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
+		     struct cros_ec_command *msg);
+
 /**
  * cros_ec_remove - Remove a ChromeOS EC
  *
-- 
cgit v1.2.3


From fcbeb976d7ce783fd58e63e61c196d9a8912b3be Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 10:11:27 -0700
Subject: net: introduce netdevice gso_min_segs attribute

Some TSO engines might have a too heavy setup cost, that impacts
performance on hosts sending small bursts (2 MSS per packet).

This patch adds a device gso_min_segs, allowing drivers to set
a minimum segment size for TSO packets, according to the NIC
performance.

Tested on a mlx4 NIC, this allows to get a ~110% increase of
throughput when sending 2 MSS per packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 +++-
 net/core/dev.c            | 9 ++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 22d54b9b700d..2df86f50261c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1416,6 +1416,8 @@ enum netdev_priv_flags {
  *	@gso_max_size:	Maximum size of generic segmentation offload
  *	@gso_max_segs:	Maximum number of segments that can be passed to the
  *			NIC for GSO
+ *	@gso_min_segs:	Minimum number of segments that can be passed to the
+ *			NIC for GSO
  *
  *	@dcbnl_ops:	Data Center Bridging netlink ops
  *	@num_tc:	Number of traffic classes in the net device
@@ -1666,7 +1668,7 @@ struct net_device {
 	unsigned int		gso_max_size;
 #define GSO_MAX_SEGS		65535
 	u16			gso_max_segs;
-
+	u16			gso_min_segs;
 #ifdef CONFIG_DCB
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 7d5691cc1f47..c1a4de275576 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2567,10 +2567,12 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 
 netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
+	const struct net_device *dev = skb->dev;
+	netdev_features_t features = dev->features;
+	u16 gso_segs = skb_shinfo(skb)->gso_segs;
 	__be16 protocol = skb->protocol;
-	netdev_features_t features = skb->dev->features;
 
-	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+	if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
@@ -2581,7 +2583,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	}
 
 	features = netdev_intersect_features(features,
-					     skb->dev->vlan_features |
+					     dev->vlan_features |
 					     NETIF_F_HW_VLAN_CTAG_TX |
 					     NETIF_F_HW_VLAN_STAG_TX);
 
@@ -6661,6 +6663,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 	dev->gso_max_segs = GSO_MAX_SEGS;
+	dev->gso_min_segs = 0;
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
-- 
cgit v1.2.3


From fd2ef0ba3071c92ac6272ab22ea3f2b16d88a4eb Mon Sep 17 00:00:00 2001
From: Petri Gynther <pgynther@google.com>
Date: Mon, 6 Oct 2014 11:38:30 -0700
Subject: net: phy: adjust fixed_phy_register() return value

Adjust fixed_phy_register() to return struct phy_device *, so that
it becomes easy to use fixed PHYs without device tree support:

  phydev = fixed_phy_register(PHY_POLL, &fixed_phy_status, NULL);
  fixed_phy_set_link_update(phydev, fixed_phy_link_update);
  phy_connect_direct(netdev, phydev, handler_fn, phy_interface);

This change is a prerequisite for modifying bcmgenet driver to work
without a device tree on Broadcom's MIPS-based 7xxx platforms.

Signed-off-by: Petri Gynther <pgynther@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/fixed.c   | 16 ++++++++--------
 drivers/of/of_mdio.c      |  7 +++++--
 include/linux/phy_fixed.h | 14 +++++++-------
 3 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c
index 5b19fbbda6d4..47872caa0081 100644
--- a/drivers/net/phy/fixed.c
+++ b/drivers/net/phy/fixed.c
@@ -233,9 +233,9 @@ EXPORT_SYMBOL_GPL(fixed_phy_del);
 static int phy_fixed_addr;
 static DEFINE_SPINLOCK(phy_fixed_addr_lock);
 
-int fixed_phy_register(unsigned int irq,
-		       struct fixed_phy_status *status,
-		       struct device_node *np)
+struct phy_device *fixed_phy_register(unsigned int irq,
+				      struct fixed_phy_status *status,
+				      struct device_node *np)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;
 	struct phy_device *phy;
@@ -246,19 +246,19 @@ int fixed_phy_register(unsigned int irq,
 	spin_lock(&phy_fixed_addr_lock);
 	if (phy_fixed_addr == PHY_MAX_ADDR) {
 		spin_unlock(&phy_fixed_addr_lock);
-		return -ENOSPC;
+		return ERR_PTR(-ENOSPC);
 	}
 	phy_addr = phy_fixed_addr++;
 	spin_unlock(&phy_fixed_addr_lock);
 
 	ret = fixed_phy_add(PHY_POLL, phy_addr, status);
 	if (ret < 0)
-		return ret;
+		return ERR_PTR(ret);
 
 	phy = get_phy_device(fmb->mii_bus, phy_addr, false);
 	if (!phy || IS_ERR(phy)) {
 		fixed_phy_del(phy_addr);
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 	}
 
 	of_node_get(np);
@@ -269,10 +269,10 @@ int fixed_phy_register(unsigned int irq,
 		phy_device_free(phy);
 		of_node_put(np);
 		fixed_phy_del(phy_addr);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
-	return 0;
+	return phy;
 }
 
 static int __init fixed_mdio_bus_init(void)
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index a85d80012993..1bd43053b8c7 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -286,6 +286,7 @@ int of_phy_register_fixed_link(struct device_node *np)
 	struct device_node *fixed_link_node;
 	const __be32 *fixed_link_prop;
 	int len;
+	struct phy_device *phy;
 
 	/* New binding */
 	fixed_link_node = of_get_child_by_name(np, "fixed-link");
@@ -299,7 +300,8 @@ int of_phy_register_fixed_link(struct device_node *np)
 		status.asym_pause = of_property_read_bool(fixed_link_node,
 							  "asym-pause");
 		of_node_put(fixed_link_node);
-		return fixed_phy_register(PHY_POLL, &status, np);
+		phy = fixed_phy_register(PHY_POLL, &status, np);
+		return IS_ERR(phy) ? PTR_ERR(phy) : 0;
 	}
 
 	/* Old binding */
@@ -310,7 +312,8 @@ int of_phy_register_fixed_link(struct device_node *np)
 		status.speed = be32_to_cpu(fixed_link_prop[2]);
 		status.pause = be32_to_cpu(fixed_link_prop[3]);
 		status.asym_pause = be32_to_cpu(fixed_link_prop[4]);
-		return fixed_phy_register(PHY_POLL, &status, np);
+		phy = fixed_phy_register(PHY_POLL, &status, np);
+		return IS_ERR(phy) ? PTR_ERR(phy) : 0;
 	}
 
 	return -ENODEV;
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 941138664c1d..f2ca1b459377 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -14,9 +14,9 @@ struct device_node;
 #ifdef CONFIG_FIXED_PHY
 extern int fixed_phy_add(unsigned int irq, int phy_id,
 			 struct fixed_phy_status *status);
-extern int fixed_phy_register(unsigned int irq,
-			      struct fixed_phy_status *status,
-			      struct device_node *np);
+extern struct phy_device *fixed_phy_register(unsigned int irq,
+					     struct fixed_phy_status *status,
+					     struct device_node *np);
 extern void fixed_phy_del(int phy_addr);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
@@ -27,11 +27,11 @@ static inline int fixed_phy_add(unsigned int irq, int phy_id,
 {
 	return -ENODEV;
 }
-static inline int fixed_phy_register(unsigned int irq,
-				     struct fixed_phy_status *status,
-				     struct device_node *np)
+static inline struct phy_device *fixed_phy_register(unsigned int irq,
+						struct fixed_phy_status *status,
+						struct device_node *np)
 {
-	return -ENODEV;
+	return ERR_PTR(-ENODEV);
 }
 static inline int fixed_phy_del(int phy_addr)
 {
-- 
cgit v1.2.3


From 0287587884b15041203b3a362d485e1ab1f24445 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 18:38:35 -0700
Subject: net: better IFF_XMIT_DST_RELEASE support

Testing xmit_more support with netperf and connected UDP sockets,
I found strange dst refcount false sharing.

Current handling of IFF_XMIT_DST_RELEASE is not optimal.

Dropping dst in validate_xmit_skb() is certainly too late in case
packet was queued by cpu X but dequeued by cpu Y

The logical point to take care of drop/force is in __dev_queue_xmit()
before even taking qdisc lock.

As Julian Anastasov pointed out, need for skb_dst() might come from some
packet schedulers or classifiers.

This patch adds new helper to cleanly express needs of various drivers
or qdiscs/classifiers.

Drivers that need skb_dst() in their ndo_start_xmit() should call
following helper in their setup instead of the prior :

	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
->
	netif_keep_dst(dev);

Instead of using a single bit, we use two bits, one being
eventually rebuilt in bonding/team drivers.

The other one, is permanent and blocks IFF_XMIT_DST_RELEASE being
rebuilt in bonding/team. Eventually, we could add something
smarter later.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  2 +-
 drivers/net/appletalk/ipddp.c             |  2 +-
 drivers/net/bonding/bond_main.c           |  9 ++++++---
 drivers/net/eql.c                         |  2 +-
 drivers/net/ifb.c                         |  3 ++-
 drivers/net/loopback.c                    |  2 +-
 drivers/net/macvlan.c                     |  3 ++-
 drivers/net/ppp/ppp_generic.c             |  2 +-
 drivers/net/team/team.c                   |  8 +++++---
 drivers/net/vxlan.c                       |  2 +-
 drivers/net/wan/hdlc_fr.c                 |  2 +-
 drivers/s390/net/qeth_l3_main.c           |  2 +-
 include/linux/netdevice.h                 |  8 ++++++++
 net/8021q/vlan_dev.c                      |  3 ++-
 net/atm/clip.c                            |  2 +-
 net/core/dev.c                            | 19 +++++++++----------
 net/ipv4/ip_gre.c                         |  2 +-
 net/ipv4/ip_vti.c                         |  2 +-
 net/ipv4/ipip.c                           |  2 +-
 net/ipv6/ip6_gre.c                        |  2 +-
 net/ipv6/ip6_tunnel.c                     |  2 +-
 net/ipv6/ip6_vti.c                        |  2 +-
 net/ipv6/sit.c                            |  2 +-
 net/sched/cls_flow.c                      |  2 ++
 net/sched/cls_route.c                     |  1 +
 net/sched/sch_generic.c                   |  3 ---
 net/sched/sch_teql.c                      |  2 +-
 27 files changed, 54 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 13e6e0431592..58b5aa3b6f2d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1364,7 +1364,7 @@ void ipoib_setup(struct net_device *dev)
 	dev->tx_queue_len	 = ipoib_sendq_size * 2;
 	dev->features		 = (NETIF_F_VLAN_CHALLENGED	|
 				    NETIF_F_HIGHDMA);
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 
 	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
 
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 10d0dba572c2..e90c6a7333d7 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -74,7 +74,7 @@ static struct net_device * __init ipddp_init(void)
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	strcpy(dev->name, "ipddp%d");
 
 	if (version_printed++ == 0)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3ad5413d4f57..c9ac06cfe6b7 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1002,7 +1002,8 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
 
 static void bond_compute_features(struct bonding *bond)
 {
-	unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
+	unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
+					IFF_XMIT_DST_RELEASE_PERM;
 	netdev_features_t vlan_features = BOND_VLAN_FEATURES;
 	netdev_features_t enc_features  = BOND_ENC_FEATURES;
 	struct net_device *bond_dev = bond->dev;
@@ -1038,8 +1039,10 @@ done:
 	bond_dev->gso_max_segs = gso_max_segs;
 	netif_set_gso_max_size(bond_dev, gso_max_size);
 
-	flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE;
-	bond_dev->priv_flags = flags | dst_release_flag;
+	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
+	    dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
+		bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;
 
 	netdev_change_features(bond_dev);
 }
diff --git a/drivers/net/eql.c b/drivers/net/eql.c
index 957e5c0cede3..a10ad74cc8d2 100644
--- a/drivers/net/eql.c
+++ b/drivers/net/eql.c
@@ -199,7 +199,7 @@ static void __init eql_setup(struct net_device *dev)
 
 	dev->type       	= ARPHRD_SLIP;
 	dev->tx_queue_len 	= 5;		/* Hands them off fast */
-	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static int eql_open(struct net_device *dev)
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index d2d4a3d2237f..34f846b4bd05 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -185,7 +185,8 @@ static void ifb_setup(struct net_device *dev)
 
 	dev->flags |= IFF_NOARP;
 	dev->flags &= ~IFF_MULTICAST;
-	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	netif_keep_dst(dev);
 	eth_hw_addr_random(dev);
 }
 
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 8f2262540561..c76283c2f84a 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -169,7 +169,7 @@ static void loopback_setup(struct net_device *dev)
 	dev->type		= ARPHRD_LOOPBACK;	/* 0x0001*/
 	dev->flags		= IFF_LOOPBACK;
 	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
-	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	dev->hw_features	= NETIF_F_ALL_TSO | NETIF_F_UFO;
 	dev->features 		= NETIF_F_SG | NETIF_F_FRAGLIST
 		| NETIF_F_ALL_TSO
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e8a453f1b458..38b4fae61f04 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1025,7 +1025,8 @@ void macvlan_common_setup(struct net_device *dev)
 {
 	ether_setup(dev);
 
-	dev->priv_flags	       &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+	dev->priv_flags	       &= ~IFF_TX_SKB_SHARING;
+	netif_keep_dst(dev);
 	dev->priv_flags	       |= IFF_UNICAST_FLT;
 	dev->netdev_ops		= &macvlan_netdev_ops;
 	dev->destructor		= free_netdev;
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index fa0d71727894..80e6f3430f65 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1103,7 +1103,7 @@ static void ppp_setup(struct net_device *dev)
 	dev->type = ARPHRD_PPP;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
 	dev->features |= NETIF_F_NETNS_LOCAL;
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 /*
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 2277c3679a51..a94a9df3e6bd 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -970,7 +970,8 @@ static void __team_compute_features(struct team *team)
 	struct team_port *port;
 	u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
 	unsigned short max_hard_header_len = ETH_HLEN;
-	unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
+	unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
+					IFF_XMIT_DST_RELEASE_PERM;
 
 	list_for_each_entry(port, &team->port_list, list) {
 		vlan_features = netdev_increment_features(vlan_features,
@@ -985,8 +986,9 @@ static void __team_compute_features(struct team *team)
 	team->dev->vlan_features = vlan_features;
 	team->dev->hard_header_len = max_hard_header_len;
 
-	flags = team->dev->priv_flags & ~IFF_XMIT_DST_RELEASE;
-	team->dev->priv_flags = flags | dst_release_flag;
+	team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
+		team->dev->priv_flags |= IFF_XMIT_DST_RELEASE;
 
 	netdev_change_features(team->dev);
 }
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 2af795d6ba05..2a51e6e48e1e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2193,7 +2193,7 @@ static void vxlan_setup(struct net_device *dev)
 	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
-	dev->priv_flags	&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
 	INIT_LIST_HEAD(&vxlan->next);
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index e5c7e6165a4b..3ebed1c40abb 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1047,7 +1047,7 @@ static void pvc_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT;
 	dev->hard_header_len = 10;
 	dev->addr_len = 2;
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static const struct net_device_ops pvc_ops = {
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index f8427a2c4840..afebb9709763 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3306,7 +3306,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
-	card->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(card->dev);
 	card->dev->gso_max_size = 15 * PAGE_SIZE;
 
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2df86f50261c..3a4315b39d20 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1206,6 +1206,7 @@ enum netdev_priv_flags {
 	IFF_SUPP_NOFCS			= 1<<19,
 	IFF_LIVE_ADDR_CHANGE		= 1<<20,
 	IFF_MACVLAN			= 1<<21,
+	IFF_XMIT_DST_RELEASE_PERM	= 1<<22,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1230,6 +1231,7 @@ enum netdev_priv_flags {
 #define IFF_SUPP_NOFCS			IFF_SUPP_NOFCS
 #define IFF_LIVE_ADDR_CHANGE		IFF_LIVE_ADDR_CHANGE
 #define IFF_MACVLAN			IFF_MACVLAN
+#define IFF_XMIT_DST_RELEASE_PERM	IFF_XMIT_DST_RELEASE_PERM
 
 /**
  *	struct net_device - The DEVICE structure.
@@ -3588,6 +3590,12 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
 	return dev->priv_flags & IFF_SUPP_NOFCS;
 }
 
+/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
+static inline void netif_keep_dst(struct net_device *dev)
+{
+	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
+}
+
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 35a6b6b15e8a..0d441ec8763e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -799,7 +799,8 @@ void vlan_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->priv_flags		|= IFF_802_1Q_VLAN;
-	dev->priv_flags		&= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+	dev->priv_flags		&= ~IFF_TX_SKB_SHARING;
+	netif_keep_dst(dev);
 	dev->tx_queue_len	= 0;
 
 	dev->netdev_ops		= &vlan_netdev_ops;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 1d9eaa4f041a..17e55dfecbe2 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -501,7 +501,7 @@ static void clip_setup(struct net_device *dev)
 	/* without any more elaborate queuing. 100 is a reasonable */
 	/* compromise between decent burst-tolerance and protection */
 	/* against memory hogs. */
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static int clip_create(int number)
diff --git a/net/core/dev.c b/net/core/dev.c
index a63b8c43c1b6..3c5bdaa44486 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2665,12 +2665,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	if (skb->next)
 		return skb;
 
-	/* If device doesn't need skb->dst, release it right now while
-	 * its hot in this cpu cache
-	 */
-	if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
-		skb_dst_drop(skb);
-
 	features = netif_skb_features(skb);
 	skb = validate_xmit_vlan(skb, features);
 	if (unlikely(!skb))
@@ -2811,8 +2805,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 * waiting to be sent out; and the qdisc is not running -
 		 * xmit the skb directly.
 		 */
-		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
-			skb_dst_force(skb);
 
 		qdisc_bstats_update(q, skb);
 
@@ -2827,7 +2819,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
-		skb_dst_force(skb);
 		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
@@ -2924,6 +2915,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 
 	skb_update_prio(skb);
 
+	/* If device/qdisc don't need skb->dst, release it right now while
+	 * its hot in this cpu cache.
+	 */
+	if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+		skb_dst_drop(skb);
+	else
+		skb_dst_force(skb);
+
 	txq = netdev_pick_tx(dev, skb, accel_priv);
 	q = rcu_dereference_bh(txq->qdisc);
 
@@ -6674,7 +6673,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->adj_list.lower);
 	INIT_LIST_HEAD(&dev->all_adj_list.upper);
 	INIT_LIST_HEAD(&dev->all_adj_list.lower);
-	dev->priv_flags = IFF_XMIT_DST_RELEASE;
+	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
 
 	dev->num_tx_queues = txqs;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0485ef18d254..12055fdbe716 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -510,7 +510,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	memcpy(dev->broadcast, &iph->daddr, 4);
 
 	dev->flags		= IFF_NOARP;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	dev->addr_len		= 4;
 
 	if (iph->daddr) {
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index e453cb724a95..3e861011e4a3 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -364,7 +364,7 @@ static int vti_tunnel_init(struct net_device *dev)
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 
 	return ip_tunnel_init(dev);
 }
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ea88ab3102a8..37096d64730e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -289,7 +289,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 
 	dev->features		|= IPIP_FEATURES;
 	dev->hw_features	|= IPIP_FEATURES;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 74b677916a70..de3b1c86b8d3 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1242,7 +1242,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
 	dev->flags |= IFF_NOARP;
 	dev->iflink = 0;
 	dev->addr_len = sizeof(struct in6_addr);
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static int ip6gre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index d3e8888ad611..9409887fb664 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1493,7 +1493,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
 		dev->mtu -= 8;
 	dev->flags |= IFF_NOARP;
 	dev->addr_len = sizeof(struct in6_addr);
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	/* This perm addr will be used as interface identifier by IPv6 */
 	dev->addr_assign_type = NET_ADDR_RANDOM;
 	eth_random_addr(dev->perm_addr);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 5833a2244467..d440bb585524 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -807,7 +807,7 @@ static void vti6_dev_setup(struct net_device *dev)
 	dev->mtu = ETH_DATA_LEN;
 	dev->flags |= IFF_NOARP;
 	dev->addr_len = sizeof(struct in6_addr);
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 /**
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0d4e27466f82..6eab37cf5345 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1364,7 +1364,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	dev->hard_header_len	= LL_MAX_HEADER + t_hlen;
 	dev->mtu		= ETH_DATA_LEN - t_hlen;
 	dev->flags		= IFF_NOARP;
-	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index a5d2b20db560..4ac515f2a6ce 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -493,6 +493,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	tcf_exts_change(tp, &fnew->exts, &e);
 	tcf_em_tree_change(tp, &fnew->ematches, &t);
 
+	netif_keep_dst(qdisc_dev(tp->q));
+
 	if (tb[TCA_FLOW_KEYS]) {
 		fnew->keymask = keymask;
 		fnew->nkeys   = nkeys;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 6f22baae0afa..109a329b7198 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -524,6 +524,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 		if (f->handle < f1->handle)
 			break;
 
+	netif_keep_dst(qdisc_dev(tp->q));
 	rcu_assign_pointer(f->next, f1);
 	rcu_assign_pointer(*fp, f);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2b349a4de3c8..38d58e6cef07 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -47,7 +47,6 @@ EXPORT_SYMBOL(default_qdisc_ops);
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	skb_dst_force(skb);
 	q->gso_skb = skb;
 	q->qstats.requeues++;
 	q->q.qlen++;	/* it's still part of the queue */
@@ -218,8 +217,6 @@ static inline int qdisc_restart(struct Qdisc *q)
 	if (unlikely(!skb))
 		return 0;
 
-	WARN_ON_ONCE(skb_dst_is_noref(skb));
-
 	root_lock = qdisc_lock(q);
 	dev = qdisc_dev(q);
 	txq = skb_get_tx_queue(dev, skb);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 5cd291bd00e4..6ada42396a24 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -470,7 +470,7 @@ static __init void teql_master_setup(struct net_device *dev)
 	dev->tx_queue_len	= 100;
 	dev->flags		= IFF_NOARP;
 	dev->hard_header_len	= LL_MAX_HEADER;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static LIST_HEAD(master_dev_list);
-- 
cgit v1.2.3


From e6f5c78930e409f3a6b37f5484313a416359ac7f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Fri, 22 Aug 2014 10:40:25 -0400
Subject: locks: plumb a "priv" pointer into the setlease routines

In later patches, we're going to add a new lock_manager_operation to
finish setting up the lease while still holding the i_lock.  To do
this, we'll need to pass a little bit of info in the fcntl setlease
case (primarily an fasync structure). Plumb the extra pointer into
there in advance of that.

We declare this pointer as a void ** to make it clear that this is
private info, and that the caller isn't required to set this unless
the lm_setup specifically requires it.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  2 +-
 fs/cifs/cifsfs.c                  |  7 ++++---
 fs/libfs.c                        |  4 +++-
 fs/locks.c                        | 32 ++++++++++++++++++++------------
 fs/nfsd/nfs4state.c               |  4 ++--
 include/linux/fs.h                | 12 ++++++------
 7 files changed, 37 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index f1997e9da61f..3d92049ae71d 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -464,7 +464,7 @@ prototypes:
 			size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
 			size_t, unsigned int);
-	int (*setlease)(struct file *, long, struct file_lock **);
+	int (*setlease)(struct file *, long, struct file_lock **, void **);
 	long (*fallocate)(struct file *, int, loff_t, loff_t);
 };
 
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 61d65cc65c54..28ebd49f169f 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -826,7 +826,7 @@ struct file_operations {
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
-	int (*setlease)(struct file *, long arg, struct file_lock **);
+	int (*setlease)(struct file *, long arg, struct file_lock **, void **);
 	long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
 	int (*show_fdinfo)(struct seq_file *m, struct file *f);
 };
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ac4f260155c8..85c70d5969ac 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -800,7 +800,8 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 	return generic_file_llseek(file, offset, whence);
 }
 
-static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
+static int
+cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv)
 {
 	/*
 	 * Note that this is called by vfs setlease with i_lock held to
@@ -815,7 +816,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
 	/* check if file is oplocked */
 	if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
 	    ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode))))
-		return generic_setlease(file, arg, lease);
+		return generic_setlease(file, arg, lease, priv);
 	else if (tlink_tcon(cfile->tlink)->local_lease &&
 		 !CIFS_CACHE_READ(CIFS_I(inode)))
 		/*
@@ -826,7 +827,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
 		 * knows that the file won't be changed on the server by anyone
 		 * else.
 		 */
-		return generic_setlease(file, arg, lease);
+		return generic_setlease(file, arg, lease, priv);
 	else
 		return -EAGAIN;
 }
diff --git a/fs/libfs.c b/fs/libfs.c
index 29012a303ef8..171d2846f2a3 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1081,12 +1081,14 @@ EXPORT_SYMBOL(alloc_anon_inode);
  * @filp: file pointer
  * @arg: type of lease to obtain
  * @flp: new lease supplied for insertion
+ * @priv: private data for lm_setup operation
  *
  * Generic helper for filesystems that do not wish to allow leases to be set.
  * All arguments are ignored and it just returns -EINVAL.
  */
 int
-simple_nosetlease(struct file *filp, long arg, struct file_lock **flp)
+simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
+		  void **priv)
 {
 	return -EINVAL;
 }
diff --git a/fs/locks.c b/fs/locks.c
index e16c2c61a44f..4fa269b0bdef 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1297,7 +1297,6 @@ int lease_modify(struct file_lock **before, int arg)
 	}
 	return 0;
 }
-
 EXPORT_SYMBOL(lease_modify);
 
 static bool past_time(unsigned long then)
@@ -1543,7 +1542,8 @@ check_conflicting_open(const struct dentry *dentry, const long arg)
 	return ret;
 }
 
-static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
+static int
+generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
 {
 	struct file_lock *fl, **before, **my_before = NULL, *lease;
 	struct dentry *dentry = filp->f_path.dentry;
@@ -1630,11 +1630,14 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
 	smp_mb();
 	error = check_conflicting_open(dentry, arg);
 	if (error)
-		locks_unlink_lock(before);
+		goto out_unlink;
 out:
 	if (is_deleg)
 		mutex_unlock(&inode->i_mutex);
 	return error;
+out_unlink:
+	locks_unlink_lock(before);
+	goto out;
 }
 
 static int generic_delete_lease(struct file *filp)
@@ -1661,13 +1664,15 @@ static int generic_delete_lease(struct file *filp)
  *	@filp: file pointer
  *	@arg: type of lease to obtain
  *	@flp: input - file_lock to use, output - file_lock inserted
+ *	@priv: private data for lm_setup
  *
  *	The (input) flp->fl_lmops->lm_break function is required
  *	by break_lease().
  *
  *	Called with inode->i_lock held.
  */
-int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
+int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
+			void **priv)
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
@@ -1692,19 +1697,20 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 			WARN_ON_ONCE(1);
 			return -ENOLCK;
 		}
-		return generic_add_lease(filp, arg, flp);
+		return generic_add_lease(filp, arg, flp, priv);
 	default:
 		return -EINVAL;
 	}
 }
 EXPORT_SYMBOL(generic_setlease);
 
-static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
+static int
+__vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
 	if (filp->f_op->setlease)
-		return filp->f_op->setlease(filp, arg, lease);
+		return filp->f_op->setlease(filp, arg, lease, priv);
 	else
-		return generic_setlease(filp, arg, lease);
+		return generic_setlease(filp, arg, lease, priv);
 }
 
 /**
@@ -1712,6 +1718,7 @@ static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
  * @filp: file pointer
  * @arg: type of lease to obtain
  * @lease: file_lock to use when adding a lease
+ * @priv: private info for lm_setup when adding a lease
  *
  * Call this to establish a lease on the file. The "lease" argument is not
  * used for F_UNLCK requests and may be NULL. For commands that set or alter
@@ -1720,13 +1727,14 @@ static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
  * stack trace).
  */
 
-int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
+int
+vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
 	struct inode *inode = file_inode(filp);
 	int error;
 
 	spin_lock(&inode->i_lock);
-	error = __vfs_setlease(filp, arg, lease);
+	error = __vfs_setlease(filp, arg, lease, priv);
 	spin_unlock(&inode->i_lock);
 
 	return error;
@@ -1751,7 +1759,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
 	}
 	ret = fl;
 	spin_lock(&inode->i_lock);
-	error = __vfs_setlease(filp, arg, &ret);
+	error = __vfs_setlease(filp, arg, &ret, NULL);
 	if (error)
 		goto out_unlock;
 	if (ret == fl)
@@ -1789,7 +1797,7 @@ out_unlock:
 int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
 {
 	if (arg == F_UNLCK)
-		return vfs_setlease(filp, F_UNLCK, NULL);
+		return vfs_setlease(filp, F_UNLCK, NULL, NULL);
 	return do_fcntl_add_lease(fd, filp, arg);
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 188cd68aefb6..7c803db2a027 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -686,7 +686,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
 	spin_unlock(&fp->fi_lock);
 
 	if (filp) {
-		vfs_setlease(filp, F_UNLCK, NULL);
+		vfs_setlease(filp, F_UNLCK, NULL, NULL);
 		fput(filp);
 	}
 }
@@ -3792,7 +3792,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
 	}
 	fl->fl_file = filp;
 	ret = fl;
-	status = vfs_setlease(filp, fl->fl_type, &ret);
+	status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
 	if (status) {
 		locks_free_lock(fl);
 		goto out_fput;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 96528f73dda4..f1870eb67b02 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -982,8 +982,8 @@ extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
 extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
 extern void lease_get_mtime(struct inode *, struct timespec *time);
-extern int generic_setlease(struct file *, long, struct file_lock **);
-extern int vfs_setlease(struct file *, long, struct file_lock **);
+extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
+extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
 extern int lease_modify(struct file_lock **, int);
 #else /* !CONFIG_FILE_LOCKING */
 static inline int fcntl_getlk(struct file *file, unsigned int cmd,
@@ -1100,13 +1100,13 @@ static inline void lease_get_mtime(struct inode *inode, struct timespec *time)
 }
 
 static inline int generic_setlease(struct file *filp, long arg,
-				    struct file_lock **flp)
+				    struct file_lock **flp, void **priv)
 {
 	return -EINVAL;
 }
 
 static inline int vfs_setlease(struct file *filp, long arg,
-			       struct file_lock **lease)
+			       struct file_lock **lease, void **priv)
 {
 	return -EINVAL;
 }
@@ -1494,7 +1494,7 @@ struct file_operations {
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
-	int (*setlease)(struct file *, long, struct file_lock **);
+	int (*setlease)(struct file *, long, struct file_lock **, void **);
 	long (*fallocate)(struct file *file, int mode, loff_t offset,
 			  loff_t len);
 	int (*show_fdinfo)(struct seq_file *m, struct file *f);
@@ -2599,7 +2599,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping,
 			struct page *page, void *fsdata);
 extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
-extern int simple_nosetlease(struct file *, long, struct file_lock **);
+extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
 extern const struct dentry_operations simple_dentry_operations;
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
-- 
cgit v1.2.3


From 1c7dd2ff430fa14b45c9def54468e3a25ab8342b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Fri, 22 Aug 2014 10:55:47 -0400
Subject: locks: define a lm_setup handler for leases

...and move the fasync setup into it for fcntl lease calls. At the same
time, change the semantics of how the file_lock double-pointer is
handled. Up until now, on a successful lease return you got a pointer to
the lock on the list. This is bad, since that pointer can no longer be
relied on as valid once the inode->i_lock has been released.

Change the code to instead just zero out the pointer if the lease we
passed in ended up being used. Then the callers can just check to see
if it's NULL after the call and free it if it isn't.

The priv argument has the same semantics. The lm_setup function can
zero the pointer out to signal to the caller that it should not be
freed after the function returns.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/locks.c          | 92 ++++++++++++++++++++++++++++-------------------------
 fs/nfsd/nfs4state.c |  6 ++--
 include/linux/fs.h  |  1 +
 3 files changed, 51 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index 4fa269b0bdef..a237ba632e8d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -432,9 +432,27 @@ static void lease_break_callback(struct file_lock *fl)
 	kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
 }
 
+static void
+lease_setup(struct file_lock *fl, void **priv)
+{
+	struct file *filp = fl->fl_file;
+	struct fasync_struct *fa = *priv;
+
+	/*
+	 * fasync_insert_entry() returns the old entry if any. If there was no
+	 * old entry, then it used "priv" and inserted it into the fasync list.
+	 * Clear the pointer to indicate that it shouldn't be freed.
+	 */
+	if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
+		*priv = NULL;
+
+	__f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+}
+
 static const struct lock_manager_operations lease_manager_ops = {
 	.lm_break = lease_break_callback,
 	.lm_change = lease_modify,
+	.lm_setup = lease_setup,
 };
 
 /*
@@ -1607,10 +1625,11 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 	}
 
 	if (my_before != NULL) {
+		lease = *my_before;
 		error = lease->fl_lmops->lm_change(my_before, arg);
-		if (!error)
-			*flp = *my_before;
-		goto out;
+		if (error)
+			goto out;
+		goto out_setup;
 	}
 
 	error = -EINVAL;
@@ -1631,9 +1650,15 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 	error = check_conflicting_open(dentry, arg);
 	if (error)
 		goto out_unlink;
+
+out_setup:
+	if (lease->fl_lmops->lm_setup)
+		lease->fl_lmops->lm_setup(lease, priv);
 out:
 	if (is_deleg)
 		mutex_unlock(&inode->i_mutex);
+	if (!error && !my_before)
+		*flp = NULL;
 	return error;
 out_unlink:
 	locks_unlink_lock(before);
@@ -1661,10 +1686,11 @@ static int generic_delete_lease(struct file *filp)
 
 /**
  *	generic_setlease	-	sets a lease on an open file
- *	@filp: file pointer
- *	@arg: type of lease to obtain
- *	@flp: input - file_lock to use, output - file_lock inserted
- *	@priv: private data for lm_setup
+ *	@filp:	file pointer
+ *	@arg:	type of lease to obtain
+ *	@flp:	input - file_lock to use, output - file_lock inserted
+ *	@priv:	private data for lm_setup (may be NULL if lm_setup
+ *		doesn't require it)
  *
  *	The (input) flp->fl_lmops->lm_break function is required
  *	by break_lease().
@@ -1704,29 +1730,23 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
 }
 EXPORT_SYMBOL(generic_setlease);
 
-static int
-__vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
-{
-	if (filp->f_op->setlease)
-		return filp->f_op->setlease(filp, arg, lease, priv);
-	else
-		return generic_setlease(filp, arg, lease, priv);
-}
-
 /**
  * vfs_setlease        -       sets a lease on an open file
- * @filp: file pointer
- * @arg: type of lease to obtain
- * @lease: file_lock to use when adding a lease
- * @priv: private info for lm_setup when adding a lease
+ * @filp:	file pointer
+ * @arg:	type of lease to obtain
+ * @lease:	file_lock to use when adding a lease
+ * @priv:	private info for lm_setup when adding a lease (may be
+ * 		NULL if lm_setup doesn't require it)
  *
  * Call this to establish a lease on the file. The "lease" argument is not
  * used for F_UNLCK requests and may be NULL. For commands that set or alter
  * an existing lease, the (*lease)->fl_lmops->lm_break operation must be set;
  * if not, this function will return -ENOLCK (and generate a scary-looking
  * stack trace).
+ *
+ * The "priv" pointer is passed directly to the lm_setup function as-is. It
+ * may be NULL if the lm_setup operation doesn't require it.
  */
-
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
@@ -1734,17 +1754,18 @@ vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 	int error;
 
 	spin_lock(&inode->i_lock);
-	error = __vfs_setlease(filp, arg, lease, priv);
+	if (filp->f_op->setlease)
+		error = filp->f_op->setlease(filp, arg, lease, priv);
+	else
+		error = generic_setlease(filp, arg, lease, priv);
 	spin_unlock(&inode->i_lock);
-
 	return error;
 }
 EXPORT_SYMBOL_GPL(vfs_setlease);
 
 static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
 {
-	struct file_lock *fl, *ret;
-	struct inode *inode = file_inode(filp);
+	struct file_lock *fl;
 	struct fasync_struct *new;
 	int error;
 
@@ -1757,26 +1778,9 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
 		locks_free_lock(fl);
 		return -ENOMEM;
 	}
-	ret = fl;
-	spin_lock(&inode->i_lock);
-	error = __vfs_setlease(filp, arg, &ret, NULL);
-	if (error)
-		goto out_unlock;
-	if (ret == fl)
-		fl = NULL;
+	new->fa_fd = fd;
 
-	/*
-	 * fasync_insert_entry() returns the old entry if any.
-	 * If there was no old entry, then it used 'new' and
-	 * inserted it into the fasync list. Clear new so that
-	 * we don't release it here.
-	 */
-	if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
-		new = NULL;
-
-	__f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-out_unlock:
-	spin_unlock(&inode->i_lock);
+	error = vfs_setlease(filp, arg, &fl, (void **)&new);
 	if (fl)
 		locks_free_lock(fl);
 	if (new)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7c803db2a027..5349528136e2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3793,12 +3793,10 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
 	fl->fl_file = filp;
 	ret = fl;
 	status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
-	if (status) {
+	if (fl)
 		locks_free_lock(fl);
+	if (status)
 		goto out_fput;
-	}
-	if (ret != fl)
-		locks_free_lock(fl);
 	spin_lock(&state_lock);
 	spin_lock(&fp->fi_lock);
 	/* Did the lease get broken before we took the lock? */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f1870eb67b02..9a6d56154dd5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -874,6 +874,7 @@ struct lock_manager_operations {
 	int (*lm_grant)(struct file_lock *, int);
 	void (*lm_break)(struct file_lock *);
 	int (*lm_change)(struct file_lock **, int);
+	void (*lm_setup)(struct file_lock *, void **);
 };
 
 struct lock_manager {
-- 
cgit v1.2.3


From c45198eda2794bb72601c9f96266d8b95db66dd5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Mon, 1 Sep 2014 07:12:07 -0400
Subject: locks: move freeing of leases outside of i_lock

There was only one place where we still could free a file_lock while
holding the i_lock -- lease_modify. Add a new list_head argument to the
lm_change operation, pass in a private list when calling it, and fix
those callers to dispose of the list once the lock has been dropped.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/filesystems/Locking |  3 +--
 fs/locks.c                        | 34 ++++++++++++++++++++++------------
 fs/nfsd/nfs4state.c               |  6 +++---
 include/linux/fs.h                |  7 ++++---
 4 files changed, 30 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 4af288e38f13..94d93b1f8b53 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -469,8 +469,7 @@ prototypes:
 };
 
 locking rules:
-	All may block except for ->setlease.
-	No VFS locks held on entry except for ->setlease.
+	All may block.
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
diff --git a/fs/locks.c b/fs/locks.c
index eb463257f867..c0f789dfa655 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1292,7 +1292,7 @@ static void lease_clear_pending(struct file_lock *fl, int arg)
 }
 
 /* We already had a lease on this file; just change its type */
-int lease_modify(struct file_lock **before, int arg)
+int lease_modify(struct file_lock **before, int arg, struct list_head *dispose)
 {
 	struct file_lock *fl = *before;
 	int error = assign_type(fl, arg);
@@ -1311,7 +1311,7 @@ int lease_modify(struct file_lock **before, int arg)
 			printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
 			fl->fl_fasync = NULL;
 		}
-		locks_delete_lock(before, NULL);
+		locks_delete_lock(before, dispose);
 	}
 	return 0;
 }
@@ -1325,7 +1325,7 @@ static bool past_time(unsigned long then)
 	return time_after(jiffies, then);
 }
 
-static void time_out_leases(struct inode *inode)
+static void time_out_leases(struct inode *inode, struct list_head *dispose)
 {
 	struct file_lock **before;
 	struct file_lock *fl;
@@ -1336,9 +1336,9 @@ static void time_out_leases(struct inode *inode)
 	while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) {
 		trace_time_out_leases(inode, fl);
 		if (past_time(fl->fl_downgrade_time))
-			lease_modify(before, F_RDLCK);
+			lease_modify(before, F_RDLCK, dispose);
 		if (past_time(fl->fl_break_time))
-			lease_modify(before, F_UNLCK);
+			lease_modify(before, F_UNLCK, dispose);
 		if (fl == *before)	/* lease_modify may have freed fl */
 			before = &fl->fl_next;
 	}
@@ -1373,6 +1373,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 	int i_have_this_lease = 0;
 	bool lease_conflict = false;
 	int want_write = (mode & O_ACCMODE) != O_RDONLY;
+	LIST_HEAD(dispose);
 
 	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
 	if (IS_ERR(new_fl))
@@ -1381,7 +1382,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 
 	spin_lock(&inode->i_lock);
 
-	time_out_leases(inode);
+	time_out_leases(inode, &dispose);
 
 	flock = inode->i_flock;
 	if ((flock == NULL) || !IS_LEASE(flock))
@@ -1436,6 +1437,7 @@ restart:
 	locks_insert_block(flock, new_fl);
 	trace_break_lease_block(inode, new_fl);
 	spin_unlock(&inode->i_lock);
+	locks_dispose_list(&dispose);
 	error = wait_event_interruptible_timeout(new_fl->fl_wait,
 						!new_fl->fl_next, break_time);
 	spin_lock(&inode->i_lock);
@@ -1443,7 +1445,7 @@ restart:
 	locks_delete_block(new_fl);
 	if (error >= 0) {
 		if (error == 0)
-			time_out_leases(inode);
+			time_out_leases(inode, &dispose);
 		/*
 		 * Wait for the next conflicting lease that has not been
 		 * broken yet
@@ -1458,6 +1460,7 @@ restart:
 
 out:
 	spin_unlock(&inode->i_lock);
+	locks_dispose_list(&dispose);
 	locks_free_lock(new_fl);
 	return error;
 }
@@ -1522,9 +1525,10 @@ int fcntl_getlease(struct file *filp)
 	struct file_lock *fl;
 	struct inode *inode = file_inode(filp);
 	int type = F_UNLCK;
+	LIST_HEAD(dispose);
 
 	spin_lock(&inode->i_lock);
-	time_out_leases(file_inode(filp));
+	time_out_leases(file_inode(filp), &dispose);
 	for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl);
 			fl = fl->fl_next) {
 		if (fl->fl_file == filp) {
@@ -1533,6 +1537,7 @@ int fcntl_getlease(struct file *filp)
 		}
 	}
 	spin_unlock(&inode->i_lock);
+	locks_dispose_list(&dispose);
 	return type;
 }
 
@@ -1570,6 +1575,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 	struct inode *inode = dentry->d_inode;
 	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
 	int error;
+	LIST_HEAD(dispose);
 
 	lease = *flp;
 	trace_generic_add_lease(inode, lease);
@@ -1593,7 +1599,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 	}
 
 	spin_lock(&inode->i_lock);
-	time_out_leases(inode);
+	time_out_leases(inode, &dispose);
 	error = check_conflicting_open(dentry, arg);
 	if (error)
 		goto out;
@@ -1630,7 +1636,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 
 	if (my_before != NULL) {
 		lease = *my_before;
-		error = lease->fl_lmops->lm_change(my_before, arg);
+		error = lease->fl_lmops->lm_change(my_before, arg, &dispose);
 		if (error)
 			goto out;
 		goto out_setup;
@@ -1660,6 +1666,7 @@ out_setup:
 		lease->fl_lmops->lm_setup(lease, priv);
 out:
 	spin_unlock(&inode->i_lock);
+	locks_dispose_list(&dispose);
 	if (is_deleg)
 		mutex_unlock(&inode->i_mutex);
 	if (!error && !my_before)
@@ -1676,8 +1683,10 @@ static int generic_delete_lease(struct file *filp)
 	struct file_lock *fl, **before;
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
+	LIST_HEAD(dispose);
 
 	spin_lock(&inode->i_lock);
+	time_out_leases(inode, &dispose);
 	for (before = &inode->i_flock;
 			((fl = *before) != NULL) && IS_LEASE(fl);
 			before = &fl->fl_next) {
@@ -1686,8 +1695,9 @@ static int generic_delete_lease(struct file *filp)
 	}
 	trace_generic_delete_lease(inode, fl);
 	if (fl)
-		error = fl->fl_lmops->lm_change(before, F_UNLCK);
+		error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose);
 	spin_unlock(&inode->i_lock);
+	locks_dispose_list(&dispose);
 	return error;
 }
 
@@ -2372,7 +2382,7 @@ void locks_remove_file(struct file *filp)
 	while ((fl = *before) != NULL) {
 		if (fl->fl_file == filp) {
 			if (IS_LEASE(fl)) {
-				lease_modify(before, F_UNLCK);
+				lease_modify(before, F_UNLCK, &dispose);
 				continue;
 			}
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5349528136e2..604ab6decd28 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3427,11 +3427,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)
 	spin_unlock(&fp->fi_lock);
 }
 
-static
-int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
+static int
+nfsd_change_deleg_cb(struct file_lock **onlist, int arg, struct list_head *dispose)
 {
 	if (arg & F_UNLCK)
-		return lease_modify(onlist, arg);
+		return lease_modify(onlist, arg, dispose);
 	else
 		return -EAGAIN;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9a6d56154dd5..f419f718e447 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -873,7 +873,7 @@ struct lock_manager_operations {
 	void (*lm_notify)(struct file_lock *);	/* unblock callback */
 	int (*lm_grant)(struct file_lock *, int);
 	void (*lm_break)(struct file_lock *);
-	int (*lm_change)(struct file_lock **, int);
+	int (*lm_change)(struct file_lock **, int, struct list_head *);
 	void (*lm_setup)(struct file_lock *, void **);
 };
 
@@ -985,7 +985,7 @@ extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int t
 extern void lease_get_mtime(struct inode *, struct timespec *time);
 extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
 extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
-extern int lease_modify(struct file_lock **, int);
+extern int lease_modify(struct file_lock **, int, struct list_head *);
 #else /* !CONFIG_FILE_LOCKING */
 static inline int fcntl_getlk(struct file *file, unsigned int cmd,
 			      struct flock __user *user)
@@ -1112,7 +1112,8 @@ static inline int vfs_setlease(struct file *filp, long arg,
 	return -EINVAL;
 }
 
-static inline int lease_modify(struct file_lock **before, int arg)
+static inline int lease_modify(struct file_lock **before, int arg,
+			       struct list_head *dispose)
 {
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From 4d01b7f5e7576858b71cbaa72b541e17a229cb91 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Mon, 1 Sep 2014 15:06:54 -0400
Subject: locks: give lm_break a return value

Christoph suggests:

   "Add a return value to lm_break so that the lock manager can tell the
    core code "you can delete this lease right now".  That gets rid of
    the games with the timeout which require all kinds of race avoidance
    code in the users."

Do that here and have the nfsd lease break routine use it when it detects
that there was a race between setting up the lease and it being broken.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/locks.c          | 17 +++++++++++++----
 fs/nfsd/nfs4state.c | 17 +++++++++--------
 include/linux/fs.h  |  2 +-
 3 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index 7d627ac0ed87..aed4a957d232 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -427,9 +427,11 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 }
 
 /* default lease lock manager operations */
-static void lease_break_callback(struct file_lock *fl)
+static bool
+lease_break_callback(struct file_lock *fl)
 {
 	kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
+	return false;
 }
 
 static void
@@ -1382,7 +1384,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 {
 	int error = 0;
 	struct file_lock *new_fl;
-	struct file_lock *fl;
+	struct file_lock *fl, **before;
 	unsigned long break_time;
 	int want_write = (mode & O_ACCMODE) != O_RDONLY;
 	LIST_HEAD(dispose);
@@ -1406,7 +1408,9 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 			break_time++;	/* so that 0 means no break time */
 	}
 
-	for (fl = inode->i_flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
+	for (before = &inode->i_flock;
+			((fl = *before) != NULL) && IS_LEASE(fl);
+			before = &fl->fl_next) {
 		if (!leases_conflict(fl, new_fl))
 			continue;
 		if (want_write) {
@@ -1420,9 +1424,14 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 			fl->fl_flags |= FL_DOWNGRADE_PENDING;
 			fl->fl_downgrade_time = break_time;
 		}
-		fl->fl_lmops->lm_break(fl);
+		if (fl->fl_lmops->lm_break(fl))
+			locks_delete_lock(before, &dispose);
 	}
 
+	fl = inode->i_flock;
+	if (!fl || !IS_LEASE(fl))
+		goto out;
+
 	if (mode & O_NONBLOCK) {
 		trace_break_lease_noblock(inode, new_fl);
 		error = -EWOULDBLOCK;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 604ab6decd28..d1b851548b7a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3391,18 +3391,20 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
 }
 
 /* Called from break_lease() with i_lock held. */
-static void nfsd_break_deleg_cb(struct file_lock *fl)
+static bool
+nfsd_break_deleg_cb(struct file_lock *fl)
 {
+	bool ret = false;
 	struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
 	struct nfs4_delegation *dp;
 
 	if (!fp) {
 		WARN(1, "(%p)->fl_owner NULL\n", fl);
-		return;
+		return ret;
 	}
 	if (fp->fi_had_conflict) {
 		WARN(1, "duplicate break on %p\n", fp);
-		return;
+		return ret;
 	}
 	/*
 	 * We don't want the locks code to timeout the lease for us;
@@ -3414,17 +3416,16 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)
 	spin_lock(&fp->fi_lock);
 	fp->fi_had_conflict = true;
 	/*
-	 * If there are no delegations on the list, then we can't count on this
-	 * lease ever being cleaned up. Set the fl_break_time to jiffies so that
-	 * time_out_leases will do it ASAP. The fact that fi_had_conflict is now
-	 * true should keep any new delegations from being hashed.
+	 * If there are no delegations on the list, then return true
+	 * so that the lease code will go ahead and delete it.
 	 */
 	if (list_empty(&fp->fi_delegations))
-		fl->fl_break_time = jiffies;
+		ret = true;
 	else
 		list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
 			nfsd_break_one_deleg(dp);
 	spin_unlock(&fp->fi_lock);
+	return ret;
 }
 
 static int
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f419f718e447..ed4e1897099c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -872,7 +872,7 @@ struct lock_manager_operations {
 	void (*lm_put_owner)(struct file_lock *);
 	void (*lm_notify)(struct file_lock *);	/* unblock callback */
 	int (*lm_grant)(struct file_lock *, int);
-	void (*lm_break)(struct file_lock *);
+	bool (*lm_break)(struct file_lock *);
 	int (*lm_change)(struct file_lock **, int, struct list_head *);
 	void (*lm_setup)(struct file_lock *, void **);
 };
-- 
cgit v1.2.3


From 7ca76311fe6c397e9f332e5e6c79e3310d5ee98a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Mon, 1 Sep 2014 19:04:48 -0400
Subject: locks: set fl_owner for leases to filp instead of current->files

Like flock locks, leases are owned by the file description. Now that the
i_have_this_lease check in __break_lease is gone, we don't actually use
the fl_owner for leases for anything. So, it's now safe to set this more
appropriately to the same value as the fl_file.

While we're at it, fix up the comments over the fl_owner_t definition
since they're rather out of date.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 fs/locks.c         | 2 +-
 include/linux/fs.h | 8 +-------
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index aed4a957d232..314135ad820b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -465,7 +465,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
 	if (assign_type(fl, type) != 0)
 		return -EINVAL;
 
-	fl->fl_owner = current->files;
+	fl->fl_owner = filp;
 	fl->fl_pid = current->tgid;
 
 	fl->fl_file = filp;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed4e1897099c..bb9484ae1eef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -851,13 +851,7 @@ static inline struct file *get_file(struct file *f)
  */
 #define FILE_LOCK_DEFERRED 1
 
-/*
- * The POSIX file lock owner is determined by
- * the "struct files_struct" in the thread group
- * (or NULL for no owner - BSD locks).
- *
- * Lockd stuffs a "host" pointer into this.
- */
+/* legacy typedef, should eventually be removed */
 typedef void *fl_owner_t;
 
 struct file_lock_operations {
-- 
cgit v1.2.3


From 1b2b32dcdb3df28dd103033c73cac2417fa05845 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@primarydata.com>
Date: Wed, 24 Sep 2014 08:38:44 -0400
Subject: locks: fix fcntl_setlease/getlease return when !CONFIG_FILE_LOCKING

Currently they both just return 0. Fix them to return more appropriate
values instead.

For better or worse, most places in the kernel return -EINVAL when
leases aren't available. -ENOLCK would probably have been better, but
let's follow suit here in the case of F_SETLEASE.

In the F_GETLEASE case, just return F_UNLCK since we know that no
lease will have been set.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bb9484ae1eef..2023306c620e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1008,12 +1008,12 @@ static inline int fcntl_setlk64(unsigned int fd, struct file *file,
 #endif
 static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
 {
-	return 0;
+	return -EINVAL;
 }
 
 static inline int fcntl_getlease(struct file *filp)
 {
-	return 0;
+	return F_UNLCK;
 }
 
 static inline void locks_init_lock(struct file_lock *fl)
-- 
cgit v1.2.3


From f2fc42b6ac31f4d808da7a9da460dd433a71e976 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Thu, 12 Jun 2014 22:30:34 +0530
Subject: mailbox: rename pl320-ipc specific mailbox.h

The patch 30058677 "ARM / highbank: add support for pl320 IPC"
added a pl320 IPC specific header file as a generic mailbox.h.
This file has been renamed appropriately to allow the
introduction of the generic mailbox API framework.

Acked-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Mark Brown <broonie@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-highbank/highbank.c  |  2 +-
 drivers/cpufreq/highbank-cpufreq.c |  2 +-
 drivers/mailbox/pl320-ipc.c        |  2 +-
 include/linux/mailbox.h            | 17 -----------------
 include/linux/pl320-ipc.h          | 17 +++++++++++++++++
 5 files changed, 20 insertions(+), 20 deletions(-)
 delete mode 100644 include/linux/mailbox.h
 create mode 100644 include/linux/pl320-ipc.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index 8c35ae4ff176..07a09570175d 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -20,7 +20,7 @@
 #include <linux/input.h>
 #include <linux/io.h>
 #include <linux/irqchip.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
index bf8902a0866d..b464f29d8d54 100644
--- a/drivers/cpufreq/highbank-cpufreq.c
+++ b/drivers/cpufreq/highbank-cpufreq.c
@@ -19,7 +19,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 #include <linux/platform_device.h>
 
 #define HB_CPUFREQ_CHANGE_NOTE	0x80000001
diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c
index d873cbae2fbb..f3755e0aa935 100644
--- a/drivers/mailbox/pl320-ipc.c
+++ b/drivers/mailbox/pl320-ipc.c
@@ -26,7 +26,7 @@
 #include <linux/device.h>
 #include <linux/amba/bus.h>
 
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 
 #define IPCMxSOURCE(m)		((m) * 0x40)
 #define IPCMxDSET(m)		(((m) * 0x40) + 0x004)
diff --git a/include/linux/mailbox.h b/include/linux/mailbox.h
deleted file mode 100644
index 5161f63ec1c8..000000000000
--- a/include/linux/mailbox.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-int pl320_ipc_transmit(u32 *data);
-int pl320_ipc_register_notifier(struct notifier_block *nb);
-int pl320_ipc_unregister_notifier(struct notifier_block *nb);
diff --git a/include/linux/pl320-ipc.h b/include/linux/pl320-ipc.h
new file mode 100644
index 000000000000..5161f63ec1c8
--- /dev/null
+++ b/include/linux/pl320-ipc.h
@@ -0,0 +1,17 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+int pl320_ipc_transmit(u32 *data);
+int pl320_ipc_register_notifier(struct notifier_block *nb);
+int pl320_ipc_unregister_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From 2b6d83e2b8b7de82331a6a1dcd64b51020a6031c Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Thu, 12 Jun 2014 22:31:19 +0530
Subject: mailbox: Introduce framework for mailbox

Introduce common framework for client/protocol drivers and
controller drivers of Inter-Processor-Communication (IPC).

Client driver developers should have a look at
 include/linux/mailbox_client.h to understand the part of
the API exposed to client drivers.
Similarly controller driver developers should have a look
at include/linux/mailbox_controller.h

Reviewed-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 MAINTAINERS                        |   8 +
 drivers/mailbox/Makefile           |   4 +
 drivers/mailbox/mailbox.c          | 465 +++++++++++++++++++++++++++++++++++++
 include/linux/mailbox_client.h     |  46 ++++
 include/linux/mailbox_controller.h | 133 +++++++++++
 5 files changed, 656 insertions(+)
 create mode 100644 drivers/mailbox/mailbox.c
 create mode 100644 include/linux/mailbox_client.h
 create mode 100644 include/linux/mailbox_controller.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index a12edf2624e5..c5f7c854b6d2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5740,6 +5740,14 @@ S:	Maintained
 F:	drivers/net/macvlan.c
 F:	include/linux/if_macvlan.h
 
+MAILBOX API
+M:	Jassi Brar <jassisinghbrar@gmail.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/mailbox/
+F:	include/linux/mailbox_client.h
+F:	include/linux/mailbox_controller.h
+
 MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
 M:	Michael Kerrisk <mtk.manpages@gmail.com>
 W:	http://www.kernel.org/doc/man-pages
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 6d184dbcaca8..94ed7cefb14d 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -1,3 +1,7 @@
+# Generic MAILBOX API
+
+obj-$(CONFIG_MAILBOX)		+= mailbox.o
+
 obj-$(CONFIG_PL320_MBOX)	+= pl320-ipc.o
 
 obj-$(CONFIG_OMAP2PLUS_MBOX)	+= omap-mailbox.o
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
new file mode 100644
index 000000000000..afcb430508ec
--- /dev/null
+++ b/drivers/mailbox/mailbox.c
@@ -0,0 +1,465 @@
+/*
+ * Mailbox: Common code for Mailbox controllers and users
+ *
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox_controller.h>
+
+#define TXDONE_BY_IRQ	BIT(0) /* controller has remote RTR irq */
+#define TXDONE_BY_POLL	BIT(1) /* controller can read status of last TX */
+#define TXDONE_BY_ACK	BIT(2) /* S/W ACK recevied by Client ticks the TX */
+
+static LIST_HEAD(mbox_cons);
+static DEFINE_MUTEX(con_mutex);
+
+static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
+{
+	int idx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* See if there is any space left */
+	if (chan->msg_count == MBOX_TX_QUEUE_LEN) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -ENOBUFS;
+	}
+
+	idx = chan->msg_free;
+	chan->msg_data[idx] = mssg;
+	chan->msg_count++;
+
+	if (idx == MBOX_TX_QUEUE_LEN - 1)
+		chan->msg_free = 0;
+	else
+		chan->msg_free++;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return idx;
+}
+
+static void msg_submit(struct mbox_chan *chan)
+{
+	unsigned count, idx;
+	unsigned long flags;
+	void *data;
+	int err;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	if (!chan->msg_count || chan->active_req)
+		goto exit;
+
+	count = chan->msg_count;
+	idx = chan->msg_free;
+	if (idx >= count)
+		idx -= count;
+	else
+		idx += MBOX_TX_QUEUE_LEN - count;
+
+	data = chan->msg_data[idx];
+
+	/* Try to submit a message to the MBOX controller */
+	err = chan->mbox->ops->send_data(chan, data);
+	if (!err) {
+		chan->active_req = data;
+		chan->msg_count--;
+	}
+exit:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void tx_tick(struct mbox_chan *chan, int r)
+{
+	unsigned long flags;
+	void *mssg;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	mssg = chan->active_req;
+	chan->active_req = NULL;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	/* Submit next message */
+	msg_submit(chan);
+
+	/* Notify the client */
+	if (mssg && chan->cl->tx_done)
+		chan->cl->tx_done(chan->cl, mssg, r);
+
+	if (chan->cl->tx_block)
+		complete(&chan->tx_complete);
+}
+
+static void poll_txdone(unsigned long data)
+{
+	struct mbox_controller *mbox = (struct mbox_controller *)data;
+	bool txdone, resched = false;
+	int i;
+
+	for (i = 0; i < mbox->num_chans; i++) {
+		struct mbox_chan *chan = &mbox->chans[i];
+
+		if (chan->active_req && chan->cl) {
+			resched = true;
+			txdone = chan->mbox->ops->last_tx_done(chan);
+			if (txdone)
+				tx_tick(chan, 0);
+		}
+	}
+
+	if (resched)
+		mod_timer(&mbox->poll, jiffies +
+				msecs_to_jiffies(mbox->txpoll_period));
+}
+
+/**
+ * mbox_chan_received_data - A way for controller driver to push data
+ *				received from remote to the upper layer.
+ * @chan: Pointer to the mailbox channel on which RX happened.
+ * @mssg: Client specific message typecasted as void *
+ *
+ * After startup and before shutdown any data received on the chan
+ * is passed on to the API via atomic mbox_chan_received_data().
+ * The controller should ACK the RX only after this call returns.
+ */
+void mbox_chan_received_data(struct mbox_chan *chan, void *mssg)
+{
+	/* No buffering the received data */
+	if (chan->cl->rx_callback)
+		chan->cl->rx_callback(chan->cl, mssg);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_received_data);
+
+/**
+ * mbox_chan_txdone - A way for controller driver to notify the
+ *			framework that the last TX has completed.
+ * @chan: Pointer to the mailbox chan on which TX happened.
+ * @r: Status of last TX - OK or ERROR
+ *
+ * The controller that has IRQ for TX ACK calls this atomic API
+ * to tick the TX state machine. It works only if txdone_irq
+ * is set by the controller.
+ */
+void mbox_chan_txdone(struct mbox_chan *chan, int r)
+{
+	if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) {
+		dev_err(chan->mbox->dev,
+		       "Controller can't run the TX ticker\n");
+		return;
+	}
+
+	tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_txdone);
+
+/**
+ * mbox_client_txdone - The way for a client to run the TX state machine.
+ * @chan: Mailbox channel assigned to this client.
+ * @r: Success status of last transmission.
+ *
+ * The client/protocol had received some 'ACK' packet and it notifies
+ * the API that the last packet was sent successfully. This only works
+ * if the controller can't sense TX-Done.
+ */
+void mbox_client_txdone(struct mbox_chan *chan, int r)
+{
+	if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) {
+		dev_err(chan->mbox->dev, "Client can't run the TX ticker\n");
+		return;
+	}
+
+	tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_client_txdone);
+
+/**
+ * mbox_client_peek_data - A way for client driver to pull data
+ *			received from remote by the controller.
+ * @chan: Mailbox channel assigned to this client.
+ *
+ * A poke to controller driver for any received data.
+ * The data is actually passed onto client via the
+ * mbox_chan_received_data()
+ * The call can be made from atomic context, so the controller's
+ * implementation of peek_data() must not sleep.
+ *
+ * Return: True, if controller has, and is going to push after this,
+ *          some data.
+ *         False, if controller doesn't have any data to be read.
+ */
+bool mbox_client_peek_data(struct mbox_chan *chan)
+{
+	if (chan->mbox->ops->peek_data)
+		return chan->mbox->ops->peek_data(chan);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(mbox_client_peek_data);
+
+/**
+ * mbox_send_message -	For client to submit a message to be
+ *				sent to the remote.
+ * @chan: Mailbox channel assigned to this client.
+ * @mssg: Client specific message typecasted.
+ *
+ * For client to submit data to the controller destined for a remote
+ * processor. If the client had set 'tx_block', the call will return
+ * either when the remote receives the data or when 'tx_tout' millisecs
+ * run out.
+ *  In non-blocking mode, the requests are buffered by the API and a
+ * non-negative token is returned for each queued request. If the request
+ * is not queued, a negative token is returned. Upon failure or successful
+ * TX, the API calls 'tx_done' from atomic context, from which the client
+ * could submit yet another request.
+ * The pointer to message should be preserved until it is sent
+ * over the chan, i.e, tx_done() is made.
+ * This function could be called from atomic context as it simply
+ * queues the data and returns a token against the request.
+ *
+ * Return: Non-negative integer for successful submission (non-blocking mode)
+ *	or transmission over chan (blocking mode).
+ *	Negative value denotes failure.
+ */
+int mbox_send_message(struct mbox_chan *chan, void *mssg)
+{
+	int t;
+
+	if (!chan || !chan->cl)
+		return -EINVAL;
+
+	t = add_to_rbuf(chan, mssg);
+	if (t < 0) {
+		dev_err(chan->mbox->dev, "Try increasing MBOX_TX_QUEUE_LEN\n");
+		return t;
+	}
+
+	msg_submit(chan);
+
+	if (chan->txdone_method	== TXDONE_BY_POLL)
+		poll_txdone((unsigned long)chan->mbox);
+
+	if (chan->cl->tx_block && chan->active_req) {
+		unsigned long wait;
+		int ret;
+
+		if (!chan->cl->tx_tout) /* wait forever */
+			wait = msecs_to_jiffies(3600000);
+		else
+			wait = msecs_to_jiffies(chan->cl->tx_tout);
+
+		ret = wait_for_completion_timeout(&chan->tx_complete, wait);
+		if (ret == 0) {
+			t = -EIO;
+			tx_tick(chan, -EIO);
+		}
+	}
+
+	return t;
+}
+EXPORT_SYMBOL_GPL(mbox_send_message);
+
+/**
+ * mbox_request_channel - Request a mailbox channel.
+ * @cl: Identity of the client requesting the channel.
+ * @index: Index of mailbox specifier in 'mboxes' property.
+ *
+ * The Client specifies its requirements and capabilities while asking for
+ * a mailbox channel. It can't be called from atomic context.
+ * The channel is exclusively allocated and can't be used by another
+ * client before the owner calls mbox_free_channel.
+ * After assignment, any packet received on this channel will be
+ * handed over to the client via the 'rx_callback'.
+ * The framework holds reference to the client, so the mbox_client
+ * structure shouldn't be modified until the mbox_free_channel returns.
+ *
+ * Return: Pointer to the channel assigned to the client if successful.
+ *		ERR_PTR for request failure.
+ */
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
+{
+	struct device *dev = cl->dev;
+	struct mbox_controller *mbox;
+	struct of_phandle_args spec;
+	struct mbox_chan *chan;
+	unsigned long flags;
+	int ret;
+
+	if (!dev || !dev->of_node) {
+		pr_debug("%s: No owner device node\n", __func__);
+		return ERR_PTR(-ENODEV);
+	}
+
+	mutex_lock(&con_mutex);
+
+	if (of_parse_phandle_with_args(dev->of_node, "mboxes",
+				       "#mbox-cells", index, &spec)) {
+		dev_dbg(dev, "%s: can't parse \"mboxes\" property\n", __func__);
+		mutex_unlock(&con_mutex);
+		return ERR_PTR(-ENODEV);
+	}
+
+	chan = NULL;
+	list_for_each_entry(mbox, &mbox_cons, node)
+		if (mbox->dev->of_node == spec.np) {
+			chan = mbox->of_xlate(mbox, &spec);
+			break;
+		}
+
+	of_node_put(spec.np);
+
+	if (!chan || chan->cl || !try_module_get(mbox->dev->driver->owner)) {
+		dev_dbg(dev, "%s: mailbox not free\n", __func__);
+		mutex_unlock(&con_mutex);
+		return ERR_PTR(-EBUSY);
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->msg_free = 0;
+	chan->msg_count = 0;
+	chan->active_req = NULL;
+	chan->cl = cl;
+	init_completion(&chan->tx_complete);
+
+	if (chan->txdone_method	== TXDONE_BY_POLL && cl->knows_txdone)
+		chan->txdone_method |= TXDONE_BY_ACK;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	ret = chan->mbox->ops->startup(chan);
+	if (ret) {
+		dev_err(dev, "Unable to startup the chan (%d)\n", ret);
+		mbox_free_channel(chan);
+		chan = ERR_PTR(ret);
+	}
+
+	mutex_unlock(&con_mutex);
+	return chan;
+}
+EXPORT_SYMBOL_GPL(mbox_request_channel);
+
+/**
+ * mbox_free_channel - The client relinquishes control of a mailbox
+ *			channel by this call.
+ * @chan: The mailbox channel to be freed.
+ */
+void mbox_free_channel(struct mbox_chan *chan)
+{
+	unsigned long flags;
+
+	if (!chan || !chan->cl)
+		return;
+
+	chan->mbox->ops->shutdown(chan);
+
+	/* The queued TX requests are simply aborted, no callbacks are made */
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->cl = NULL;
+	chan->active_req = NULL;
+	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
+		chan->txdone_method = TXDONE_BY_POLL;
+
+	module_put(chan->mbox->dev->driver->owner);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mbox_free_channel);
+
+static struct mbox_chan *
+of_mbox_index_xlate(struct mbox_controller *mbox,
+		    const struct of_phandle_args *sp)
+{
+	int ind = sp->args[0];
+
+	if (ind >= mbox->num_chans)
+		return NULL;
+
+	return &mbox->chans[ind];
+}
+
+/**
+ * mbox_controller_register - Register the mailbox controller
+ * @mbox:	Pointer to the mailbox controller.
+ *
+ * The controller driver registers its communication channels
+ */
+int mbox_controller_register(struct mbox_controller *mbox)
+{
+	int i, txdone;
+
+	/* Sanity check */
+	if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans)
+		return -EINVAL;
+
+	if (mbox->txdone_irq)
+		txdone = TXDONE_BY_IRQ;
+	else if (mbox->txdone_poll)
+		txdone = TXDONE_BY_POLL;
+	else /* It has to be ACK then */
+		txdone = TXDONE_BY_ACK;
+
+	if (txdone == TXDONE_BY_POLL) {
+		mbox->poll.function = &poll_txdone;
+		mbox->poll.data = (unsigned long)mbox;
+		init_timer(&mbox->poll);
+	}
+
+	for (i = 0; i < mbox->num_chans; i++) {
+		struct mbox_chan *chan = &mbox->chans[i];
+
+		chan->cl = NULL;
+		chan->mbox = mbox;
+		chan->txdone_method = txdone;
+		spin_lock_init(&chan->lock);
+	}
+
+	if (!mbox->of_xlate)
+		mbox->of_xlate = of_mbox_index_xlate;
+
+	mutex_lock(&con_mutex);
+	list_add_tail(&mbox->node, &mbox_cons);
+	mutex_unlock(&con_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mbox_controller_register);
+
+/**
+ * mbox_controller_unregister - Unregister the mailbox controller
+ * @mbox:	Pointer to the mailbox controller.
+ */
+void mbox_controller_unregister(struct mbox_controller *mbox)
+{
+	int i;
+
+	if (!mbox)
+		return;
+
+	mutex_lock(&con_mutex);
+
+	list_del(&mbox->node);
+
+	for (i = 0; i < mbox->num_chans; i++)
+		mbox_free_channel(&mbox->chans[i]);
+
+	if (mbox->txdone_poll)
+		del_timer_sync(&mbox->poll);
+
+	mutex_unlock(&con_mutex);
+}
+EXPORT_SYMBOL_GPL(mbox_controller_unregister);
diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
new file mode 100644
index 000000000000..307d9cab2026
--- /dev/null
+++ b/include/linux/mailbox_client.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CLIENT_H
+#define __MAILBOX_CLIENT_H
+
+#include <linux/of.h>
+#include <linux/device.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_client - User of a mailbox
+ * @dev:		The client device
+ * @tx_block:		If the mbox_send_message should block until data is
+ *			transmitted.
+ * @tx_tout:		Max block period in ms before TX is assumed failure
+ * @knows_txdone:	If the client could run the TX state machine. Usually
+ *			if the client receives some ACK packet for transmission.
+ *			Unused if the controller already has TX_Done/RTR IRQ.
+ * @rx_callback:	Atomic callback to provide client the data received
+ * @tx_done:		Atomic callback to tell client of data transmission
+ */
+struct mbox_client {
+	struct device *dev;
+	bool tx_block;
+	unsigned long tx_tout;
+	bool knows_txdone;
+
+	void (*rx_callback)(struct mbox_client *cl, void *mssg);
+	void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
+};
+
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index);
+int mbox_send_message(struct mbox_chan *chan, void *mssg);
+void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */
+bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */
+void mbox_free_channel(struct mbox_chan *chan); /* may sleep */
+
+#endif /* __MAILBOX_CLIENT_H */
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
new file mode 100644
index 000000000000..d4cf96f07cfc
--- /dev/null
+++ b/include/linux/mailbox_controller.h
@@ -0,0 +1,133 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CONTROLLER_H
+#define __MAILBOX_CONTROLLER_H
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/completion.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_chan_ops - methods to control mailbox channels
+ * @send_data:	The API asks the MBOX controller driver, in atomic
+ *		context try to transmit a message on the bus. Returns 0 if
+ *		data is accepted for transmission, -EBUSY while rejecting
+ *		if the remote hasn't yet read the last data sent. Actual
+ *		transmission of data is reported by the controller via
+ *		mbox_chan_txdone (if it has some TX ACK irq). It must not
+ *		sleep.
+ * @startup:	Called when a client requests the chan. The controller
+ *		could ask clients for additional parameters of communication
+ *		to be provided via client's chan_data. This call may
+ *		block. After this call the Controller must forward any
+ *		data received on the chan by calling mbox_chan_received_data.
+ *		The controller may do stuff that need to sleep.
+ * @shutdown:	Called when a client relinquishes control of a chan.
+ *		This call may block too. The controller must not forward
+ *		any received data anymore.
+ *		The controller may do stuff that need to sleep.
+ * @last_tx_done: If the controller sets 'txdone_poll', the API calls
+ *		  this to poll status of last TX. The controller must
+ *		  give priority to IRQ method over polling and never
+ *		  set both txdone_poll and txdone_irq. Only in polling
+ *		  mode 'send_data' is expected to return -EBUSY.
+ *		  The controller may do stuff that need to sleep/block.
+ *		  Used only if txdone_poll:=true && txdone_irq:=false
+ * @peek_data: Atomic check for any received data. Return true if controller
+ *		  has some data to push to the client. False otherwise.
+ */
+struct mbox_chan_ops {
+	int (*send_data)(struct mbox_chan *chan, void *data);
+	int (*startup)(struct mbox_chan *chan);
+	void (*shutdown)(struct mbox_chan *chan);
+	bool (*last_tx_done)(struct mbox_chan *chan);
+	bool (*peek_data)(struct mbox_chan *chan);
+};
+
+/**
+ * struct mbox_controller - Controller of a class of communication channels
+ * @dev:		Device backing this controller
+ * @ops:		Operators that work on each communication chan
+ * @chans:		Array of channels
+ * @num_chans:		Number of channels in the 'chans' array.
+ * @txdone_irq:		Indicates if the controller can report to API when
+ *			the last transmitted data was read by the remote.
+ *			Eg, if it has some TX ACK irq.
+ * @txdone_poll:	If the controller can read but not report the TX
+ *			done. Ex, some register shows the TX status but
+ *			no interrupt rises. Ignored if 'txdone_irq' is set.
+ * @txpoll_period:	If 'txdone_poll' is in effect, the API polls for
+ *			last TX's status after these many millisecs
+ * @of_xlate:		Controller driver specific mapping of channel via DT
+ * @poll:		API private. Used to poll for TXDONE on all channels.
+ * @node:		API private. To hook into list of controllers.
+ */
+struct mbox_controller {
+	struct device *dev;
+	struct mbox_chan_ops *ops;
+	struct mbox_chan *chans;
+	int num_chans;
+	bool txdone_irq;
+	bool txdone_poll;
+	unsigned txpoll_period;
+	struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox,
+				      const struct of_phandle_args *sp);
+	/* Internal to API */
+	struct timer_list poll;
+	struct list_head node;
+};
+
+/*
+ * The length of circular buffer for queuing messages from a client.
+ * 'msg_count' tracks the number of buffered messages while 'msg_free'
+ * is the index where the next message would be buffered.
+ * We shouldn't need it too big because every transfer is interrupt
+ * triggered and if we have lots of data to transfer, the interrupt
+ * latencies are going to be the bottleneck, not the buffer length.
+ * Besides, mbox_send_message could be called from atomic context and
+ * the client could also queue another message from the notifier 'tx_done'
+ * of the last transfer done.
+ * REVISIT: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN"
+ * print, it needs to be taken from config option or somesuch.
+ */
+#define MBOX_TX_QUEUE_LEN	20
+
+/**
+ * struct mbox_chan - s/w representation of a communication chan
+ * @mbox:		Pointer to the parent/provider of this channel
+ * @txdone_method:	Way to detect TXDone chosen by the API
+ * @cl:			Pointer to the current owner of this channel
+ * @tx_complete:	Transmission completion
+ * @active_req:		Currently active request hook
+ * @msg_count:		No. of mssg currently queued
+ * @msg_free:		Index of next available mssg slot
+ * @msg_data:		Hook for data packet
+ * @lock:		Serialise access to the channel
+ * @con_priv:		Hook for controller driver to attach private data
+ */
+struct mbox_chan {
+	struct mbox_controller *mbox;
+	unsigned txdone_method;
+	struct mbox_client *cl;
+	struct completion tx_complete;
+	void *active_req;
+	unsigned msg_count, msg_free;
+	void *msg_data[MBOX_TX_QUEUE_LEN];
+	spinlock_t lock; /* Serialise access to the channel */
+	void *con_priv;
+};
+
+int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */
+void mbox_controller_unregister(struct mbox_controller *mbox); /* can sleep */
+void mbox_chan_received_data(struct mbox_chan *chan, void *data); /* atomic */
+void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */
+
+#endif /* __MAILBOX_CONTROLLER_H */
-- 
cgit v1.2.3


From 583d4a6885cfa75a3d189f6bb69b5c545e961c75 Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Tue, 7 Oct 2014 15:04:57 +0200
Subject: net: fs_enet: Remove non NAPI RX

In the probe function, use_napi is inconditionnaly set to 1. This patch removes
all the code which is conditional to !use_napi, and removes use_napi which has
then become useless.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/fs_enet/fs_enet-main.c  | 164 ++-------------------
 include/linux/fs_enet_pd.h                         |   1 -
 2 files changed, 15 insertions(+), 150 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 748fd24d3d9e..71a25b4e2d5a 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -215,128 +215,6 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
 	return received;
 }
 
-/* non NAPI receive function */
-static int fs_enet_rx_non_napi(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	const struct fs_platform_info *fpi = fep->fpi;
-	cbd_t __iomem *bdp;
-	struct sk_buff *skb, *skbn, *skbt;
-	int received = 0;
-	u16 pkt_len, sc;
-	int curidx;
-	/*
-	 * First, grab all of the stats for the incoming packet.
-	 * These get messed up if we get called due to a busy condition.
-	 */
-	bdp = fep->cur_rx;
-
-	while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0) {
-
-		curidx = bdp - fep->rx_bd_base;
-
-		/*
-		 * Since we have allocated space to hold a complete frame,
-		 * the last indicator should be set.
-		 */
-		if ((sc & BD_ENET_RX_LAST) == 0)
-			dev_warn(fep->dev, "rcv is not +last\n");
-
-		/*
-		 * Check for errors.
-		 */
-		if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_CL |
-			  BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV)) {
-			fep->stats.rx_errors++;
-			/* Frame too long or too short. */
-			if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
-				fep->stats.rx_length_errors++;
-			/* Frame alignment */
-			if (sc & (BD_ENET_RX_NO | BD_ENET_RX_CL))
-				fep->stats.rx_frame_errors++;
-			/* CRC Error */
-			if (sc & BD_ENET_RX_CR)
-				fep->stats.rx_crc_errors++;
-			/* FIFO overrun */
-			if (sc & BD_ENET_RX_OV)
-				fep->stats.rx_crc_errors++;
-
-			skb = fep->rx_skbuff[curidx];
-
-			dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
-				L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-				DMA_FROM_DEVICE);
-
-			skbn = skb;
-
-		} else {
-
-			skb = fep->rx_skbuff[curidx];
-
-			dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
-				L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-				DMA_FROM_DEVICE);
-
-			/*
-			 * Process the incoming frame.
-			 */
-			fep->stats.rx_packets++;
-			pkt_len = CBDR_DATLEN(bdp) - 4;	/* remove CRC */
-			fep->stats.rx_bytes += pkt_len + 4;
-
-			if (pkt_len <= fpi->rx_copybreak) {
-				/* +2 to make IP header L1 cache aligned */
-				skbn = netdev_alloc_skb(dev, pkt_len + 2);
-				if (skbn != NULL) {
-					skb_reserve(skbn, 2);	/* align IP header */
-					skb_copy_from_linear_data(skb,
-						      skbn->data, pkt_len);
-					/* swap */
-					skbt = skb;
-					skb = skbn;
-					skbn = skbt;
-				}
-			} else {
-				skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE);
-
-				if (skbn)
-					skb_align(skbn, ENET_RX_ALIGN);
-			}
-
-			if (skbn != NULL) {
-				skb_put(skb, pkt_len);	/* Make room */
-				skb->protocol = eth_type_trans(skb, dev);
-				received++;
-				netif_rx(skb);
-			} else {
-				fep->stats.rx_dropped++;
-				skbn = skb;
-			}
-		}
-
-		fep->rx_skbuff[curidx] = skbn;
-		CBDW_BUFADDR(bdp, dma_map_single(fep->dev, skbn->data,
-			     L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-			     DMA_FROM_DEVICE));
-		CBDW_DATLEN(bdp, 0);
-		CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY);
-
-		/*
-		 * Update BD pointer to next entry.
-		 */
-		if ((sc & BD_ENET_RX_WRAP) == 0)
-			bdp++;
-		else
-			bdp = fep->rx_bd_base;
-
-		(*fep->ops->rx_bd_done)(dev);
-	}
-
-	fep->cur_rx = bdp;
-
-	return 0;
-}
-
 static void fs_enet_tx(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
@@ -453,8 +331,7 @@ fs_enet_interrupt(int irq, void *dev_id)
 		nr++;
 
 		int_clr_events = int_events;
-		if (fpi->use_napi)
-			int_clr_events &= ~fep->ev_napi_rx;
+		int_clr_events &= ~fep->ev_napi_rx;
 
 		(*fep->ops->clear_int_events)(dev, int_clr_events);
 
@@ -462,19 +339,15 @@ fs_enet_interrupt(int irq, void *dev_id)
 			(*fep->ops->ev_error)(dev, int_events);
 
 		if (int_events & fep->ev_rx) {
-			if (!fpi->use_napi)
-				fs_enet_rx_non_napi(dev);
-			else {
-				napi_ok = napi_schedule_prep(&fep->napi);
-
-				(*fep->ops->napi_disable_rx)(dev);
-				(*fep->ops->clear_int_events)(dev, fep->ev_napi_rx);
-
-				/* NOTE: it is possible for FCCs in NAPI mode    */
-				/* to submit a spurious interrupt while in poll  */
-				if (napi_ok)
-					__napi_schedule(&fep->napi);
-			}
+			napi_ok = napi_schedule_prep(&fep->napi);
+
+			(*fep->ops->napi_disable_rx)(dev);
+			(*fep->ops->clear_int_events)(dev, fep->ev_napi_rx);
+
+			/* NOTE: it is possible for FCCs in NAPI mode    */
+			/* to submit a spurious interrupt while in poll  */
+			if (napi_ok)
+				__napi_schedule(&fep->napi);
 		}
 
 		if (int_events & fep->ev_tx)
@@ -811,24 +684,21 @@ static int fs_enet_open(struct net_device *dev)
 	/* not doing this, will cause a crash in fs_enet_rx_napi */
 	fs_init_bds(fep->ndev);
 
-	if (fep->fpi->use_napi)
-		napi_enable(&fep->napi);
+	napi_enable(&fep->napi);
 
 	/* Install our interrupt handler. */
 	r = request_irq(fep->interrupt, fs_enet_interrupt, IRQF_SHARED,
 			"fs_enet-mac", dev);
 	if (r != 0) {
 		dev_err(fep->dev, "Could not allocate FS_ENET IRQ!");
-		if (fep->fpi->use_napi)
-			napi_disable(&fep->napi);
+		napi_disable(&fep->napi);
 		return -EINVAL;
 	}
 
 	err = fs_init_phy(dev);
 	if (err) {
 		free_irq(fep->interrupt, dev);
-		if (fep->fpi->use_napi)
-			napi_disable(&fep->napi);
+		napi_disable(&fep->napi);
 		return err;
 	}
 	phy_start(fep->phydev);
@@ -845,8 +715,7 @@ static int fs_enet_close(struct net_device *dev)
 
 	netif_stop_queue(dev);
 	netif_carrier_off(dev);
-	if (fep->fpi->use_napi)
-		napi_disable(&fep->napi);
+	napi_disable(&fep->napi);
 	phy_stop(fep->phydev);
 
 	spin_lock_irqsave(&fep->lock, flags);
@@ -1022,7 +891,6 @@ static int fs_enet_probe(struct platform_device *ofdev)
 	fpi->rx_ring = 32;
 	fpi->tx_ring = 32;
 	fpi->rx_copybreak = 240;
-	fpi->use_napi = 1;
 	fpi->napi_weight = 17;
 	fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0);
 	if (!fpi->phy_node && of_phy_is_fixed_link(ofdev->dev.of_node)) {
@@ -1102,9 +970,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
 
 	ndev->netdev_ops = &fs_enet_netdev_ops;
 	ndev->watchdog_timeo = 2 * HZ;
-	if (fpi->use_napi)
-		netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi,
-		               fpi->napi_weight);
+	netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight);
 
 	ndev->ethtool_ops = &fs_ethtool_ops;
 
diff --git a/include/linux/fs_enet_pd.h b/include/linux/fs_enet_pd.h
index efb05961bdd8..77d783f71527 100644
--- a/include/linux/fs_enet_pd.h
+++ b/include/linux/fs_enet_pd.h
@@ -139,7 +139,6 @@ struct fs_platform_info {
 	int rx_ring, tx_ring;	/* number of buffers on rx     */
 	__u8 macaddr[ETH_ALEN];	/* mac address                 */
 	int rx_copybreak;	/* limit we copy small frames  */
-	int use_napi;		/* use NAPI                    */
 	int napi_weight;	/* NAPI weight                 */
 
 	int use_rmii;		/* use RMII mode 	       */
-- 
cgit v1.2.3


From 68939df1d7d8da9088d51000bd334f4c59ea0cb3 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Thu, 14 Aug 2014 14:45:40 -0700
Subject: Move Intel SNB device ids from sb_edac to pci_ids.h

The i2c_imc driver will use two of them, and moving only part of
the list seems messier.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/edac/sb_edac.c  | 30 ------------------------------
 include/linux/pci_ids.h | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 07efed452a15..449622935816 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -52,36 +52,6 @@ static int probed;
 #define GET_BITFIELD(v, lo, hi)	\
 	(((v) & GENMASK_ULL(hi, lo)) >> (lo))
 
-/*
- * sbridge Memory Controller Registers
- */
-
-/*
- * FIXME: For now, let's order by device function, as it makes
- * easier for driver's development process. This table should be
- * moved to pci_id.h when submitted upstream
- */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0	0x3cf4	/* 12.6 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1	0x3cf6	/* 12.7 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR		0x3cf5	/* 13.6 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0	0x3ca0	/* 14.0 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA	0x3ca8	/* 15.0 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS	0x3c71	/* 15.1 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0	0x3caa	/* 15.2 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1	0x3cab	/* 15.3 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2	0x3cac	/* 15.4 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3	0x3cad	/* 15.5 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO	0x3cb8	/* 17.0 */
-
-	/*
-	 * Currently, unused, but will be needed in the future
-	 * implementations, as they hold the error counters
-	 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0	0x3c72	/* 16.2 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1	0x3c73	/* 16.3 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2	0x3c76	/* 16.6 */
-#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3	0x3c77	/* 16.7 */
-
 /* Devices 12 Function 6, Offsets 0x80 to 0xcc */
 static const u32 sbridge_dram_rule[] = {
 	0x80, 0x88, 0x90, 0x98, 0xa0,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6ed0bb73a864..58aa8d78353f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2818,7 +2818,22 @@
 #define PCI_DEVICE_ID_INTEL_UNC_R2PCIE	0x3c43
 #define PCI_DEVICE_ID_INTEL_UNC_R3QPI0	0x3c44
 #define PCI_DEVICE_ID_INTEL_UNC_R3QPI1	0x3c45
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS	0x3c71	/* 15.1 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0	0x3c72	/* 16.2 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1	0x3c73	/* 16.3 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2	0x3c76	/* 16.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3	0x3c77	/* 16.7 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0	0x3ca0	/* 14.0 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA	0x3ca8	/* 15.0 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0	0x3caa	/* 15.2 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1	0x3cab	/* 15.3 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2	0x3cac	/* 15.4 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3	0x3cad	/* 15.5 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO	0x3cb8	/* 17.0 */
 #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX	0x3ce0
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0	0x3cf4	/* 12.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR		0x3cf5	/* 13.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1	0x3cf6	/* 12.7 */
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_19	0x65f3
-- 
cgit v1.2.3


From 709c48b39ecf11a81f3820c13a828c330fd832b9 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Wed, 8 Oct 2014 23:53:39 +0900
Subject: net: description of dma_cookie cause make xmldocs warning

In commit 7bced397510ab569d31de4c70b39e13355046387,
dma_cookie was removed from struct skbuff.
But the description of dma_cookie still exist.
So the "make xmldocs" output following warning.

Warning(.//include/linux/skbuff.h:609): Excess struct/union
/enum/typedef member 'dma_cookie' description in 'sk_buff'

Remove description of dma_cookie fix the symptom.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3a5ec7638627..776104ba02ce 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -483,8 +483,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
  *	@wifi_acked_valid: wifi_acked was set
  *	@wifi_acked: whether frame was acked on wifi or not
  *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
- *	@dma_cookie: a cookie to one of several possible DMA operations
- *		done by skb DMA functions
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
-- 
cgit v1.2.3


From 535114539bb2c081b6680cb5a34be17e7b45df37 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 8 Oct 2014 08:19:27 -0700
Subject: net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers

Add two helpers so that drivers do not have to care of BQL being
available or not.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jim Davis <jim.epost@gmail.com>
Fixes: 29d40c903247 ("net/mlx4_en: Use prefetch in tx path")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c |  5 +++--
 include/linux/netdevice.h                  | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 8726a4aee5a7..34c137878545 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -392,7 +392,8 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 	if (!priv->port_up)
 		return true;
 
-	prefetchw(&ring->tx_queue->dql.limit);
+	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
+
 	index = cons_index & size_mask;
 	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 	last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb);
@@ -737,7 +738,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		vlan_tag = vlan_tx_tag_get(skb);
 
 
-	prefetchw(&ring->tx_queue->dql);
+	netdev_txq_bql_enqueue_prefetchw(ring->tx_queue);
 
 	/* Track current inflight packets for performance analysis */
 	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3a4315b39d20..838407aea705 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -30,6 +30,7 @@
 #include <linux/bug.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
+#include <linux/prefetch.h>
 #include <asm/cache.h>
 #include <asm/byteorder.h>
 
@@ -2480,6 +2481,34 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue)
 	return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN;
 }
 
+/**
+ *	netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write
+ *	@dev_queue: pointer to transmit queue
+ *
+ * BQL enabled drivers might use this helper in their ndo_start_xmit(),
+ * to give appropriate hint to the cpu.
+ */
+static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue)
+{
+#ifdef CONFIG_BQL
+	prefetchw(&dev_queue->dql.num_queued);
+#endif
+}
+
+/**
+ *	netdev_txq_bql_complete_prefetchw - prefetch bql data for write
+ *	@dev_queue: pointer to transmit queue
+ *
+ * BQL enabled drivers might use this helper in their TX completion path,
+ * to give appropriate hint to the cpu.
+ */
+static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue)
+{
+#ifdef CONFIG_BQL
+	prefetchw(&dev_queue->dql.limit);
+#endif
+}
+
 static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 					unsigned int bytes)
 {
-- 
cgit v1.2.3


From 1ffe46d11cc88479797b262f60d92e5fb461b411 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Feb 2014 09:39:37 -0800
Subject: vfs: Merge check_submounts_and_drop and d_invalidate

Now that d_invalidate is the only caller of check_submounts_and_drop,
expand check_submounts_and_drop inline in d_invalidate.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 55 ++++++++++++++++++++------------------------------
 include/linux/dcache.h |  1 -
 2 files changed, 22 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 484114a4db93..5e02b9eee6b1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -645,32 +645,6 @@ kill_it:
 }
 EXPORT_SYMBOL(dput);
 
-/**
- * d_invalidate - invalidate a dentry
- * @dentry: dentry to invalidate
- *
- * Try to invalidate the dentry if it turns out to be
- * possible. If there are reasons not to delete it
- * return -EBUSY. On success return 0.
- *
- * no dcache lock.
- */
- 
-int d_invalidate(struct dentry * dentry)
-{
-	/*
-	 * If it's already been dropped, return OK.
-	 */
-	spin_lock(&dentry->d_lock);
-	if (d_unhashed(dentry)) {
-		spin_unlock(&dentry->d_lock);
-		return 0;
-	}
-	spin_unlock(&dentry->d_lock);
-
-	return check_submounts_and_drop(dentry);
-}
-EXPORT_SYMBOL(d_invalidate);
 
 /* This must be called with d_lock held */
 static inline void __dget_dlock(struct dentry *dentry)
@@ -1190,7 +1164,7 @@ EXPORT_SYMBOL(have_submounts);
  * reachable (e.g. NFS can unhash a directory dentry and then the complete
  * subtree can become unreachable).
  *
- * Only one of check_submounts_and_drop() and d_set_mounted() must succeed.  For
+ * Only one of d_invalidate() and d_set_mounted() must succeed.  For
  * this reason take rename_lock and d_lock on dentry and ancestors.
  */
 int d_set_mounted(struct dentry *dentry)
@@ -1199,7 +1173,7 @@ int d_set_mounted(struct dentry *dentry)
 	int ret = -ENOENT;
 	write_seqlock(&rename_lock);
 	for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
-		/* Need exclusion wrt. check_submounts_and_drop() */
+		/* Need exclusion wrt. d_invalidate() */
 		spin_lock(&p->d_lock);
 		if (unlikely(d_unhashed(p))) {
 			spin_unlock(&p->d_lock);
@@ -1369,18 +1343,33 @@ static void check_and_drop(void *_data)
 }
 
 /**
- * check_submounts_and_drop - detach submounts, prune dcache, and drop
+ * d_invalidate - detach submounts, prune dcache, and drop
+ * @dentry: dentry to invalidate (aka detach, prune and drop)
+ *
+ * Try to invalidate the dentry if it turns out to be
+ * possible. If there are reasons not to delete it
+ * return -EBUSY. On success return 0.
+ *
+ * no dcache lock.
  *
  * The final d_drop is done as an atomic operation relative to
  * rename_lock ensuring there are no races with d_set_mounted.  This
  * ensures there are no unhashed dentries on the path to a mountpoint.
- *
- * @dentry: dentry to detach, prune and drop
  */
-int check_submounts_and_drop(struct dentry *dentry)
+int d_invalidate(struct dentry *dentry)
 {
 	int ret = 0;
 
+	/*
+	 * If it's already been dropped, return OK.
+	 */
+	spin_lock(&dentry->d_lock);
+	if (d_unhashed(dentry)) {
+		spin_unlock(&dentry->d_lock);
+		return 0;
+	}
+	spin_unlock(&dentry->d_lock);
+
 	/* Negative dentries can be dropped without further checks */
 	if (!dentry->d_inode) {
 		d_drop(dentry);
@@ -1414,7 +1403,7 @@ int check_submounts_and_drop(struct dentry *dentry)
 out:
 	return ret;
 }
-EXPORT_SYMBOL(check_submounts_and_drop);
+EXPORT_SYMBOL(d_invalidate);
 
 /**
  * __d_alloc	-	allocate a dcache entry
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 75a227cc7ce2..595af29ad145 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -269,7 +269,6 @@ extern void d_prune_aliases(struct inode *);
 
 /* test whether we have any submounts in a subdir tree */
 extern int have_submounts(struct dentry *);
-extern int check_submounts_and_drop(struct dentry *);
 
 /*
  * This adds the entry to the hash queues.
-- 
cgit v1.2.3


From 5542aa2fa7f6cddb03c4ac3135e390adffda98ca Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 13 Feb 2014 09:46:25 -0800
Subject: vfs: Make d_invalidate return void

Now that d_invalidate can no longer fail, stop returning a useless
return code.  For the few callers that checked the return code update
remove the handling of d_invalidate failure.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ioctl.c       |  5 +----
 fs/cifs/readdir.c      |  6 +-----
 fs/dcache.c            | 15 +++------------
 fs/fuse/dir.c          |  4 +---
 fs/namei.c             | 10 +++++-----
 fs/nfs/dir.c           |  3 +--
 include/linux/dcache.h |  2 +-
 7 files changed, 13 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8a8e29878c34..996eb192fa77 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2423,9 +2423,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 		goto out_dput;
 	}
 
-	err = d_invalidate(dentry);
-	if (err)
-		goto out_unlock;
+	d_invalidate(dentry);
 
 	down_write(&root->fs_info->subvol_sem);
 
@@ -2510,7 +2508,6 @@ out_release:
 	btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
 out_up_write:
 	up_write(&root->fs_info->subvol_sem);
-out_unlock:
 	if (err) {
 		spin_lock(&dest->root_item_lock);
 		root_flags = btrfs_root_flags(&dest->root_item);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b334a89d6a66..d2141f101382 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -87,8 +87,6 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
 		return;
 
 	if (dentry) {
-		int err;
-
 		inode = dentry->d_inode;
 		if (inode) {
 			/*
@@ -105,10 +103,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
 				goto out;
 			}
 		}
-		err = d_invalidate(dentry);
+		d_invalidate(dentry);
 		dput(dentry);
-		if (err)
-			return;
 	}
 
 	/*
diff --git a/fs/dcache.c b/fs/dcache.c
index 5e02b9eee6b1..70d102e70271 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1346,34 +1346,28 @@ static void check_and_drop(void *_data)
  * d_invalidate - detach submounts, prune dcache, and drop
  * @dentry: dentry to invalidate (aka detach, prune and drop)
  *
- * Try to invalidate the dentry if it turns out to be
- * possible. If there are reasons not to delete it
- * return -EBUSY. On success return 0.
- *
  * no dcache lock.
  *
  * The final d_drop is done as an atomic operation relative to
  * rename_lock ensuring there are no races with d_set_mounted.  This
  * ensures there are no unhashed dentries on the path to a mountpoint.
  */
-int d_invalidate(struct dentry *dentry)
+void d_invalidate(struct dentry *dentry)
 {
-	int ret = 0;
-
 	/*
 	 * If it's already been dropped, return OK.
 	 */
 	spin_lock(&dentry->d_lock);
 	if (d_unhashed(dentry)) {
 		spin_unlock(&dentry->d_lock);
-		return 0;
+		return;
 	}
 	spin_unlock(&dentry->d_lock);
 
 	/* Negative dentries can be dropped without further checks */
 	if (!dentry->d_inode) {
 		d_drop(dentry);
-		goto out;
+		return;
 	}
 
 	for (;;) {
@@ -1399,9 +1393,6 @@ int d_invalidate(struct dentry *dentry)
 
 		cond_resched();
 	}
-
-out:
-	return ret;
 }
 EXPORT_SYMBOL(d_invalidate);
 
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 820efd74ca9f..dbab798f5caf 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1286,9 +1286,7 @@ static int fuse_direntplus_link(struct file *file,
 			d_drop(dentry);
 		} else if (get_node_id(inode) != o->nodeid ||
 			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
-			err = d_invalidate(dentry);
-			if (err)
-				goto out;
+			d_invalidate(dentry);
 		} else if (is_bad_inode(inode)) {
 			err = -EIO;
 			goto out;
diff --git a/fs/namei.c b/fs/namei.c
index 2ba10904dba0..d20d579a022e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1306,7 +1306,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 				if (error < 0) {
 					dput(dentry);
 					return ERR_PTR(error);
-				} else if (!d_invalidate(dentry)) {
+				} else {
+					d_invalidate(dentry);
 					dput(dentry);
 					dentry = NULL;
 				}
@@ -1435,10 +1436,9 @@ unlazy:
 			dput(dentry);
 			return status;
 		}
-		if (!d_invalidate(dentry)) {
-			dput(dentry);
-			goto need_lookup;
-		}
+		d_invalidate(dentry);
+		dput(dentry);
+		goto need_lookup;
 	}
 
 	path->mnt = mnt;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8be6988a1c6c..06e8cfcbb670 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -486,8 +486,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 				nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label);
 			goto out;
 		} else {
-			if (d_invalidate(dentry) != 0)
-				goto out;
+			d_invalidate(dentry);
 			dput(dentry);
 		}
 	}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 595af29ad145..81b03150f39a 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -254,7 +254,7 @@ extern struct dentry * d_obtain_root(struct inode *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
 extern void shrink_dcache_for_umount(struct super_block *);
-extern int d_invalidate(struct dentry *);
+extern void d_invalidate(struct dentry *);
 
 /* only used at mount-time */
 extern struct dentry * d_make_root(struct inode *);
-- 
cgit v1.2.3


From 1fa97e8b1f327059aa98089abd8c3378cdf43017 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 May 2014 20:47:49 -0400
Subject: constify file_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 94187721ad41..75bdd51ec9b8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1874,7 +1874,7 @@ extern int current_umask(void);
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 
-static inline struct inode *file_inode(struct file *f)
+static inline struct inode *file_inode(const struct file *f)
 {
 	return f->f_inode;
 }
-- 
cgit v1.2.3


From c35e02480014f7a86e264a2fda39a568690163da Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Fri, 1 Aug 2014 09:27:22 -0400
Subject: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()

For DAX, we want to be able to copy between iovecs and kernel addresses
that don't necessarily have a struct page.  This is a fairly simple
rearrangement for bvec iters to kmap the pages outside and pass them in,
but for user iovecs it gets more complicated because we might try various
different ways to kmap the memory.  Duplicating the existing logic works
out best in this case.

We need to be able to write zeroes to an iovec for reads from unwritten
ranges in a file.  This is performed by the new iov_iter_zero() function,
again patterned after the existing code that handles iovec iterators.

[AV: and export the buggers...]

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |   3 +
 mm/iov_iter.c       | 240 +++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 229 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 290fbf0b6b8a..9b1581414cd4 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -80,6 +80,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
+size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i);
+size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
+size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 9a09f2034fcc..eafcf60f6b83 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -4,6 +4,96 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	left = __copy_to_user(buf, from, copy);
+	copy -= left;
+	skip += copy;
+	from += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_to_user(buf, from, copy);
+		copy -= left;
+		skip = copy;
+		from += copy;
+		bytes -= copy;
+	}
+
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
+static size_t copy_from_iter_iovec(void *to, size_t bytes, struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	left = __copy_from_user(to, buf, copy);
+	copy -= left;
+	skip += copy;
+	to += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_from_user(to, buf, copy);
+		copy -= left;
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -166,6 +256,50 @@ done:
 	return wanted - bytes;
 }
 
+static size_t zero_iovec(size_t bytes, struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	left = __clear_user(buf, copy);
+	copy -= left;
+	skip += copy;
+	bytes -= copy;
+
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __clear_user(buf, copy);
+		copy -= left;
+		skip = copy;
+		bytes -= copy;
+	}
+
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
@@ -414,12 +548,17 @@ static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t
 	kunmap_atomic(to);
 }
 
-static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i)
+static void memzero_page(struct page *page, size_t offset, size_t len)
+{
+	char *addr = kmap_atomic(page);
+	memset(addr + offset, 0, len);
+	kunmap_atomic(addr);
+}
+
+static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i)
 {
 	size_t skip, copy, wanted;
 	const struct bio_vec *bvec;
-	void *kaddr, *from;
 
 	if (unlikely(bytes > i->count))
 		bytes = i->count;
@@ -432,8 +571,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
 	skip = i->iov_offset;
 	copy = min_t(size_t, bytes, bvec->bv_len - skip);
 
-	kaddr = kmap_atomic(page);
-	from = kaddr + offset;
 	memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
 	skip += copy;
 	from += copy;
@@ -446,7 +583,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
 		from += copy;
 		bytes -= copy;
 	}
-	kunmap_atomic(kaddr);
 	if (skip == bvec->bv_len) {
 		bvec++;
 		skip = 0;
@@ -458,12 +594,10 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
 	return wanted - bytes;
 }
 
-static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i)
+static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i)
 {
 	size_t skip, copy, wanted;
 	const struct bio_vec *bvec;
-	void *kaddr, *to;
 
 	if (unlikely(bytes > i->count))
 		bytes = i->count;
@@ -475,10 +609,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
 	bvec = i->bvec;
 	skip = i->iov_offset;
 
-	kaddr = kmap_atomic(page);
-
-	to = kaddr + offset;
-
 	copy = min(bytes, bvec->bv_len - skip);
 
 	memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
@@ -495,7 +625,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
 		to += copy;
 		bytes -= copy;
 	}
-	kunmap_atomic(kaddr);
 	if (skip == bvec->bv_len) {
 		bvec++;
 		skip = 0;
@@ -507,6 +636,61 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
 	return wanted;
 }
 
+static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
+					size_t bytes, struct iov_iter *i)
+{
+	void *kaddr = kmap_atomic(page);
+	size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);
+	kunmap_atomic(kaddr);
+	return wanted;
+}
+
+static size_t copy_page_from_iter_bvec(struct page *page, size_t offset,
+					size_t bytes, struct iov_iter *i)
+{
+	void *kaddr = kmap_atomic(page);
+	size_t wanted = copy_from_iter_bvec(kaddr + offset, bytes, i);
+	kunmap_atomic(kaddr);
+	return wanted;
+}
+
+static size_t zero_bvec(size_t bytes, struct iov_iter *i)
+{
+	size_t skip, copy, wanted;
+	const struct bio_vec *bvec;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	bvec = i->bvec;
+	skip = i->iov_offset;
+	copy = min_t(size_t, bytes, bvec->bv_len - skip);
+
+	memzero_page(bvec->bv_page, skip + bvec->bv_offset, copy);
+	skip += copy;
+	bytes -= copy;
+	while (bytes) {
+		bvec++;
+		copy = min(bytes, (size_t)bvec->bv_len);
+		memzero_page(bvec->bv_page, bvec->bv_offset, copy);
+		skip = copy;
+		bytes -= copy;
+	}
+	if (skip == bvec->bv_len) {
+		bvec++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= bvec - i->bvec;
+	i->bvec = bvec;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
 static size_t copy_from_user_bvec(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
@@ -672,6 +856,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 }
 EXPORT_SYMBOL(copy_page_from_iter);
 
+size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_to_iter_bvec(addr, bytes, i);
+	else
+		return copy_to_iter_iovec(addr, bytes, i);
+}
+EXPORT_SYMBOL(copy_to_iter);
+
+size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_from_iter_bvec(addr, bytes, i);
+	else
+		return copy_from_iter_iovec(addr, bytes, i);
+}
+EXPORT_SYMBOL(copy_from_iter);
+
+size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC) {
+		return zero_bvec(bytes, i);
+	} else {
+		return zero_iovec(bytes, i);
+	}
+}
+EXPORT_SYMBOL(iov_iter_zero);
+
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
-- 
cgit v1.2.3


From 5e6123f3477e4260fb14392f0a88f1a842fa4d42 Mon Sep 17 00:00:00 2001
From: Seunghun Lee <waydi1@gmail.com>
Date: Sun, 14 Sep 2014 22:15:10 +0900
Subject: vfs: move getname() from callers to do_mount()

It would make more sense to pass char __user * instead of
char * in callers of do_mount() and do getname() inside do_mount().

Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Seunghun Lee <waydi1@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/osf_sys.c | 23 ++++++++++-------------
 fs/compat.c                 | 20 ++++++--------------
 fs/namespace.c              | 19 +++----------------
 include/linux/fs.h          |  3 ++-
 4 files changed, 21 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 1402fcc11c2c..f9c732e18284 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -446,7 +446,8 @@ struct procfs_args {
  * unhappy with OSF UFS. [CHECKME]
  */
 static int
-osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags)
+osf_ufs_mount(const char __user *dirname,
+	      struct ufs_args __user *args, int flags)
 {
 	int retval;
 	struct cdfs_args tmp;
@@ -466,7 +467,8 @@ osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags)
 }
 
 static int
-osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags)
+osf_cdfs_mount(const char __user *dirname,
+	       struct cdfs_args __user *args, int flags)
 {
 	int retval;
 	struct cdfs_args tmp;
@@ -486,7 +488,8 @@ osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags)
 }
 
 static int
-osf_procfs_mount(const char *dirname, struct procfs_args __user *args, int flags)
+osf_procfs_mount(const char __user *dirname,
+		 struct procfs_args __user *args, int flags)
 {
 	struct procfs_args tmp;
 
@@ -500,28 +503,22 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path,
 		int, flag, void __user *, data)
 {
 	int retval;
-	struct filename *name;
 
-	name = getname(path);
-	retval = PTR_ERR(name);
-	if (IS_ERR(name))
-		goto out;
 	switch (typenr) {
 	case 1:
-		retval = osf_ufs_mount(name->name, data, flag);
+		retval = osf_ufs_mount(path, data, flag);
 		break;
 	case 6:
-		retval = osf_cdfs_mount(name->name, data, flag);
+		retval = osf_cdfs_mount(path, data, flag);
 		break;
 	case 9:
-		retval = osf_procfs_mount(name->name, data, flag);
+		retval = osf_procfs_mount(path, data, flag);
 		break;
 	default:
 		retval = -EINVAL;
 		printk("osf_mount(%ld, %x)\n", typenr, flag);
 	}
-	putname(name);
- out:
+
 	return retval;
 }
 
diff --git a/fs/compat.c b/fs/compat.c
index 6205c247a6e3..b13df99f3534 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -794,7 +794,6 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
 	char *kernel_type;
 	unsigned long data_page;
 	char *kernel_dev;
-	struct filename *dir;
 	int retval;
 
 	kernel_type = copy_mount_string(type);
@@ -802,19 +801,14 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
 	if (IS_ERR(kernel_type))
 		goto out;
 
-	dir = getname(dir_name);
-	retval = PTR_ERR(dir);
-	if (IS_ERR(dir))
-		goto out1;
-
 	kernel_dev = copy_mount_string(dev_name);
 	retval = PTR_ERR(kernel_dev);
 	if (IS_ERR(kernel_dev))
-		goto out2;
+		goto out1;
 
 	retval = copy_mount_options(data, &data_page);
 	if (retval < 0)
-		goto out3;
+		goto out2;
 
 	retval = -EINVAL;
 
@@ -823,19 +817,17 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
 			do_ncp_super_data_conv((void *)data_page);
 		} else if (!strcmp(kernel_type, NFS4_NAME)) {
 			if (do_nfs4_super_data_conv((void *) data_page))
-				goto out4;
+				goto out3;
 		}
 	}
 
-	retval = do_mount(kernel_dev, dir->name, kernel_type,
+	retval = do_mount(kernel_dev, dir_name, kernel_type,
 			flags, (void*)data_page);
 
- out4:
-	free_page(data_page);
  out3:
-	kfree(kernel_dev);
+	free_page(data_page);
  out2:
-	putname(dir);
+	kfree(kernel_dev);
  out1:
 	kfree(kernel_type);
  out:
diff --git a/fs/namespace.c b/fs/namespace.c
index abd3abb52616..348562f14e93 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2533,7 +2533,7 @@ char *copy_mount_string(const void __user *data)
  * Therefore, if this magic number is present, it carries no information
  * and must be discarded.
  */
-long do_mount(const char *dev_name, const char *dir_name,
+long do_mount(const char *dev_name, const char __user *dir_name,
 		const char *type_page, unsigned long flags, void *data_page)
 {
 	struct path path;
@@ -2545,15 +2545,11 @@ long do_mount(const char *dev_name, const char *dir_name,
 		flags &= ~MS_MGC_MSK;
 
 	/* Basic sanity checks */
-
-	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
-		return -EINVAL;
-
 	if (data_page)
 		((char *)data_page)[PAGE_SIZE - 1] = 0;
 
 	/* ... and get the mountpoint */
-	retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
+	retval = user_path(dir_name, &path);
 	if (retval)
 		return retval;
 
@@ -2778,7 +2774,6 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 {
 	int ret;
 	char *kernel_type;
-	struct filename *kernel_dir;
 	char *kernel_dev;
 	unsigned long data_page;
 
@@ -2787,12 +2782,6 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	if (IS_ERR(kernel_type))
 		goto out_type;
 
-	kernel_dir = getname(dir_name);
-	if (IS_ERR(kernel_dir)) {
-		ret = PTR_ERR(kernel_dir);
-		goto out_dir;
-	}
-
 	kernel_dev = copy_mount_string(dev_name);
 	ret = PTR_ERR(kernel_dev);
 	if (IS_ERR(kernel_dev))
@@ -2802,15 +2791,13 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	if (ret < 0)
 		goto out_data;
 
-	ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
+	ret = do_mount(kernel_dev, dir_name, kernel_type, flags,
 		(void *) data_page);
 
 	free_page(data_page);
 out_data:
 	kfree(kernel_dev);
 out_dev:
-	putname(kernel_dir);
-out_dir:
 	kfree(kernel_type);
 out_type:
 	return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75bdd51ec9b8..92148361bef8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1855,7 +1855,8 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
 extern void kern_unmount(struct vfsmount *mnt);
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
-extern long do_mount(const char *, const char *, const char *, unsigned long, void *);
+extern long do_mount(const char *, const char __user *,
+		     const char *, unsigned long, void *);
 extern struct vfsmount *collect_mounts(struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
-- 
cgit v1.2.3


From fd22f78cf7b95102d8e5b988afe27165e47471fc Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 13 Aug 2014 19:54:29 +0300
Subject: IB/mlx5: Use enumerations for PI copy mask

In case input and output space parameters match, we can use a copy
mask from input and output space.  Use enums for those.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx5/qp.c | 6 +++---
 include/linux/mlx5/qp.h         | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8c574b63d77b..554410c2655d 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2095,11 +2095,11 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
 			/* Same block structure */
 			basic->bsf_size_sbs = 1 << 4;
 			if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
-				basic->wire.copy_byte_mask |= 0xc0;
+				basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
 			if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
-				basic->wire.copy_byte_mask |= 0x30;
+				basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
 			if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
-				basic->wire.copy_byte_mask |= 0x0f;
+				basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
 		} else
 			basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
 
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 9709b30e2d69..4aa5634dc210 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -40,6 +40,9 @@
 #define MLX5_SIG_WQE_SIZE	(MLX5_SEND_WQE_BB * 5)
 #define MLX5_DIF_SIZE		8
 #define MLX5_STRIDE_BLOCK_OP	0x400
+#define MLX5_CPY_GRD_MASK	0xc0
+#define MLX5_CPY_APP_MASK	0x30
+#define MLX5_CPY_REF_MASK	0x0f
 
 enum mlx5_qp_optpar {
 	MLX5_QP_OPTPAR_ALT_ADDR_PATH		= 1 << 0,
-- 
cgit v1.2.3


From 142537f4e5f7ffd3e34b0c46646ac9cb5d986d06 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 13 Aug 2014 19:54:32 +0300
Subject: IB/mlx5: Use extended internal signature layout

Rather than using the basic BSF layout which utilizes a pre-configured
signature settings (sufficient for current DIF implementation), we use
the extended BSF layout to expose advanced signature settings. These
settings will also be exposed to the user later.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx5/qp.c | 80 +++++++++++++++++++----------------------
 include/linux/mlx5/qp.h         | 32 ++++++++++++-----
 2 files changed, 61 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 554410c2655d..13924a256290 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2020,53 +2020,47 @@ static u8 bs_selector(int block_size)
 	}
 }
 
-static int format_selector(struct ib_sig_attrs *attr,
-			   struct ib_sig_domain *domain,
-			   int *selector)
+static int mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
+			     struct mlx5_bsf_inl *inl)
 {
-
-#define FORMAT_DIF_NONE		0
-#define FORMAT_DIF_CRC_INC	8
-#define FORMAT_DIF_CRC_NO_INC	12
-#define FORMAT_DIF_CSUM_INC	13
-#define FORMAT_DIF_CSUM_NO_INC	14
+	/* Valid inline section and allow BSF refresh */
+	inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
+				       MLX5_BSF_REFRESH_DIF);
+	inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
+	inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
 
 	switch (domain->sig.dif.type) {
 	case IB_T10DIF_NONE:
 		/* No DIF */
-		*selector = FORMAT_DIF_NONE;
 		break;
 	case IB_T10DIF_TYPE1: /* Fall through */
 	case IB_T10DIF_TYPE2:
-		switch (domain->sig.dif.bg_type) {
-		case IB_T10DIF_CRC:
-			*selector = FORMAT_DIF_CRC_INC;
-			break;
-		case IB_T10DIF_CSUM:
-			*selector = FORMAT_DIF_CSUM_INC;
-			break;
-		default:
-			return 1;
-		}
+		inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
+				MLX5_DIF_CRC : MLX5_DIF_IPCS;
+		/*
+		 * increment reftag and don't check if
+		 * apptag=0xffff and reftag=0xffffffff
+		 */
+		inl->dif_inc_ref_guard_check = MLX5_BSF_INC_REFTAG |
+					       MLX5_BSF_APPREF_ESCAPE;
+		inl->dif_app_bitmask_check = 0xffff;
+		/* repeating block */
+		inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
 		break;
 	case IB_T10DIF_TYPE3:
-		switch (domain->sig.dif.bg_type) {
-		case IB_T10DIF_CRC:
-			*selector = domain->sig.dif.type3_inc_reftag ?
-					   FORMAT_DIF_CRC_INC :
-					   FORMAT_DIF_CRC_NO_INC;
-			break;
-		case IB_T10DIF_CSUM:
-			*selector = domain->sig.dif.type3_inc_reftag ?
-					   FORMAT_DIF_CSUM_INC :
-					   FORMAT_DIF_CSUM_NO_INC;
-			break;
-		default:
-			return 1;
-		}
+		inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
+				MLX5_DIF_CRC : MLX5_DIF_IPCS;
+		/*
+		 * Don't inc reftag and don't check if
+		 * apptag=0xffff and reftag=0xffffffff
+		 */
+		inl->dif_inc_ref_guard_check = MLX5_BSF_APPREF_ESCAPE;
+		inl->dif_app_bitmask_check = 0xffff;
+		/* Repeating block */
+		inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
 		break;
 	default:
-		return 1;
+		return -EINVAL;
 	}
 
 	return 0;
@@ -2080,7 +2074,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
 	struct mlx5_bsf_basic *basic = &bsf->basic;
 	struct ib_sig_domain *mem = &sig_attrs->mem;
 	struct ib_sig_domain *wire = &sig_attrs->wire;
-	int ret, selector;
+	int ret;
 
 	memset(bsf, 0, sizeof(*bsf));
 	switch (sig_attrs->mem.sig_type) {
@@ -2088,12 +2082,14 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
 		if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
 			return -EINVAL;
 
+		/* Basic + Extended + Inline */
+		basic->bsf_size_sbs = 1 << 7;
 		/* Input domain check byte mask */
 		basic->check_byte_mask = sig_attrs->check_mask;
 		if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
 		    mem->sig.dif.type == wire->sig.dif.type) {
 			/* Same block structure */
-			basic->bsf_size_sbs = 1 << 4;
+			basic->bsf_size_sbs |= 1 << 4;
 			if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
 				basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
 			if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
@@ -2105,18 +2101,16 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
 
 		basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
 		basic->raw_data_size = cpu_to_be32(data_size);
+		basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
+		basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
 
-		ret = format_selector(sig_attrs, mem, &selector);
+		ret = mlx5_fill_inl_bsf(wire, &bsf->w_inl);
 		if (ret)
 			return -EINVAL;
-		basic->m_bfs_psv = cpu_to_be32(selector << 24 |
-					       msig->psv_memory.psv_idx);
 
-		ret = format_selector(sig_attrs, wire, &selector);
+		ret = mlx5_fill_inl_bsf(mem, &bsf->m_inl);
 		if (ret)
 			return -EINVAL;
-		basic->w_bfs_psv = cpu_to_be32(selector << 24 |
-					       msig->psv_wire.psv_idx);
 		break;
 
 	default:
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 4aa5634dc210..69f5378455b7 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -43,6 +43,12 @@
 #define MLX5_CPY_GRD_MASK	0xc0
 #define MLX5_CPY_APP_MASK	0x30
 #define MLX5_CPY_REF_MASK	0x0f
+#define MLX5_BSF_INC_REFTAG	(1 << 6)
+#define MLX5_BSF_INL_VALID	(1 << 15)
+#define MLX5_BSF_REFRESH_DIF	(1 << 14)
+#define MLX5_BSF_REPEAT_BLOCK	(1 << 7)
+#define MLX5_BSF_APPTAG_ESCAPE	0x1
+#define MLX5_BSF_APPREF_ESCAPE	0x2
 
 enum mlx5_qp_optpar {
 	MLX5_QP_OPTPAR_ALT_ADDR_PATH		= 1 << 0,
@@ -290,6 +296,22 @@ struct mlx5_wqe_inline_seg {
 	__be32	byte_count;
 };
 
+enum mlx5_sig_type {
+	MLX5_DIF_CRC = 0x1,
+	MLX5_DIF_IPCS = 0x2,
+};
+
+struct mlx5_bsf_inl {
+	__be16		vld_refresh;
+	__be16		dif_apptag;
+	__be32		dif_reftag;
+	u8		sig_type;
+	u8		rp_inv_seed;
+	u8		rsvd[3];
+	u8		dif_inc_ref_guard_check;
+	__be16		dif_app_bitmask_check;
+};
+
 struct mlx5_bsf {
 	struct mlx5_bsf_basic {
 		u8		bsf_size_sbs;
@@ -313,14 +335,8 @@ struct mlx5_bsf {
 		__be32		w_tfs_psv;
 		__be32		m_tfs_psv;
 	} ext;
-	struct mlx5_bsf_inl {
-		__be32		w_inl_vld;
-		__be32		w_rsvd;
-		__be64		w_block_format;
-		__be32		m_inl_vld;
-		__be32		m_rsvd;
-		__be64		m_block_format;
-	} inl;
+	struct mlx5_bsf_inl	w_inl;
+	struct mlx5_bsf_inl	m_inl;
 };
 
 struct mlx5_klm {
-- 
cgit v1.2.3


From fe0f49768d807a8fe6336b097feb8c4441951710 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 30 Sep 2014 17:37:52 +0200
Subject: s390/nohz: use a per-cpu flag for arch_needs_cpu

Move the nohz_delay bit from the s390_idle data structure to the
per-cpu flags. Clear the nohz delay flag in __cpu_disable and
remove the cpu hotplug notifier that used to do this.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cputime.h   |  8 --------
 arch/s390/include/asm/processor.h |  4 ++++
 arch/s390/kernel/irq.c            |  2 +-
 arch/s390/kernel/smp.c            |  1 +
 arch/s390/kernel/vtime.c          | 19 +------------------
 drivers/s390/cio/airq.c           |  2 +-
 drivers/s390/cio/cio.c            |  2 +-
 include/linux/tick.h              |  2 +-
 kernel/time/tick-sched.c          |  2 +-
 9 files changed, 11 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index f65bd3634519..01887b1fade5 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -166,7 +166,6 @@ static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
 }
 
 struct s390_idle_data {
-	int nohz_delay;
 	unsigned int sequence;
 	unsigned long long idle_count;
 	unsigned long long idle_time;
@@ -182,11 +181,4 @@ cputime64_t s390_get_idle_time(int cpu);
 
 #define arch_idle_time(cpu) s390_get_idle_time(cpu)
 
-static inline int s390_nohz_delay(int cpu)
-{
-	return __get_cpu_var(s390_idle).nohz_delay != 0;
-}
-
-#define arch_needs_cpu(cpu) s390_nohz_delay(cpu)
-
 #endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index e568fc8a7250..bc796d73129b 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -13,9 +13,11 @@
 
 #define CIF_MCCK_PENDING	0	/* machine check handling is pending */
 #define CIF_ASCE		1	/* user asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
 
 #define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
 #define _CIF_ASCE		(1<<CIF_ASCE)
+#define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
 
 
 #ifndef __ASSEMBLY__
@@ -43,6 +45,8 @@ static inline int test_cpu_flag(int flag)
 	return !!(S390_lowcore.cpu_flags & (1U << flag));
 }
 
+#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
+
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 051574ee5366..1b8a38ab7861 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -259,7 +259,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy)
 
 	ext_code = *(struct ext_code *) &regs->int_code;
 	if (ext_code.code != EXT_IRQ_CLK_COMP)
-		__get_cpu_var(s390_idle).nohz_delay = 1;
+		set_cpu_flag(CIF_NOHZ_DELAY);
 
 	index = ext_hash(ext_code.code);
 	rcu_read_lock();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index abec97b4ddbf..46317d6951c4 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -720,6 +720,7 @@ int __cpu_disable(void)
 	cregs[6]  &= ~0xff000000UL;	/* disable all I/O interrupts */
 	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */
 	__ctl_load(cregs, 0, 15);
+	clear_cpu_flag(CIF_NOHZ_DELAY);
 	return 0;
 }
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 8c34363d6f1e..40709821abde 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -163,7 +163,7 @@ void __kprobes vtime_stop_cpu(void)
 	/* Wait for external, I/O or machine check interrupt. */
 	psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
 		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-	idle->nohz_delay = 0;
+	clear_cpu_flag(CIF_NOHZ_DELAY);
 
 	/* Call the assembler magic in entry.S */
 	psw_idle(idle, psw_mask);
@@ -378,25 +378,8 @@ void init_cpu_vtimer(void)
 	set_vtimer(VTIMER_MAX_SLICE);
 }
 
-static int s390_nohz_notify(struct notifier_block *self, unsigned long action,
-			    void *hcpu)
-{
-	struct s390_idle_data *idle;
-	long cpu = (long) hcpu;
-
-	idle = &per_cpu(s390_idle, cpu);
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DYING:
-		idle->nohz_delay = 0;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
 void __init vtime_init(void)
 {
 	/* Enable cpu timer interrupts on the boot cpu. */
 	init_cpu_vtimer();
-	cpu_notifier(s390_nohz_notify, 0);
 }
diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c
index 00bfbee0af9e..56eb4ee4deba 100644
--- a/drivers/s390/cio/airq.c
+++ b/drivers/s390/cio/airq.c
@@ -87,7 +87,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy)
 	struct airq_struct *airq;
 	struct hlist_head *head;
 
-	__this_cpu_write(s390_idle.nohz_delay, 1);
+	set_cpu_flag(CIF_NOHZ_DELAY);
 	tpi_info = (struct tpi_info *) &get_irq_regs()->int_code;
 	head = &airq_lists[tpi_info->isc];
 	rcu_read_lock();
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 2905d8b0ec95..d5a6f287d2fe 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -561,7 +561,7 @@ static irqreturn_t do_cio_interrupt(int irq, void *dummy)
 	struct subchannel *sch;
 	struct irb *irb;
 
-	__this_cpu_write(s390_idle.nohz_delay, 1);
+	set_cpu_flag(CIF_NOHZ_DELAY);
 	tpi_info = (struct tpi_info *) &get_irq_regs()->int_code;
 	irb = &__get_cpu_var(cio_irb);
 	sch = (struct subchannel *)(unsigned long) tpi_info->intparm;
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9a82c7dc3fdd..e5832d03da19 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -108,7 +108,7 @@ extern struct tick_sched *tick_get_tick_sched(int cpu);
 extern void tick_irq_enter(void);
 extern int tick_oneshot_mode_active(void);
 #  ifndef arch_needs_cpu
-#   define arch_needs_cpu(cpu) (0)
+#   define arch_needs_cpu() (0)
 #  endif
 # else
 static inline void tick_clock_notify(void) { }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f654a8a298fa..01d512fd45f1 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -572,7 +572,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	} while (read_seqretry(&jiffies_lock, seq));
 
 	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
-	    arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
+	    arch_needs_cpu() || irq_work_needs_cpu()) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
 	} else {
-- 
cgit v1.2.3


From 33ac9dba859b07d40e9ec826057d20c857fdede5 Mon Sep 17 00:00:00 2001
From: Maarten ter Huurne <maarten@treewalker.org>
Date: Tue, 9 Sep 2014 13:46:28 +0200
Subject: fonts: Add 6x10 font

This font is suitable for framebuffer consoles on devices with a
320x240 screen, to get a reasonable number of characters (53x24) that
are still at a readable size.

The font is derived from the existing 6x11 font, but gets 3 extra
lines without sacrificing readability. Also I redesigned a some glyhps
so they are more distinct and better fill the available space.

Signed-off-by: Maarten ter Huurne <maarten@treewalker.org>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 include/linux/font.h  |    4 +-
 lib/fonts/Kconfig     |    9 +
 lib/fonts/Makefile    |    1 +
 lib/fonts/font_6x10.c | 3086 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/fonts/fonts.c     |    4 +
 5 files changed, 3103 insertions(+), 1 deletion(-)
 create mode 100644 lib/fonts/font_6x10.c

(limited to 'include/linux')

diff --git a/include/linux/font.h b/include/linux/font.h
index 40a24ab41b36..d6821769dd1e 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -31,6 +31,7 @@ struct font_desc {
 #define SUN12x22_IDX	7
 #define ACORN8x8_IDX	8
 #define	MINI4x6_IDX	9
+#define FONT6x10_IDX	10
 
 extern const struct font_desc	font_vga_8x8,
 			font_vga_8x16,
@@ -41,7 +42,8 @@ extern const struct font_desc	font_vga_8x8,
 			font_sun_8x16,
 			font_sun_12x22,
 			font_acorn_8x8,
-			font_mini_4x6;
+			font_mini_4x6,
+			font_6x10;
 
 /* Find a font with a specific name */
 
diff --git a/lib/fonts/Kconfig b/lib/fonts/Kconfig
index 34fd931b54b5..e77dfe00de36 100644
--- a/lib/fonts/Kconfig
+++ b/lib/fonts/Kconfig
@@ -79,6 +79,14 @@ config FONT_MINI_4x6
 	bool "Mini 4x6 font"
 	depends on !SPARC && FONTS
 
+config FONT_6x10
+	bool "Medium-size 6x10 font"
+	depends on !SPARC && FONTS
+	help
+	  Medium-size console font. Suitable for framebuffer consoles on
+	  embedded devices with a 320x240 screen, to get a reasonable number
+	  of characters (53x24) that are still at a readable size.
+
 config FONT_SUN8x16
 	bool "Sparc console 8x16 font"
 	depends on FRAMEBUFFER_CONSOLE && (!SPARC && FONTS || SPARC)
@@ -109,6 +117,7 @@ config FONT_AUTOSELECT
 	depends on !FONT_PEARL_8x8
 	depends on !FONT_ACORN_8x8
 	depends on !FONT_MINI_4x6
+	depends on !FONT_6x10
 	depends on !FONT_SUN8x16
 	depends on !FONT_SUN12x22
 	depends on !FONT_10x18
diff --git a/lib/fonts/Makefile b/lib/fonts/Makefile
index 2761560f3f15..e04d010cfbf5 100644
--- a/lib/fonts/Makefile
+++ b/lib/fonts/Makefile
@@ -12,6 +12,7 @@ font-objs-$(CONFIG_FONT_10x18)     += font_10x18.o
 font-objs-$(CONFIG_FONT_PEARL_8x8) += font_pearl_8x8.o
 font-objs-$(CONFIG_FONT_ACORN_8x8) += font_acorn_8x8.o
 font-objs-$(CONFIG_FONT_MINI_4x6)  += font_mini_4x6.o
+font-objs-$(CONFIG_FONT_6x10)      += font_6x10.o
 
 font-objs += $(font-objs-y)
 
diff --git a/lib/fonts/font_6x10.c b/lib/fonts/font_6x10.c
new file mode 100644
index 000000000000..b20620904d31
--- /dev/null
+++ b/lib/fonts/font_6x10.c
@@ -0,0 +1,3086 @@
+#include <linux/font.h>
+
+static const unsigned char fontdata_6x10[] = {
+
+	/* 0 0x00 '^@' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 1 0x01 '^A' */
+	0x00, /* 00000000 */
+	0x78, /* 01111000 */
+	0x84, /* 10000100 */
+	0xCC, /* 11001100 */
+	0x84, /* 10000100 */
+	0xCC, /* 11001100 */
+	0xB4, /* 10110100 */
+	0x78, /* 01111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 2 0x02 '^B' */
+	0x00, /* 00000000 */
+	0x78, /* 01111000 */
+	0xFC, /* 11111100 */
+	0xB4, /* 10110100 */
+	0xFC, /* 11111100 */
+	0xB4, /* 10110100 */
+	0xCC, /* 11001100 */
+	0x78, /* 01111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 3 0x03 '^C' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x28, /* 00101000 */
+	0x7C, /* 01111100 */
+	0x7C, /* 01111100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 4 0x04 '^D' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x7C, /* 01111100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 5 0x05 '^E' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x38, /* 00111000 */
+	0x6C, /* 01101100 */
+	0x6C, /* 01101100 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 6 0x06 '^F' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x7C, /* 01111100 */
+	0x7C, /* 01111100 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 7 0x07 '^G' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x78, /* 01111000 */
+	0x30, /* 00110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 8 0x08 '^H' */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xCC, /* 11001100 */
+	0x84, /* 10000100 */
+	0xCC, /* 11001100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+
+	/* 9 0x09 '^I' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x48, /* 01001000 */
+	0x84, /* 10000100 */
+	0x48, /* 01001000 */
+	0x30, /* 00110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 10 0x0A '^J' */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xCC, /* 11001100 */
+	0xB4, /* 10110100 */
+	0x78, /* 01111000 */
+	0xB4, /* 10110100 */
+	0xCC, /* 11001100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+
+	/* 11 0x0B '^K' */
+	0x00, /* 00000000 */
+	0x3C, /* 00111100 */
+	0x14, /* 00010100 */
+	0x20, /* 00100000 */
+	0x78, /* 01111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 12 0x0C '^L' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 13 0x0D '^M' */
+	0x00, /* 00000000 */
+	0x18, /* 00011000 */
+	0x14, /* 00010100 */
+	0x14, /* 00010100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x70, /* 01110000 */
+	0x60, /* 01100000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 14 0x0E '^N' */
+	0x00, /* 00000000 */
+	0x3C, /* 00111100 */
+	0x24, /* 00100100 */
+	0x3C, /* 00111100 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x6C, /* 01101100 */
+	0x6C, /* 01101100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 15 0x0F '^O' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x6C, /* 01101100 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 16 0x10 '^P' */
+	0x00, /* 00000000 */
+	0x40, /* 01000000 */
+	0x60, /* 01100000 */
+	0x70, /* 01110000 */
+	0x78, /* 01111000 */
+	0x70, /* 01110000 */
+	0x60, /* 01100000 */
+	0x40, /* 01000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 17 0x11 '^Q' */
+	0x00, /* 00000000 */
+	0x04, /* 00000100 */
+	0x0C, /* 00001100 */
+	0x1C, /* 00011100 */
+	0x3C, /* 00111100 */
+	0x1C, /* 00011100 */
+	0x0C, /* 00001100 */
+	0x04, /* 00000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 18 0x12 '^R' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x10, /* 00010000 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 19 0x13 '^S' */
+	0x00, /* 00000000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x00, /* 00000000 */
+	0x48, /* 01001000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 20 0x14 '^T' */
+	0x3C, /* 00111100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x3C, /* 00111100 */
+	0x14, /* 00010100 */
+	0x14, /* 00010100 */
+	0x14, /* 00010100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 21 0x15 '^U' */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x20, /* 00100000 */
+	0x50, /* 01010000 */
+	0x48, /* 01001000 */
+	0x24, /* 00100100 */
+	0x14, /* 00010100 */
+	0x08, /* 00001000 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+
+	/* 22 0x16 '^V' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xF8, /* 11111000 */
+	0xF8, /* 11111000 */
+	0xF8, /* 11111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 23 0x17 '^W' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x10, /* 00010000 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+
+	/* 24 0x18 '^X' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 25 0x19 '^Y' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 26 0x1A '^Z' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x7C, /* 01111100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 27 0x1B '^[' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x7C, /* 01111100 */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 28 0x1C '^\' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x78, /* 01111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 29 0x1D '^]' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x48, /* 01001000 */
+	0x84, /* 10000100 */
+	0xFC, /* 11111100 */
+	0x84, /* 10000100 */
+	0x48, /* 01001000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 30 0x1E '^^' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x38, /* 00111000 */
+	0x7C, /* 01111100 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 31 0x1F '^_' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x7C, /* 01111100 */
+	0x38, /* 00111000 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 32 0x20 ' ' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 33 0x21 '!' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 34 0x22 '"' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 35 0x23 '#' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x28, /* 00101000 */
+	0x7C, /* 01111100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x7C, /* 01111100 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 36 0x24 '$' */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x50, /* 01010000 */
+	0x38, /* 00111000 */
+	0x14, /* 00010100 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+
+	/* 37 0x25 '%' */
+	0x00, /* 00000000 */
+	0x64, /* 01100100 */
+	0x64, /* 01100100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x4C, /* 01001100 */
+	0x4C, /* 01001100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 38 0x26 '&' */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x48, /* 01001000 */
+	0x50, /* 01010000 */
+	0x20, /* 00100000 */
+	0x54, /* 01010100 */
+	0x48, /* 01001000 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 39 0x27 ''' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 40 0x28 '(' */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x20, /* 00100000 */
+	0x20, /* 00100000 */
+	0x20, /* 00100000 */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+
+	/* 41 0x29 ')' */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x00, /* 00000000 */
+
+	/* 42 0x2A '*' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 43 0x2B '+' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x7C, /* 01111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 44 0x2C ',' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+
+	/* 45 0x2D '-' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 46 0x2E '.' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x18, /* 00011000 */
+	0x18, /* 00011000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 47 0x2F '/' */
+	0x04, /* 00000100 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x20, /* 00100000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+
+	/* 48 0x30 '0' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x54, /* 01010100 */
+	0x64, /* 01100100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 49 0x31 '1' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x30, /* 00110000 */
+	0x50, /* 01010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 50 0x32 '2' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 51 0x33 '3' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x04, /* 00000100 */
+	0x18, /* 00011000 */
+	0x04, /* 00000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 52 0x34 '4' */
+	0x00, /* 00000000 */
+	0x08, /* 00001000 */
+	0x18, /* 00011000 */
+	0x28, /* 00101000 */
+	0x48, /* 01001000 */
+	0x7C, /* 01111100 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 53 0x35 '5' */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x78, /* 01111000 */
+	0x04, /* 00000100 */
+	0x04, /* 00000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 54 0x36 '6' */
+	0x00, /* 00000000 */
+	0x18, /* 00011000 */
+	0x20, /* 00100000 */
+	0x40, /* 01000000 */
+	0x78, /* 01111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 55 0x37 '7' */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x04, /* 00000100 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 56 0x38 '8' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 57 0x39 '9' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x30, /* 00110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 58 0x3A ':' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x18, /* 00011000 */
+	0x18, /* 00011000 */
+	0x00, /* 00000000 */
+	0x18, /* 00011000 */
+	0x18, /* 00011000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 59 0x3B ';' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x30, /* 00110000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+
+	/* 60 0x3C '<' */
+	0x00, /* 00000000 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x04, /* 00000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 61 0x3D '=' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 62 0x3E '>' */
+	0x00, /* 00000000 */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 63 0x3F '?' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 64 0x40 '@' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x5C, /* 01011100 */
+	0x54, /* 01010100 */
+	0x5C, /* 01011100 */
+	0x40, /* 01000000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 65 0x41 'A' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 66 0x42 'B' */
+	0x00, /* 00000000 */
+	0x78, /* 01111000 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x38, /* 00111000 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x78, /* 01111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 67 0x43 'C' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 68 0x44 'D' */
+	0x00, /* 00000000 */
+	0x78, /* 01111000 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x78, /* 01111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 69 0x45 'E' */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x78, /* 01111000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 70 0x46 'F' */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x78, /* 01111000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 71 0x47 'G' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x40, /* 01000000 */
+	0x5C, /* 01011100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 72 0x48 'H' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 73 0x49 'I' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 74 0x4A 'J' */
+	0x00, /* 00000000 */
+	0x1C, /* 00011100 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x30, /* 00110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 75 0x4B 'K' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x48, /* 01001000 */
+	0x50, /* 01010000 */
+	0x60, /* 01100000 */
+	0x50, /* 01010000 */
+	0x48, /* 01001000 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 76 0x4C 'L' */
+	0x00, /* 00000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 77 0x4D 'M' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x6C, /* 01101100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 78 0x4E 'N' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x64, /* 01100100 */
+	0x54, /* 01010100 */
+	0x4C, /* 01001100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 79 0x4F 'O' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 80 0x50 'P' */
+	0x00, /* 00000000 */
+	0x78, /* 01111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x78, /* 01111000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 81 0x51 'Q' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x54, /* 01010100 */
+	0x48, /* 01001000 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 82 0x52 'R' */
+	0x00, /* 00000000 */
+	0x78, /* 01111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x78, /* 01111000 */
+	0x50, /* 01010000 */
+	0x48, /* 01001000 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 83 0x53 'S' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x40, /* 01000000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 84 0x54 'T' */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 85 0x55 'U' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 86 0x56 'V' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x28, /* 00101000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 87 0x57 'W' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x6C, /* 01101100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 88 0x58 'X' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x28, /* 00101000 */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 89 0x59 'Y' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x28, /* 00101000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 90 0x5A 'Z' */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x40, /* 01000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 91 0x5B '[' */
+	0x18, /* 00011000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x18, /* 00011000 */
+	0x00, /* 00000000 */
+
+	/* 92 0x5C '\' */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x20, /* 00100000 */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x04, /* 00000100 */
+	0x04, /* 00000100 */
+
+	/* 93 0x5D ']' */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x30, /* 00110000 */
+	0x00, /* 00000000 */
+
+	/* 94 0x5E '^' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 95 0x5F '_' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+
+	/* 96 0x60 '`' */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 97 0x61 'a' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x3C, /* 00111100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 98 0x62 'b' */
+	0x00, /* 00000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x58, /* 01011000 */
+	0x64, /* 01100100 */
+	0x44, /* 01000100 */
+	0x64, /* 01100100 */
+	0x58, /* 01011000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 99 0x63 'c' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x40, /* 01000000 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 100 0x64 'd' */
+	0x00, /* 00000000 */
+	0x04, /* 00000100 */
+	0x04, /* 00000100 */
+	0x34, /* 00110100 */
+	0x4C, /* 01001100 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 101 0x65 'e' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 102 0x66 'f' */
+	0x00, /* 00000000 */
+	0x0C, /* 00001100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 103 0x67 'g' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x34, /* 00110100 */
+	0x4C, /* 01001100 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x34, /* 00110100 */
+	0x04, /* 00000100 */
+	0x38, /* 00111000 */
+
+	/* 104 0x68 'h' */
+	0x00, /* 00000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x78, /* 01111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 105 0x69 'i' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 106 0x6A 'j' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x60, /* 01100000 */
+
+	/* 107 0x6B 'k' */
+	0x00, /* 00000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x48, /* 01001000 */
+	0x50, /* 01010000 */
+	0x70, /* 01110000 */
+	0x48, /* 01001000 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 108 0x6C 'l' */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 109 0x6D 'm' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x68, /* 01101000 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 110 0x6E 'n' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x58, /* 01011000 */
+	0x64, /* 01100100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 111 0x6F 'o' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 112 0x70 'p' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x58, /* 01011000 */
+	0x64, /* 01100100 */
+	0x44, /* 01000100 */
+	0x64, /* 01100100 */
+	0x58, /* 01011000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+
+	/* 113 0x71 'q' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x34, /* 00110100 */
+	0x4C, /* 01001100 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x34, /* 00110100 */
+	0x04, /* 00000100 */
+	0x04, /* 00000100 */
+
+	/* 114 0x72 'r' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x58, /* 01011000 */
+	0x64, /* 01100100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 115 0x73 's' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x3C, /* 00111100 */
+	0x40, /* 01000000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x78, /* 01111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 116 0x74 't' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x0C, /* 00001100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 117 0x75 'u' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 118 0x76 'v' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x28, /* 00101000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 119 0x77 'w' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 120 0x78 'x' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x28, /* 00101000 */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 121 0x79 'y' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x04, /* 00000100 */
+	0x38, /* 00111000 */
+
+	/* 122 0x7A 'z' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 123 0x7B '{' */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+
+	/* 124 0x7C '|' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+
+	/* 125 0x7D '}' */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x00, /* 00000000 */
+
+	/* 126 0x7E '~' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x20, /* 00100000 */
+	0x54, /* 01010100 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 127 0x7F '' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 128 0x80 '\200' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+
+	/* 129 0x81 '\201' */
+	0x00, /* 00000000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 130 0x82 '\202' */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 131 0x83 '\203' */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x3C, /* 00111100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 132 0x84 '\204' */
+	0x00, /* 00000000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x3C, /* 00111100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 133 0x85 '\205' */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x3C, /* 00111100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 134 0x86 '\206' */
+	0x18, /* 00011000 */
+	0x24, /* 00100100 */
+	0x18, /* 00011000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x3C, /* 00111100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 135 0x87 '\207' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x40, /* 01000000 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+
+	/* 136 0x88 '\210' */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 137 0x89 '\211' */
+	0x00, /* 00000000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 138 0x8A '\212' */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 139 0x8B '\213' */
+	0x00, /* 00000000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 140 0x8C '\214' */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 141 0x8D '\215' */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 142 0x8E '\216' */
+	0x44, /* 01000100 */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 143 0x8F '\217' */
+	0x30, /* 00110000 */
+	0x48, /* 01001000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 144 0x90 '\220' */
+	0x10, /* 00010000 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x78, /* 01111000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 145 0x91 '\221' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x78, /* 01111000 */
+	0x14, /* 00010100 */
+	0x7C, /* 01111100 */
+	0x50, /* 01010000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 146 0x92 '\222' */
+	0x00, /* 00000000 */
+	0x3C, /* 00111100 */
+	0x50, /* 01010000 */
+	0x50, /* 01010000 */
+	0x78, /* 01111000 */
+	0x50, /* 01010000 */
+	0x50, /* 01010000 */
+	0x5C, /* 01011100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 147 0x93 '\223' */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 148 0x94 '\224' */
+	0x00, /* 00000000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 149 0x95 '\225' */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 150 0x96 '\226' */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 151 0x97 '\227' */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 152 0x98 '\230' */
+	0x00, /* 00000000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x04, /* 00000100 */
+	0x38, /* 00111000 */
+
+	/* 153 0x99 '\231' */
+	0x84, /* 10000100 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 154 0x9A '\232' */
+	0x88, /* 10001000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 155 0x9B '\233' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x50, /* 01010000 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+
+	/* 156 0x9C '\234' */
+	0x30, /* 00110000 */
+	0x48, /* 01001000 */
+	0x40, /* 01000000 */
+	0x70, /* 01110000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x44, /* 01000100 */
+	0x78, /* 01111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 157 0x9D '\235' */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x28, /* 00101000 */
+	0x7C, /* 01111100 */
+	0x10, /* 00010000 */
+	0x7C, /* 01111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 158 0x9E '\236' */
+	0x00, /* 00000000 */
+	0x70, /* 01110000 */
+	0x48, /* 01001000 */
+	0x70, /* 01110000 */
+	0x48, /* 01001000 */
+	0x5C, /* 01011100 */
+	0x48, /* 01001000 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 159 0x9F '\237' */
+	0x00, /* 00000000 */
+	0x0C, /* 00001100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x60, /* 01100000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 160 0xA0 '\240' */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x3C, /* 00111100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 161 0xA1 '\241' */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x30, /* 00110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 162 0xA2 '\242' */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 163 0xA3 '\243' */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x4C, /* 01001100 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 164 0xA4 '\244' */
+	0x34, /* 00110100 */
+	0x58, /* 01011000 */
+	0x00, /* 00000000 */
+	0x58, /* 01011000 */
+	0x64, /* 01100100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 165 0xA5 '\245' */
+	0x58, /* 01011000 */
+	0x44, /* 01000100 */
+	0x64, /* 01100100 */
+	0x54, /* 01010100 */
+	0x4C, /* 01001100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 166 0xA6 '\246' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x04, /* 00000100 */
+	0x3C, /* 00111100 */
+	0x44, /* 01000100 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 167 0xA7 '\247' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 168 0xA8 '\250' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x40, /* 01000000 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 169 0xA9 '\251' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 170 0xAA '\252' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x04, /* 00000100 */
+	0x04, /* 00000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 171 0xAB '\253' */
+	0x20, /* 00100000 */
+	0x60, /* 01100000 */
+	0x24, /* 00100100 */
+	0x28, /* 00101000 */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x44, /* 01000100 */
+	0x08, /* 00001000 */
+	0x1C, /* 00011100 */
+	0x00, /* 00000000 */
+
+	/* 172 0xAC '\254' */
+	0x20, /* 00100000 */
+	0x60, /* 01100000 */
+	0x24, /* 00100100 */
+	0x28, /* 00101000 */
+	0x10, /* 00010000 */
+	0x28, /* 00101000 */
+	0x58, /* 01011000 */
+	0x3C, /* 00111100 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+
+	/* 173 0xAD '\255' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 174 0xAE '\256' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x24, /* 00100100 */
+	0x48, /* 01001000 */
+	0x90, /* 10010000 */
+	0x48, /* 01001000 */
+	0x24, /* 00100100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 175 0xAF '\257' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x90, /* 10010000 */
+	0x48, /* 01001000 */
+	0x24, /* 00100100 */
+	0x48, /* 01001000 */
+	0x90, /* 10010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 176 0xB0 '\260' */
+	0x10, /* 00010000 */
+	0x44, /* 01000100 */
+	0x10, /* 00010000 */
+	0x44, /* 01000100 */
+	0x10, /* 00010000 */
+	0x44, /* 01000100 */
+	0x10, /* 00010000 */
+	0x44, /* 01000100 */
+	0x10, /* 00010000 */
+	0x44, /* 01000100 */
+
+	/* 177 0xB1 '\261' */
+	0xA8, /* 10101000 */
+	0x54, /* 01010100 */
+	0xA8, /* 10101000 */
+	0x54, /* 01010100 */
+	0xA8, /* 10101000 */
+	0x54, /* 01010100 */
+	0xA8, /* 10101000 */
+	0x54, /* 01010100 */
+	0xA8, /* 10101000 */
+	0x54, /* 01010100 */
+
+	/* 178 0xB2 '\262' */
+	0xDC, /* 11011100 */
+	0x74, /* 01110100 */
+	0xDC, /* 11011100 */
+	0x74, /* 01110100 */
+	0xDC, /* 11011100 */
+	0x74, /* 01110100 */
+	0xDC, /* 11011100 */
+	0x74, /* 01110100 */
+	0xDC, /* 11011100 */
+	0x74, /* 01110100 */
+
+	/* 179 0xB3 '\263' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 180 0xB4 '\264' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0xF0, /* 11110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 181 0xB5 '\265' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0xF0, /* 11110000 */
+	0x10, /* 00010000 */
+	0xF0, /* 11110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 182 0xB6 '\266' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0xE8, /* 11101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 183 0xB7 '\267' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xF8, /* 11111000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 184 0xB8 '\270' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xF0, /* 11110000 */
+	0x10, /* 00010000 */
+	0xF0, /* 11110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 185 0xB9 '\271' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0xE8, /* 11101000 */
+	0x08, /* 00001000 */
+	0xE8, /* 11101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 186 0xBA '\272' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 187 0xBB '\273' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xF8, /* 11111000 */
+	0x08, /* 00001000 */
+	0xE8, /* 11101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 188 0xBC '\274' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0xE8, /* 11101000 */
+	0x08, /* 00001000 */
+	0xF8, /* 11111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 189 0xBD '\275' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0xF8, /* 11111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 190 0xBE '\276' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0xF0, /* 11110000 */
+	0x10, /* 00010000 */
+	0xF0, /* 11110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 191 0xBF '\277' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xF0, /* 11110000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 192 0xC0 '\300' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x1C, /* 00011100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 193 0xC1 '\301' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 194 0xC2 '\302' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 195 0xC3 '\303' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x1C, /* 00011100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 196 0xC4 '\304' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 197 0xC5 '\305' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0xFC, /* 11111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 198 0xC6 '\306' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x1C, /* 00011100 */
+	0x10, /* 00010000 */
+	0x1C, /* 00011100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 199 0xC7 '\307' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x2C, /* 00101100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 200 0xC8 '\310' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x2C, /* 00101100 */
+	0x20, /* 00100000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 201 0xC9 '\311' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x3C, /* 00111100 */
+	0x20, /* 00100000 */
+	0x2C, /* 00101100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 202 0xCA '\312' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0xEC, /* 11101100 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 203 0xCB '\313' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0xEC, /* 11101100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 204 0xCC '\314' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x2C, /* 00101100 */
+	0x20, /* 00100000 */
+	0x2C, /* 00101100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 205 0xCD '\315' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 206 0xCE '\316' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0xEC, /* 11101100 */
+	0x00, /* 00000000 */
+	0xEC, /* 11101100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 207 0xCF '\317' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 208 0xD0 '\320' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 209 0xD1 '\321' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 210 0xD2 '\322' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 211 0xD3 '\323' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 212 0xD4 '\324' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x1C, /* 00011100 */
+	0x10, /* 00010000 */
+	0x1C, /* 00011100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 213 0xD5 '\325' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x1C, /* 00011100 */
+	0x10, /* 00010000 */
+	0x1C, /* 00011100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 214 0xD6 '\326' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x3C, /* 00111100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 215 0xD7 '\327' */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0xFC, /* 11111100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+
+	/* 216 0xD8 '\330' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0xFC, /* 11111100 */
+	0x10, /* 00010000 */
+	0xFC, /* 11111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 217 0xD9 '\331' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0xF0, /* 11110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 218 0xDA '\332' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x1C, /* 00011100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 219 0xDB '\333' */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+
+	/* 220 0xDC '\334' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+
+	/* 221 0xDD '\335' */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+	0xE0, /* 11100000 */
+
+	/* 222 0xDE '\336' */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+	0x1C, /* 00011100 */
+
+	/* 223 0xDF '\337' */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 224 0xE0 '\340' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x34, /* 00110100 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x34, /* 00110100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 225 0xE1 '\341' */
+	0x18, /* 00011000 */
+	0x24, /* 00100100 */
+	0x44, /* 01000100 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x58, /* 01011000 */
+	0x40, /* 01000000 */
+	0x00, /* 00000000 */
+
+	/* 226 0xE2 '\342' */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 227 0xE3 '\343' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x28, /* 00101000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 228 0xE4 '\344' */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x24, /* 00100100 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x24, /* 00100100 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 229 0xE5 '\345' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x3C, /* 00111100 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x30, /* 00110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 230 0xE6 '\346' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x74, /* 01110100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+
+	/* 231 0xE7 '\347' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x0C, /* 00001100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 232 0xE8 '\350' */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 233 0xE9 '\351' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x7C, /* 01111100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 234 0xEA '\352' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x28, /* 00101000 */
+	0x6C, /* 01101100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 235 0xEB '\353' */
+	0x00, /* 00000000 */
+	0x18, /* 00011000 */
+	0x20, /* 00100000 */
+	0x18, /* 00011000 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x24, /* 00100100 */
+	0x18, /* 00011000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 236 0xEC '\354' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 237 0xED '\355' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x04, /* 00000100 */
+	0x38, /* 00111000 */
+	0x54, /* 01010100 */
+	0x54, /* 01010100 */
+	0x38, /* 00111000 */
+	0x40, /* 01000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 238 0xEE '\356' */
+	0x00, /* 00000000 */
+	0x3C, /* 00111100 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x38, /* 00111000 */
+	0x40, /* 01000000 */
+	0x40, /* 01000000 */
+	0x3C, /* 00111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 239 0xEF '\357' */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x44, /* 01000100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 240 0xF0 '\360' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0xFC, /* 11111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 241 0xF1 '\361' */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x7C, /* 01111100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 242 0xF2 '\362' */
+	0x00, /* 00000000 */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 243 0xF3 '\363' */
+	0x00, /* 00000000 */
+	0x08, /* 00001000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x10, /* 00010000 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 244 0xF4 '\364' */
+	0x00, /* 00000000 */
+	0x0C, /* 00001100 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+
+	/* 245 0xF5 '\365' */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x10, /* 00010000 */
+	0x60, /* 01100000 */
+	0x00, /* 00000000 */
+
+	/* 246 0xF6 '\366' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x7C, /* 01111100 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 247 0xF7 '\367' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x20, /* 00100000 */
+	0x54, /* 01010100 */
+	0x08, /* 00001000 */
+	0x20, /* 00100000 */
+	0x54, /* 01010100 */
+	0x08, /* 00001000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 248 0xF8 '\370' */
+	0x30, /* 00110000 */
+	0x48, /* 01001000 */
+	0x48, /* 01001000 */
+	0x30, /* 00110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 249 0xF9 '\371' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x38, /* 00111000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 250 0xFA '\372' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x10, /* 00010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 251 0xFB '\373' */
+	0x00, /* 00000000 */
+	0x04, /* 00000100 */
+	0x08, /* 00001000 */
+	0x08, /* 00001000 */
+	0x50, /* 01010000 */
+	0x50, /* 01010000 */
+	0x20, /* 00100000 */
+	0x20, /* 00100000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 252 0xFC '\374' */
+	0x60, /* 01100000 */
+	0x50, /* 01010000 */
+	0x50, /* 01010000 */
+	0x50, /* 01010000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 253 0xFD '\375' */
+	0x60, /* 01100000 */
+	0x10, /* 00010000 */
+	0x20, /* 00100000 */
+	0x70, /* 01110000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 254 0xFE '\376' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x38, /* 00111000 */
+	0x38, /* 00111000 */
+	0x38, /* 00111000 */
+	0x38, /* 00111000 */
+	0x38, /* 00111000 */
+	0x38, /* 00111000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+	/* 255 0xFF '\377' */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+	0x00, /* 00000000 */
+
+};
+
+const struct font_desc font_6x10 = {
+	.idx	= FONT6x10_IDX,
+	.name	= "6x10",
+	.width	= 6,
+	.height	= 10,
+	.data	= fontdata_6x10,
+	.pref	= 0,
+};
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index f947189efe6d..823376ca0a8b 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -63,6 +63,10 @@ static const struct font_desc *fonts[] = {
 #undef NO_FONTS
     &font_mini_4x6,
 #endif
+#ifdef CONFIG_FONT_6x10
+#undef NO_FONTS
+    &font_6x10,
+#endif
 };
 
 #define num_fonts ARRAY_SIZE(fonts)
-- 
cgit v1.2.3


From b8839b8c55f3fdd60dc36abcda7e0266aff7985c Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Wed, 8 Oct 2014 18:26:13 -0400
Subject: block: fix alignment_offset math that assumes io_min is a power-of-2

The math in both blk_stack_limits() and queue_limit_alignment_offset()
assume that a block device's io_min (aka minimum_io_size) is always a
power-of-2.  Fix the math such that it works for non-power-of-2 io_min.

This issue (of alignment_offset != 0) became apparent when testing
dm-thinp with a thinp blocksize that matches a RAID6 stripesize of
1280K.  Commit fdfb4c8c1 ("dm thin: set minimum_io_size to pool's data
block size") unlocked the potential for alignment_offset != 0 due to
the dm-thin-pool's io_min possibly being a non-power-of-2.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-settings.c   | 4 ++--
 include/linux/blkdev.h | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index f1a1795a5683..aa02247d227e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -574,7 +574,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		bottom = max(b->physical_block_size, b->io_min) + alignment;
 
 		/* Verify that top and bottom intervals line up */
-		if (max(top, bottom) & (min(top, bottom) - 1)) {
+		if (max(top, bottom) % min(top, bottom)) {
 			t->misaligned = 1;
 			ret = -1;
 		}
@@ -619,7 +619,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
 	/* Find lowest common alignment_offset */
 	t->alignment_offset = lcm(t->alignment_offset, alignment)
-		& (max(t->physical_block_size, t->io_min) - 1);
+		% max(t->physical_block_size, t->io_min);
 
 	/* Verify that new alignment_offset is on a logical block boundary */
 	if (t->alignment_offset & (t->logical_block_size - 1)) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 038b40f84c7a..554639249fd6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1279,10 +1279,9 @@ static inline int queue_alignment_offset(struct request_queue *q)
 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
 {
 	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
-	unsigned int alignment = (sector << 9) & (granularity - 1);
+	unsigned int alignment = sector_div(sector, granularity >> 9) << 9;
 
-	return (granularity + lim->alignment_offset - alignment)
-		& (granularity - 1);
+	return (granularity + lim->alignment_offset - alignment) % granularity;
 }
 
 static inline int bdev_alignment_offset(struct block_device *bdev)
-- 
cgit v1.2.3


From 58cb65487e92b47448d00a711c9f5922137d5678 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 9 Oct 2014 15:25:54 -0700
Subject: proc/maps: make vm_is_stack() logic namespace-friendly

- Rename vm_is_stack() to task_of_stack() and change it to return
  "struct task_struct *" rather than the global (and thus wrong in
  general) pid_t.

- Add the new pid_of_stack() helper which calls task_of_stack() and
  uses the right namespace to report the correct pid_t.

  Unfortunately we need to define this helper twice, in task_mmu.c
  and in task_nommu.c. perhaps it makes sense to add fs/proc/util.c
  and move at least pid_of_stack/task_of_stack there to avoid the
  code duplication.

- Change show_map_vma() and show_numa_map() to use the new helper.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Ungerer <gerg@uclinux.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c   | 25 +++++++++++++++++++++----
 fs/proc/task_nommu.c | 21 ++++++++++++++++++++-
 include/linux/mm.h   |  4 ++--
 mm/util.c            | 23 ++++++++---------------
 4 files changed, 51 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4793e4a843b0..adddf697c4ea 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -261,13 +261,31 @@ static int do_maps_open(struct inode *inode, struct file *file,
 				sizeof(struct proc_maps_private));
 }
 
+static pid_t pid_of_stack(struct proc_maps_private *priv,
+				struct vm_area_struct *vma, bool is_pid)
+{
+	struct inode *inode = priv->inode;
+	struct task_struct *task;
+	pid_t ret = 0;
+
+	rcu_read_lock();
+	task = pid_task(proc_pid(inode), PIDTYPE_PID);
+	if (task) {
+		task = task_of_stack(task, vma, is_pid);
+		if (task)
+			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 static void
 show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct file *file = vma->vm_file;
 	struct proc_maps_private *priv = m->private;
-	struct task_struct *task = priv->task;
 	vm_flags_t flags = vma->vm_flags;
 	unsigned long ino = 0;
 	unsigned long long pgoff = 0;
@@ -332,8 +350,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 			goto done;
 		}
 
-		tid = vm_is_stack(task, vma, is_pid);
-
+		tid = pid_of_stack(priv, vma, is_pid);
 		if (tid != 0) {
 			/*
 			 * Thread stack in /proc/PID/task/TID/maps or
@@ -1446,7 +1463,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
 		seq_puts(m, " heap");
 	} else {
-		pid_t tid = vm_is_stack(task, vma, is_pid);
+		pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
 		if (tid != 0) {
 			/*
 			 * Thread stack in /proc/PID/task/TID/maps or
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index f36e213835cc..599ec2e20104 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,6 +123,25 @@ unsigned long task_statm(struct mm_struct *mm,
 	return size;
 }
 
+static pid_t pid_of_stack(struct proc_maps_private *priv,
+				struct vm_area_struct *vma, bool is_pid)
+{
+	struct inode *inode = priv->inode;
+	struct task_struct *task;
+	pid_t ret = 0;
+
+	rcu_read_lock();
+	task = pid_task(proc_pid(inode), PIDTYPE_PID);
+	if (task) {
+		task = task_of_stack(task, vma, is_pid);
+		if (task)
+			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 /*
  * display a single VMA to a sequenced file
  */
@@ -163,7 +182,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
 		seq_pad(m, ' ');
 		seq_path(m, &file->f_path, "");
 	} else if (mm) {
-		pid_t tid = vm_is_stack(priv->task, vma, is_pid);
+		pid_t tid = pid_of_stack(priv, vma, is_pid);
 
 		if (tid != 0) {
 			seq_pad(m, ' ');
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0f4196a0bc20..28df70774b81 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1247,8 +1247,8 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
 		!vma_growsup(vma->vm_next, addr);
 }
 
-extern pid_t
-vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);
+extern struct task_struct *task_of_stack(struct task_struct *task,
+				struct vm_area_struct *vma, bool in_group);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
diff --git a/mm/util.c b/mm/util.c
index 093c973f1697..fec39d4509a9 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -170,32 +170,25 @@ static int vm_is_stack_for_task(struct task_struct *t,
 /*
  * Check if the vma is being used as a stack.
  * If is_group is non-zero, check in the entire thread group or else
- * just check in the current task. Returns the pid of the task that
- * the vma is stack for.
+ * just check in the current task. Returns the task_struct of the task
+ * that the vma is stack for. Must be called under rcu_read_lock().
  */
-pid_t vm_is_stack(struct task_struct *task,
-		  struct vm_area_struct *vma, int in_group)
+struct task_struct *task_of_stack(struct task_struct *task,
+				struct vm_area_struct *vma, bool in_group)
 {
-	pid_t ret = 0;
-
 	if (vm_is_stack_for_task(task, vma))
-		return task->pid;
+		return task;
 
 	if (in_group) {
 		struct task_struct *t;
 
-		rcu_read_lock();
 		for_each_thread(task, t) {
-			if (vm_is_stack_for_task(t, vma)) {
-				ret = t->pid;
-				goto done;
-			}
+			if (vm_is_stack_for_task(t, vma))
+				return t;
 		}
-done:
-		rcu_read_unlock();
 	}
 
-	return ret;
+	return NULL;
 }
 
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
-- 
cgit v1.2.3


From 07f361b2bee38896df8be17d8c3f8af3f3610606 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 9 Oct 2014 15:26:00 -0700
Subject: mm/slab_common: move kmem_cache definition to internal header

We don't need to keep kmem_cache definition in include/linux/slab.h if we
don't need to inline kmem_cache_size().  According to my code inspection,
this function is only called at lc_create() in lib/lru_cache.c which may
be called at initialization phase of something, so we don't need to inline
it.  Therfore, move it to slab_common.c and move kmem_cache definition to
internal header.

After this change, we can change kmem_cache definition easily without full
kernel build.  For instance, we can turn on/off CONFIG_SLUB_STATS without
full kernel build.

[akpm@linux-foundation.org: export kmem_cache_size() to modules]
[rdunlap@infradead.org: add header files to fix kmemcheck.c build errors]
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 42 +-----------------------------------------
 mm/kmemcheck.c       |  1 +
 mm/slab.h            | 35 +++++++++++++++++++++++++++++++++++
 mm/slab_common.c     |  9 +++++++++
 4 files changed, 46 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 1d9abb7d22a0..9062e4ad1787 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -158,31 +158,6 @@ size_t ksize(const void *);
 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
 #endif
 
-#ifdef CONFIG_SLOB
-/*
- * Common fields provided in kmem_cache by all slab allocators
- * This struct is either used directly by the allocator (SLOB)
- * or the allocator must include definitions for all fields
- * provided in kmem_cache_common in their definition of kmem_cache.
- *
- * Once we can do anonymous structs (C11 standard) we could put a
- * anonymous struct definition in these allocators so that the
- * separate allocations in the kmem_cache structure of SLAB and
- * SLUB is no longer needed.
- */
-struct kmem_cache {
-	unsigned int object_size;/* The original size of the object */
-	unsigned int size;	/* The aligned/padded/added on size  */
-	unsigned int align;	/* Alignment as calculated */
-	unsigned long flags;	/* Active flags on the slab */
-	const char *name;	/* Slab name for sysfs */
-	int refcount;		/* Use counter */
-	void (*ctor)(void *);	/* Called on object slot creation */
-	struct list_head list;	/* List of all slab caches on the system */
-};
-
-#endif /* CONFIG_SLOB */
-
 /*
  * Kmalloc array related definitions
  */
@@ -363,14 +338,6 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
 }
 #endif /* CONFIG_TRACING */
 
-#ifdef CONFIG_SLAB
-#include <linux/slab_def.h>
-#endif
-
-#ifdef CONFIG_SLUB
-#include <linux/slub_def.h>
-#endif
-
 extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
 
 #ifdef CONFIG_TRACING
@@ -650,14 +617,7 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
-/*
- * Determine the size of a slab object
- */
-static inline unsigned int kmem_cache_size(struct kmem_cache *s)
-{
-	return s->object_size;
-}
-
+unsigned int kmem_cache_size(struct kmem_cache *s);
 void __init kmem_cache_init_late(void);
 
 #endif	/* _LINUX_SLAB_H */
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index fd814fd61319..cab58bb592d8 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -2,6 +2,7 @@
 #include <linux/mm_types.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include "slab.h"
 #include <linux/kmemcheck.h>
 
 void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node)
diff --git a/mm/slab.h b/mm/slab.h
index 0e0fdd365840..026e7c393f0b 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -4,6 +4,41 @@
  * Internal slab definitions
  */
 
+#ifdef CONFIG_SLOB
+/*
+ * Common fields provided in kmem_cache by all slab allocators
+ * This struct is either used directly by the allocator (SLOB)
+ * or the allocator must include definitions for all fields
+ * provided in kmem_cache_common in their definition of kmem_cache.
+ *
+ * Once we can do anonymous structs (C11 standard) we could put a
+ * anonymous struct definition in these allocators so that the
+ * separate allocations in the kmem_cache structure of SLAB and
+ * SLUB is no longer needed.
+ */
+struct kmem_cache {
+	unsigned int object_size;/* The original size of the object */
+	unsigned int size;	/* The aligned/padded/added on size  */
+	unsigned int align;	/* Alignment as calculated */
+	unsigned long flags;	/* Active flags on the slab */
+	const char *name;	/* Slab name for sysfs */
+	int refcount;		/* Use counter */
+	void (*ctor)(void *);	/* Called on object slot creation */
+	struct list_head list;	/* List of all slab caches on the system */
+};
+
+#endif /* CONFIG_SLOB */
+
+#ifdef CONFIG_SLAB
+#include <linux/slab_def.h>
+#endif
+
+#ifdef CONFIG_SLUB
+#include <linux/slub_def.h>
+#endif
+
+#include <linux/memcontrol.h>
+
 /*
  * State of the slab allocator.
  *
diff --git a/mm/slab_common.c b/mm/slab_common.c
index cabb842c4e7c..d7d8ffd0c306 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -30,6 +30,15 @@ LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
+/*
+ * Determine the size of a slab object
+ */
+unsigned int kmem_cache_size(struct kmem_cache *s)
+{
+	return s->object_size;
+}
+EXPORT_SYMBOL(kmem_cache_size);
+
 #ifdef CONFIG_DEBUG_VM
 static int kmem_cache_sanity_check(const char *name, size_t size)
 {
-- 
cgit v1.2.3


From 61f47105a2c9c60e950ca808b7560f776f9bfa31 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 9 Oct 2014 15:26:02 -0700
Subject: mm/sl[ao]b: always track caller in kmalloc_(node_)track_caller()

Now, we track caller if tracing or slab debugging is enabled.  If they are
disabled, we could save one argument passing overhead by calling
__kmalloc(_node)().  But, I think that it would be marginal.  Furthermore,
default slab allocator, SLUB, doesn't use this technique so I think that
it's okay to change this situation.

After this change, we can turn on/off CONFIG_DEBUG_SLAB without full
kernel build and remove some complicated '#if' defintion.  It looks more
benefitial to me.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 22 ----------------------
 mm/slab.c            | 18 ------------------
 mm/slob.c            |  2 --
 3 files changed, 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9062e4ad1787..c265bec6a57d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -549,37 +549,15 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
  * allocator where we care about the real place the memory allocation
  * request comes from.
  */
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) || \
-	(defined(CONFIG_SLAB) && defined(CONFIG_TRACING)) || \
-	(defined(CONFIG_SLOB) && defined(CONFIG_TRACING))
 extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
 #define kmalloc_track_caller(size, flags) \
 	__kmalloc_track_caller(size, flags, _RET_IP_)
-#else
-#define kmalloc_track_caller(size, flags) \
-	__kmalloc(size, flags)
-#endif /* DEBUG_SLAB */
 
 #ifdef CONFIG_NUMA
-/*
- * kmalloc_node_track_caller is a special version of kmalloc_node that
- * records the calling function of the routine calling it for slab leak
- * tracking instead of just the calling function (confusing, eh?).
- * It's useful when the call to kmalloc_node comes from a widely-used
- * standard allocator where we care about the real place the memory
- * allocation request comes from.
- */
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) || \
-	(defined(CONFIG_SLAB) && defined(CONFIG_TRACING)) || \
-	(defined(CONFIG_SLOB) && defined(CONFIG_TRACING))
 extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node_track_caller(size, flags, node, \
 			_RET_IP_)
-#else
-#define kmalloc_node_track_caller(size, flags, node) \
-	__kmalloc_node(size, flags, node)
-#endif
 
 #else /* CONFIG_NUMA */
 
diff --git a/mm/slab.c b/mm/slab.c
index 7c52b3890d25..c52bc5aa6ba0 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3496,7 +3496,6 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 	return kmem_cache_alloc_node_trace(cachep, flags, node, size);
 }
 
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return __do_kmalloc_node(size, flags, node, _RET_IP_);
@@ -3509,13 +3508,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
 	return __do_kmalloc_node(size, flags, node, caller);
 }
 EXPORT_SYMBOL(__kmalloc_node_track_caller);
-#else
-void *__kmalloc_node(size_t size, gfp_t flags, int node)
-{
-	return __do_kmalloc_node(size, flags, node, 0);
-}
-EXPORT_SYMBOL(__kmalloc_node);
-#endif /* CONFIG_DEBUG_SLAB || CONFIG_TRACING */
 #endif /* CONFIG_NUMA */
 
 /**
@@ -3541,8 +3533,6 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	return ret;
 }
 
-
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
 void *__kmalloc(size_t size, gfp_t flags)
 {
 	return __do_kmalloc(size, flags, _RET_IP_);
@@ -3555,14 +3545,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
 }
 EXPORT_SYMBOL(__kmalloc_track_caller);
 
-#else
-void *__kmalloc(size_t size, gfp_t flags)
-{
-	return __do_kmalloc(size, flags, 0);
-}
-EXPORT_SYMBOL(__kmalloc);
-#endif
-
 /**
  * kmem_cache_free - Deallocate an object
  * @cachep: The cache the allocation was from.
diff --git a/mm/slob.c b/mm/slob.c
index 21980e0f39a8..96a86206a26b 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -468,7 +468,6 @@ void *__kmalloc(size_t size, gfp_t gfp)
 }
 EXPORT_SYMBOL(__kmalloc);
 
-#ifdef CONFIG_TRACING
 void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller)
 {
 	return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, caller);
@@ -481,7 +480,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
 	return __do_kmalloc_node(size, gfp, node, caller);
 }
 #endif
-#endif
 
 void kfree(const void *block)
 {
-- 
cgit v1.2.3


From ad2c8144418c6a81cefe65379fd47bbe8344cef2 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 9 Oct 2014 15:26:13 -0700
Subject: topology: add support for node_to_mem_node() to determine the
 fallback node

Anton noticed (http://www.spinics.net/lists/linux-mm/msg67489.html) that
on ppc LPARs with memoryless nodes, a large amount of memory was consumed
by slabs and was marked unreclaimable.  He tracked it down to slab
deactivations in the SLUB core when we allocate remotely, leading to poor
efficiency always when memoryless nodes are present.

After much discussion, Joonsoo provided a few patches that help
significantly.  They don't resolve the problem altogether:

 - memory hotplug still needs testing, that is when a memoryless node
   becomes memory-ful, we want to dtrt
 - there are other reasons for going off-node than memoryless nodes,
   e.g., fully exhausted local nodes

Neither case is resolved with this series, but I don't think that should
block their acceptance, as they can be explored/resolved with follow-on
patches.

The series consists of:

[1/3] topology: add support for node_to_mem_node() to determine the
      fallback node

[2/3] slub: fallback to node_to_mem_node() node if allocating on
      memoryless node

      - Joonsoo's patches to cache the nearest node with memory for each
        NUMA node

[3/3] Partial revert of 81c98869faa5 (""kthread: ensure locality of
      task_struct allocations")

 - At Tejun's request, keep the knowledge of memoryless node fallback
   to the allocator core.

This patch (of 3):

We need to determine the fallback node in slub allocator if the allocation
target node is memoryless node.  Without it, the SLUB wrongly select the
node which has no memory and can't use a partial slab, because of node
mismatch.  Introduced function, node_to_mem_node(X), will return a node Y
with memory that has the nearest distance.  If X is memoryless node, it
will return nearest distance node, but, if X is normal node, it will
return itself.

We will use this function in following patch to determine the fallback
node.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Han Pingtian <hanpt@linux.vnet.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Anton Blanchard <anton@samba.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/topology.h | 17 +++++++++++++++++
 mm/page_alloc.c          |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index dda6ee521e74..909b6e43b694 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -119,11 +119,20 @@ static inline int numa_node_id(void)
  * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem().
  */
 DECLARE_PER_CPU(int, _numa_mem_);
+extern int _node_numa_mem_[MAX_NUMNODES];
 
 #ifndef set_numa_mem
 static inline void set_numa_mem(int node)
 {
 	this_cpu_write(_numa_mem_, node);
+	_node_numa_mem_[numa_node_id()] = node;
+}
+#endif
+
+#ifndef node_to_mem_node
+static inline int node_to_mem_node(int node)
+{
+	return _node_numa_mem_[node];
 }
 #endif
 
@@ -146,6 +155,7 @@ static inline int cpu_to_mem(int cpu)
 static inline void set_cpu_numa_mem(int cpu, int node)
 {
 	per_cpu(_numa_mem_, cpu) = node;
+	_node_numa_mem_[cpu_to_node(cpu)] = node;
 }
 #endif
 
@@ -159,6 +169,13 @@ static inline int numa_mem_id(void)
 }
 #endif
 
+#ifndef node_to_mem_node
+static inline int node_to_mem_node(int node)
+{
+	return node;
+}
+#endif
+
 #ifndef cpu_to_mem
 static inline int cpu_to_mem(int cpu)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eee961958021..f3bc59f2ed52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -85,6 +85,7 @@ EXPORT_PER_CPU_SYMBOL(numa_node);
  */
 DEFINE_PER_CPU(int, _numa_mem_);		/* Kernel "local memory" node */
 EXPORT_PER_CPU_SYMBOL(_numa_mem_);
+int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
 /*
-- 
cgit v1.2.3


From bf0dea23a9c094ae869a88bb694fbe966671bf6d Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 9 Oct 2014 15:26:27 -0700
Subject: mm/slab: use percpu allocator for cpu cache

Because of chicken and egg problem, initialization of SLAB is really
complicated.  We need to allocate cpu cache through SLAB to make the
kmem_cache work, but before initialization of kmem_cache, allocation
through SLAB is impossible.

On the other hand, SLUB does initialization in a more simple way.  It uses
percpu allocator to allocate cpu cache so there is no chicken and egg
problem.

So, this patch try to use percpu allocator in SLAB.  This simplifies the
initialization step in SLAB so that we could maintain SLAB code more
easily.

In my testing there is no performance difference.

This implementation relies on percpu allocator.  Because percpu allocator
uses vmalloc address space, vmalloc address space could be exhausted by
this change on many cpu system with *32 bit* kernel.  This implementation
can cover 1024 cpus in worst case by following calculation.

Worst: 1024 cpus * 4 bytes for pointer * 300 kmem_caches *
	120 objects per cpu_cache = 140 MB
Normal: 1024 cpus * 4 bytes for pointer * 150 kmem_caches(slab merge) *
	80 objects per cpu_cache = 46 MB

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab_def.h |  20 +---
 mm/slab.c                | 239 +++++++++++++++--------------------------------
 mm/slab.h                |   1 -
 3 files changed, 78 insertions(+), 182 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 8235dfbb3b05..b869d1662ba3 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -8,6 +8,8 @@
  */
 
 struct kmem_cache {
+	struct array_cache __percpu *cpu_cache;
+
 /* 1) Cache tunables. Protected by slab_mutex */
 	unsigned int batchcount;
 	unsigned int limit;
@@ -71,23 +73,7 @@ struct kmem_cache {
 	struct memcg_cache_params *memcg_params;
 #endif
 
-/* 6) per-cpu/per-node data, touched during every alloc/free */
-	/*
-	 * We put array[] at the end of kmem_cache, because we want to size
-	 * this array to nr_cpu_ids slots instead of NR_CPUS
-	 * (see kmem_cache_init())
-	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of cpus.
-	 *
-	 * We also need to guarantee that the list is able to accomodate a
-	 * pointer for each node since "nodelists" uses the remainder of
-	 * available pointers.
-	 */
-	struct kmem_cache_node **node;
-	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
-	/*
-	 * Do not add fields after array[]
-	 */
+	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
 #endif	/* _LINUX_SLAB_DEF_H */
diff --git a/mm/slab.c b/mm/slab.c
index 328233a724af..655d65c3f010 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -237,11 +237,10 @@ struct arraycache_init {
 /*
  * Need this for bootstrapping a per node allocator.
  */
-#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
+#define NUM_INIT_LISTS (2 * MAX_NUMNODES)
 static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
 #define	CACHE_CACHE 0
-#define	SIZE_AC MAX_NUMNODES
-#define	SIZE_NODE (2 * MAX_NUMNODES)
+#define	SIZE_NODE (MAX_NUMNODES)
 
 static int drain_freelist(struct kmem_cache *cache,
 			struct kmem_cache_node *n, int tofree);
@@ -253,7 +252,6 @@ static void cache_reap(struct work_struct *unused);
 
 static int slab_early_init = 1;
 
-#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
 #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
 
 static void kmem_cache_node_init(struct kmem_cache_node *parent)
@@ -458,9 +456,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
 	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
-static struct arraycache_init initarray_generic =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
-
 /* internal cache of cache description objs */
 static struct kmem_cache kmem_cache_boot = {
 	.batchcount = 1,
@@ -476,7 +471,7 @@ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
-	return cachep->array[smp_processor_id()];
+	return this_cpu_ptr(cachep->cpu_cache);
 }
 
 static size_t calculate_freelist_size(int nr_objs, size_t align)
@@ -1096,24 +1091,25 @@ static void cpuup_canceled(long cpu)
 		struct alien_cache **alien;
 		LIST_HEAD(list);
 
-		/* cpu is dead; no one can alloc from it. */
-		nc = cachep->array[cpu];
-		cachep->array[cpu] = NULL;
 		n = get_node(cachep, node);
-
 		if (!n)
-			goto free_array_cache;
+			continue;
 
 		spin_lock_irq(&n->list_lock);
 
 		/* Free limit for this kmem_cache_node */
 		n->free_limit -= cachep->batchcount;
-		if (nc)
+
+		/* cpu is dead; no one can alloc from it. */
+		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
+		if (nc) {
 			free_block(cachep, nc->entry, nc->avail, node, &list);
+			nc->avail = 0;
+		}
 
 		if (!cpumask_empty(mask)) {
 			spin_unlock_irq(&n->list_lock);
-			goto free_array_cache;
+			goto free_slab;
 		}
 
 		shared = n->shared;
@@ -1133,9 +1129,9 @@ static void cpuup_canceled(long cpu)
 			drain_alien_cache(cachep, alien);
 			free_alien_cache(alien);
 		}
-free_array_cache:
+
+free_slab:
 		slabs_destroy(cachep, &list);
-		kfree(nc);
 	}
 	/*
 	 * In the previous loop, all the objects were freed to
@@ -1172,32 +1168,23 @@ static int cpuup_prepare(long cpu)
 	 * array caches
 	 */
 	list_for_each_entry(cachep, &slab_caches, list) {
-		struct array_cache *nc;
 		struct array_cache *shared = NULL;
 		struct alien_cache **alien = NULL;
 
-		nc = alloc_arraycache(node, cachep->limit,
-					cachep->batchcount, GFP_KERNEL);
-		if (!nc)
-			goto bad;
 		if (cachep->shared) {
 			shared = alloc_arraycache(node,
 				cachep->shared * cachep->batchcount,
 				0xbaadf00d, GFP_KERNEL);
-			if (!shared) {
-				kfree(nc);
+			if (!shared)
 				goto bad;
-			}
 		}
 		if (use_alien_caches) {
 			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
 			if (!alien) {
 				kfree(shared);
-				kfree(nc);
 				goto bad;
 			}
 		}
-		cachep->array[cpu] = nc;
 		n = get_node(cachep, node);
 		BUG_ON(!n);
 
@@ -1388,15 +1375,6 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
 	}
 }
 
-/*
- * The memory after the last cpu cache pointer is used for the
- * the node pointer.
- */
-static void setup_node_pointer(struct kmem_cache *cachep)
-{
-	cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
-}
-
 /*
  * Initialisation.  Called after the page allocator have been initialised and
  * before smp_init().
@@ -1408,7 +1386,6 @@ void __init kmem_cache_init(void)
 	BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
 					sizeof(struct rcu_head));
 	kmem_cache = &kmem_cache_boot;
-	setup_node_pointer(kmem_cache);
 
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
@@ -1416,8 +1393,6 @@ void __init kmem_cache_init(void)
 	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_cache_node_init(&init_kmem_cache_node[i]);
 
-	set_up_node(kmem_cache, CACHE_CACHE);
-
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
 	 * page orders on machines with more than 32MB of memory if
@@ -1452,49 +1427,22 @@ void __init kmem_cache_init(void)
 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
 	create_boot_cache(kmem_cache, "kmem_cache",
-		offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+		offsetof(struct kmem_cache, node) +
 				  nr_node_ids * sizeof(struct kmem_cache_node *),
 				  SLAB_HWCACHE_ALIGN);
 	list_add(&kmem_cache->list, &slab_caches);
-
-	/* 2+3) create the kmalloc caches */
+	slab_state = PARTIAL;
 
 	/*
-	 * Initialize the caches that provide memory for the array cache and the
-	 * kmem_cache_node structures first.  Without this, further allocations will
-	 * bug.
+	 * Initialize the caches that provide memory for the  kmem_cache_node
+	 * structures first.  Without this, further allocations will bug.
 	 */
-
-	kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
-					kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
-
-	if (INDEX_AC != INDEX_NODE)
-		kmalloc_caches[INDEX_NODE] =
-			create_kmalloc_cache("kmalloc-node",
+	kmalloc_caches[INDEX_NODE] = create_kmalloc_cache("kmalloc-node",
 				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
+	slab_state = PARTIAL_NODE;
 
 	slab_early_init = 0;
 
-	/* 4) Replace the bootstrap head arrays */
-	{
-		struct array_cache *ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-		memcpy(ptr, cpu_cache_get(kmem_cache),
-		       sizeof(struct arraycache_init));
-
-		kmem_cache->array[smp_processor_id()] = ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-		BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
-		       != &initarray_generic.cache);
-		memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
-		       sizeof(struct arraycache_init));
-
-		kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
-	}
 	/* 5) Replace the bootstrap kmem_cache_node */
 	{
 		int nid;
@@ -1502,13 +1450,8 @@ void __init kmem_cache_init(void)
 		for_each_online_node(nid) {
 			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
 
-			init_list(kmalloc_caches[INDEX_AC],
-				  &init_kmem_cache_node[SIZE_AC + nid], nid);
-
-			if (INDEX_AC != INDEX_NODE) {
-				init_list(kmalloc_caches[INDEX_NODE],
+			init_list(kmalloc_caches[INDEX_NODE],
 					  &init_kmem_cache_node[SIZE_NODE + nid], nid);
-			}
 		}
 	}
 
@@ -2041,56 +1984,53 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
+static struct array_cache __percpu *alloc_kmem_cache_cpus(
+		struct kmem_cache *cachep, int entries, int batchcount)
+{
+	int cpu;
+	size_t size;
+	struct array_cache __percpu *cpu_cache;
+
+	size = sizeof(void *) * entries + sizeof(struct array_cache);
+	cpu_cache = __alloc_percpu(size, 0);
+
+	if (!cpu_cache)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		init_arraycache(per_cpu_ptr(cpu_cache, cpu),
+				entries, batchcount);
+	}
+
+	return cpu_cache;
+}
+
 static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	if (slab_state >= FULL)
 		return enable_cpucache(cachep, gfp);
 
+	cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
+	if (!cachep->cpu_cache)
+		return 1;
+
 	if (slab_state == DOWN) {
-		/*
-		 * Note: Creation of first cache (kmem_cache).
-		 * The setup_node is taken care
-		 * of by the caller of __kmem_cache_create
-		 */
-		cachep->array[smp_processor_id()] = &initarray_generic.cache;
-		slab_state = PARTIAL;
+		/* Creation of first cache (kmem_cache). */
+		set_up_node(kmem_cache, CACHE_CACHE);
 	} else if (slab_state == PARTIAL) {
-		/*
-		 * Note: the second kmem_cache_create must create the cache
-		 * that's used by kmalloc(24), otherwise the creation of
-		 * further caches will BUG().
-		 */
-		cachep->array[smp_processor_id()] = &initarray_generic.cache;
-
-		/*
-		 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
-		 * the second cache, then we need to set up all its node/,
-		 * otherwise the creation of further caches will BUG().
-		 */
-		set_up_node(cachep, SIZE_AC);
-		if (INDEX_AC == INDEX_NODE)
-			slab_state = PARTIAL_NODE;
-		else
-			slab_state = PARTIAL_ARRAYCACHE;
+		/* For kmem_cache_node */
+		set_up_node(cachep, SIZE_NODE);
 	} else {
-		/* Remaining boot caches */
-		cachep->array[smp_processor_id()] =
-			kmalloc(sizeof(struct arraycache_init), gfp);
+		int node;
 
-		if (slab_state == PARTIAL_ARRAYCACHE) {
-			set_up_node(cachep, SIZE_NODE);
-			slab_state = PARTIAL_NODE;
-		} else {
-			int node;
-			for_each_online_node(node) {
-				cachep->node[node] =
-				    kmalloc_node(sizeof(struct kmem_cache_node),
-						gfp, node);
-				BUG_ON(!cachep->node[node]);
-				kmem_cache_node_init(cachep->node[node]);
-			}
+		for_each_online_node(node) {
+			cachep->node[node] = kmalloc_node(
+				sizeof(struct kmem_cache_node), gfp, node);
+			BUG_ON(!cachep->node[node]);
+			kmem_cache_node_init(cachep->node[node]);
 		}
 	}
+
 	cachep->node[numa_mem_id()]->next_reap =
 			jiffies + REAPTIMEOUT_NODE +
 			((unsigned long)cachep) % REAPTIMEOUT_NODE;
@@ -2213,7 +2153,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	else
 		gfp = GFP_NOWAIT;
 
-	setup_node_pointer(cachep);
 #if DEBUG
 
 	/*
@@ -2470,8 +2409,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
 	if (rc)
 		return rc;
 
-	for_each_online_cpu(i)
-	    kfree(cachep->array[i]);
+	free_percpu(cachep->cpu_cache);
 
 	/* NUMA: free the node structures */
 	for_each_kmem_cache_node(cachep, i, n) {
@@ -3719,72 +3657,45 @@ fail:
 	return -ENOMEM;
 }
 
-struct ccupdate_struct {
-	struct kmem_cache *cachep;
-	struct array_cache *new[0];
-};
-
-static void do_ccupdate_local(void *info)
-{
-	struct ccupdate_struct *new = info;
-	struct array_cache *old;
-
-	check_irq_off();
-	old = cpu_cache_get(new->cachep);
-
-	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
-	new->new[smp_processor_id()] = old;
-}
-
 /* Always called with the slab_mutex held */
 static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared, gfp_t gfp)
 {
-	struct ccupdate_struct *new;
-	int i;
+	struct array_cache __percpu *cpu_cache, *prev;
+	int cpu;
 
-	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
-		      gfp);
-	if (!new)
+	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
+	if (!cpu_cache)
 		return -ENOMEM;
 
-	for_each_online_cpu(i) {
-		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
-						batchcount, gfp);
-		if (!new->new[i]) {
-			for (i--; i >= 0; i--)
-				kfree(new->new[i]);
-			kfree(new);
-			return -ENOMEM;
-		}
-	}
-	new->cachep = cachep;
-
-	on_each_cpu(do_ccupdate_local, (void *)new, 1);
+	prev = cachep->cpu_cache;
+	cachep->cpu_cache = cpu_cache;
+	kick_all_cpus_sync();
 
 	check_irq_on();
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->shared = shared;
 
-	for_each_online_cpu(i) {
+	if (!prev)
+		goto alloc_node;
+
+	for_each_online_cpu(cpu) {
 		LIST_HEAD(list);
-		struct array_cache *ccold = new->new[i];
 		int node;
 		struct kmem_cache_node *n;
+		struct array_cache *ac = per_cpu_ptr(prev, cpu);
 
-		if (!ccold)
-			continue;
-
-		node = cpu_to_mem(i);
+		node = cpu_to_mem(cpu);
 		n = get_node(cachep, node);
 		spin_lock_irq(&n->list_lock);
-		free_block(cachep, ccold->entry, ccold->avail, node, &list);
+		free_block(cachep, ac->entry, ac->avail, node, &list);
 		spin_unlock_irq(&n->list_lock);
 		slabs_destroy(cachep, &list);
-		kfree(ccold);
 	}
-	kfree(new);
+	free_percpu(prev);
+
+alloc_node:
 	return alloc_kmem_cache_node(cachep, gfp);
 }
 
diff --git a/mm/slab.h b/mm/slab.h
index 50d29d716db4..ab019e63e3c2 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -50,7 +50,6 @@ struct kmem_cache {
 enum slab_state {
 	DOWN,			/* No slab functionality yet */
 	PARTIAL,		/* SLUB: kmem_cache_node available */
-	PARTIAL_ARRAYCACHE,	/* SLAB: kmalloc size for arraycache available */
 	PARTIAL_NODE,		/* SLAB: kmalloc size for node struct available */
 	UP,			/* Slab caches usable but not all extras yet */
 	FULL			/* Everything is working */
-- 
cgit v1.2.3


From ed2f240094f900833ac06f533ab8bbcf0a1e8199 Mon Sep 17 00:00:00 2001
From: Zhang Zhen <zhenzhang.zhang@huawei.com>
Date: Thu, 9 Oct 2014 15:26:31 -0700
Subject: memory-hotplug: add sysfs valid_zones attribute

Currently memory-hotplug has two limits:

1. If the memory block is in ZONE_NORMAL, you can change it to
   ZONE_MOVABLE, but this memory block must be adjacent to ZONE_MOVABLE.

2. If the memory block is in ZONE_MOVABLE, you can change it to
   ZONE_NORMAL, but this memory block must be adjacent to ZONE_NORMAL.

With this patch, we can easy to know a memory block can be onlined to
which zone, and don't need to know the above two limits.

Updated the related Documentation.

[akpm@linux-foundation.org: use conventional comment layout]
[akpm@linux-foundation.org: fix build with CONFIG_MEMORY_HOTREMOVE=n]
[akpm@linux-foundation.org: remove unused local zone_prev]
Signed-off-by: Zhang Zhen <zhenzhang.zhang@huawei.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-devices-memory |  8 +++++
 Documentation/memory-hotplug.txt               | 11 ++++++-
 drivers/base/memory.c                          | 42 ++++++++++++++++++++++++++
 include/linux/memory_hotplug.h                 |  1 +
 mm/memory_hotplug.c                            |  2 +-
 5 files changed, 62 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
index 7405de26ee60..deef3b5723cf 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -61,6 +61,14 @@ Users:		hotplug memory remove tools
 		http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
 
 
+What:           /sys/devices/system/memory/memoryX/valid_zones
+Date:           July 2014
+Contact:	Zhang Zhen <zhenzhang.zhang@huawei.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/valid_zones	is
+		read-only and is designed to show which zone this memory
+		block can be onlined to.
+
 What:		/sys/devices/system/memoryX/nodeY
 Date:		October 2009
 Contact:	Linux Memory Management list <linux-mm@kvack.org>
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 45134dc23854..ea03abfc97e9 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -155,6 +155,7 @@ Under each memory block, you can see 4 files:
 /sys/devices/system/memory/memoryXXX/phys_device
 /sys/devices/system/memory/memoryXXX/state
 /sys/devices/system/memory/memoryXXX/removable
+/sys/devices/system/memory/memoryXXX/valid_zones
 
 'phys_index'      : read-only and contains memory block id, same as XXX.
 'state'           : read-write
@@ -170,6 +171,15 @@ Under each memory block, you can see 4 files:
                     block is removable and a value of 0 indicates that
                     it is not removable. A memory block is removable only if
                     every section in the block is removable.
+'valid_zones'     : read-only: designed to show which zones this memory block
+		    can be onlined to.
+		    The first column shows it's default zone.
+		    "memory6/valid_zones: Normal Movable" shows this memoryblock
+		    can be onlined to ZONE_NORMAL by default and to ZONE_MOVABLE
+		    by online_movable.
+		    "memory7/valid_zones: Movable Normal" shows this memoryblock
+		    can be onlined to ZONE_MOVABLE by default and to ZONE_NORMAL
+		    by online_kernel.
 
 NOTE:
   These directories/files appear after physical memory hotplug phase.
@@ -408,7 +418,6 @@ node if necessary.
   - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
     sysctl or new control file.
   - showing memory block and physical device relationship.
-  - showing memory block is under ZONE_MOVABLE or not
   - test and make it better memory offlining.
   - support HugeTLB page migration and offlining.
   - memmap removing at memory offline.
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index a2e13e250bba..7c5d87191b28 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -373,6 +373,45 @@ static ssize_t show_phys_device(struct device *dev,
 	return sprintf(buf, "%d\n", mem->phys_device);
 }
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static ssize_t show_valid_zones(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct memory_block *mem = to_memory_block(dev);
+	unsigned long start_pfn, end_pfn;
+	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
+	struct page *first_page;
+	struct zone *zone;
+
+	start_pfn = section_nr_to_pfn(mem->start_section_nr);
+	end_pfn = start_pfn + nr_pages;
+	first_page = pfn_to_page(start_pfn);
+
+	/* The block contains more than one zone can not be offlined. */
+	if (!test_pages_in_a_zone(start_pfn, end_pfn))
+		return sprintf(buf, "none\n");
+
+	zone = page_zone(first_page);
+
+	if (zone_idx(zone) == ZONE_MOVABLE - 1) {
+		/*The mem block is the last memoryblock of this zone.*/
+		if (end_pfn == zone_end_pfn(zone))
+			return sprintf(buf, "%s %s\n",
+					zone->name, (zone + 1)->name);
+	}
+
+	if (zone_idx(zone) == ZONE_MOVABLE) {
+		/*The mem block is the first memoryblock of ZONE_MOVABLE.*/
+		if (start_pfn == zone->zone_start_pfn)
+			return sprintf(buf, "%s %s\n",
+					zone->name, (zone - 1)->name);
+	}
+
+	return sprintf(buf, "%s\n", zone->name);
+}
+static DEVICE_ATTR(valid_zones, 0444, show_valid_zones, NULL);
+#endif
+
 static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL);
 static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state);
 static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL);
@@ -523,6 +562,9 @@ static struct attribute *memory_memblk_attrs[] = {
 	&dev_attr_state.attr,
 	&dev_attr_phys_device.attr,
 	&dev_attr_removable.attr,
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	&dev_attr_valid_zones.attr,
+#endif
 	NULL
 };
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d9524c49d767..8f1a41951df9 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -84,6 +84,7 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 /* VM interface that may be used by firmware interface */
 extern int online_pages(unsigned long, unsigned long, int);
+extern int test_pages_in_a_zone(unsigned long, unsigned long);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
 typedef void (*online_page_callback_t)(struct page *page);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2ff8c2325e96..29d8693d0c61 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1307,7 +1307,7 @@ int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 /*
  * Confirm all pages in a range [start, end) is belongs to the same zone.
  */
-static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
+int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long pfn;
 	struct zone *zone = NULL;
-- 
cgit v1.2.3


From 505e3be6c082489a32a88e042f930d047b6415bc Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Thu, 9 Oct 2014 15:26:35 -0700
Subject: lib/genalloc.c: add power aligned algorithm

One of the more common algorithms used for allocation is to align the
start address of the allocation to the order of size requested.  Add this
as an algorithm option for genalloc.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Riley <davidriley@chromium.org>
Cc: Ritesh Harjain <ritesh.harjani@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/genalloc.h |  4 ++++
 lib/genalloc.c           | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 1c2fdaa2ffc3..3cd0934d62ba 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -110,6 +110,10 @@ extern void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo,
 extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size,
 		unsigned long start, unsigned int nr, void *data);
 
+extern unsigned long gen_pool_first_fit_order_align(unsigned long *map,
+		unsigned long size, unsigned long start, unsigned int nr,
+		void *data);
+
 extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size,
 		unsigned long start, unsigned int nr, void *data);
 
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 38d2db82228c..166f17b9f169 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -480,6 +480,26 @@ unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size,
 }
 EXPORT_SYMBOL(gen_pool_first_fit);
 
+/**
+ * gen_pool_first_fit_order_align - find the first available region
+ * of memory matching the size requirement. The region will be aligned
+ * to the order of the size specified.
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @data: additional data - unused
+ */
+unsigned long gen_pool_first_fit_order_align(unsigned long *map,
+		unsigned long size, unsigned long start,
+		unsigned int nr, void *data)
+{
+	unsigned long align_mask = roundup_pow_of_two(nr) - 1;
+
+	return bitmap_find_next_zero_area(map, size, start, nr, align_mask);
+}
+EXPORT_SYMBOL(gen_pool_first_fit_order_align);
+
 /**
  * gen_pool_best_fit - find the best fitting region of memory
  * macthing the size requirement (no alignment constraint)
-- 
cgit v1.2.3


From 9efb3a421d55d30b65fb0dbee05108d15c6c55f7 Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Thu, 9 Oct 2014 15:26:38 -0700
Subject: lib/genalloc.c: add genpool range check function

After allocating an address from a particular genpool, there is no good
way to verify if that address actually belongs to a genpool.  Introduce
addr_in_gen_pool which will return if an address plus size falls
completely within the genpool range.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Riley <davidriley@chromium.org>
Cc: Ritesh Harjain <ritesh.harjani@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/genalloc.h |  3 +++
 lib/genalloc.c           | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 3cd0934d62ba..1ccaab44abcc 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -121,6 +121,9 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev,
 		int min_alloc_order, int nid);
 extern struct gen_pool *dev_get_gen_pool(struct device *dev);
 
+bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+			size_t size);
+
 #ifdef CONFIG_OF
 extern struct gen_pool *of_get_named_gen_pool(struct device_node *np,
 	const char *propname, int index);
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 166f17b9f169..cce4dd68c40d 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -402,6 +402,35 @@ void gen_pool_for_each_chunk(struct gen_pool *pool,
 }
 EXPORT_SYMBOL(gen_pool_for_each_chunk);
 
+/**
+ * addr_in_gen_pool - checks if an address falls within the range of a pool
+ * @pool:	the generic memory pool
+ * @start:	start address
+ * @size:	size of the region
+ *
+ * Check if the range of addresses falls within the specified pool. Returns
+ * true if the entire range is contained in the pool and false otherwise.
+ */
+bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+			size_t size)
+{
+	bool found = false;
+	unsigned long end = start + size;
+	struct gen_pool_chunk *chunk;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) {
+		if (start >= chunk->start_addr && start <= chunk->end_addr) {
+			if (end <= chunk->end_addr) {
+				found = true;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+	return found;
+}
+
 /**
  * gen_pool_avail - get available free space of the pool
  * @pool: pool to get available free space
-- 
cgit v1.2.3


From 53853e2d2bfb748a8b5aa2fd1de15699266865e0 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 9 Oct 2014 15:27:02 -0700
Subject: mm, compaction: defer each zone individually instead of preferred
 zone

When direct sync compaction is often unsuccessful, it may become deferred
for some time to avoid further useless attempts, both sync and async.
Successful high-order allocations un-defer compaction, while further
unsuccessful compaction attempts prolong the compaction deferred period.

Currently the checking and setting deferred status is performed only on
the preferred zone of the allocation that invoked direct compaction.  But
compaction itself is attempted on all eligible zones in the zonelist, so
the behavior is suboptimal and may lead both to scenarios where 1)
compaction is attempted uselessly, or 2) where it's not attempted despite
good chances of succeeding, as shown on the examples below:

1) A direct compaction with Normal preferred zone failed and set
   deferred compaction for the Normal zone.  Another unrelated direct
   compaction with DMA32 as preferred zone will attempt to compact DMA32
   zone even though the first compaction attempt also included DMA32 zone.

   In another scenario, compaction with Normal preferred zone failed to
   compact Normal zone, but succeeded in the DMA32 zone, so it will not
   defer compaction.  In the next attempt, it will try Normal zone which
   will fail again, instead of skipping Normal zone and trying DMA32
   directly.

2) Kswapd will balance DMA32 zone and reset defer status based on
   watermarks looking good.  A direct compaction with preferred Normal
   zone will skip compaction of all zones including DMA32 because Normal
   was still deferred.  The allocation might have succeeded in DMA32, but
   won't.

This patch makes compaction deferring work on individual zone basis
instead of preferred zone.  For each zone, it checks compaction_deferred()
to decide if the zone should be skipped.  If watermarks fail after
compacting the zone, defer_compaction() is called.  The zone where
watermarks passed can still be deferred when the allocation attempt is
unsuccessful.  When allocation is successful, compaction_defer_reset() is
called for the zone containing the allocated page.  This approach should
approximate calling defer_compaction() only on zones where compaction was
attempted and did not yield allocated page.  There might be corner cases
but that is inevitable as long as the decision to stop compacting dues not
guarantee that a page will be allocated.

Due to a new COMPACT_DEFERRED return value, some functions relying
implicitly on COMPACT_SKIPPED = 0 had to be updated, with comments made
more accurate.  The did_some_progress output parameter of
__alloc_pages_direct_compact() is removed completely, as the caller
actually does not use it after compaction sets it - it is only considered
when direct reclaim sets it.

During testing on a two-node machine with a single very small Normal zone
on node 1, this patch has improved success rates in stress-highalloc
mmtests benchmark.  The success here were previously made worse by commit
3a025760fc15 ("mm: page_alloc: spill to remote nodes before waking
kswapd") as kswapd was no longer resetting often enough the deferred
compaction for the Normal zone, and DMA32 zones on both nodes were thus
not considered for compaction.  On different machine, success rates were
improved with __GFP_NO_KSWAPD allocations.

[akpm@linux-foundation.org: fix CONFIG_COMPACTION=n build]
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 16 ++++++++-----
 mm/compaction.c            | 32 ++++++++++++++++++++------
 mm/page_alloc.c            | 57 +++++++++++++++++++++++++---------------------
 mm/vmscan.c                | 14 ++++++++----
 4 files changed, 76 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 01e3132820da..b2e4c92d0445 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -2,14 +2,16 @@
 #define _LINUX_COMPACTION_H
 
 /* Return values for compact_zone() and try_to_compact_pages() */
+/* compaction didn't start as it was deferred due to past failures */
+#define COMPACT_DEFERRED	0
 /* compaction didn't start as it was not possible or direct reclaim was more suitable */
-#define COMPACT_SKIPPED		0
+#define COMPACT_SKIPPED		1
 /* compaction should continue to another pageblock */
-#define COMPACT_CONTINUE	1
+#define COMPACT_CONTINUE	2
 /* direct compaction partially compacted a zone and there are suitable pages */
-#define COMPACT_PARTIAL		2
+#define COMPACT_PARTIAL		3
 /* The full zone was compacted */
-#define COMPACT_COMPLETE	3
+#define COMPACT_COMPLETE	4
 
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
@@ -22,7 +24,8 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			enum migrate_mode mode, bool *contended);
+			enum migrate_mode mode, bool *contended,
+			struct zone **candidate_zone);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
@@ -91,7 +94,8 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended)
+			enum migrate_mode mode, bool *contended,
+			struct zone **candidate_zone)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index 21bf292b642a..1c7195d42e83 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1125,27 +1125,26 @@ int sysctl_extfrag_threshold = 500;
  * @nodemask: The allowed nodes to allocate from
  * @mode: The migration mode for async, sync light, or sync migration
  * @contended: Return value that is true if compaction was aborted due to lock contention
- * @page: Optionally capture a free page of the requested order during compaction
+ * @candidate_zone: Return the zone where we think allocation should succeed
  *
  * This is the main entry point for direct page compaction.
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended)
+			enum migrate_mode mode, bool *contended,
+			struct zone **candidate_zone)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
 	int may_perform_io = gfp_mask & __GFP_IO;
 	struct zoneref *z;
 	struct zone *zone;
-	int rc = COMPACT_SKIPPED;
+	int rc = COMPACT_DEFERRED;
 	int alloc_flags = 0;
 
 	/* Check if the GFP flags allow compaction */
 	if (!order || !may_enter_fs || !may_perform_io)
-		return rc;
-
-	count_compact_event(COMPACTSTALL);
+		return COMPACT_SKIPPED;
 
 #ifdef CONFIG_CMA
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
@@ -1156,14 +1155,33 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 								nodemask) {
 		int status;
 
+		if (compaction_deferred(zone, order))
+			continue;
+
 		status = compact_zone_order(zone, order, gfp_mask, mode,
 						contended);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
-				      alloc_flags))
+				      alloc_flags)) {
+			*candidate_zone = zone;
+			/*
+			 * We think the allocation will succeed in this zone,
+			 * but it is not certain, hence the false. The caller
+			 * will repeat this with true if allocation indeed
+			 * succeeds in this zone.
+			 */
+			compaction_defer_reset(zone, order, false);
 			break;
+		} else if (mode != MIGRATE_ASYNC) {
+			/*
+			 * We think that allocation won't succeed in this zone
+			 * so we defer compaction there. If it ends up
+			 * succeeding after all, it will be reset.
+			 */
+			defer_compaction(zone, order);
+		}
 	}
 
 	return rc;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e63bf7744a0c..514fd8008114 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2297,24 +2297,28 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int classzone_idx, int migratetype, enum migrate_mode mode,
-	bool *contended_compaction, bool *deferred_compaction,
-	unsigned long *did_some_progress)
+	bool *contended_compaction, bool *deferred_compaction)
 {
-	if (!order)
-		return NULL;
+	struct zone *last_compact_zone = NULL;
+	unsigned long compact_result;
 
-	if (compaction_deferred(preferred_zone, order)) {
-		*deferred_compaction = true;
+
+	if (!order)
 		return NULL;
-	}
 
 	current->flags |= PF_MEMALLOC;
-	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
+	compact_result = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, mode,
-						contended_compaction);
+						contended_compaction,
+						&last_compact_zone);
 	current->flags &= ~PF_MEMALLOC;
 
-	if (*did_some_progress != COMPACT_SKIPPED) {
+	if (compact_result > COMPACT_DEFERRED)
+		count_vm_event(COMPACTSTALL);
+	else
+		*deferred_compaction = true;
+
+	if (compact_result > COMPACT_SKIPPED) {
 		struct page *page;
 
 		/* Page migration frees to the PCP lists but we want merging */
@@ -2325,13 +2329,24 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				order, zonelist, high_zoneidx,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, classzone_idx, migratetype);
+
 		if (page) {
-			preferred_zone->compact_blockskip_flush = false;
-			compaction_defer_reset(preferred_zone, order, true);
+			struct zone *zone = page_zone(page);
+
+			zone->compact_blockskip_flush = false;
+			compaction_defer_reset(zone, order, true);
 			count_vm_event(COMPACTSUCCESS);
 			return page;
 		}
 
+		/*
+		 * last_compact_zone is where try_to_compact_pages thought
+		 * allocation should succeed, so it did not defer compaction.
+		 * But now we know that it didn't succeed, so we do the defer.
+		 */
+		if (last_compact_zone && mode != MIGRATE_ASYNC)
+			defer_compaction(last_compact_zone, order);
+
 		/*
 		 * It's bad if compaction run occurs and fails.
 		 * The most likely reason is that pages exist,
@@ -2339,13 +2354,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		 */
 		count_vm_event(COMPACTFAIL);
 
-		/*
-		 * As async compaction considers a subset of pageblocks, only
-		 * defer if the failure was a sync compaction failure.
-		 */
-		if (mode != MIGRATE_ASYNC)
-			defer_compaction(preferred_zone, order);
-
 		cond_resched();
 	}
 
@@ -2356,9 +2364,8 @@ static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int classzone_idx, int migratetype,
-	enum migrate_mode mode, bool *contended_compaction,
-	bool *deferred_compaction, unsigned long *did_some_progress)
+	int classzone_idx, int migratetype, enum migrate_mode mode,
+	bool *contended_compaction, bool *deferred_compaction)
 {
 	return NULL;
 }
@@ -2634,8 +2641,7 @@ rebalance:
 					preferred_zone,
 					classzone_idx, migratetype,
 					migration_mode, &contended_compaction,
-					&deferred_compaction,
-					&did_some_progress);
+					&deferred_compaction);
 	if (page)
 		goto got_pg;
 
@@ -2727,8 +2733,7 @@ rebalance:
 					preferred_zone,
 					classzone_idx, migratetype,
 					migration_mode, &contended_compaction,
-					&deferred_compaction,
-					&did_some_progress);
+					&deferred_compaction);
 		if (page)
 			goto got_pg;
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2836b5373b2e..1a71b8b1ea34 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2315,7 +2315,10 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
 	return reclaimable;
 }
 
-/* Returns true if compaction should go ahead for a high-order request */
+/*
+ * Returns true if compaction should go ahead for a high-order request, or
+ * the high-order allocation would succeed without compaction.
+ */
 static inline bool compaction_ready(struct zone *zone, int order)
 {
 	unsigned long balance_gap, watermark;
@@ -2339,8 +2342,11 @@ static inline bool compaction_ready(struct zone *zone, int order)
 	if (compaction_deferred(zone, order))
 		return watermark_ok;
 
-	/* If compaction is not ready to start, keep reclaiming */
-	if (!compaction_suitable(zone, order))
+	/*
+	 * If compaction is not ready to start and allocation is not likely
+	 * to succeed without it, then keep reclaiming.
+	 */
+	if (compaction_suitable(zone, order) == COMPACT_SKIPPED)
 		return false;
 
 	return watermark_ok;
@@ -2818,7 +2824,7 @@ static bool zone_balanced(struct zone *zone, int order,
 		return false;
 
 	if (IS_ENABLED(CONFIG_COMPACTION) && order &&
-	    !compaction_suitable(zone, order))
+	    compaction_suitable(zone, order) == COMPACT_SKIPPED)
 		return false;
 
 	return true;
-- 
cgit v1.2.3


From 1f9efdef4f3f1d2a073e524113fd0038af636f2b Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 9 Oct 2014 15:27:14 -0700
Subject: mm, compaction: khugepaged should not give up due to need_resched()

Async compaction aborts when it detects zone lock contention or
need_resched() is true.  David Rientjes has reported that in practice,
most direct async compactions for THP allocation abort due to
need_resched().  This means that a second direct compaction is never
attempted, which might be OK for a page fault, but khugepaged is intended
to attempt a sync compaction in such case and in these cases it won't.

This patch replaces "bool contended" in compact_control with an int that
distinguishes between aborting due to need_resched() and aborting due to
lock contention.  This allows propagating the abort through all compaction
functions as before, but passing the abort reason up to
__alloc_pages_slowpath() which decides when to continue with direct
reclaim and another compaction attempt.

Another problem is that try_to_compact_pages() did not act upon the
reported contention (both need_resched() or lock contention) immediately
and would proceed with another zone from the zonelist.  When
need_resched() is true, that means initializing another zone compaction,
only to check again need_resched() in isolate_migratepages() and aborting.
 For zone lock contention, the unintended consequence is that the lock
contended status reported back to the allocator is detrmined from the last
zone where compaction was attempted, which is rather arbitrary.

This patch fixes the problem in the following way:
- async compaction of a zone aborting due to need_resched() or fatal signal
  pending means that further zones should not be tried. We report
  COMPACT_CONTENDED_SCHED to the allocator.
- aborting zone compaction due to lock contention means we can still try
  another zone, since it has different set of locks. We report back
  COMPACT_CONTENDED_LOCK only if *all* zones where compaction was attempted,
  it was aborted due to lock contention.

As a result of these fixes, khugepaged will proceed with second sync
compaction as intended, when the preceding async compaction aborted due to
need_resched().  Page fault compactions aborting due to need_resched()
will spare some cycles previously wasted by initializing another zone
compaction only to abort again.  Lock contention will be reported only
when compaction in all zones aborted due to lock contention, and therefore
it's not a good idea to try again after reclaim.

In stress-highalloc from mmtests configured to use __GFP_NO_KSWAPD, this
has improved number of THP collapse allocations by 10%, which shows
positive effect on khugepaged.  The benchmark's success rates are
unchanged as it is not recognized as khugepaged.  Numbers of compact_stall
and compact_fail events have however decreased by 20%, with
compact_success still a bit improved, which is good.  With benchmark
configured not to use __GFP_NO_KSWAPD, there is 6% improvement in THP
collapse allocations, and only slight improvement in stalls and failures.

[akpm@linux-foundation.org: fix warnings]
Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 12 +++++--
 mm/compaction.c            | 87 ++++++++++++++++++++++++++++++++++++++++------
 mm/internal.h              |  4 +--
 mm/page_alloc.c            | 45 +++++++++++++++++-------
 4 files changed, 121 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index b2e4c92d0445..60bdf8dc02a3 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -13,6 +13,14 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	4
 
+/* Used to signal whether compaction detected need_sched() or lock contention */
+/* No contention detected */
+#define COMPACT_CONTENDED_NONE	0
+/* Either need_sched() was true or fatal signal pending */
+#define COMPACT_CONTENDED_SCHED	1
+/* Zone lock or lru_lock was contended in async compaction */
+#define COMPACT_CONTENDED_LOCK	2
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -24,7 +32,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
@@ -94,7 +102,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone)
 {
 	return COMPACT_CONTINUE;
diff --git a/mm/compaction.c b/mm/compaction.c
index 1067c07cb33d..26bb20ef853d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -223,9 +223,21 @@ static void update_pageblock_skip(struct compact_control *cc,
 }
 #endif /* CONFIG_COMPACTION */
 
-static inline bool should_release_lock(spinlock_t *lock)
+static int should_release_lock(spinlock_t *lock)
 {
-	return need_resched() || spin_is_contended(lock);
+	/*
+	 * Sched contention has higher priority here as we may potentially
+	 * have to abort whole compaction ASAP. Returning with lock contention
+	 * means we will try another zone, and further decisions are
+	 * influenced only when all zones are lock contended. That means
+	 * potentially missing a lock contention is less critical.
+	 */
+	if (need_resched())
+		return COMPACT_CONTENDED_SCHED;
+	else if (spin_is_contended(lock))
+		return COMPACT_CONTENDED_LOCK;
+
+	return COMPACT_CONTENDED_NONE;
 }
 
 /*
@@ -240,7 +252,9 @@ static inline bool should_release_lock(spinlock_t *lock)
 static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 				      bool locked, struct compact_control *cc)
 {
-	if (should_release_lock(lock)) {
+	int contended = should_release_lock(lock);
+
+	if (contended) {
 		if (locked) {
 			spin_unlock_irqrestore(lock, *flags);
 			locked = false;
@@ -248,7 +262,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 
 		/* async aborts if taking too long or contended */
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
+			cc->contended = contended;
 			return false;
 		}
 
@@ -274,7 +288,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
 	/* async compaction aborts if contended */
 	if (need_resched()) {
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
+			cc->contended = COMPACT_CONTENDED_SCHED;
 			return true;
 		}
 
@@ -1140,7 +1154,7 @@ out:
 }
 
 static unsigned long compact_zone_order(struct zone *zone, int order,
-		gfp_t gfp_mask, enum migrate_mode mode, bool *contended)
+		gfp_t gfp_mask, enum migrate_mode mode, int *contended)
 {
 	unsigned long ret;
 	struct compact_control cc = {
@@ -1172,14 +1186,15 @@ int sysctl_extfrag_threshold = 500;
  * @gfp_mask: The GFP mask of the current allocation
  * @nodemask: The allowed nodes to allocate from
  * @mode: The migration mode for async, sync light, or sync migration
- * @contended: Return value that is true if compaction was aborted due to lock contention
+ * @contended: Return value that determines if compaction was aborted due to
+ *	       need_resched() or lock contention
  * @candidate_zone: Return the zone where we think allocation should succeed
  *
  * This is the main entry point for direct page compaction.
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1189,6 +1204,9 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	struct zone *zone;
 	int rc = COMPACT_DEFERRED;
 	int alloc_flags = 0;
+	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
+
+	*contended = COMPACT_CONTENDED_NONE;
 
 	/* Check if the GFP flags allow compaction */
 	if (!order || !may_enter_fs || !may_perform_io)
@@ -1202,13 +1220,19 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 								nodemask) {
 		int status;
+		int zone_contended;
 
 		if (compaction_deferred(zone, order))
 			continue;
 
 		status = compact_zone_order(zone, order, gfp_mask, mode,
-						contended);
+							&zone_contended);
 		rc = max(status, rc);
+		/*
+		 * It takes at least one zone that wasn't lock contended
+		 * to clear all_zones_contended.
+		 */
+		all_zones_contended &= zone_contended;
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
@@ -1221,8 +1245,21 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			 * succeeds in this zone.
 			 */
 			compaction_defer_reset(zone, order, false);
-			break;
-		} else if (mode != MIGRATE_ASYNC) {
+			/*
+			 * It is possible that async compaction aborted due to
+			 * need_resched() and the watermarks were ok thanks to
+			 * somebody else freeing memory. The allocation can
+			 * however still fail so we better signal the
+			 * need_resched() contention anyway (this will not
+			 * prevent the allocation attempt).
+			 */
+			if (zone_contended == COMPACT_CONTENDED_SCHED)
+				*contended = COMPACT_CONTENDED_SCHED;
+
+			goto break_loop;
+		}
+
+		if (mode != MIGRATE_ASYNC) {
 			/*
 			 * We think that allocation won't succeed in this zone
 			 * so we defer compaction there. If it ends up
@@ -1230,8 +1267,36 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			 */
 			defer_compaction(zone, order);
 		}
+
+		/*
+		 * We might have stopped compacting due to need_resched() in
+		 * async compaction, or due to a fatal signal detected. In that
+		 * case do not try further zones and signal need_resched()
+		 * contention.
+		 */
+		if ((zone_contended == COMPACT_CONTENDED_SCHED)
+					|| fatal_signal_pending(current)) {
+			*contended = COMPACT_CONTENDED_SCHED;
+			goto break_loop;
+		}
+
+		continue;
+break_loop:
+		/*
+		 * We might not have tried all the zones, so  be conservative
+		 * and assume they are not all lock contended.
+		 */
+		all_zones_contended = 0;
+		break;
 	}
 
+	/*
+	 * If at least one zone wasn't deferred or skipped, we report if all
+	 * zones that were tried were lock contended.
+	 */
+	if (rc > COMPACT_SKIPPED && all_zones_contended)
+		*contended = COMPACT_CONTENDED_LOCK;
+
 	return rc;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 5a0738fa649c..4c1d604c396c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -144,8 +144,8 @@ struct compact_control {
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
-	bool contended;			/* True if a lock was contended, or
-					 * need_resched() true during async
+	int contended;			/* Signal need_sched() or lock
+					 * contention detected during
 					 * compaction
 					 */
 };
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dfbf54b51649..313338d74095 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2297,7 +2297,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int classzone_idx, int migratetype, enum migrate_mode mode,
-	bool *contended_compaction, bool *deferred_compaction)
+	int *contended_compaction, bool *deferred_compaction)
 {
 	struct zone *last_compact_zone = NULL;
 	unsigned long compact_result;
@@ -2371,7 +2371,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int classzone_idx, int migratetype, enum migrate_mode mode,
-	bool *contended_compaction, bool *deferred_compaction)
+	int *contended_compaction, bool *deferred_compaction)
 {
 	return NULL;
 }
@@ -2547,7 +2547,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned long did_some_progress;
 	enum migrate_mode migration_mode = MIGRATE_ASYNC;
 	bool deferred_compaction = false;
-	bool contended_compaction = false;
+	int contended_compaction = COMPACT_CONTENDED_NONE;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2651,15 +2651,36 @@ rebalance:
 	if (page)
 		goto got_pg;
 
-	/*
-	 * If compaction is deferred for high-order allocations, it is because
-	 * sync compaction recently failed. In this is the case and the caller
-	 * requested a movable allocation that does not heavily disrupt the
-	 * system then fail the allocation instead of entering direct reclaim.
-	 */
-	if ((deferred_compaction || contended_compaction) &&
-						(gfp_mask & __GFP_NO_KSWAPD))
-		goto nopage;
+	/* Checks for THP-specific high-order allocations */
+	if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+		/*
+		 * If compaction is deferred for high-order allocations, it is
+		 * because sync compaction recently failed. If this is the case
+		 * and the caller requested a THP allocation, we do not want
+		 * to heavily disrupt the system, so we fail the allocation
+		 * instead of entering direct reclaim.
+		 */
+		if (deferred_compaction)
+			goto nopage;
+
+		/*
+		 * In all zones where compaction was attempted (and not
+		 * deferred or skipped), lock contention has been detected.
+		 * For THP allocation we do not want to disrupt the others
+		 * so we fallback to base pages instead.
+		 */
+		if (contended_compaction == COMPACT_CONTENDED_LOCK)
+			goto nopage;
+
+		/*
+		 * If compaction was aborted due to need_resched(), we do not
+		 * want to further increase allocation latency, unless it is
+		 * khugepaged trying to collapse.
+		 */
+		if (contended_compaction == COMPACT_CONTENDED_SCHED
+			&& !(current->flags & PF_KTHREAD))
+			goto nopage;
+	}
 
 	/*
 	 * It can become very expensive to allocate transparent hugepages at
-- 
cgit v1.2.3


From 43e7a34d265e884b7cf34f9b05e6f2e0c05bf120 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 9 Oct 2014 15:27:25 -0700
Subject: mm: rename allocflags_to_migratetype for clarity

The page allocator has gfp flags (like __GFP_WAIT) and alloc flags (like
ALLOC_CPUSET) that have separate semantics.

The function allocflags_to_migratetype() actually takes gfp flags, not
alloc flags, and returns a migratetype.  Rename it to
gfpflags_to_migratetype().

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 mm/compaction.c     | 4 ++--
 mm/page_alloc.c     | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5e7219dc0fae..41b30fd4d041 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -156,7 +156,7 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 
 /* Convert GFP flags to their corresponding migrate type */
-static inline int allocflags_to_migratetype(gfp_t gfp_flags)
+static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
 {
 	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
 
diff --git a/mm/compaction.c b/mm/compaction.c
index b9cf751cc00e..7c687c0eef6e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1242,7 +1242,7 @@ static unsigned long compact_zone_order(struct zone *zone, int order,
 		.nr_freepages = 0,
 		.nr_migratepages = 0,
 		.order = order,
-		.migratetype = allocflags_to_migratetype(gfp_mask),
+		.migratetype = gfpflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.mode = mode,
 	};
@@ -1294,7 +1294,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		return COMPACT_SKIPPED;
 
 #ifdef CONFIG_CMA
-	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+	if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 #endif
 	/* Compact each zone in the list */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 313338d74095..f07588b11d59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2523,7 +2523,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
 #ifdef CONFIG_CMA
-	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+	if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 #endif
 	return alloc_flags;
@@ -2786,7 +2786,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct zone *preferred_zone;
 	struct zoneref *preferred_zoneref;
 	struct page *page = NULL;
-	int migratetype = allocflags_to_migratetype(gfp_mask);
+	int migratetype = gfpflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
 	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
 	int classzone_idx;
-- 
cgit v1.2.3


From 9c5990240e076ae564cccbd921868cd08f6daaa5 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 9 Oct 2014 15:27:29 -0700
Subject: mm: introduce check_data_rlimit helper

To eliminate code duplication lets introduce check_data_rlimit helper
which we will use in brk() and prctl() syscalls.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Julien Tinnes <jln@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 28df70774b81..4d814aa97785 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -18,6 +18,7 @@
 #include <linux/pfn.h>
 #include <linux/bit_spinlock.h>
 #include <linux/shrinker.h>
+#include <linux/resource.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1780,6 +1781,20 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	bool *need_rmap_locks);
 extern void exit_mmap(struct mm_struct *);
 
+static inline int check_data_rlimit(unsigned long rlim,
+				    unsigned long new,
+				    unsigned long start,
+				    unsigned long end_data,
+				    unsigned long start_data)
+{
+	if (rlim < RLIM_INFINITY) {
+		if (((new - start) + (end_data - start_data)) > rlim)
+			return -ENOSPC;
+	}
+
+	return 0;
+}
+
 extern int mm_take_all_locks(struct mm_struct *mm);
 extern void mm_drop_all_locks(struct mm_struct *mm);
 
-- 
cgit v1.2.3


From 1f13ae399c58af5a05b5cee61da864e1f4071de4 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 9 Oct 2014 15:27:39 -0700
Subject: mm: remove noisy remainder of the scan_unevictable interface

The deprecation warnings for the scan_unevictable interface triggers by
scripts doing `sysctl -a | grep something else'.  This is annoying and not
helpful.

The interface has been defunct since 264e56d8247e ("mm: disable user
interface to manually rescue unevictable pages"), which was in 2011, and
there haven't been any reports of usecases for it, only reports that the
deprecation warnings are annying.  It's unlikely that anybody is using
this interface specifically at this point, so remove it.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/stable/sysfs-devices-node |  8 ----
 drivers/base/node.c                         |  3 --
 include/linux/swap.h                        | 16 --------
 kernel/sysctl.c                             |  7 ----
 mm/vmscan.c                                 | 63 -----------------------------
 5 files changed, 97 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index ce259c13c36a..5b2d0f08867c 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -85,14 +85,6 @@ Description:
 		will be compacted. When it completes, memory will be freed
 		into blocks which have as many contiguous pages as possible
 
-What:		/sys/devices/system/node/nodeX/scan_unevictable_pages
-Date:		October 2008
-Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
-Description:
-		When set, it triggers scanning the node's unevictable lists
-		and move any pages that have become evictable onto the respective
-		zone's inactive list. See mm/vmscan.c
-
 What:		/sys/devices/system/node/nodeX/hugepages/hugepages-<size>/
 Date:		December 2009
 Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
diff --git a/drivers/base/node.c b/drivers/base/node.c
index d51c49c9bafa..472168cd0c97 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -289,8 +289,6 @@ static int register_node(struct node *node, int num, struct node *parent)
 		device_create_file(&node->dev, &dev_attr_distance);
 		device_create_file(&node->dev, &dev_attr_vmstat);
 
-		scan_unevictable_register_node(node);
-
 		hugetlb_register_node(node);
 
 		compaction_register_node(node);
@@ -314,7 +312,6 @@ void unregister_node(struct node *node)
 	device_remove_file(&node->dev, &dev_attr_distance);
 	device_remove_file(&node->dev, &dev_attr_vmstat);
 
-	scan_unevictable_unregister_node(node);
 	hugetlb_unregister_node(node);		/* no-op, if memoryless node */
 
 	device_unregister(&node->dev);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1b72060f093a..ea4f926e6b9b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,22 +354,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 extern int page_evictable(struct page *page);
 extern void check_move_unevictable_pages(struct page **, int nr_pages);
 
-extern unsigned long scan_unevictable_pages;
-extern int scan_unevictable_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-#ifdef CONFIG_NUMA
-extern int scan_unevictable_register_node(struct node *node);
-extern void scan_unevictable_unregister_node(struct node *node);
-#else
-static inline int scan_unevictable_register_node(struct node *node)
-{
-	return 0;
-}
-static inline void scan_unevictable_unregister_node(struct node *node)
-{
-}
-#endif
-
 extern int kswapd_run(int nid);
 extern void kswapd_stop(int nid);
 #ifdef CONFIG_MEMCG
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75875a741b5e..91180987e40e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1460,13 +1460,6 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one,
 	},
 #endif
-	{
-		.procname	= "scan_unevictable_pages",
-		.data		= &scan_unevictable_pages,
-		.maxlen		= sizeof(scan_unevictable_pages),
-		.mode		= 0644,
-		.proc_handler	= scan_unevictable_handler,
-	},
 #ifdef CONFIG_MEMORY_FAILURE
 	{
 		.procname	= "memory_failure_early_kill",
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1a71b8b1ea34..af72fe8e8d74 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3797,66 +3797,3 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 	}
 }
 #endif /* CONFIG_SHMEM */
-
-static void warn_scan_unevictable_pages(void)
-{
-	printk_once(KERN_WARNING
-		    "%s: The scan_unevictable_pages sysctl/node-interface has been "
-		    "disabled for lack of a legitimate use case.  If you have "
-		    "one, please send an email to linux-mm@kvack.org.\n",
-		    current->comm);
-}
-
-/*
- * scan_unevictable_pages [vm] sysctl handler.  On demand re-scan of
- * all nodes' unevictable lists for evictable pages
- */
-unsigned long scan_unevictable_pages;
-
-int scan_unevictable_handler(struct ctl_table *table, int write,
-			   void __user *buffer,
-			   size_t *length, loff_t *ppos)
-{
-	warn_scan_unevictable_pages();
-	proc_doulongvec_minmax(table, write, buffer, length, ppos);
-	scan_unevictable_pages = 0;
-	return 0;
-}
-
-#ifdef CONFIG_NUMA
-/*
- * per node 'scan_unevictable_pages' attribute.  On demand re-scan of
- * a specified node's per zone unevictable lists for evictable pages.
- */
-
-static ssize_t read_scan_unevictable_node(struct device *dev,
-					  struct device_attribute *attr,
-					  char *buf)
-{
-	warn_scan_unevictable_pages();
-	return sprintf(buf, "0\n");	/* always zero; should fit... */
-}
-
-static ssize_t write_scan_unevictable_node(struct device *dev,
-					   struct device_attribute *attr,
-					const char *buf, size_t count)
-{
-	warn_scan_unevictable_pages();
-	return 1;
-}
-
-
-static DEVICE_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
-			read_scan_unevictable_node,
-			write_scan_unevictable_node);
-
-int scan_unevictable_register_node(struct node *node)
-{
-	return device_create_file(&node->dev, &dev_attr_scan_unevictable_pages);
-}
-
-void scan_unevictable_unregister_node(struct node *node)
-{
-	device_remove_file(&node->dev, &dev_attr_scan_unevictable_pages);
-}
-#endif
-- 
cgit v1.2.3


From 6b6482bbf64ef6f6dbc8b52f7a7cf88a0498bd51 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 9 Oct 2014 15:27:48 -0700
Subject: mempolicy: remove the "task" arg of vma_policy_mof() and simplify it

1. vma_policy_mof(task) is simply not safe unless task == current,
   it can race with do_exit()->mpol_put(). Remove this arg and update
   its single caller.

2. vma can not be NULL, remove this check and simplify the code.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  2 +-
 kernel/sched/fair.c       |  2 +-
 mm/mempolicy.c            | 25 +++++++++++--------------
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index f230a978e6ba..5e4bfcedd2ce 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -136,7 +136,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 
 struct mempolicy *get_vma_policy(struct task_struct *tsk,
 		struct vm_area_struct *vma, unsigned long addr);
-bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma);
+bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bfa3c86d0d68..82088b29704e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1946,7 +1946,7 @@ void task_numa_work(struct callback_head *work)
 		vma = mm->mmap;
 	}
 	for (; vma; vma = vma->vm_next) {
-		if (!vma_migratable(vma) || !vma_policy_mof(p, vma))
+		if (!vma_migratable(vma) || !vma_policy_mof(vma))
 			continue;
 
 		/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b86b08e77b8d..ad27bbc757bf 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1646,27 +1646,24 @@ struct mempolicy *get_vma_policy(struct task_struct *task,
 	return pol;
 }
 
-bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma)
+bool vma_policy_mof(struct vm_area_struct *vma)
 {
-	struct mempolicy *pol = NULL;
-
-	if (vma) {
-		if (vma->vm_ops && vma->vm_ops->get_policy) {
-			bool ret = false;
+	struct mempolicy *pol;
 
-			pol = vma->vm_ops->get_policy(vma, vma->vm_start);
-			if (pol && (pol->flags & MPOL_F_MOF))
-				ret = true;
-			mpol_cond_put(pol);
+	if (vma->vm_ops && vma->vm_ops->get_policy) {
+		bool ret = false;
 
-			return ret;
-		}
+		pol = vma->vm_ops->get_policy(vma, vma->vm_start);
+		if (pol && (pol->flags & MPOL_F_MOF))
+			ret = true;
+		mpol_cond_put(pol);
 
-		pol = vma->vm_policy;
+		return ret;
 	}
 
+	pol = vma->vm_policy;
 	if (!pol)
-		pol = get_task_policy(task);
+		pol = get_task_policy(current);
 
 	return pol->flags & MPOL_F_MOF;
 }
-- 
cgit v1.2.3


From 74d2c3a05cc6c1eef2d7236a9919036ed85ddaaf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 9 Oct 2014 15:27:50 -0700
Subject: mempolicy: introduce __get_vma_policy(), export get_task_policy()

Extract the code which looks for vma's policy from get_vma_policy()
into the new helper, __get_vma_policy(). Export get_task_policy().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  3 +++
 mm/mempolicy.c            | 44 ++++++++++++++++++++++++++------------------
 2 files changed, 29 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5e4bfcedd2ce..e1abe249892a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -134,6 +134,9 @@ void mpol_free_shared_policy(struct shared_policy *p);
 struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 					    unsigned long idx);
 
+struct mempolicy *get_task_policy(struct task_struct *p);
+struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
+		unsigned long addr);
 struct mempolicy *get_vma_policy(struct task_struct *tsk,
 		struct vm_area_struct *vma, unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ad27bbc757bf..4378c334e89b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -123,7 +123,7 @@ static struct mempolicy default_policy = {
 
 static struct mempolicy preferred_node_policy[MAX_NUMNODES];
 
-static struct mempolicy *get_task_policy(struct task_struct *p)
+struct mempolicy *get_task_policy(struct task_struct *p)
 {
 	struct mempolicy *pol = p->mempolicy;
 	int node;
@@ -1603,23 +1603,8 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
 
 #endif
 
-/*
- * get_vma_policy(@task, @vma, @addr)
- * @task: task for fallback if vma policy == default
- * @vma: virtual memory area whose policy is sought
- * @addr: address in @vma for shared policy lookup
- *
- * Returns effective policy for a VMA at specified address.
- * Falls back to @task or system default policy, as necessary.
- * Current or other task's task mempolicy and non-shared vma policies must be
- * protected by task_lock(task) by the caller.
- * Shared policies [those marked as MPOL_F_SHARED] require an extra reference
- * count--added by the get_policy() vm_op, as appropriate--to protect against
- * freeing by another task.  It is the caller's responsibility to free the
- * extra reference for shared policies.
- */
-struct mempolicy *get_vma_policy(struct task_struct *task,
-		struct vm_area_struct *vma, unsigned long addr)
+struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
+						unsigned long addr)
 {
 	struct mempolicy *pol = NULL;
 
@@ -1640,6 +1625,29 @@ struct mempolicy *get_vma_policy(struct task_struct *task,
 		}
 	}
 
+	return pol;
+}
+
+/*
+ * get_vma_policy(@task, @vma, @addr)
+ * @task: task for fallback if vma policy == default
+ * @vma: virtual memory area whose policy is sought
+ * @addr: address in @vma for shared policy lookup
+ *
+ * Returns effective policy for a VMA at specified address.
+ * Falls back to @task or system default policy, as necessary.
+ * Current or other task's task mempolicy and non-shared vma policies must be
+ * protected by task_lock(task) by the caller.
+ * Shared policies [those marked as MPOL_F_SHARED] require an extra reference
+ * count--added by the get_policy() vm_op, as appropriate--to protect against
+ * freeing by another task.  It is the caller's responsibility to free the
+ * extra reference for shared policies.
+ */
+struct mempolicy *get_vma_policy(struct task_struct *task,
+		struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol = __get_vma_policy(vma, addr);
+
 	if (!pol)
 		pol = get_task_policy(task);
 
-- 
cgit v1.2.3


From dd6eecb917938c1b7e505a83df307b3476e7c8bd Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 9 Oct 2014 15:27:57 -0700
Subject: mempolicy: unexport get_vma_policy() and remove its "task" arg

- get_vma_policy(task) is not safe if task != current, remove this
  argument.

- get_vma_policy() no longer has callers outside of mempolicy.c,
  make it static.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  2 --
 mm/mempolicy.c            | 19 ++++++++-----------
 2 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index e1abe249892a..3d385c81c153 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -137,8 +137,6 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
-struct mempolicy *get_vma_policy(struct task_struct *tsk,
-		struct vm_area_struct *vma, unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9695a9a3ab90..008fb32936eb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1616,27 +1616,24 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 }
 
 /*
- * get_vma_policy(@task, @vma, @addr)
- * @task: task for fallback if vma policy == default
+ * get_vma_policy(@vma, @addr)
  * @vma: virtual memory area whose policy is sought
  * @addr: address in @vma for shared policy lookup
  *
  * Returns effective policy for a VMA at specified address.
- * Falls back to @task or system default policy, as necessary.
- * Current or other task's task mempolicy and non-shared vma policies must be
- * protected by task_lock(task) by the caller.
+ * Falls back to current->mempolicy or system default policy, as necessary.
  * Shared policies [those marked as MPOL_F_SHARED] require an extra reference
  * count--added by the get_policy() vm_op, as appropriate--to protect against
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-struct mempolicy *get_vma_policy(struct task_struct *task,
-		struct vm_area_struct *vma, unsigned long addr)
+static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
 
 	if (!pol)
-		pol = get_task_policy(task);
+		pol = get_task_policy(current);
 
 	return pol;
 }
@@ -1864,7 +1861,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
 {
 	struct zonelist *zl;
 
-	*mpol = get_vma_policy(current, vma, addr);
+	*mpol = get_vma_policy(vma, addr);
 	*nodemask = NULL;	/* assume !MPOL_BIND */
 
 	if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) {
@@ -2019,7 +2016,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 	unsigned int cpuset_mems_cookie;
 
 retry_cpuset:
-	pol = get_vma_policy(current, vma, addr);
+	pol = get_vma_policy(vma, addr);
 	cpuset_mems_cookie = read_mems_allowed_begin();
 
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
@@ -2285,7 +2282,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 
 	BUG_ON(!vma);
 
-	pol = get_vma_policy(current, vma, addr);
+	pol = get_vma_policy(vma, addr);
 	if (!(pol->flags & MPOL_F_MOF))
 		goto out;
 
-- 
cgit v1.2.3


From 1c93923cc264105418e6ead149c76bd88302eff4 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 9 Oct 2014 15:27:59 -0700
Subject: include/linux/migrate.h: remove migrate_page #define

This is designed to avoid a few ifdefs in .c files but it's obnoxious
because it can cause unsuspecting "migrate_page" symbols to get turned into
"NULL".

Just nuke it and use the ifdefs.

Cc: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 3 ---
 mm/shmem.c              | 2 ++
 mm/swap_state.c         | 2 ++
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index a2901c414664..b66fd10f4b93 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -82,9 +82,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 	return -ENOSYS;
 }
 
-/* Possible settings for the migrate_page() method in address_operations */
-#define migrate_page NULL
-
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/shmem.c b/mm/shmem.c
index 469f90d56051..4fad61bb41e5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3077,7 +3077,9 @@ static const struct address_space_operations shmem_aops = {
 	.write_begin	= shmem_write_begin,
 	.write_end	= shmem_write_end,
 #endif
+#ifdef CONFIG_MIGRATION
 	.migratepage	= migrate_page,
+#endif
 	.error_remove_page = generic_error_remove_page,
 };
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3e0ec83d000c..ef1f39139b71 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -28,7 +28,9 @@
 static const struct address_space_operations swap_aops = {
 	.writepage	= swap_writepage,
 	.set_page_dirty	= swap_set_page_dirty,
+#ifdef CONFIG_MIGRATION
 	.migratepage	= migrate_page,
+#endif
 };
 
 static struct backing_dev_info swap_backing_dev_info = {
-- 
cgit v1.2.3


From 0bf55139782db1fa96af66e37cc84afde18443ef Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 9 Oct 2014 15:28:06 -0700
Subject: mm: introduce dump_vma

Introduce a helper to dump information about a VMA, this also makes
dump_page_flags more generic and re-uses that so the output looks very
similar to dump_page:

[   61.903437] vma ffff88070f88be00 start 00007fff25970000 end 00007fff25992000
[   61.903437] next ffff88070facd600 prev ffff88070face400 mm ffff88070fade000
[   61.903437] prot 8000000000000025 anon_vma ffff88070fa1e200 vm_ops           (null)
[   61.903437] pgoff 7ffffffdd file           (null) private_data           (null)
[   61.909129] flags: 0x100173(read|write|mayread|maywrite|mayexec|growsdown|account)

[akpm@linux-foundation.org: make dump_vma() require CONFIG_DEBUG_VM]
[swarren@nvidia.com: fix dump_vma() compilation]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h |  2 ++
 mm/page_alloc.c         | 82 +++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 75 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 2f348d02f640..dfb93333fc62 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -4,10 +4,12 @@
 #include <linux/stringify.h>
 
 struct page;
+struct vm_area_struct;
 
 extern void dump_page(struct page *page, const char *reason);
 extern void dump_page_badflags(struct page *page, const char *reason,
 			       unsigned long badflags);
+void dump_vma(const struct vm_area_struct *vma);
 
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f07588b11d59..3a950144f80b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6626,27 +6626,26 @@ static const struct trace_print_flags pageflag_names[] = {
 #endif
 };
 
-static void dump_page_flags(unsigned long flags)
+static void dump_flags(unsigned long flags,
+			const struct trace_print_flags *names, int count)
 {
 	const char *delim = "";
 	unsigned long mask;
 	int i;
 
-	BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS);
-
-	printk(KERN_ALERT "page flags: %#lx(", flags);
+	printk(KERN_ALERT "flags: %#lx(", flags);
 
 	/* remove zone id */
 	flags &= (1UL << NR_PAGEFLAGS) - 1;
 
-	for (i = 0; i < ARRAY_SIZE(pageflag_names) && flags; i++) {
+	for (i = 0; i < count && flags; i++) {
 
-		mask = pageflag_names[i].mask;
+		mask = names[i].mask;
 		if ((flags & mask) != mask)
 			continue;
 
 		flags &= ~mask;
-		printk("%s%s", delim, pageflag_names[i].name);
+		printk("%s%s", delim, names[i].name);
 		delim = "|";
 	}
 
@@ -6664,12 +6663,14 @@ void dump_page_badflags(struct page *page, const char *reason,
 	       "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
 		page, atomic_read(&page->_count), page_mapcount(page),
 		page->mapping, page->index);
-	dump_page_flags(page->flags);
+	BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS);
+	dump_flags(page->flags, pageflag_names, ARRAY_SIZE(pageflag_names));
 	if (reason)
 		pr_alert("page dumped because: %s\n", reason);
 	if (page->flags & badflags) {
 		pr_alert("bad because of flags:\n");
-		dump_page_flags(page->flags & badflags);
+		dump_flags(page->flags & badflags,
+				pageflag_names, ARRAY_SIZE(pageflag_names));
 	}
 	mem_cgroup_print_bad_page(page);
 }
@@ -6679,3 +6680,66 @@ void dump_page(struct page *page, const char *reason)
 	dump_page_badflags(page, reason, 0);
 }
 EXPORT_SYMBOL(dump_page);
+
+#ifdef CONFIG_DEBUG_VM
+
+static const struct trace_print_flags vmaflags_names[] = {
+	{VM_READ,			"read"		},
+	{VM_WRITE,			"write"		},
+	{VM_EXEC,			"exec"		},
+	{VM_SHARED,			"shared"	},
+	{VM_MAYREAD,			"mayread"	},
+	{VM_MAYWRITE,			"maywrite"	},
+	{VM_MAYEXEC,			"mayexec"	},
+	{VM_MAYSHARE,			"mayshare"	},
+	{VM_GROWSDOWN,			"growsdown"	},
+	{VM_PFNMAP,			"pfnmap"	},
+	{VM_DENYWRITE,			"denywrite"	},
+	{VM_LOCKED,			"locked"	},
+	{VM_IO,				"io"		},
+	{VM_SEQ_READ,			"seqread"	},
+	{VM_RAND_READ,			"randread"	},
+	{VM_DONTCOPY,			"dontcopy"	},
+	{VM_DONTEXPAND,			"dontexpand"	},
+	{VM_ACCOUNT,			"account"	},
+	{VM_NORESERVE,			"noreserve"	},
+	{VM_HUGETLB,			"hugetlb"	},
+	{VM_NONLINEAR,			"nonlinear"	},
+#if defined(CONFIG_X86)
+	{VM_PAT,			"pat"		},
+#elif defined(CONFIG_PPC)
+	{VM_SAO,			"sao"		},
+#elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64)
+	{VM_GROWSUP,			"growsup"	},
+#elif !defined(CONFIG_MMU)
+	{VM_MAPPED_COPY,		"mappedcopy"	},
+#else
+	{VM_ARCH_1,			"arch_1"	},
+#endif
+	{VM_DONTDUMP,			"dontdump"	},
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	{VM_SOFTDIRTY,			"softdirty"	},
+#endif
+	{VM_MIXEDMAP,			"mixedmap"	},
+	{VM_HUGEPAGE,			"hugepage"	},
+	{VM_NOHUGEPAGE,			"nohugepage"	},
+	{VM_MERGEABLE,			"mergeable"	},
+};
+
+void dump_vma(const struct vm_area_struct *vma)
+{
+	printk(KERN_ALERT
+		"vma %p start %p end %p\n"
+		"next %p prev %p mm %p\n"
+		"prot %lx anon_vma %p vm_ops %p\n"
+		"pgoff %lx file %p private_data %p\n",
+		vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
+		vma->vm_prev, vma->vm_mm,
+		(unsigned long)pgprot_val(vma->vm_page_prot),
+		vma->anon_vma, vma->vm_ops, vma->vm_pgoff,
+		vma->vm_file, vma->vm_private_data);
+	dump_flags(vma->vm_flags, vmaflags_names, ARRAY_SIZE(vmaflags_names));
+}
+EXPORT_SYMBOL(dump_vma);
+
+#endif		/* CONFIG_DEBUG_VM */
-- 
cgit v1.2.3


From fa3759ccd5651c4235f572302d58c8ec9ddf1c4b Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 9 Oct 2014 15:28:08 -0700
Subject: mm: introduce VM_BUG_ON_VMA

Very similar to VM_BUG_ON_PAGE but dumps VMA information instead.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index dfb93333fc62..569e4c8d0ebb 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -20,12 +20,20 @@ void dump_vma(const struct vm_area_struct *vma);
 			BUG();						\
 		}							\
 	} while (0)
+#define VM_BUG_ON_VMA(cond, vma)					\
+	do {								\
+		if (unlikely(cond)) {					\
+			dump_vma(vma);					\
+			BUG();						\
+		}							\
+	} while (0)
 #define VM_WARN_ON(cond) WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
 #define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
 #else
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+#define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
-- 
cgit v1.2.3


From 81d1b09c6be66afac7d41ee52279d9bccbce56d8 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 9 Oct 2014 15:28:10 -0700
Subject: mm: convert a few VM_BUG_ON callers to VM_BUG_ON_VMA

Trivially convert a few VM_BUG_ON calls to VM_BUG_ON_VMA to extract
more information when they trigger.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h |  2 +-
 include/linux/rmap.h    |  2 +-
 mm/huge_memory.c        |  6 +++---
 mm/hugetlb.c            | 14 +++++++-------
 mm/interval_tree.c      |  2 +-
 mm/mlock.c              |  4 ++--
 mm/mmap.c               |  6 +++---
 mm/mremap.c             |  3 ++-
 mm/rmap.c               |  8 ++++----
 9 files changed, 24 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 63579cb8d3dc..ad9051bab267 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -132,7 +132,7 @@ extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 		spinlock_t **ptl)
 {
-	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+	VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
 	if (pmd_trans_huge(*pmd))
 		return __pmd_trans_huge_lock(pmd, vma, ptl);
 	else
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index be574506e6a9..c0c2bce6b0b7 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -150,7 +150,7 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 static inline void anon_vma_merge(struct vm_area_struct *vma,
 				  struct vm_area_struct *next)
 {
-	VM_BUG_ON(vma->anon_vma != next->anon_vma);
+	VM_BUG_ON_VMA(vma->anon_vma != next->anon_vma, vma);
 	unlink_anon_vmas(next);
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 55ab569c31b4..c13148cc745f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1096,7 +1096,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	ptl = pmd_lockptr(mm, pmd);
-	VM_BUG_ON(!vma->anon_vma);
+	VM_BUG_ON_VMA(!vma->anon_vma, vma);
 	haddr = address & HPAGE_PMD_MASK;
 	if (is_huge_zero_pmd(orig_pmd))
 		goto alloc;
@@ -2083,7 +2083,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON(vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -2406,7 +2406,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
 		return false;
 	if (is_vma_temporary_stack(vma))
 		return false;
-	VM_BUG_ON(vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
 	return true;
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index eeceeeb09019..9fd722769927 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -434,7 +434,7 @@ static inline struct resv_map *inode_resv_map(struct inode *inode)
 
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 {
-	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
 	if (vma->vm_flags & VM_MAYSHARE) {
 		struct address_space *mapping = vma->vm_file->f_mapping;
 		struct inode *inode = mapping->host;
@@ -449,8 +449,8 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 {
-	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
+	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
+	VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma);
 
 	set_vma_private_data(vma, (get_vma_private_data(vma) &
 				HPAGE_RESV_MASK) | (unsigned long)map);
@@ -458,15 +458,15 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 
 static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
 {
-	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
+	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
+	VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma);
 
 	set_vma_private_data(vma, get_vma_private_data(vma) | flags);
 }
 
 static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 {
-	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
 
 	return (get_vma_private_data(vma) & flag) != 0;
 }
@@ -474,7 +474,7 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 /* Reset counters to 0 and clear all HPAGE_RESV_* flags */
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
-	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
 	if (!(vma->vm_flags & VM_MAYSHARE))
 		vma->vm_private_data = (void *)0;
 }
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
index 4a5822a586e6..8da581fa9060 100644
--- a/mm/interval_tree.c
+++ b/mm/interval_tree.c
@@ -34,7 +34,7 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node,
 	struct vm_area_struct *parent;
 	unsigned long last = vma_last_pgoff(node);
 
-	VM_BUG_ON(vma_start_pgoff(node) != vma_start_pgoff(prev));
+	VM_BUG_ON_VMA(vma_start_pgoff(node) != vma_start_pgoff(prev), node);
 
 	if (!prev->shared.linear.rb.rb_right) {
 		parent = prev;
diff --git a/mm/mlock.c b/mm/mlock.c
index ce84cb0b83ef..d5d09d0786ec 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -233,8 +233,8 @@ long __mlock_vma_pages_range(struct vm_area_struct *vma,
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(end   & ~PAGE_MASK);
-	VM_BUG_ON(start < vma->vm_start);
-	VM_BUG_ON(end   > vma->vm_end);
+	VM_BUG_ON_VMA(start < vma->vm_start, vma);
+	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
 	gup_flags = FOLL_TOUCH | FOLL_MLOCK;
diff --git a/mm/mmap.c b/mm/mmap.c
index 7ff38f1a66ec..69d4c5199fd8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -786,8 +786,8 @@ again:			remove_next = 1 + (end > next->vm_end);
 	if (!anon_vma && adjust_next)
 		anon_vma = next->anon_vma;
 	if (anon_vma) {
-		VM_BUG_ON(adjust_next && next->anon_vma &&
-			  anon_vma != next->anon_vma);
+		VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
+			  anon_vma != next->anon_vma, next);
 		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_pre_update_vma(vma);
 		if (adjust_next)
@@ -2848,7 +2848,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			 * safe. It is only safe to keep the vm_pgoff
 			 * linear if there are no pages mapped yet.
 			 */
-			VM_BUG_ON(faulted_in_anon_vma);
+			VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
 			*vmap = vma = new_vma;
 		}
 		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
diff --git a/mm/mremap.c b/mm/mremap.c
index 05f1180e9f21..89e45d8a983a 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -195,7 +195,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (pmd_trans_huge(*old_pmd)) {
 			int err = 0;
 			if (extent == HPAGE_PMD_SIZE) {
-				VM_BUG_ON(vma->vm_file || !vma->anon_vma);
+				VM_BUG_ON_VMA(vma->vm_file || !vma->anon_vma,
+					      vma);
 				/* See comment in move_ptes() */
 				if (need_rmap_locks)
 					anon_vma_lock_write(vma->anon_vma);
diff --git a/mm/rmap.c b/mm/rmap.c
index bc74e0012809..116a5053415b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -527,7 +527,7 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 	unsigned long address = __vma_address(page, vma);
 
 	/* page should be within @vma mapping range */
-	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
 
 	return address;
 }
@@ -897,7 +897,7 @@ void page_move_anon_rmap(struct page *page,
 	struct anon_vma *anon_vma = vma->anon_vma;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	VM_BUG_ON(!anon_vma);
+	VM_BUG_ON_VMA(!anon_vma, vma);
 	VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
 
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
@@ -1024,7 +1024,7 @@ void do_page_add_anon_rmap(struct page *page,
 void page_add_new_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
 	if (PageTransHuge(page))
@@ -1670,7 +1670,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 	 * structure at mapping cannot be freed and reused yet,
 	 * so we can safely take mapping->i_mmap_mutex.
 	 */
-	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
 
 	if (!mapping)
 		return ret;
-- 
cgit v1.2.3


From 5705465174686d007473e017b76c4b64b44aa690 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 9 Oct 2014 15:28:17 -0700
Subject: mm: clean up zone flags

Page reclaim tests zone_is_reclaim_dirty(), but the site that actually
sets this state does zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY), sending the
reader through layers indirection just to track down a simple bit.

Remove all zone flag wrappers and just use bitops against zone->flags
directly.  It's just as readable and the lines are barely any longer.

Also rename ZONE_TAIL_LRU_DIRTY to ZONE_DIRTY to match ZONE_WRITEBACK, and
remove the zone_flags_t typedef.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 51 +++-----------------------------------------------
 mm/backing-dev.c       |  2 +-
 mm/oom_kill.c          |  6 +++---
 mm/page_alloc.c        |  8 ++++----
 mm/vmscan.c            | 28 +++++++++++++--------------
 5 files changed, 25 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 318df7051850..48bf12ef6620 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -521,13 +521,13 @@ struct zone {
 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
-typedef enum {
+enum zone_flags {
 	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 	ZONE_OOM_LOCKED,		/* zone is in OOM killer zonelist */
 	ZONE_CONGESTED,			/* zone has many dirty pages backed by
 					 * a congested BDI
 					 */
-	ZONE_TAIL_LRU_DIRTY,		/* reclaim scanning has recently found
+	ZONE_DIRTY,			/* reclaim scanning has recently found
 					 * many dirty file pages at the tail
 					 * of the LRU.
 					 */
@@ -535,52 +535,7 @@ typedef enum {
 					 * many pages under writeback
 					 */
 	ZONE_FAIR_DEPLETED,		/* fair zone policy batch depleted */
-} zone_flags_t;
-
-static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
-{
-	set_bit(flag, &zone->flags);
-}
-
-static inline int zone_test_and_set_flag(struct zone *zone, zone_flags_t flag)
-{
-	return test_and_set_bit(flag, &zone->flags);
-}
-
-static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
-{
-	clear_bit(flag, &zone->flags);
-}
-
-static inline int zone_is_reclaim_congested(const struct zone *zone)
-{
-	return test_bit(ZONE_CONGESTED, &zone->flags);
-}
-
-static inline int zone_is_reclaim_dirty(const struct zone *zone)
-{
-	return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags);
-}
-
-static inline int zone_is_reclaim_writeback(const struct zone *zone)
-{
-	return test_bit(ZONE_WRITEBACK, &zone->flags);
-}
-
-static inline int zone_is_reclaim_locked(const struct zone *zone)
-{
-	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
-}
-
-static inline int zone_is_fair_depleted(const struct zone *zone)
-{
-	return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
-}
-
-static inline int zone_is_oom_locked(const struct zone *zone)
-{
-	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
-}
+};
 
 static inline unsigned long zone_end_pfn(const struct zone *zone)
 {
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1706cbbdf5f0..b27714f1b40f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -631,7 +631,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
 	 * of sleeping on the congestion queue
 	 */
 	if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
-			!zone_is_reclaim_congested(zone)) {
+	    !test_bit(ZONE_CONGESTED, &zone->flags)) {
 		cond_resched();
 
 		/* In case we scheduled, work out time remaining */
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 1e11df8fa7ec..bbf405a3a18f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -565,7 +565,7 @@ bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask)
 
 	spin_lock(&zone_scan_lock);
 	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
-		if (zone_is_oom_locked(zone)) {
+		if (test_bit(ZONE_OOM_LOCKED, &zone->flags)) {
 			ret = false;
 			goto out;
 		}
@@ -575,7 +575,7 @@ bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask)
 	 * call to oom_zonelist_trylock() doesn't succeed when it shouldn't.
 	 */
 	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
-		zone_set_flag(zone, ZONE_OOM_LOCKED);
+		set_bit(ZONE_OOM_LOCKED, &zone->flags);
 
 out:
 	spin_unlock(&zone_scan_lock);
@@ -594,7 +594,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 
 	spin_lock(&zone_scan_lock);
 	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
-		zone_clear_flag(zone, ZONE_OOM_LOCKED);
+		clear_bit(ZONE_OOM_LOCKED, &zone->flags);
 	spin_unlock(&zone_scan_lock);
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ae2f8474273c..f3769f0fce3c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1614,8 +1614,8 @@ again:
 
 	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
 	if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
-	    !zone_is_fair_depleted(zone))
-		zone_set_flag(zone, ZONE_FAIR_DEPLETED);
+	    !test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
+		set_bit(ZONE_FAIR_DEPLETED, &zone->flags);
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1935,7 +1935,7 @@ static void reset_alloc_batches(struct zone *preferred_zone)
 		mod_zone_page_state(zone, NR_ALLOC_BATCH,
 			high_wmark_pages(zone) - low_wmark_pages(zone) -
 			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-		zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+		clear_bit(ZONE_FAIR_DEPLETED, &zone->flags);
 	} while (zone++ != preferred_zone);
 }
 
@@ -1986,7 +1986,7 @@ zonelist_scan:
 		if (alloc_flags & ALLOC_FAIR) {
 			if (!zone_local(preferred_zone, zone))
 				break;
-			if (zone_is_fair_depleted(zone)) {
+			if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
 				nr_fair_skipped++;
 				continue;
 			}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index af72fe8e8d74..06123f20a326 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -920,7 +920,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			/* Case 1 above */
 			if (current_is_kswapd() &&
 			    PageReclaim(page) &&
-			    zone_is_reclaim_writeback(zone)) {
+			    test_bit(ZONE_WRITEBACK, &zone->flags)) {
 				nr_immediate++;
 				goto keep_locked;
 
@@ -1002,7 +1002,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			 */
 			if (page_is_file_cache(page) &&
 					(!current_is_kswapd() ||
-					 !zone_is_reclaim_dirty(zone))) {
+					 !test_bit(ZONE_DIRTY, &zone->flags))) {
 				/*
 				 * Immediately reclaim when written back.
 				 * Similar in principal to deactivate_page()
@@ -1563,7 +1563,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	 * are encountered in the nr_immediate check below.
 	 */
 	if (nr_writeback && nr_writeback == nr_taken)
-		zone_set_flag(zone, ZONE_WRITEBACK);
+		set_bit(ZONE_WRITEBACK, &zone->flags);
 
 	/*
 	 * memcg will stall in page writeback so only consider forcibly
@@ -1575,16 +1575,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		 * backed by a congested BDI and wait_iff_congested will stall.
 		 */
 		if (nr_dirty && nr_dirty == nr_congested)
-			zone_set_flag(zone, ZONE_CONGESTED);
+			set_bit(ZONE_CONGESTED, &zone->flags);
 
 		/*
 		 * If dirty pages are scanned that are not queued for IO, it
 		 * implies that flushers are not keeping up. In this case, flag
-		 * the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing
-		 * pages from reclaim context.
+		 * the zone ZONE_DIRTY and kswapd will start writing pages from
+		 * reclaim context.
 		 */
 		if (nr_unqueued_dirty == nr_taken)
-			zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
+			set_bit(ZONE_DIRTY, &zone->flags);
 
 		/*
 		 * If kswapd scans pages marked marked for immediate
@@ -2984,7 +2984,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
 	/* Account for the number of pages attempted to reclaim */
 	*nr_attempted += sc->nr_to_reclaim;
 
-	zone_clear_flag(zone, ZONE_WRITEBACK);
+	clear_bit(ZONE_WRITEBACK, &zone->flags);
 
 	/*
 	 * If a zone reaches its high watermark, consider it to be no longer
@@ -2994,8 +2994,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
 	 */
 	if (zone_reclaimable(zone) &&
 	    zone_balanced(zone, testorder, 0, classzone_idx)) {
-		zone_clear_flag(zone, ZONE_CONGESTED);
-		zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY);
+		clear_bit(ZONE_CONGESTED, &zone->flags);
+		clear_bit(ZONE_DIRTY, &zone->flags);
 	}
 
 	return sc->nr_scanned >= sc->nr_to_reclaim;
@@ -3086,8 +3086,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 				 * If balanced, clear the dirty and congested
 				 * flags
 				 */
-				zone_clear_flag(zone, ZONE_CONGESTED);
-				zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY);
+				clear_bit(ZONE_CONGESTED, &zone->flags);
+				clear_bit(ZONE_DIRTY, &zone->flags);
 			}
 		}
 
@@ -3714,11 +3714,11 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
 		return ZONE_RECLAIM_NOSCAN;
 
-	if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
+	if (test_and_set_bit(ZONE_RECLAIM_LOCKED, &zone->flags))
 		return ZONE_RECLAIM_NOSCAN;
 
 	ret = __zone_reclaim(zone, gfp_mask, order);
-	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
+	clear_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
 
 	if (!ret)
 		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
-- 
cgit v1.2.3


From 934f3072c17cc8886f4c043b47eeeb1b12f8de33 Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Thu, 9 Oct 2014 15:28:23 -0700
Subject: mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set

commit 21caf2fc1931 ("mm: teach mm by current context info to not do I/O
during memory allocation") introduces PF_MEMALLOC_NOIO flag to avoid doing
I/O inside memory allocation, __GFP_IO is cleared when this flag is set,
but __GFP_FS implies __GFP_IO, it should also be cleared.  Or it may still
run into I/O, like in superblock shrinker.  And this will make the kernel
run into the deadlock case described in that commit.

See Dave Chinner's comment about io in superblock shrinker:

Filesystem shrinkers do indeed perform IO from the superblock shrinker and
have for years.  Even clean inodes can require IO before they can be freed
- e.g.  on an orphan list, need truncation of post-eof blocks, need to
wait for ordered operations to complete before it can be freed, etc.

IOWs, Ext4, btrfs and XFS all can issue and/or block on arbitrary amounts
of IO in the superblock shrinker context.  XFS, in particular, has been
doing transactions and IO from the VFS inode cache shrinker since it was
first introduced....

Fix this by clearing __GFP_FS in memalloc_noio_flags(), this function has
masked all the gfp_mask that will be passed into fs for the processes
setting PF_MEMALLOC_NOIO in the direct reclaim path.

v1 thread at: https://lkml.org/lkml/2014/9/3/32

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: joyce.xue <xuejiufei@huawei.com>
Cc: Ming Lei <ming.lei@canonical.com>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c6353d9e63a..5e63ba59258c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1935,11 +1935,13 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
-/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags */
+/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags
+ * __GFP_FS is also cleared as it implies __GFP_IO.
+ */
 static inline gfp_t memalloc_noio_flags(gfp_t flags)
 {
 	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
-		flags &= ~__GFP_IO;
+		flags &= ~(__GFP_IO | __GFP_FS);
 	return flags;
 }
 
-- 
cgit v1.2.3


From 31c9afa6db122a5c7a7843278aaf77dd08ea6e98 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 9 Oct 2014 15:28:37 -0700
Subject: mm: introduce VM_BUG_ON_MM

Very similar to VM_BUG_ON_PAGE and VM_BUG_ON_VMA, dump struct_mm when the
bug is hit.

[akpm@linux-foundation.org: coding-style fixes]
[mhocko@suse.cz: fix build]
[mhocko@suse.cz: fix build some more]
[akpm@linux-foundation.org: do strange things to avoid doing strange things for the comma separators]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 10 +++++++
 mm/debug.c              | 78 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 569e4c8d0ebb..877ef226f90f 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -5,11 +5,13 @@
 
 struct page;
 struct vm_area_struct;
+struct mm_struct;
 
 extern void dump_page(struct page *page, const char *reason);
 extern void dump_page_badflags(struct page *page, const char *reason,
 			       unsigned long badflags);
 void dump_vma(const struct vm_area_struct *vma);
+void dump_mm(const struct mm_struct *mm);
 
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
@@ -27,6 +29,13 @@ void dump_vma(const struct vm_area_struct *vma);
 			BUG();						\
 		}							\
 	} while (0)
+#define VM_BUG_ON_MM(cond, mm)						\
+	do {								\
+		if (unlikely(cond)) {					\
+			dump_mm(mm);					\
+			BUG();						\
+		}							\
+	} while (0)
 #define VM_WARN_ON(cond) WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
 #define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
@@ -34,6 +43,7 @@ void dump_vma(const struct vm_area_struct *vma);
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
 #define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond)
+#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
diff --git a/mm/debug.c b/mm/debug.c
index 697df9050193..5a1b6194089c 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -1,3 +1,10 @@
+/*
+ * mm/debug.c
+ *
+ * mm/ specific debug routines.
+ *
+ */
+
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/ftrace_event.h>
@@ -159,4 +166,75 @@ void dump_vma(const struct vm_area_struct *vma)
 }
 EXPORT_SYMBOL(dump_vma);
 
+void dump_mm(const struct mm_struct *mm)
+{
+	printk(KERN_ALERT
+		"mm %p mmap %p seqnum %d task_size %lu\n"
+#ifdef CONFIG_MMU
+		"get_unmapped_area %p\n"
+#endif
+		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
+		"pgd %p mm_users %d mm_count %d nr_ptes %lu map_count %d\n"
+		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
+		"pinned_vm %lx shared_vm %lx exec_vm %lx stack_vm %lx\n"
+		"start_code %lx end_code %lx start_data %lx end_data %lx\n"
+		"start_brk %lx brk %lx start_stack %lx\n"
+		"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
+		"binfmt %p flags %lx core_state %p\n"
+#ifdef CONFIG_AIO
+		"ioctx_table %p\n"
+#endif
+#ifdef CONFIG_MEMCG
+		"owner %p "
+#endif
+		"exe_file %p\n"
+#ifdef CONFIG_MMU_NOTIFIER
+		"mmu_notifier_mm %p\n"
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+		"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
+#endif
+#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+		"tlb_flush_pending %d\n"
+#endif
+		"%s",	/* This is here to hold the comma */
+
+		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
+#ifdef CONFIG_MMU
+		mm->get_unmapped_area,
+#endif
+		mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
+		mm->pgd, atomic_read(&mm->mm_users),
+		atomic_read(&mm->mm_count),
+		atomic_long_read((atomic_long_t *)&mm->nr_ptes),
+		mm->map_count,
+		mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
+		mm->pinned_vm, mm->shared_vm, mm->exec_vm, mm->stack_vm,
+		mm->start_code, mm->end_code, mm->start_data, mm->end_data,
+		mm->start_brk, mm->brk, mm->start_stack,
+		mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
+		mm->binfmt, mm->flags, mm->core_state,
+#ifdef CONFIG_AIO
+		mm->ioctx_table,
+#endif
+#ifdef CONFIG_MEMCG
+		mm->owner,
+#endif
+		mm->exe_file,
+#ifdef CONFIG_MMU_NOTIFIER
+		mm->mmu_notifier_mm,
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+		mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
+#endif
+#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+		mm->tlb_flush_pending,
+#endif
+		""		/* This is here to not have a comma! */
+		);
+
+		dump_flags(mm->def_flags, vmaflags_names,
+				ARRAY_SIZE(vmaflags_names));
+}
+
 #endif		/* CONFIG_DEBUG_VM */
-- 
cgit v1.2.3


From 33a690c45b202e4c6483bfd1d93ad8d0f51df2ca Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Thu, 9 Oct 2014 15:28:43 -0700
Subject: memcg: move memcg_{alloc,free}_cache_params to slab_common.c

The only reason why they live in memcontrol.c is that we get/put css
reference to the owner memory cgroup in them.  However, we can do that in
memcg_{un,}register_cache.  OTOH, there are several reasons to move them
to slab_common.c.

First, I think that the less public interface functions we have in
memcontrol.h the better.  Since the functions I move don't depend on
memcontrol, I think it's worth making them private to slab, especially
taking into account that the arrays are defined on the slab's side too.

Second, the way how per-memcg arrays are updated looks rather awkward: it
proceeds from memcontrol.c (__memcg_activate_kmem) to slab_common.c
(memcg_update_all_caches) and back to memcontrol.c again
(memcg_update_array_size).  In the following patches I move the function
relocating the arrays (memcg_update_array_size) to slab_common.c and
therefore get rid this circular call path.  I think we should have the
cache allocation stuff in the same place where we have relocation, because
it's easier to follow the code then.  So I move arrays alloc/free
functions to slab_common.c too.

The third point isn't obvious.  I'm going to make the list_lru structure
per-memcg to allow targeted kmem reclaim.  That means we will have
per-memcg arrays in list_lrus too.  It turns out that it's much easier to
update these arrays in list_lru.c rather than in memcontrol.c, because all
the stuff we need is defined there.  This patch makes memcg caches arrays
allocation path conform that of the upcoming list_lru.

So let's move these functions to slab_common.c and make them static.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 14 --------------
 mm/memcontrol.c            | 41 ++++-------------------------------------
 mm/slab_common.c           | 44 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 47 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e0752d204d9e..4d17242eeff7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -440,10 +440,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
 
-int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
-			     struct kmem_cache *root_cache);
-void memcg_free_cache_params(struct kmem_cache *s);
-
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
 
@@ -574,16 +570,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return -1;
 }
 
-static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg,
-		struct kmem_cache *s, struct kmem_cache *root_cache)
-{
-	return 0;
-}
-
-static inline void memcg_free_cache_params(struct kmem_cache *s)
-{
-}
-
 static inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 28928ce9b07f..865e87c014d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2984,43 +2984,6 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 	return 0;
 }
 
-int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
-			     struct kmem_cache *root_cache)
-{
-	size_t size;
-
-	if (!memcg_kmem_enabled())
-		return 0;
-
-	if (!memcg) {
-		size = offsetof(struct memcg_cache_params, memcg_caches);
-		size += memcg_limited_groups_array_size * sizeof(void *);
-	} else
-		size = sizeof(struct memcg_cache_params);
-
-	s->memcg_params = kzalloc(size, GFP_KERNEL);
-	if (!s->memcg_params)
-		return -ENOMEM;
-
-	if (memcg) {
-		s->memcg_params->memcg = memcg;
-		s->memcg_params->root_cache = root_cache;
-		css_get(&memcg->css);
-	} else
-		s->memcg_params->is_root_cache = true;
-
-	return 0;
-}
-
-void memcg_free_cache_params(struct kmem_cache *s)
-{
-	if (!s->memcg_params)
-		return;
-	if (!s->memcg_params->is_root_cache)
-		css_put(&s->memcg_params->memcg->css);
-	kfree(s->memcg_params);
-}
-
 static void memcg_register_cache(struct mem_cgroup *memcg,
 				 struct kmem_cache *root_cache)
 {
@@ -3051,6 +3014,7 @@ static void memcg_register_cache(struct mem_cgroup *memcg,
 	if (!cachep)
 		return;
 
+	css_get(&memcg->css);
 	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
 
 	/*
@@ -3084,6 +3048,9 @@ static void memcg_unregister_cache(struct kmem_cache *cachep)
 	list_del(&cachep->memcg_params->list);
 
 	kmem_cache_destroy(cachep);
+
+	/* drop the reference taken in memcg_register_cache */
+	css_put(&memcg->css);
 }
 
 /*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f206cb10a544..c2a8661f8b81 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -116,6 +116,38 @@ static inline int kmem_cache_sanity_check(const char *name, size_t size)
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
+static int memcg_alloc_cache_params(struct mem_cgroup *memcg,
+		struct kmem_cache *s, struct kmem_cache *root_cache)
+{
+	size_t size;
+
+	if (!memcg_kmem_enabled())
+		return 0;
+
+	if (!memcg) {
+		size = offsetof(struct memcg_cache_params, memcg_caches);
+		size += memcg_limited_groups_array_size * sizeof(void *);
+	} else
+		size = sizeof(struct memcg_cache_params);
+
+	s->memcg_params = kzalloc(size, GFP_KERNEL);
+	if (!s->memcg_params)
+		return -ENOMEM;
+
+	if (memcg) {
+		s->memcg_params->memcg = memcg;
+		s->memcg_params->root_cache = root_cache;
+	} else
+		s->memcg_params->is_root_cache = true;
+
+	return 0;
+}
+
+static void memcg_free_cache_params(struct kmem_cache *s)
+{
+	kfree(s->memcg_params);
+}
+
 int memcg_update_all_caches(int num_memcgs)
 {
 	struct kmem_cache *s;
@@ -141,7 +173,17 @@ out:
 	mutex_unlock(&slab_mutex);
 	return ret;
 }
-#endif
+#else
+static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg,
+		struct kmem_cache *s, struct kmem_cache *root_cache)
+{
+	return 0;
+}
+
+static inline void memcg_free_cache_params(struct kmem_cache *s)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
 
 /*
  * Find a mergeable slab cache
-- 
cgit v1.2.3


From 6f817f4cda68b09621312ec5ba84217bc5e37b3d Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Thu, 9 Oct 2014 15:28:47 -0700
Subject: memcg: move memcg_update_cache_size() to slab_common.c

`While growing per memcg caches arrays, we jump between memcontrol.c and
slab_common.c in a weird way:

  memcg_alloc_cache_id - memcontrol.c
    memcg_update_all_caches - slab_common.c
      memcg_update_cache_size - memcontrol.c

There's absolutely no reason why memcg_update_cache_size can't live on the
slab's side though.  So let's move it there and settle it comfortably amid
per-memcg cache allocation functions.

Besides, this patch cleans this function up a bit, removing all the
useless comments from it, and renames it to memcg_update_cache_params to
conform to memcg_alloc/free_cache_params, which we already have in
slab_common.c.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  1 -
 mm/memcontrol.c            | 49 ----------------------------------------------
 mm/slab_common.c           | 30 ++++++++++++++++++++++++++--
 3 files changed, 28 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4d17242eeff7..19df5d857411 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -440,7 +440,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
 
-int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
 
 struct kmem_cache *
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ef4fbc5e4ca3..fff511e25bb2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2958,55 +2958,6 @@ void memcg_update_array_size(int num)
 	memcg_limited_groups_array_size = num;
 }
 
-int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
-{
-	struct memcg_cache_params *cur_params = s->memcg_params;
-	struct memcg_cache_params *new_params;
-	size_t size;
-	int i;
-
-	VM_BUG_ON(!is_root_cache(s));
-
-	size = num_groups * sizeof(void *);
-	size += offsetof(struct memcg_cache_params, memcg_caches);
-
-	new_params = kzalloc(size, GFP_KERNEL);
-	if (!new_params)
-		return -ENOMEM;
-
-	new_params->is_root_cache = true;
-
-	/*
-	 * There is the chance it will be bigger than
-	 * memcg_limited_groups_array_size, if we failed an allocation
-	 * in a cache, in which case all caches updated before it, will
-	 * have a bigger array.
-	 *
-	 * But if that is the case, the data after
-	 * memcg_limited_groups_array_size is certainly unused
-	 */
-	for (i = 0; i < memcg_limited_groups_array_size; i++) {
-		if (!cur_params->memcg_caches[i])
-			continue;
-		new_params->memcg_caches[i] =
-			cur_params->memcg_caches[i];
-	}
-
-	/*
-	 * Ideally, we would wait until all caches succeed, and only
-	 * then free the old one. But this is not worth the extra
-	 * pointer per-cache we'd have to have for this.
-	 *
-	 * It is not a big deal if some caches are left with a size
-	 * bigger than the others. And all updates will reset this
-	 * anyway.
-	 */
-	rcu_assign_pointer(s->memcg_params, new_params);
-	if (cur_params)
-		kfree_rcu(cur_params, rcu_head);
-	return 0;
-}
-
 static void memcg_register_cache(struct mem_cgroup *memcg,
 				 struct kmem_cache *root_cache)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index c2a8661f8b81..3a6e0cfdf03a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -148,6 +148,33 @@ static void memcg_free_cache_params(struct kmem_cache *s)
 	kfree(s->memcg_params);
 }
 
+static int memcg_update_cache_params(struct kmem_cache *s, int num_memcgs)
+{
+	int size;
+	struct memcg_cache_params *new_params, *cur_params;
+
+	BUG_ON(!is_root_cache(s));
+
+	size = offsetof(struct memcg_cache_params, memcg_caches);
+	size += num_memcgs * sizeof(void *);
+
+	new_params = kzalloc(size, GFP_KERNEL);
+	if (!new_params)
+		return -ENOMEM;
+
+	cur_params = s->memcg_params;
+	memcpy(new_params->memcg_caches, cur_params->memcg_caches,
+	       memcg_limited_groups_array_size * sizeof(void *));
+
+	new_params->is_root_cache = true;
+
+	rcu_assign_pointer(s->memcg_params, new_params);
+	if (cur_params)
+		kfree_rcu(cur_params, rcu_head);
+
+	return 0;
+}
+
 int memcg_update_all_caches(int num_memcgs)
 {
 	struct kmem_cache *s;
@@ -158,9 +185,8 @@ int memcg_update_all_caches(int num_memcgs)
 		if (!is_root_cache(s))
 			continue;
 
-		ret = memcg_update_cache_size(s, num_memcgs);
+		ret = memcg_update_cache_params(s, num_memcgs);
 		/*
-		 * See comment in memcontrol.c, memcg_update_cache_size:
 		 * Instead of freeing the memory, we'll just leave the caches
 		 * up to this point in an updated state.
 		 */
-- 
cgit v1.2.3


From b70a2a21dc9d4ad455931b53131a0cb4fc01fafe Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 9 Oct 2014 15:28:56 -0700
Subject: mm: memcontrol: fix transparent huge page allocations under pressure

In a memcg with even just moderate cache pressure, success rates for
transparent huge page allocations drop to zero, wasting a lot of effort
that the allocator puts into assembling these pages.

The reason for this is that the memcg reclaim code was never designed for
higher-order charges.  It reclaims in small batches until there is room
for at least one page.  Huge page charges only succeed when these batches
add up over a series of huge faults, which is unlikely under any
significant load involving order-0 allocations in the group.

Remove that loop on the memcg side in favor of passing the actual reclaim
goal to direct reclaim, which is already set up and optimized to meet
higher-order goals efficiently.

This brings memcg's THP policy in line with the system policy: if the
allocator painstakingly assembles a hugepage, memcg will at least make an
honest effort to charge it.  As a result, transparent hugepage allocation
rates amid cache activity are drastically improved:

                                      vanilla                 patched
pgalloc                 4717530.80 (  +0.00%)   4451376.40 (  -5.64%)
pgfault                  491370.60 (  +0.00%)    225477.40 ( -54.11%)
pgmajfault                    2.00 (  +0.00%)         1.80 (  -6.67%)
thp_fault_alloc               0.00 (  +0.00%)       531.60 (+100.00%)
thp_fault_fallback          749.00 (  +0.00%)       217.40 ( -70.88%)

[ Note: this may in turn increase memory consumption from internal
  fragmentation, which is an inherent risk of transparent hugepages.
  Some setups may have to adjust the memcg limits accordingly to
  accomodate this - or, if the machine is already packed to capacity,
  disable the transparent huge page feature. ]

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Dave Hansen <dave@sr71.net>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  6 +++--
 mm/memcontrol.c      | 69 +++++++++++++---------------------------------------
 mm/vmscan.c          |  7 +++---
 3 files changed, 25 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ea4f926e6b9b..37a585beef5c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -327,8 +327,10 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
 extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
-extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
-						  gfp_t gfp_mask, bool noswap);
+extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+						  unsigned long nr_pages,
+						  gfp_t gfp_mask,
+						  bool may_swap);
 extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						struct zone *zone,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9cda99dfac4f..c86cc442ada4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -480,14 +480,6 @@ enum res_type {
 /* Used for OOM nofiier */
 #define OOM_CONTROL		(0)
 
-/*
- * Reclaim flags for mem_cgroup_hierarchical_reclaim
- */
-#define MEM_CGROUP_RECLAIM_NOSWAP_BIT	0x0
-#define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
-#define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
-#define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-
 /*
  * The memcg_create_mutex will be held whenever a new cgroup is created.
  * As a consequence, any change that needs to protect against new child cgroups
@@ -1805,40 +1797,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			 NULL, "Memory cgroup out of memory");
 }
 
-static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
-					gfp_t gfp_mask,
-					unsigned long flags)
-{
-	unsigned long total = 0;
-	bool noswap = false;
-	int loop;
-
-	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
-		noswap = true;
-
-	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
-		if (loop)
-			drain_all_stock_async(memcg);
-		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
-		/*
-		 * Allow limit shrinkers, which are triggered directly
-		 * by userspace, to catch signals and stop reclaim
-		 * after minimal progress, regardless of the margin.
-		 */
-		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
-			break;
-		if (mem_cgroup_margin(memcg))
-			break;
-		/*
-		 * If nothing was reclaimed after two attempts, there
-		 * may be no reclaimable pages in this hierarchy.
-		 */
-		if (loop && !total)
-			break;
-	}
-	return total;
-}
-
 /**
  * test_mem_cgroup_node_reclaimable
  * @memcg: the target memcg
@@ -2541,8 +2499,9 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	struct mem_cgroup *mem_over_limit;
 	struct res_counter *fail_res;
 	unsigned long nr_reclaimed;
-	unsigned long flags = 0;
 	unsigned long long size;
+	bool may_swap = true;
+	bool drained = false;
 	int ret = 0;
 
 	if (mem_cgroup_is_root(memcg))
@@ -2561,7 +2520,7 @@ retry:
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
 	} else {
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
-		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
+		may_swap = false;
 	}
 
 	if (batch > nr_pages) {
@@ -2586,11 +2545,18 @@ retry:
 	if (!(gfp_mask & __GFP_WAIT))
 		goto nomem;
 
-	nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
+	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
+						    gfp_mask, may_swap);
 
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		goto retry;
 
+	if (!drained) {
+		drain_all_stock_async(mem_over_limit);
+		drained = true;
+		goto retry;
+	}
+
 	if (gfp_mask & __GFP_NORETRY)
 		goto nomem;
 	/*
@@ -3666,8 +3632,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_reclaim(memcg, GFP_KERNEL,
-				   MEM_CGROUP_RECLAIM_SHRINK);
+		try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true);
+
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3717,9 +3683,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_reclaim(memcg, GFP_KERNEL,
-				   MEM_CGROUP_RECLAIM_NOSWAP |
-				   MEM_CGROUP_RECLAIM_SHRINK);
+		try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, false);
+
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3968,8 +3933,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 		if (signal_pending(current))
 			return -EINTR;
 
-		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
-						false);
+		progress = try_to_free_mem_cgroup_pages(memcg, 1,
+							GFP_KERNEL, true);
 		if (!progress) {
 			nr_retries--;
 			/* maybe some writeback is necessary */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 06123f20a326..dcb47074ae03 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2759,21 +2759,22 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
 }
 
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+					   unsigned long nr_pages,
 					   gfp_t gfp_mask,
-					   bool noswap)
+					   bool may_swap)
 {
 	struct zonelist *zonelist;
 	unsigned long nr_reclaimed;
 	int nid;
 	struct scan_control sc = {
-		.nr_to_reclaim = SWAP_CLUSTER_MAX,
+		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
 		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
 		.target_mem_cgroup = memcg,
 		.priority = DEF_PRIORITY,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
-		.may_swap = !noswap,
+		.may_swap = may_swap,
 	};
 
 	/*
-- 
cgit v1.2.3


From d6d86c0a7f8ddc5b38cf089222cb1d9540762dc2 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Date: Thu, 9 Oct 2014 15:29:27 -0700
Subject: mm/balloon_compaction: redesign ballooned pages management

Sasha Levin reported KASAN splash inside isolate_migratepages_range().
Problem is in the function __is_movable_balloon_page() which tests
AS_BALLOON_MAP in page->mapping->flags.  This function has no protection
against anonymous pages.  As result it tried to check address space flags
inside struct anon_vma.

Further investigation shows more problems in current implementation:

* Special branch in __unmap_and_move() never works:
  balloon_page_movable() checks page flags and page_count.  In
  __unmap_and_move() page is locked, reference counter is elevated, thus
  balloon_page_movable() always fails.  As a result execution goes to the
  normal migration path.  virtballoon_migratepage() returns
  MIGRATEPAGE_BALLOON_SUCCESS instead of MIGRATEPAGE_SUCCESS,
  move_to_new_page() thinks this is an error code and assigns
  newpage->mapping to NULL.  Newly migrated page lose connectivity with
  balloon an all ability for further migration.

* lru_lock erroneously required in isolate_migratepages_range() for
  isolation ballooned page.  This function releases lru_lock periodically,
  this makes migration mostly impossible for some pages.

* balloon_page_dequeue have a tight race with balloon_page_isolate:
  balloon_page_isolate could be executed in parallel with dequeue between
  picking page from list and locking page_lock.  Race is rare because they
  use trylock_page() for locking.

This patch fixes all of them.

Instead of fake mapping with special flag this patch uses special state of
page->_mapcount: PAGE_BALLOON_MAPCOUNT_VALUE = -256.  Buddy allocator uses
PAGE_BUDDY_MAPCOUNT_VALUE = -128 for similar purpose.  Storing mark
directly in struct page makes everything safer and easier.

PagePrivate is used to mark pages present in page list (i.e.  not
isolated, like PageLRU for normal pages).  It replaces special rules for
reference counter and makes balloon migration similar to migration of
normal pages.  This flag is protected by page_lock together with link to
the balloon device.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Link: http://lkml.kernel.org/p/53E6CEAA.9020105@oracle.com
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: <stable@vger.kernel.org>	[3.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/virtio/virtio_balloon.c    | 15 +++---
 include/linux/balloon_compaction.h | 97 ++++++++++----------------------------
 include/linux/migrate.h            | 11 +----
 include/linux/mm.h                 | 19 ++++++++
 mm/balloon_compaction.c            | 26 +++++-----
 mm/compaction.c                    |  2 +-
 mm/migrate.c                       | 16 ++-----
 7 files changed, 68 insertions(+), 118 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 25ebe8eecdb7..c3eb93fc9261 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -163,8 +163,8 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
 	/* Find pfns pointing at start of each page, get pages and free them. */
 	for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
 		struct page *page = balloon_pfn_to_page(pfns[i]);
-		balloon_page_free(page);
 		adjust_managed_page_count(page, 1);
+		put_page(page); /* balloon reference */
 	}
 }
 
@@ -395,6 +395,8 @@ static int virtballoon_migratepage(struct address_space *mapping,
 	if (!mutex_trylock(&vb->balloon_lock))
 		return -EAGAIN;
 
+	get_page(newpage); /* balloon reference */
+
 	/* balloon's page migration 1st step  -- inflate "newpage" */
 	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
 	balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
@@ -404,12 +406,7 @@ static int virtballoon_migratepage(struct address_space *mapping,
 	set_page_pfns(vb->pfns, newpage);
 	tell_host(vb, vb->inflate_vq);
 
-	/*
-	 * balloon's page migration 2nd step -- deflate "page"
-	 *
-	 * It's safe to delete page->lru here because this page is at
-	 * an isolated migration list, and this step is expected to happen here
-	 */
+	/* balloon's page migration 2nd step -- deflate "page" */
 	balloon_page_delete(page);
 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
 	set_page_pfns(vb->pfns, page);
@@ -417,7 +414,9 @@ static int virtballoon_migratepage(struct address_space *mapping,
 
 	mutex_unlock(&vb->balloon_lock);
 
-	return MIGRATEPAGE_BALLOON_SUCCESS;
+	put_page(page); /* balloon reference */
+
+	return MIGRATEPAGE_SUCCESS;
 }
 
 /* define the balloon_mapping->a_ops callback to allow balloon page migration */
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 089743ade734..38aa07d5b81c 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -27,10 +27,13 @@
  *      counter raised only while it is under our special handling;
  *
  * iii. after the lockless scan step have selected a potential balloon page for
- *      isolation, re-test the page->mapping flags and the page ref counter
+ *      isolation, re-test the PageBalloon mark and the PagePrivate flag
  *      under the proper page lock, to ensure isolating a valid balloon page
  *      (not yet isolated, nor under release procedure)
  *
+ *  iv. isolation or dequeueing procedure must clear PagePrivate flag under
+ *      page lock together with removing page from balloon device page list.
+ *
  * The functions provided by this interface are placed to help on coping with
  * the aforementioned balloon page corner case, as well as to ensure the simple
  * set of exposed rules are satisfied while we are dealing with balloon pages
@@ -71,28 +74,6 @@ static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
 	kfree(b_dev_info);
 }
 
-/*
- * balloon_page_free - release a balloon page back to the page free lists
- * @page: ballooned page to be set free
- *
- * This function must be used to properly set free an isolated/dequeued balloon
- * page at the end of a sucessful page migration, or at the balloon driver's
- * page release procedure.
- */
-static inline void balloon_page_free(struct page *page)
-{
-	/*
-	 * Balloon pages always get an extra refcount before being isolated
-	 * and before being dequeued to help on sorting out fortuite colisions
-	 * between a thread attempting to isolate and another thread attempting
-	 * to release the very same balloon page.
-	 *
-	 * Before we handle the page back to Buddy, lets drop its extra refcnt.
-	 */
-	put_page(page);
-	__free_page(page);
-}
-
 #ifdef CONFIG_BALLOON_COMPACTION
 extern bool balloon_page_isolate(struct page *page);
 extern void balloon_page_putback(struct page *page);
@@ -108,74 +89,33 @@ static inline void balloon_mapping_free(struct address_space *balloon_mapping)
 }
 
 /*
- * page_flags_cleared - helper to perform balloon @page ->flags tests.
- *
- * As balloon pages are obtained from buddy and we do not play with page->flags
- * at driver level (exception made when we get the page lock for compaction),
- * we can safely identify a ballooned page by checking if the
- * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared.  This approach also
- * helps us skip ballooned pages that are locked for compaction or release, thus
- * mitigating their racy check at balloon_page_movable()
- */
-static inline bool page_flags_cleared(struct page *page)
-{
-	return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP);
-}
-
-/*
- * __is_movable_balloon_page - helper to perform @page mapping->flags tests
+ * __is_movable_balloon_page - helper to perform @page PageBalloon tests
  */
 static inline bool __is_movable_balloon_page(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	return mapping_balloon(mapping);
+	return PageBalloon(page);
 }
 
 /*
- * balloon_page_movable - test page->mapping->flags to identify balloon pages
- *			  that can be moved by compaction/migration.
- *
- * This function is used at core compaction's page isolation scheme, therefore
- * most pages exposed to it are not enlisted as balloon pages and so, to avoid
- * undesired side effects like racing against __free_pages(), we cannot afford
- * holding the page locked while testing page->mapping->flags here.
+ * balloon_page_movable - test PageBalloon to identify balloon pages
+ *			  and PagePrivate to check that the page is not
+ *			  isolated and can be moved by compaction/migration.
  *
  * As we might return false positives in the case of a balloon page being just
- * released under us, the page->mapping->flags need to be re-tested later,
- * under the proper page lock, at the functions that will be coping with the
- * balloon page case.
+ * released under us, this need to be re-tested later, under the page lock.
  */
 static inline bool balloon_page_movable(struct page *page)
 {
-	/*
-	 * Before dereferencing and testing mapping->flags, let's make sure
-	 * this is not a page that uses ->mapping in a different way
-	 */
-	if (page_flags_cleared(page) && !page_mapped(page) &&
-	    page_count(page) == 1)
-		return __is_movable_balloon_page(page);
-
-	return false;
+	return PageBalloon(page) && PagePrivate(page);
 }
 
 /*
  * isolated_balloon_page - identify an isolated balloon page on private
  *			   compaction/migration page lists.
- *
- * After a compaction thread isolates a balloon page for migration, it raises
- * the page refcount to prevent concurrent compaction threads from re-isolating
- * the same page. For that reason putback_movable_pages(), or other routines
- * that need to identify isolated balloon pages on private pagelists, cannot
- * rely on balloon_page_movable() to accomplish the task.
  */
 static inline bool isolated_balloon_page(struct page *page)
 {
-	/* Already isolated balloon pages, by default, have a raised refcount */
-	if (page_flags_cleared(page) && !page_mapped(page) &&
-	    page_count(page) >= 2)
-		return __is_movable_balloon_page(page);
-
-	return false;
+	return PageBalloon(page);
 }
 
 /*
@@ -192,6 +132,8 @@ static inline void balloon_page_insert(struct page *page,
 				       struct address_space *mapping,
 				       struct list_head *head)
 {
+	__SetPageBalloon(page);
+	SetPagePrivate(page);
 	page->mapping = mapping;
 	list_add(&page->lru, head);
 }
@@ -206,8 +148,12 @@ static inline void balloon_page_insert(struct page *page,
  */
 static inline void balloon_page_delete(struct page *page)
 {
+	__ClearPageBalloon(page);
 	page->mapping = NULL;
-	list_del(&page->lru);
+	if (PagePrivate(page)) {
+		ClearPagePrivate(page);
+		list_del(&page->lru);
+	}
 }
 
 /*
@@ -258,6 +204,11 @@ static inline void balloon_page_delete(struct page *page)
 	list_del(&page->lru);
 }
 
+static inline bool __is_movable_balloon_page(struct page *page)
+{
+	return false;
+}
+
 static inline bool balloon_page_movable(struct page *page)
 {
 	return false;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index b66fd10f4b93..01aad3ed89ec 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -13,18 +13,9 @@ typedef void free_page_t(struct page *page, unsigned long private);
  * Return values from addresss_space_operations.migratepage():
  * - negative errno on page migration failure;
  * - zero on page migration success;
- *
- * The balloon page migration introduces this special case where a 'distinct'
- * return code is used to flag a successful page migration to unmap_and_move().
- * This approach is necessary because page migration can race against balloon
- * deflation procedure, and for such case we could introduce a nasty page leak
- * if a successfully migrated balloon page gets released concurrently with
- * migration's unmap_and_move() wrap-up steps.
  */
 #define MIGRATEPAGE_SUCCESS		0
-#define MIGRATEPAGE_BALLOON_SUCCESS	1 /* special ret code for balloon page
-					   * sucessful migration case.
-					   */
+
 enum migrate_reason {
 	MR_COMPACTION,
 	MR_MEMORY_FAILURE,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4d814aa97785..fa0d74e06428 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -554,6 +554,25 @@ static inline void __ClearPageBuddy(struct page *page)
 	atomic_set(&page->_mapcount, -1);
 }
 
+#define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
+
+static inline int PageBalloon(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageBalloon(struct page *page)
+{
+	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
+	atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageBalloon(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageBalloon(page), page);
+	atomic_set(&page->_mapcount, -1);
+}
+
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 6e45a5074bf0..52abeeb3cb9d 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -93,17 +93,12 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
 		 * to be released by the balloon driver.
 		 */
 		if (trylock_page(page)) {
+			if (!PagePrivate(page)) {
+				/* raced with isolation */
+				unlock_page(page);
+				continue;
+			}
 			spin_lock_irqsave(&b_dev_info->pages_lock, flags);
-			/*
-			 * Raise the page refcount here to prevent any wrong
-			 * attempt to isolate this page, in case of coliding
-			 * with balloon_page_isolate() just after we release
-			 * the page lock.
-			 *
-			 * balloon_page_free() will take care of dropping
-			 * this extra refcount later.
-			 */
-			get_page(page);
 			balloon_page_delete(page);
 			spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 			unlock_page(page);
@@ -187,7 +182,9 @@ static inline void __isolate_balloon_page(struct page *page)
 {
 	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
 	unsigned long flags;
+
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	ClearPagePrivate(page);
 	list_del(&page->lru);
 	b_dev_info->isolated_pages++;
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
@@ -197,7 +194,9 @@ static inline void __putback_balloon_page(struct page *page)
 {
 	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
 	unsigned long flags;
+
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	SetPagePrivate(page);
 	list_add(&page->lru, &b_dev_info->pages);
 	b_dev_info->isolated_pages--;
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
@@ -235,12 +234,11 @@ bool balloon_page_isolate(struct page *page)
 		 */
 		if (likely(trylock_page(page))) {
 			/*
-			 * A ballooned page, by default, has just one refcount.
+			 * A ballooned page, by default, has PagePrivate set.
 			 * Prevent concurrent compaction threads from isolating
-			 * an already isolated balloon page by refcount check.
+			 * an already isolated balloon page by clearing it.
 			 */
-			if (__is_movable_balloon_page(page) &&
-			    page_count(page) == 2) {
+			if (balloon_page_movable(page)) {
 				__isolate_balloon_page(page);
 				unlock_page(page);
 				return true;
diff --git a/mm/compaction.c b/mm/compaction.c
index b9972c0fd917..edba18aed173 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -640,7 +640,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 */
 		if (!PageLRU(page)) {
 			if (unlikely(balloon_page_movable(page))) {
-				if (locked && balloon_page_isolate(page)) {
+				if (balloon_page_isolate(page)) {
 					/* Successfully isolated */
 					goto isolate_success;
 				}
diff --git a/mm/migrate.c b/mm/migrate.c
index 2740360cd216..01439953abf5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -876,7 +876,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		}
 	}
 
-	if (unlikely(balloon_page_movable(page))) {
+	if (unlikely(isolated_balloon_page(page))) {
 		/*
 		 * A ballooned page does not need any special attention from
 		 * physical to virtual reverse mapping procedures.
@@ -955,17 +955,6 @@ static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
 
 	rc = __unmap_and_move(page, newpage, force, mode);
 
-	if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) {
-		/*
-		 * A ballooned page has been migrated already.
-		 * Now, it's the time to wrap-up counters,
-		 * handle the page back to Buddy and return.
-		 */
-		dec_zone_page_state(page, NR_ISOLATED_ANON +
-				    page_is_file_cache(page));
-		balloon_page_free(page);
-		return MIGRATEPAGE_SUCCESS;
-	}
 out:
 	if (rc != -EAGAIN) {
 		/*
@@ -988,6 +977,9 @@ out:
 	if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
 		ClearPageSwapBacked(newpage);
 		put_new_page(newpage, private);
+	} else if (unlikely(__is_movable_balloon_page(newpage))) {
+		/* drop our reference, page already in the balloon */
+		put_page(newpage);
 	} else
 		putback_lru_page(newpage);
 
-- 
cgit v1.2.3


From 9d1ba8056474a208ed9efb7e58cd014795d9f818 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Date: Thu, 9 Oct 2014 15:29:29 -0700
Subject: mm/balloon_compaction: remove balloon mapping and flag AS_BALLOON_MAP

Now ballooned pages are detected using PageBalloon().  Fake mapping is no
longer required.  This patch links ballooned pages to balloon device using
field page->private instead of page->mapping.  Also this patch embeds
balloon_dev_info directly into struct virtio_balloon.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/virtio/virtio_balloon.c    | 60 ++++++------------------
 include/linux/balloon_compaction.h | 72 ++++++++---------------------
 include/linux/pagemap.h            | 18 +-------
 mm/balloon_compaction.c            | 95 +++-----------------------------------
 4 files changed, 39 insertions(+), 206 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index c3eb93fc9261..2bad7f9dd2ac 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -59,7 +59,7 @@ struct virtio_balloon
 	 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
 	 * to num_pages above.
 	 */
-	struct balloon_dev_info *vb_dev_info;
+	struct balloon_dev_info vb_dev_info;
 
 	/* Synchronize access/update to this struct virtio_balloon elements */
 	struct mutex balloon_lock;
@@ -127,7 +127,7 @@ static void set_page_pfns(u32 pfns[], struct page *page)
 
 static void fill_balloon(struct virtio_balloon *vb, size_t num)
 {
-	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
+	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
 
 	/* We can only do one array worth at a time. */
 	num = min(num, ARRAY_SIZE(vb->pfns));
@@ -171,7 +171,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
 static void leak_balloon(struct virtio_balloon *vb, size_t num)
 {
 	struct page *page;
-	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
+	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
 
 	/* We can only do one array worth at a time. */
 	num = min(num, ARRAY_SIZE(vb->pfns));
@@ -353,12 +353,11 @@ static int init_vqs(struct virtio_balloon *vb)
 	return 0;
 }
 
-static const struct address_space_operations virtio_balloon_aops;
 #ifdef CONFIG_BALLOON_COMPACTION
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
  *			     a compation thread.     (called under page lock)
- * @mapping: the page->mapping which will be assigned to the new migrated page.
+ * @vb_dev_info: the balloon device
  * @newpage: page that will replace the isolated page after migration finishes.
  * @page   : the isolated (old) page that is about to be migrated to newpage.
  * @mode   : compaction mode -- not used for balloon page migration.
@@ -373,17 +372,13 @@ static const struct address_space_operations virtio_balloon_aops;
  * This function preforms the balloon page migration task.
  * Called through balloon_mapping->a_ops->migratepage
  */
-static int virtballoon_migratepage(struct address_space *mapping,
+static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
 		struct page *newpage, struct page *page, enum migrate_mode mode)
 {
-	struct balloon_dev_info *vb_dev_info = balloon_page_device(page);
-	struct virtio_balloon *vb;
+	struct virtio_balloon *vb = container_of(vb_dev_info,
+			struct virtio_balloon, vb_dev_info);
 	unsigned long flags;
 
-	BUG_ON(!vb_dev_info);
-
-	vb = vb_dev_info->balloon_device;
-
 	/*
 	 * In order to avoid lock contention while migrating pages concurrently
 	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock
@@ -399,7 +394,7 @@ static int virtballoon_migratepage(struct address_space *mapping,
 
 	/* balloon's page migration 1st step  -- inflate "newpage" */
 	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
-	balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
+	balloon_page_insert(vb_dev_info, newpage);
 	vb_dev_info->isolated_pages--;
 	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
@@ -418,18 +413,11 @@ static int virtballoon_migratepage(struct address_space *mapping,
 
 	return MIGRATEPAGE_SUCCESS;
 }
-
-/* define the balloon_mapping->a_ops callback to allow balloon page migration */
-static const struct address_space_operations virtio_balloon_aops = {
-			.migratepage = virtballoon_migratepage,
-};
 #endif /* CONFIG_BALLOON_COMPACTION */
 
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
-	struct address_space *vb_mapping;
-	struct balloon_dev_info *vb_devinfo;
 	int err;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -445,30 +433,14 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	vb->vdev = vdev;
 	vb->need_stats_update = 0;
 
-	vb_devinfo = balloon_devinfo_alloc(vb);
-	if (IS_ERR(vb_devinfo)) {
-		err = PTR_ERR(vb_devinfo);
-		goto out_free_vb;
-	}
-
-	vb_mapping = balloon_mapping_alloc(vb_devinfo,
-					   (balloon_compaction_check()) ?
-					   &virtio_balloon_aops : NULL);
-	if (IS_ERR(vb_mapping)) {
-		/*
-		 * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP
-		 * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off.
-		 */
-		err = PTR_ERR(vb_mapping);
-		if (err != -EOPNOTSUPP)
-			goto out_free_vb_devinfo;
-	}
-
-	vb->vb_dev_info = vb_devinfo;
+	balloon_devinfo_init(&vb->vb_dev_info);
+#ifdef CONFIG_BALLOON_COMPACTION
+	vb->vb_dev_info.migratepage = virtballoon_migratepage;
+#endif
 
 	err = init_vqs(vb);
 	if (err)
-		goto out_free_vb_mapping;
+		goto out_free_vb;
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
@@ -480,10 +452,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
 out_del_vqs:
 	vdev->config->del_vqs(vdev);
-out_free_vb_mapping:
-	balloon_mapping_free(vb_mapping);
-out_free_vb_devinfo:
-	balloon_devinfo_free(vb_devinfo);
 out_free_vb:
 	kfree(vb);
 out:
@@ -509,8 +477,6 @@ static void virtballoon_remove(struct virtio_device *vdev)
 
 	kthread_stop(vb->thread);
 	remove_common(vb);
-	balloon_mapping_free(vb->vb_dev_info->mapping);
-	balloon_devinfo_free(vb->vb_dev_info);
 	kfree(vb);
 }
 
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 38aa07d5b81c..bc3d2985cc9a 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -57,21 +57,22 @@
  * balloon driver as a page book-keeper for its registered balloon devices.
  */
 struct balloon_dev_info {
-	void *balloon_device;		/* balloon device descriptor */
-	struct address_space *mapping;	/* balloon special page->mapping */
 	unsigned long isolated_pages;	/* # of isolated pages for migration */
 	spinlock_t pages_lock;		/* Protection to pages list */
 	struct list_head pages;		/* Pages enqueued & handled to Host */
+	int (*migratepage)(struct balloon_dev_info *, struct page *newpage,
+			struct page *page, enum migrate_mode mode);
 };
 
 extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info);
 extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
-extern struct balloon_dev_info *balloon_devinfo_alloc(
-						void *balloon_dev_descriptor);
 
-static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
+static inline void balloon_devinfo_init(struct balloon_dev_info *balloon)
 {
-	kfree(b_dev_info);
+	balloon->isolated_pages = 0;
+	spin_lock_init(&balloon->pages_lock);
+	INIT_LIST_HEAD(&balloon->pages);
+	balloon->migratepage = NULL;
 }
 
 #ifdef CONFIG_BALLOON_COMPACTION
@@ -79,14 +80,6 @@ extern bool balloon_page_isolate(struct page *page);
 extern void balloon_page_putback(struct page *page);
 extern int balloon_page_migrate(struct page *newpage,
 				struct page *page, enum migrate_mode mode);
-extern struct address_space
-*balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
-			const struct address_space_operations *a_ops);
-
-static inline void balloon_mapping_free(struct address_space *balloon_mapping)
-{
-	kfree(balloon_mapping);
-}
 
 /*
  * __is_movable_balloon_page - helper to perform @page PageBalloon tests
@@ -120,27 +113,25 @@ static inline bool isolated_balloon_page(struct page *page)
 
 /*
  * balloon_page_insert - insert a page into the balloon's page list and make
- *		         the page->mapping assignment accordingly.
+ *			 the page->private assignment accordingly.
+ * @balloon : pointer to balloon device
  * @page    : page to be assigned as a 'balloon page'
- * @mapping : allocated special 'balloon_mapping'
- * @head    : balloon's device page list head
  *
  * Caller must ensure the page is locked and the spin_lock protecting balloon
  * pages list is held before inserting a page into the balloon device.
  */
-static inline void balloon_page_insert(struct page *page,
-				       struct address_space *mapping,
-				       struct list_head *head)
+static inline void balloon_page_insert(struct balloon_dev_info *balloon,
+				       struct page *page)
 {
 	__SetPageBalloon(page);
 	SetPagePrivate(page);
-	page->mapping = mapping;
-	list_add(&page->lru, head);
+	set_page_private(page, (unsigned long)balloon);
+	list_add(&page->lru, &balloon->pages);
 }
 
 /*
  * balloon_page_delete - delete a page from balloon's page list and clear
- *			 the page->mapping assignement accordingly.
+ *			 the page->private assignement accordingly.
  * @page    : page to be released from balloon's page list
  *
  * Caller must ensure the page is locked and the spin_lock protecting balloon
@@ -149,7 +140,7 @@ static inline void balloon_page_insert(struct page *page,
 static inline void balloon_page_delete(struct page *page)
 {
 	__ClearPageBalloon(page);
-	page->mapping = NULL;
+	set_page_private(page, 0);
 	if (PagePrivate(page)) {
 		ClearPagePrivate(page);
 		list_del(&page->lru);
@@ -162,11 +153,7 @@ static inline void balloon_page_delete(struct page *page)
  */
 static inline struct balloon_dev_info *balloon_page_device(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	if (likely(mapping))
-		return mapping->private_data;
-
-	return NULL;
+	return (struct balloon_dev_info *)page_private(page);
 }
 
 static inline gfp_t balloon_mapping_gfp_mask(void)
@@ -174,29 +161,12 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 	return GFP_HIGHUSER_MOVABLE;
 }
 
-static inline bool balloon_compaction_check(void)
-{
-	return true;
-}
-
 #else /* !CONFIG_BALLOON_COMPACTION */
 
-static inline void *balloon_mapping_alloc(void *balloon_device,
-				const struct address_space_operations *a_ops)
+static inline void balloon_page_insert(struct balloon_dev_info *balloon,
+				       struct page *page)
 {
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void balloon_mapping_free(struct address_space *balloon_mapping)
-{
-	return;
-}
-
-static inline void balloon_page_insert(struct page *page,
-				       struct address_space *mapping,
-				       struct list_head *head)
-{
-	list_add(&page->lru, head);
+	list_add(&page->lru, &balloon->pages);
 }
 
 static inline void balloon_page_delete(struct page *page)
@@ -240,9 +210,5 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 	return GFP_HIGHUSER;
 }
 
-static inline bool balloon_compaction_check(void)
-{
-	return false;
-}
 #endif /* CONFIG_BALLOON_COMPACTION */
 #endif /* _LINUX_BALLOON_COMPACTION_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 19191d39c4f3..7ea069cd3257 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -24,8 +24,7 @@ enum mapping_flags {
 	AS_ENOSPC	= __GFP_BITS_SHIFT + 1,	/* ENOSPC on async write */
 	AS_MM_ALL_LOCKS	= __GFP_BITS_SHIFT + 2,	/* under mm_take_all_locks() */
 	AS_UNEVICTABLE	= __GFP_BITS_SHIFT + 3,	/* e.g., ramdisk, SHM_LOCK */
-	AS_BALLOON_MAP  = __GFP_BITS_SHIFT + 4, /* balloon page special map */
-	AS_EXITING	= __GFP_BITS_SHIFT + 5, /* final truncate in progress */
+	AS_EXITING	= __GFP_BITS_SHIFT + 4, /* final truncate in progress */
 };
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
@@ -55,21 +54,6 @@ static inline int mapping_unevictable(struct address_space *mapping)
 	return !!mapping;
 }
 
-static inline void mapping_set_balloon(struct address_space *mapping)
-{
-	set_bit(AS_BALLOON_MAP, &mapping->flags);
-}
-
-static inline void mapping_clear_balloon(struct address_space *mapping)
-{
-	clear_bit(AS_BALLOON_MAP, &mapping->flags);
-}
-
-static inline int mapping_balloon(struct address_space *mapping)
-{
-	return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags);
-}
-
 static inline void mapping_set_exiting(struct address_space *mapping)
 {
 	set_bit(AS_EXITING, &mapping->flags);
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 52abeeb3cb9d..3afdabdbc0a4 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -10,32 +10,6 @@
 #include <linux/export.h>
 #include <linux/balloon_compaction.h>
 
-/*
- * balloon_devinfo_alloc - allocates a balloon device information descriptor.
- * @balloon_dev_descriptor: pointer to reference the balloon device which
- *                          this struct balloon_dev_info will be servicing.
- *
- * Driver must call it to properly allocate and initialize an instance of
- * struct balloon_dev_info which will be used to reference a balloon device
- * as well as to keep track of the balloon device page list.
- */
-struct balloon_dev_info *balloon_devinfo_alloc(void *balloon_dev_descriptor)
-{
-	struct balloon_dev_info *b_dev_info;
-	b_dev_info = kmalloc(sizeof(*b_dev_info), GFP_KERNEL);
-	if (!b_dev_info)
-		return ERR_PTR(-ENOMEM);
-
-	b_dev_info->balloon_device = balloon_dev_descriptor;
-	b_dev_info->mapping = NULL;
-	b_dev_info->isolated_pages = 0;
-	spin_lock_init(&b_dev_info->pages_lock);
-	INIT_LIST_HEAD(&b_dev_info->pages);
-
-	return b_dev_info;
-}
-EXPORT_SYMBOL_GPL(balloon_devinfo_alloc);
-
 /*
  * balloon_page_enqueue - allocates a new page and inserts it into the balloon
  *			  page list.
@@ -61,7 +35,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 	 */
 	BUG_ON(!trylock_page(page));
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
-	balloon_page_insert(page, b_dev_info->mapping, &b_dev_info->pages);
+	balloon_page_insert(b_dev_info, page);
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 	unlock_page(page);
 	return page;
@@ -127,60 +101,10 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
 EXPORT_SYMBOL_GPL(balloon_page_dequeue);
 
 #ifdef CONFIG_BALLOON_COMPACTION
-/*
- * balloon_mapping_alloc - allocates a special ->mapping for ballooned pages.
- * @b_dev_info: holds the balloon device information descriptor.
- * @a_ops: balloon_mapping address_space_operations descriptor.
- *
- * Driver must call it to properly allocate and initialize an instance of
- * struct address_space which will be used as the special page->mapping for
- * balloon device enlisted page instances.
- */
-struct address_space *balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
-				const struct address_space_operations *a_ops)
-{
-	struct address_space *mapping;
-
-	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
-	if (!mapping)
-		return ERR_PTR(-ENOMEM);
-
-	/*
-	 * Give a clean 'zeroed' status to all elements of this special
-	 * balloon page->mapping struct address_space instance.
-	 */
-	address_space_init_once(mapping);
-
-	/*
-	 * Set mapping->flags appropriately, to allow balloon pages
-	 * ->mapping identification.
-	 */
-	mapping_set_balloon(mapping);
-	mapping_set_gfp_mask(mapping, balloon_mapping_gfp_mask());
-
-	/* balloon's page->mapping->a_ops callback descriptor */
-	mapping->a_ops = a_ops;
-
-	/*
-	 * Establish a pointer reference back to the balloon device descriptor
-	 * this particular page->mapping will be servicing.
-	 * This is used by compaction / migration procedures to identify and
-	 * access the balloon device pageset while isolating / migrating pages.
-	 *
-	 * As some balloon drivers can register multiple balloon devices
-	 * for a single guest, this also helps compaction / migration to
-	 * properly deal with multiple balloon pagesets, when required.
-	 */
-	mapping->private_data = b_dev_info;
-	b_dev_info->mapping = mapping;
-
-	return mapping;
-}
-EXPORT_SYMBOL_GPL(balloon_mapping_alloc);
 
 static inline void __isolate_balloon_page(struct page *page)
 {
-	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
+	struct balloon_dev_info *b_dev_info = balloon_page_device(page);
 	unsigned long flags;
 
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
@@ -192,7 +116,7 @@ static inline void __isolate_balloon_page(struct page *page)
 
 static inline void __putback_balloon_page(struct page *page)
 {
-	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
+	struct balloon_dev_info *b_dev_info = balloon_page_device(page);
 	unsigned long flags;
 
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
@@ -202,12 +126,6 @@ static inline void __putback_balloon_page(struct page *page)
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 }
 
-static inline int __migrate_balloon_page(struct address_space *mapping,
-		struct page *newpage, struct page *page, enum migrate_mode mode)
-{
-	return page->mapping->a_ops->migratepage(mapping, newpage, page, mode);
-}
-
 /* __isolate_lru_page() counterpart for a ballooned page */
 bool balloon_page_isolate(struct page *page)
 {
@@ -274,7 +192,7 @@ void balloon_page_putback(struct page *page)
 int balloon_page_migrate(struct page *newpage,
 			 struct page *page, enum migrate_mode mode)
 {
-	struct address_space *mapping;
+	struct balloon_dev_info *balloon = balloon_page_device(page);
 	int rc = -EAGAIN;
 
 	/*
@@ -290,9 +208,8 @@ int balloon_page_migrate(struct page *newpage,
 		return rc;
 	}
 
-	mapping = page->mapping;
-	if (mapping)
-		rc = __migrate_balloon_page(mapping, newpage, page, mode);
+	if (balloon && balloon->migratepage)
+		rc = balloon->migratepage(balloon, newpage, page, mode);
 
 	unlock_page(newpage);
 	return rc;
-- 
cgit v1.2.3


From 09316c09dde33aae14f34489d9e3d243ec0d5938 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Date: Thu, 9 Oct 2014 15:29:32 -0700
Subject: mm/balloon_compaction: add vmstat counters and kpageflags bit

Always mark pages with PageBalloon even if balloon compaction is disabled
and expose this mark in /proc/kpageflags as KPF_BALLOON.

Also this patch adds three counters into /proc/vmstat: "balloon_inflate",
"balloon_deflate" and "balloon_migrate".  They accumulate balloon
activity.  Current size of balloon is (balloon_inflate - balloon_deflate)
pages.

All generic balloon code now gathered under option CONFIG_MEMORY_BALLOON.
It should be selected by ballooning driver which wants use this feature.
Currently virtio-balloon is the only user.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/virtio/Kconfig                 |  1 +
 drivers/virtio/virtio_balloon.c        |  1 +
 fs/proc/page.c                         |  3 +++
 include/linux/balloon_compaction.h     |  2 ++
 include/linux/vm_event_item.h          |  7 +++++++
 include/uapi/linux/kernel-page-flags.h |  1 +
 mm/Kconfig                             |  7 ++++++-
 mm/Makefile                            |  3 ++-
 mm/balloon_compaction.c                |  2 ++
 mm/vmstat.c                            | 12 +++++++++++-
 tools/vm/page-types.c                  |  1 +
 11 files changed, 37 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index c6683f2e396c..00b228638274 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -25,6 +25,7 @@ config VIRTIO_PCI
 config VIRTIO_BALLOON
 	tristate "Virtio balloon driver"
 	depends on VIRTIO
+	select MEMORY_BALLOON
 	---help---
 	 This driver supports increasing and decreasing the amount
 	 of memory within a KVM guest.
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 2bad7f9dd2ac..f893148a107b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -396,6 +396,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
 	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
 	balloon_page_insert(vb_dev_info, newpage);
 	vb_dev_info->isolated_pages--;
+	__count_vm_event(BALLOON_MIGRATE);
 	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
 	set_page_pfns(vb->pfns, newpage);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index e647c55275d9..1e3187da1fed 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -133,6 +133,9 @@ u64 stable_page_flags(struct page *page)
 	if (PageBuddy(page))
 		u |= 1 << KPF_BUDDY;
 
+	if (PageBalloon(page))
+		u |= 1 << KPF_BALLOON;
+
 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
 
 	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index bc3d2985cc9a..9b0a15d06a4f 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -166,11 +166,13 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 static inline void balloon_page_insert(struct balloon_dev_info *balloon,
 				       struct page *page)
 {
+	__SetPageBalloon(page);
 	list_add(&page->lru, &balloon->pages);
 }
 
 static inline void balloon_page_delete(struct page *page)
 {
+	__ClearPageBalloon(page);
 	list_del(&page->lru);
 }
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ced92345c963..730334cdf037 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -72,6 +72,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
+#ifdef CONFIG_MEMORY_BALLOON
+		BALLOON_INFLATE,
+		BALLOON_DEFLATE,
+#ifdef CONFIG_BALLOON_COMPACTION
+		BALLOON_MIGRATE,
+#endif
+#endif
 #ifdef CONFIG_DEBUG_TLBFLUSH
 #ifdef CONFIG_SMP
 		NR_TLB_REMOTE_FLUSH,	/* cpu tried to flush others' tlbs */
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index 5116a0e48172..2f96d233c980 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -31,6 +31,7 @@
 
 #define KPF_KSM			21
 #define KPF_THP			22
+#define KPF_BALLOON		23
 
 
 #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 0ceb8a567dab..1d1ae6b078fd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -230,12 +230,17 @@ config SPLIT_PTLOCK_CPUS
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	boolean
 
+#
+# support for memory balloon
+config MEMORY_BALLOON
+	boolean
+
 #
 # support for memory balloon compaction
 config BALLOON_COMPACTION
 	bool "Allow for balloon memory compaction/migration"
 	def_bool y
-	depends on COMPACTION && VIRTIO_BALLOON
+	depends on COMPACTION && MEMORY_BALLOON
 	help
 	  Memory fragmentation introduced by ballooning might reduce
 	  significantly the number of 2MB contiguous memory blocks that can be
diff --git a/mm/Makefile b/mm/Makefile
index f8ed7ab417b1..1f534a7f0a71 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
-			   compaction.o balloon_compaction.o vmacache.o \
+			   compaction.o vmacache.o \
 			   interval_tree.o list_lru.o workingset.o \
 			   iov_iter.o debug.o $(mmu-y)
 
@@ -67,3 +67,4 @@ obj-$(CONFIG_ZBUD)	+= zbud.o
 obj-$(CONFIG_ZSMALLOC)	+= zsmalloc.o
 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
 obj-$(CONFIG_CMA)	+= cma.o
+obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 3afdabdbc0a4..b3cbe19f71b5 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -36,6 +36,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 	BUG_ON(!trylock_page(page));
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 	balloon_page_insert(b_dev_info, page);
+	__count_vm_event(BALLOON_INFLATE);
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 	unlock_page(page);
 	return page;
@@ -74,6 +75,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
 			}
 			spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 			balloon_page_delete(page);
+			__count_vm_event(BALLOON_DEFLATE);
 			spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 			unlock_page(page);
 			dequeued_page = true;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index e9ab104b956f..cce7c766da7a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -735,7 +735,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 					TEXT_FOR_HIGHMEM(xx) xx "_movable",
 
 const char * const vmstat_text[] = {
-	/* Zoned VM counters */
+	/* enum zone_stat_item countes */
 	"nr_free_pages",
 	"nr_alloc_batch",
 	"nr_inactive_anon",
@@ -778,10 +778,13 @@ const char * const vmstat_text[] = {
 	"workingset_nodereclaim",
 	"nr_anon_transparent_hugepages",
 	"nr_free_cma",
+
+	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
 	"nr_dirty_background_threshold",
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
+	/* enum vm_event_item counters */
 	"pgpgin",
 	"pgpgout",
 	"pswpin",
@@ -860,6 +863,13 @@ const char * const vmstat_text[] = {
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
 #endif
+#ifdef CONFIG_MEMORY_BALLOON
+	"balloon_inflate",
+	"balloon_deflate",
+#ifdef CONFIG_BALLOON_COMPACTION
+	"balloon_migrate",
+#endif
+#endif /* CONFIG_MEMORY_BALLOON */
 #ifdef CONFIG_DEBUG_TLBFLUSH
 #ifdef CONFIG_SMP
 	"nr_tlb_remote_flush",
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index c4d6d2e20e0d..264fbc297e0b 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -132,6 +132,7 @@ static const char * const page_flag_names[] = {
 	[KPF_NOPAGE]		= "n:nopage",
 	[KPF_KSM]		= "x:ksm",
 	[KPF_THP]		= "t:thp",
+	[KPF_BALLOON]		= "o:balloon",
 
 	[KPF_RESERVED]		= "r:reserved",
 	[KPF_MLOCKED]		= "m:mlocked",
-- 
cgit v1.2.3


From 722cdc17232f0f684011407f7cf3c40d39457971 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 9 Oct 2014 15:29:50 -0700
Subject: zsmalloc: change return value unit of zs_get_total_size_bytes

zs_get_total_size_bytes returns a amount of memory zsmalloc consumed with
*byte unit* but zsmalloc operates *page unit* rather than byte unit so
let's change the API so benefit we could get is that reduce unnecessary
overhead (ie, change page unit with byte unit) in zsmalloc.

Since return type is pages, "zs_get_total_pages" is better than
"zs_get_total_size_bytes".

Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Dan Streetman <ddstreet@ieee.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: <juno.choi@lge.com>
Cc: <seungho1.park@lge.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjennings@variantweb.net>
Cc: David Horner <ds2horner@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 4 ++--
 include/linux/zsmalloc.h      | 2 +-
 mm/zsmalloc.c                 | 9 ++++-----
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d00831c3d731..f0b8b30a7128 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -103,10 +103,10 @@ static ssize_t mem_used_total_show(struct device *dev,
 
 	down_read(&zram->init_lock);
 	if (init_done(zram))
-		val = zs_get_total_size_bytes(meta->mem_pool);
+		val = zs_get_total_pages(meta->mem_pool);
 	up_read(&zram->init_lock);
 
-	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
 }
 
 static ssize_t max_comp_streams_show(struct device *dev,
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index e44d634e7fb7..05c214760977 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -46,6 +46,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 			enum zs_mapmode mm);
 void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
 
-u64 zs_get_total_size_bytes(struct zs_pool *pool);
+unsigned long zs_get_total_pages(struct zs_pool *pool);
 
 #endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 2a4acf400846..c4a91578dc96 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -297,7 +297,7 @@ static void zs_zpool_unmap(void *pool, unsigned long handle)
 
 static u64 zs_zpool_total_size(void *pool)
 {
-	return zs_get_total_size_bytes(pool);
+	return zs_get_total_pages(pool) << PAGE_SHIFT;
 }
 
 static struct zpool_driver zs_zpool_driver = {
@@ -1181,12 +1181,11 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
-u64 zs_get_total_size_bytes(struct zs_pool *pool)
+unsigned long zs_get_total_pages(struct zs_pool *pool)
 {
-	u64 npages = atomic_long_read(&pool->pages_allocated);
-	return npages << PAGE_SHIFT;
+	return atomic_long_read(&pool->pages_allocated);
 }
-EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
+EXPORT_SYMBOL_GPL(zs_get_total_pages);
 
 module_init(zs_init);
 module_exit(zs_exit);
-- 
cgit v1.2.3


From 2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Thu, 9 Oct 2014 15:30:13 -0700
Subject: include/linux/kernel.h: rewrite min3, max3 and clamp using min and
 max

It appears that gcc is better at optimising a double call to min and max
rather than open coded min3 and max3.  This can be observed here:

    $ cat min-max.c
    #define min(x, y) ({				\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	(void) (&_min1 == &_min2);		\
    	_min1 < _min2 ? _min1 : _min2; })
    #define min3(x, y, z) ({			\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	typeof(z) _min3 = (z);			\
    	(void) (&_min1 == &_min2);		\
    	(void) (&_min1 == &_min3);		\
    	_min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
    		(_min2 < _min3 ? _min2 : _min3); })

    int fmin3(int x, int y, int z) { return min3(x, y, z); }
    int fmin2(int x, int y, int z) { return min(min(x, y), z); }

    $ gcc -O2 -o min-max.s -S min-max.c; cat min-max.s
    	.file	"min-max.c"
    	.text
    	.p2align 4,,15
    	.globl	fmin3
    	.type	fmin3, @function
    fmin3:
    .LFB0:
    	.cfi_startproc
    	cmpl	%esi, %edi
    	jl	.L5
    	cmpl	%esi, %edx
    	movl	%esi, %eax
    	cmovle	%edx, %eax
    	ret
    	.p2align 4,,10
    	.p2align 3
    .L5:
    	cmpl	%edi, %edx
    	movl	%edi, %eax
    	cmovle	%edx, %eax
    	ret
    	.cfi_endproc
    .LFE0:
    	.size	fmin3, .-fmin3
    	.p2align 4,,15
    	.globl	fmin2
    	.type	fmin2, @function
    fmin2:
    .LFB1:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	movl	%edx, %eax
    	cmovle	%esi, %edi
    	cmpl	%edx, %edi
    	cmovle	%edi, %eax
    	ret
    	.cfi_endproc
    .LFE1:
    	.size	fmin2, .-fmin2
    	.ident	"GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    	.section	.note.GNU-stack,"",@progbits

fmin3 function, which uses open-coded min3 macro, is compiled into total
of ten instructions including a conditional branch, whereas fmin2
function, which uses two calls to min2 macro, is compiled into six
instructions with no branches.

Similarly, open-coded clamp produces the same code as clamp using min and
max macros, but the latter is much shorter:

    $ cat clamp.c
    #define clamp(val, min, max) ({			\
    	typeof(val) __val = (val);		\
    	typeof(min) __min = (min);		\
    	typeof(max) __max = (max);		\
    	(void) (&__val == &__min);		\
    	(void) (&__val == &__max);		\
    	__val = __val < __min ? __min: __val;	\
    	__val > __max ? __max: __val; })
    #define min(x, y) ({				\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	(void) (&_min1 == &_min2);		\
    	_min1 < _min2 ? _min1 : _min2; })
    #define max(x, y) ({				\
    	typeof(x) _max1 = (x);			\
    	typeof(y) _max2 = (y);			\
    	(void) (&_max1 == &_max2);		\
    	_max1 > _max2 ? _max1 : _max2; })

    int fclamp(int v, int min, int max) { return clamp(v, min, max); }
    int fclampmm(int v, int min, int max) { return min(max(v, min), max); }

    $ gcc -O2 -o clamp.s -S clamp.c; cat clamp.s
    	.file	"clamp.c"
    	.text
    	.p2align 4,,15
    	.globl	fclamp
    	.type	fclamp, @function
    fclamp:
    .LFB0:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	movl	%edx, %eax
    	cmovge	%esi, %edi
    	cmpl	%edx, %edi
    	cmovle	%edi, %eax
    	ret
    	.cfi_endproc
    .LFE0:
    	.size	fclamp, .-fclamp
    	.p2align 4,,15
    	.globl	fclampmm
    	.type	fclampmm, @function
    fclampmm:
    .LFB1:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	cmovge	%esi, %edi
    	cmpl	%edi, %edx
    	movl	%edi, %eax
    	cmovle	%edx, %eax
    	ret
    	.cfi_endproc
    .LFE1:
    	.size	fclampmm, .-fclampmm
    	.ident	"GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    	.section	.note.GNU-stack,"",@progbits

    Linux mpn-glaptop 3.13.0-29-generic #53~precise1-Ubuntu SMP Wed Jun 4 22:06:25 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
    gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3
    Copyright (C) 2011 Free Software Foundation, Inc.
    This is free software; see the source for copying conditions.  There is NO
    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    -rwx------ 1 mpn eng 51224656 Jun 17 14:15 vmlinux.before
    -rwx------ 1 mpn eng 51224608 Jun 17 13:57 vmlinux.after

48 bytes reduction.  The do_fault_around was a few instruction shorter
and as far as I can tell saved 12 bytes on the stack, i.e.:

    $ grep -e rsp -e pop -e push do_fault_around.*
    do_fault_around.before.s:push   %rbp
    do_fault_around.before.s:mov    %rsp,%rbp
    do_fault_around.before.s:push   %r13
    do_fault_around.before.s:push   %r12
    do_fault_around.before.s:push   %rbx
    do_fault_around.before.s:sub    $0x38,%rsp
    do_fault_around.before.s:add    $0x38,%rsp
    do_fault_around.before.s:pop    %rbx
    do_fault_around.before.s:pop    %r12
    do_fault_around.before.s:pop    %r13
    do_fault_around.before.s:pop    %rbp

    do_fault_around.after.s:push   %rbp
    do_fault_around.after.s:mov    %rsp,%rbp
    do_fault_around.after.s:push   %r12
    do_fault_around.after.s:push   %rbx
    do_fault_around.after.s:sub    $0x30,%rsp
    do_fault_around.after.s:add    $0x30,%rsp
    do_fault_around.after.s:pop    %rbx
    do_fault_around.after.s:pop    %r12
    do_fault_around.after.s:pop    %rbp

or here side-by-side:

    Before                    After
    push   %rbp               push   %rbp
    mov    %rsp,%rbp          mov    %rsp,%rbp
    push   %r13
    push   %r12               push   %r12
    push   %rbx               push   %rbx
    sub    $0x38,%rsp         sub    $0x30,%rsp
    add    $0x38,%rsp         add    $0x30,%rsp
    pop    %rbx               pop    %rbx
    pop    %r12               pop    %r12
    pop    %r13
    pop    %rbp               pop    %rbp

There are also fewer branches:

    $ grep ^j do_fault_around.*
    do_fault_around.before.s:jae    ffffffff812079b7
    do_fault_around.before.s:jmp    ffffffff812079c5
    do_fault_around.before.s:jmp    ffffffff81207a14
    do_fault_around.before.s:ja     ffffffff812079f9
    do_fault_around.before.s:jb     ffffffff81207a10
    do_fault_around.before.s:jmp    ffffffff81207a63
    do_fault_around.before.s:jne    ffffffff812079df

    do_fault_around.after.s:jmp    ffffffff812079fd
    do_fault_around.after.s:ja     ffffffff812079e2
    do_fault_around.after.s:jb     ffffffff812079f9
    do_fault_around.after.s:jmp    ffffffff81207a4c
    do_fault_around.after.s:jne    ffffffff812079c8

And here's with allyesconfig on a different machine:

    $ uname -a; gcc --version; ls -l vmlinux.*
    Linux erwin 3.14.7-mn #54 SMP Sun Jun 15 11:25:08 CEST 2014 x86_64 AMD Phenom(tm) II X3 710 Processor AuthenticAMD GNU/Linux
    gcc (GCC) 4.8.3
    Copyright (C) 2013 Free Software Foundation, Inc.
    This is free software; see the source for copying conditions.  There is NO
    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    -rwx------ 1 mpn eng 437027411 Jun 20 16:04 vmlinux.before
    -rwx------ 1 mpn eng 437026881 Jun 20 15:30 vmlinux.after

530 bytes reduction.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: David Rientjes <rientjes@google.com>
Cc: "Rustad, Mark D" <mark.d.rustad@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 32 +++++---------------------------
 1 file changed, 5 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 95624bed87ef..aa2a0cb57f50 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -715,23 +715,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 	(void) (&_max1 == &_max2);		\
 	_max1 > _max2 ? _max1 : _max2; })
 
-#define min3(x, y, z) ({			\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	typeof(z) _min3 = (z);			\
-	(void) (&_min1 == &_min2);		\
-	(void) (&_min1 == &_min3);		\
-	_min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
-		(_min2 < _min3 ? _min2 : _min3); })
-
-#define max3(x, y, z) ({			\
-	typeof(x) _max1 = (x);			\
-	typeof(y) _max2 = (y);			\
-	typeof(z) _max3 = (z);			\
-	(void) (&_max1 == &_max2);		\
-	(void) (&_max1 == &_max3);		\
-	_max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
-		(_max2 > _max3 ? _max2 : _max3); })
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
 
 /**
  * min_not_zero - return the minimum that is _not_ zero, unless both are zero
@@ -746,20 +731,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 /**
  * clamp - return a value clamped to a given range with strict typechecking
  * @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
  *
  * This macro does strict typechecking of min/max to make sure they are of the
  * same type as val.  See the unnecessary pointer comparisons.
  */
-#define clamp(val, min, max) ({			\
-	typeof(val) __val = (val);		\
-	typeof(min) __min = (min);		\
-	typeof(max) __max = (max);		\
-	(void) (&__val == &__min);		\
-	(void) (&__val == &__max);		\
-	__val = __val < __min ? __min: __val;	\
-	__val > __max ? __max: __val; })
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
 
 /*
  * ..and if you can't take the strict
-- 
cgit v1.2.3


From c185b07fc9f24d52a864376ed22a6d84384b0c53 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Thu, 9 Oct 2014 15:30:15 -0700
Subject: include/linux/kernel.h: deduplicate code implementing clamp* macros

Instead of open-coding clamp_t macro min_t and max_t the way clamp macro
does and instead of open-coding clamp_val simply use clamp_t.
Furthermore, normalise argument naming in the macros to be lo and hi.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Mark Rustad <mark.d.rustad@intel.com>
Cc: "Kirsher, Jeffrey T" <jeffrey.t.kirsher@intel.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index aa2a0cb57f50..e9e420b6d931 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -734,7 +734,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @lo: lowest allowable value
  * @hi: highest allowable value
  *
- * This macro does strict typechecking of min/max to make sure they are of the
+ * This macro does strict typechecking of lo/hi to make sure they are of the
  * same type as val.  See the unnecessary pointer comparisons.
  */
 #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
@@ -759,36 +759,26 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * clamp_t - return a value clamped to a given range using a given type
  * @type: the type of variable to use
  * @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
  *
  * This macro does no typechecking and uses temporary variables of type
  * 'type' to make all the comparisons.
  */
-#define clamp_t(type, val, min, max) ({		\
-	type __val = (val);			\
-	type __min = (min);			\
-	type __max = (max);			\
-	__val = __val < __min ? __min: __val;	\
-	__val > __max ? __max: __val; })
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
 
 /**
  * clamp_val - return a value clamped to a given range using val's type
  * @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
  *
  * This macro does no typechecking and uses temporary variables of whatever
  * type the input argument 'val' is.  This is useful when val is an unsigned
  * type and min and max are literals that will otherwise be assigned a signed
  * integer type.
  */
-#define clamp_val(val, min, max) ({		\
-	typeof(val) __val = (val);		\
-	typeof(val) __min = (min);		\
-	typeof(val) __max = (max);		\
-	__val = __val < __min ? __min: __val;	\
-	__val > __max ? __max: __val; })
+#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
 
 
 /*
-- 
cgit v1.2.3


From 61a04e5b306ab9d6a30f78e86f1f140d7c888304 Mon Sep 17 00:00:00 2001
From: Michele Curti <michele.curti@gmail.com>
Date: Thu, 9 Oct 2014 15:30:17 -0700
Subject: include/linux/blkdev.h: use NULL instead of zero

Quite useless but it shuts up some warnings.

Signed-off-by: Michele Curti <michele.curti@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 518b46555b80..87be398166d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1564,7 +1564,7 @@ static inline int blk_rq_map_integrity_sg(struct request_queue *q,
 }
 static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
 {
-	return 0;
+	return NULL;
 }
 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
 {
-- 
cgit v1.2.3


From 578b25dfce2990d8bab5631f33a4283bd5b01556 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 9 Oct 2014 15:30:28 -0700
Subject: include/linux/screen_info.h: remove unused ORIG_* macros

The ORIG_* macros definitions to access struct screen_info members and all
of their users were removed 7 years ago by commit 3ea335100014785f
("Remove magic macros for screen_info structure members"), but (only) the
definitions reappeared a few days later in commit ee8e7cfe9d330d6f ("Make
asm-x86/bootparam.h includable from userspace.").

Remove them for good. Amen.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/screen_info.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 005bf3e38db5..f0f8bad54be9 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -5,12 +5,4 @@
 
 extern struct screen_info screen_info;
 
-#define ORIG_X			(screen_info.orig_x)
-#define ORIG_Y			(screen_info.orig_y)
-#define ORIG_VIDEO_MODE		(screen_info.orig_video_mode)
-#define ORIG_VIDEO_COLS 	(screen_info.orig_video_cols)
-#define ORIG_VIDEO_EGA_BX	(screen_info.orig_video_ega_bx)
-#define ORIG_VIDEO_LINES	(screen_info.orig_video_lines)
-#define ORIG_VIDEO_ISVGA	(screen_info.orig_video_isVGA)
-#define ORIG_VIDEO_POINTS       (screen_info.orig_video_points)
 #endif /* _SCREEN_INFO_H */
-- 
cgit v1.2.3


From 083bf668cb70e47b84db64856606e94beac87f01 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Fri, 14 Mar 2014 14:06:25 +0800
Subject: ACPI: make acpi_create_platform_device() an external API

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/acpi/acpi_platform.c | 1 +
 drivers/acpi/internal.h      | 7 -------
 include/linux/acpi.h         | 1 +
 3 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 2bf9082f7523..a3c89a1bcf54 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -113,3 +113,4 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
 	kfree(resources);
 	return pdev;
 }
+EXPORT_SYMBOL_GPL(acpi_create_platform_device);
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index de47f9f746c9..f221d1eb594a 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -168,13 +168,6 @@ static inline int suspend_nvs_save(void) { return 0; }
 static inline void suspend_nvs_restore(void) {}
 #endif
 
-/*--------------------------------------------------------------------------
-				Platform bus support
-  -------------------------------------------------------------------------- */
-struct platform_device;
-
-struct platform_device *acpi_create_platform_device(struct acpi_device *adev);
-
 /*--------------------------------------------------------------------------
 					Video
   -------------------------------------------------------------------------- */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 807cbc46d73e..2c24c2c1be45 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -432,6 +432,7 @@ static inline bool acpi_driver_match_device(struct device *dev,
 int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
 int acpi_device_modalias(struct device *, char *, int);
 
+struct platform_device *acpi_create_platform_device(struct acpi_device *);
 #define ACPI_PTR(_ptr)	(_ptr)
 
 #else	/* !CONFIG_ACPI */
-- 
cgit v1.2.3


From 7b83fd9d91a411158f72d36958103c708c3b5a86 Mon Sep 17 00:00:00 2001
From: Aaron Lu <aaron.lu@intel.com>
Date: Tue, 25 Mar 2014 10:40:09 +0800
Subject: Thermal: move the KELVIN_TO_MILLICELSIUS macro to thermal.h

This macro can be used by other component so move it to a common header,
but in a slightly different way: define two macros, one macro with an
offset and the other doesn't.

Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/acpi/thermal.c  | 18 +++++++++---------
 include/linux/thermal.h |  2 ++
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 112817e963e0..d24fa1964eb8 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -528,7 +528,6 @@ static void acpi_thermal_check(void *data)
 }
 
 /* sys I/F for generic thermal sysfs support */
-#define KELVIN_TO_MILLICELSIUS(t, off) (((t) - (off)) * 100)
 
 static int thermal_get_temp(struct thermal_zone_device *thermal,
 			    unsigned long *temp)
@@ -543,7 +542,8 @@ static int thermal_get_temp(struct thermal_zone_device *thermal,
 	if (result)
 		return result;
 
-	*temp = KELVIN_TO_MILLICELSIUS(tz->temperature, tz->kelvin_offset);
+	*temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(tz->temperature,
+							tz->kelvin_offset);
 	return 0;
 }
 
@@ -647,7 +647,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
 
 	if (tz->trips.critical.flags.valid) {
 		if (!trip) {
-			*temp = KELVIN_TO_MILLICELSIUS(
+			*temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
 				tz->trips.critical.temperature,
 				tz->kelvin_offset);
 			return 0;
@@ -657,7 +657,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
 
 	if (tz->trips.hot.flags.valid) {
 		if (!trip) {
-			*temp = KELVIN_TO_MILLICELSIUS(
+			*temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
 				tz->trips.hot.temperature,
 				tz->kelvin_offset);
 			return 0;
@@ -667,7 +667,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
 
 	if (tz->trips.passive.flags.valid) {
 		if (!trip) {
-			*temp = KELVIN_TO_MILLICELSIUS(
+			*temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
 				tz->trips.passive.temperature,
 				tz->kelvin_offset);
 			return 0;
@@ -678,7 +678,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
 	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE &&
 		tz->trips.active[i].flags.valid; i++) {
 		if (!trip) {
-			*temp = KELVIN_TO_MILLICELSIUS(
+			*temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
 				tz->trips.active[i].temperature,
 				tz->kelvin_offset);
 			return 0;
@@ -694,7 +694,7 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
 	struct acpi_thermal *tz = thermal->devdata;
 
 	if (tz->trips.critical.flags.valid) {
-		*temperature = KELVIN_TO_MILLICELSIUS(
+		*temperature = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
 				tz->trips.critical.temperature,
 				tz->kelvin_offset);
 		return 0;
@@ -714,8 +714,8 @@ static int thermal_get_trend(struct thermal_zone_device *thermal,
 
 	if (type == THERMAL_TRIP_ACTIVE) {
 		unsigned long trip_temp;
-		unsigned long temp = KELVIN_TO_MILLICELSIUS(tz->temperature,
-							tz->kelvin_offset);
+		unsigned long temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
+					tz->temperature, tz->kelvin_offset);
 		if (thermal_get_trip_temp(thermal, trip, &trip_temp))
 			return -EINVAL;
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 0305cde21a74..79ce6b94884a 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -44,6 +44,8 @@
 #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
 				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
 #define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
+#define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100)
+#define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732)
 
 /* Adding event notification support elements */
 #define THERMAL_GENL_FAMILY_NAME                "thermal_event"
-- 
cgit v1.2.3


From 77e337c6e23e3b9d22e09ffec202a80f755a54c2 Mon Sep 17 00:00:00 2001
From: Aaron Lu <aaron.lu@intel.com>
Date: Wed, 3 Sep 2014 15:13:02 +0800
Subject: Thermal: introduce INT3402 thermal driver

ACPI INT3402 device object could report temperature for the memory module.
To expose such information to user space, a thermal zone device is registered
for it so that the thermal sysfs interface can expose such information for
userspace to use.

Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/int340x_thermal/Makefile          |   1 +
 drivers/thermal/int340x_thermal/int3402_thermal.c | 242 ++++++++++++++++++++++
 include/linux/thermal.h                           |   2 +
 3 files changed, 245 insertions(+)
 create mode 100644 drivers/thermal/int340x_thermal/int3402_thermal.c

(limited to 'include/linux')

diff --git a/drivers/thermal/int340x_thermal/Makefile b/drivers/thermal/int340x_thermal/Makefile
index e10a53bcefe7..67c98fd24200 100644
--- a/drivers/thermal/int340x_thermal/Makefile
+++ b/drivers/thermal/int340x_thermal/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_INT340X_THERMAL)	+= int3400_thermal.o
+obj-$(CONFIG_INT340X_THERMAL)	+= int3402_thermal.o
diff --git a/drivers/thermal/int340x_thermal/int3402_thermal.c b/drivers/thermal/int340x_thermal/int3402_thermal.c
new file mode 100644
index 000000000000..a5d08c14ba24
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3402_thermal.c
@@ -0,0 +1,242 @@
+/*
+ * INT3402 thermal driver for memory temperature reporting
+ *
+ * Copyright (C) 2014, Intel Corporation
+ * Authors: Aaron Lu <aaron.lu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/thermal.h>
+
+#define ACPI_ACTIVE_COOLING_MAX_NR 10
+
+struct active_trip {
+	unsigned long temp;
+	int id;
+	bool valid;
+};
+
+struct int3402_thermal_data {
+	unsigned long *aux_trips;
+	int aux_trip_nr;
+	unsigned long psv_temp;
+	int psv_trip_id;
+	unsigned long crt_temp;
+	int crt_trip_id;
+	unsigned long hot_temp;
+	int hot_trip_id;
+	struct active_trip act_trips[ACPI_ACTIVE_COOLING_MAX_NR];
+	acpi_handle *handle;
+};
+
+static int int3402_thermal_get_zone_temp(struct thermal_zone_device *zone,
+					 unsigned long *temp)
+{
+	struct int3402_thermal_data *d = zone->devdata;
+	unsigned long long tmp;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(d->handle, "_TMP", NULL, &tmp);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	/* _TMP returns the temperature in tenths of degrees Kelvin */
+	*temp = DECI_KELVIN_TO_MILLICELSIUS(tmp);
+
+	return 0;
+}
+
+static int int3402_thermal_get_trip_temp(struct thermal_zone_device *zone,
+					 int trip, unsigned long *temp)
+{
+	struct int3402_thermal_data *d = zone->devdata;
+	int i;
+
+	if (trip < d->aux_trip_nr)
+		*temp = d->aux_trips[trip];
+	else if (trip == d->crt_trip_id)
+		*temp = d->crt_temp;
+	else if (trip == d->psv_trip_id)
+		*temp = d->psv_temp;
+	else if (trip == d->hot_trip_id)
+		*temp = d->hot_temp;
+	else {
+		for (i = 0; i < ACPI_ACTIVE_COOLING_MAX_NR; i++) {
+			if (d->act_trips[i].valid &&
+			    d->act_trips[i].id == trip) {
+				*temp = d->act_trips[i].temp;
+				break;
+			}
+		}
+		if (i == ACPI_ACTIVE_COOLING_MAX_NR)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static int int3402_thermal_get_trip_type(struct thermal_zone_device *zone,
+					 int trip, enum thermal_trip_type *type)
+{
+	struct int3402_thermal_data *d = zone->devdata;
+	int i;
+
+	if (trip < d->aux_trip_nr)
+		*type = THERMAL_TRIP_PASSIVE;
+	else if (trip == d->crt_trip_id)
+		*type = THERMAL_TRIP_CRITICAL;
+	else if (trip == d->hot_trip_id)
+		*type = THERMAL_TRIP_HOT;
+	else if (trip == d->psv_trip_id)
+		*type = THERMAL_TRIP_PASSIVE;
+	else {
+		for (i = 0; i < ACPI_ACTIVE_COOLING_MAX_NR; i++) {
+			if (d->act_trips[i].valid &&
+			    d->act_trips[i].id == trip) {
+				*type = THERMAL_TRIP_ACTIVE;
+				break;
+			}
+		}
+		if (i == ACPI_ACTIVE_COOLING_MAX_NR)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static int int3402_thermal_set_trip_temp(struct thermal_zone_device *zone, int trip,
+				  unsigned long temp)
+{
+	struct int3402_thermal_data *d = zone->devdata;
+	acpi_status status;
+	char name[10];
+
+	snprintf(name, sizeof(name), "PAT%d", trip);
+	status = acpi_execute_simple_method(d->handle, name,
+			MILLICELSIUS_TO_DECI_KELVIN(temp));
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	d->aux_trips[trip] = temp;
+	return 0;
+}
+
+static struct thermal_zone_device_ops int3402_thermal_zone_ops = {
+	.get_temp       = int3402_thermal_get_zone_temp,
+	.get_trip_temp	= int3402_thermal_get_trip_temp,
+	.get_trip_type	= int3402_thermal_get_trip_type,
+	.set_trip_temp	= int3402_thermal_set_trip_temp,
+};
+
+static struct thermal_zone_params int3402_thermal_params = {
+	.governor_name = "user_space",
+	.no_hwmon = true,
+};
+
+static int int3402_thermal_get_temp(acpi_handle handle, char *name,
+				    unsigned long *temp)
+{
+	unsigned long long r;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(handle, name, NULL, &r);
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	*temp = DECI_KELVIN_TO_MILLICELSIUS(r);
+	return 0;
+}
+
+static int int3402_thermal_probe(struct platform_device *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	struct int3402_thermal_data *d;
+	struct thermal_zone_device *zone;
+	acpi_status status;
+	unsigned long long trip_cnt;
+	int trip_mask = 0, i;
+
+	if (!acpi_has_method(adev->handle, "_TMP"))
+		return -ENODEV;
+
+	d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt);
+	if (ACPI_FAILURE(status))
+		trip_cnt = 0;
+	else {
+		d->aux_trips = devm_kzalloc(&pdev->dev,
+				sizeof(*d->aux_trips) * trip_cnt, GFP_KERNEL);
+		if (!d->aux_trips)
+			return -ENOMEM;
+		trip_mask = trip_cnt - 1;
+		d->handle = adev->handle;
+		d->aux_trip_nr = trip_cnt;
+	}
+
+	d->crt_trip_id = -1;
+	if (!int3402_thermal_get_temp(adev->handle, "_CRT", &d->crt_temp))
+		d->crt_trip_id = trip_cnt++;
+	d->hot_trip_id = -1;
+	if (!int3402_thermal_get_temp(adev->handle, "_HOT", &d->hot_temp))
+		d->hot_trip_id = trip_cnt++;
+	d->psv_trip_id = -1;
+	if (!int3402_thermal_get_temp(adev->handle, "_PSV", &d->psv_temp))
+		d->psv_trip_id = trip_cnt++;
+	for (i = 0; i < ACPI_ACTIVE_COOLING_MAX_NR; i++) {
+		char name[5] = { '_', 'A', 'C', '0' + i, '\0' };
+		if (int3402_thermal_get_temp(adev->handle, name,
+					     &d->act_trips[i].temp))
+			break;
+		d->act_trips[i].id = trip_cnt++;
+		d->act_trips[i].valid = true;
+	}
+
+	zone = thermal_zone_device_register(acpi_device_bid(adev), trip_cnt,
+					    trip_mask, d,
+					    &int3402_thermal_zone_ops,
+					    &int3402_thermal_params,
+					    0, 0);
+	if (IS_ERR(zone))
+		return PTR_ERR(zone);
+	platform_set_drvdata(pdev, zone);
+
+	return 0;
+}
+
+static int int3402_thermal_remove(struct platform_device *pdev)
+{
+	struct thermal_zone_device *zone = platform_get_drvdata(pdev);
+
+	thermal_zone_device_unregister(zone);
+	return 0;
+}
+
+static const struct acpi_device_id int3402_thermal_match[] = {
+	{"INT3402", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, int3402_thermal_match);
+
+static struct platform_driver int3402_thermal_driver = {
+	.probe = int3402_thermal_probe,
+	.remove = int3402_thermal_remove,
+	.driver = {
+		   .name = "int3402 thermal",
+		   .owner = THIS_MODULE,
+		   .acpi_match_table = int3402_thermal_match,
+		   },
+};
+
+module_platform_driver(int3402_thermal_driver);
+
+MODULE_DESCRIPTION("INT3402 Thermal driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 79ce6b94884a..ef90838b36a0 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -46,6 +46,8 @@
 #define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
 #define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100)
 #define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732)
+#define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off))
+#define MILLICELSIUS_TO_DECI_KELVIN(t) MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, 2732)
 
 /* Adding event notification support elements */
 #define THERMAL_GENL_FAMILY_NAME                "thermal_event"
-- 
cgit v1.2.3


From 1fadee0c364572f2b2e098b34001fbaa82ee2e00 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 10 Oct 2014 09:48:05 +0200
Subject: net/phy: micrel: Add clock support for KSZ8021/KSZ8031

The KSZ8021 and KSZ8031 support RMII reference input clocks of 25MHz
and 50MHz. Both PHYs differ in the default frequency they expect
after reset. If this differs from the actual input clock, then
register 0x1f bit 7 must be changed.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/micrel.txt |  6 +++++
 drivers/net/phy/micrel.c                         | 31 ++++++++++++++++++++++--
 include/linux/micrel_phy.h                       |  1 +
 3 files changed, 36 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/micrel.txt b/Documentation/devicetree/bindings/net/micrel.txt
index 98a3e61f9ee8..e1d99b95c4ec 100644
--- a/Documentation/devicetree/bindings/net/micrel.txt
+++ b/Documentation/devicetree/bindings/net/micrel.txt
@@ -16,3 +16,9 @@ Optional properties:
 	      KSZ8051: register 0x1f, bits 5..4
 
               See the respective PHY datasheet for the mode values.
+
+ - clocks, clock-names: contains clocks according to the common clock bindings.
+
+              supported clocks:
+	      - KSZ8021, KSZ8031: "rmii-ref": The RMII refence input clock. Used
+		to determine the XI input clock.
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 011dbda2b2f1..492435fce1d4 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -26,6 +26,7 @@
 #include <linux/phy.h>
 #include <linux/micrel_phy.h>
 #include <linux/of.h>
+#include <linux/clk.h>
 
 /* Operation Mode Strap Override */
 #define MII_KSZPHY_OMSO				0x16
@@ -72,9 +73,12 @@ static int ksz_config_flags(struct phy_device *phydev)
 {
 	int regval;
 
-	if (phydev->dev_flags & MICREL_PHY_50MHZ_CLK) {
+	if (phydev->dev_flags & (MICREL_PHY_50MHZ_CLK | MICREL_PHY_25MHZ_CLK)) {
 		regval = phy_read(phydev, MII_KSZPHY_CTRL);
-		regval |= KSZ8051_RMII_50MHZ_CLK;
+		if (phydev->dev_flags & MICREL_PHY_50MHZ_CLK)
+			regval |= KSZ8051_RMII_50MHZ_CLK;
+		else
+			regval &= ~KSZ8051_RMII_50MHZ_CLK;
 		return phy_write(phydev, MII_KSZPHY_CTRL, regval);
 	}
 	return 0;
@@ -440,6 +444,27 @@ ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int ptrad, int devnum,
 {
 }
 
+static int ksz8021_probe(struct phy_device *phydev)
+{
+	struct clk *clk;
+
+	clk = devm_clk_get(&phydev->dev, "rmii-ref");
+	if (!IS_ERR(clk)) {
+		unsigned long rate = clk_get_rate(clk);
+
+		if (rate > 24500000 && rate < 25500000) {
+			phydev->dev_flags |= MICREL_PHY_25MHZ_CLK;
+		} else if (rate > 49500000 && rate < 50500000) {
+			phydev->dev_flags |= MICREL_PHY_50MHZ_CLK;
+		} else {
+			dev_err(&phydev->dev, "Clock rate out of range: %ld\n", rate);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static struct phy_driver ksphy_driver[] = {
 {
 	.phy_id		= PHY_ID_KS8737,
@@ -462,6 +487,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
 			   SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.probe		= ksz8021_probe,
 	.config_init	= ksz8021_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -477,6 +503,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
 			   SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.probe		= ksz8021_probe,
 	.config_init	= ksz8021_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 2e5b194b9b19..53d33dee70e1 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -37,6 +37,7 @@
 
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
+#define MICREL_PHY_25MHZ_CLK	0x00000002
 
 #define MICREL_KSZ9021_EXTREG_CTRL	0xB
 #define MICREL_KSZ9021_EXTREG_DATA_WRITE	0xC
-- 
cgit v1.2.3


From 810bb172671aec17cf85cc748120cf73c17af372 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Oct 2014 12:45:37 -0400
Subject: take dname_external() into fs/dcache.c

never used outside and it's too low-level for legitimate uses outside
of fs/dcache.c anyway

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 5 +++++
 include/linux/dcache.h | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 8221faae0bef..d5a23fd0da90 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -264,6 +264,11 @@ static void __d_free_external(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
+static inline int dname_external(const struct dentry *dentry)
+{
+	return dentry->d_name.name != dentry->d_iname;
+}
+
 static void dentry_free(struct dentry *dentry)
 {
 	if (unlikely(dname_external(dentry))) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 81b03150f39a..27a7f00bc89e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -226,11 +226,6 @@ struct dentry_operations {
 
 extern seqlock_t rename_lock;
 
-static inline int dname_external(const struct dentry *dentry)
-{
-	return dentry->d_name.name != dentry->d_iname;
-}
-
 /*
  * These are the low-level FS interfaces to the dcache..
  */
-- 
cgit v1.2.3


From 7b600f2abb36909e70963cc7c744c15983500bee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Oct 2014 13:31:58 -0400
Subject: don't need that forward declaration of struct nameidata in dcache.h
 anymore

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dcache.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 27a7f00bc89e..b2a2a08523bf 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -11,7 +11,6 @@
 #include <linux/rcupdate.h>
 #include <linux/lockref.h>
 
-struct nameidata;
 struct path;
 struct vfsmount;
 
-- 
cgit v1.2.3


From 174e964ec224c3c591b83a6b5f0984d905d3678f Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 9 Oct 2014 12:43:27 -0700
Subject: regulator: Include err.h from consumer.h to fix build failure

sh:sh2007_defconfig fails to build with the following error:

In file included from include/linux/regulator/machine.h:18:0,
                 from arch/sh/boards/board-sh2007.c:10:
include/linux/regulator/consumer.h: In function 'regulator_get_optional':
include/linux/regulator/consumer.h:271:2:
		error: implicit declaration of function 'ERR_PTR'
include/linux/err.h: At top level:
include/linux/err.h:23:35: error: conflicting types for 'ERR_PTR'
include/linux/regulator/consumer.h:271:9:
		note: previous implicit declaration of 'ERR_PTR' was here

Since consumer.h uses ERR_PTR, it should include err.h.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/consumer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index d347c805f923..f540b1496e2f 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -35,6 +35,8 @@
 #ifndef __LINUX_REGULATOR_CONSUMER_H_
 #define __LINUX_REGULATOR_CONSUMER_H_
 
+#include <linux/err.h>
+
 struct device;
 struct notifier_block;
 struct regmap;
-- 
cgit v1.2.3


From 71458cfc782eafe4b27656e078d379a34e472adf Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Mon, 13 Oct 2014 15:51:05 -0700
Subject: kernel: add support for gcc 5

We're missing include/linux/compiler-gcc5.h which is required now
because gcc branched off to v5 in trunk.

Just copy the relevant bits out of include/linux/compiler-gcc4.h,
no new code is added as of now.

This fixes a build error when using gcc 5.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc5.h | 66 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 include/linux/compiler-gcc5.h

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h
new file mode 100644
index 000000000000..cdd1cc202d51
--- /dev/null
+++ b/include/linux/compiler-gcc5.h
@@ -0,0 +1,66 @@
+#ifndef __LINUX_COMPILER_H
+#error "Please don't include <linux/compiler-gcc5.h> directly, include <linux/compiler.h> instead."
+#endif
+
+#define __used				__attribute__((__used__))
+#define __must_check			__attribute__((warn_unused_result))
+#define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
+
+/* Mark functions as cold. gcc will assume any path leading to a call
+   to them will be unlikely.  This means a lot of manual unlikely()s
+   are unnecessary now for any paths leading to the usual suspects
+   like BUG(), printk(), panic() etc. [but let's keep them for now for
+   older compilers]
+
+   Early snapshots of gcc 4.3 don't support this and we can't detect this
+   in the preprocessor, but we can live with this because they're unreleased.
+   Maketime probing would be overkill here.
+
+   gcc also has a __attribute__((__hot__)) to move hot functions into
+   a special section, but I don't see any sense in this right now in
+   the kernel context */
+#define __cold			__attribute__((__cold__))
+
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+#ifndef __CHECKER__
+# define __compiletime_warning(message) __attribute__((warning(message)))
+# define __compiletime_error(message) __attribute__((error(message)))
+#endif /* __CHECKER__ */
+
+/*
+ * Mark a position in code as unreachable.  This can be used to
+ * suppress control flow warnings after asm blocks that transfer
+ * control elsewhere.
+ *
+ * Early snapshots of gcc 4.5 don't support this and we can't detect
+ * this in the preprocessor, but we can live with this because they're
+ * unreleased.  Really, we need to have autoconf for the kernel.
+ */
+#define unreachable() __builtin_unreachable()
+
+/* Mark a function definition as prohibited from being cloned. */
+#define __noclone	__attribute__((__noclone__))
+
+/*
+ * Tell the optimizer that something else uses this function or variable.
+ */
+#define __visible __attribute__((externally_visible))
+
+/*
+ * GCC 'asm goto' miscompiles certain code sequences:
+ *
+ *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
+ * Fixed in GCC 4.8.2 and later versions.
+ *
+ * (asm goto is automatically volatile - the naming reflects this.)
+ */
+#define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#define __HAVE_BUILTIN_BSWAP16__
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
-- 
cgit v1.2.3


From de9e14eebf33a60712a52a0bc6e08c043c0aba53 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 13 Oct 2014 15:51:09 -0700
Subject: drivers: dma-contiguous: add initialization from device tree

Add a function to create CMA region from previously reserved memory and
add support for handling 'shared-dma-pool' reserved-memory device tree
nodes.

Based on previous code provided by Josh Cartwright <joshc@codeaurora.org>

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Laura Abbott <lauraa@codeaurora.org>
Cc: Josh Cartwright <joshc@codeaurora.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/dma-contiguous.c | 66 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/cma.h           |  3 ++
 mm/cma.c                      | 62 ++++++++++++++++++++++++++++++++--------
 3 files changed, 120 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 6606abdf880c..473ff4892401 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -211,3 +211,69 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 {
 	return cma_release(dev_get_cma_area(dev), pages, count);
 }
+
+/*
+ * Support for reserved memory regions defined in device tree
+ */
+#ifdef CONFIG_OF_RESERVED_MEM
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) fmt
+
+static void rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
+{
+	dev_set_cma_area(dev, rmem->priv);
+}
+
+static void rmem_cma_device_release(struct reserved_mem *rmem,
+				    struct device *dev)
+{
+	dev_set_cma_area(dev, NULL);
+}
+
+static const struct reserved_mem_ops rmem_cma_ops = {
+	.device_init	= rmem_cma_device_init,
+	.device_release = rmem_cma_device_release,
+};
+
+static int __init rmem_cma_setup(struct reserved_mem *rmem)
+{
+	phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+	phys_addr_t mask = align - 1;
+	unsigned long node = rmem->fdt_node;
+	struct cma *cma;
+	int err;
+
+	if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
+	    of_get_flat_dt_prop(node, "no-map", NULL))
+		return -EINVAL;
+
+	if ((rmem->base & mask) || (rmem->size & mask)) {
+		pr_err("Reserved memory: incorrect alignment of CMA region\n");
+		return -EINVAL;
+	}
+
+	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, &cma);
+	if (err) {
+		pr_err("Reserved memory: unable to setup CMA region\n");
+		return err;
+	}
+	/* Architecture specific contiguous memory fixup. */
+	dma_contiguous_early_fixup(rmem->base, rmem->size);
+
+	if (of_get_flat_dt_prop(node, "linux,cma-default", NULL))
+		dma_contiguous_set_default(cma);
+
+	rmem->ops = &rmem_cma_ops;
+	rmem->priv = cma;
+
+	pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
+		&rmem->base, (unsigned long)rmem->size / SZ_1M);
+
+	return 0;
+}
+RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
+#endif
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 371b93042520..0430ed05d3b9 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -22,6 +22,9 @@ extern int __init cma_declare_contiguous(phys_addr_t size,
 			phys_addr_t base, phys_addr_t limit,
 			phys_addr_t alignment, unsigned int order_per_bit,
 			bool fixed, struct cma **res_cma);
+extern int cma_init_reserved_mem(phys_addr_t size,
+					phys_addr_t base, int order_per_bit,
+					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
 extern bool cma_release(struct cma *cma, struct page *pages, int count);
 #endif
diff --git a/mm/cma.c b/mm/cma.c
index a951a3b3ed36..963bc4add9af 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -142,6 +142,54 @@ static int __init cma_init_reserved_areas(void)
 }
 core_initcall(cma_init_reserved_areas);
 
+/**
+ * cma_init_reserved_mem() - create custom contiguous area from reserved memory
+ * @base: Base address of the reserved area
+ * @size: Size of the reserved area (in bytes),
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @res_cma: Pointer to store the created cma region.
+ *
+ * This function creates custom contiguous area from already reserved memory.
+ */
+int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
+				 int order_per_bit, struct cma **res_cma)
+{
+	struct cma *cma;
+	phys_addr_t alignment;
+
+	/* Sanity checks */
+	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+		pr_err("Not enough slots for CMA reserved regions!\n");
+		return -ENOSPC;
+	}
+
+	if (!size || !memblock_is_region_reserved(base, size))
+		return -EINVAL;
+
+	/* ensure minimal alignment requied by mm core */
+	alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+
+	/* alignment should be aligned with order_per_bit */
+	if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
+		return -EINVAL;
+
+	if (ALIGN(base, alignment) != base || ALIGN(size, alignment) != size)
+		return -EINVAL;
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	cma = &cma_areas[cma_area_count];
+	cma->base_pfn = PFN_DOWN(base);
+	cma->count = size >> PAGE_SHIFT;
+	cma->order_per_bit = order_per_bit;
+	*res_cma = cma;
+	cma_area_count++;
+
+	return 0;
+}
+
 /**
  * cma_declare_contiguous() - reserve custom contiguous area
  * @base: Base address of the reserved area optional, use 0 for any
@@ -165,7 +213,6 @@ int __init cma_declare_contiguous(phys_addr_t base,
 			phys_addr_t alignment, unsigned int order_per_bit,
 			bool fixed, struct cma **res_cma)
 {
-	struct cma *cma;
 	phys_addr_t memblock_end = memblock_end_of_DRAM();
 	phys_addr_t highmem_start = __pa(high_memory);
 	int ret = 0;
@@ -237,16 +284,9 @@ int __init cma_declare_contiguous(phys_addr_t base,
 		}
 	}
 
-	/*
-	 * Each reserved area must be initialised later, when more kernel
-	 * subsystems (like slab allocator) are available.
-	 */
-	cma = &cma_areas[cma_area_count];
-	cma->base_pfn = PFN_DOWN(base);
-	cma->count = size >> PAGE_SHIFT;
-	cma->order_per_bit = order_per_bit;
-	*res_cma = cma;
-	cma_area_count++;
+	ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma);
+	if (ret)
+		goto err;
 
 	pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
 		(unsigned long)base);
-- 
cgit v1.2.3


From 8b21d9ca17ff8ed0dbf650f4162ee2d59bb5a881 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.m@jp.panasonic.com>
Date: Mon, 13 Oct 2014 15:51:30 -0700
Subject: list: include linux/kernel.h

linux/list.h uses container_of, therefore it depends on linux/kernel.h.

Signed-off-by: Masahiro Yamada <yamada.m@jp.panasonic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index cbbb96fcead9..f33f831eb3c8 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -5,6 +5,7 @@
 #include <linux/stddef.h>
 #include <linux/poison.h>
 #include <linux/const.h>
+#include <linux/kernel.h>
 
 /*
  * Simple doubly linked list implementation.
-- 
cgit v1.2.3


From 6de8ab68bc30da75116209d818c75497bdaed09d Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 13 Oct 2014 15:51:36 -0700
Subject: lib: remove prio_heap

The prio_heap code is unused since commit 889ed9ceaa97 ("cgroup: remove
css_scan_tasks()").  It should be compiled out to shrink the binary
kernel size which can be done via introducing CONFIG_PRIO_HEAD or by
removing the code.

We can simply recover the code from git when needed, so it would be
better to remove it IMO.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Francesco Fusco <ffusco@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: George Spelvin <linux@horizon.com>
Cc: Mark Salter <msalter@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prio_heap.h | 58 ---------------------------------------
 lib/Makefile              |  2 +-
 lib/prio_heap.c           | 70 -----------------------------------------------
 3 files changed, 1 insertion(+), 129 deletions(-)
 delete mode 100644 include/linux/prio_heap.h
 delete mode 100644 lib/prio_heap.c

(limited to 'include/linux')

diff --git a/include/linux/prio_heap.h b/include/linux/prio_heap.h
deleted file mode 100644
index 08094350f26a..000000000000
--- a/include/linux/prio_heap.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _LINUX_PRIO_HEAP_H
-#define _LINUX_PRIO_HEAP_H
-
-/*
- * Simple insertion-only static-sized priority heap containing
- * pointers, based on CLR, chapter 7
- */
-
-#include <linux/gfp.h>
-
-/**
- * struct ptr_heap - simple static-sized priority heap
- * @ptrs - pointer to data area
- * @max - max number of elements that can be stored in @ptrs
- * @size - current number of valid elements in @ptrs (in the range 0..@size-1
- * @gt: comparison operator, which should implement "greater than"
- */
-struct ptr_heap {
-	void **ptrs;
-	int max;
-	int size;
-	int (*gt)(void *, void *);
-};
-
-/**
- * heap_init - initialize an empty heap with a given memory size
- * @heap: the heap structure to be initialized
- * @size: amount of memory to use in bytes
- * @gfp_mask: mask to pass to kmalloc()
- * @gt: comparison operator, which should implement "greater than"
- */
-extern int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
-		     int (*gt)(void *, void *));
-
-/**
- * heap_free - release a heap's storage
- * @heap: the heap structure whose data should be released
- */
-void heap_free(struct ptr_heap *heap);
-
-/**
- * heap_insert - insert a value into the heap and return any overflowed value
- * @heap: the heap to be operated on
- * @p: the pointer to be inserted
- *
- * Attempts to insert the given value into the priority heap. If the
- * heap is full prior to the insertion, then the resulting heap will
- * consist of the smallest @max elements of the original heap and the
- * new element; the greatest element will be removed from the heap and
- * returned. Note that the returned element will be the new element
- * (i.e. no change to the heap) if the new element is greater than all
- * elements currently in the heap.
- */
-extern void *heap_insert(struct ptr_heap *heap, void *p);
-
-
-
-#endif /* _LINUX_PRIO_HEAP_H */
diff --git a/lib/Makefile b/lib/Makefile
index d6b4bc496408..eb95e0fdcca5 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
-	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
+	 proportions.o flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o
 
diff --git a/lib/prio_heap.c b/lib/prio_heap.c
deleted file mode 100644
index a7af6f85eca8..000000000000
--- a/lib/prio_heap.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Simple insertion-only static-sized priority heap containing
- * pointers, based on CLR, chapter 7
- */
-
-#include <linux/slab.h>
-#include <linux/prio_heap.h>
-
-int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
-	      int (*gt)(void *, void *))
-{
-	heap->ptrs = kmalloc(size, gfp_mask);
-	if (!heap->ptrs)
-		return -ENOMEM;
-	heap->size = 0;
-	heap->max = size / sizeof(void *);
-	heap->gt = gt;
-	return 0;
-}
-
-void heap_free(struct ptr_heap *heap)
-{
-	kfree(heap->ptrs);
-}
-
-void *heap_insert(struct ptr_heap *heap, void *p)
-{
-	void *res;
-	void **ptrs = heap->ptrs;
-	int pos;
-
-	if (heap->size < heap->max) {
-		/* Heap insertion */
-		pos = heap->size++;
-		while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) {
-			ptrs[pos] = ptrs[(pos-1)/2];
-			pos = (pos-1)/2;
-		}
-		ptrs[pos] = p;
-		return NULL;
-	}
-
-	/* The heap is full, so something will have to be dropped */
-
-	/* If the new pointer is greater than the current max, drop it */
-	if (heap->gt(p, ptrs[0]))
-		return p;
-
-	/* Replace the current max and heapify */
-	res = ptrs[0];
-	ptrs[0] = p;
-	pos = 0;
-
-	while (1) {
-		int left = 2 * pos + 1;
-		int right = 2 * pos + 2;
-		int largest = pos;
-		if (left < heap->size && heap->gt(ptrs[left], p))
-			largest = left;
-		if (right < heap->size && heap->gt(ptrs[right], ptrs[largest]))
-			largest = right;
-		if (largest == pos)
-			break;
-		/* Push p down the heap one level and bump one up */
-		ptrs[pos] = ptrs[largest];
-		ptrs[largest] = p;
-		pos = largest;
-	}
-	return res;
-}
-- 
cgit v1.2.3


From 1c3bea0e71892ef9100c01d3799cdae8cac273ef Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 13 Oct 2014 15:53:33 -0700
Subject: signal: use BUILD_BUG() instead of _NSIG_WORDS_is_unsupported_size()

Kill _NSIG_WORDS_is_unsupported_size(), use BUILD_BUG() instead.  This
simplifies the code, avoids the nested-externs warnings, and this way we
do not defer the problem to linker.

Also, fix the indentation in _SIG_SET_BINOP() and _SIG_SET_OP().

Note: this patch assumes that the code like "if (0) BUILD_BUG();" is
valid.  If not (say __compiletime_error() is not defined and thus
__compiletime_error_fallback() uses a negative array) we should fix
BUILD_BUG() and/or BUILD_BUG_ON_MSG().  This code should be fine by
definition, this is the documented purpose of BUILD_BUG().

[sfr@canb.auug.org.au: fix powerpc build failures]
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 750196fcc0a5..ab1e0392b5ac 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -2,6 +2,7 @@
 #define _LINUX_SIGNAL_H
 
 #include <linux/list.h>
+#include <linux/bug.h>
 #include <uapi/linux/signal.h>
 
 struct task_struct;
@@ -67,7 +68,6 @@ static inline int sigismember(sigset_t *set, int _sig)
 
 static inline int sigisemptyset(sigset_t *set)
 {
-	extern void _NSIG_WORDS_is_unsupported_size(void);
 	switch (_NSIG_WORDS) {
 	case 4:
 		return (set->sig[3] | set->sig[2] |
@@ -77,7 +77,7 @@ static inline int sigisemptyset(sigset_t *set)
 	case 1:
 		return set->sig[0] == 0;
 	default:
-		_NSIG_WORDS_is_unsupported_size();
+		BUILD_BUG();
 		return 0;
 	}
 }
@@ -90,24 +90,23 @@ static inline int sigisemptyset(sigset_t *set)
 #define _SIG_SET_BINOP(name, op)					\
 static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \
 {									\
-	extern void _NSIG_WORDS_is_unsupported_size(void);		\
 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;			\
 									\
 	switch (_NSIG_WORDS) {						\
-	    case 4:							\
+	case 4:								\
 		a3 = a->sig[3]; a2 = a->sig[2];				\
 		b3 = b->sig[3]; b2 = b->sig[2];				\
 		r->sig[3] = op(a3, b3);					\
 		r->sig[2] = op(a2, b2);					\
-	    case 2:							\
+	case 2:								\
 		a1 = a->sig[1]; b1 = b->sig[1];				\
 		r->sig[1] = op(a1, b1);					\
-	    case 1:							\
+	case 1:								\
 		a0 = a->sig[0]; b0 = b->sig[0];				\
 		r->sig[0] = op(a0, b0);					\
 		break;							\
-	    default:							\
-		_NSIG_WORDS_is_unsupported_size();			\
+	default:							\
+		BUILD_BUG();						\
 	}								\
 }
 
@@ -128,16 +127,14 @@ _SIG_SET_BINOP(sigandnsets, _sig_andn)
 #define _SIG_SET_OP(name, op)						\
 static inline void name(sigset_t *set)					\
 {									\
-	extern void _NSIG_WORDS_is_unsupported_size(void);		\
-									\
 	switch (_NSIG_WORDS) {						\
-	    case 4: set->sig[3] = op(set->sig[3]);			\
-		    set->sig[2] = op(set->sig[2]);			\
-	    case 2: set->sig[1] = op(set->sig[1]);			\
-	    case 1: set->sig[0] = op(set->sig[0]);			\
+	case 4:	set->sig[3] = op(set->sig[3]);				\
+		set->sig[2] = op(set->sig[2]);				\
+	case 2:	set->sig[1] = op(set->sig[1]);				\
+	case 1:	set->sig[0] = op(set->sig[0]);				\
 		    break;						\
-	    default:							\
-		_NSIG_WORDS_is_unsupported_size();			\
+	default:							\
+		BUILD_BUG();						\
 	}								\
 }
 
-- 
cgit v1.2.3


From 669280a152ce5144321c0e511498877383f34393 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Mon, 13 Oct 2014 15:53:40 -0700
Subject: kexec: take the segment adding out of locate_mem_hole functions

In locate_mem_hole functions, a memory hole is located and added as
kexec_segment.  But from the name of locate_mem_hole, it should only take
responsibility of searching a available memory hole to contain data of a
specified size.

So in this patch add a new field 'mem' into kexec_buf, then take that
kexec segment adding code out of locate_mem_hole_top_down and
locate_mem_hole_bottom_up.  This make clear of the functionality of
locate_mem_hole just like it declars to do.  And by this
locate_mem_hole_callback chould be used later if anyone want to locate a
memory hole for other use.

Meanwhile Vivek suggested opening code function __kexec_add_segment(),
that way we have to retreive ksegment pointer once and it is easy to read.
 So just do it in this patch and remove __kexec_add_segment() since no one
use it anymore.

Signed-off-by: Baoquan He <bhe@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kexec.h |  1 +
 kernel/kexec.c        | 29 ++++++++---------------------
 2 files changed, 9 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 4b2a0e11cc5b..9d957b7ae095 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -178,6 +178,7 @@ struct kexec_buf {
 	struct kimage *image;
 	char *buffer;
 	unsigned long bufsz;
+	unsigned long mem;
 	unsigned long memsz;
 	unsigned long buf_align;
 	unsigned long buf_min;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 2bee072268d9..63bc3cdfb629 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -2016,22 +2016,6 @@ static int __init crash_save_vmcoreinfo_init(void)
 subsys_initcall(crash_save_vmcoreinfo_init);
 
 #ifdef CONFIG_KEXEC_FILE
-static int __kexec_add_segment(struct kimage *image, char *buf,
-			       unsigned long bufsz, unsigned long mem,
-			       unsigned long memsz)
-{
-	struct kexec_segment *ksegment;
-
-	ksegment = &image->segment[image->nr_segments];
-	ksegment->kbuf = buf;
-	ksegment->bufsz = bufsz;
-	ksegment->mem = mem;
-	ksegment->memsz = memsz;
-	image->nr_segments++;
-
-	return 0;
-}
-
 static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
 				    struct kexec_buf *kbuf)
 {
@@ -2064,8 +2048,7 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
 	} while (1);
 
 	/* If we are here, we found a suitable memory range */
-	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
-			    kbuf->memsz);
+	kbuf->mem = temp_start;
 
 	/* Success, stop navigating through remaining System RAM ranges */
 	return 1;
@@ -2099,8 +2082,7 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
 	} while (1);
 
 	/* If we are here, we found a suitable memory range */
-	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
-			    kbuf->memsz);
+	kbuf->mem = temp_start;
 
 	/* Success, stop navigating through remaining System RAM ranges */
 	return 1;
@@ -2187,7 +2169,12 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
 	}
 
 	/* Found a suitable memory range */
-	ksegment = &image->segment[image->nr_segments - 1];
+	ksegment = &image->segment[image->nr_segments];
+	ksegment->kbuf = kbuf->buffer;
+	ksegment->bufsz = kbuf->bufsz;
+	ksegment->mem = kbuf->mem;
+	ksegment->memsz = kbuf->memsz;
+	image->nr_segments++;
 	*load_addr = ksegment->mem;
 	return 0;
 }
-- 
cgit v1.2.3


From a841b65921a959c759da6b5c8d5dc21966b4cf86 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 13 Oct 2014 15:53:48 -0700
Subject: rbtree: add comment to rb_insert_augmented()

The comment is copied from Documentation/rbtree.txt, but this comment is
so important that it should also be in the code.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index fea49b5da12a..378c5ee75f78 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -43,6 +43,16 @@ struct rb_augment_callbacks {
 
 extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
 	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+/*
+ * Fixup the rbtree and update the augmented information when rebalancing.
+ *
+ * On insertion, the user must update the augmented information on the path
+ * leading to the inserted node, then call rb_link_node() as usual and
+ * rb_augment_inserted() instead of the usual rb_insert_color() call.
+ * If rb_augment_inserted() rebalances the rbtree, it will callback into
+ * a user provided function to update the augmented information on the
+ * affected subtrees.
+ */
 static inline void
 rb_insert_augmented(struct rb_node *node, struct rb_root *root,
 		    const struct rb_augment_callbacks *augment)
-- 
cgit v1.2.3


From 67cf13ceed89e2c1a967719e98624a20c48dfb5a Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 13 Oct 2014 15:54:03 -0700
Subject: x86: optimize resource lookups for ioremap

We have a large university system in the UK that is experiencing very long
delays modprobing the driver for a specific I/O device.  The delay is from
8-10 minutes per device and there are 31 devices in the system.  This 4 to
5 hour delay in starting up those I/O devices is very much a burden on the
customer.

There are two causes for requiring a restart/reload of the drivers.  First
is periodic preventive maintenance (PM) and the second is if any of the
devices experience a fatal error.  Both of these trigger this excessively
long delay in bringing the system back up to full capability.

The problem was tracked down to a very slow IOREMAP operation and the
excessively long ioresource lookup to insure that the user is not
attempting to ioremap RAM.  These patches provide a speed up to that
function.

The modprobe time appears to be affected quite a bit by previous activity
on the ioresource list, which I suspect is due to cache preloading.  While
the overall improvement is impacted by other overhead of starting the
devices, this drastically improves the modprobe time.

Also our system is considerably smaller so the percentages gained will not
be the same.  Best case improvement with the modprobe on our 20 device
smallish system was from 'real 5m51.913s' to 'real 0m18.275s'.

This patch (of 2):

Since the ioremap operation is verifying that the specified address range
is NOT RAM, it will search the entire ioresource list if the condition is
true.  To make matters worse, it does this one 4k page at a time.  For a
128M BAR region this is 32 passes to determine the entire region does not
contain any RAM addresses.

This patch provides another resource lookup function, region_is_ram, that
searches for the entire region specified, verifying that it is completely
contained within the resource region.  If it is found, then it is checked
to be RAM or not, within a single pass.

The return result reflects if it was found or not (-1), and whether it is
RAM (1) or not (0).  This allows the caller to fallback to the previous
page by page search if it was not found.

[akpm@linux-foundation.org: fix spellos and typos in comment]
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Alex Thorlton <athorlton@sgi.com>
Reviewed-by: Cliff Wickman <cpw@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 +
 kernel/resource.c  | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fa0d74e06428..4cd45cb95e6d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -347,6 +347,7 @@ static inline int put_page_unless_one(struct page *page)
 }
 
 extern int page_is_ram(unsigned long pfn);
+extern int region_is_ram(resource_size_t phys_addr, unsigned long size);
 
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
diff --git a/kernel/resource.c b/kernel/resource.c
index 46322019ab7d..0bcebffc4e77 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -491,6 +491,42 @@ int __weak page_is_ram(unsigned long pfn)
 }
 EXPORT_SYMBOL_GPL(page_is_ram);
 
+/*
+ * Search for a resouce entry that fully contains the specified region.
+ * If found, return 1 if it is RAM, 0 if not.
+ * If not found, or region is not fully contained, return -1
+ *
+ * Used by the ioremap functions to ensure the user is not remapping RAM and is
+ * a vast speed up over walking through the resource table page by page.
+ */
+int region_is_ram(resource_size_t start, unsigned long size)
+{
+	struct resource *p;
+	resource_size_t end = start + size - 1;
+	int flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	const char *name = "System RAM";
+	int ret = -1;
+
+	read_lock(&resource_lock);
+	for (p = iomem_resource.child; p ; p = p->sibling) {
+		if (end < p->start)
+			continue;
+
+		if (p->start <= start && end <= p->end) {
+			/* resource fully contains region */
+			if ((p->flags != flags) || strcmp(p->name, name))
+				ret = 0;
+			else
+				ret = 1;
+			break;
+		}
+		if (p->end < start)
+			break;	/* not found */
+	}
+	read_unlock(&resource_lock);
+	return ret;
+}
+
 void __weak arch_remove_reservations(struct resource *avail)
 {
 }
-- 
cgit v1.2.3


From b0bfb63118612e3614cf77b115c00f895a42c96a Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Mon, 13 Oct 2014 15:54:27 -0700
Subject: lib: string: Make all calls to strnicmp into calls to strncasecmp

The previous patch made strnicmp into a wrapper for strncasecmp.

This patch makes all in-tree users of strnicmp call strncasecmp
directly, while still making sure that the strnicmp symbol can be used
by out-of-tree modules.  It should be considered a temporary hack until
all in-tree callers have been converted.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 2 +-
 lib/string.c           | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index d36977e029af..e6edfe51575a 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -41,7 +41,7 @@ extern int strcmp(const char *,const char *);
 extern int strncmp(const char *,const char *,__kernel_size_t);
 #endif
 #ifndef __HAVE_ARCH_STRNICMP
-extern int strnicmp(const char *, const char *, __kernel_size_t);
+#define strnicmp strncasecmp
 #endif
 #ifndef __HAVE_ARCH_STRCASECMP
 extern int strcasecmp(const char *s1, const char *s2);
diff --git a/lib/string.c b/lib/string.c
index 3181e267a033..2fc20aa06f84 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -59,6 +59,7 @@ int strncasecmp(const char *s1, const char *s2, size_t len)
 EXPORT_SYMBOL(strncasecmp);
 #endif
 #ifndef __HAVE_ARCH_STRNICMP
+#undef strnicmp
 int strnicmp(const char *s1, const char *s2, size_t len)
 {
 	return strncasecmp(s1, s2, len);
-- 
cgit v1.2.3


From 3db2e9cdc085144e243495137273e2318c53a82f Mon Sep 17 00:00:00 2001
From: Daniel Walter <dwalter@google.com>
Date: Mon, 13 Oct 2014 15:55:09 -0700
Subject: include/linux: remove strict_strto* definitions

Remove obsolete and unused strict_strto* functions

Signed-off-by: Daniel Walter <dwalter@google.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 35c8ffb0136f..40728cf1c452 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -376,10 +376,6 @@ extern unsigned long simple_strtoul(const char *,char **,unsigned int);
 extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
 extern long long simple_strtoll(const char *,char **,unsigned int);
-#define strict_strtoul	kstrtoul
-#define strict_strtol	kstrtol
-#define strict_strtoull	kstrtoull
-#define strict_strtoll	kstrtoll
 
 extern int num_to_str(char *buf, int size, unsigned long long num);
 
-- 
cgit v1.2.3


From d295634e965ecacdb44c6760b3ca4eae08812715 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 13 Oct 2014 15:55:11 -0700
Subject: lib / string_helpers: move documentation to c-file

The introduced function string_escape_mem() is a kind of opposite to
string_unescape.  We have several users of such functionality each of
them created custom implementation.  The series contains clean up of
test suite, adding new call, and switching few users to use it via %*pE
specifier.

Test suite covers all of existing and most of potential use cases.

This patch (of 11):

The documentation of API belongs to c-file.  This patch moves it
accordingly.

There is no functional change.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: "John W . Linville" <linville@tuxdriver.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string_helpers.h | 34 ----------------------------------
 lib/string_helpers.c           | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 3eeee9672a4a..5a30f2a86239 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -20,40 +20,6 @@ int string_get_size(u64 size, enum string_size_units units,
 #define UNESCAPE_ANY		\
 	(UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL)
 
-/**
- * string_unescape - unquote characters in the given string
- * @src:	source buffer (escaped)
- * @dst:	destination buffer (unescaped)
- * @size:	size of the destination buffer (0 to unlimit)
- * @flags:	combination of the flags (bitwise OR):
- *	%UNESCAPE_SPACE:
- *		'\f' - form feed
- *		'\n' - new line
- *		'\r' - carriage return
- *		'\t' - horizontal tab
- *		'\v' - vertical tab
- *	%UNESCAPE_OCTAL:
- *		'\NNN' - byte with octal value NNN (1 to 3 digits)
- *	%UNESCAPE_HEX:
- *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
- *	%UNESCAPE_SPECIAL:
- *		'\"' - double quote
- *		'\\' - backslash
- *		'\a' - alert (BEL)
- *		'\e' - escape
- *	%UNESCAPE_ANY:
- *		all previous together
- *
- * Returns amount of characters processed to the destination buffer excluding
- * trailing '\0'.
- *
- * Because the size of the output will be the same as or less than the size of
- * the input, the transformation may be performed in place.
- *
- * Caller must provide valid source and destination pointers. Be aware that
- * destination buffer will always be NULL-terminated. Source string must be
- * NULL-terminated as well.
- */
 int string_unescape(char *src, char *dst, size_t size, unsigned int flags);
 
 static inline int string_unescape_inplace(char *buf, unsigned int flags)
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 29033f319aea..74ec60469640 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -168,6 +168,44 @@ static bool unescape_special(char **src, char **dst)
 	return true;
 }
 
+/**
+ * string_unescape - unquote characters in the given string
+ * @src:	source buffer (escaped)
+ * @dst:	destination buffer (unescaped)
+ * @size:	size of the destination buffer (0 to unlimit)
+ * @flags:	combination of the flags (bitwise OR):
+ *	%UNESCAPE_SPACE:
+ *		'\f' - form feed
+ *		'\n' - new line
+ *		'\r' - carriage return
+ *		'\t' - horizontal tab
+ *		'\v' - vertical tab
+ *	%UNESCAPE_OCTAL:
+ *		'\NNN' - byte with octal value NNN (1 to 3 digits)
+ *	%UNESCAPE_HEX:
+ *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
+ *	%UNESCAPE_SPECIAL:
+ *		'\"' - double quote
+ *		'\\' - backslash
+ *		'\a' - alert (BEL)
+ *		'\e' - escape
+ *	%UNESCAPE_ANY:
+ *		all previous together
+ *
+ * Description:
+ * The function unquotes characters in the given string.
+ *
+ * Because the size of the output will be the same as or less than the size of
+ * the input, the transformation may be performed in place.
+ *
+ * Caller must provide valid source and destination pointers. Be aware that
+ * destination buffer will always be NULL-terminated. Source string must be
+ * NULL-terminated as well.
+ *
+ * Return:
+ * The amount of the characters processed to the destination buffer excluding
+ * trailing '\0' is returned.
+ */
 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
 {
 	char *out = dst;
-- 
cgit v1.2.3


From c8250381c8272a9828fdd353171727b154fbd296 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 13 Oct 2014 15:55:16 -0700
Subject: lib / string_helpers: introduce string_escape_mem()

This is almost the opposite function to string_unescape().  Nevertheless
it handles \0 and could be used for any byte buffer.

The documentation is supplied together with the function prototype.

The test cases covers most of the scenarios and would be expanded later
on.

[akpm@linux-foundation.org: avoid 1k stack consumption]
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: "John W . Linville" <linville@tuxdriver.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Joe Perches <joe@perches.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string_helpers.h |  31 +++++
 lib/string_helpers.c           | 274 +++++++++++++++++++++++++++++++++++++++++
 lib/test-string_helpers.c      | 240 +++++++++++++++++++++++++++++++++++-
 3 files changed, 541 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 5a30f2a86239..6eb567ac56bc 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -37,4 +37,35 @@ static inline int string_unescape_any_inplace(char *buf)
 	return string_unescape_any(buf, buf, 0);
 }
 
+#define ESCAPE_SPACE		0x01
+#define ESCAPE_SPECIAL		0x02
+#define ESCAPE_NULL		0x04
+#define ESCAPE_OCTAL		0x08
+#define ESCAPE_ANY		\
+	(ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_SPECIAL | ESCAPE_NULL)
+#define ESCAPE_NP		0x10
+#define ESCAPE_ANY_NP		(ESCAPE_ANY | ESCAPE_NP)
+#define ESCAPE_HEX		0x20
+
+int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
+		unsigned int flags, const char *esc);
+
+static inline int string_escape_mem_any_np(const char *src, size_t isz,
+		char **dst, size_t osz, const char *esc)
+{
+	return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc);
+}
+
+static inline int string_escape_str(const char *src, char **dst, size_t sz,
+		unsigned int flags, const char *esc)
+{
+	return string_escape_mem(src, strlen(src), dst, sz, flags, esc);
+}
+
+static inline int string_escape_str_any_np(const char *src, char **dst,
+		size_t sz, const char *esc)
+{
+	return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc);
+}
+
 #endif
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 74ec60469640..58b78ba57439 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -8,6 +8,8 @@
 #include <linux/math64.h>
 #include <linux/export.h>
 #include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/string.h>
 #include <linux/string_helpers.h>
 
 /**
@@ -240,3 +242,275 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
 	return out - dst;
 }
 EXPORT_SYMBOL(string_unescape);
+
+static int escape_passthrough(unsigned char c, char **dst, size_t *osz)
+{
+	char *out = *dst;
+
+	if (*osz < 1)
+		return -ENOMEM;
+
+	*out++ = c;
+
+	*dst = out;
+	*osz -= 1;
+
+	return 1;
+}
+
+static int escape_space(unsigned char c, char **dst, size_t *osz)
+{
+	char *out = *dst;
+	unsigned char to;
+
+	if (*osz < 2)
+		return -ENOMEM;
+
+	switch (c) {
+	case '\n':
+		to = 'n';
+		break;
+	case '\r':
+		to = 'r';
+		break;
+	case '\t':
+		to = 't';
+		break;
+	case '\v':
+		to = 'v';
+		break;
+	case '\f':
+		to = 'f';
+		break;
+	default:
+		return 0;
+	}
+
+	*out++ = '\\';
+	*out++ = to;
+
+	*dst = out;
+	*osz -= 2;
+
+	return 1;
+}
+
+static int escape_special(unsigned char c, char **dst, size_t *osz)
+{
+	char *out = *dst;
+	unsigned char to;
+
+	if (*osz < 2)
+		return -ENOMEM;
+
+	switch (c) {
+	case '\\':
+		to = '\\';
+		break;
+	case '\a':
+		to = 'a';
+		break;
+	case '\e':
+		to = 'e';
+		break;
+	default:
+		return 0;
+	}
+
+	*out++ = '\\';
+	*out++ = to;
+
+	*dst = out;
+	*osz -= 2;
+
+	return 1;
+}
+
+static int escape_null(unsigned char c, char **dst, size_t *osz)
+{
+	char *out = *dst;
+
+	if (*osz < 2)
+		return -ENOMEM;
+
+	if (c)
+		return 0;
+
+	*out++ = '\\';
+	*out++ = '0';
+
+	*dst = out;
+	*osz -= 2;
+
+	return 1;
+}
+
+static int escape_octal(unsigned char c, char **dst, size_t *osz)
+{
+	char *out = *dst;
+
+	if (*osz < 4)
+		return -ENOMEM;
+
+	*out++ = '\\';
+	*out++ = ((c >> 6) & 0x07) + '0';
+	*out++ = ((c >> 3) & 0x07) + '0';
+	*out++ = ((c >> 0) & 0x07) + '0';
+
+	*dst = out;
+	*osz -= 4;
+
+	return 1;
+}
+
+static int escape_hex(unsigned char c, char **dst, size_t *osz)
+{
+	char *out = *dst;
+
+	if (*osz < 4)
+		return -ENOMEM;
+
+	*out++ = '\\';
+	*out++ = 'x';
+	*out++ = hex_asc_hi(c);
+	*out++ = hex_asc_lo(c);
+
+	*dst = out;
+	*osz -= 4;
+
+	return 1;
+}
+
+/**
+ * string_escape_mem - quote characters in the given memory buffer
+ * @src:	source buffer (unescaped)
+ * @isz:	source buffer size
+ * @dst:	destination buffer (escaped)
+ * @osz:	destination buffer size
+ * @flags:	combination of the flags (bitwise OR):
+ *	%ESCAPE_SPACE:
+ *		'\f' - form feed
+ *		'\n' - new line
+ *		'\r' - carriage return
+ *		'\t' - horizontal tab
+ *		'\v' - vertical tab
+ *	%ESCAPE_SPECIAL:
+ *		'\\' - backslash
+ *		'\a' - alert (BEL)
+ *		'\e' - escape
+ *	%ESCAPE_NULL:
+ *		'\0' - null
+ *	%ESCAPE_OCTAL:
+ *		'\NNN' - byte with octal value NNN (3 digits)
+ *	%ESCAPE_ANY:
+ *		all previous together
+ *	%ESCAPE_NP:
+ *		escape only non-printable characters (checked by isprint)
+ *	%ESCAPE_ANY_NP:
+ *		all previous together
+ *	%ESCAPE_HEX:
+ *		'\xHH' - byte with hexadecimal value HH (2 digits)
+ * @esc:	NULL-terminated string of characters any of which, if found in
+ *		the source, has to be escaped
+ *
+ * Description:
+ * The process of escaping byte buffer includes several parts. They are applied
+ * in the following sequence.
+ *	1. The character is matched to the printable class, if asked, and in
+ *	   case of match it passes through to the output.
+ *	2. The character is not matched to the one from @esc string and thus
+ *	   must go as is to the output.
+ *	3. The character is checked if it falls into the class given by @flags.
+ *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
+ *	   character. Note that they actually can't go together, otherwise
+ *	   %ESCAPE_HEX will be ignored.
+ *
+ * Caller must provide valid source and destination pointers. Be aware that
+ * destination buffer will not be NULL-terminated, thus caller have to append
+ * it if needs.
+ *
+ * Return:
+ * The amount of the characters processed to the destination buffer, or
+ * %-ENOMEM if the size of buffer is not enough to put an escaped character is
+ * returned.
+ *
+ * Even in the case of error @dst pointer will be updated to point to the byte
+ * after the last processed character.
+ */
+int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
+		      unsigned int flags, const char *esc)
+{
+	char *out = *dst, *p = out;
+	bool is_dict = esc && *esc;
+	int ret = 0;
+
+	while (isz--) {
+		unsigned char c = *src++;
+
+		/*
+		 * Apply rules in the following sequence:
+		 *	- the character is printable, when @flags has
+		 *	  %ESCAPE_NP bit set
+		 *	- the @esc string is supplied and does not contain a
+		 *	  character under question
+		 *	- the character doesn't fall into a class of symbols
+		 *	  defined by given @flags
+		 * In these cases we just pass through a character to the
+		 * output buffer.
+		 */
+		if ((flags & ESCAPE_NP && isprint(c)) ||
+		    (is_dict && !strchr(esc, c))) {
+			/* do nothing */
+		} else {
+			if (flags & ESCAPE_SPACE) {
+				ret = escape_space(c, &p, &osz);
+				if (ret < 0)
+					break;
+				if (ret > 0)
+					continue;
+			}
+
+			if (flags & ESCAPE_SPECIAL) {
+				ret = escape_special(c, &p, &osz);
+				if (ret < 0)
+					break;
+				if (ret > 0)
+					continue;
+			}
+
+			if (flags & ESCAPE_NULL) {
+				ret = escape_null(c, &p, &osz);
+				if (ret < 0)
+					break;
+				if (ret > 0)
+					continue;
+			}
+
+			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
+			if (flags & ESCAPE_OCTAL) {
+				ret = escape_octal(c, &p, &osz);
+				if (ret < 0)
+					break;
+				continue;
+			}
+			if (flags & ESCAPE_HEX) {
+				ret = escape_hex(c, &p, &osz);
+				if (ret < 0)
+					break;
+				continue;
+			}
+		}
+
+		ret = escape_passthrough(c, &p, &osz);
+		if (ret < 0)
+			break;
+	}
+
+	*dst = p;
+
+	if (ret < 0)
+		return ret;
+
+	return p - out;
+}
+EXPORT_SYMBOL(string_escape_mem);
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index ac44c9245dcf..ab0d30e1e18f 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -5,6 +5,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/string.h>
@@ -62,10 +63,14 @@ static const struct test_string strings[] __initconst = {
 static void __init test_string_unescape(const char *name, unsigned int flags,
 					bool inplace)
 {
-	char in[256];
-	char out_test[256];
-	char out_real[256];
-	int i, p = 0, q_test = 0, q_real = sizeof(out_real);
+	int q_real = 256;
+	char *in = kmalloc(q_real, GFP_KERNEL);
+	char *out_test = kmalloc(q_real, GFP_KERNEL);
+	char *out_real = kmalloc(q_real, GFP_KERNEL);
+	int i, p = 0, q_test = 0;
+
+	if (!in || !out_test || !out_real)
+		goto out;
 
 	for (i = 0; i < ARRAY_SIZE(strings); i++) {
 		const char *s = strings[i].in;
@@ -100,6 +105,223 @@ static void __init test_string_unescape(const char *name, unsigned int flags,
 
 	test_string_check_buf(name, flags, in, p - 1, out_real, q_real,
 			      out_test, q_test);
+out:
+	kfree(out_real);
+	kfree(out_test);
+	kfree(in);
+}
+
+struct test_string_1 {
+	const char *out;
+	unsigned int flags;
+};
+
+#define	TEST_STRING_2_MAX_S1		32
+struct test_string_2 {
+	const char *in;
+	struct test_string_1 s1[TEST_STRING_2_MAX_S1];
+};
+
+#define	TEST_STRING_2_DICT_0		NULL
+static const struct test_string_2 escape0[] __initconst = {{
+	.in = "\f\\ \n\r\t\v",
+	.s1 = {{
+		.out = "\\f\\ \\n\\r\\t\\v",
+		.flags = ESCAPE_SPACE,
+	},{
+		.out = "\\f\\134\\040\\n\\r\\t\\v",
+		.flags = ESCAPE_SPACE | ESCAPE_OCTAL,
+	},{
+		.out = "\\f\\x5c\\x20\\n\\r\\t\\v",
+		.flags = ESCAPE_SPACE | ESCAPE_HEX,
+	},{
+		/* terminator */
+	}},
+},{
+	.in = "\\h\\\"\a\e\\",
+	.s1 = {{
+		.out = "\\\\h\\\\\"\\a\\e\\\\",
+		.flags = ESCAPE_SPECIAL,
+	},{
+		.out = "\\\\\\150\\\\\\042\\a\\e\\\\",
+		.flags = ESCAPE_SPECIAL | ESCAPE_OCTAL,
+	},{
+		.out = "\\\\\\x68\\\\\\x22\\a\\e\\\\",
+		.flags = ESCAPE_SPECIAL | ESCAPE_HEX,
+	},{
+		/* terminator */
+	}},
+},{
+	.in = "\eb \\C\007\"\x90\r]",
+	.s1 = {{
+		.out = "\eb \\C\007\"\x90\\r]",
+		.flags = ESCAPE_SPACE,
+	},{
+		.out = "\\eb \\\\C\\a\"\x90\r]",
+		.flags = ESCAPE_SPECIAL,
+	},{
+		.out = "\\eb \\\\C\\a\"\x90\\r]",
+		.flags = ESCAPE_SPACE | ESCAPE_SPECIAL,
+	},{
+		.out = "\\033\\142\\040\\134\\103\\007\\042\\220\\015\\135",
+		.flags = ESCAPE_OCTAL,
+	},{
+		.out = "\\033\\142\\040\\134\\103\\007\\042\\220\\r\\135",
+		.flags = ESCAPE_SPACE | ESCAPE_OCTAL,
+	},{
+		.out = "\\e\\142\\040\\\\\\103\\a\\042\\220\\015\\135",
+		.flags = ESCAPE_SPECIAL | ESCAPE_OCTAL,
+	},{
+		.out = "\\e\\142\\040\\\\\\103\\a\\042\\220\\r\\135",
+		.flags = ESCAPE_SPACE | ESCAPE_SPECIAL | ESCAPE_OCTAL,
+	},{
+		.out = "\eb \\C\007\"\x90\r]",
+		.flags = ESCAPE_NP,
+	},{
+		.out = "\eb \\C\007\"\x90\\r]",
+		.flags = ESCAPE_SPACE | ESCAPE_NP,
+	},{
+		.out = "\\eb \\C\\a\"\x90\r]",
+		.flags = ESCAPE_SPECIAL | ESCAPE_NP,
+	},{
+		.out = "\\eb \\C\\a\"\x90\\r]",
+		.flags = ESCAPE_SPACE | ESCAPE_SPECIAL | ESCAPE_NP,
+	},{
+		.out = "\\033b \\C\\007\"\\220\\015]",
+		.flags = ESCAPE_OCTAL | ESCAPE_NP,
+	},{
+		.out = "\\033b \\C\\007\"\\220\\r]",
+		.flags = ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_NP,
+	},{
+		.out = "\\eb \\C\\a\"\\220\\r]",
+		.flags = ESCAPE_SPECIAL | ESCAPE_SPACE | ESCAPE_OCTAL |
+			 ESCAPE_NP,
+	},{
+		.out = "\\x1bb \\C\\x07\"\\x90\\x0d]",
+		.flags = ESCAPE_NP | ESCAPE_HEX,
+	},{
+		/* terminator */
+	}},
+},{
+	/* terminator */
+}};
+
+#define	TEST_STRING_2_DICT_1		"b\\ \t\r"
+static const struct test_string_2 escape1[] __initconst = {{
+	.in = "\f\\ \n\r\t\v",
+	.s1 = {{
+		.out = "\f\\134\\040\n\\015\\011\v",
+		.flags = ESCAPE_OCTAL,
+	},{
+		.out = "\f\\x5c\\x20\n\\x0d\\x09\v",
+		.flags = ESCAPE_HEX,
+	},{
+		/* terminator */
+	}},
+},{
+	.in = "\\h\\\"\a\e\\",
+	.s1 = {{
+		.out = "\\134h\\134\"\a\e\\134",
+		.flags = ESCAPE_OCTAL,
+	},{
+		/* terminator */
+	}},
+},{
+	.in = "\eb \\C\007\"\x90\r]",
+	.s1 = {{
+		.out = "\e\\142\\040\\134C\007\"\x90\\015]",
+		.flags = ESCAPE_OCTAL,
+	},{
+		/* terminator */
+	}},
+},{
+	/* terminator */
+}};
+
+static __init const char *test_string_find_match(const struct test_string_2 *s2,
+						 unsigned int flags)
+{
+	const struct test_string_1 *s1 = s2->s1;
+	unsigned int i;
+
+	if (!flags)
+		return s2->in;
+
+	/* Test cases are NULL-aware */
+	flags &= ~ESCAPE_NULL;
+
+	/* ESCAPE_OCTAL has a higher priority */
+	if (flags & ESCAPE_OCTAL)
+		flags &= ~ESCAPE_HEX;
+
+	for (i = 0; i < TEST_STRING_2_MAX_S1 && s1->out; i++, s1++)
+		if (s1->flags == flags)
+			return s1->out;
+	return NULL;
+}
+
+static __init void test_string_escape(const char *name,
+				      const struct test_string_2 *s2,
+				      unsigned int flags, const char *esc)
+{
+	int q_real = 512;
+	char *out_test = kmalloc(q_real, GFP_KERNEL);
+	char *out_real = kmalloc(q_real, GFP_KERNEL);
+	char *in = kmalloc(256, GFP_KERNEL);
+	char *buf = out_real;
+	int p = 0, q_test = 0;
+
+	if (!out_test || !out_real || !in)
+		goto out;
+
+	for (; s2->in; s2++) {
+		const char *out;
+		int len;
+
+		/* NULL injection */
+		if (flags & ESCAPE_NULL) {
+			in[p++] = '\0';
+			out_test[q_test++] = '\\';
+			out_test[q_test++] = '0';
+		}
+
+		/* Don't try strings that have no output */
+		out = test_string_find_match(s2, flags);
+		if (!out)
+			continue;
+
+		/* Copy string to in buffer */
+		len = strlen(s2->in);
+		memcpy(&in[p], s2->in, len);
+		p += len;
+
+		/* Copy expected result for given flags */
+		len = strlen(out);
+		memcpy(&out_test[q_test], out, len);
+		q_test += len;
+	}
+
+	q_real = string_escape_mem(in, p, &buf, q_real, flags, esc);
+
+	test_string_check_buf(name, flags, in, p, out_real, q_real, out_test,
+			      q_test);
+out:
+	kfree(in);
+	kfree(out_real);
+	kfree(out_test);
+}
+
+static __init void test_string_escape_nomem(void)
+{
+	char *in = "\eb \\C\007\"\x90\r]";
+	char out[64], *buf = out;
+	int rc = -ENOMEM, ret;
+
+	ret = string_escape_str_any_np(in, &buf, strlen(in), NULL);
+	if (ret == rc)
+		return;
+
+	pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc);
 }
 
 static int __init test_string_helpers_init(void)
@@ -112,6 +334,16 @@ static int __init test_string_helpers_init(void)
 	test_string_unescape("unescape inplace",
 			     get_random_int() % (UNESCAPE_ANY + 1), true);
 
+	/* Without dictionary */
+	for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++)
+		test_string_escape("escape 0", escape0, i, TEST_STRING_2_DICT_0);
+
+	/* With dictionary */
+	for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++)
+		test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1);
+
+	test_string_escape_nomem();
+
 	return -EINVAL;
 }
 module_init(test_string_helpers_init);
-- 
cgit v1.2.3


From 6e7458a6f074c71e74cda31c483114e65ea0f570 Mon Sep 17 00:00:00 2001
From: Ulrich Obergfell <uobergfe@redhat.com>
Date: Mon, 13 Oct 2014 15:55:35 -0700
Subject: kernel/watchdog.c: control hard lockup detection default

In some cases we don't want hard lockup detection enabled by default.
An example is when running as a guest.  Introduce

  watchdog_enable_hardlockup_detector(bool)

allowing those cases to disable hard lockup detection.  This must be
executed early by the boot processor from e.g.  smp_prepare_boot_cpu, in
order to allow kernel command line arguments to override it, as well as
to avoid hard lockup detection being enabled before we've had a chance
to indicate that it's unwanted.  In summary,

  initial boot:					default=enabled
  smp_prepare_boot_cpu
    watchdog_enable_hardlockup_detector(false):	default=disabled
  cmdline has 'nmi_watchdog=1':			default=enabled

The running kernel still has the ability to enable/disable at any time
with /proc/sys/kernel/nmi_watchdog us usual.  However even when the
default has been overridden /proc/sys/kernel/nmi_watchdog will initially
show '1'.  To truly turn it on one must disable/enable it, i.e.

  echo 0 > /proc/sys/kernel/nmi_watchdog
  echo 1 > /proc/sys/kernel/nmi_watchdog

This patch will be immediately useful for KVM with the next patch of this
series.  Other hypervisor guest types may find it useful as well.

[akpm@linux-foundation.org: fix build]
[dzickus@redhat.com: fix compile issues on sparc]
Signed-off-by: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nmi.h | 13 +++++++++++++
 kernel/watchdog.c   | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 61 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 1d2a6ab6b8bb..9b2022ab4d85 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -24,6 +24,19 @@ static inline void touch_nmi_watchdog(void)
 }
 #endif
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR)
+extern void watchdog_enable_hardlockup_detector(bool val);
+extern bool watchdog_hardlockup_detector_is_enabled(void);
+#else
+static inline void watchdog_enable_hardlockup_detector(bool val)
+{
+}
+static inline bool watchdog_hardlockup_detector_is_enabled(void)
+{
+	return true;
+}
+#endif
+
 /*
  * Create trigger_all_cpu_backtrace() out of the arch-provided
  * base function. Return whether such support was available,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index ff7fd80bef99..49e9537f3673 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -59,6 +59,25 @@ static unsigned long soft_lockup_nmi_warn;
 static int hardlockup_panic =
 			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 
+static bool hardlockup_detector_enabled = true;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void watchdog_enable_hardlockup_detector(bool val)
+{
+	hardlockup_detector_enabled = val;
+}
+
+bool watchdog_hardlockup_detector_is_enabled(void)
+{
+	return hardlockup_detector_enabled;
+}
+
 static int __init hardlockup_panic_setup(char *str)
 {
 	if (!strncmp(str, "panic", 5))
@@ -67,6 +86,14 @@ static int __init hardlockup_panic_setup(char *str)
 		hardlockup_panic = 0;
 	else if (!strncmp(str, "0", 1))
 		watchdog_user_enabled = 0;
+	else if (!strncmp(str, "1", 1) || !strncmp(str, "2", 1)) {
+		/*
+		 * Setting 'nmi_watchdog=1' or 'nmi_watchdog=2' (legacy option)
+		 * has the same effect.
+		 */
+		watchdog_user_enabled = 1;
+		watchdog_enable_hardlockup_detector(true);
+	}
 	return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -465,6 +492,15 @@ static int watchdog_nmi_enable(unsigned int cpu)
 	struct perf_event_attr *wd_attr;
 	struct perf_event *event = per_cpu(watchdog_ev, cpu);
 
+	/*
+	 * Some kernels need to default hard lockup detection to
+	 * 'disabled', for example a guest on a hypervisor.
+	 */
+	if (!watchdog_hardlockup_detector_is_enabled()) {
+		event = ERR_PTR(-ENOENT);
+		goto handle_err;
+	}
+
 	/* is it already setup and enabled? */
 	if (event && event->state > PERF_EVENT_STATE_OFF)
 		goto out;
@@ -479,6 +515,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
 	/* Try to register using hardware perf events */
 	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
 
+handle_err:
 	/* save cpu0 error for future comparision */
 	if (cpu == 0 && IS_ERR(event))
 		cpu0_err = PTR_ERR(event);
@@ -624,11 +661,13 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int err, old_thresh, old_enabled;
+	bool old_hardlockup;
 	static DEFINE_MUTEX(watchdog_proc_mutex);
 
 	mutex_lock(&watchdog_proc_mutex);
 	old_thresh = ACCESS_ONCE(watchdog_thresh);
 	old_enabled = ACCESS_ONCE(watchdog_user_enabled);
+	old_hardlockup = watchdog_hardlockup_detector_is_enabled();
 
 	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (err || !write)
@@ -640,15 +679,22 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 	 * disabled. The 'watchdog_running' variable check in
 	 * watchdog_*_all_cpus() function takes care of this.
 	 */
-	if (watchdog_user_enabled && watchdog_thresh)
+	if (watchdog_user_enabled && watchdog_thresh) {
+		/*
+		 * Prevent a change in watchdog_thresh accidentally overriding
+		 * the enablement of the hardlockup detector.
+		 */
+		if (watchdog_user_enabled != old_enabled)
+			watchdog_enable_hardlockup_detector(true);
 		err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
-	else
+	} else
 		watchdog_disable_all_cpus();
 
 	/* Restore old values on failure */
 	if (err) {
 		watchdog_thresh = old_thresh;
 		watchdog_user_enabled = old_enabled;
+		watchdog_enable_hardlockup_detector(old_hardlockup);
 	}
 out:
 	mutex_unlock(&watchdog_proc_mutex);
-- 
cgit v1.2.3


From 63a12d9d01831208a47f5c0fbbf93f503d1fb162 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 13 Oct 2014 15:55:44 -0700
Subject: kernel/param: consolidate __{start,stop}___param[] in
 <linux/moduleparam.h>

Consolidate the various external const and non-const declarations of
__start___param[] and __stop___param in <linux/moduleparam.h>.  This
requires making a few struct kernel_param pointers in kernel/params.c
const.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/moduleparam.h | 2 ++
 init/main.c                 | 2 --
 kernel/params.c             | 7 +++----
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index b43f4752304e..1c9effa25e26 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -78,6 +78,8 @@ struct kernel_param {
 	};
 };
 
+extern const struct kernel_param __start___param[], __stop___param[];
+
 /* Special one for strings we want to copy into */
 struct kparam_string {
 	unsigned int maxlen;
diff --git a/init/main.c b/init/main.c
index 89ec862da2d4..800a0daede7e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -501,7 +501,6 @@ asmlinkage __visible void __init start_kernel(void)
 {
 	char *command_line;
 	char *after_dashes;
-	extern const struct kernel_param __start___param[], __stop___param[];
 
 	/*
 	 * Need to run as early as possible, to initialize the
@@ -844,7 +843,6 @@ static char *initcall_level_names[] __initdata = {
 
 static void __init do_initcall_level(int level)
 {
-	extern const struct kernel_param __start___param[], __stop___param[];
 	initcall_t *fn;
 
 	strcpy(initcall_command_line, saved_command_line);
diff --git a/kernel/params.c b/kernel/params.c
index 041b5899d5e2..db97b791390f 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/slab.h>
@@ -513,8 +514,6 @@ EXPORT_SYMBOL(param_ops_string);
 #define to_module_attr(n) container_of(n, struct module_attribute, attr)
 #define to_module_kobject(n) container_of(n, struct module_kobject, kobj)
 
-extern struct kernel_param __start___param[], __stop___param[];
-
 struct param_attribute
 {
 	struct module_attribute mattr;
@@ -774,7 +773,7 @@ static struct module_kobject * __init locate_module_kobject(const char *name)
 }
 
 static void __init kernel_add_sysfs_param(const char *name,
-					  struct kernel_param *kparam,
+					  const struct kernel_param *kparam,
 					  unsigned int name_skip)
 {
 	struct module_kobject *mk;
@@ -809,7 +808,7 @@ static void __init kernel_add_sysfs_param(const char *name,
  */
 static void __init param_sysfs_builtin(void)
 {
-	struct kernel_param *kp;
+	const struct kernel_param *kp;
 	unsigned int name_len;
 	char modname[MODULE_NAME_LEN];
 
-- 
cgit v1.2.3


From 64e455079e1bd7787cc47be30b7f601ce682a5f6 Mon Sep 17 00:00:00 2001
From: Peter Feiner <pfeiner@google.com>
Date: Mon, 13 Oct 2014 15:55:46 -0700
Subject: mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY
 cleared

For VMAs that don't want write notifications, PTEs created for read faults
have their write bit set.  If the read fault happens after VM_SOFTDIRTY is
cleared, then the PTE's softdirty bit will remain clear after subsequent
writes.

Here's a simple code snippet to demonstrate the bug:

  char* m = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_SHARED, -1, 0);
  system("echo 4 > /proc/$PPID/clear_refs"); /* clear VM_SOFTDIRTY */
  assert(*m == '\0');     /* new PTE allows write access */
  assert(!soft_dirty(x));
  *m = 'x';               /* should dirty the page */
  assert(soft_dirty(x));  /* fails */

With this patch, write notifications are enabled when VM_SOFTDIRTY is
cleared.  Furthermore, to avoid unnecessary faults, write notifications
are disabled when VM_SOFTDIRTY is set.

As a side effect of enabling and disabling write notifications with
care, this patch fixes a bug in mprotect where vm_page_prot bits set by
drivers were zapped on mprotect.  An analogous bug was fixed in mmap by
commit c9d0bf241451 ("mm: uncached vma support with writenotify").

Signed-off-by: Peter Feiner <pfeiner@google.com>
Reported-by: Peter Feiner <pfeiner@google.com>
Suggested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Jamie Liu <jamieliu@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c            | 19 +++++++++++++-----
 include/asm-generic/pgtable.h | 14 ++++++++++++++
 include/linux/mm.h            |  5 +++++
 mm/memory.c                   |  3 ++-
 mm/mmap.c                     | 45 +++++++++++++++++++++++++++----------------
 mm/mprotect.c                 | 20 +++++--------------
 6 files changed, 68 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b7a7dc963a35..4e0388cffe3d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -827,8 +827,21 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			.private = &cp,
 		};
 		down_read(&mm->mmap_sem);
-		if (type == CLEAR_REFS_SOFT_DIRTY)
+		if (type == CLEAR_REFS_SOFT_DIRTY) {
+			for (vma = mm->mmap; vma; vma = vma->vm_next) {
+				if (!(vma->vm_flags & VM_SOFTDIRTY))
+					continue;
+				up_read(&mm->mmap_sem);
+				down_write(&mm->mmap_sem);
+				for (vma = mm->mmap; vma; vma = vma->vm_next) {
+					vma->vm_flags &= ~VM_SOFTDIRTY;
+					vma_set_page_prot(vma);
+				}
+				downgrade_write(&mm->mmap_sem);
+				break;
+			}
 			mmu_notifier_invalidate_range_start(mm, 0, -1);
+		}
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			cp.vma = vma;
 			if (is_vm_hugetlb_page(vma))
@@ -848,10 +861,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				continue;
 			if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
 				continue;
-			if (type == CLEAR_REFS_SOFT_DIRTY) {
-				if (vma->vm_flags & VM_SOFTDIRTY)
-					vma->vm_flags &= ~VM_SOFTDIRTY;
-			}
 			walk_page_range(vma->vm_start, vma->vm_end,
 					&clear_refs_walk);
 		}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 081ff8826bf6..752e30d63904 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -253,6 +253,20 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #define pgprot_device pgprot_noncached
 #endif
 
+#ifndef pgprot_modify
+#define pgprot_modify pgprot_modify
+static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+{
+	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
+		newprot = pgprot_noncached(newprot);
+	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
+		newprot = pgprot_writecombine(newprot);
+	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
+		newprot = pgprot_device(newprot);
+	return newprot;
+}
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4cd45cb95e6d..02d11ee7f19d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1974,11 +1974,16 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
 
 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
+void vma_set_page_prot(struct vm_area_struct *vma);
 #else
 static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
 	return __pgprot(0);
 }
+static inline void vma_set_page_prot(struct vm_area_struct *vma)
+{
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+}
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/memory.c b/mm/memory.c
index e229970e4223..1cc6bfbd872e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2053,7 +2053,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
 		/*
-		 * VM_MIXEDMAP !pfn_valid() case
+		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
+		 * VM_PFNMAP VMA.
 		 *
 		 * We should not cow pages in a shared writeable mapping.
 		 * Just mark the pages writable as we can't do any dirty
diff --git a/mm/mmap.c b/mm/mmap.c
index 93d28c7e5420..7f855206e7fb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -89,6 +89,25 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
+static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
+{
+	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
+}
+
+/* Update vma->vm_page_prot to reflect vma->vm_flags. */
+void vma_set_page_prot(struct vm_area_struct *vma)
+{
+	unsigned long vm_flags = vma->vm_flags;
+
+	vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
+	if (vma_wants_writenotify(vma)) {
+		vm_flags &= ~VM_SHARED;
+		vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
+						     vm_flags);
+	}
+}
+
+
 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio __read_mostly = 50;	/* default is 50% */
 unsigned long sysctl_overcommit_kbytes __read_mostly;
@@ -1475,11 +1494,16 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
 		return 1;
 
-	/* The open routine did something to the protections already? */
+	/* The open routine did something to the protections that pgprot_modify
+	 * won't preserve? */
 	if (pgprot_val(vma->vm_page_prot) !=
-	    pgprot_val(vm_get_page_prot(vm_flags)))
+	    pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
 		return 0;
 
+	/* Do we need to track softdirty? */
+	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
+		return 1;
+
 	/* Specialty mapping? */
 	if (vm_flags & VM_PFNMAP)
 		return 0;
@@ -1615,21 +1639,6 @@ munmap_back:
 			goto free_vma;
 	}
 
-	if (vma_wants_writenotify(vma)) {
-		pgprot_t pprot = vma->vm_page_prot;
-
-		/* Can vma->vm_page_prot have changed??
-		 *
-		 * Answer: Yes, drivers may have changed it in their
-		 *         f_op->mmap method.
-		 *
-		 * Ensures that vmas marked as uncached stay that way.
-		 */
-		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
-		if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
-			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	}
-
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	/* Once vma denies write, undo our temporary denial count */
 	if (file) {
@@ -1663,6 +1672,8 @@ out:
 	 */
 	vma->vm_flags |= VM_SOFTDIRTY;
 
+	vma_set_page_prot(vma);
+
 	return addr;
 
 unmap_and_free_vma:
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c43d557941f8..ace93454ce8e 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -29,13 +29,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#ifndef pgprot_modify
-static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
-{
-	return newprot;
-}
-#endif
-
 /*
  * For a prot_numa update we only hold mmap_sem for read so there is a
  * potential race with faulting where a pmd was temporarily none. This
@@ -93,7 +86,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				 * Avoid taking write faults for pages we
 				 * know to be dirty.
 				 */
-				if (dirty_accountable && pte_dirty(ptent))
+				if (dirty_accountable && pte_dirty(ptent) &&
+				    (pte_soft_dirty(ptent) ||
+				     !(vma->vm_flags & VM_SOFTDIRTY)))
 					ptent = pte_mkwrite(ptent);
 				ptep_modify_prot_commit(mm, addr, pte, ptent);
 				updated = true;
@@ -320,13 +315,8 @@ success:
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
-					  vm_get_page_prot(newflags));
-
-	if (vma_wants_writenotify(vma)) {
-		vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
-		dirty_accountable = 1;
-	}
+	dirty_accountable = vma_wants_writenotify(vma);
+	vma_set_page_prot(vma);
 
 	change_protection(vma, start, end, vma->vm_page_prot,
 			  dirty_accountable, 0);
-- 
cgit v1.2.3


From e19a8a0ad2d255316830ead05b59c5a704434cbb Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 14 Oct 2014 09:00:44 -0600
Subject: block: Remove REQ_KERNEL

REQ_KERNEL is no longer used. Remove it and drop the redundant uio
argument to nfs_file_direct_{read,write}.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/nfs/direct.c           | 12 +++++-------
 fs/nfs/file.c             |  4 ++--
 include/linux/blk_types.h |  2 --
 include/linux/fs.h        |  2 --
 include/linux/nfs_fs.h    |  4 ++--
 5 files changed, 9 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 65ef6e00deee..891f7dd8cbd6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -222,11 +222,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
 #else
 	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
-	if (rw == READ || rw == KERNEL_READ)
-		return nfs_file_direct_read(iocb, iter, pos,
-				rw == READ ? true : false);
-	return nfs_file_direct_write(iocb, iter, pos,
-				rw == WRITE ? true : false);
+	if (rw == READ)
+		return nfs_file_direct_read(iocb, iter, pos);
+	return nfs_file_direct_write(iocb, iter, pos);
 #endif /* CONFIG_NFS_SWAP */
 }
 
@@ -512,7 +510,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
  * cache.
  */
 ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
-				loff_t pos, bool uio)
+				loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -893,7 +891,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
 ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
-				loff_t pos, bool uio)
+				loff_t pos)
 {
 	ssize_t result = -EINVAL;
 	struct file *file = iocb->ki_filp;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 524dd80d1898..3b42cb86218e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -171,7 +171,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 	ssize_t result;
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_read(iocb, to, iocb->ki_pos, true);
+		return nfs_file_direct_read(iocb, to, iocb->ki_pos);
 
 	dprintk("NFS: read(%pD2, %zu@%lu)\n",
 		iocb->ki_filp,
@@ -648,7 +648,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 		return result;
 
 	if (file->f_flags & O_DIRECT)
-		return nfs_file_direct_write(iocb, from, pos, true);
+		return nfs_file_direct_write(iocb, from, pos);
 
 	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
 		file, count, (long long) pos);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 38bc008e4503..445d59231bc4 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -187,7 +187,6 @@ enum rq_flag_bits {
 	__REQ_FLUSH_SEQ,	/* request for flush sequence */
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
-	__REQ_KERNEL, 		/* direct IO to kernel pages */
 	__REQ_PM,		/* runtime pm request */
 	__REQ_HASHED,		/* on IO scheduler merge hash */
 	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
@@ -241,7 +240,6 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1ULL << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1ULL << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1ULL << __REQ_SECURE)
-#define REQ_KERNEL		(1ULL << __REQ_KERNEL)
 #define REQ_PM			(1ULL << __REQ_PM)
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
 #define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 94187721ad41..9b5bc1cacb8e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -192,8 +192,6 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define READ			0
 #define WRITE			RW_MASK
 #define READA			RWA_MASK
-#define KERNEL_READ		(READ|REQ_KERNEL)
-#define KERNEL_WRITE		(WRITE|REQ_KERNEL)
 
 #define READ_SYNC		(READ | REQ_SYNC)
 #define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 5180a7ededec..e6e1c4e0d7f3 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -464,10 +464,10 @@ extern int nfs3_removexattr (struct dentry *, const char *name);
 extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
 			struct iov_iter *iter,
-			loff_t pos, bool uio);
+			loff_t pos);
 extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
 			struct iov_iter *iter,
-			loff_t pos, bool uio);
+			loff_t pos);
 
 /*
  * linux/fs/nfs/dir.c
-- 
cgit v1.2.3


From 31eff81e94472ddb7549509bf4b6e93e1f6f7dc9 Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Fri, 10 Oct 2014 23:10:47 +0200
Subject: skbuff: fix ftrace handling in skb_unshare

If the skb is not dropped afterwards we should run consume_skb instead
kfree_skb. Inside of function skb_unshare we do always a kfree_skb,
doesn't depend if skb_copy failed or was successful.

This patch switch this behaviour like skb_share_check, if allocation of
sk_buff failed we use kfree_skb otherwise consume_skb.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3ab0749d6875..a59d9343c25b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1203,7 +1203,12 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
 	might_sleep_if(pri & __GFP_WAIT);
 	if (skb_cloned(skb)) {
 		struct sk_buff *nskb = skb_copy(skb, pri);
-		kfree_skb(skb);	/* Free our shared copy */
+
+		/* Free our shared copy */
+		if (likely(nskb))
+			consume_skb(skb);
+		else
+			kfree_skb(skb);
 		skb = nskb;
 	}
 	return skb;
-- 
cgit v1.2.3


From 9ea8aa8d5087529210553114b7bc4bf4374ace8f Mon Sep 17 00:00:00 2001
From: Tilman Schmidt <tilman@imap.cc>
Date: Sat, 11 Oct 2014 13:46:30 +0200
Subject: isdn/capi: correct capi20_manufacturer argument type mismatch

Function capi20_manufacturer() is declared with unsigned int cmd
argument but called with unsigned long.
Fix by correcting the function prototype since the actual argument
is part of the user visible API.

Spotted with Coverity.

Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/capi/kcapi.c  | 4 ++--
 include/linux/kernelcapi.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index c123709acf82..823f6985b260 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -1184,7 +1184,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
  * Return value: CAPI result code
  */
 
-int capi20_manufacturer(unsigned int cmd, void __user *data)
+int capi20_manufacturer(unsigned long cmd, void __user *data)
 {
 	struct capi_ctr *ctr;
 	int retval;
@@ -1259,7 +1259,7 @@ int capi20_manufacturer(unsigned int cmd, void __user *data)
 	}
 
 	default:
-		printk(KERN_ERR "kcapi: manufacturer command %d unknown.\n",
+		printk(KERN_ERR "kcapi: manufacturer command %lu unknown.\n",
 		       cmd);
 		break;
 
diff --git a/include/linux/kernelcapi.h b/include/linux/kernelcapi.h
index 9be37da93680..e985ba679c4a 100644
--- a/include/linux/kernelcapi.h
+++ b/include/linux/kernelcapi.h
@@ -41,7 +41,7 @@ u16 capi20_get_manufacturer(u32 contr, u8 buf[CAPI_MANUFACTURER_LEN]);
 u16 capi20_get_version(u32 contr, struct capi_version *verp);
 u16 capi20_get_serial(u32 contr, u8 serial[CAPI_SERIAL_LEN]);
 u16 capi20_get_profile(u32 contr, struct capi_profile *profp);
-int capi20_manufacturer(unsigned int cmd, void __user *data);
+int capi20_manufacturer(unsigned long cmd, void __user *data);
 
 #define CAPICTR_UP			0
 #define CAPICTR_DOWN			1
-- 
cgit v1.2.3


From e4339d28f640a7c0d92903bcf389a2dfa281270d Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 16 Sep 2014 17:50:45 +0800
Subject: libceph: reference counting pagelist

this allow pagelist to present data that may be sent multiple times.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 fs/ceph/mds_client.c          | 1 -
 include/linux/ceph/pagelist.h | 5 ++++-
 net/ceph/messenger.c          | 4 +---
 net/ceph/pagelist.c           | 7 +++++--
 4 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5474feb77743..b4430ce1b3f6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2865,7 +2865,6 @@ fail:
 	mutex_unlock(&session->s_mutex);
 fail_nomsg:
 	ceph_pagelist_release(pagelist);
-	kfree(pagelist);
 fail_nopagelist:
 	pr_err("error %d preparing reconnect for mds%d\n", err, mds);
 	return;
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 9660d6b0a35d..5f871d84ddce 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -2,6 +2,7 @@
 #define __FS_CEPH_PAGELIST_H
 
 #include <linux/list.h>
+#include <linux/atomic.h>
 
 struct ceph_pagelist {
 	struct list_head head;
@@ -10,6 +11,7 @@ struct ceph_pagelist {
 	size_t room;
 	struct list_head free_list;
 	size_t num_pages_free;
+	atomic_t refcnt;
 };
 
 struct ceph_pagelist_cursor {
@@ -26,9 +28,10 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
 	pl->room = 0;
 	INIT_LIST_HEAD(&pl->free_list);
 	pl->num_pages_free = 0;
+	atomic_set(&pl->refcnt, 1);
 }
 
-extern int ceph_pagelist_release(struct ceph_pagelist *pl);
+extern void ceph_pagelist_release(struct ceph_pagelist *pl);
 
 extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l);
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index e7d94113f2d6..9764c771cfb1 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -3071,10 +3071,8 @@ static void ceph_msg_data_destroy(struct ceph_msg_data *data)
 		return;
 
 	WARN_ON(!list_empty(&data->links));
-	if (data->type == CEPH_MSG_DATA_PAGELIST) {
+	if (data->type == CEPH_MSG_DATA_PAGELIST)
 		ceph_pagelist_release(data->pagelist);
-		kfree(data->pagelist);
-	}
 	kmem_cache_free(ceph_msg_data_cache, data);
 }
 
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 92866bebb65f..c7c220a736e5 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/ceph/pagelist.h>
@@ -13,8 +14,10 @@ static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
 	}
 }
 
-int ceph_pagelist_release(struct ceph_pagelist *pl)
+void ceph_pagelist_release(struct ceph_pagelist *pl)
 {
+	if (!atomic_dec_and_test(&pl->refcnt))
+		return;
 	ceph_pagelist_unmap_tail(pl);
 	while (!list_empty(&pl->head)) {
 		struct page *page = list_first_entry(&pl->head, struct page,
@@ -23,7 +26,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl)
 		__free_page(page);
 	}
 	ceph_pagelist_free_reserve(pl);
-	return 0;
+	kfree(pl);
 }
 EXPORT_SYMBOL(ceph_pagelist_release);
 
-- 
cgit v1.2.3


From eb179d3975c804ad98eaa403425eb6e48cfd3cc2 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Tue, 30 Sep 2014 22:07:50 +0200
Subject: libceph: remove redundant declaration

ceph_release_page_vector was defined twice in libceph.h

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
---
 include/linux/ceph/libceph.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 279b0afac1c1..07bc359b88ac 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -211,7 +211,6 @@ extern struct page **ceph_get_direct_page_vector(const void __user *data,
 						 bool write_page);
 extern void ceph_put_page_vector(struct page **pages, int num_pages,
 				 bool dirty);
-extern void ceph_release_page_vector(struct page **pages, int num_pages);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_copy_user_to_page_vector(struct page **pages,
 					 const void __user *data,
-- 
cgit v1.2.3


From 70b5bfa360aea4157b45c2863746ca67896c6ef1 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@redhat.com>
Date: Thu, 2 Oct 2014 17:22:29 +0400
Subject: libceph: sync osd op definitions in rados.h

Bring in missing osd ops and strings, use macros to eliminate multiple
points of maintenance.

Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/rados.h | 225 ++++++++++++++++++++++++++-------------------
 net/ceph/ceph_strings.c    |  75 +--------------
 net/ceph/osd_client.c      |  65 +------------
 3 files changed, 137 insertions(+), 228 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index f20e0d8a2155..2f822dca1046 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -172,6 +172,7 @@ extern const char *ceph_osd_state_name(int s);
 #define CEPH_OSD_OP_MODE_WR    0x2000
 #define CEPH_OSD_OP_MODE_RMW   0x3000
 #define CEPH_OSD_OP_MODE_SUB   0x4000
+#define CEPH_OSD_OP_MODE_CACHE 0x8000
 
 #define CEPH_OSD_OP_TYPE       0x0f00
 #define CEPH_OSD_OP_TYPE_LOCK  0x0100
@@ -181,103 +182,135 @@ extern const char *ceph_osd_state_name(int s);
 #define CEPH_OSD_OP_TYPE_PG    0x0500
 #define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */
 
+#define __CEPH_OSD_OP1(mode, nr) \
+	(CEPH_OSD_OP_MODE_##mode | (nr))
+
+#define __CEPH_OSD_OP(mode, type, nr) \
+	(CEPH_OSD_OP_MODE_##mode | CEPH_OSD_OP_TYPE_##type | (nr))
+
+#define __CEPH_FORALL_OSD_OPS(f)					    \
+	/** data **/							    \
+	/* read */							    \
+	f(READ,		__CEPH_OSD_OP(RD, DATA, 1),	"read")		    \
+	f(STAT,		__CEPH_OSD_OP(RD, DATA, 2),	"stat")		    \
+	f(MAPEXT,	__CEPH_OSD_OP(RD, DATA, 3),	"mapext")	    \
+									    \
+	/* fancy read */						    \
+	f(MASKTRUNC,	__CEPH_OSD_OP(RD, DATA, 4),	"masktrunc")	    \
+	f(SPARSE_READ,	__CEPH_OSD_OP(RD, DATA, 5),	"sparse-read")	    \
+									    \
+	f(NOTIFY,	__CEPH_OSD_OP(RD, DATA, 6),	"notify")	    \
+	f(NOTIFY_ACK,	__CEPH_OSD_OP(RD, DATA, 7),	"notify-ack")	    \
+									    \
+	/* versioning */						    \
+	f(ASSERT_VER,	__CEPH_OSD_OP(RD, DATA, 8),	"assert-version")   \
+									    \
+	f(LIST_WATCHERS, __CEPH_OSD_OP(RD, DATA, 9),	"list-watchers")    \
+									    \
+	f(LIST_SNAPS,	__CEPH_OSD_OP(RD, DATA, 10),	"list-snaps")	    \
+									    \
+	/* sync */							    \
+	f(SYNC_READ,	__CEPH_OSD_OP(RD, DATA, 11),	"sync_read")	    \
+									    \
+	/* write */							    \
+	f(WRITE,	__CEPH_OSD_OP(WR, DATA, 1),	"write")	    \
+	f(WRITEFULL,	__CEPH_OSD_OP(WR, DATA, 2),	"writefull")	    \
+	f(TRUNCATE,	__CEPH_OSD_OP(WR, DATA, 3),	"truncate")	    \
+	f(ZERO,		__CEPH_OSD_OP(WR, DATA, 4),	"zero")		    \
+	f(DELETE,	__CEPH_OSD_OP(WR, DATA, 5),	"delete")	    \
+									    \
+	/* fancy write */						    \
+	f(APPEND,	__CEPH_OSD_OP(WR, DATA, 6),	"append")	    \
+	f(STARTSYNC,	__CEPH_OSD_OP(WR, DATA, 7),	"startsync")	    \
+	f(SETTRUNC,	__CEPH_OSD_OP(WR, DATA, 8),	"settrunc")	    \
+	f(TRIMTRUNC,	__CEPH_OSD_OP(WR, DATA, 9),	"trimtrunc")	    \
+									    \
+	f(TMAPUP,	__CEPH_OSD_OP(RMW, DATA, 10),	"tmapup")	    \
+	f(TMAPPUT,	__CEPH_OSD_OP(WR, DATA, 11),	"tmapput")	    \
+	f(TMAPGET,	__CEPH_OSD_OP(RD, DATA, 12),	"tmapget")	    \
+									    \
+	f(CREATE,	__CEPH_OSD_OP(WR, DATA, 13),	"create")	    \
+	f(ROLLBACK,	__CEPH_OSD_OP(WR, DATA, 14),	"rollback")	    \
+									    \
+	f(WATCH,	__CEPH_OSD_OP(WR, DATA, 15),	"watch")	    \
+									    \
+	/* omap */							    \
+	f(OMAPGETKEYS,	__CEPH_OSD_OP(RD, DATA, 17),	"omap-get-keys")    \
+	f(OMAPGETVALS,	__CEPH_OSD_OP(RD, DATA, 18),	"omap-get-vals")    \
+	f(OMAPGETHEADER, __CEPH_OSD_OP(RD, DATA, 19),	"omap-get-header")  \
+	f(OMAPGETVALSBYKEYS, __CEPH_OSD_OP(RD, DATA, 20), "omap-get-vals-by-keys") \
+	f(OMAPSETVALS,	__CEPH_OSD_OP(WR, DATA, 21),	"omap-set-vals")    \
+	f(OMAPSETHEADER, __CEPH_OSD_OP(WR, DATA, 22),	"omap-set-header")  \
+	f(OMAPCLEAR,	__CEPH_OSD_OP(WR, DATA, 23),	"omap-clear")	    \
+	f(OMAPRMKEYS,	__CEPH_OSD_OP(WR, DATA, 24),	"omap-rm-keys")	    \
+	f(OMAP_CMP,	__CEPH_OSD_OP(RD, DATA, 25),	"omap-cmp")	    \
+									    \
+	/* tiering */							    \
+	f(COPY_FROM,	__CEPH_OSD_OP(WR, DATA, 26),	"copy-from")	    \
+	f(COPY_GET_CLASSIC, __CEPH_OSD_OP(RD, DATA, 27), "copy-get-classic") \
+	f(UNDIRTY,	__CEPH_OSD_OP(WR, DATA, 28),	"undirty")	    \
+	f(ISDIRTY,	__CEPH_OSD_OP(RD, DATA, 29),	"isdirty")	    \
+	f(COPY_GET,	__CEPH_OSD_OP(RD, DATA, 30),	"copy-get")	    \
+	f(CACHE_FLUSH,	__CEPH_OSD_OP(CACHE, DATA, 31),	"cache-flush")	    \
+	f(CACHE_EVICT,	__CEPH_OSD_OP(CACHE, DATA, 32),	"cache-evict")	    \
+	f(CACHE_TRY_FLUSH, __CEPH_OSD_OP(CACHE, DATA, 33), "cache-try-flush") \
+									    \
+	/* convert tmap to omap */					    \
+	f(TMAP2OMAP,	__CEPH_OSD_OP(RMW, DATA, 34),	"tmap2omap")	    \
+									    \
+	/* hints */							    \
+	f(SETALLOCHINT,	__CEPH_OSD_OP(WR, DATA, 35),	"set-alloc-hint")   \
+									    \
+	/** multi **/							    \
+	f(CLONERANGE,	__CEPH_OSD_OP(WR, MULTI, 1),	"clonerange")	    \
+	f(ASSERT_SRC_VERSION, __CEPH_OSD_OP(RD, MULTI, 2), "assert-src-version") \
+	f(SRC_CMPXATTR,	__CEPH_OSD_OP(RD, MULTI, 3),	"src-cmpxattr")	    \
+									    \
+	/** attrs **/							    \
+	/* read */							    \
+	f(GETXATTR,	__CEPH_OSD_OP(RD, ATTR, 1),	"getxattr")	    \
+	f(GETXATTRS,	__CEPH_OSD_OP(RD, ATTR, 2),	"getxattrs")	    \
+	f(CMPXATTR,	__CEPH_OSD_OP(RD, ATTR, 3),	"cmpxattr")	    \
+									    \
+	/* write */							    \
+	f(SETXATTR,	__CEPH_OSD_OP(WR, ATTR, 1),	"setxattr")	    \
+	f(SETXATTRS,	__CEPH_OSD_OP(WR, ATTR, 2),	"setxattrs")	    \
+	f(RESETXATTRS,	__CEPH_OSD_OP(WR, ATTR, 3),	"resetxattrs")	    \
+	f(RMXATTR,	__CEPH_OSD_OP(WR, ATTR, 4),	"rmxattr")	    \
+									    \
+	/** subop **/							    \
+	f(PULL,		__CEPH_OSD_OP1(SUB, 1),		"pull")		    \
+	f(PUSH,		__CEPH_OSD_OP1(SUB, 2),		"push")		    \
+	f(BALANCEREADS,	__CEPH_OSD_OP1(SUB, 3),		"balance-reads")    \
+	f(UNBALANCEREADS, __CEPH_OSD_OP1(SUB, 4),	"unbalance-reads")  \
+	f(SCRUB,	__CEPH_OSD_OP1(SUB, 5),		"scrub")	    \
+	f(SCRUB_RESERVE, __CEPH_OSD_OP1(SUB, 6),	"scrub-reserve")    \
+	f(SCRUB_UNRESERVE, __CEPH_OSD_OP1(SUB, 7),	"scrub-unreserve")  \
+	f(SCRUB_STOP,	__CEPH_OSD_OP1(SUB, 8),		"scrub-stop")	    \
+	f(SCRUB_MAP,	__CEPH_OSD_OP1(SUB, 9),		"scrub-map")	    \
+									    \
+	/** lock **/							    \
+	f(WRLOCK,	__CEPH_OSD_OP(WR, LOCK, 1),	"wrlock")	    \
+	f(WRUNLOCK,	__CEPH_OSD_OP(WR, LOCK, 2),	"wrunlock")	    \
+	f(RDLOCK,	__CEPH_OSD_OP(WR, LOCK, 3),	"rdlock")	    \
+	f(RDUNLOCK,	__CEPH_OSD_OP(WR, LOCK, 4),	"rdunlock")	    \
+	f(UPLOCK,	__CEPH_OSD_OP(WR, LOCK, 5),	"uplock")	    \
+	f(DNLOCK,	__CEPH_OSD_OP(WR, LOCK, 6),	"dnlock")	    \
+									    \
+	/** exec **/							    \
+	/* note: the RD bit here is wrong; see special-case below in helper */ \
+	f(CALL,		__CEPH_OSD_OP(RD, EXEC, 1),	"call")		    \
+									    \
+	/** pg **/							    \
+	f(PGLS,		__CEPH_OSD_OP(RD, PG, 1),	"pgls")		    \
+	f(PGLS_FILTER,	__CEPH_OSD_OP(RD, PG, 2),	"pgls-filter")	    \
+	f(PG_HITSET_LS,	__CEPH_OSD_OP(RD, PG, 3),	"pg-hitset-ls")	    \
+	f(PG_HITSET_GET, __CEPH_OSD_OP(RD, PG, 4),	"pg-hitset-get")
+
 enum {
-	/** data **/
-	/* read */
-	CEPH_OSD_OP_READ      = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1,
-	CEPH_OSD_OP_STAT      = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2,
-	CEPH_OSD_OP_MAPEXT    = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 3,
-
-	/* fancy read */
-	CEPH_OSD_OP_MASKTRUNC   = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4,
-	CEPH_OSD_OP_SPARSE_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 5,
-
-	CEPH_OSD_OP_NOTIFY    = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 6,
-	CEPH_OSD_OP_NOTIFY_ACK = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 7,
-
-	/* versioning */
-	CEPH_OSD_OP_ASSERT_VER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 8,
-
-	/* write */
-	CEPH_OSD_OP_WRITE     = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
-	CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
-	CEPH_OSD_OP_TRUNCATE  = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3,
-	CEPH_OSD_OP_ZERO      = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4,
-	CEPH_OSD_OP_DELETE    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5,
-
-	/* fancy write */
-	CEPH_OSD_OP_APPEND    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6,
-	CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7,
-	CEPH_OSD_OP_SETTRUNC  = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8,
-	CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9,
-
-	CEPH_OSD_OP_TMAPUP  = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10,
-	CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11,
-	CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12,
-
-	CEPH_OSD_OP_CREATE  = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13,
-	CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14,
-
-	CEPH_OSD_OP_WATCH   = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
-
-	/* omap */
-	CEPH_OSD_OP_OMAPGETKEYS   = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
-	CEPH_OSD_OP_OMAPGETVALS   = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
-	CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19,
-	CEPH_OSD_OP_OMAPGETVALSBYKEYS  =
-	  CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20,
-	CEPH_OSD_OP_OMAPSETVALS   = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21,
-	CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22,
-	CEPH_OSD_OP_OMAPCLEAR     = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23,
-	CEPH_OSD_OP_OMAPRMKEYS    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
-	CEPH_OSD_OP_OMAP_CMP      = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
-
-	/* hints */
-	CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35,
-
-	/** multi **/
-	CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
-	CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
-	CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3,
-
-	/** attrs **/
-	/* read */
-	CEPH_OSD_OP_GETXATTR  = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
-	CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2,
-	CEPH_OSD_OP_CMPXATTR  = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3,
-
-	/* write */
-	CEPH_OSD_OP_SETXATTR  = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1,
-	CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2,
-	CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3,
-	CEPH_OSD_OP_RMXATTR   = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4,
-
-	/** subop **/
-	CEPH_OSD_OP_PULL            = CEPH_OSD_OP_MODE_SUB | 1,
-	CEPH_OSD_OP_PUSH            = CEPH_OSD_OP_MODE_SUB | 2,
-	CEPH_OSD_OP_BALANCEREADS    = CEPH_OSD_OP_MODE_SUB | 3,
-	CEPH_OSD_OP_UNBALANCEREADS  = CEPH_OSD_OP_MODE_SUB | 4,
-	CEPH_OSD_OP_SCRUB           = CEPH_OSD_OP_MODE_SUB | 5,
-	CEPH_OSD_OP_SCRUB_RESERVE   = CEPH_OSD_OP_MODE_SUB | 6,
-	CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7,
-	CEPH_OSD_OP_SCRUB_STOP      = CEPH_OSD_OP_MODE_SUB | 8,
-	CEPH_OSD_OP_SCRUB_MAP     = CEPH_OSD_OP_MODE_SUB | 9,
-
-	/** lock **/
-	CEPH_OSD_OP_WRLOCK    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
-	CEPH_OSD_OP_WRUNLOCK  = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2,
-	CEPH_OSD_OP_RDLOCK    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3,
-	CEPH_OSD_OP_RDUNLOCK  = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4,
-	CEPH_OSD_OP_UPLOCK    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5,
-	CEPH_OSD_OP_DNLOCK    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
-
-	/** exec **/
-	/* note: the RD bit here is wrong; see special-case below in helper */
-	CEPH_OSD_OP_CALL    = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
-
-	/** pg **/
-	CEPH_OSD_OP_PGLS      = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
-	CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2,
+#define GENERATE_ENUM_ENTRY(op, opcode, str)	CEPH_OSD_OP_##op = (opcode),
+__CEPH_FORALL_OSD_OPS(GENERATE_ENUM_ENTRY)
+#undef GENERATE_ENUM_ENTRY
 };
 
 static inline int ceph_osd_op_type_lock(int op)
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 1348df96fe15..30560202f57b 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -19,77 +19,12 @@ const char *ceph_entity_type_name(int type)
 const char *ceph_osd_op_name(int op)
 {
 	switch (op) {
-	case CEPH_OSD_OP_READ: return "read";
-	case CEPH_OSD_OP_STAT: return "stat";
-	case CEPH_OSD_OP_MAPEXT: return "mapext";
-	case CEPH_OSD_OP_SPARSE_READ: return "sparse-read";
-	case CEPH_OSD_OP_NOTIFY: return "notify";
-	case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack";
-	case CEPH_OSD_OP_ASSERT_VER: return "assert-version";
-
-	case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
-
-	case CEPH_OSD_OP_CREATE: return "create";
-	case CEPH_OSD_OP_WRITE: return "write";
-	case CEPH_OSD_OP_DELETE: return "delete";
-	case CEPH_OSD_OP_TRUNCATE: return "truncate";
-	case CEPH_OSD_OP_ZERO: return "zero";
-	case CEPH_OSD_OP_WRITEFULL: return "writefull";
-	case CEPH_OSD_OP_ROLLBACK: return "rollback";
-
-	case CEPH_OSD_OP_APPEND: return "append";
-	case CEPH_OSD_OP_STARTSYNC: return "startsync";
-	case CEPH_OSD_OP_SETTRUNC: return "settrunc";
-	case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
-
-	case CEPH_OSD_OP_TMAPUP: return "tmapup";
-	case CEPH_OSD_OP_TMAPGET: return "tmapget";
-	case CEPH_OSD_OP_TMAPPUT: return "tmapput";
-	case CEPH_OSD_OP_WATCH: return "watch";
-
-	case CEPH_OSD_OP_CLONERANGE: return "clonerange";
-	case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version";
-	case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr";
-
-	case CEPH_OSD_OP_GETXATTR: return "getxattr";
-	case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
-	case CEPH_OSD_OP_SETXATTR: return "setxattr";
-	case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
-	case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
-	case CEPH_OSD_OP_RMXATTR: return "rmxattr";
-	case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
-
-	case CEPH_OSD_OP_PULL: return "pull";
-	case CEPH_OSD_OP_PUSH: return "push";
-	case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
-	case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
-	case CEPH_OSD_OP_SCRUB: return "scrub";
-	case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve";
-	case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve";
-	case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop";
-	case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map";
-
-	case CEPH_OSD_OP_WRLOCK: return "wrlock";
-	case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
-	case CEPH_OSD_OP_RDLOCK: return "rdlock";
-	case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
-	case CEPH_OSD_OP_UPLOCK: return "uplock";
-	case CEPH_OSD_OP_DNLOCK: return "dnlock";
-
-	case CEPH_OSD_OP_CALL: return "call";
-
-	case CEPH_OSD_OP_PGLS: return "pgls";
-	case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter";
-	case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys";
-	case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals";
-	case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header";
-	case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys";
-	case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals";
-	case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header";
-	case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear";
-	case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys";
+#define GENERATE_CASE(op, opcode, str)	case CEPH_OSD_OP_##op: return (str);
+__CEPH_FORALL_OSD_OPS(GENERATE_CASE)
+#undef GENERATE_CASE
+	default:
+		return "???";
 	}
-	return "???";
 }
 
 const char *ceph_osd_state_name(int s)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a528ea34253a..f3fc54eac09d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -431,68 +431,9 @@ EXPORT_SYMBOL(ceph_osdc_alloc_request);
 static bool osd_req_opcode_valid(u16 opcode)
 {
 	switch (opcode) {
-	case CEPH_OSD_OP_READ:
-	case CEPH_OSD_OP_STAT:
-	case CEPH_OSD_OP_MAPEXT:
-	case CEPH_OSD_OP_MASKTRUNC:
-	case CEPH_OSD_OP_SPARSE_READ:
-	case CEPH_OSD_OP_NOTIFY:
-	case CEPH_OSD_OP_NOTIFY_ACK:
-	case CEPH_OSD_OP_ASSERT_VER:
-	case CEPH_OSD_OP_WRITE:
-	case CEPH_OSD_OP_WRITEFULL:
-	case CEPH_OSD_OP_TRUNCATE:
-	case CEPH_OSD_OP_ZERO:
-	case CEPH_OSD_OP_DELETE:
-	case CEPH_OSD_OP_APPEND:
-	case CEPH_OSD_OP_STARTSYNC:
-	case CEPH_OSD_OP_SETTRUNC:
-	case CEPH_OSD_OP_TRIMTRUNC:
-	case CEPH_OSD_OP_TMAPUP:
-	case CEPH_OSD_OP_TMAPPUT:
-	case CEPH_OSD_OP_TMAPGET:
-	case CEPH_OSD_OP_CREATE:
-	case CEPH_OSD_OP_ROLLBACK:
-	case CEPH_OSD_OP_WATCH:
-	case CEPH_OSD_OP_OMAPGETKEYS:
-	case CEPH_OSD_OP_OMAPGETVALS:
-	case CEPH_OSD_OP_OMAPGETHEADER:
-	case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
-	case CEPH_OSD_OP_OMAPSETVALS:
-	case CEPH_OSD_OP_OMAPSETHEADER:
-	case CEPH_OSD_OP_OMAPCLEAR:
-	case CEPH_OSD_OP_OMAPRMKEYS:
-	case CEPH_OSD_OP_OMAP_CMP:
-	case CEPH_OSD_OP_SETALLOCHINT:
-	case CEPH_OSD_OP_CLONERANGE:
-	case CEPH_OSD_OP_ASSERT_SRC_VERSION:
-	case CEPH_OSD_OP_SRC_CMPXATTR:
-	case CEPH_OSD_OP_GETXATTR:
-	case CEPH_OSD_OP_GETXATTRS:
-	case CEPH_OSD_OP_CMPXATTR:
-	case CEPH_OSD_OP_SETXATTR:
-	case CEPH_OSD_OP_SETXATTRS:
-	case CEPH_OSD_OP_RESETXATTRS:
-	case CEPH_OSD_OP_RMXATTR:
-	case CEPH_OSD_OP_PULL:
-	case CEPH_OSD_OP_PUSH:
-	case CEPH_OSD_OP_BALANCEREADS:
-	case CEPH_OSD_OP_UNBALANCEREADS:
-	case CEPH_OSD_OP_SCRUB:
-	case CEPH_OSD_OP_SCRUB_RESERVE:
-	case CEPH_OSD_OP_SCRUB_UNRESERVE:
-	case CEPH_OSD_OP_SCRUB_STOP:
-	case CEPH_OSD_OP_SCRUB_MAP:
-	case CEPH_OSD_OP_WRLOCK:
-	case CEPH_OSD_OP_WRUNLOCK:
-	case CEPH_OSD_OP_RDLOCK:
-	case CEPH_OSD_OP_RDUNLOCK:
-	case CEPH_OSD_OP_UPLOCK:
-	case CEPH_OSD_OP_DNLOCK:
-	case CEPH_OSD_OP_CALL:
-	case CEPH_OSD_OP_PGLS:
-	case CEPH_OSD_OP_PGLS_FILTER:
-		return true;
+#define GENERATE_CASE(op, opcode, str)	case CEPH_OSD_OP_##op: return true;
+__CEPH_FORALL_OSD_OPS(GENERATE_CASE)
+#undef GENERATE_CASE
 	default:
 		return false;
 	}
-- 
cgit v1.2.3


From db404b13617fe0cdb415da55762203d456837912 Mon Sep 17 00:00:00 2001
From: Mark Rustad <mark.d.rustad@intel.com>
Date: Tue, 14 Oct 2014 06:28:38 -0700
Subject: genl_magic: Resolve logical-op warnings

Resolve "logical 'and' applied to non-boolean constant" warnings"
that appear in W=2 builds by adding !! to a bit test.

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/genl_magic_func.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index c0894dd8827b..667c31101b8b 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -178,12 +178,12 @@ static int s_name ## _from_attrs_for_change(struct s_name *s,		\
 #define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...)	\
 		nla = ntb[attr_nr];						\
 		if (nla) {						\
-			if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
+			if (exclude_invariants && !!((attr_flag) & DRBD_F_INVARIANT)) {		\
 				pr_info("<< must not change invariant attr: %s\n", #name);	\
 				return -EEXIST;				\
 			}						\
 			assignment;					\
-		} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
+		} else if (exclude_invariants && !!((attr_flag) & DRBD_F_INVARIANT)) {		\
 			/* attribute missing from payload, */		\
 			/* which was expected */			\
 		} else if ((attr_flag) & DRBD_F_REQUIRED) {		\
-- 
cgit v1.2.3


From 016c98c6fe0c914d12e2e242b2bccde6d6dea54b Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 14 Oct 2014 10:40:34 +1030
Subject: virtio: unify config_changed handling

Replace duplicated code in all transports with a single wrapper in
virtio.c.

The only functional change is in virtio_mmio.c: if a buggy device sends
us an interrupt before driver is set, we previously returned IRQ_NONE,
now we return IRQ_HANDLED.

As this must not happen in practice, this does not look like a big deal.

See also commit 3fff0179e33cd7d0a688dab65700c46ad089e934
	virtio-pci: do not oops on config change if driver not loaded.
for the original motivation behind the driver check.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/misc/mic/card/mic_virtio.c | 6 +-----
 drivers/s390/kvm/kvm_virtio.c      | 9 +--------
 drivers/s390/kvm/virtio_ccw.c      | 6 +-----
 drivers/virtio/virtio.c            | 9 +++++++++
 drivers/virtio/virtio_mmio.c       | 7 ++-----
 drivers/virtio/virtio_pci.c        | 6 +-----
 include/linux/virtio.h             | 2 ++
 7 files changed, 17 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c
index f14b60080c21..e64794730e21 100644
--- a/drivers/misc/mic/card/mic_virtio.c
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -462,16 +462,12 @@ static void mic_handle_config_change(struct mic_device_desc __iomem *d,
 	struct mic_device_ctrl __iomem *dc
 		= (void __iomem *)d + mic_aligned_desc_size(d);
 	struct mic_vdev *mvdev = (struct mic_vdev *)ioread64(&dc->vdev);
-	struct virtio_driver *drv;
 
 	if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
 		return;
 
 	dev_dbg(mdrv->dev, "%s %d\n", __func__, __LINE__);
-	drv = container_of(mvdev->vdev.dev.driver,
-				struct virtio_driver, driver);
-	if (drv->config_changed)
-		drv->config_changed(&mvdev->vdev);
+	virtio_config_changed(&mvdev->vdev);
 	iowrite8(1, &dc->guest_ack);
 }
 
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index a1349653c6d9..643129070c51 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -406,15 +406,8 @@ static void kvm_extint_handler(struct ext_code ext_code,
 
 	switch (param) {
 	case VIRTIO_PARAM_CONFIG_CHANGED:
-	{
-		struct virtio_driver *drv;
-		drv = container_of(vq->vdev->dev.driver,
-				   struct virtio_driver, driver);
-		if (drv->config_changed)
-			drv->config_changed(vq->vdev);
-
+		virtio_config_changed(vq->vdev);
 		break;
-	}
 	case VIRTIO_PARAM_DEV_ADD:
 		schedule_work(&hotplug_work);
 		break;
diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c
index d2c0b442bce5..6cbe6ef3c889 100644
--- a/drivers/s390/kvm/virtio_ccw.c
+++ b/drivers/s390/kvm/virtio_ccw.c
@@ -940,11 +940,7 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev,
 		vring_interrupt(0, vq);
 	}
 	if (test_bit(0, &vcdev->indicators2)) {
-		drv = container_of(vcdev->vdev.dev.driver,
-				   struct virtio_driver, driver);
-
-		if (drv && drv->config_changed)
-			drv->config_changed(&vcdev->vdev);
+		virtio_config_changed(&vcdev->vdev);
 		clear_bit(0, &vcdev->indicators2);
 	}
 }
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index fed0ce198ae3..3980687401f6 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -239,6 +239,15 @@ void unregister_virtio_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(unregister_virtio_device);
 
+void virtio_config_changed(struct virtio_device *dev)
+{
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+
+	if (drv && drv->config_changed)
+		drv->config_changed(dev);
+}
+EXPORT_SYMBOL_GPL(virtio_config_changed);
+
 static int virtio_init(void)
 {
 	if (bus_register(&virtio_bus) != 0)
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index c600ccfd6922..ef9a1650bb80 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -234,8 +234,6 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
 {
 	struct virtio_mmio_device *vm_dev = opaque;
 	struct virtio_mmio_vq_info *info;
-	struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
-			struct virtio_driver, driver);
 	unsigned long status;
 	unsigned long flags;
 	irqreturn_t ret = IRQ_NONE;
@@ -244,9 +242,8 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
 	status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
 	writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
 
-	if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
-			&& vdrv && vdrv->config_changed) {
-		vdrv->config_changed(&vm_dev->vdev);
+	if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)) {
+		virtio_config_changed(&vm_dev->vdev);
 		ret = IRQ_HANDLED;
 	}
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index add40d00dcdb..f39f4e772e6a 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -211,12 +211,8 @@ static bool vp_notify(struct virtqueue *vq)
 static irqreturn_t vp_config_changed(int irq, void *opaque)
 {
 	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_driver *drv;
-	drv = container_of(vp_dev->vdev.dev.driver,
-			   struct virtio_driver, driver);
 
-	if (drv && drv->config_changed)
-		drv->config_changed(&vp_dev->vdev);
+	virtio_config_changed(&vp_dev->vdev);
 	return IRQ_HANDLED;
 }
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b46671e28de2..3c19bd3189cb 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -108,6 +108,8 @@ void unregister_virtio_device(struct virtio_device *dev);
 
 void virtio_break_device(struct virtio_device *dev);
 
+void virtio_config_changed(struct virtio_device *dev);
+
 /**
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
-- 
cgit v1.2.3


From c6716bae52f97347e25166c6270aa98693d9212c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 14 Oct 2014 10:40:35 +1030
Subject: virtio-pci: move freeze/restore to virtio core

This is in preparation to extending config changed event handling
in core.
Wrapping these in an API also seems to make for a cleaner code.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio.c     | 54 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/virtio/virtio_pci.c | 54 ++-------------------------------------------
 include/linux/virtio.h      |  6 +++++
 3 files changed, 62 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3980687401f6..8216b7311092 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -248,6 +248,60 @@ void virtio_config_changed(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(virtio_config_changed);
 
+#ifdef CONFIG_PM_SLEEP
+int virtio_device_freeze(struct virtio_device *dev)
+{
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+
+	dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
+
+	if (drv && drv->freeze)
+		return drv->freeze(dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtio_device_freeze);
+
+int virtio_device_restore(struct virtio_device *dev)
+{
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+
+	/* We always start by resetting the device, in case a previous
+	 * driver messed it up. */
+	dev->config->reset(dev);
+
+	/* Acknowledge that we've seen the device. */
+	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+
+	/* Maybe driver failed before freeze.
+	 * Restore the failed status, for debugging. */
+	if (dev->failed)
+		add_status(dev, VIRTIO_CONFIG_S_FAILED);
+
+	if (!drv)
+		return 0;
+
+	/* We have a driver! */
+	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+
+	dev->config->finalize_features(dev);
+
+	if (drv->restore) {
+		int ret = drv->restore(dev);
+		if (ret) {
+			add_status(dev, VIRTIO_CONFIG_S_FAILED);
+			return ret;
+		}
+	}
+
+	/* Finally, tell the device we're all set */
+	add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtio_device_restore);
+#endif
+
 static int virtio_init(void)
 {
 	if (bus_register(&virtio_bus) != 0)
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index f39f4e772e6a..d34ebfa604f3 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -57,9 +57,6 @@ struct virtio_pci_device
 	/* Vectors allocated, excluding per-vq vectors if any */
 	unsigned msix_used_vectors;
 
-	/* Status saved during hibernate/restore */
-	u8 saved_status;
-
 	/* Whether we have vector per vq */
 	bool per_vq_vectors;
 };
@@ -764,16 +761,9 @@ static int virtio_pci_freeze(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
-	struct virtio_driver *drv;
 	int ret;
 
-	drv = container_of(vp_dev->vdev.dev.driver,
-			   struct virtio_driver, driver);
-
-	ret = 0;
-	vp_dev->saved_status = vp_get_status(&vp_dev->vdev);
-	if (drv && drv->freeze)
-		ret = drv->freeze(&vp_dev->vdev);
+	ret = virtio_device_freeze(&vp_dev->vdev);
 
 	if (!ret)
 		pci_disable_device(pci_dev);
@@ -784,54 +774,14 @@ static int virtio_pci_restore(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
-	struct virtio_driver *drv;
-	unsigned status = 0;
 	int ret;
 
-	drv = container_of(vp_dev->vdev.dev.driver,
-			   struct virtio_driver, driver);
-
 	ret = pci_enable_device(pci_dev);
 	if (ret)
 		return ret;
 
 	pci_set_master(pci_dev);
-	/* We always start by resetting the device, in case a previous
-	 * driver messed it up. */
-	vp_reset(&vp_dev->vdev);
-
-	/* Acknowledge that we've seen the device. */
-	status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
-	vp_set_status(&vp_dev->vdev, status);
-
-	/* Maybe driver failed before freeze.
-	 * Restore the failed status, for debugging. */
-	status |= vp_dev->saved_status & VIRTIO_CONFIG_S_FAILED;
-	vp_set_status(&vp_dev->vdev, status);
-
-	if (!drv)
-		return 0;
-
-	/* We have a driver! */
-	status |= VIRTIO_CONFIG_S_DRIVER;
-	vp_set_status(&vp_dev->vdev, status);
-
-	vp_finalize_features(&vp_dev->vdev);
-
-	if (drv->restore) {
-		ret = drv->restore(&vp_dev->vdev);
-		if (ret) {
-			status |= VIRTIO_CONFIG_S_FAILED;
-			vp_set_status(&vp_dev->vdev, status);
-			return ret;
-		}
-	}
-
-	/* Finally, tell the device we're all set */
-	status |= VIRTIO_CONFIG_S_DRIVER_OK;
-	vp_set_status(&vp_dev->vdev, status);
-
-	return ret;
+	return virtio_device_restore(&vp_dev->vdev);
 }
 
 static const struct dev_pm_ops virtio_pci_pm_ops = {
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 3c19bd3189cb..8df7ba81e5c7 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -78,6 +78,7 @@ bool virtqueue_is_broken(struct virtqueue *vq);
 /**
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus
+ * @failed: saved value for CONFIG_S_FAILED bit (for restore)
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
@@ -88,6 +89,7 @@ bool virtqueue_is_broken(struct virtqueue *vq);
  */
 struct virtio_device {
 	int index;
+	bool failed;
 	struct device dev;
 	struct virtio_device_id id;
 	const struct virtio_config_ops *config;
@@ -109,6 +111,10 @@ void unregister_virtio_device(struct virtio_device *dev);
 void virtio_break_device(struct virtio_device *dev);
 
 void virtio_config_changed(struct virtio_device *dev);
+#ifdef CONFIG_PM_SLEEP
+int virtio_device_freeze(struct virtio_device *dev);
+int virtio_device_restore(struct virtio_device *dev);
+#endif
 
 /**
  * virtio_driver - operations for a virtio I/O driver
-- 
cgit v1.2.3


From 22b7050a024d7deb0c9ef1e14ed73e3b1e369f24 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 15 Oct 2014 10:21:55 +1030
Subject: virtio: defer config changed notifications

Defer config changed notifications that arrive during
probe/scan/freeze/restore.

This will allow drivers to set DRIVER_OK earlier, without worrying about
racing with config change interrupts.

This change will also benefit old hypervisors (before 2009)
that send interrupts without checking DRIVER_OK: previously,
the callback could race with driver-specific initialization.

This will also help simplify drivers.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (cosmetic changes)
---
 drivers/virtio/virtio.c | 58 +++++++++++++++++++++++++++++++++++++++++--------
 include/linux/virtio.h  |  6 +++++
 2 files changed, 55 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 8216b7311092..df598dd8c5c8 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -117,6 +117,43 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
 }
 EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);
 
+static void __virtio_config_changed(struct virtio_device *dev)
+{
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+
+	if (!dev->config_enabled)
+		dev->config_change_pending = true;
+	else if (drv && drv->config_changed)
+		drv->config_changed(dev);
+}
+
+void virtio_config_changed(struct virtio_device *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->config_lock, flags);
+	__virtio_config_changed(dev);
+	spin_unlock_irqrestore(&dev->config_lock, flags);
+}
+EXPORT_SYMBOL_GPL(virtio_config_changed);
+
+static void virtio_config_disable(struct virtio_device *dev)
+{
+	spin_lock_irq(&dev->config_lock);
+	dev->config_enabled = false;
+	spin_unlock_irq(&dev->config_lock);
+}
+
+static void virtio_config_enable(struct virtio_device *dev)
+{
+	spin_lock_irq(&dev->config_lock);
+	dev->config_enabled = true;
+	if (dev->config_change_pending)
+		__virtio_config_changed(dev);
+	dev->config_change_pending = false;
+	spin_unlock_irq(&dev->config_lock);
+}
+
 static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
@@ -153,6 +190,8 @@ static int virtio_dev_probe(struct device *_d)
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 		if (drv->scan)
 			drv->scan(dev);
+
+		virtio_config_enable(dev);
 	}
 
 	return err;
@@ -163,6 +202,8 @@ static int virtio_dev_remove(struct device *_d)
 	struct virtio_device *dev = dev_to_virtio(_d);
 	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 
+	virtio_config_disable(dev);
+
 	drv->remove(dev);
 
 	/* Driver should have reset device. */
@@ -211,6 +252,10 @@ int register_virtio_device(struct virtio_device *dev)
 	dev->index = err;
 	dev_set_name(&dev->dev, "virtio%u", dev->index);
 
+	spin_lock_init(&dev->config_lock);
+	dev->config_enabled = false;
+	dev->config_change_pending = false;
+
 	/* We always start by resetting the device, in case a previous
 	 * driver messed it up.  This also tests that code path a little. */
 	dev->config->reset(dev);
@@ -239,20 +284,13 @@ void unregister_virtio_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(unregister_virtio_device);
 
-void virtio_config_changed(struct virtio_device *dev)
-{
-	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
-
-	if (drv && drv->config_changed)
-		drv->config_changed(dev);
-}
-EXPORT_SYMBOL_GPL(virtio_config_changed);
-
 #ifdef CONFIG_PM_SLEEP
 int virtio_device_freeze(struct virtio_device *dev)
 {
 	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 
+	virtio_config_disable(dev);
+
 	dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
 
 	if (drv && drv->freeze)
@@ -297,6 +335,8 @@ int virtio_device_restore(struct virtio_device *dev)
 	/* Finally, tell the device we're all set */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 
+	virtio_config_enable(dev);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(virtio_device_restore);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 8df7ba81e5c7..65261a7244fc 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -79,6 +79,9 @@ bool virtqueue_is_broken(struct virtqueue *vq);
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus
  * @failed: saved value for CONFIG_S_FAILED bit (for restore)
+ * @config_enabled: configuration change reporting enabled
+ * @config_change_pending: configuration change reported while disabled
+ * @config_lock: protects configuration change reporting
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
@@ -90,6 +93,9 @@ bool virtqueue_is_broken(struct virtqueue *vq);
 struct virtio_device {
 	int index;
 	bool failed;
+	bool config_enabled;
+	bool config_change_pending;
+	spinlock_t config_lock;
 	struct device dev;
 	struct virtio_device_id id;
 	const struct virtio_config_ops *config;
-- 
cgit v1.2.3


From 3569db593081fd88bbd6df21b9b0531873f2042c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 15 Oct 2014 10:22:30 +1030
Subject: virtio: add API to enable VQs early

virtio spec 0.9.X requires DRIVER_OK to be set before
VQs are used, but some drivers use VQs before probe
function returns.
Since DRIVER_OK is set after probe, this violates the spec.

Even though under virtio 1.0 transitional devices support this
behaviour, we want to make it possible for those early callers to become
spec compliant and eventually support non-transitional devices.

Add API for drivers to call before using VQs.

Sets DRIVER_OK internally.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_config.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index e8f8f71e843c..7f4ef66873ef 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -109,6 +109,23 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 	return vq;
 }
 
+/**
+ * virtio_device_ready - enable vq use in probe function
+ * @vdev: the device
+ *
+ * Driver must call this to use vqs in the probe function.
+ *
+ * Note: vqs are enabled automatically after probe returns.
+ */
+static inline
+void virtio_device_ready(struct virtio_device *dev)
+{
+	unsigned status = dev->config->get_status(dev);
+
+	BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
+	dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
+}
+
 static inline
 const char *virtio_bus_name(struct virtio_device *vdev)
 {
-- 
cgit v1.2.3


From 46e8c83c83c06b90ebc000df481c2fdcee79a141 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 23 Sep 2014 17:18:10 +0300
Subject: dmaengine: dw: move private definitions to regs.h

Since we don't allow user to set registers directly through private slave
configuration we may move definitions to the regs.h because they are not used
anywhere except core.c part.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dw/regs.h                | 32 +++++++++++++++++++++++++++++---
 include/linux/platform_data/dma-dw.h | 31 -------------------------------
 2 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 00d27a9d9c27..e8f92b28ffc2 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -11,7 +11,6 @@
 
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
-#include <linux/platform_data/dma-dw.h>
 
 #define DW_DMA_MAX_NR_CHANNELS	8
 #define DW_DMA_MAX_NR_REQUESTS	16
@@ -132,6 +131,18 @@ struct dw_dma_regs {
 /* Bitfields in DWC_PARAMS */
 #define DWC_PARAMS_MBLK_EN	11		/* multi block transfer */
 
+/* bursts size */
+enum dw_dma_msize {
+	DW_DMA_MSIZE_1,
+	DW_DMA_MSIZE_4,
+	DW_DMA_MSIZE_8,
+	DW_DMA_MSIZE_16,
+	DW_DMA_MSIZE_32,
+	DW_DMA_MSIZE_64,
+	DW_DMA_MSIZE_128,
+	DW_DMA_MSIZE_256,
+};
+
 /* Bitfields in CTL_LO */
 #define DWC_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
 #define DWC_CTLL_DST_WIDTH(n)	((n)<<1)	/* bytes per element */
@@ -161,20 +172,35 @@ struct dw_dma_regs {
 #define DWC_CTLH_DONE		0x00001000
 #define DWC_CTLH_BLOCK_TS_MASK	0x00000fff
 
-/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/platform_data/dma-dw.h> */
+/* Bitfields in CFG_LO */
 #define DWC_CFGL_CH_PRIOR_MASK	(0x7 << 5)	/* priority mask */
 #define DWC_CFGL_CH_PRIOR(x)	((x) << 5)	/* priority */
 #define DWC_CFGL_CH_SUSP	(1 << 8)	/* pause xfer */
 #define DWC_CFGL_FIFO_EMPTY	(1 << 9)	/* pause xfer */
 #define DWC_CFGL_HS_DST		(1 << 10)	/* handshake w/dst */
 #define DWC_CFGL_HS_SRC		(1 << 11)	/* handshake w/src */
+#define DWC_CFGL_LOCK_CH_XFER	(0 << 12)	/* scope of LOCK_CH */
+#define DWC_CFGL_LOCK_CH_BLOCK	(1 << 12)
+#define DWC_CFGL_LOCK_CH_XACT	(2 << 12)
+#define DWC_CFGL_LOCK_BUS_XFER	(0 << 14)	/* scope of LOCK_BUS */
+#define DWC_CFGL_LOCK_BUS_BLOCK	(1 << 14)
+#define DWC_CFGL_LOCK_BUS_XACT	(2 << 14)
+#define DWC_CFGL_LOCK_CH	(1 << 15)	/* channel lockout */
+#define DWC_CFGL_LOCK_BUS	(1 << 16)	/* busmaster lockout */
+#define DWC_CFGL_HS_DST_POL	(1 << 18)	/* dst handshake active low */
+#define DWC_CFGL_HS_SRC_POL	(1 << 19)	/* src handshake active low */
 #define DWC_CFGL_MAX_BURST(x)	((x) << 20)
 #define DWC_CFGL_RELOAD_SAR	(1 << 30)
 #define DWC_CFGL_RELOAD_DAR	(1 << 31)
 
-/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/platform_data/dma-dw.h> */
+/* Bitfields in CFG_HI */
+#define DWC_CFGH_FCMODE		(1 << 0)
+#define DWC_CFGH_FIFO_MODE	(1 << 1)
+#define DWC_CFGH_PROTCTL(x)	((x) << 2)
 #define DWC_CFGH_DS_UPD_EN	(1 << 5)
 #define DWC_CFGH_SS_UPD_EN	(1 << 6)
+#define DWC_CFGH_SRC_PER(x)	((x) << 7)
+#define DWC_CFGH_DST_PER(x)	((x) << 11)
 
 /* Bitfields in SGR */
 #define DWC_SGR_SGI(x)		((x) << 0)
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index bc411a1bf8e7..d0c97da66e22 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -56,37 +56,6 @@ struct dw_dma_platform_data {
 	unsigned char	data_width[4];
 };
 
-/* bursts size */
-enum dw_dma_msize {
-	DW_DMA_MSIZE_1,
-	DW_DMA_MSIZE_4,
-	DW_DMA_MSIZE_8,
-	DW_DMA_MSIZE_16,
-	DW_DMA_MSIZE_32,
-	DW_DMA_MSIZE_64,
-	DW_DMA_MSIZE_128,
-	DW_DMA_MSIZE_256,
-};
-
-/* Platform-configurable bits in CFG_HI */
-#define DWC_CFGH_FCMODE		(1 << 0)
-#define DWC_CFGH_FIFO_MODE	(1 << 1)
-#define DWC_CFGH_PROTCTL(x)	((x) << 2)
-#define DWC_CFGH_SRC_PER(x)	((x) << 7)
-#define DWC_CFGH_DST_PER(x)	((x) << 11)
-
-/* Platform-configurable bits in CFG_LO */
-#define DWC_CFGL_LOCK_CH_XFER	(0 << 12)	/* scope of LOCK_CH */
-#define DWC_CFGL_LOCK_CH_BLOCK	(1 << 12)
-#define DWC_CFGL_LOCK_CH_XACT	(2 << 12)
-#define DWC_CFGL_LOCK_BUS_XFER	(0 << 14)	/* scope of LOCK_BUS */
-#define DWC_CFGL_LOCK_BUS_BLOCK	(1 << 14)
-#define DWC_CFGL_LOCK_BUS_XACT	(2 << 14)
-#define DWC_CFGL_LOCK_CH	(1 << 15)	/* channel lockout */
-#define DWC_CFGL_LOCK_BUS	(1 << 16)	/* busmaster lockout */
-#define DWC_CFGL_HS_DST_POL	(1 << 18)	/* dst handshake active low */
-#define DWC_CFGL_HS_SRC_POL	(1 << 19)	/* src handshake active low */
-
 /* DMA API extensions */
 struct dw_cyclic_desc {
 	struct dw_desc	**desc;
-- 
cgit v1.2.3


From 3d588f83e4d6a5230d9094b97d38621cbaa9a972 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 23 Sep 2014 17:18:11 +0300
Subject: dmaengine: dw: split dma-dw.h to platform and private parts

The introduced include/linux/dma/dw.h is going to contain the private
extensions and structures which are shared for dw_dmac users in the kernel.
Meanwhile include/linux/platform_data/dma-dw.h keeps only platform related data
types and definitions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dw/internal.h            |  2 ++
 include/linux/dma/dw.h               | 37 ++++++++++++++++++++++++++++++++++++
 include/linux/platform_data/dma-dw.h | 27 ++++----------------------
 sound/atmel/abdac.c                  |  4 +++-
 sound/atmel/ac97c.c                  |  1 +
 5 files changed, 47 insertions(+), 24 deletions(-)
 create mode 100644 include/linux/dma/dw.h

(limited to 'include/linux')

diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index 82258a167a0e..9a886e3c31d3 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -12,6 +12,8 @@
 #define _DW_DMAC_INTERNAL_H
 
 #include <linux/device.h>
+
+#include <linux/dma/dw.h>
 #include <linux/platform_data/dma-dw.h>
 
 #include "regs.h"
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
new file mode 100644
index 000000000000..24756130eadb
--- /dev/null
+++ b/include/linux/dma/dw.h
@@ -0,0 +1,37 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _DMA_DW_H
+#define _DMA_DW_H
+
+#include <linux/dmaengine.h>
+
+/* DMA API extensions */
+struct dw_desc;
+
+struct dw_cyclic_desc {
+	struct dw_desc	**desc;
+	unsigned long	periods;
+	void		(*period_callback)(void *param);
+	void		*period_callback_param;
+};
+
+struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
+		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
+		enum dma_transfer_direction direction);
+void dw_dma_cyclic_free(struct dma_chan *chan);
+int dw_dma_cyclic_start(struct dma_chan *chan);
+void dw_dma_cyclic_stop(struct dma_chan *chan);
+
+dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan);
+
+dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan);
+
+#endif /* _DMA_DW_H */
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index d0c97da66e22..d8155c005242 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -8,10 +8,10 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#ifndef DW_DMAC_H
-#define DW_DMAC_H
+#ifndef _PLATFORM_DATA_DMA_DW_H
+#define _PLATFORM_DATA_DMA_DW_H
 
-#include <linux/dmaengine.h>
+#include <linux/device.h>
 
 /**
  * struct dw_dma_slave - Controller-specific information about a slave
@@ -56,23 +56,4 @@ struct dw_dma_platform_data {
 	unsigned char	data_width[4];
 };
 
-/* DMA API extensions */
-struct dw_cyclic_desc {
-	struct dw_desc	**desc;
-	unsigned long	periods;
-	void		(*period_callback)(void *param);
-	void		*period_callback_param;
-};
-
-struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
-		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_transfer_direction direction);
-void dw_dma_cyclic_free(struct dma_chan *chan);
-int dw_dma_cyclic_start(struct dma_chan *chan);
-void dw_dma_cyclic_stop(struct dma_chan *chan);
-
-dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan);
-
-dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan);
-
-#endif /* DW_DMAC_H */
+#endif /* _PLATFORM_DATA_DMA_DW_H */
diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c
index 154a7c44e38d..31061e3521d4 100644
--- a/sound/atmel/abdac.c
+++ b/sound/atmel/abdac.c
@@ -9,7 +9,6 @@
  */
 #include <linux/clk.h>
 #include <linux/bitmap.h>
-#include <linux/platform_data/dma-dw.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/init.h>
@@ -25,6 +24,9 @@
 #include <sound/pcm_params.h>
 #include <sound/atmel-abdac.h>
 
+#include <linux/platform_data/dma-dw.h>
+#include <linux/dma/dw.h>
+
 /* DAC register offsets */
 #define DAC_DATA                                0x0000
 #define DAC_CTRL                                0x0008
diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c
index 1dfb35afef8f..b59427d5a697 100644
--- a/sound/atmel/ac97c.c
+++ b/sound/atmel/ac97c.c
@@ -32,6 +32,7 @@
 #include <sound/memalloc.h>
 
 #include <linux/platform_data/dma-dw.h>
+#include <linux/dma/dw.h>
 
 #include <mach/cpu.h>
 
-- 
cgit v1.2.3


From 2a52f6e49e5e400ed98a79503193d81207009647 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 23 Sep 2014 17:18:15 +0300
Subject: dmaengine: dw: export probe()/remove() and Co to users

The driver library functions can be used directly by the compound devices such
as ADSP or serial driver where DesignWare DMA IP is privately attached to the
main hardware.

Instead of creating a new platform device leaf they may call dw_dma_probe()
with given struct dw_dma_chip directly and make sure that the main device is
DMA capable.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dw/internal.h | 29 +++--------------------------
 include/linux/dma/dw.h    | 27 +++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index c55c3e0bf1fe..41439732ff6b 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -8,39 +8,16 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef _DW_DMAC_INTERNAL_H
-#define _DW_DMAC_INTERNAL_H
-
-#include <linux/device.h>
+#ifndef _DMA_DW_INTERNAL_H
+#define _DMA_DW_INTERNAL_H
 
 #include <linux/dma/dw.h>
-#include <linux/platform_data/dma-dw.h>
 
 #include "regs.h"
 
-/**
- * struct dw_dma_chip - representation of DesignWare DMA controller hardware
- * @dev:		struct device of the DMA controller
- * @irq:		irq line
- * @regs:		memory mapped I/O space
- * @clk:		hclk clock
- * @dw:			struct dw_dma that is filed by dw_dma_probe()
- */
-struct dw_dma_chip {
-	struct device	*dev;
-	int		irq;
-	void __iomem	*regs;
-	struct clk	*clk;
-	struct dw_dma	*dw;
-};
-
-/* Export to the platform drivers */
-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata);
-int dw_dma_remove(struct dw_dma_chip *chip);
-
 int dw_dma_disable(struct dw_dma_chip *chip);
 int dw_dma_enable(struct dw_dma_chip *chip);
 
 extern bool dw_dma_filter(struct dma_chan *chan, void *param);
 
-#endif /* _DW_DMAC_INTERNAL_H */
+#endif /* _DMA_DW_INTERNAL_H */
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index 24756130eadb..71456442ebe3 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2007 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2014 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -11,8 +12,34 @@
 #ifndef _DMA_DW_H
 #define _DMA_DW_H
 
+#include <linux/clk.h>
+#include <linux/device.h>
 #include <linux/dmaengine.h>
 
+#include <linux/platform_data/dma-dw.h>
+
+struct dw_dma;
+
+/**
+ * struct dw_dma_chip - representation of DesignWare DMA controller hardware
+ * @dev:		struct device of the DMA controller
+ * @irq:		irq line
+ * @regs:		memory mapped I/O space
+ * @clk:		hclk clock
+ * @dw:			struct dw_dma that is filed by dw_dma_probe()
+ */
+struct dw_dma_chip {
+	struct device	*dev;
+	int		irq;
+	void __iomem	*regs;
+	struct clk	*clk;
+	struct dw_dma	*dw;
+};
+
+/* Export to the platform drivers */
+int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata);
+int dw_dma_remove(struct dw_dma_chip *chip);
+
 /* DMA API extensions */
 struct dw_desc;
 
-- 
cgit v1.2.3


From b65612a868768cd0431084ccf376d0946c12132d Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Sat, 11 Oct 2014 21:16:43 +0530
Subject: dmaengine: add dmaengine_prep_dma_sg() helper

This was only prep API which didnt have an helper

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 1f9e642c66ad..7e6b3a281da8 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -755,6 +755,16 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
 	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg(
+		struct dma_chan *chan,
+		struct scatterlist *dst_sg, unsigned int dst_nents,
+		struct scatterlist *src_sg, unsigned int src_nents,
+		unsigned long flags)
+{
+	return chan->device->device_prep_dma_sg(chan, dst_sg, dst_nents,
+			src_sg, src_nents, flags);
+}
+
 static inline int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
 {
 	if (!chan || !caps)
-- 
cgit v1.2.3


From 0a5642be03293f73706961a7649ac1d12bd0be59 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Sat, 11 Oct 2014 21:16:44 +0530
Subject: dmaengine: freescale: add and export fsl_dma_external_start()

The freescale driver uses custom device control FSLDMA_EXTERNAL_START to
put the controller in external start mode.
Since we are planning to deprecate the device control, move this to exported
API. Subsequent patches will remove the FSLDMA_EXTERNAL_START

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/fsldma.c   | 16 +++++++++++++++-
 include/linux/fsldma.h | 13 +++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/fsldma.h

(limited to 'include/linux')

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index d5d6885ab341..0cded86f946c 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -36,7 +36,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
-
+#include <linux/fsldma.h>
 #include "dmaengine.h"
 #include "fsldma.h"
 
@@ -367,6 +367,20 @@ static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable)
 		chan->feature &= ~FSL_DMA_CHAN_START_EXT;
 }
 
+int fsl_dma_external_start(struct dma_chan *dchan, int enable)
+{
+	struct fsldma_chan *chan;
+
+	if (!dchan)
+		return -EINVAL;
+
+	chan = to_fsl_chan(dchan);
+
+	fsl_chan_toggle_ext_start(chan, enable);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_dma_external_start);
+
 static void append_ld_queue(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
 {
 	struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev);
diff --git a/include/linux/fsldma.h b/include/linux/fsldma.h
new file mode 100644
index 000000000000..b213c02963c9
--- /dev/null
+++ b/include/linux/fsldma.h
@@ -0,0 +1,13 @@
+/*
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef FSL_DMA_H
+#define FSL_DMA_H
+/* fsl dma API for enxternal start */
+int fsl_dma_external_start(struct dma_chan *dchan, int enable);
+
+#endif
-- 
cgit v1.2.3


From b80719b6bd083130c112cb4d3e5329a164eef4c3 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Sat, 11 Oct 2014 21:16:48 +0530
Subject: dmaengine: remove FSLDMA_EXTERNAL_START

as users have been converted, so no need of this custom method

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 7e6b3a281da8..f8e5a9ea461a 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -199,15 +199,12 @@ enum dma_ctrl_flags {
  * configuration data in statically from the platform). An additional
  * argument of struct dma_slave_config must be passed in with this
  * command.
- * @FSLDMA_EXTERNAL_START: this command will put the Freescale DMA controller
- * into external start mode.
  */
 enum dma_ctrl_cmd {
 	DMA_TERMINATE_ALL,
 	DMA_PAUSE,
 	DMA_RESUME,
 	DMA_SLAVE_CONFIG,
-	FSLDMA_EXTERNAL_START,
 };
 
 /**
-- 
cgit v1.2.3


From 04ffcb255f22a2a988ce7393e6e72f6eb3fcb7aa Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 14 Oct 2014 15:19:06 -0700
Subject: net: Add ndo_gso_check

Add ndo_gso_check which a device can define to indicate whether is
is capable of doing GSO on a packet. This funciton would be called from
the stack to determine whether software GSO is needed to be done. A
driver should populate this function if it advertises GSO types for
which there are combinations that it wouldn't be able to handle. For
instance a device that performs UDP tunneling might only implement
support for transparent Ethernet bridging type of inner packets
or might have limitations on lengths of inner headers.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c      |  2 +-
 drivers/net/xen-netfront.c |  2 +-
 include/linux/netdevice.h  | 12 +++++++++++-
 net/core/dev.c             |  2 +-
 4 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 0c6adaaf898c..65e2892342bd 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -298,7 +298,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
 	 */
 	if (q->flags & IFF_VNET_HDR)
 		features |= vlan->tap_features;
-	if (netif_needs_gso(skb, features)) {
+	if (netif_needs_gso(dev, skb, features)) {
 		struct sk_buff *segs = __skb_gso_segment(skb, features, false);
 
 		if (IS_ERR(segs))
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ca82f545ec2c..3c0b37574d05 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -638,7 +638,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (unlikely(!netif_carrier_ok(dev) ||
 		     (slots > 1 && !xennet_can_sg(dev)) ||
-		     netif_needs_gso(skb, netif_skb_features(skb)))) {
+		     netif_needs_gso(dev, skb, netif_skb_features(skb)))) {
 		spin_unlock_irqrestore(&queue->tx_lock, flags);
 		goto drop;
 	}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 838407aea705..74fd5d37f15a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -998,6 +998,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	Callback to use for xmit over the accelerated station. This
  *	is used in place of ndo_start_xmit on accelerated net
  *	devices.
+ * bool	(*ndo_gso_check) (struct sk_buff *skb,
+ *			  struct net_device *dev);
+ *	Called by core transmit path to determine if device is capable of
+ *	performing GSO on a packet. The device returns true if it is
+ *	able to GSO the packet, false otherwise. If the return value is
+ *	false the stack will do software GSO.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1147,6 +1153,8 @@ struct net_device_ops {
 							struct net_device *dev,
 							void *priv);
 	int			(*ndo_get_lock_subclass)(struct net_device *dev);
+	bool			(*ndo_gso_check) (struct sk_buff *skb,
+						  struct net_device *dev);
 };
 
 /**
@@ -3572,10 +3580,12 @@ static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features)
 	       (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
 }
 
-static inline bool netif_needs_gso(struct sk_buff *skb,
+static inline bool netif_needs_gso(struct net_device *dev, struct sk_buff *skb,
 				   netdev_features_t features)
 {
 	return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
+		(dev->netdev_ops->ndo_gso_check &&
+		 !dev->netdev_ops->ndo_gso_check(skb, dev)) ||
 		unlikely((skb->ip_summed != CHECKSUM_PARTIAL) &&
 			 (skb->ip_summed != CHECKSUM_UNNECESSARY)));
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 4699dcfdc4ab..9f77a78c6b1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2675,7 +2675,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	if (skb->encapsulation)
 		features &= dev->hw_enc_features;
 
-	if (netif_needs_gso(skb, features)) {
+	if (netif_needs_gso(dev, skb, features)) {
 		struct sk_buff *segs;
 
 		segs = skb_gso_segment(skb, features);
-- 
cgit v1.2.3


From d4c5efdb97773f59a2b711754ca0953f24516739 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 26 Aug 2014 23:16:35 -0400
Subject: random: add and use memzero_explicit() for clearing data

zatimend has reported that in his environment (3.16/gcc4.8.3/corei7)
memset() calls which clear out sensitive data in extract_{buf,entropy,
entropy_user}() in random driver are being optimized away by gcc.

Add a helper memzero_explicit() (similarly as explicit_bzero() variants)
that can be used in such cases where a variable with sensitive data is
being cleared out in the end. Other use cases might also be in crypto
code. [ I have put this into lib/string.c though, as it's always built-in
and doesn't need any dependencies then. ]

Fixes kernel bugzilla: 82041

Reported-by: zatimend@hotmail.co.uk
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 drivers/char/random.c  |  8 ++++----
 include/linux/string.h |  5 +++--
 lib/string.c           | 16 ++++++++++++++++
 3 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index c18d41db83d8..8c86a95203a0 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1106,7 +1106,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
 	__mix_pool_bytes(r, hash.w, sizeof(hash.w));
 	spin_unlock_irqrestore(&r->lock, flags);
 
-	memset(workspace, 0, sizeof(workspace));
+	memzero_explicit(workspace, sizeof(workspace));
 
 	/*
 	 * In case the hash function has some recognizable output
@@ -1118,7 +1118,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
 	hash.w[2] ^= rol32(hash.w[2], 16);
 
 	memcpy(out, &hash, EXTRACT_SIZE);
-	memset(&hash, 0, sizeof(hash));
+	memzero_explicit(&hash, sizeof(hash));
 }
 
 /*
@@ -1175,7 +1175,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf,
 	}
 
 	/* Wipe data just returned from memory */
-	memset(tmp, 0, sizeof(tmp));
+	memzero_explicit(tmp, sizeof(tmp));
 
 	return ret;
 }
@@ -1218,7 +1218,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
 	}
 
 	/* Wipe data just returned from memory */
-	memset(tmp, 0, sizeof(tmp));
+	memzero_explicit(tmp, sizeof(tmp));
 
 	return ret;
 }
diff --git a/include/linux/string.h b/include/linux/string.h
index d36977e029af..3b42b3732da6 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -132,7 +132,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
 #endif
 
 extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
-			const void *from, size_t available);
+				       const void *from, size_t available);
 
 /**
  * strstarts - does @str start with @prefix?
@@ -144,7 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix)
 	return strncmp(str, prefix, strlen(prefix)) == 0;
 }
 
-extern size_t memweight(const void *ptr, size_t bytes);
+size_t memweight(const void *ptr, size_t bytes);
+void memzero_explicit(void *s, size_t count);
 
 /**
  * kbasename - return the last part of a pathname.
diff --git a/lib/string.c b/lib/string.c
index 992bf30af759..3a3120452a1d 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -604,6 +604,22 @@ void *memset(void *s, int c, size_t count)
 EXPORT_SYMBOL(memset);
 #endif
 
+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ *		      keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area.
+ *
+ * memzero_explicit() doesn't need an arch-specific version as
+ * it just invokes the one of memset() implicitly.
+ */
+void memzero_explicit(void *s, size_t count)
+{
+	memset(s, 0, count);
+	OPTIMIZER_HIDE_VAR(s);
+}
+EXPORT_SYMBOL(memzero_explicit);
+
 #ifndef __HAVE_ARCH_MEMCPY
 /**
  * memcpy - Copy one area of memory to another
-- 
cgit v1.2.3


From 70f3ce0510afdad7cbaf27ab7ab961377205c782 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 29 Sep 2014 11:47:54 +0200
Subject: mtd: spi-nor: make spi_nor_scan() take a chip type name, not
 spi_device_id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drivers currently call spi_nor_match_id() and then spi_nor_scan().
This adds a dependency on struct spi_device_id which we want to
avoid.  Make spi_nor_scan() do it for them.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/m25p80.c      |  4 +---
 drivers/mtd/spi-nor/fsl-quadspi.c |  7 +------
 drivers/mtd/spi-nor/spi-nor.c     | 13 +++++++++----
 include/linux/mtd/spi-nor.h       | 20 +++-----------------
 4 files changed, 14 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 822209d10689..bd5e4c6edfd4 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -193,7 +193,6 @@ static int m25p_probe(struct spi_device *spi)
 {
 	struct mtd_part_parser_data	ppdata;
 	struct flash_platform_data	*data;
-	const struct spi_device_id *id = NULL;
 	struct m25p *flash;
 	struct spi_nor *nor;
 	enum read_mode mode = SPI_NOR_NORMAL;
@@ -241,8 +240,7 @@ static int m25p_probe(struct spi_device *spi)
 	else
 		flash_name = spi->modalias;
 
-	id = spi_nor_match_id(flash_name);
-	ret = spi_nor_scan(nor, id, mode);
+	ret = spi_nor_scan(nor, flash_name, mode);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 8d659a2888d5..d5269a26c839 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -881,7 +881,6 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 
 	/* iterate the subnodes. */
 	for_each_available_child_of_node(dev->of_node, np) {
-		const struct spi_device_id *id;
 		char modalias[40];
 
 		/* skip the holes */
@@ -909,10 +908,6 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 		if (of_modalias_node(np, modalias, sizeof(modalias)) < 0)
 			goto map_failed;
 
-		id = spi_nor_match_id(modalias);
-		if (!id)
-			goto map_failed;
-
 		ret = of_property_read_u32(np, "spi-max-frequency",
 				&q->clk_rate);
 		if (ret < 0)
@@ -921,7 +916,7 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 		/* set the chip address for READID */
 		fsl_qspi_set_base_addr(q, nor);
 
-		ret = spi_nor_scan(nor, id, SPI_NOR_QUAD);
+		ret = spi_nor_scan(nor, modalias, SPI_NOR_QUAD);
 		if (ret)
 			goto map_failed;
 
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index ae16aa2f6885..5c8e39977bc5 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -28,6 +28,8 @@
 
 #define JEDEC_MFR(_jedec_id)	((_jedec_id) >> 16)
 
+static const struct spi_device_id *spi_nor_match_id(const char *name);
+
 /*
  * Read the status register, returning its value in the location
  * Return the status register value.
@@ -911,9 +913,9 @@ static int spi_nor_check(struct spi_nor *nor)
 	return 0;
 }
 
-int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
-			enum read_mode mode)
+int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 {
+	const struct spi_device_id	*id = NULL;
 	struct flash_info		*info;
 	struct device *dev = nor->dev;
 	struct mtd_info *mtd = nor->mtd;
@@ -925,6 +927,10 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 	if (ret)
 		return ret;
 
+	id = spi_nor_match_id(name);
+	if (!id)
+		return -ENOENT;
+
 	info = (void *)id->driver_data;
 
 	if (info->jedec_id) {
@@ -1113,7 +1119,7 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 }
 EXPORT_SYMBOL_GPL(spi_nor_scan);
 
-const struct spi_device_id *spi_nor_match_id(char *name)
+static const struct spi_device_id *spi_nor_match_id(const char *name)
 {
 	const struct spi_device_id *id = spi_nor_ids;
 
@@ -1124,7 +1130,6 @@ const struct spi_device_id *spi_nor_match_id(char *name)
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(spi_nor_match_id);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 9e6294f32ba8..a5a7a086748d 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -187,32 +187,18 @@ struct spi_nor {
 /**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
- * @id:		the spi_device_id provided by the driver
+ * @name:	the chip type name
  * @mode:	the read mode supported by the driver
  *
  * The drivers can use this fuction to scan the SPI NOR.
  * In the scanning, it will try to get all the necessary information to
  * fill the mtd_info{} and the spi_nor{}.
  *
- * The board may assigns a spi_device_id with @id which be used to compared with
- * the spi_device_id detected by the scanning.
+ * The chip type name can be provided through the @name parameter.
  *
  * Return: 0 for success, others for failure.
  */
-int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
-			enum read_mode mode);
+int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode);
 extern const struct spi_device_id spi_nor_ids[];
 
-/**
- * spi_nor_match_id() - find the spi_device_id by the name
- * @name:	the name of the spi_device_id
- *
- * The drivers use this function to find the spi_device_id
- * specified by the @name.
- *
- * Return: returns the right spi_device_id pointer on success,
- *         and returns NULL on failure.
- */
-const struct spi_device_id *spi_nor_match_id(char *name);
-
 #endif
-- 
cgit v1.2.3


From aa281ac631008b9c18c405c8880007789f659c7d Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <ooo@electrozaur.com>
Date: Sun, 19 Oct 2014 19:38:58 +0300
Subject: Boaz Harrosh - Fix broken email address

I no longer have access to the Panasas email.
So change to an email that can always reach me.

Signed-off-by: Boaz Harrosh <ooo@electrozaur.com>
---
 drivers/scsi/osd/Kbuild             | 2 +-
 drivers/scsi/osd/Kconfig            | 2 +-
 drivers/scsi/osd/osd_debug.h        | 2 +-
 drivers/scsi/osd/osd_initiator.c    | 4 ++--
 drivers/scsi/osd/osd_uld.c          | 4 ++--
 fs/exofs/Kbuild                     | 2 +-
 fs/exofs/common.h                   | 2 +-
 fs/exofs/dir.c                      | 2 +-
 fs/exofs/exofs.h                    | 2 +-
 fs/exofs/file.c                     | 2 +-
 fs/exofs/inode.c                    | 2 +-
 fs/exofs/namei.c                    | 2 +-
 fs/exofs/ore.c                      | 4 ++--
 fs/exofs/ore_raid.c                 | 2 +-
 fs/exofs/ore_raid.h                 | 2 +-
 fs/exofs/super.c                    | 2 +-
 fs/exofs/symlink.c                  | 2 +-
 fs/exofs/sys.c                      | 2 +-
 fs/nfs/objlayout/objio_osd.c        | 2 +-
 fs/nfs/objlayout/objlayout.c        | 2 +-
 fs/nfs/objlayout/objlayout.h        | 2 +-
 fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 2 +-
 include/linux/pnfs_osd_xdr.h        | 2 +-
 include/scsi/osd_initiator.h        | 2 +-
 include/scsi/osd_ore.h              | 2 +-
 include/scsi/osd_protocol.h         | 4 ++--
 include/scsi/osd_sec.h              | 2 +-
 include/scsi/osd_sense.h            | 2 +-
 include/scsi/osd_types.h            | 2 +-
 29 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/osd/Kbuild b/drivers/scsi/osd/Kbuild
index 5fd73d77c3af..58cecd45b0f5 100644
--- a/drivers/scsi/osd/Kbuild
+++ b/drivers/scsi/osd/Kbuild
@@ -4,7 +4,7 @@
 # Copyright (C) 2008 Panasas Inc.  All rights reserved.
 #
 # Authors:
-#   Boaz Harrosh <bharrosh@panasas.com>
+#   Boaz Harrosh <ooo@electrozaur.com>
 #   Benny Halevy <bhalevy@panasas.com>
 #
 # This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/Kconfig b/drivers/scsi/osd/Kconfig
index a0703514eb0f..347cc5e33749 100644
--- a/drivers/scsi/osd/Kconfig
+++ b/drivers/scsi/osd/Kconfig
@@ -4,7 +4,7 @@
 # Copyright (C) 2008 Panasas Inc.  All rights reserved.
 #
 # Authors:
-#   Boaz Harrosh <bharrosh@panasas.com>
+#   Boaz Harrosh <ooo@electrozaur.com>
 #   Benny Halevy <bhalevy@panasas.com>
 #
 # This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/osd_debug.h b/drivers/scsi/osd/osd_debug.h
index 579e491f11df..26341261bb5c 100644
--- a/drivers/scsi/osd/osd_debug.h
+++ b/drivers/scsi/osd/osd_debug.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 5f4cbf0c4759..52e243b77000 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -7,7 +7,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -57,7 +57,7 @@
 
 enum { OSD_REQ_RETRIES = 1 };
 
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
 MODULE_DESCRIPTION("open-osd initiator library libosd.ko");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c
index e1d9a4c4c4b3..92cdd4b06526 100644
--- a/drivers/scsi/osd/osd_uld.c
+++ b/drivers/scsi/osd/osd_uld.c
@@ -10,7 +10,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -74,7 +74,7 @@
 static const char osd_name[] = "osd";
 static const char *osd_version_string = "open-osd 0.2.1";
 
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
 MODULE_DESCRIPTION("open-osd Upper-Layer-Driver osd.ko");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CHARDEV_MAJOR(SCSI_OSD_MAJOR);
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 389ba8312d5d..b47c7b8dc275 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -4,7 +4,7 @@
 # Copyright (C) 2008 Panasas Inc.  All rights reserved.
 #
 # Authors:
-#   Boaz Harrosh <bharrosh@panasas.com>
+#   Boaz Harrosh <ooo@electrozaur.com>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 3bbd46956d77..7d88ef566213 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 49f51ab4caac..d7defd557601 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index fffe86fd7a42..ad9cac670a47 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 71bf8e4fb5d4..1a376b42d305 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3f9cafd73931..f1d3d4eb8c4f 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4731fd991efe..28907460e8fa 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index cfc0205d62c4..7bd8ac8dfb28 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This file is part of exofs.
  *
@@ -29,7 +29,7 @@
 
 #include "ore_raid.h"
 
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
 MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
 MODULE_LICENSE("GPL");
 
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 84529b8a331b..27cbdb697649 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This file is part of the objects raid engine (ore).
  *
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h
index cf6375d82129..a6e746775570 100644
--- a/fs/exofs/ore_raid.h
+++ b/fs/exofs/ore_raid.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) from 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This file is part of the objects raid engine (ore).
  *
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index ed73ed8ebbee..95965503afcb 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
index 4dd687c3e747..832e2624b80b 100644
--- a/fs/exofs/symlink.c
+++ b/fs/exofs/symlink.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
index 1b4f2f95fc37..5e6a2c0a1f0b 100644
--- a/fs/exofs/sys.c
+++ b/fs/exofs/sys.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (C) 2012
  * Sachin Bhamare <sbhamare@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This file is part of exofs.
  *
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index ae05278b3761..5cbd8cf9a5a5 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -5,7 +5,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 697a16d11fac..ad988428162e 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -5,7 +5,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index fd13f1d2f136..3472e8390ba5 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -6,7 +6,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index b3918f7ac34d..f093c7ec983b 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -5,7 +5,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h
index fe25876c1a5d..17d7d0d20eca 100644
--- a/include/linux/pnfs_osd_xdr.h
+++ b/include/linux/pnfs_osd_xdr.h
@@ -5,7 +5,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index b2e85fdd2ae0..a09cca829082 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_ore.h b/include/scsi/osd_ore.h
index 6ca3265a4dca..7a8d2cd30328 100644
--- a/include/scsi/osd_ore.h
+++ b/include/scsi/osd_ore.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Public Declarations of the ORE API
  *
diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
index a2594afe05c7..e0ca835e7bf7 100644
--- a/include/scsi/osd_protocol.h
+++ b/include/scsi/osd_protocol.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -496,7 +496,7 @@ struct osd_timestamp {
  */
 
 struct osd_key_identifier {
-	u8 id[7]; /* if you know why 7 please email bharrosh@panasas.com */
+	u8 id[7]; /* if you know why 7 please email ooo@electrozaur.com */
 } __packed;
 
 /* for osd_capability.format */
diff --git a/include/scsi/osd_sec.h b/include/scsi/osd_sec.h
index f96151c9c9e8..7abeb0f0db30 100644
--- a/include/scsi/osd_sec.h
+++ b/include/scsi/osd_sec.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_sense.h b/include/scsi/osd_sense.h
index 91db543a5502..d52aa93a0b2d 100644
--- a/include/scsi/osd_sense.h
+++ b/include/scsi/osd_sense.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_types.h b/include/scsi/osd_types.h
index bd0be7ed4bcf..48e8a165e136 100644
--- a/include/scsi/osd_types.h
+++ b/include/scsi/osd_types.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
-- 
cgit v1.2.3


From 4846e3784585173f48e267b76f968bcb4a12d3b2 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 9 Sep 2014 22:18:31 +0200
Subject: watchdog: simplify definitions of WATCHDOG_NOWAYOUT(_INIT_STATUS)?

Signed-off-by: Uwe Kleine-K=C3=B6nig <u.kleine-koenig@pengutronix.de>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 include/linux/watchdog.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 2a3038ee17a3..395b70e0eccf 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -97,13 +97,8 @@ struct watchdog_device {
 #define WDOG_UNREGISTERED	4	/* Has the device been unregistered */
 };
 
-#ifdef CONFIG_WATCHDOG_NOWAYOUT
-#define WATCHDOG_NOWAYOUT		1
-#define WATCHDOG_NOWAYOUT_INIT_STATUS	(1 << WDOG_NO_WAY_OUT)
-#else
-#define WATCHDOG_NOWAYOUT		0
-#define WATCHDOG_NOWAYOUT_INIT_STATUS	0
-#endif
+#define WATCHDOG_NOWAYOUT		IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT)
+#define WATCHDOG_NOWAYOUT_INIT_STATUS	(WATCHDOG_NOWAYOUT << WDOG_NO_WAY_OUT)
 
 /* Use the following function to check whether or not the watchdog is active */
 static inline bool watchdog_active(struct watchdog_device *wdd)
-- 
cgit v1.2.3


From 51315cdfa0521fff3059cec5fb8ffecc7f37cba7 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sun, 19 Oct 2014 11:30:27 +0200
Subject: cpufreq: allow driver-specific data

This commit extends the cpufreq_driver structure with an additional
'void *driver_data' field that can be filled by the ->probe() function
of a cpufreq driver to pass additional custom information to the
driver itself.

A new function called cpufreq_get_driver_data() is added to allow a
cpufreq driver to retrieve those driver data, since they are typically
needed from a cpufreq_policy->init() callback, which does not have
access to the cpufreq_driver structure. This function call is similar
to the existing cpufreq_get_current_driver() function call.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 15 +++++++++++++++
 include/linux/cpufreq.h   |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 24bf76fba141..058d6e084a6d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1731,6 +1731,21 @@ const char *cpufreq_get_current_driver(void)
 }
 EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
 
+/**
+ *	cpufreq_get_driver_data - return current driver data
+ *
+ *	Return the private data of the currently loaded cpufreq
+ *	driver, or NULL if no cpufreq driver is loaded.
+ */
+void *cpufreq_get_driver_data(void)
+{
+	if (cpufreq_driver)
+		return cpufreq_driver->driver_data;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_driver_data);
+
 /*********************************************************************
  *                     NOTIFIER LISTS INTERFACE                      *
  *********************************************************************/
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 138336b6bb04..503b085b7832 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -219,6 +219,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 struct cpufreq_driver {
 	char			name[CPUFREQ_NAME_LEN];
 	u8			flags;
+	void			*driver_data;
 
 	/* needed by all drivers */
 	int	(*init)		(struct cpufreq_policy *policy);
@@ -312,6 +313,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
 const char *cpufreq_get_current_driver(void);
+void *cpufreq_get_driver_data(void);
 
 static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
 		unsigned int min, unsigned int max)
-- 
cgit v1.2.3


From 34e5a5273d6aa0ee8836bd5d6111b135ffae6931 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sun, 19 Oct 2014 11:30:28 +0200
Subject: cpufreq: cpufreq-dt: extend with platform_data

This commit extends the cpufreq-dt driver to take a platform_data
structure. This structure is for now used to tell the cpufreq-dt
driver the layout of the clocks on the platform, i.e whether all CPUs
share the same clock or whether each CPU has a separate clock.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt.c | 17 +++++++++++++++--
 include/linux/cpufreq-dt.h   | 22 ++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/cpufreq-dt.h

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 6bbb8b913446..52facdaf531e 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -18,6 +18,7 @@
 #include <linux/cpu.h>
 #include <linux/cpu_cooling.h>
 #include <linux/cpufreq.h>
+#include <linux/cpufreq-dt.h>
 #include <linux/cpumask.h>
 #include <linux/err.h>
 #include <linux/module.h>
@@ -178,6 +179,7 @@ try_again:
 
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
+	struct cpufreq_dt_platform_data *pd;
 	struct cpufreq_frequency_table *freq_table;
 	struct thermal_cooling_device *cdev;
 	struct device_node *np;
@@ -265,9 +267,18 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	policy->driver_data = priv;
 
 	policy->clk = cpu_clk;
-	ret = cpufreq_generic_init(policy, freq_table, transition_latency);
-	if (ret)
+	ret = cpufreq_table_validate_and_show(policy, freq_table);
+	if (ret) {
+		dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
+			ret);
 		goto out_cooling_unregister;
+	}
+
+	policy->cpuinfo.transition_latency = transition_latency;
+
+	pd = cpufreq_get_driver_data();
+	if (pd && !pd->independent_clocks)
+		cpumask_setall(policy->cpus);
 
 	of_node_put(np);
 
@@ -335,6 +346,8 @@ static int dt_cpufreq_probe(struct platform_device *pdev)
 	if (!IS_ERR(cpu_reg))
 		regulator_put(cpu_reg);
 
+	dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
+
 	ret = cpufreq_register_driver(&dt_cpufreq_driver);
 	if (ret)
 		dev_err(cpu_dev, "failed register driver: %d\n", ret);
diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h
new file mode 100644
index 000000000000..0414009e2c30
--- /dev/null
+++ b/include/linux/cpufreq-dt.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2014 Marvell
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CPUFREQ_DT_H__
+#define __CPUFREQ_DT_H__
+
+struct cpufreq_dt_platform_data {
+	/*
+	 * True when each CPU has its own clock to control its
+	 * frequency, false when all CPUs are controlled by a single
+	 * clock.
+	 */
+	bool independent_clocks;
+};
+
+#endif /* __CPUFREQ_DT_H__ */
-- 
cgit v1.2.3


From a5b7616c55e188fe3d6ef686bef402d4703ecb62 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Tue, 30 Sep 2014 03:14:55 +0100
Subject: mtd: m25p80,spi-nor: Fix module aliases for m25p80

m25p80's device ID table is now spi_nor_ids, defined in spi-nor.  The
MODULE_DEVICE_TABLE() macro doesn't work with extern definitions, but
its use was also removed at the same time.  Now if m25p80 is built as
a module it doesn't get the necessary aliases to be loaded
automatically.

A clean solution to this will involve defining the list of device
IDs in spi-nor.h and removing struct spi_device_id from the spi-nor
API, but this is quite a large change.

As a quick fix suitable for stable, copy the device IDs back into
m25p80.

Fixes: 03e296f613af ("mtd: m25p80: use the SPI nor framework")
Cc: <stable@vger.kernel.org> # 3.16.x: 32f1b7c8352f: mtd: move support for struct flash_platform_data into m25p80
Cc: <stable@vger.kernel.org> # 3.16.x: 90e55b3812a1: mtd: m25p80: get rid of spi_get_device_id
Cc: <stable@vger.kernel.org> # 3.16.x: 70f3ce0510af: mtd: spi-nor: make spi_nor_scan() take a chip type name, not spi_device_id
Cc: <stable@vger.kernel.org> # 3.16.x
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/m25p80.c  | 52 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/mtd/spi-nor/spi-nor.c |  3 +--
 include/linux/mtd/spi-nor.h   |  1 -
 3 files changed, 52 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index bd5e4c6edfd4..ed827cf894e4 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -261,12 +261,62 @@ static int m25p_remove(struct spi_device *spi)
 }
 
 
+/*
+ * XXX This needs to be kept in sync with spi_nor_ids.  We can't share
+ * it with spi-nor, because if this is built as a module then modpost
+ * won't be able to read it and add appropriate aliases.
+ */
+static const struct spi_device_id m25p_ids[] = {
+	{"at25fs010"},	{"at25fs040"},	{"at25df041a"},	{"at25df321a"},
+	{"at25df641"},	{"at26f004"},	{"at26df081a"},	{"at26df161a"},
+	{"at26df321"},	{"at45db081d"},
+	{"en25f32"},	{"en25p32"},	{"en25q32b"},	{"en25p64"},
+	{"en25q64"},	{"en25qh128"},	{"en25qh256"},
+	{"f25l32pa"},
+	{"mr25h256"},	{"mr25h10"},
+	{"gd25q32"},	{"gd25q64"},
+	{"160s33b"},	{"320s33b"},	{"640s33b"},
+	{"mx25l2005a"},	{"mx25l4005a"},	{"mx25l8005"},	{"mx25l1606e"},
+	{"mx25l3205d"},	{"mx25l3255e"},	{"mx25l6405d"},	{"mx25l12805d"},
+	{"mx25l12855e"},{"mx25l25635e"},{"mx25l25655e"},{"mx66l51235l"},
+	{"mx66l1g55g"},
+	{"n25q064"},	{"n25q128a11"},	{"n25q128a13"},	{"n25q256a"},
+	{"n25q512a"},	{"n25q512ax3"},	{"n25q00"},
+	{"pm25lv512"},	{"pm25lv010"},	{"pm25lq032"},
+	{"s25sl032p"},	{"s25sl064p"},	{"s25fl256s0"},	{"s25fl256s1"},
+	{"s25fl512s"},	{"s70fl01gs"},	{"s25sl12800"},	{"s25sl12801"},
+	{"s25fl129p0"},	{"s25fl129p1"},	{"s25sl004a"},	{"s25sl008a"},
+	{"s25sl016a"},	{"s25sl032a"},	{"s25sl064a"},	{"s25fl008k"},
+	{"s25fl016k"},	{"s25fl064k"},
+	{"sst25vf040b"},{"sst25vf080b"},{"sst25vf016b"},{"sst25vf032b"},
+	{"sst25vf064c"},{"sst25wf512"},	{"sst25wf010"},	{"sst25wf020"},
+	{"sst25wf040"},
+	{"m25p05"},	{"m25p10"},	{"m25p20"},	{"m25p40"},
+	{"m25p80"},	{"m25p16"},	{"m25p32"},	{"m25p64"},
+	{"m25p128"},	{"n25q032"},
+	{"m25p05-nonjedec"},	{"m25p10-nonjedec"},	{"m25p20-nonjedec"},
+	{"m25p40-nonjedec"},	{"m25p80-nonjedec"},	{"m25p16-nonjedec"},
+	{"m25p32-nonjedec"},	{"m25p64-nonjedec"},	{"m25p128-nonjedec"},
+	{"m45pe10"},	{"m45pe80"},	{"m45pe16"},
+	{"m25pe20"},	{"m25pe80"},	{"m25pe16"},
+	{"m25px16"},	{"m25px32"},	{"m25px32-s0"},	{"m25px32-s1"},
+	{"m25px64"},
+	{"w25x10"},	{"w25x20"},	{"w25x40"},	{"w25x80"},
+	{"w25x16"},	{"w25x32"},	{"w25q32"},	{"w25q32dw"},
+	{"w25x64"},	{"w25q64"},	{"w25q128"},	{"w25q80"},
+	{"w25q80bl"},	{"w25q128"},	{"w25q256"},	{"cat25c11"},
+	{"cat25c03"},	{"cat25c09"},	{"cat25c17"},	{"cat25128"},
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, m25p_ids);
+
+
 static struct spi_driver m25p80_driver = {
 	.driver = {
 		.name	= "m25p80",
 		.owner	= THIS_MODULE,
 	},
-	.id_table	= spi_nor_ids,
+	.id_table	= m25p_ids,
 	.probe	= m25p_probe,
 	.remove	= m25p_remove,
 
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 5c8e39977bc5..c51ee52386a7 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -475,7 +475,7 @@ struct flash_info {
  * more nor chips.  This current list focusses on newer chips, which
  * have been converging on command sets which including JEDEC ID.
  */
-const struct spi_device_id spi_nor_ids[] = {
+static const struct spi_device_id spi_nor_ids[] = {
 	/* Atmel -- some are (confusingly) marketed as "DataFlash" */
 	{ "at25fs010",  INFO(0x1f6601, 0, 32 * 1024,   4, SECT_4K) },
 	{ "at25fs040",  INFO(0x1f6604, 0, 64 * 1024,   8, SECT_4K) },
@@ -639,7 +639,6 @@ const struct spi_device_id spi_nor_ids[] = {
 	{ "cat25128", CAT25_INFO(2048, 8, 64, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
 	{ },
 };
-EXPORT_SYMBOL_GPL(spi_nor_ids);
 
 static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor)
 {
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index a5a7a086748d..046a0a2e4c4e 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -199,6 +199,5 @@ struct spi_nor {
  * Return: 0 for success, others for failure.
  */
 int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode);
-extern const struct spi_device_id spi_nor_ids[];
 
 #endif
-- 
cgit v1.2.3


From 5695be142e203167e3cb515ef86a88424f3524eb Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Mon, 20 Oct 2014 18:12:32 +0200
Subject: OOM, PM: OOM killed task shouldn't escape PM suspend

PM freezer relies on having all tasks frozen by the time devices are
getting frozen so that no task will touch them while they are getting
frozen. But OOM killer is allowed to kill an already frozen task in
order to handle OOM situtation. In order to protect from late wake ups
OOM killer is disabled after all tasks are frozen. This, however, still
keeps a window open when a killed task didn't manage to die by the time
freeze_processes finishes.

Reduce the race window by checking all tasks after OOM killer has been
disabled. This is still not race free completely unfortunately because
oom_killer_disable cannot stop an already ongoing OOM killer so a task
might still wake up from the fridge and get killed without
freeze_processes noticing. Full synchronization of OOM and freezer is,
however, too heavy weight for this highly unlikely case.

Introduce and check oom_kills counter which gets incremented early when
the allocator enters __alloc_pages_may_oom path and only check all the
tasks if the counter changes during the freezing attempt. The counter
is updated so early to reduce the race window since allocator checked
oom_killer_disabled which is set by PM-freezing code. A false positive
will push the PM-freezer into a slow path but that is not a big deal.

Changes since v1
- push the re-check loop out of freeze_processes into
  check_frozen_processes and invert the condition to make the code more
  readable as per Rafael

Fixes: f660daac474c6f (oom: thaw threads if oom killed thread is frozen before deferring)
Cc: 3.2+ <stable@vger.kernel.org> # 3.2+
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/oom.h    |  3 +++
 kernel/power/process.c | 40 +++++++++++++++++++++++++++++++++++++++-
 mm/oom_kill.c          | 17 +++++++++++++++++
 mm/page_alloc.c        |  8 ++++++++
 4 files changed, 67 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 647395a1a550..e8d6e1058723 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
+
+extern int oom_kills_count(void);
+extern void note_oom_kill(void);
 extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			     unsigned int points, unsigned long totalpages,
 			     struct mem_cgroup *memcg, nodemask_t *nodemask,
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7b323221b9ee..5cc588c1abab 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -108,6 +108,28 @@ static int try_to_freeze_tasks(bool user_only)
 	return todo ? -EBUSY : 0;
 }
 
+/*
+ * Returns true if all freezable tasks (except for current) are frozen already
+ */
+static bool check_frozen_processes(void)
+{
+	struct task_struct *g, *p;
+	bool ret = true;
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(g, p) {
+		if (p != current && !freezer_should_skip(p) &&
+		    !frozen(p)) {
+			ret = false;
+			goto done;
+		}
+	}
+done:
+	read_unlock(&tasklist_lock);
+
+	return ret;
+}
+
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
  * The current thread will not be frozen.  The same process that calls
@@ -118,6 +140,7 @@ static int try_to_freeze_tasks(bool user_only)
 int freeze_processes(void)
 {
 	int error;
+	int oom_kills_saved;
 
 	error = __usermodehelper_disable(UMH_FREEZING);
 	if (error)
@@ -132,12 +155,27 @@ int freeze_processes(void)
 	pm_wakeup_clear();
 	printk("Freezing user space processes ... ");
 	pm_freezing = true;
+	oom_kills_saved = oom_kills_count();
 	error = try_to_freeze_tasks(true);
 	if (!error) {
-		printk("done.");
 		__usermodehelper_set_disable_depth(UMH_DISABLED);
 		oom_killer_disable();
+
+		/*
+		 * There might have been an OOM kill while we were
+		 * freezing tasks and the killed task might be still
+		 * on the way out so we have to double check for race.
+		 */
+		if (oom_kills_count() != oom_kills_saved &&
+				!check_frozen_processes()) {
+			__usermodehelper_set_disable_depth(UMH_ENABLED);
+			printk("OOM in progress.");
+			error = -EBUSY;
+			goto done;
+		}
+		printk("done.");
 	}
+done:
 	printk("\n");
 	BUG_ON(in_atomic());
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bbf405a3a18f..5340f6b91312 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -404,6 +404,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 		dump_tasks(memcg, nodemask);
 }
 
+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by PM freezer to check for potential races with
+ * OOM killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void)
+{
+	return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void)
+{
+	atomic_inc(&oom_kills);
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /*
  * Must be called while holding a reference to p, which will be released upon
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 736d8e1b6381..9cd36b822444 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2251,6 +2251,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		return NULL;
 	}
 
+	/*
+	 * PM-freezer should be notified that there might be an OOM killer on
+	 * its way to kill and wake somebody up. This is too early and we might
+	 * end up not killing anything but false positives are acceptable.
+	 * See freeze_processes.
+	 */
+	note_oom_kill();
+
 	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if
-- 
cgit v1.2.3


From 9e8beeb79ded25c5c1986f80fb8a7f6815345d5a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Oct 2014 18:58:48 -0600
Subject: audit: Remove "weak" from audit_classify_compat_syscall() declaration

There's only one audit_classify_compat_syscall() definition, so it doesn't
need to be weak.

Remove the "weak" attribute from the audit_classify_compat_syscall()
declaration.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
CC: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 include/linux/audit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 36dffeccebdb..e58fe7df8b9c 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -90,7 +90,7 @@ extern unsigned compat_dir_class[];
 extern unsigned compat_chattr_class[];
 extern unsigned compat_signal_class[];
 
-extern int __weak audit_classify_compat_syscall(int abi, unsigned syscall);
+extern int audit_classify_compat_syscall(int abi, unsigned syscall);
 
 /* audit_names->type values */
 #define	AUDIT_TYPE_UNKNOWN	0	/* we don't know yet */
-- 
cgit v1.2.3


From 96a2adbc6f501996418da9f7afe39bf0e4d006a9 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Oct 2014 18:59:09 -0600
Subject: clocksource: Remove "weak" from clocksource_default_clock()
 declaration

kernel/time/jiffies.c provides a default clocksource_default_clock()
definition explicitly marked "weak".  arch/s390 provides its own definition
intended to override the default, but the "weak" attribute on the
declaration applied to the s390 definition as well, so the linker chose one
based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from
pcibios_get_phb_of_node decl")).

Remove the "weak" attribute from the clocksource_default_clock()
declaration so we always prefer a non-weak definition over the weak one,
independent of link order.

Fixes: f1b82746c1e9 ("clocksource: Cleanup clocksource selection")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: John Stultz <john.stultz@linaro.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
CC: Daniel Lezcano <daniel.lezcano@linaro.org>
CC: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/clocksource.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 653f0e2b6ca9..abcafaa20b86 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -287,7 +287,7 @@ extern struct clocksource* clocksource_get_next(void);
 extern void clocksource_change_rating(struct clocksource *cs, int rating);
 extern void clocksource_suspend(void);
 extern void clocksource_resume(void);
-extern struct clocksource * __init __weak clocksource_default_clock(void);
+extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
 extern u64
-- 
cgit v1.2.3


From 5ab03ac5aaa1f032e071f1b3dc433b7839359c03 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Oct 2014 18:59:41 -0600
Subject: vmcore: Remove "weak" from function declarations

For the following functions:

  elfcorehdr_alloc()
  elfcorehdr_free()
  elfcorehdr_read()
  elfcorehdr_read_notes()
  remap_oldmem_pfn_range()

fs/proc/vmcore.c provides default definitions explicitly marked "weak".
arch/s390 provides its own definitions intended to override the default
ones, but the "weak" attribute on the declarations applied to the s390
definitions as well, so the linker chose one based on link order (see
10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node
decl")).

Remove the "weak" attribute from the declarations so we always prefer a
non-weak definition over the weak one, independent of link order.

Fixes: be8a8d069e50 ("vmcore: introduce ELF header in new memory feature")
Fixes: 9cb218131de1 ("vmcore: introduce remap_oldmem_pfn_range()")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
CC: Michael Holzheu <holzheu@linux.vnet.ibm.com>
---
 include/linux/crash_dump.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 72ab536ad3de..3849fce7ecfe 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -14,14 +14,13 @@
 extern unsigned long long elfcorehdr_addr;
 extern unsigned long long elfcorehdr_size;
 
-extern int __weak elfcorehdr_alloc(unsigned long long *addr,
-				   unsigned long long *size);
-extern void __weak elfcorehdr_free(unsigned long long addr);
-extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos);
-extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
-extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
-					 unsigned long from, unsigned long pfn,
-					 unsigned long size, pgprot_t prot);
+extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size);
+extern void elfcorehdr_free(unsigned long long addr);
+extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos);
+extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
+extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
+				  unsigned long from, unsigned long pfn,
+				  unsigned long size, pgprot_t prot);
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
-- 
cgit v1.2.3


From 107bcc6d566cb40184068d888637f9aefe6252dd Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Oct 2014 19:00:25 -0600
Subject: kgdb: Remove "weak" from kgdb_arch_pc() declaration

kernel/debug/debug_core.c provides a default kgdb_arch_pc() definition
explicitly marked "weak".  Several architectures provide their own
definitions intended to override the default, but the "weak" attribute on
the declaration applied to the arch definitions as well, so the linker
chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak
annotation from pcibios_get_phb_of_node decl")).

Remove the "weak" attribute from the declaration so we always prefer a
non-weak definition over the weak one, independent of link order.

Fixes: 688b744d8bc8 ("kgdb: fix signedness mixmatches, add statics, add declaration to header")
Tested-by: Vineet Gupta <vgupta@synopsys.com>	# for ARC build
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Harvey Harrison <harvey.harrison@gmail.com>
---
 include/linux/kgdb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 6b06d378f3df..e465bb15912d 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -283,7 +283,7 @@ struct kgdb_io {
 
 extern struct kgdb_arch		arch_kgdb_ops;
 
-extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
+extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs);
 
 #ifdef CONFIG_SERIAL_KGDB_NMI
 extern int kgdb_register_nmi_console(void);
-- 
cgit v1.2.3


From e0a8400c6923a163265d52798cdd4c33f3f8ab5a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Oct 2014 19:00:47 -0600
Subject: memory-hotplug: Remove "weak" from memory_block_size_bytes()
 declaration

drivers/base/memory.c provides a default memory_block_size_bytes()
definition explicitly marked "weak".  Several architectures provide their
own definitions intended to override the default, but the "weak" attribute
on the declaration applied to the arch definitions as well, so the linker
chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak
annotation from pcibios_get_phb_of_node decl")).

Remove the "weak" attribute from the declaration so we always prefer a
non-weak definition over the weak one, independent of link order.

Fixes: 41f107266b19 ("drivers: base: Add prototype declaration to the header file")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
CC: Rashika Kheria <rashika.kheria@gmail.com>
CC: Nathan Fontenot <nfont@austin.ibm.com>
CC: Anton Blanchard <anton@au1.ibm.com>
CC: Heiko Carstens <heiko.carstens@de.ibm.com>
CC: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memory.h b/include/linux/memory.h
index bb7384e3c3d8..8b8d8d12348e 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -35,7 +35,7 @@ struct memory_block {
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
-unsigned long __weak memory_block_size_bytes(void);
+unsigned long memory_block_size_bytes(void);
 
 /* These states are exposed to userspace as text strings in sysfs */
 #define	MEM_ONLINE		(1<<0) /* exposed to userspace */
-- 
cgit v1.2.3


From 271a9c35158910496f6fc3a635c2ed85df6be3d9 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Oct 2014 19:01:03 -0600
Subject: uprobes: Remove "weak" from function declarations

For the following interfaces:

  set_swbp()
  set_orig_insn()
  is_swbp_insn()
  is_trap_insn()
  uprobe_get_swbp_addr()
  arch_uprobe_ignore()
  arch_uprobe_copy_ixol()

kernel/events/uprobes.c provides default definitions explicitly marked
"weak".  Some architectures provide their own definitions intended to
override the defaults, but the "weak" attribute on the declarations applied
to the arch definitions as well, so the linker chose one based on link
order (see 10629d711ed7 ("PCI: Remove __weak annotation from
pcibios_get_phb_of_node decl")).

Remove the "weak" attribute from the declarations so we always prefer a
non-weak definition over the weak one, independent of link order.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
CC: Victor Kamensky <victor.kamensky@linaro.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: David A. Long <dave.long@linaro.org>
CC: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
---
 include/linux/uprobes.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f844c6b03ee..60beb5dc7977 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -98,11 +98,11 @@ struct uprobes_state {
 	struct xol_area		*xol_area;
 };
 
-extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
-extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
-extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
-extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
-extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern bool is_swbp_insn(uprobe_opcode_t *insn);
+extern bool is_trap_insn(uprobe_opcode_t *insn);
+extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
 extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
@@ -128,8 +128,8 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
-extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
-extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
+extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 					 void *src, unsigned long len);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
-- 
cgit v1.2.3


From 4aa7c6346be395bdf776f82bbb2e3e2bc60bdd2b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 24 Oct 2014 00:14:35 +0200
Subject: vfs: add i_op->dentry_open()

Add a new inode operation i_op->dentry_open().  This is for stacked filesystems
that want to return a struct file from a different filesystem.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 Documentation/filesystems/Locking |  2 ++
 Documentation/filesystems/vfs.txt |  7 +++++++
 fs/namei.c                        |  9 ++++++---
 fs/open.c                         | 23 +++++++++++++++++++++--
 include/linux/fs.h                |  4 ++++
 5 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 94d93b1f8b53..b30753cbf431 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -67,6 +67,7 @@ prototypes:
 				struct file *, unsigned open_flag,
 				umode_t create_mode, int *opened);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+	int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
 
 locking rules:
 	all may block
@@ -96,6 +97,7 @@ fiemap:		no
 update_time:	no
 atomic_open:	yes
 tmpfile:	no
+dentry_open:	no
 
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index fceff7c00a3c..20bf204426ca 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,6 +364,7 @@ struct inode_operations {
 	int (*atomic_open)(struct inode *, struct dentry *, struct file *,
 			unsigned open_flag, umode_t create_mode, int *opened);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+	int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -696,6 +697,12 @@ struct address_space_operations {
   	but instead uses bmap to find out where the blocks in the file
   	are and uses those addresses directly.
 
+  dentry_open: *WARNING: probably going away soon, do not use!* This is an
+	alternative to f_op->open(), the difference is that this method may open
+	a file not necessarily originating from the same filesystem as the one
+	i_op->open() was called on.  It may be useful for stacking filesystems
+	which want to allow native I/O directly on underlying files.
+
 
   invalidatepage: If a page has PagePrivate set, then invalidatepage
         will be called when part or all of the page is to be removed
diff --git a/fs/namei.c b/fs/namei.c
index 43927d14db67..75306b3c9526 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3064,9 +3064,12 @@ finish_open_created:
 	error = may_open(&nd->path, acc_mode, open_flag);
 	if (error)
 		goto out;
-	file->f_path.mnt = nd->path.mnt;
-	error = finish_open(file, nd->path.dentry, NULL, opened);
-	if (error) {
+
+	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
+	error = vfs_open(&nd->path, file, current_cred());
+	if (!error) {
+		*opened |= FILE_OPENED;
+	} else {
 		if (error == -EOPENSTALE)
 			goto stale_open;
 		goto out;
diff --git a/fs/open.c b/fs/open.c
index d6fd3acde134..de92c13b58be 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -823,8 +823,7 @@ struct file *dentry_open(const struct path *path, int flags,
 	f = get_empty_filp();
 	if (!IS_ERR(f)) {
 		f->f_flags = flags;
-		f->f_path = *path;
-		error = do_dentry_open(f, NULL, cred);
+		error = vfs_open(path, f, cred);
 		if (!error) {
 			/* from now on we need fput() to dispose of f */
 			error = open_check_o_direct(f);
@@ -841,6 +840,26 @@ struct file *dentry_open(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(dentry_open);
 
+/**
+ * vfs_open - open the file at the given path
+ * @path: path to open
+ * @filp: newly allocated file with f_flag initialized
+ * @cred: credentials to use
+ */
+int vfs_open(const struct path *path, struct file *filp,
+	     const struct cred *cred)
+{
+	struct inode *inode = path->dentry->d_inode;
+
+	if (inode->i_op->dentry_open)
+		return inode->i_op->dentry_open(path->dentry, filp, cred);
+	else {
+		filp->f_path = *path;
+		return do_dentry_open(filp, NULL, cred);
+	}
+}
+EXPORT_SYMBOL(vfs_open);
+
 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
 {
 	int lookup_flags = 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a957d4366c24..5cf7f6759679 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1528,6 +1528,9 @@ struct inode_operations {
 			   umode_t create_mode, int *opened);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
+
+	/* WARNING: probably going away soon, do not use! */
+	int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
 } ____cacheline_aligned;
 
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
@@ -2040,6 +2043,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t);
 extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int);
+extern int vfs_open(const struct path *, struct file *, const struct cred *);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 
-- 
cgit v1.2.3


From 1c118596a7682912106c80007102ce0184c77780 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 24 Oct 2014 00:14:35 +0200
Subject: vfs: export do_splice_direct() to modules

Export do_splice_direct() to modules.  Needed by overlay filesystem.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/internal.h      | 6 ------
 fs/splice.c        | 1 +
 include/linux/fs.h | 3 +++
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index 9477f8f6aefc..0f0626a6997c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -138,12 +138,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
  */
 extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
 
-/*
- * splice.c
- */
-extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
-		loff_t *opos, size_t len, unsigned int flags);
-
 /*
  * pipe.c
  */
diff --git a/fs/splice.c b/fs/splice.c
index f5cb9ba84510..75c6058eabf2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1330,6 +1330,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 	return ret;
 }
+EXPORT_SYMBOL(do_splice_direct);
 
 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
 			       struct pipe_inode_info *opipe,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5cf7f6759679..10ed65b2c31d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2456,6 +2456,9 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
 		struct file *out, loff_t *, size_t len, unsigned int flags);
+extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+		loff_t *opos, size_t len, unsigned int flags);
+
 
 extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-- 
cgit v1.2.3


From bd5d08569cc379f8366663a61558a9ce17c2e460 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 24 Oct 2014 00:14:35 +0200
Subject: vfs: export __inode_permission() to modules

We need to be able to check inode permissions (but not filesystem implied
permissions) for stackable filesystems.  Expose this interface for overlayfs.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/internal.h      | 1 -
 fs/namei.c         | 1 +
 include/linux/fs.h | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index 0f0626a6997c..757ba2abf21e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -47,7 +47,6 @@ extern void __init chrdev_init(void);
 /*
  * namei.c
  */
-extern int __inode_permission(struct inode *, int);
 extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct path *);
diff --git a/fs/namei.c b/fs/namei.c
index 75306b3c9526..d944f6db9b07 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -416,6 +416,7 @@ int __inode_permission(struct inode *inode, int mask)
 
 	return security_inode_permission(inode, mask);
 }
+EXPORT_SYMBOL(__inode_permission);
 
 /**
  * sb_permission - Check superblock-level permissions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 10ed65b2c31d..5419df70a835 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2257,6 +2257,7 @@ extern sector_t bmap(struct inode *, sector_t);
 #endif
 extern int notify_change(struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
+extern int __inode_permission(struct inode *, int);
 extern int generic_permission(struct inode *, int);
 
 static inline bool execute_ok(struct inode *inode)
-- 
cgit v1.2.3


From c771d683a62e5d36bc46036f5c07f4f5bb7dda61 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 24 Oct 2014 00:14:36 +0200
Subject: vfs: introduce clone_private_mount()

Overlayfs needs a private clone of the mount, so create a function for
this and export to modules.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namespace.c        | 27 +++++++++++++++++++++++++++
 include/linux/mount.h |  3 +++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index fbba8b17330d..5b66b2b3624d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1686,6 +1686,33 @@ void drop_collected_mounts(struct vfsmount *mnt)
 	namespace_unlock();
 }
 
+/**
+ * clone_private_mount - create a private clone of a path
+ *
+ * This creates a new vfsmount, which will be the clone of @path.  The new will
+ * not be attached anywhere in the namespace and will be private (i.e. changes
+ * to the originating mount won't be propagated into this).
+ *
+ * Release with mntput().
+ */
+struct vfsmount *clone_private_mount(struct path *path)
+{
+	struct mount *old_mnt = real_mount(path->mnt);
+	struct mount *new_mnt;
+
+	if (IS_MNT_UNBINDABLE(old_mnt))
+		return ERR_PTR(-EINVAL);
+
+	down_read(&namespace_sem);
+	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
+	up_read(&namespace_sem);
+	if (IS_ERR(new_mnt))
+		return ERR_CAST(new_mnt);
+
+	return &new_mnt->mnt;
+}
+EXPORT_SYMBOL_GPL(clone_private_mount);
+
 int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
 		   struct vfsmount *root)
 {
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 9262e4bf0cc3..c2c561dc0114 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -81,6 +81,9 @@ extern struct vfsmount *mntget(struct vfsmount *mnt);
 extern struct vfsmount *mnt_clone_internal(struct path *path);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
 
+struct path;
+extern struct vfsmount *clone_private_mount(struct path *path);
+
 struct file_system_type;
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
-- 
cgit v1.2.3


From cbdf35bcb833bfd00f0925d7a9a33a21f41ea582 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 24 Oct 2014 00:14:36 +0200
Subject: vfs: export check_sticky()

It's already duplicated in btrfs and about to be used in overlayfs too.

Move the sticky bit check to an inline helper and call the out-of-line
helper only in the unlikly case of the sticky bit being set.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/btrfs/ioctl.c   | 20 +-------------------
 fs/namei.c         |  9 ++-------
 include/linux/fs.h |  9 +++++++++
 3 files changed, 12 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8d2b76e29d3b..4399f0c3a4ce 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -765,23 +765,6 @@ out:
 	return ret;
 }
 
-/*  copy of check_sticky in fs/namei.c()
-* It's inline, so penalty for filesystems that don't use sticky bit is
-* minimal.
-*/
-static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
-{
-	kuid_t fsuid = current_fsuid();
-
-	if (!(dir->i_mode & S_ISVTX))
-		return 0;
-	if (uid_eq(inode->i_uid, fsuid))
-		return 0;
-	if (uid_eq(dir->i_uid, fsuid))
-		return 0;
-	return !capable(CAP_FOWNER);
-}
-
 /*  copy of may_delete in fs/namei.c()
  *	Check whether we can remove a link victim from directory dir, check
  *  whether the type of victim is right.
@@ -817,8 +800,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
 		return error;
 	if (IS_APPEND(dir))
 		return -EPERM;
-	if (btrfs_check_sticky(dir, victim->d_inode)||
-		IS_APPEND(victim->d_inode)||
+	if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) ||
 	    IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
 		return -EPERM;
 	if (isdir) {
diff --git a/fs/namei.c b/fs/namei.c
index d944f6db9b07..77fd536106cb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2384,22 +2384,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path,
 }
 EXPORT_SYMBOL(kern_path_mountpoint);
 
-/*
- * It's inline, so penalty for filesystems that don't use sticky bit is
- * minimal.
- */
-static inline int check_sticky(struct inode *dir, struct inode *inode)
+int __check_sticky(struct inode *dir, struct inode *inode)
 {
 	kuid_t fsuid = current_fsuid();
 
-	if (!(dir->i_mode & S_ISVTX))
-		return 0;
 	if (uid_eq(inode->i_uid, fsuid))
 		return 0;
 	if (uid_eq(dir->i_uid, fsuid))
 		return 0;
 	return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
 }
+EXPORT_SYMBOL(__check_sticky);
 
 /*
  *	Check whether we can remove a link victim from directory dir, check
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5419df70a835..55cc0a319baa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2259,6 +2259,7 @@ extern int notify_change(struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
 extern int __inode_permission(struct inode *, int);
 extern int generic_permission(struct inode *, int);
+extern int __check_sticky(struct inode *dir, struct inode *inode);
 
 static inline bool execute_ok(struct inode *inode)
 {
@@ -2745,6 +2746,14 @@ static inline int is_sxid(umode_t mode)
 	return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
 }
 
+static inline int check_sticky(struct inode *dir, struct inode *inode)
+{
+	if (!(dir->i_mode & S_ISVTX))
+		return 0;
+
+	return __check_sticky(dir, inode);
+}
+
 static inline void inode_has_no_xattr(struct inode *inode)
 {
 	if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC))
-- 
cgit v1.2.3


From 787fb6bc9682ec7c05fb5d9561b57100fbc1cc41 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 24 Oct 2014 00:14:36 +0200
Subject: vfs: add whiteout support

Whiteout isn't actually a new file type, but is represented as a char
device (Linus's idea) with 0/0 device number.

This has several advantages compared to introducing a new whiteout file
type:

 - no userspace API changes (e.g. trivial to make backups of upper layer
   filesystem, without losing whiteouts)

 - no fs image format changes (you can boot an old kernel/fsck without
   whiteout support and things won't break)

 - implementation is trivial

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c         | 14 ++++++++++++++
 include/linux/fs.h | 11 +++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 77fd536106cb..d20191c0ebf5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4346,6 +4346,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
 	return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
 
+int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+{
+	int error = may_create(dir, dentry);
+	if (error)
+		return error;
+
+	if (!dir->i_op->mknod)
+		return -EPERM;
+
+	return dir->i_op->mknod(dir, dentry,
+				S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+}
+EXPORT_SYMBOL(vfs_whiteout);
+
 int readlink_copy(char __user *buffer, int buflen, const char *link)
 {
 	int len = PTR_ERR(link);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 55cc0a319baa..69118b3cb917 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -222,6 +222,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define ATTR_OPEN	(1 << 15) /* Truncating from open(O_TRUNC) */
 #define ATTR_TIMES_SET	(1 << 16)
 
+/*
+ * Whiteout is represented by a char device.  The following constants define the
+ * mode and device number to use.
+ */
+#define WHITEOUT_MODE 0
+#define WHITEOUT_DEV 0
+
 /*
  * This is the Inode Attributes structure, used for notify_change().  It
  * uses the above definitions as flags, to know which values have changed.
@@ -1398,6 +1405,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
+extern int vfs_whiteout(struct inode *, struct dentry *);
 
 /*
  * VFS dentry helper functions.
@@ -1628,6 +1636,9 @@ struct super_operations {
 #define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
 #define IS_NOSEC(inode)		((inode)->i_flags & S_NOSEC)
 
+#define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
+				 (inode)->i_rdev == WHITEOUT_DEV)
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
-- 
cgit v1.2.3


From 69c433ed2ecd2d3264efd7afec4439524b319121 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 24 Oct 2014 00:14:39 +0200
Subject: fs: limit filesystem stacking depth

Add a simple read-only counter to super_block that indicates how deep this
is in the stack of filesystems.  Previously ecryptfs was the only stackable
filesystem and it explicitly disallowed multiple layers of itself.

Overlayfs, however, can be stacked recursively and also may be stacked
on top of ecryptfs or vice versa.

To limit the kernel stack usage we must limit the depth of the
filesystem stack.  Initially the limit is set to 2.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/ecryptfs/main.c   |  7 +++++++
 fs/overlayfs/super.c |  9 +++++++++
 include/linux/fs.h   | 11 +++++++++++
 3 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1b119d3bf924..c4cd1fd86cc2 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -566,6 +566,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 	s->s_blocksize = path.dentry->d_sb->s_blocksize;
 	s->s_magic = ECRYPTFS_SUPER_MAGIC;
+	s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
+
+	rc = -EINVAL;
+	if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("eCryptfs: maximum fs stacking depth exceeded\n");
+		goto out_free;
+	}
 
 	inode = ecryptfs_get_inode(path.dentry->d_inode, s);
 	rc = PTR_ERR(inode);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7dcc24e84417..08b704cebfc4 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -677,6 +677,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	ufs->lower_namelen = statfs.f_namelen;
 
+	sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
+				lowerpath.mnt->mnt_sb->s_stack_depth) + 1;
+
+	err = -EINVAL;
+	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
+		goto out_put_workpath;
+	}
+
 	ufs->upper_mnt = clone_private_mount(&upperpath);
 	err = PTR_ERR(ufs->upper_mnt);
 	if (IS_ERR(ufs->upper_mnt)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 69118b3cb917..4e41a4a331bb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -261,6 +261,12 @@ struct iattr {
  */
 #include <linux/quota.h>
 
+/*
+ * Maximum number of layers of fs stack.  Needs to be limited to
+ * prevent kernel stack overflow
+ */
+#define FILESYSTEM_MAX_STACK_DEPTH 2
+
 /** 
  * enum positive_aop_returns - aop return codes with specific semantics
  *
@@ -1273,6 +1279,11 @@ struct super_block {
 	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
 	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
 	struct rcu_head		rcu;
+
+	/*
+	 * Indicates how deep in a filesystem stack this SB is
+	 */
+	int s_stack_depth;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
-- 
cgit v1.2.3


From 571ee1b6859869a09ed718d390aac2b9414646a2 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@linux.intel.com>
Date: Thu, 9 Oct 2014 18:30:08 +0800
Subject: kvm: vfio: fix unregister kvm_device_ops of vfio

After commit 80ce163 (KVM: VFIO: register kvm_device_ops dynamically),
kvm_device_ops of vfio can be registered dynamically. Commit 3c3c29fd
(kvm-vfio: do not use module_init) move the dynamic register invoked by
kvm_init in order to fix broke unloading of the kvm module. However,
kvm_device_ops of vfio is unregistered after rmmod kvm-intel module
which lead to device type collision detection warning after kvm-intel
module reinsmod.

    WARNING: CPU: 1 PID: 10358 at /root/cathy/kvm/arch/x86/kvm/../../../virt/kvm/kvm_main.c:3289 kvm_init+0x234/0x282 [kvm]()
    Modules linked in: kvm_intel(O+) kvm(O) nfsv3 nfs_acl auth_rpcgss oid_registry nfsv4 dns_resolver nfs fscache lockd sunrpc pci_stub bridge stp llc autofs4 8021q cpufreq_ondemand ipv6 joydev microcode pcspkr igb i2c_algo_bit ehci_pci ehci_hcd e1000e i2c_i801 ixgbe ptp pps_core hwmon mdio tpm_tis tpm ipmi_si ipmi_msghandler acpi_cpufreq isci libsas scsi_transport_sas button dm_mirror dm_region_hash dm_log dm_mod [last unloaded: kvm_intel]
    CPU: 1 PID: 10358 Comm: insmod Tainted: G        W  O   3.17.0-rc1 #2
    Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013
     0000000000000cd9 ffff880ff08cfd18 ffffffff814a61d9 0000000000000cd9
     0000000000000000 ffff880ff08cfd58 ffffffff810417b7 ffff880ff08cfd48
     ffffffffa045bcac ffffffffa049c420 0000000000000040 00000000000000ff
    Call Trace:
     [<ffffffff814a61d9>] dump_stack+0x49/0x60
     [<ffffffff810417b7>] warn_slowpath_common+0x7c/0x96
     [<ffffffffa045bcac>] ? kvm_init+0x234/0x282 [kvm]
     [<ffffffff810417e6>] warn_slowpath_null+0x15/0x17
     [<ffffffffa045bcac>] kvm_init+0x234/0x282 [kvm]
     [<ffffffffa016e995>] vmx_init+0x1bf/0x42a [kvm_intel]
     [<ffffffffa016e7d6>] ? vmx_check_processor_compat+0x64/0x64 [kvm_intel]
     [<ffffffff810002ab>] do_one_initcall+0xe3/0x170
     [<ffffffff811168a9>] ? __vunmap+0xad/0xb8
     [<ffffffff8109c58f>] do_init_module+0x2b/0x174
     [<ffffffff8109d414>] load_module+0x43e/0x569
     [<ffffffff8109c6d8>] ? do_init_module+0x174/0x174
     [<ffffffff8109c75a>] ? copy_module_from_user+0x39/0x82
     [<ffffffff8109b7dd>] ? module_sect_show+0x20/0x20
     [<ffffffff8109d65f>] SyS_init_module+0x54/0x81
     [<ffffffff814a9a12>] system_call_fastpath+0x16/0x1b
    ---[ end trace 0626f4a3ddea56f3 ]---

The bug can be reproduced by:

    rmmod kvm_intel.ko
    insmod kvm_intel.ko

without rmmod/insmod kvm.ko
This patch fixes the bug by unregistering kvm_device_ops of vfio when the
kvm-intel module is removed.

Reported-by: Liu Rongrong <rongrongx.liu@intel.com>
Fixes: 3c3c29fd0d7cddc32862c350d0700ce69953e3bd
Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 virt/kvm/kvm_main.c      | 7 +++++++
 virt/kvm/vfio.c          | 5 +++++
 virt/kvm/vfio.h          | 4 ++++
 4 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 28be31f49250..ea53b04993f2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
+void kvm_unregister_device_ops(u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 384eaa7b02fa..25ffac9e947d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2354,6 +2354,12 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
 	return 0;
 }
 
+void kvm_unregister_device_ops(u32 type)
+{
+	if (kvm_device_ops_table[type] != NULL)
+		kvm_device_ops_table[type] = NULL;
+}
+
 static int kvm_ioctl_create_device(struct kvm *kvm,
 				   struct kvm_create_device *cd)
 {
@@ -3328,5 +3334,6 @@ void kvm_exit(void)
 	kvm_arch_exit();
 	kvm_irqfd_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	kvm_vfio_ops_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 281e7cf2b8e5..620e37f741b8 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -283,3 +283,8 @@ int kvm_vfio_ops_init(void)
 {
 	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
 }
+
+void kvm_vfio_ops_exit(void)
+{
+	kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
+}
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
index 92eac75d6b62..ab88c7dc0514 100644
--- a/virt/kvm/vfio.h
+++ b/virt/kvm/vfio.h
@@ -3,11 +3,15 @@
 
 #ifdef CONFIG_KVM_VFIO
 int kvm_vfio_ops_init(void);
+void kvm_vfio_ops_exit(void);
 #else
 static inline int kvm_vfio_ops_init(void)
 {
 	return 0;
 }
+static inline void kvm_vfio_ops_exit(void)
+{
+}
 #endif
 
 #endif
-- 
cgit v1.2.3


From dda02fd6278d9e995850b3c1dba484f17cbe4de4 Mon Sep 17 00:00:00 2001
From: Weijie Yang <weijie.yang@samsung.com>
Date: Fri, 24 Oct 2014 17:47:57 +0800
Subject: mm, cma: make parameters order consistent in func declaration and
 definition

In the current code, the base and size parameters order is not consistent
in functions declaration and definition. If someone calls these functions
according to the declaration parameters order in cma.h, he will run into
some bug and it's hard to find the reason.

This patch makes the parameters order consistent in functions declaration
and definition.

Signed-off-by: Weijie Yang <weijie.yang@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/linux/cma.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 0430ed05d3b9..a93438beb33c 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -18,12 +18,12 @@ struct cma;
 extern phys_addr_t cma_get_base(struct cma *cma);
 extern unsigned long cma_get_size(struct cma *cma);
 
-extern int __init cma_declare_contiguous(phys_addr_t size,
-			phys_addr_t base, phys_addr_t limit,
+extern int __init cma_declare_contiguous(phys_addr_t base,
+			phys_addr_t size, phys_addr_t limit,
 			phys_addr_t alignment, unsigned int order_per_bit,
 			bool fixed, struct cma **res_cma);
-extern int cma_init_reserved_mem(phys_addr_t size,
-					phys_addr_t base, int order_per_bit,
+extern int cma_init_reserved_mem(phys_addr_t base,
+					phys_addr_t size, int order_per_bit,
 					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
 extern bool cma_release(struct cma *cma, struct page *pages, int count);
-- 
cgit v1.2.3


From a69d82b9bdf1e53e94423048e8bda8c5f5a3dd4e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Tue, 7 Oct 2014 17:47:36 +0200
Subject: power_supply: Add no_thermal property to prevent recursive get_temp
 calls

Add a 'no_thermal' property to the power supply class. If true then
thermal zone won't be created for this power supply in
power_supply_register().

Power supply drivers may want to set it if they support
POWER_SUPPLY_PROP_TEMP and they are forwarding this get property call to
other thermal zone.

If they won't set it lockdep may report false positive deadlock for
thermal zone's mutex because of nested calls to thermal_zone_get_temp().
First is the call to thermal_zone_get_temp() of the driver's thermal
zone. Thermal core gets POWER_SUPPLY_PROP_TEMP property from this
driver. The driver then calls other thermal zone thermal_zone_get_temp()
and returns result.

Example of such driver is charger manager.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/power/power_supply_core.c | 3 +++
 include/linux/power_supply.h      | 6 ++++++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 6cb7fe5c022d..694e8cddd5c1 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -417,6 +417,9 @@ static int psy_register_thermal(struct power_supply *psy)
 {
 	int i;
 
+	if (psy->no_thermal)
+		return 0;
+
 	/* Register battery zone device psy reports temperature */
 	for (i = 0; i < psy->num_properties; i++) {
 		if (psy->properties[i] == POWER_SUPPLY_PROP_TEMP) {
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 3ed049673022..096dbced02ac 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -200,6 +200,12 @@ struct power_supply {
 	void (*external_power_changed)(struct power_supply *psy);
 	void (*set_charged)(struct power_supply *psy);
 
+	/*
+	 * Set if thermal zone should not be created for this power supply.
+	 * For example for virtual supplies forwarding calls to actual
+	 * sensors or other supplies.
+	 */
+	bool no_thermal;
 	/* For APM emulation, think legacy userspace. */
 	int use_for_apm;
 
-- 
cgit v1.2.3


From bdbe81445407644492b9ac69a24d35e3202d773b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 13 Oct 2014 15:34:30 +0200
Subject: power: charger-manager: Fix accessing invalidated power supply after
 fuel gauge unbind

The charger manager obtained reference to fuel gauge power supply in probe
with power_supply_get_by_name() for later usage. However if fuel gauge
driver was removed and re-added then this reference would point to old
power supply (from driver which was removed).

This lead to accessing old (and probably invalid) memory which could be
observed with:
$ echo "12-0036" > /sys/bus/i2c/drivers/max17042/unbind
$ echo "12-0036" > /sys/bus/i2c/drivers/max17042/bind
$ cat /sys/devices/virtual/power_supply/battery/capacity
[  240.480084] INFO: task cat:1393 blocked for more than 120 seconds.
[  240.484799]       Not tainted 3.17.0-next-20141007-00028-ge60b6dd79570 #203
[  240.491782] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  240.499589] cat             D c0469530     0  1393      1 0x00000000
[  240.505947] [<c0469530>] (__schedule) from [<c0469d3c>] (schedule_preempt_disabled+0x14/0x20)
[  240.514449] [<c0469d3c>] (schedule_preempt_disabled) from [<c046af08>] (mutex_lock_nested+0x1bc/0x458)
[  240.523736] [<c046af08>] (mutex_lock_nested) from [<c0287a98>] (regmap_read+0x30/0x60)
[  240.531647] [<c0287a98>] (regmap_read) from [<c032238c>] (max17042_get_property+0x2e8/0x350)
[  240.540055] [<c032238c>] (max17042_get_property) from [<c03247d8>] (charger_get_property+0x264/0x348)
[  240.549252] [<c03247d8>] (charger_get_property) from [<c0320764>] (power_supply_show_property+0x48/0x1e0)
[  240.558808] [<c0320764>] (power_supply_show_property) from [<c027308c>] (dev_attr_show+0x1c/0x48)
[  240.567664] [<c027308c>] (dev_attr_show) from [<c0141fb0>] (sysfs_kf_seq_show+0x84/0x104)
[  240.575814] [<c0141fb0>] (sysfs_kf_seq_show) from [<c0140b18>] (kernfs_seq_show+0x24/0x28)
[  240.584061] [<c0140b18>] (kernfs_seq_show) from [<c0104574>] (seq_read+0x1b0/0x484)
[  240.591702] [<c0104574>] (seq_read) from [<c00e1e24>] (vfs_read+0x88/0x144)
[  240.598640] [<c00e1e24>] (vfs_read) from [<c00e1f20>] (SyS_read+0x40/0x8c)
[  240.605507] [<c00e1f20>] (SyS_read) from [<c000e760>] (ret_fast_syscall+0x0/0x48)
[  240.612952] 4 locks held by cat/1393:
[  240.616589]  #0:  (&p->lock){+.+.+.}, at: [<c01043f4>] seq_read+0x30/0x484
[  240.623414]  #1:  (&of->mutex){+.+.+.}, at: [<c01417dc>] kernfs_seq_start+0x1c/0x8c
[  240.631086]  #2:  (s_active#31){++++.+}, at: [<c01417e4>] kernfs_seq_start+0x24/0x8c
[  240.638777]  #3:  (&map->mutex){+.+...}, at: [<c0287a98>] regmap_read+0x30/0x60

The charger-manager should get reference to fuel gauge power supply on
each use of get_property callback. The thermal zone 'tzd' field of
power supply should not be used because of the same reason.

Additionally this change solves also the issue with nested
thermal_zone_get_temp() calls and related false lockdep positive for
deadlock for thermal zone's mutex [1]. When fuel gauge is used as source of
temperature then the charger manager forwards its get_temp calls to fuel
gauge thermal zone. So actually different mutexes are used (one for
charger manager thermal zone and second for fuel gauge thermal zone) but
for lockdep this is one class of mutex.

The recursion is removed by retrieving temperature through power
supply's get_property().

In case external thermal zone is used ('cm-thermal-zone' property is
present in DTS) the recursion does not exist. Charger manager simply
exports POWER_SUPPLY_PROP_TEMP_AMBIENT property (instead of
POWER_SUPPLY_PROP_TEMP) thus no thermal zone is created for this power
supply.

[1] https://lkml.org/lkml/2014/10/6/309

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Cc: <stable@vger.kernel.org>
Fixes: 3bb3dbbd56ea ("power_supply: Add initial Charger-Manager driver")
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/power/charger-manager.c       | 99 +++++++++++++++++++++++++----------
 include/linux/power/charger-manager.h |  1 -
 2 files changed, 71 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 7a1177ea238d..7ae8cb70204a 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -97,6 +97,7 @@ static struct charger_global_desc *g_desc; /* init with setup_charger_manager */
 static bool is_batt_present(struct charger_manager *cm)
 {
 	union power_supply_propval val;
+	struct power_supply *psy;
 	bool present = false;
 	int i, ret;
 
@@ -107,7 +108,11 @@ static bool is_batt_present(struct charger_manager *cm)
 	case CM_NO_BATTERY:
 		break;
 	case CM_FUEL_GAUGE:
-		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+		psy = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
+		if (!psy)
+			break;
+
+		ret = psy->get_property(psy,
 				POWER_SUPPLY_PROP_PRESENT, &val);
 		if (ret == 0 && val.intval)
 			present = true;
@@ -167,12 +172,14 @@ static bool is_ext_pwr_online(struct charger_manager *cm)
 static int get_batt_uV(struct charger_manager *cm, int *uV)
 {
 	union power_supply_propval val;
+	struct power_supply *fuel_gauge;
 	int ret;
 
-	if (!cm->fuel_gauge)
+	fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
+	if (!fuel_gauge)
 		return -ENODEV;
 
-	ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+	ret = fuel_gauge->get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_VOLTAGE_NOW, &val);
 	if (ret)
 		return ret;
@@ -248,6 +255,7 @@ static bool is_full_charged(struct charger_manager *cm)
 {
 	struct charger_desc *desc = cm->desc;
 	union power_supply_propval val;
+	struct power_supply *fuel_gauge;
 	int ret = 0;
 	int uV;
 
@@ -255,11 +263,15 @@ static bool is_full_charged(struct charger_manager *cm)
 	if (!is_batt_present(cm))
 		return false;
 
-	if (cm->fuel_gauge && desc->fullbatt_full_capacity > 0) {
+	fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
+	if (!fuel_gauge)
+		return false;
+
+	if (desc->fullbatt_full_capacity > 0) {
 		val.intval = 0;
 
 		/* Not full if capacity of fuel gauge isn't full */
-		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+		ret = fuel_gauge->get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_CHARGE_FULL, &val);
 		if (!ret && val.intval > desc->fullbatt_full_capacity)
 			return true;
@@ -273,10 +285,10 @@ static bool is_full_charged(struct charger_manager *cm)
 	}
 
 	/* Full, if the capacity is more than fullbatt_soc */
-	if (cm->fuel_gauge && desc->fullbatt_soc > 0) {
+	if (desc->fullbatt_soc > 0) {
 		val.intval = 0;
 
-		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+		ret = fuel_gauge->get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_CAPACITY, &val);
 		if (!ret && val.intval >= desc->fullbatt_soc)
 			return true;
@@ -551,6 +563,20 @@ static int check_charging_duration(struct charger_manager *cm)
 	return ret;
 }
 
+static int cm_get_battery_temperature_by_psy(struct charger_manager *cm,
+					int *temp)
+{
+	struct power_supply *fuel_gauge;
+
+	fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
+	if (!fuel_gauge)
+		return -ENODEV;
+
+	return fuel_gauge->get_property(fuel_gauge,
+				POWER_SUPPLY_PROP_TEMP,
+				(union power_supply_propval *)temp);
+}
+
 static int cm_get_battery_temperature(struct charger_manager *cm,
 					int *temp)
 {
@@ -560,15 +586,18 @@ static int cm_get_battery_temperature(struct charger_manager *cm,
 		return -ENODEV;
 
 #ifdef CONFIG_THERMAL
-	ret = thermal_zone_get_temp(cm->tzd_batt, (unsigned long *)temp);
-	if (!ret)
-		/* Calibrate temperature unit */
-		*temp /= 100;
-#else
-	ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
-				POWER_SUPPLY_PROP_TEMP,
-				(union power_supply_propval *)temp);
+	if (cm->tzd_batt) {
+		ret = thermal_zone_get_temp(cm->tzd_batt, (unsigned long *)temp);
+		if (!ret)
+			/* Calibrate temperature unit */
+			*temp /= 100;
+	} else
 #endif
+	{
+		/* if-else continued from CONFIG_THERMAL */
+		ret = cm_get_battery_temperature_by_psy(cm, temp);
+	}
+
 	return ret;
 }
 
@@ -827,6 +856,7 @@ static int charger_get_property(struct power_supply *psy,
 	struct charger_manager *cm = container_of(psy,
 			struct charger_manager, charger_psy);
 	struct charger_desc *desc = cm->desc;
+	struct power_supply *fuel_gauge;
 	int ret = 0;
 	int uV;
 
@@ -857,14 +887,20 @@ static int charger_get_property(struct power_supply *psy,
 		ret = get_batt_uV(cm, &val->intval);
 		break;
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
-		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+		fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
+		if (!fuel_gauge) {
+			ret = -ENODEV;
+			break;
+		}
+		ret = fuel_gauge->get_property(fuel_gauge,
 				POWER_SUPPLY_PROP_CURRENT_NOW, val);
 		break;
 	case POWER_SUPPLY_PROP_TEMP:
 	case POWER_SUPPLY_PROP_TEMP_AMBIENT:
 		return cm_get_battery_temperature(cm, &val->intval);
 	case POWER_SUPPLY_PROP_CAPACITY:
-		if (!cm->fuel_gauge) {
+		fuel_gauge = power_supply_get_by_name(cm->desc->psy_fuel_gauge);
+		if (!fuel_gauge) {
 			ret = -ENODEV;
 			break;
 		}
@@ -875,7 +911,7 @@ static int charger_get_property(struct power_supply *psy,
 			break;
 		}
 
-		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+		ret = fuel_gauge->get_property(fuel_gauge,
 					POWER_SUPPLY_PROP_CAPACITY, val);
 		if (ret)
 			break;
@@ -924,7 +960,14 @@ static int charger_get_property(struct power_supply *psy,
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_NOW:
 		if (is_charging(cm)) {
-			ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+			fuel_gauge = power_supply_get_by_name(
+					cm->desc->psy_fuel_gauge);
+			if (!fuel_gauge) {
+				ret = -ENODEV;
+				break;
+			}
+
+			ret = fuel_gauge->get_property(fuel_gauge,
 						POWER_SUPPLY_PROP_CHARGE_NOW,
 						val);
 			if (ret) {
@@ -1486,14 +1529,15 @@ err:
 	return ret;
 }
 
-static int cm_init_thermal_data(struct charger_manager *cm)
+static int cm_init_thermal_data(struct charger_manager *cm,
+		struct power_supply *fuel_gauge)
 {
 	struct charger_desc *desc = cm->desc;
 	union power_supply_propval val;
 	int ret;
 
 	/* Verify whether fuel gauge provides battery temperature */
-	ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+	ret = fuel_gauge->get_property(fuel_gauge,
 					POWER_SUPPLY_PROP_TEMP, &val);
 
 	if (!ret) {
@@ -1503,8 +1547,6 @@ static int cm_init_thermal_data(struct charger_manager *cm)
 		cm->desc->measure_battery_temp = true;
 	}
 #ifdef CONFIG_THERMAL
-	cm->tzd_batt = cm->fuel_gauge->tzd;
-
 	if (ret && desc->thermal_zone) {
 		cm->tzd_batt =
 			thermal_zone_get_zone_by_name(desc->thermal_zone);
@@ -1667,6 +1709,7 @@ static int charger_manager_probe(struct platform_device *pdev)
 	int ret = 0, i = 0;
 	int j = 0;
 	union power_supply_propval val;
+	struct power_supply *fuel_gauge;
 
 	if (g_desc && !rtc_dev && g_desc->rtc_name) {
 		rtc_dev = rtc_class_open(g_desc->rtc_name);
@@ -1745,8 +1788,8 @@ static int charger_manager_probe(struct platform_device *pdev)
 		}
 	}
 
-	cm->fuel_gauge = power_supply_get_by_name(desc->psy_fuel_gauge);
-	if (!cm->fuel_gauge) {
+	fuel_gauge = power_supply_get_by_name(desc->psy_fuel_gauge);
+	if (!fuel_gauge) {
 		dev_err(&pdev->dev, "Cannot find power supply \"%s\"\n",
 			desc->psy_fuel_gauge);
 		return -ENODEV;
@@ -1789,13 +1832,13 @@ static int charger_manager_probe(struct platform_device *pdev)
 	cm->charger_psy.num_properties = psy_default.num_properties;
 
 	/* Find which optional psy-properties are available */
-	if (!cm->fuel_gauge->get_property(cm->fuel_gauge,
+	if (!fuel_gauge->get_property(fuel_gauge,
 					  POWER_SUPPLY_PROP_CHARGE_NOW, &val)) {
 		cm->charger_psy.properties[cm->charger_psy.num_properties] =
 				POWER_SUPPLY_PROP_CHARGE_NOW;
 		cm->charger_psy.num_properties++;
 	}
-	if (!cm->fuel_gauge->get_property(cm->fuel_gauge,
+	if (!fuel_gauge->get_property(fuel_gauge,
 					  POWER_SUPPLY_PROP_CURRENT_NOW,
 					  &val)) {
 		cm->charger_psy.properties[cm->charger_psy.num_properties] =
@@ -1803,7 +1846,7 @@ static int charger_manager_probe(struct platform_device *pdev)
 		cm->charger_psy.num_properties++;
 	}
 
-	ret = cm_init_thermal_data(cm);
+	ret = cm_init_thermal_data(cm, fuel_gauge);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to initialize thermal data\n");
 		cm->desc->measure_battery_temp = false;
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index 07e7945a1ff2..5d90d329e0ba 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -253,7 +253,6 @@ struct charger_manager {
 	struct device *dev;
 	struct charger_desc *desc;
 
-	struct power_supply *fuel_gauge;
 	struct power_supply **charger_stat;
 
 #ifdef CONFIG_THERMAL
-- 
cgit v1.2.3


From cdaf3e15385d3232b52287e50692506f8fd01a09 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 13 Oct 2014 15:34:31 +0200
Subject: power: charger-manager: Fix accessing invalidated power supply after
 charger unbind

The charger manager obtained in probe references to power supplies for
all chargers with power_supply_get_by_name() for later usage. However
if such charger driver was removed then this reference would point to
old power supply (from driver which was removed).

This lead to accessing invalid memory which could be observed with:
$ echo "max77693-charger" > /sys/bus/platform/drivers/max77693-charger/unbind
$ grep . /sys/devices/virtual/power_supply/battery/charger.0/*
$ grep . /sys/devices/virtual/power_supply/battery/*
[   15.339817] Unable to handle kernel paging request at virtual address 0001c12c
[   15.346187] pgd = edd08000
[   15.348814] [0001c12c] *pgd=6dce2831, *pte=00000000, *ppte=00000000
[   15.355075] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM
[   15.360967] Modules linked in:
[   15.364010] CPU: 2 PID: 1388 Comm: grep Not tainted 3.17.0-next-20141007-00027-ga95e761db1b0 #245
[   15.372859] task: ee03ad00 ti: edcf6000 task.ti: edcf6000
[   15.378241] PC is at 0x1c12c
[   15.381113] LR is at is_ext_pwr_online+0x30/0x6c
[   15.385706] pc : [<0001c12c>]    lr : [<c0339fc4>]    psr: a0000013
[   15.385706] sp : edcf7e88  ip : 00000000  fp : 00000000
[   15.397161] r10: eeb02c08  r9 : c04b1f84  r8 : eeb02c00
[   15.402369] r7 : edc69a10  r6 : eea6ac10  r5 : eea6ac10  r4 : 00000004
[   15.408878] r3 : 0001c12c  r2 : edcf7e8c  r1 : 00000004  r0 : ee914418
[   15.415390] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
[   15.422506] Control: 10c5387d  Table: 6dd0804a  DAC: 00000015
[   15.428236] Process grep (pid: 1388, stack limit = 0xedcf6240)
[   15.434050] Stack: (0xedcf7e88 to 0xedcf8000)
[   15.438395] 7e80:                   ee03ad00 00000000 edcf7f80 eea6aca8 edcf7ec4 c033b7b0
[   15.446554] 7ea0: 00000001 ee1cc3f0 00000004 c06e1e44 eebdc000 c06e1e44 eeb02c00 c0337144
[   15.454713] 7ec0: ee2dac68 c005cffc ee1cc3c0 c06e1e44 00000fff 00001000 eebdc000 c0278ca8
[   15.462872] 7ee0: c0278c8c ee1cc3c0 eeb7ce00 c014422c edcf7f20 00008000 ee1cc3c0 ee9a48c0
[   15.471030] 7f00: 00000001 00000001 edcf7f80 c0142d94 c0142d70 c01060f4 00021000 ee1cc3f0
[   15.479190] 7f20: 00000000 00000000 c06a2150 eebdc000 2e7ec000 ee9a48c0 00008000 00021000
[   15.487349] 7f40: edcf7f80 00008000 edcf6000 00021000 00021000 c00e39a4 00000000 ee9a48c0
[   15.495508] 7f60: 00004000 00000000 00000000 ee9a48c0 ee9a48c0 00008000 00021000 c00e3aa0
[   15.503668] 7f80: 00000000 00000000 0001f2e0 0001f2e0 00021000 00001000 00000003 c000f364
[   15.511826] 7fa0: 00000000 c000f1a0 0001f2e0 00021000 00000003 00021000 00008000 00000000
[   15.519986] 7fc0: 0001f2e0 00021000 00001000 00000003 00000001 000205e8 00000000 00021000
[   15.528145] 7fe0: 00008000 bebbe910 0000a7ad b6edc49c 60000010 00000003 aaaaaaaa aaaaaaaa
[   15.536320] [<c0339fc4>] (is_ext_pwr_online) from [<c033b7b0>] (charger_get_property+0x170/0x314)
[   15.545164] [<c033b7b0>] (charger_get_property) from [<c0337144>] (power_supply_show_property+0x48/0x20c)
[   15.554719] [<c0337144>] (power_supply_show_property) from [<c0278ca8>] (dev_attr_show+0x1c/0x48)
[   15.563577] [<c0278ca8>] (dev_attr_show) from [<c014422c>] (sysfs_kf_seq_show+0x84/0x104)
[   15.571725] [<c014422c>] (sysfs_kf_seq_show) from [<c0142d94>] (kernfs_seq_show+0x24/0x28)
[   15.579973] [<c0142d94>] (kernfs_seq_show) from [<c01060f4>] (seq_read+0x1b0/0x484)
[   15.587614] [<c01060f4>] (seq_read) from [<c00e39a4>] (vfs_read+0x88/0x144)
[   15.594552] [<c00e39a4>] (vfs_read) from [<c00e3aa0>] (SyS_read+0x40/0x8c)
[   15.601417] [<c00e3aa0>] (SyS_read) from [<c000f1a0>] (ret_fast_syscall+0x0/0x48)
[   15.608877] Code: bad PC value
[   15.611991] ---[ end trace a88fcc95208db283 ]---

The charger-manager should get reference to charger power supply on
each use of get_property callback.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Cc: <stable@vger.kernel.org>
Fixes: 3bb3dbbd56ea ("power_supply: Add initial Charger-Manager driver")
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/power/charger-manager.c       | 64 +++++++++++++++++++++--------------
 include/linux/power/charger-manager.h |  2 --
 2 files changed, 39 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 7ae8cb70204a..ef8094a61f1e 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -118,10 +118,17 @@ static bool is_batt_present(struct charger_manager *cm)
 			present = true;
 		break;
 	case CM_CHARGER_STAT:
-		for (i = 0; cm->charger_stat[i]; i++) {
-			ret = cm->charger_stat[i]->get_property(
-					cm->charger_stat[i],
-					POWER_SUPPLY_PROP_PRESENT, &val);
+		for (i = 0; cm->desc->psy_charger_stat[i]; i++) {
+			psy = power_supply_get_by_name(
+					cm->desc->psy_charger_stat[i]);
+			if (!psy) {
+				dev_err(cm->dev, "Cannot find power supply \"%s\"\n",
+					cm->desc->psy_charger_stat[i]);
+				continue;
+			}
+
+			ret = psy->get_property(psy, POWER_SUPPLY_PROP_PRESENT,
+					&val);
 			if (ret == 0 && val.intval) {
 				present = true;
 				break;
@@ -144,14 +151,20 @@ static bool is_batt_present(struct charger_manager *cm)
 static bool is_ext_pwr_online(struct charger_manager *cm)
 {
 	union power_supply_propval val;
+	struct power_supply *psy;
 	bool online = false;
 	int i, ret;
 
 	/* If at least one of them has one, it's yes. */
-	for (i = 0; cm->charger_stat[i]; i++) {
-		ret = cm->charger_stat[i]->get_property(
-				cm->charger_stat[i],
-				POWER_SUPPLY_PROP_ONLINE, &val);
+	for (i = 0; cm->desc->psy_charger_stat[i]; i++) {
+		psy = power_supply_get_by_name(cm->desc->psy_charger_stat[i]);
+		if (!psy) {
+			dev_err(cm->dev, "Cannot find power supply \"%s\"\n",
+					cm->desc->psy_charger_stat[i]);
+			continue;
+		}
+
+		ret = psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &val);
 		if (ret == 0 && val.intval) {
 			online = true;
 			break;
@@ -196,6 +209,7 @@ static bool is_charging(struct charger_manager *cm)
 {
 	int i, ret;
 	bool charging = false;
+	struct power_supply *psy;
 	union power_supply_propval val;
 
 	/* If there is no battery, it cannot be charged */
@@ -203,17 +217,22 @@ static bool is_charging(struct charger_manager *cm)
 		return false;
 
 	/* If at least one of the charger is charging, return yes */
-	for (i = 0; cm->charger_stat[i]; i++) {
+	for (i = 0; cm->desc->psy_charger_stat[i]; i++) {
 		/* 1. The charger sholuld not be DISABLED */
 		if (cm->emergency_stop)
 			continue;
 		if (!cm->charger_enabled)
 			continue;
 
+		psy = power_supply_get_by_name(cm->desc->psy_charger_stat[i]);
+		if (!psy) {
+			dev_err(cm->dev, "Cannot find power supply \"%s\"\n",
+					cm->desc->psy_charger_stat[i]);
+			continue;
+		}
+
 		/* 2. The charger should be online (ext-power) */
-		ret = cm->charger_stat[i]->get_property(
-				cm->charger_stat[i],
-				POWER_SUPPLY_PROP_ONLINE, &val);
+		ret = psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &val);
 		if (ret) {
 			dev_warn(cm->dev, "Cannot read ONLINE value from %s\n",
 				 cm->desc->psy_charger_stat[i]);
@@ -226,9 +245,7 @@ static bool is_charging(struct charger_manager *cm)
 		 * 3. The charger should not be FULL, DISCHARGING,
 		 * or NOT_CHARGING.
 		 */
-		ret = cm->charger_stat[i]->get_property(
-				cm->charger_stat[i],
-				POWER_SUPPLY_PROP_STATUS, &val);
+		ret = psy->get_property(psy, POWER_SUPPLY_PROP_STATUS, &val);
 		if (ret) {
 			dev_warn(cm->dev, "Cannot read STATUS value from %s\n",
 				 cm->desc->psy_charger_stat[i]);
@@ -1773,15 +1790,12 @@ static int charger_manager_probe(struct platform_device *pdev)
 	while (desc->psy_charger_stat[i])
 		i++;
 
-	cm->charger_stat = devm_kzalloc(&pdev->dev,
-				sizeof(struct power_supply *) * i, GFP_KERNEL);
-	if (!cm->charger_stat)
-		return -ENOMEM;
-
+	/* Check if charger's supplies are present at probe */
 	for (i = 0; desc->psy_charger_stat[i]; i++) {
-		cm->charger_stat[i] = power_supply_get_by_name(
-					desc->psy_charger_stat[i]);
-		if (!cm->charger_stat[i]) {
+		struct power_supply *psy;
+
+		psy = power_supply_get_by_name(desc->psy_charger_stat[i]);
+		if (!psy) {
 			dev_err(&pdev->dev, "Cannot find power supply \"%s\"\n",
 				desc->psy_charger_stat[i]);
 			return -ENODEV;
@@ -2110,8 +2124,8 @@ static bool find_power_supply(struct charger_manager *cm,
 	int i;
 	bool found = false;
 
-	for (i = 0; cm->charger_stat[i]; i++) {
-		if (psy == cm->charger_stat[i]) {
+	for (i = 0; cm->desc->psy_charger_stat[i]; i++) {
+		if (!strcmp(psy->name, cm->desc->psy_charger_stat[i])) {
 			found = true;
 			break;
 		}
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index 5d90d329e0ba..e97fc656a058 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -253,8 +253,6 @@ struct charger_manager {
 	struct device *dev;
 	struct charger_desc *desc;
 
-	struct power_supply **charger_stat;
-
 #ifdef CONFIG_THERMAL
 	struct thermal_zone_device *tzd_batt;
 #endif
-- 
cgit v1.2.3


From e999dbc254044e8d2a5818d92d205f65bae28f37 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 19 Oct 2014 17:13:57 +0200
Subject: Revert "block: all blk-mq requests are tagged"

This reverts commit fb3ccb5da71273e7f0d50b50bc879e50cedd60e7.

SCSI-2/SPI actually needs the tagged/untagged flag in the request to
work properly.  Revert this patch and add a follow on to set it in
the right place.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Cc: stable@vger.kernel.org
---
 include/linux/blkdev.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0207a78a8d82..51d0dc2259cf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1136,8 +1136,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 /*
  * tag stuff
  */
-#define blk_rq_tagged(rq) \
-	((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED))
+#define blk_rq_tagged(rq)		((rq)->cmd_flags & REQ_QUEUED)
 extern int blk_queue_start_tag(struct request_queue *, struct request *);
 extern struct request *blk_queue_find_tag(struct request_queue *, int);
 extern void blk_queue_end_tag(struct request_queue *, struct request *);
-- 
cgit v1.2.3


From 5631b8fba640a4ab2f8a954f63a603fa34eda96b Mon Sep 17 00:00:00 2001
From: Steven Noonan <steven@uplinklabs.net>
Date: Sat, 25 Oct 2014 15:09:42 -0700
Subject: compiler/gcc4+: Remove inaccurate comment about 'asm goto'
 miscompiles

The bug referenced by the comment in this commit was not
completely fixed in GCC 4.8.2, as I mentioned in a thread back
in February:

   https://lkml.org/lkml/2014/2/12/797

The conclusion at that time was to make the quirk unconditional
until the bug could be found and fixed in GCC. Unfortunately,
when I submitted the patch (commit a9f18034) I left a comment
in that claimed the bug was fixed in GCC 4.8.2+.

This comment is inaccurate, and should be removed.

Signed-off-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1414274982-14040-1-git-send-email-steven@uplinklabs.net
Cc: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc4.h | 1 -
 include/linux/compiler-gcc5.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 2507fd2a1eb4..d1a558239b1a 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -71,7 +71,6 @@
  *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
  *
  * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
- * Fixed in GCC 4.8.2 and later versions.
  *
  * (asm goto is automatically volatile - the naming reflects this.)
  */
diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h
index cdd1cc202d51..c8c565952548 100644
--- a/include/linux/compiler-gcc5.h
+++ b/include/linux/compiler-gcc5.h
@@ -53,7 +53,6 @@
  *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
  *
  * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
- * Fixed in GCC 4.8.2 and later versions.
  *
  * (asm goto is automatically volatile - the naming reflects this.)
  */
-- 
cgit v1.2.3


From ebcf34f3d4be11f994340aff629f3c17171a4f65 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 26 Oct 2014 19:14:06 -0700
Subject: skbuff.h: fix kernel-doc warning for headers_end

Fix kernel-doc warning in <linux/skbuff.h> by making both headers_start
and headers_end private fields.

Warning(..//include/linux/skbuff.h:654): No description found for parameter 'headers_end[0]'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a59d9343c25b..5884f95ff0e9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -557,7 +557,9 @@ struct sk_buff {
 	/* fields enclosed in headers_start/headers_end are copied
 	 * using a single memcpy() in __copy_skb_header()
 	 */
+	/* private: */
 	__u32			headers_start[0];
+	/* public: */
 
 /* if you move pkt_type around you also must adapt those constants */
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -642,7 +644,9 @@ struct sk_buff {
 	__u16			network_header;
 	__u16			mac_header;
 
+	/* private: */
 	__u32			headers_end[0];
+	/* public: */
 
 	/* These elements must be at the end, see alloc_skb() for details.  */
 	sk_buff_data_t		tail;
-- 
cgit v1.2.3


From 1efed2d06c703489342ab6af2951683e07509c99 Mon Sep 17 00:00:00 2001
From: Olivier Blin <olivier.blin@softathome.com>
Date: Fri, 24 Oct 2014 19:43:00 +0200
Subject: usbnet: add a callback for set_rx_mode

To delegate promiscuous mode and multicast filtering to the subdriver.

Signed-off-by: Olivier Blin <olivier.blin@softathome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 20 ++++++++++++++++++++
 include/linux/usb/usbnet.h |  4 ++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 20615bbd693b..3a6770a65d78 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1052,6 +1052,21 @@ static void __handle_link_change(struct usbnet *dev)
 	clear_bit(EVENT_LINK_CHANGE, &dev->flags);
 }
 
+static void usbnet_set_rx_mode(struct net_device *net)
+{
+	struct usbnet		*dev = netdev_priv(net);
+
+	usbnet_defer_kevent(dev, EVENT_SET_RX_MODE);
+}
+
+static void __handle_set_rx_mode(struct usbnet *dev)
+{
+	if (dev->driver_info->set_rx_mode)
+		(dev->driver_info->set_rx_mode)(dev);
+
+	clear_bit(EVENT_SET_RX_MODE, &dev->flags);
+}
+
 /* work that cannot be done in interrupt context uses keventd.
  *
  * NOTE:  with 2.5 we could do more of this using completion callbacks,
@@ -1157,6 +1172,10 @@ skip_reset:
 	if (test_bit (EVENT_LINK_CHANGE, &dev->flags))
 		__handle_link_change(dev);
 
+	if (test_bit (EVENT_SET_RX_MODE, &dev->flags))
+		__handle_set_rx_mode(dev);
+
+
 	if (dev->flags)
 		netdev_dbg(dev->net, "kevent done, flags = 0x%lx\n", dev->flags);
 }
@@ -1525,6 +1544,7 @@ static const struct net_device_ops usbnet_netdev_ops = {
 	.ndo_stop		= usbnet_stop,
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
+	.ndo_set_rx_mode	= usbnet_set_rx_mode,
 	.ndo_change_mtu		= usbnet_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 26088feb6608..d9a4905e01d0 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -78,6 +78,7 @@ struct usbnet {
 #		define EVENT_NO_RUNTIME_PM	9
 #		define EVENT_RX_KILL	10
 #		define EVENT_LINK_CHANGE	11
+#		define EVENT_SET_RX_MODE	12
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
@@ -159,6 +160,9 @@ struct driver_info {
 	/* called by minidriver when receiving indication */
 	void	(*indication)(struct usbnet *dev, void *ind, int indlen);
 
+	/* rx mode change (device changes address list filtering) */
+	void	(*set_rx_mode)(struct usbnet *dev);
+
 	/* for new devices, use the descriptor-reading code instead */
 	int		in;		/* rx endpoint */
 	int		out;		/* tx endpoint */
-- 
cgit v1.2.3


From 54ef6df3f3f1353d99c80c437259d317b2cd1cbd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 27 Oct 2014 21:11:27 -0700
Subject: rcu: Provide counterpart to rcu_dereference() for non-RCU situations

Although rcu_dereference() and friends can be used in situations where
object lifetimes are being managed by something other than RCU, the
resulting sparse and lockdep-RCU noise can be annoying.  This commit
therefore supplies a lockless_dereference(), which provides the
protection for dereferences without the RCU-related debugging noise.

Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/rcupdate.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a4a819ffb2d1..53ff1a752d7e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -616,6 +616,21 @@ static inline void rcu_preempt_sleep_check(void)
  */
 #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
 
+/**
+ * lockless_dereference() - safely load a pointer for later dereference
+ * @p: The pointer to load
+ *
+ * Similar to rcu_dereference(), but for situations where the pointed-to
+ * object's lifetime is managed by something other than RCU.  That
+ * "something other" might be reference counting or simple immortality.
+ */
+#define lockless_dereference(p) \
+({ \
+	typeof(p) _________p1 = ACCESS_ONCE(p); \
+	smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
+	(_________p1); \
+})
+
 /**
  * rcu_assign_pointer() - assign to RCU-protected pointer
  * @p: pointer to assign to
-- 
cgit v1.2.3


From d1b72cc6d8cb766c802fdc70a5edc2f0ba8a2b57 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Mon, 27 Oct 2014 15:42:01 +0100
Subject: overlayfs: fix lockdep misannotation

In an overlay directory that shadows an empty lower directory, say
/mnt/a/empty102, do:

 	touch /mnt/a/empty102/x
 	unlink /mnt/a/empty102/x
 	rmdir /mnt/a/empty102

It's actually harmless, but needs another level of nesting between
I_MUTEX_CHILD and I_MUTEX_NORMAL.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Tested-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c             | 2 +-
 fs/overlayfs/readdir.c | 2 +-
 include/linux/fs.h     | 9 ++++++---
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 42df664e95e5..922f27068c4c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2497,7 +2497,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	}
 
 	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
-	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
+	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2);
 	return NULL;
 }
 EXPORT_SYMBOL(lock_rename);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 3fbf0d306e12..401f0840f5cc 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -571,7 +571,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
 {
 	struct ovl_cache_entry *p;
 
-	mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
 	list_for_each_entry(p, list, l_node) {
 		struct dentry *dentry;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4e41a4a331bb..01036262095f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -639,11 +639,13 @@ static inline int inode_unhashed(struct inode *inode)
  * 2: child/target
  * 3: xattr
  * 4: second non-directory
- * The last is for certain operations (such as rename) which lock two
+ * 5: second parent (when locking independent directories in rename)
+ *
+ * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two
  * non-directories at once.
  *
  * The locking order between these classes is
- * parent -> child -> normal -> xattr -> second non-directory
+ * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory
  */
 enum inode_i_mutex_lock_class
 {
@@ -651,7 +653,8 @@ enum inode_i_mutex_lock_class
 	I_MUTEX_PARENT,
 	I_MUTEX_CHILD,
 	I_MUTEX_XATTR,
-	I_MUTEX_NONDIR2
+	I_MUTEX_NONDIR2,
+	I_MUTEX_PARENT2,
 };
 
 void lock_two_nondirectories(struct inode *, struct inode*);
-- 
cgit v1.2.3


From cb1a5ab6ece7a37da4ac98ee26b0475b7c3ea79e Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 28 Oct 2014 20:27:43 -0600
Subject: block: Fix merge logic when CONFIG_BLK_DEV_INTEGRITY is not defined

Commit 4eaf99beadce switched to returning bool and as a result reversed
the logic of the integrity merge checks.  However, the empty stubs used
when the block integrity code is compiled out were still returning
0. Make these stubs return "true".

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reported-by: Michael L. Semon <mlsemon35@gmail.com>
Tested-by: Michael L. Semon <mlsemon35@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0207a78a8d82..6cbee8395f60 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1583,13 +1583,13 @@ static inline bool blk_integrity_merge_rq(struct request_queue *rq,
 					  struct request *r1,
 					  struct request *r2)
 {
-	return 0;
+	return true;
 }
 static inline bool blk_integrity_merge_bio(struct request_queue *rq,
 					   struct request *r,
 					   struct bio *b)
 {
-	return 0;
+	return true;
 }
 static inline bool blk_integrity_is_initialized(struct gendisk *g)
 {
-- 
cgit v1.2.3


From 47f29df7db78ee4fcdb104cf36918d987ddd0278 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 29 Oct 2014 14:50:29 -0700
Subject: drivers: of: add return value to of_reserved_mem_device_init()

Driver calling of_reserved_mem_device_init() might be interested if the
initialization has been successful or not, so add support for returning
error code.

This fixes a build warining caused by commit 7bfa5ab6fa1b ("drivers:
dma-coherent: add initialization from device tree"), which has been
merged without this change and without fixing function return value.

Fixes: 7bfa5ab6fa1b1 ("drivers: dma-coherent: add initialization from device tree")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Laura Abbott <lauraa@codeaurora.org>
Cc: Josh Cartwright <joshc@codeaurora.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/dma-contiguous.c   |  3 ++-
 drivers/of/of_reserved_mem.c    | 14 +++++++++-----
 include/linux/of_reserved_mem.h |  9 ++++++---
 3 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 473ff4892401..950fff9ce453 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -223,9 +223,10 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 #undef pr_fmt
 #define pr_fmt(fmt) fmt
 
-static void rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
+static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
 {
 	dev_set_cma_area(dev, rmem->priv);
+	return 0;
 }
 
 static void rmem_cma_device_release(struct reserved_mem *rmem,
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 59fb12e84e6b..dc566b38645f 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -243,23 +243,27 @@ static inline struct reserved_mem *__find_rmem(struct device_node *node)
  * This function assign memory region pointed by "memory-region" device tree
  * property to the given device.
  */
-void of_reserved_mem_device_init(struct device *dev)
+int of_reserved_mem_device_init(struct device *dev)
 {
 	struct reserved_mem *rmem;
 	struct device_node *np;
+	int ret;
 
 	np = of_parse_phandle(dev->of_node, "memory-region", 0);
 	if (!np)
-		return;
+		return -ENODEV;
 
 	rmem = __find_rmem(np);
 	of_node_put(np);
 
 	if (!rmem || !rmem->ops || !rmem->ops->device_init)
-		return;
+		return -EINVAL;
+
+	ret = rmem->ops->device_init(rmem, dev);
+	if (ret == 0)
+		dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
 
-	rmem->ops->device_init(rmem, dev);
-	dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
+	return ret;
 }
 
 /**
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 5b5efae09135..ad2f67054372 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -16,7 +16,7 @@ struct reserved_mem {
 };
 
 struct reserved_mem_ops {
-	void	(*device_init)(struct reserved_mem *rmem,
+	int	(*device_init)(struct reserved_mem *rmem,
 			       struct device *dev);
 	void	(*device_release)(struct reserved_mem *rmem,
 				  struct device *dev);
@@ -28,14 +28,17 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
 	_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
 
 #ifdef CONFIG_OF_RESERVED_MEM
-void of_reserved_mem_device_init(struct device *dev);
+int of_reserved_mem_device_init(struct device *dev);
 void of_reserved_mem_device_release(struct device *dev);
 
 void fdt_init_reserved_mem(void);
 void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
 			       phys_addr_t base, phys_addr_t size);
 #else
-static inline void of_reserved_mem_device_init(struct device *dev) { }
+static inline int of_reserved_mem_device_init(struct device *dev)
+{
+	return -ENOSYS;
+}
 static inline void of_reserved_mem_device_release(struct device *pdev) { }
 
 static inline void fdt_init_reserved_mem(void) { }
-- 
cgit v1.2.3


From 6d50e60cd2edb5a57154db5a6f64eef5aa59b751 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 29 Oct 2014 14:50:31 -0700
Subject: mm, thp: fix collapsing of hugepages on madvise

If an anonymous mapping is not allowed to fault thp memory and then
madvise(MADV_HUGEPAGE) is used after fault, khugepaged will never
collapse this memory into thp memory.

This occurs because the madvise(2) handler for thp, hugepage_madvise(),
clears VM_NOHUGEPAGE on the stack and it isn't stored in vma->vm_flags
until the final action of madvise_behavior().  This causes the
khugepaged_enter_vma_merge() to be a no-op in hugepage_madvise() when
the vma had previously had VM_NOHUGEPAGE set.

Fix this by passing the correct vma flags to the khugepaged mm slot
handler.  There's no chance khugepaged can run on this vma until after
madvise_behavior() returns since we hold mm->mmap_sem.

It would be possible to clear VM_NOHUGEPAGE directly from vma->vm_flags
in hugepage_advise(), but I didn't want to introduce special case
behavior into madvise_behavior().  I think it's best to just let it
always set vma->vm_flags itself.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Suleiman Souhlal <suleiman@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/khugepaged.h | 17 ++++++++++-------
 mm/huge_memory.c           | 11 ++++++-----
 mm/mmap.c                  |  8 ++++----
 3 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 6b394f0b5148..eeb307985715 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -6,7 +6,8 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
-extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
+extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+				      unsigned long vm_flags);
 
 #define khugepaged_enabled()					       \
 	(transparent_hugepage_flags &				       \
@@ -35,13 +36,13 @@ static inline void khugepaged_exit(struct mm_struct *mm)
 		__khugepaged_exit(mm);
 }
 
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
-		     (khugepaged_req_madv() &&
-		      vma->vm_flags & VM_HUGEPAGE)) &&
-		    !(vma->vm_flags & VM_NOHUGEPAGE))
+		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
+		    !(vm_flags & VM_NOHUGEPAGE))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;
@@ -54,11 +55,13 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 static inline void khugepaged_exit(struct mm_struct *mm)
 {
 }
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	return 0;
 }
-static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+					     unsigned long vm_flags)
 {
 	return 0;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 780d12c000e9..de984159cf0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -803,7 +803,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_FALLBACK;
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
-	if (unlikely(khugepaged_enter(vma)))
+	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 		return VM_FAULT_OOM;
 	if (!(flags & FAULT_FLAG_WRITE) &&
 			transparent_hugepage_use_zero_page()) {
@@ -1970,7 +1970,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		 * register it here without waiting a page fault that
 		 * may not happen any time soon.
 		 */
-		if (unlikely(khugepaged_enter_vma_merge(vma)))
+		if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
 			return -ENOMEM;
 		break;
 	case MADV_NOHUGEPAGE:
@@ -2071,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm)
 	return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+			       unsigned long vm_flags)
 {
 	unsigned long hstart, hend;
 	if (!vma->anon_vma)
@@ -2083,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
+	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
-		return khugepaged_enter(vma);
+		return khugepaged_enter(vma, vm_flags);
 	return 0;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 7f855206e7fb..87e82b38453c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1080,7 +1080,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 				end, prev->vm_pgoff, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(prev);
+		khugepaged_enter_vma_merge(prev, vm_flags);
 		return prev;
 	}
 
@@ -1099,7 +1099,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 				next->vm_pgoff - pglen, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(area);
+		khugepaged_enter_vma_merge(area, vm_flags);
 		return area;
 	}
 
@@ -2208,7 +2208,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }
@@ -2277,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma,
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }
-- 
cgit v1.2.3


From 3a3c02ecf7f2852f122d6d16fb9b3d9cb0c6f201 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 29 Oct 2014 14:50:46 -0700
Subject: mm: page-writeback: inline account_page_dirtied() into single caller

A follow-up patch would have changed the call signature.  To save the
trouble, just fold it instead.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: <stable@vger.kernel.org>	[3.17.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |  1 -
 mm/page-writeback.c | 23 ++++-------------------
 2 files changed, 4 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27eb1bfbe704..b46461116cd2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1235,7 +1235,6 @@ int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
 void account_page_dirtied(struct page *page, struct address_space *mapping);
-void account_page_writeback(struct page *page);
 int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ff24c9d83112..ff6a5b07211e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2115,23 +2115,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 }
 EXPORT_SYMBOL(account_page_dirtied);
 
-/*
- * Helper function for set_page_writeback family.
- *
- * The caller must hold mem_cgroup_begin/end_update_page_stat() lock
- * while calling this function.
- * See test_set_page_writeback for example.
- *
- * NOTE: Unlike account_page_dirtied this does not rely on being atomic
- * wrt interrupts.
- */
-void account_page_writeback(struct page *page)
-{
-	mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-	inc_zone_page_state(page, NR_WRITEBACK);
-}
-EXPORT_SYMBOL(account_page_writeback);
-
 /*
  * For address_spaces which do not use buffers.  Just tag the page as dirty in
  * its radix tree.
@@ -2410,8 +2393,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 	} else {
 		ret = TestSetPageWriteback(page);
 	}
-	if (!ret)
-		account_page_writeback(page);
+	if (!ret) {
+		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+		inc_zone_page_state(page, NR_WRITEBACK);
+	}
 	mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
 	return ret;
 
-- 
cgit v1.2.3


From d7365e783edb858279be1d03f61bc8d5d3383d90 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 29 Oct 2014 14:50:48 -0700
Subject: mm: memcontrol: fix missed end-writeback page accounting

Commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") changed
page migration to uncharge the old page right away.  The page is locked,
unmapped, truncated, and off the LRU, but it could race with writeback
ending, which then doesn't unaccount the page properly:

test_clear_page_writeback()              migration
                                           wait_on_page_writeback()
  TestClearPageWriteback()
                                           mem_cgroup_migrate()
                                             clear PCG_USED
  mem_cgroup_update_page_stat()
    if (PageCgroupUsed(pc))
      decrease memcg pages under writeback

  release pc->mem_cgroup->move_lock

The per-page statistics interface is heavily optimized to avoid a
function call and a lookup_page_cgroup() in the file unmap fast path,
which means it doesn't verify whether a page is still charged before
clearing PageWriteback() and it has to do it in the stat update later.

Rework it so that it looks up the page's memcg once at the beginning of
the transaction and then uses it throughout.  The charge will be
verified before clearing PageWriteback() and migration can't uncharge
the page as long as that is still set.  The RCU lock will protect the
memcg past uncharge.

As far as losing the optimization goes, the following test results are
from a microbenchmark that maps, faults, and unmaps a 4GB sparse file
three times in a nested fashion, so that there are two negative passes
that don't account but still go through the new transaction overhead.
There is no actual difference:

 old:     33.195102545 seconds time elapsed       ( +-  0.01% )
 new:     33.199231369 seconds time elapsed       ( +-  0.03% )

The time spent in page_remove_rmap()'s callees still adds up to the
same, but the time spent in the function itself seems reduced:

     # Children      Self  Command        Shared Object       Symbol
 old:     0.12%     0.11%  filemapstress  [kernel.kallsyms]   [k] page_remove_rmap
 new:     0.12%     0.08%  filemapstress  [kernel.kallsyms]   [k] page_remove_rmap

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: <stable@vger.kernel.org>	[3.17.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  58 ++++++++-----------------
 mm/memcontrol.c            | 105 ++++++++++++++++++++++++---------------------
 mm/page-writeback.c        |  22 +++++-----
 mm/rmap.c                  |  20 ++++-----
 4 files changed, 96 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 19df5d857411..6b75640ef5ab 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -139,48 +139,23 @@ static inline bool mem_cgroup_disabled(void)
 	return false;
 }
 
-void __mem_cgroup_begin_update_page_stat(struct page *page, bool *locked,
-					 unsigned long *flags);
-
-extern atomic_t memcg_moving;
-
-static inline void mem_cgroup_begin_update_page_stat(struct page *page,
-					bool *locked, unsigned long *flags)
-{
-	if (mem_cgroup_disabled())
-		return;
-	rcu_read_lock();
-	*locked = false;
-	if (atomic_read(&memcg_moving))
-		__mem_cgroup_begin_update_page_stat(page, locked, flags);
-}
-
-void __mem_cgroup_end_update_page_stat(struct page *page,
-				unsigned long *flags);
-static inline void mem_cgroup_end_update_page_stat(struct page *page,
-					bool *locked, unsigned long *flags)
-{
-	if (mem_cgroup_disabled())
-		return;
-	if (*locked)
-		__mem_cgroup_end_update_page_stat(page, flags);
-	rcu_read_unlock();
-}
-
-void mem_cgroup_update_page_stat(struct page *page,
-				 enum mem_cgroup_stat_index idx,
-				 int val);
-
-static inline void mem_cgroup_inc_page_stat(struct page *page,
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked,
+					      unsigned long *flags);
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+			      unsigned long flags);
+void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
+				 enum mem_cgroup_stat_index idx, int val);
+
+static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
 					    enum mem_cgroup_stat_index idx)
 {
-	mem_cgroup_update_page_stat(page, idx, 1);
+	mem_cgroup_update_page_stat(memcg, idx, 1);
 }
 
-static inline void mem_cgroup_dec_page_stat(struct page *page,
+static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg,
 					    enum mem_cgroup_stat_index idx)
 {
-	mem_cgroup_update_page_stat(page, idx, -1);
+	mem_cgroup_update_page_stat(memcg, idx, -1);
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
@@ -315,13 +290,14 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
-static inline void mem_cgroup_begin_update_page_stat(struct page *page,
+static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
 					bool *locked, unsigned long *flags)
 {
+	return NULL;
 }
 
-static inline void mem_cgroup_end_update_page_stat(struct page *page,
-					bool *locked, unsigned long *flags)
+static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg,
+					bool locked, unsigned long flags)
 {
 }
 
@@ -343,12 +319,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
 	return false;
 }
 
-static inline void mem_cgroup_inc_page_stat(struct page *page,
+static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
 					    enum mem_cgroup_stat_index idx)
 {
 }
 
-static inline void mem_cgroup_dec_page_stat(struct page *page,
+static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg,
 					    enum mem_cgroup_stat_index idx)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 23976fd885fd..d6ac0e33e150 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1536,12 +1536,8 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
  *         start move here.
  */
 
-/* for quick checking without looking up memcg */
-atomic_t memcg_moving __read_mostly;
-
 static void mem_cgroup_start_move(struct mem_cgroup *memcg)
 {
-	atomic_inc(&memcg_moving);
 	atomic_inc(&memcg->moving_account);
 	synchronize_rcu();
 }
@@ -1552,10 +1548,8 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
 	 * Now, mem_cgroup_clear_mc() may call this function with NULL.
 	 * We check NULL in callee rather than caller.
 	 */
-	if (memcg) {
-		atomic_dec(&memcg_moving);
+	if (memcg)
 		atomic_dec(&memcg->moving_account);
-	}
 }
 
 /*
@@ -2204,41 +2198,52 @@ cleanup:
 	return true;
 }
 
-/*
- * Used to update mapped file or writeback or other statistics.
+/**
+ * mem_cgroup_begin_page_stat - begin a page state statistics transaction
+ * @page: page that is going to change accounted state
+ * @locked: &memcg->move_lock slowpath was taken
+ * @flags: IRQ-state flags for &memcg->move_lock
  *
- * Notes: Race condition
+ * This function must mark the beginning of an accounted page state
+ * change to prevent double accounting when the page is concurrently
+ * being moved to another memcg:
  *
- * Charging occurs during page instantiation, while the page is
- * unmapped and locked in page migration, or while the page table is
- * locked in THP migration.  No race is possible.
+ *   memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ *   if (TestClearPageState(page))
+ *     mem_cgroup_update_page_stat(memcg, state, -1);
+ *   mem_cgroup_end_page_stat(memcg, locked, flags);
  *
- * Uncharge happens to pages with zero references, no race possible.
+ * The RCU lock is held throughout the transaction.  The fast path can
+ * get away without acquiring the memcg->move_lock (@locked is false)
+ * because page moving starts with an RCU grace period.
  *
- * Charge moving between groups is protected by checking mm->moving
- * account and taking the move_lock in the slowpath.
+ * The RCU lock also protects the memcg from being freed when the page
+ * state that is going to change is the only thing preventing the page
+ * from being uncharged.  E.g. end-writeback clearing PageWriteback(),
+ * which allows migration to go ahead and uncharge the page before the
+ * account transaction might be complete.
  */
-
-void __mem_cgroup_begin_update_page_stat(struct page *page,
-				bool *locked, unsigned long *flags)
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
+					      bool *locked,
+					      unsigned long *flags)
 {
 	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
 
+	rcu_read_lock();
+
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	pc = lookup_page_cgroup(page);
 again:
 	memcg = pc->mem_cgroup;
 	if (unlikely(!memcg || !PageCgroupUsed(pc)))
-		return;
-	/*
-	 * If this memory cgroup is not under account moving, we don't
-	 * need to take move_lock_mem_cgroup(). Because we already hold
-	 * rcu_read_lock(), any calls to move_account will be delayed until
-	 * rcu_read_unlock().
-	 */
-	VM_BUG_ON(!rcu_read_lock_held());
+		return NULL;
+
+	*locked = false;
 	if (atomic_read(&memcg->moving_account) <= 0)
-		return;
+		return memcg;
 
 	move_lock_mem_cgroup(memcg, flags);
 	if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
@@ -2246,36 +2251,40 @@ again:
 		goto again;
 	}
 	*locked = true;
+
+	return memcg;
 }
 
-void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
+/**
+ * mem_cgroup_end_page_stat - finish a page state statistics transaction
+ * @memcg: the memcg that was accounted against
+ * @locked: value received from mem_cgroup_begin_page_stat()
+ * @flags: value received from mem_cgroup_begin_page_stat()
+ */
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+			      unsigned long flags)
 {
-	struct page_cgroup *pc = lookup_page_cgroup(page);
+	if (memcg && locked)
+		move_unlock_mem_cgroup(memcg, &flags);
 
-	/*
-	 * It's guaranteed that pc->mem_cgroup never changes while
-	 * lock is held because a routine modifies pc->mem_cgroup
-	 * should take move_lock_mem_cgroup().
-	 */
-	move_unlock_mem_cgroup(pc->mem_cgroup, flags);
+	rcu_read_unlock();
 }
 
-void mem_cgroup_update_page_stat(struct page *page,
+/**
+ * mem_cgroup_update_page_stat - update page state statistics
+ * @memcg: memcg to account against
+ * @idx: page state item to account
+ * @val: number of pages (positive or negative)
+ *
+ * See mem_cgroup_begin_page_stat() for locking requirements.
+ */
+void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
 				 enum mem_cgroup_stat_index idx, int val)
 {
-	struct mem_cgroup *memcg;
-	struct page_cgroup *pc = lookup_page_cgroup(page);
-	unsigned long uninitialized_var(flags);
-
-	if (mem_cgroup_disabled())
-		return;
-
 	VM_BUG_ON(!rcu_read_lock_held());
-	memcg = pc->mem_cgroup;
-	if (unlikely(!memcg || !PageCgroupUsed(pc)))
-		return;
 
-	this_cpu_add(memcg->stat->count[idx], val);
+	if (memcg)
+		this_cpu_add(memcg->stat->count[idx], val);
 }
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ff6a5b07211e..19ceae87522d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2327,11 +2327,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
 int test_clear_page_writeback(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
-	int ret;
-	bool locked;
 	unsigned long memcg_flags;
+	struct mem_cgroup *memcg;
+	bool locked;
+	int ret;
 
-	mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
+	memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
 	if (mapping) {
 		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		unsigned long flags;
@@ -2352,22 +2353,23 @@ int test_clear_page_writeback(struct page *page)
 		ret = TestClearPageWriteback(page);
 	}
 	if (ret) {
-		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+		mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
 		dec_zone_page_state(page, NR_WRITEBACK);
 		inc_zone_page_state(page, NR_WRITTEN);
 	}
-	mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
+	mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
 	return ret;
 }
 
 int __test_set_page_writeback(struct page *page, bool keep_write)
 {
 	struct address_space *mapping = page_mapping(page);
-	int ret;
-	bool locked;
 	unsigned long memcg_flags;
+	struct mem_cgroup *memcg;
+	bool locked;
+	int ret;
 
-	mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
+	memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
 	if (mapping) {
 		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		unsigned long flags;
@@ -2394,10 +2396,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 		ret = TestSetPageWriteback(page);
 	}
 	if (!ret) {
-		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+		mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
 		inc_zone_page_state(page, NR_WRITEBACK);
 	}
-	mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
+	mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
 	return ret;
 
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 116a5053415b..f574046f77d4 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1042,15 +1042,16 @@ void page_add_new_anon_rmap(struct page *page,
  */
 void page_add_file_rmap(struct page *page)
 {
-	bool locked;
+	struct mem_cgroup *memcg;
 	unsigned long flags;
+	bool locked;
 
-	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+	memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
 	if (atomic_inc_and_test(&page->_mapcount)) {
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
-		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+		mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
 	}
-	mem_cgroup_end_update_page_stat(page, &locked, &flags);
+	mem_cgroup_end_page_stat(memcg, locked, flags);
 }
 
 /**
@@ -1061,9 +1062,10 @@ void page_add_file_rmap(struct page *page)
  */
 void page_remove_rmap(struct page *page)
 {
+	struct mem_cgroup *uninitialized_var(memcg);
 	bool anon = PageAnon(page);
-	bool locked;
 	unsigned long flags;
+	bool locked;
 
 	/*
 	 * The anon case has no mem_cgroup page_stat to update; but may
@@ -1071,7 +1073,7 @@ void page_remove_rmap(struct page *page)
 	 * we hold the lock against page_stat move: so avoid it on anon.
 	 */
 	if (!anon)
-		mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+		memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
 
 	/* page still mapped by someone else? */
 	if (!atomic_add_negative(-1, &page->_mapcount))
@@ -1096,8 +1098,7 @@ void page_remove_rmap(struct page *page)
 				-hpage_nr_pages(page));
 	} else {
 		__dec_zone_page_state(page, NR_FILE_MAPPED);
-		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
-		mem_cgroup_end_update_page_stat(page, &locked, &flags);
+		mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
 	}
 	if (unlikely(PageMlocked(page)))
 		clear_page_mlock(page);
@@ -1110,10 +1111,9 @@ void page_remove_rmap(struct page *page)
 	 * Leaving it set also helps swapoff to reinstate ptes
 	 * faster for those pages still in swapcache.
 	 */
-	return;
 out:
 	if (!anon)
-		mem_cgroup_end_update_page_stat(page, &locked, &flags);
+		mem_cgroup_end_page_stat(memcg, locked, flags);
 }
 
 /*
-- 
cgit v1.2.3


From 39bb5e62867de82b269b07df900165029b928359 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 30 Oct 2014 10:32:34 -0700
Subject: net: skb_fclone_busy() needs to detect orphaned skb

Some drivers are unable to perform TX completions in a bound time.
They instead call skb_orphan()

Problem is skb_fclone_busy() has to detect this case, otherwise
we block TCP retransmits and can freeze unlucky tcp sessions on
mostly idle hosts.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 1f3279ae0c13 ("tcp: avoid retransmits of TCP packets hanging in host queues")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 8 ++++++--
 net/ipv4/tcp_output.c  | 2 +-
 net/xfrm/xfrm_policy.c | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5884f95ff0e9..6c8b6f604e76 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -799,15 +799,19 @@ struct sk_buff_fclones {
  *	@skb: buffer
  *
  * Returns true is skb is a fast clone, and its clone is not freed.
+ * Some drivers call skb_orphan() in their ndo_start_xmit(),
+ * so we also check that this didnt happen.
  */
-static inline bool skb_fclone_busy(const struct sk_buff *skb)
+static inline bool skb_fclone_busy(const struct sock *sk,
+				   const struct sk_buff *skb)
 {
 	const struct sk_buff_fclones *fclones;
 
 	fclones = container_of(skb, struct sk_buff_fclones, skb1);
 
 	return skb->fclone == SKB_FCLONE_ORIG &&
-	       fclones->skb2.fclone == SKB_FCLONE_CLONE;
+	       fclones->skb2.fclone == SKB_FCLONE_CLONE &&
+	       fclones->skb2.sk == sk;
 }
 
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3af21296d967..a3d453b94747 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2126,7 +2126,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 static bool skb_still_in_host_queue(const struct sock *sk,
 				    const struct sk_buff *skb)
 {
-	if (unlikely(skb_fclone_busy(skb))) {
+	if (unlikely(skb_fclone_busy(sk, skb))) {
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 		return true;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4c4e457e7888..88bf289abdc9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1962,7 +1962,7 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
 	struct xfrm_policy *pol = xdst->pols[0];
 	struct xfrm_policy_queue *pq = &pol->polq;
 
-	if (unlikely(skb_fclone_busy(skb))) {
+	if (unlikely(skb_fclone_busy(sk, skb))) {
 		kfree_skb(skb);
 		return 0;
 	}
-- 
cgit v1.2.3


From b2de525f095708b2adbadaec3f1e4017a23d1e09 Mon Sep 17 00:00:00 2001
From: David Jeffery <djeffery@redhat.com>
Date: Mon, 29 Sep 2014 10:21:10 -0400
Subject: Return short read or 0 at end of a raw device, not EIO

Author: David Jeffery <djeffery@redhat.com>
Changes to the basic direct I/O code have broken the raw driver when reading
to the end of a raw device.  Instead of returning a short read for a read that
extends partially beyond the device's end or 0 when at the end of the device,
these reads now return EIO.

The raw driver needs the same end of device handling as was added for normal
block devices.  Using blkdev_read_iter, which has the needed size checks,
prevents the EIO conditions at the end of the device.

Signed-off-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/char/raw.c | 2 +-
 fs/block_dev.c     | 3 ++-
 include/linux/fs.h | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 0102dc788608..a24891b97547 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
 
 static const struct file_operations raw_fops = {
 	.read		= new_sync_read,
-	.read_iter	= generic_file_read_iter,
+	.read_iter	= blkdev_read_iter,
 	.write		= new_sync_write,
 	.write_iter	= blkdev_write_iter,
 	.fsync		= blkdev_fsync,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index cc9d4114cda0..1d9c9f3754f8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1585,7 +1585,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL_GPL(blkdev_write_iter);
 
-static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *bd_inode = file->f_mapping->host;
@@ -1599,6 +1599,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	iov_iter_truncate(to, size);
 	return generic_file_read_iter(iocb, to);
 }
+EXPORT_SYMBOL_GPL(blkdev_read_iter);
 
 /*
  * Try to release a page associated with block device when the system
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 01036262095f..9ab779e8a63c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2469,6 +2469,7 @@ extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
 extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 
 /* fs/block_dev.c */
+extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from);
 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
 			int datasync);
-- 
cgit v1.2.3


From a87fa1d81a9fb5e9adca9820e16008c40ad09f33 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Mon, 3 Nov 2014 15:15:35 +0000
Subject: of: Fix overflow bug in string property parsing functions

The string property read helpers will run off the end of the buffer if
it is handed a malformed string property. Rework the parsers to make
sure that doesn't happen. At the same time add new test cases to make
sure the functions behave themselves.

The original implementations of of_property_read_string_index() and
of_property_count_strings() both open-coded the same block of parsing
code, each with it's own subtly different bugs. The fix here merges
functions into a single helper and makes the original functions static
inline wrappers around the helper.

One non-bugfix aspect of this patch is the addition of a new wrapper,
of_property_read_string_array(). The new wrapper is needed by the
device_properties feature that Rafael is working on and planning to
merge for v3.19. The implementation is identical both with and without
the new static inline wrapper, so it just got left in to reduce the
churn on the header file.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Darren Hart <darren.hart@intel.com>
Cc: <stable@vger.kernel.org>  # v3.3+: Drop selftest hunks that don't apply
---
 drivers/of/base.c                           | 88 ++++++++---------------------
 drivers/of/selftest.c                       | 66 ++++++++++++++++++++--
 drivers/of/testcase-data/tests-phandle.dtsi |  2 +
 include/linux/of.h                          | 84 ++++++++++++++++++++++-----
 4 files changed, 154 insertions(+), 86 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 2305dc0382bc..3823edf2d012 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1279,52 +1279,6 @@ int of_property_read_string(struct device_node *np, const char *propname,
 }
 EXPORT_SYMBOL_GPL(of_property_read_string);
 
-/**
- * of_property_read_string_index - Find and read a string from a multiple
- * strings property.
- * @np:		device node from which the property value is to be read.
- * @propname:	name of the property to be searched.
- * @index:	index of the string in the list of strings
- * @out_string:	pointer to null terminated return string, modified only if
- *		return value is 0.
- *
- * Search for a property in a device tree node and retrieve a null
- * terminated string value (pointer to data, not a copy) in the list of strings
- * contained in that property.
- * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if
- * property does not have a value, and -EILSEQ if the string is not
- * null-terminated within the length of the property data.
- *
- * The out_string pointer is modified only if a valid string can be decoded.
- */
-int of_property_read_string_index(struct device_node *np, const char *propname,
-				  int index, const char **output)
-{
-	struct property *prop = of_find_property(np, propname, NULL);
-	int i = 0;
-	size_t l = 0, total = 0;
-	const char *p;
-
-	if (!prop)
-		return -EINVAL;
-	if (!prop->value)
-		return -ENODATA;
-	if (strnlen(prop->value, prop->length) >= prop->length)
-		return -EILSEQ;
-
-	p = prop->value;
-
-	for (i = 0; total < prop->length; total += l, p += l) {
-		l = strlen(p) + 1;
-		if (i++ == index) {
-			*output = p;
-			return 0;
-		}
-	}
-	return -ENODATA;
-}
-EXPORT_SYMBOL_GPL(of_property_read_string_index);
-
 /**
  * of_property_match_string() - Find string in a list and return index
  * @np: pointer to node containing string list property
@@ -1351,7 +1305,7 @@ int of_property_match_string(struct device_node *np, const char *propname,
 	end = p + prop->length;
 
 	for (i = 0; p < end; i++, p += l) {
-		l = strlen(p) + 1;
+		l = strnlen(p, end - p) + 1;
 		if (p + l > end)
 			return -EILSEQ;
 		pr_debug("comparing %s with %s\n", string, p);
@@ -1363,39 +1317,41 @@ int of_property_match_string(struct device_node *np, const char *propname,
 EXPORT_SYMBOL_GPL(of_property_match_string);
 
 /**
- * of_property_count_strings - Find and return the number of strings from a
- * multiple strings property.
+ * of_property_read_string_util() - Utility helper for parsing string properties
  * @np:		device node from which the property value is to be read.
  * @propname:	name of the property to be searched.
+ * @out_strs:	output array of string pointers.
+ * @sz:		number of array elements to read.
+ * @skip:	Number of strings to skip over at beginning of list.
  *
- * Search for a property in a device tree node and retrieve the number of null
- * terminated string contain in it. Returns the number of strings on
- * success, -EINVAL if the property does not exist, -ENODATA if property
- * does not have a value, and -EILSEQ if the string is not null-terminated
- * within the length of the property data.
+ * Don't call this function directly. It is a utility helper for the
+ * of_property_read_string*() family of functions.
  */
-int of_property_count_strings(struct device_node *np, const char *propname)
+int of_property_read_string_helper(struct device_node *np, const char *propname,
+				   const char **out_strs, size_t sz, int skip)
 {
 	struct property *prop = of_find_property(np, propname, NULL);
-	int i = 0;
-	size_t l = 0, total = 0;
-	const char *p;
+	int l = 0, i = 0;
+	const char *p, *end;
 
 	if (!prop)
 		return -EINVAL;
 	if (!prop->value)
 		return -ENODATA;
-	if (strnlen(prop->value, prop->length) >= prop->length)
-		return -EILSEQ;
-
 	p = prop->value;
+	end = p + prop->length;
 
-	for (i = 0; total < prop->length; total += l, p += l, i++)
-		l = strlen(p) + 1;
-
-	return i;
+	for (i = 0; p < end && (!out_strs || i < skip + sz); i++, p += l) {
+		l = strnlen(p, end - p) + 1;
+		if (p + l > end)
+			return -EILSEQ;
+		if (out_strs && i >= skip)
+			*out_strs++ = p;
+	}
+	i -= skip;
+	return i <= 0 ? -ENODATA : i;
 }
-EXPORT_SYMBOL_GPL(of_property_count_strings);
+EXPORT_SYMBOL_GPL(of_property_read_string_helper);
 
 void of_print_phandle_args(const char *msg, const struct of_phandle_args *args)
 {
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index 78001270a598..11b873c54a77 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -339,8 +339,9 @@ static void __init of_selftest_parse_phandle_with_args(void)
 	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 }
 
-static void __init of_selftest_property_match_string(void)
+static void __init of_selftest_property_string(void)
 {
+	const char *strings[4];
 	struct device_node *np;
 	int rc;
 
@@ -357,13 +358,66 @@ static void __init of_selftest_property_match_string(void)
 	rc = of_property_match_string(np, "phandle-list-names", "third");
 	selftest(rc == 2, "third expected:0 got:%i\n", rc);
 	rc = of_property_match_string(np, "phandle-list-names", "fourth");
-	selftest(rc == -ENODATA, "unmatched string; rc=%i", rc);
+	selftest(rc == -ENODATA, "unmatched string; rc=%i\n", rc);
 	rc = of_property_match_string(np, "missing-property", "blah");
-	selftest(rc == -EINVAL, "missing property; rc=%i", rc);
+	selftest(rc == -EINVAL, "missing property; rc=%i\n", rc);
 	rc = of_property_match_string(np, "empty-property", "blah");
-	selftest(rc == -ENODATA, "empty property; rc=%i", rc);
+	selftest(rc == -ENODATA, "empty property; rc=%i\n", rc);
 	rc = of_property_match_string(np, "unterminated-string", "blah");
-	selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc);
+	selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc);
+
+	/* of_property_count_strings() tests */
+	rc = of_property_count_strings(np, "string-property");
+	selftest(rc == 1, "Incorrect string count; rc=%i\n", rc);
+	rc = of_property_count_strings(np, "phandle-list-names");
+	selftest(rc == 3, "Incorrect string count; rc=%i\n", rc);
+	rc = of_property_count_strings(np, "unterminated-string");
+	selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc);
+	rc = of_property_count_strings(np, "unterminated-string-list");
+	selftest(rc == -EILSEQ, "unterminated string array; rc=%i\n", rc);
+
+	/* of_property_read_string_index() tests */
+	rc = of_property_read_string_index(np, "string-property", 0, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "foobar"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[0] = NULL;
+	rc = of_property_read_string_index(np, "string-property", 1, strings);
+	selftest(rc == -ENODATA && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc);
+	rc = of_property_read_string_index(np, "phandle-list-names", 0, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "first"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	rc = of_property_read_string_index(np, "phandle-list-names", 1, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "second"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	rc = of_property_read_string_index(np, "phandle-list-names", 2, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "third"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[0] = NULL;
+	rc = of_property_read_string_index(np, "phandle-list-names", 3, strings);
+	selftest(rc == -ENODATA && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[0] = NULL;
+	rc = of_property_read_string_index(np, "unterminated-string", 0, strings);
+	selftest(rc == -EILSEQ && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc);
+	rc = of_property_read_string_index(np, "unterminated-string-list", 0, strings);
+	selftest(rc == 0 && !strcmp(strings[0], "first"), "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[0] = NULL;
+	rc = of_property_read_string_index(np, "unterminated-string-list", 2, strings); /* should fail */
+	selftest(rc == -EILSEQ && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc);
+	strings[1] = NULL;
+
+	/* of_property_read_string_array() tests */
+	rc = of_property_read_string_array(np, "string-property", strings, 4);
+	selftest(rc == 1, "Incorrect string count; rc=%i\n", rc);
+	rc = of_property_read_string_array(np, "phandle-list-names", strings, 4);
+	selftest(rc == 3, "Incorrect string count; rc=%i\n", rc);
+	rc = of_property_read_string_array(np, "unterminated-string", strings, 4);
+	selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc);
+	/* -- An incorrectly formed string should cause a failure */
+	rc = of_property_read_string_array(np, "unterminated-string-list", strings, 4);
+	selftest(rc == -EILSEQ, "unterminated string array; rc=%i\n", rc);
+	/* -- parsing the correctly formed strings should still work: */
+	strings[2] = NULL;
+	rc = of_property_read_string_array(np, "unterminated-string-list", strings, 2);
+	selftest(rc == 2 && strings[2] == NULL, "of_property_read_string_array() failure; rc=%i\n", rc);
+	strings[1] = NULL;
+	rc = of_property_read_string_array(np, "phandle-list-names", strings, 1);
+	selftest(rc == 1 && strings[1] == NULL, "Overwrote end of string array; rc=%i, str='%s'\n", rc, strings[1]);
 }
 
 #define propcmp(p1, p2) (((p1)->length == (p2)->length) && \
@@ -881,7 +935,7 @@ static int __init of_selftest(void)
 	of_selftest_find_node_by_name();
 	of_selftest_dynamic();
 	of_selftest_parse_phandle_with_args();
-	of_selftest_property_match_string();
+	of_selftest_property_string();
 	of_selftest_property_copy();
 	of_selftest_changeset();
 	of_selftest_parse_interrupts();
diff --git a/drivers/of/testcase-data/tests-phandle.dtsi b/drivers/of/testcase-data/tests-phandle.dtsi
index ce0fe083d406..5b1527e8a7fb 100644
--- a/drivers/of/testcase-data/tests-phandle.dtsi
+++ b/drivers/of/testcase-data/tests-phandle.dtsi
@@ -39,7 +39,9 @@
 				phandle-list-bad-args = <&provider2 1 0>,
 							<&provider3 0>;
 				empty-property;
+				string-property = "foobar";
 				unterminated-string = [40 41 42 43];
+				unterminated-string-list = "first", "second", [40 41 42 43];
 			};
 		};
 	};
diff --git a/include/linux/of.h b/include/linux/of.h
index 6545e7aec7bb..29f0adc5f3e4 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -267,14 +267,12 @@ extern int of_property_read_u64(const struct device_node *np,
 extern int of_property_read_string(struct device_node *np,
 				   const char *propname,
 				   const char **out_string);
-extern int of_property_read_string_index(struct device_node *np,
-					 const char *propname,
-					 int index, const char **output);
 extern int of_property_match_string(struct device_node *np,
 				    const char *propname,
 				    const char *string);
-extern int of_property_count_strings(struct device_node *np,
-				     const char *propname);
+extern int of_property_read_string_helper(struct device_node *np,
+					      const char *propname,
+					      const char **out_strs, size_t sz, int index);
 extern int of_device_is_compatible(const struct device_node *device,
 				   const char *);
 extern int of_device_is_available(const struct device_node *device);
@@ -486,15 +484,9 @@ static inline int of_property_read_string(struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline int of_property_read_string_index(struct device_node *np,
-						const char *propname, int index,
-						const char **out_string)
-{
-	return -ENOSYS;
-}
-
-static inline int of_property_count_strings(struct device_node *np,
-					    const char *propname)
+static inline int of_property_read_string_helper(struct device_node *np,
+						 const char *propname,
+						 const char **out_strs, size_t sz, int index)
 {
 	return -ENOSYS;
 }
@@ -667,6 +659,70 @@ static inline int of_property_count_u64_elems(const struct device_node *np,
 	return of_property_count_elems_of_size(np, propname, sizeof(u64));
 }
 
+/**
+ * of_property_read_string_array() - Read an array of strings from a multiple
+ * strings property.
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @out_strs:	output array of string pointers.
+ * @sz:		number of array elements to read.
+ *
+ * Search for a property in a device tree node and retrieve a list of
+ * terminated string values (pointer to data, not a copy) in that property.
+ *
+ * If @out_strs is NULL, the number of strings in the property is returned.
+ */
+static inline int of_property_read_string_array(struct device_node *np,
+						const char *propname, const char **out_strs,
+						size_t sz)
+{
+	return of_property_read_string_helper(np, propname, out_strs, sz, 0);
+}
+
+/**
+ * of_property_count_strings() - Find and return the number of strings from a
+ * multiple strings property.
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ *
+ * Search for a property in a device tree node and retrieve the number of null
+ * terminated string contain in it. Returns the number of strings on
+ * success, -EINVAL if the property does not exist, -ENODATA if property
+ * does not have a value, and -EILSEQ if the string is not null-terminated
+ * within the length of the property data.
+ */
+static inline int of_property_count_strings(struct device_node *np,
+					    const char *propname)
+{
+	return of_property_read_string_helper(np, propname, NULL, 0, 0);
+}
+
+/**
+ * of_property_read_string_index() - Find and read a string from a multiple
+ * strings property.
+ * @np:		device node from which the property value is to be read.
+ * @propname:	name of the property to be searched.
+ * @index:	index of the string in the list of strings
+ * @out_string:	pointer to null terminated return string, modified only if
+ *		return value is 0.
+ *
+ * Search for a property in a device tree node and retrieve a null
+ * terminated string value (pointer to data, not a copy) in the list of strings
+ * contained in that property.
+ * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if
+ * property does not have a value, and -EILSEQ if the string is not
+ * null-terminated within the length of the property data.
+ *
+ * The out_string pointer is modified only if a valid string can be decoded.
+ */
+static inline int of_property_read_string_index(struct device_node *np,
+						const char *propname,
+						int index, const char **output)
+{
+	int rc = of_property_read_string_helper(np, propname, output, 1, index);
+	return rc < 0 ? rc : 0;
+}
+
 /**
  * of_property_read_bool - Findfrom a property
  * @np:		device node from which the property value is to be read.
-- 
cgit v1.2.3


From 32f638fc11db0526c706454d9ab4339d55ac89f3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 30 Oct 2014 10:17:25 -0600
Subject: PCI: Don't oops on virtual buses in acpi_pci_get_bridge_handle()

acpi_pci_get_bridge_handle() returns the ACPI handle for the bridge device
(either a host bridge or a PCI-to-PCI bridge) leading to a PCI bus.  But
SR-IOV virtual functions can be on a virtual bus with no bridge leading to
it.  Return a NULL acpi_handle in this case instead of trying to
dereference the NULL pointer to the bridge.

This fixes a NULL pointer dereference oops in pci_get_hp_params() when
adding SR-IOV VF devices on virtual buses.

[bhelgaas: changelog, add comment in code]
Fixes: 6cd33649fa83 ("PCI: Add pci_configure_device() during enumeration")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=87591
Reported-by: Chao Zhou <chao.zhou@intel.com>
Reported-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci-acpi.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 64dacb7288a6..24c7728ca681 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -41,8 +41,13 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
 
 	if (pci_is_root_bus(pbus))
 		dev = pbus->bridge;
-	else
+	else {
+		/* If pbus is a virtual bus, there is no bridge to it */
+		if (!pbus->self)
+			return NULL;
+
 		dev = &pbus->self->dev;
+	}
 
 	return ACPI_HANDLE(dev);
 }
-- 
cgit v1.2.3


From 9cdb5dbf79f4e8f43e19ab7f4ec9ed74c146f0af Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 5 Nov 2014 21:44:27 +0100
Subject: include/linux/socket.h: Fix comment

File descriptors are always closed on exit :-)

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index ec538fc287a6..bb9b83640070 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -256,7 +256,7 @@ struct ucred {
 #define MSG_EOF         MSG_FIN
 
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
-#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exit for file
+#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 					   descriptor received through
 					   SCM_RIGHTS */
 #if defined(CONFIG_COMPAT)
-- 
cgit v1.2.3


From c16561e8df7a64764ef61f02221e98273add325a Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 6 Nov 2014 00:37:08 +0100
Subject: PM / Domains: Change prototype for the attach and detach callbacks

Convert the prototypes to return an int in order to support error
handling in these callbacks.

Also, as suggested by Dmitry Torokhov, pass the domain pointer for use
inside the callbacks, and so that they match the existing
power_on/power_off callbacks which currently take the domain pointer.

Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
[ khilman: added domain as parameter to callbacks, as suggested by Dmitry ]
Signed-off-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 4 ++--
 include/linux/pm_domain.h   | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 40bc2f4072cc..b520687046d4 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1437,7 +1437,7 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
 	spin_unlock_irq(&dev->power.lock);
 
 	if (genpd->attach_dev)
-		genpd->attach_dev(dev);
+		genpd->attach_dev(genpd, dev);
 
 	mutex_lock(&gpd_data->lock);
 	gpd_data->base.dev = dev;
@@ -1499,7 +1499,7 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd,
 	genpd->max_off_time_changed = true;
 
 	if (genpd->detach_dev)
-		genpd->detach_dev(dev);
+		genpd->detach_dev(genpd, dev);
 
 	spin_lock_irq(&dev->power.lock);
 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 73e938b7e937..b3ed7766a291 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -72,8 +72,10 @@ struct generic_pm_domain {
 	bool max_off_time_changed;
 	bool cached_power_down_ok;
 	struct gpd_cpuidle_data *cpuidle_data;
-	void (*attach_dev)(struct device *dev);
-	void (*detach_dev)(struct device *dev);
+	int (*attach_dev)(struct generic_pm_domain *domain,
+			  struct device *dev);
+	void (*detach_dev)(struct generic_pm_domain *domain,
+			   struct device *dev);
 };
 
 static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd)
-- 
cgit v1.2.3


From c0acb8144bd6d8d88aee1dab33364b7353e9a903 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Fri, 10 Oct 2014 12:48:35 +0200
Subject: mfd: max77693: Fix always masked MUIC interrupts

All interrupts coming from MUIC were ignored because interrupt source
register was masked.

The Maxim 77693 has a "interrupt source" - a separate register and interrupts
which give information about PMIC block triggering the individual
interrupt (charger, topsys, MUIC, flash LED).

By default bootloader could initialize this register to "mask all"
value. In such case (observed on Trats2 board) MUIC interrupts won't be
generated regardless of their mask status. Regmap irq chip was unmasking
individual MUIC interrupts but the source was masked

Before introducing regmap irq chip this interrupt source was unmasked,
read and acked. Reading and acking is not necessary but unmasking is.

Fixes: 342d669c1ee4 ("mfd: max77693: Handle IRQs using regmap")

Cc: <stable@vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max77693.c               | 12 ++++++++++++
 include/linux/mfd/max77693-private.h |  7 +++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c
index 22d23fcbb65c..711773e8e64b 100644
--- a/drivers/mfd/max77693.c
+++ b/drivers/mfd/max77693.c
@@ -250,6 +250,17 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
 		goto err_irq_muic;
 	}
 
+	/* Unmask interrupts from all blocks in interrupt source register */
+	ret = regmap_update_bits(max77693->regmap,
+				MAX77693_PMIC_REG_INTSRC_MASK,
+				SRC_IRQ_ALL, (unsigned int)~SRC_IRQ_ALL);
+	if (ret < 0) {
+		dev_err(max77693->dev,
+			"Could not unmask interrupts in INTSRC: %d\n",
+			ret);
+		goto err_intsrc;
+	}
+
 	pm_runtime_set_active(max77693->dev);
 
 	ret = mfd_add_devices(max77693->dev, -1, max77693_devs,
@@ -261,6 +272,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
 
 err_mfd:
 	mfd_remove_devices(max77693->dev);
+err_intsrc:
 	regmap_del_irq_chip(max77693->irq, max77693->irq_data_muic);
 err_irq_muic:
 	regmap_del_irq_chip(max77693->irq, max77693->irq_data_charger);
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index fc17d56581b2..582e67f34054 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -330,6 +330,13 @@ enum max77693_irq_source {
 	MAX77693_IRQ_GROUP_NR,
 };
 
+#define SRC_IRQ_CHARGER			BIT(0)
+#define SRC_IRQ_TOP			BIT(1)
+#define SRC_IRQ_FLASH			BIT(2)
+#define SRC_IRQ_MUIC			BIT(3)
+#define SRC_IRQ_ALL			(SRC_IRQ_CHARGER | SRC_IRQ_TOP \
+						| SRC_IRQ_FLASH | SRC_IRQ_MUIC)
+
 #define LED_IRQ_FLED2_OPEN		BIT(0)
 #define LED_IRQ_FLED2_SHORT		BIT(1)
 #define LED_IRQ_FLED1_OPEN		BIT(2)
-- 
cgit v1.2.3


From e30f53aad2202b5526c40c36d8eeac8bf290bde5 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Mon, 10 Nov 2014 19:46:34 +0100
Subject: tracing: Do not busy wait in buffer splice

On a !PREEMPT kernel, attempting to use trace-cmd results in a soft
lockup:

 # trace-cmd record -e raw_syscalls:* -F false
 NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61]
 ...
 Call Trace:
  [<ffffffff8105b580>] ? __wake_up_common+0x90/0x90
  [<ffffffff81092e25>] wait_on_pipe+0x35/0x40
  [<ffffffff810936e3>] tracing_buffers_splice_read+0x2e3/0x3c0
  [<ffffffff81093300>] ? tracing_stats_read+0x2a0/0x2a0
  [<ffffffff812d10ab>] ? _raw_spin_unlock+0x2b/0x40
  [<ffffffff810dc87b>] ? do_read_fault+0x21b/0x290
  [<ffffffff810de56a>] ? handle_mm_fault+0x2ba/0xbd0
  [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80
  [<ffffffff810951e2>] ? trace_buffer_lock_reserve+0x22/0x60
  [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80
  [<ffffffff8112415d>] do_splice_to+0x6d/0x90
  [<ffffffff81126971>] SyS_splice+0x7c1/0x800
  [<ffffffff812d1edd>] tracesys_phase2+0xd3/0xd8

The problem is this: tracing_buffers_splice_read() calls
ring_buffer_wait() to wait for data in the ring buffers.  The buffers
are not empty so ring_buffer_wait() returns immediately.  But
tracing_buffers_splice_read() calls ring_buffer_read_page() with full=1,
meaning it only wants to read a full page.  When the full page is not
available, tracing_buffers_splice_read() tries to wait again with
ring_buffer_wait(), which again returns immediately, and so on.

Fix this by adding a "full" argument to ring_buffer_wait() which will
make ring_buffer_wait() wait until the writer has left the reader's
page, i.e.  until full-page reads will succeed.

Link: http://lkml.kernel.org/r/1415645194-25379-1-git-send-email-rabin@rab.in

Cc: stable@vger.kernel.org # 3.16+
Fixes: b1169cc69ba9 ("tracing: Remove mock up poll wait function")
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  2 +-
 kernel/trace/ring_buffer.c  | 81 ++++++++++++++++++++++++++++++---------------
 kernel/trace/trace.c        | 23 ++++---------
 3 files changed, 62 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 49a4d6f59108..e2c13cd863bd 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full);
 int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2d75c94ae87d..a56e07c8d15b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * ring_buffer_wait - wait for input to the ring buffer
  * @buffer: buffer to wait on
  * @cpu: the cpu buffer to wait on
+ * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
  *
  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
 {
-	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
 	DEFINE_WAIT(wait);
 	struct rb_irq_work *work;
+	int ret = 0;
 
 	/*
 	 * Depending on what the caller is waiting for, either any
@@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 	}
 
 
-	prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+	while (true) {
+		prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
 
-	/*
-	 * The events can happen in critical sections where
-	 * checking a work queue can cause deadlocks.
-	 * After adding a task to the queue, this flag is set
-	 * only to notify events to try to wake up the queue
-	 * using irq_work.
-	 *
-	 * We don't clear it even if the buffer is no longer
-	 * empty. The flag only causes the next event to run
-	 * irq_work to do the work queue wake up. The worse
-	 * that can happen if we race with !trace_empty() is that
-	 * an event will cause an irq_work to try to wake up
-	 * an empty queue.
-	 *
-	 * There's no reason to protect this flag either, as
-	 * the work queue and irq_work logic will do the necessary
-	 * synchronization for the wake ups. The only thing
-	 * that is necessary is that the wake up happens after
-	 * a task has been queued. It's OK for spurious wake ups.
-	 */
-	work->waiters_pending = true;
+		/*
+		 * The events can happen in critical sections where
+		 * checking a work queue can cause deadlocks.
+		 * After adding a task to the queue, this flag is set
+		 * only to notify events to try to wake up the queue
+		 * using irq_work.
+		 *
+		 * We don't clear it even if the buffer is no longer
+		 * empty. The flag only causes the next event to run
+		 * irq_work to do the work queue wake up. The worse
+		 * that can happen if we race with !trace_empty() is that
+		 * an event will cause an irq_work to try to wake up
+		 * an empty queue.
+		 *
+		 * There's no reason to protect this flag either, as
+		 * the work queue and irq_work logic will do the necessary
+		 * synchronization for the wake ups. The only thing
+		 * that is necessary is that the wake up happens after
+		 * a task has been queued. It's OK for spurious wake ups.
+		 */
+		work->waiters_pending = true;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
+			break;
+
+		if (cpu != RING_BUFFER_ALL_CPUS &&
+		    !ring_buffer_empty_cpu(buffer, cpu)) {
+			unsigned long flags;
+			bool pagebusy;
+
+			if (!full)
+				break;
+
+			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+			if (!pagebusy)
+				break;
+		}
 
-	if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
-	    (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
 		schedule();
+	}
 
 	finish_wait(&work->waiters, &wait);
-	return 0;
+
+	return ret;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a528392b1f4..15209335888d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1076,13 +1076,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static int wait_on_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter, bool full)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
 		return 0;
 
-	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
+				full);
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -4434,15 +4435,12 @@ static int tracing_wait_pipe(struct file *filp)
 
 		mutex_unlock(&iter->mutex);
 
-		ret = wait_on_pipe(iter);
+		ret = wait_on_pipe(iter, false);
 
 		mutex_lock(&iter->mutex);
 
 		if (ret)
 			return ret;
-
-		if (signal_pending(current))
-			return -EINTR;
 	}
 
 	return 1;
@@ -5372,16 +5370,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 				goto out_unlock;
 			}
 			mutex_unlock(&trace_types_lock);
-			ret = wait_on_pipe(iter);
+			ret = wait_on_pipe(iter, false);
 			mutex_lock(&trace_types_lock);
 			if (ret) {
 				size = ret;
 				goto out_unlock;
 			}
-			if (signal_pending(current)) {
-				size = -EINTR;
-				goto out_unlock;
-			}
 			goto again;
 		}
 		size = 0;
@@ -5587,14 +5581,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			goto out;
 		}
 		mutex_unlock(&trace_types_lock);
-		ret = wait_on_pipe(iter);
+		ret = wait_on_pipe(iter, true);
 		mutex_lock(&trace_types_lock);
 		if (ret)
 			goto out;
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			goto out;
-		}
+
 		goto again;
 	}
 
-- 
cgit v1.2.3


From 67732cd34382066ae5df313b6dad65ab14b9735f Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 11 Nov 2014 11:07:08 +0100
Subject: PM / Domains: Fix initial default state of the need_restore flag

The initial state of the device's need_restore flag should'nt depend on
the current state of the PM domain. For example it should be perfectly
valid to attach an inactive device to a powered PM domain.

The pm_genpd_dev_need_restore() API allow us to update the need_restore
flag to somewhat cope with such scenarios. Typically that should have
been done from drivers/buses ->probe() since it's those that put the
requirements on the value of the need_restore flag.

Until recently, the Exynos SOCs were the only user of the
pm_genpd_dev_need_restore() API, though invoking it from a centralized
location while adding devices to their PM domains.

Due to that Exynos now have swithed to the generic OF-based PM domain
look-up, it's no longer possible to invoke the API from a centralized
location. The reason is because devices are now added to their PM
domains during the probe sequence.

Commit "ARM: exynos: Move to generic PM domain DT bindings"
did the switch for Exynos to the generic OF-based PM domain look-up,
but it also removed the call to pm_genpd_dev_need_restore(). This
caused a regression for some of the Exynos drivers.

To handle things more properly in the generic PM domain, let's change
the default initial value of the need_restore flag to reflect that the
state is unknown. As soon as some of the runtime PM callbacks gets
invoked, update the initial value accordingly.

Moreover, since the generic PM domain is verifying that all devices
are both runtime PM enabled and suspended, using pm_runtime_suspended()
while pm_genpd_poweroff() is invoked from the scheduled work, we can be
sure of that the PM domain won't be powering off while having active
devices.

Do note that, the generic PM domain can still only know about active
devices which has been activated through invoking its runtime PM resume
callback. In other words, buses/drivers using pm_runtime_set_active()
during ->probe() will still suffer from a race condition, potentially
probing a device without having its PM domain being powered. That issue
will have to be solved using a different approach.

This a log from the boot regression for Exynos5, which is being fixed in
this patch.

------------[ cut here ]------------
WARNING: CPU: 0 PID: 308 at ../drivers/clk/clk.c:851 clk_disable+0x24/0x30()
Modules linked in:
CPU: 0 PID: 308 Comm: kworker/0:1 Not tainted 3.18.0-rc3-00569-gbd9449f-dirty #10
Workqueue: pm pm_runtime_work
[<c0013c64>] (unwind_backtrace) from [<c0010dec>] (show_stack+0x10/0x14)
[<c0010dec>] (show_stack) from [<c03ee4cc>] (dump_stack+0x70/0xbc)
[<c03ee4cc>] (dump_stack) from [<c0020d34>] (warn_slowpath_common+0x64/0x88)
[<c0020d34>] (warn_slowpath_common) from [<c0020d74>] (warn_slowpath_null+0x1c/0x24)
[<c0020d74>] (warn_slowpath_null) from [<c03107b0>] (clk_disable+0x24/0x30)
[<c03107b0>] (clk_disable) from [<c02cc834>] (gsc_runtime_suspend+0x128/0x160)
[<c02cc834>] (gsc_runtime_suspend) from [<c0249024>] (pm_generic_runtime_suspend+0x2c/0x38)
[<c0249024>] (pm_generic_runtime_suspend) from [<c024f44c>] (pm_genpd_default_save_state+0x2c/0x8c)
[<c024f44c>] (pm_genpd_default_save_state) from [<c024ff2c>] (pm_genpd_poweroff+0x224/0x3ec)
[<c024ff2c>] (pm_genpd_poweroff) from [<c02501b4>] (pm_genpd_runtime_suspend+0x9c/0xcc)
[<c02501b4>] (pm_genpd_runtime_suspend) from [<c024a4f8>] (__rpm_callback+0x2c/0x60)
[<c024a4f8>] (__rpm_callback) from [<c024a54c>] (rpm_callback+0x20/0x74)
[<c024a54c>] (rpm_callback) from [<c024a930>] (rpm_suspend+0xd4/0x43c)
[<c024a930>] (rpm_suspend) from [<c024bbcc>] (pm_runtime_work+0x80/0x90)
[<c024bbcc>] (pm_runtime_work) from [<c0032a9c>] (process_one_work+0x12c/0x314)
[<c0032a9c>] (process_one_work) from [<c0032cf4>] (worker_thread+0x3c/0x4b0)
[<c0032cf4>] (worker_thread) from [<c003747c>] (kthread+0xcc/0xe8)
[<c003747c>] (kthread) from [<c000e738>] (ret_from_fork+0x14/0x3c)
---[ end trace 40cd58bcd6988f12 ]---

Fixes: a4a8c2c4962bb655 (ARM: exynos: Move to generic PM domain DT bindings)
Reported-and-tested0by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 38 ++++++++++++++++++++++++++++++++------
 include/linux/pm_domain.h   |  2 +-
 2 files changed, 33 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index b520687046d4..fb83d4acd400 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -361,9 +361,19 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd,
 	struct device *dev = pdd->dev;
 	int ret = 0;
 
-	if (gpd_data->need_restore)
+	if (gpd_data->need_restore > 0)
 		return 0;
 
+	/*
+	 * If the value of the need_restore flag is still unknown at this point,
+	 * we trust that pm_genpd_poweroff() has verified that the device is
+	 * already runtime PM suspended.
+	 */
+	if (gpd_data->need_restore < 0) {
+		gpd_data->need_restore = 1;
+		return 0;
+	}
+
 	mutex_unlock(&genpd->lock);
 
 	genpd_start_dev(genpd, dev);
@@ -373,7 +383,7 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd,
 	mutex_lock(&genpd->lock);
 
 	if (!ret)
-		gpd_data->need_restore = true;
+		gpd_data->need_restore = 1;
 
 	return ret;
 }
@@ -389,12 +399,17 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd,
 {
 	struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
 	struct device *dev = pdd->dev;
-	bool need_restore = gpd_data->need_restore;
+	int need_restore = gpd_data->need_restore;
 
-	gpd_data->need_restore = false;
+	gpd_data->need_restore = 0;
 	mutex_unlock(&genpd->lock);
 
 	genpd_start_dev(genpd, dev);
+
+	/*
+	 * Call genpd_restore_dev() for recently added devices too (need_restore
+	 * is negative then).
+	 */
 	if (need_restore)
 		genpd_restore_dev(genpd, dev);
 
@@ -603,6 +618,7 @@ static void genpd_power_off_work_fn(struct work_struct *work)
 static int pm_genpd_runtime_suspend(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	struct generic_pm_domain_data *gpd_data;
 	bool (*stop_ok)(struct device *__dev);
 	int ret;
 
@@ -628,6 +644,16 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 		return 0;
 
 	mutex_lock(&genpd->lock);
+
+	/*
+	 * If we have an unknown state of the need_restore flag, it means none
+	 * of the runtime PM callbacks has been invoked yet. Let's update the
+	 * flag to reflect that the current state is active.
+	 */
+	gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
+	if (gpd_data->need_restore < 0)
+		gpd_data->need_restore = 0;
+
 	genpd->in_progress++;
 	pm_genpd_poweroff(genpd);
 	genpd->in_progress--;
@@ -1442,7 +1468,7 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
 	mutex_lock(&gpd_data->lock);
 	gpd_data->base.dev = dev;
 	list_add_tail(&gpd_data->base.list_node, &genpd->dev_list);
-	gpd_data->need_restore = genpd->status == GPD_STATE_POWER_OFF;
+	gpd_data->need_restore = -1;
 	gpd_data->td.constraint_changed = true;
 	gpd_data->td.effective_constraint_ns = -1;
 	mutex_unlock(&gpd_data->lock);
@@ -1546,7 +1572,7 @@ void pm_genpd_dev_need_restore(struct device *dev, bool val)
 
 	psd = dev_to_psd(dev);
 	if (psd && psd->domain_data)
-		to_gpd_data(psd->domain_data)->need_restore = val;
+		to_gpd_data(psd->domain_data)->need_restore = val ? 1 : 0;
 
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 }
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index b3ed7766a291..2e0e06daf8c0 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -106,7 +106,7 @@ struct generic_pm_domain_data {
 	struct notifier_block nb;
 	struct mutex lock;
 	unsigned int refcount;
-	bool need_restore;
+	int need_restore;
 };
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS
-- 
cgit v1.2.3


From 8c393f9a721c30a030049a680e1bf896669bb279 Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Wed, 5 Nov 2014 22:36:50 +0800
Subject: nfs: fix pnfs direct write memory leak

For pNFS direct writes, layout driver may dynamically allocate ds_cinfo.buckets.
So we need to take care to free them when freeing dreq.

Ideally this needs to be done inside layout driver where ds_cinfo.buckets
are allocated. But buckets are attached to dreq and reused across LD IO iterations.
So I feel it's OK to free them in the generic layer.

Cc: stable@vger.kernel.org [v3.4+]
Signed-off-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         |  1 +
 include/linux/nfs_xdr.h | 11 +++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 20cffc830468..10bf07280f4a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
 
+	nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
 	if (dreq->l_ctx != NULL)
 		nfs_put_lock_context(dreq->l_ctx);
 	if (dreq->ctx != NULL)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 983876f24aed..47ebb4fafd87 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1224,11 +1224,22 @@ struct nfs41_free_stateid_res {
 	unsigned int			status;
 };
 
+static inline void
+nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo)
+{
+	kfree(cinfo->buckets);
+}
+
 #else
 
 struct pnfs_ds_commit_info {
 };
 
+static inline void
+nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo)
+{
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #ifdef CONFIG_NFS_V4_2
-- 
cgit v1.2.3


From ad53f92eb416d81e469fa8ea57153e59455e7175 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 13 Nov 2014 15:19:11 -0800
Subject: mm/page_alloc: fix incorrect isolation behavior by rechecking
 migratetype

Before describing bugs itself, I first explain definition of freepage.

 1. pages on buddy list are counted as freepage.
 2. pages on isolate migratetype buddy list are *not* counted as freepage.
 3. pages on cma buddy list are counted as CMA freepage, too.

Now, I describe problems and related patch.

Patch 1: There is race conditions on getting pageblock migratetype that
it results in misplacement of freepages on buddy list, incorrect
freepage count and un-availability of freepage.

Patch 2: Freepages on pcp list could have stale cached information to
determine migratetype of buddy list to go.  This causes misplacement of
freepages on buddy list and incorrect freepage count.

Patch 4: Merging between freepages on different migratetype of
pageblocks will cause freepages accouting problem.  This patch fixes it.

Without patchset [3], above problem doesn't happens on my CMA allocation
test, because CMA reserved pages aren't used at all.  So there is no
chance for above race.

With patchset [3], I did simple CMA allocation test and get below
result:

 - Virtual machine, 4 cpus, 1024 MB memory, 256 MB CMA reservation
 - run kernel build (make -j16) on background
 - 30 times CMA allocation(8MB * 30 = 240MB) attempts in 5 sec interval
 - Result: more than 5000 freepage count are missed

With patchset [3] and this patchset, I found that no freepage count are
missed so that I conclude that problems are solved.

On my simple memory offlining test, these problems also occur on that
environment, too.

This patch (of 4):

There are two paths to reach core free function of buddy allocator,
__free_one_page(), one is free_one_page()->__free_one_page() and the
other is free_hot_cold_page()->free_pcppages_bulk()->__free_one_page().
Each paths has race condition causing serious problems.  At first, this
patch is focused on first type of freepath.  And then, following patch
will solve the problem in second type of freepath.

In the first type of freepath, we got migratetype of freeing page
without holding the zone lock, so it could be racy.  There are two cases
of this race.

 1. pages are added to isolate buddy list after restoring orignal
    migratetype

    CPU1                                   CPU2

    get migratetype => return MIGRATE_ISOLATE
    call free_one_page() with MIGRATE_ISOLATE

                                grab the zone lock
                                unisolate pageblock
                                release the zone lock

    grab the zone lock
    call __free_one_page() with MIGRATE_ISOLATE
    freepage go into isolate buddy list,
    although pageblock is already unisolated

This may cause two problems.  One is that we can't use this page anymore
until next isolation attempt of this pageblock, because freepage is on
isolate buddy list.  The other is that freepage accouting could be wrong
due to merging between different buddy list.  Freepages on isolate buddy
list aren't counted as freepage, but ones on normal buddy list are
counted as freepage.  If merge happens, buddy freepage on normal buddy
list is inevitably moved to isolate buddy list without any consideration
of freepage accouting so it could be incorrect.

 2. pages are added to normal buddy list while pageblock is isolated.
    It is similar with above case.

This also may cause two problems.  One is that we can't keep these
freepages from being allocated.  Although this pageblock is isolated,
freepage would be added to normal buddy list so that it could be
allocated without any restriction.  And the other problem is same as
case 1, that it, incorrect freepage accouting.

This race condition would be prevented by checking migratetype again
with holding the zone lock.  Because it is somewhat heavy operation and
it isn't needed in common case, we want to avoid rechecking as much as
possible.  So this patch introduce new variable, nr_isolate_pageblock in
struct zone to check if there is isolated pageblock.  With this, we can
avoid to re-check migratetype in common case and do it only if there is
isolated pageblock or migratetype is MIGRATE_ISOLATE.  This solve above
mentioned problems.

Changes from v3:
Add one more check in free_one_page() that checks whether migratetype is
MIGRATE_ISOLATE or not. Without this, abovementioned case 1 could happens.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Laura Abbott <lauraa@codeaurora.org>
Cc: Heesub Shin <heesub.shin@samsung.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Ritesh Harjani <ritesh.list@gmail.com>
Cc: Gioh Kim <gioh.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h         |  9 +++++++++
 include/linux/page-isolation.h |  8 ++++++++
 mm/page_alloc.c                | 11 +++++++++--
 mm/page_isolation.c            |  2 ++
 4 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 48bf12ef6620..ffe66e381c04 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -431,6 +431,15 @@ struct zone {
 	 */
 	int			nr_migrate_reserve_block;
 
+#ifdef CONFIG_MEMORY_ISOLATION
+	/*
+	 * Number of isolated pageblock. It is used to solve incorrect
+	 * freepage counting problem due to racy retrieving migratetype
+	 * of pageblock. Protected by zone->lock.
+	 */
+	unsigned long		nr_isolate_pageblock;
+#endif
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/* see spanned/present_pages for more description */
 	seqlock_t		span_seqlock;
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 3fff8e774067..2dc1e1697b45 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -2,6 +2,10 @@
 #define __LINUX_PAGEISOLATION_H
 
 #ifdef CONFIG_MEMORY_ISOLATION
+static inline bool has_isolate_pageblock(struct zone *zone)
+{
+	return zone->nr_isolate_pageblock;
+}
 static inline bool is_migrate_isolate_page(struct page *page)
 {
 	return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
@@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype)
 	return migratetype == MIGRATE_ISOLATE;
 }
 #else
+static inline bool has_isolate_pageblock(struct zone *zone)
+{
+	return false;
+}
 static inline bool is_migrate_isolate_page(struct page *page)
 {
 	return false;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9cd36b822444..df1da25b309b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -739,9 +739,16 @@ static void free_one_page(struct zone *zone,
 	if (nr_scanned)
 		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
+	if (unlikely(has_isolate_pageblock(zone) ||
+		is_migrate_isolate(migratetype))) {
+		migratetype = get_pfnblock_migratetype(page, pfn);
+		if (is_migrate_isolate(migratetype))
+			goto skip_counting;
+	}
+	__mod_zone_freepage_state(zone, 1 << order, migratetype);
+
+skip_counting:
 	__free_one_page(page, pfn, zone, order, migratetype);
-	if (unlikely(!is_migrate_isolate(migratetype)))
-		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock(&zone->lock);
 }
 
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index d1473b2e9481..1fa4a4d72ecc 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -60,6 +60,7 @@ out:
 		int migratetype = get_pageblock_migratetype(page);
 
 		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		zone->nr_isolate_pageblock++;
 		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
 
 		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -83,6 +84,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 	nr_pages = move_freepages_block(zone, page, migratetype);
 	__mod_zone_freepage_state(zone, nr_pages, migratetype);
 	set_pageblock_migratetype(page, migratetype);
+	zone->nr_isolate_pageblock--;
 out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
-- 
cgit v1.2.3


From f784a3f19613901ca4539a5b0eed3bdc700e6ee7 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Thu, 13 Nov 2014 15:19:39 -0800
Subject: mem-hotplug: reset node managed pages when hot-adding a new pgdat

In free_area_init_core(), zone->managed_pages is set to an approximate
value for lowmem, and will be adjusted when the bootmem allocator frees
pages into the buddy system.

But free_area_init_core() is also called by hotadd_new_pgdat() when
hot-adding memory.  As a result, zone->managed_pages of the newly added
node's pgdat is set to an approximate value in the very beginning.

Even if the memory on that node has node been onlined,
/sys/device/system/node/nodeXXX/meminfo has wrong value:

  hot-add node2 (memory not onlined)
  cat /sys/device/system/node/node2/meminfo
  Node 2 MemTotal:       33554432 kB
  Node 2 MemFree:               0 kB
  Node 2 MemUsed:        33554432 kB
  Node 2 Active:                0 kB

This patch fixes this problem by reset node managed pages to 0 after
hot-adding a new node.

1. Move reset_managed_pages_done from reset_node_managed_pages() to
   reset_all_zones_managed_pages()
2. Make reset_node_managed_pages() non-static
3. Call reset_node_managed_pages() in hotadd_new_pgdat() after pgdat
   is initialized

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: <stable@vger.kernel.org>	[3.16+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 1 +
 mm/bootmem.c            | 9 +++++----
 mm/memory_hotplug.c     | 9 +++++++++
 mm/nobootmem.c          | 8 +++++---
 4 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 4e2bd4c95b66..0995c2de8162 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
 
 extern unsigned long free_all_bootmem(void);
+extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
 
 extern void free_bootmem_node(pg_data_t *pgdat,
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8a000cebb0d7..477be696511d 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 
 static int reset_managed_pages_done __initdata;
 
-static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
+void reset_node_managed_pages(pg_data_t *pgdat)
 {
 	struct zone *z;
 
-	if (reset_managed_pages_done)
-		return;
-
 	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
 		z->managed_pages = 0;
 }
@@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void)
 {
 	struct pglist_data *pgdat;
 
+	if (reset_managed_pages_done)
+		return;
+
 	for_each_online_pgdat(pgdat)
 		reset_node_managed_pages(pgdat);
+
 	reset_managed_pages_done = 1;
 }
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 252e1dbbed86..c5f7fe199a60 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -31,6 +31,7 @@
 #include <linux/stop_machine.h>
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 
 #include <asm/tlbflush.h>
 
@@ -1096,6 +1097,14 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 	build_all_zonelists(pgdat, NULL);
 	mutex_unlock(&zonelists_mutex);
 
+	/*
+	 * zone->managed_pages is set to an approximate value in
+	 * free_area_init_core(), which will cause
+	 * /sys/device/system/node/nodeX/meminfo has wrong data.
+	 * So reset it to 0 before any memory is onlined.
+	 */
+	reset_node_managed_pages(pgdat);
+
 	return pgdat;
 }
 
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7c7ab32ee503..90b50468333e 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void)
 
 static int reset_managed_pages_done __initdata;
 
-static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
+void reset_node_managed_pages(pg_data_t *pgdat)
 {
 	struct zone *z;
 
-	if (reset_managed_pages_done)
-		return;
 	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
 		z->managed_pages = 0;
 }
@@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void)
 {
 	struct pglist_data *pgdat;
 
+	if (reset_managed_pages_done)
+		return;
+
 	for_each_online_pgdat(pgdat)
 		reset_node_managed_pages(pgdat);
+
 	reset_managed_pages_done = 1;
 }
 
-- 
cgit v1.2.3