From 5165238460068e53c740eaa621ebb6623dc4a50d Mon Sep 17 00:00:00 2001 From: "Zhu, Lejun" Date: Tue, 3 Jun 2014 13:26:02 +0800 Subject: mfd: intel_soc_pmic: Core driver This patch provides the common I2C driver code for Intel SoC PMICs. Signed-off-by: Yang, Bin Signed-off-by: Zhu, Lejun Signed-off-by: Lee Jones --- include/linux/mfd/intel_soc_pmic.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/mfd/intel_soc_pmic.h (limited to 'include/linux') diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h new file mode 100644 index 000000000000..abcbfcf32d10 --- /dev/null +++ b/include/linux/mfd/intel_soc_pmic.h @@ -0,0 +1,30 @@ +/* + * intel_soc_pmic.h - Intel SoC PMIC Driver + * + * Copyright (C) 2012-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Yang, Bin + * Author: Zhu, Lejun + */ + +#ifndef __INTEL_SOC_PMIC_H__ +#define __INTEL_SOC_PMIC_H__ + +#include + +struct intel_soc_pmic { + int irq; + struct regmap *regmap; + struct regmap_irq_chip_data *irq_chip_data; +}; + +#endif /* __INTEL_SOC_PMIC_H__ */ -- cgit v1.2.3 From f15a5cf912f05b572d1f9f3772fba019643f4837 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 10 Jun 2014 18:29:39 +0800 Subject: SUNRPC/NFSD: Change to type of bool for rq_usedeferral and rq_splice_ok rq_usedeferral and rq_splice_ok are used as 0 and 1, just defined to bool. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- include/linux/sunrpc/svc.h | 4 ++-- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/svc.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index baa3803f0811..be6734060d2a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1298,7 +1298,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - rqstp->rq_usedeferral = 0; + rqstp->rq_usedeferral = false; /* * According to RFC3010, this takes precedence over all other errors. @@ -1417,7 +1417,7 @@ encode_op: BUG_ON(cstate->replay_owner); out: /* Reset deferral mechanism for RPC deferrals */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1bc7cd05b22e..cf61ecd148e0 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -236,7 +236,7 @@ struct svc_rqst { struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ - int rq_usedeferral; /* use deferral */ + bool rq_usedeferral; /* use deferral */ size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; @@ -277,7 +277,7 @@ struct svc_rqst { struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ int rq_cachetype; struct svc_cacherep * rq_cacherep; /* cache info */ - int rq_splice_ok; /* turned off in gss privacy + bool rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ wait_queue_head_t rq_wait; /* synchronization */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 4ce5eccec1f6..c548ab213f76 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs u32 priv_len, maj_stat; int pad, saved_len, remaining_len, offset; - rqstp->rq_splice_ok = 0; + rqstp->rq_splice_ok = false; priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5de6801cd924..1db5007ddbce 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1086,9 +1086,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off only in gss privacy case: */ - rqstp->rq_splice_ok = 1; + rqstp->rq_splice_ok = true; /* Will be turned off only when NFSv4 Sessions are used */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; rqstp->rq_dropme = false; /* Setup reply header */ -- cgit v1.2.3 From 1e7f3a485922211b6e4a082ebc6bf05810b0b6ea Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:33 -0400 Subject: nfs: move nfs_pgio_data and remove nfs_rw_header nfs_rw_header was used to allocate an nfs_pgio_header along with an nfs_pgio_data, because a _header would need at least one _data. Now there is only ever one nfs_pgio_data for each nfs_pgio_header -- move it to nfs_pgio_header and get rid of nfs_rw_header. Reviewed-by: Christoph Hellwig Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 8 ++--- fs/nfs/internal.h | 6 ++-- fs/nfs/pagelist.c | 94 +++++++++++++++--------------------------------- fs/nfs/pnfs.c | 24 ++++++------- fs/nfs/read.c | 6 ++-- fs/nfs/write.c | 10 +++--- include/linux/nfs_page.h | 4 +-- include/linux/nfs_xdr.h | 38 +++++++++----------- 8 files changed, 71 insertions(+), 119 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8f98138cbc43..179de67ca907 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, - hdr->data->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp, + hdr->data.ds_idx); WARN_ON_ONCE(verfp->committed >= 0); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); WARN_ON_ONCE(verfp->committed < 0); @@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, - hdr->data->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp, + hdr->data.ds_idx); if (verfp->committed < 0) { nfs_direct_set_hdr_verf(dreq, hdr); return 0; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 82ddbf46660e..5cda049c8f9b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -238,9 +238,9 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; -struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); -void nfs_rw_header_free(struct nfs_pgio_header *); -void nfs_pgio_data_release(struct nfs_pgio_data *); +struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); +void nfs_pgio_header_free(struct nfs_pgio_header *); +void nfs_pgio_data_destroy(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, const struct rpc_call_ops *, int, int); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b6ee3a6ee96d..e4cde476562f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -452,95 +452,61 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); -static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) +struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) { - return container_of(hdr, struct nfs_rw_header, header); -} - -/** - * nfs_rw_header_alloc - Allocate a header for a read or write - * @ops: Read or write function vector - */ -struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) -{ - struct nfs_rw_header *header = ops->rw_alloc_header(); - - if (header) { - struct nfs_pgio_header *hdr = &header->header; + struct nfs_pgio_header *hdr = ops->rw_alloc_header(); + if (hdr) { INIT_LIST_HEAD(&hdr->pages); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); hdr->rw_ops = ops; } - return header; + return hdr; } -EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); +EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc); /* - * nfs_rw_header_free - Free a read or write header + * nfs_pgio_header_free - Free a read or write header * @hdr: The header to free */ -void nfs_rw_header_free(struct nfs_pgio_header *hdr) +void nfs_pgio_header_free(struct nfs_pgio_header *hdr) { - hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); + hdr->rw_ops->rw_free_header(hdr); } -EXPORT_SYMBOL_GPL(nfs_rw_header_free); +EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** * nfs_pgio_data_alloc - Allocate pageio data * @hdr: The header making a request * @pagecount: Number of pages to create */ -static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) +static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, + unsigned int pagecount) { - struct nfs_pgio_data *data, *prealloc; - - prealloc = &NFS_RW_HEADER(hdr)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; + if (nfs_pgarray_set(&hdr->data.pages, pagecount)) { + hdr->data.header = hdr; atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; + return true; } -out: - return data; + return false; } /** - * nfs_pgio_data_release - Properly free pageio data - * @data: The data to release + * nfs_pgio_data_destroy - Properly free pageio data + * @data: The data to destroy */ -void nfs_pgio_data_release(struct nfs_pgio_data *data) +void nfs_pgio_data_destroy(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; - struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); put_nfs_open_context(data->args.context); if (data->pages.pagevec != data->pages.page_array) kfree(data->pages.pagevec); - if (data == &pageio_header->rpc_data) { - data->header = NULL; - data = NULL; - } if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. - * See the comment in rpc_free_task() for why - */ - kfree(data); } -EXPORT_SYMBOL_GPL(nfs_pgio_data_release); +EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call @@ -655,8 +621,7 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { set_bit(NFS_IOHDR_REDO, &hdr->flags); - nfs_pgio_data_release(hdr->data); - hdr->data = NULL; + nfs_pgio_data_destroy(&hdr->data); desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } @@ -670,7 +635,7 @@ static void nfs_pgio_release(void *calldata) struct nfs_pgio_data *data = calldata; if (data->header->rw_ops->rw_release) data->header->rw_ops->rw_release(data); - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(data); } /** @@ -746,11 +711,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; - data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) + if (!nfs_pgio_data_init(hdr, nfs_page_array_len(desc->pg_base, + desc->pg_count))) return nfs_pgio_error(desc, hdr); + data = &hdr->data; nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); pages = data->pages.pagevec; while (!list_empty(head)) { @@ -766,7 +731,6 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, /* Set up the argument struct */ nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - hdr->data = data; desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -774,22 +738,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *rw_hdr; struct nfs_pgio_header *hdr; int ret; - rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rw_hdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } - hdr = &rw_hdr->header; - nfs_pgheader_init(desc, hdr, nfs_rw_header_free); + nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - hdr->data, desc->pg_rpc_callops, + &hdr->data, desc->pg_rpc_callops, desc->pg_ioflags, 0); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6fdcd233d6f7..067104cce181 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1546,7 +1546,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(data); } static enum pnfs_try_status @@ -1575,7 +1575,7 @@ static void pnfs_do_write(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data = hdr->data; + struct nfs_pgio_data *data = &hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; @@ -1590,25 +1590,23 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc, static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_rw_header_free(hdr); + nfs_pgio_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; - whdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!whdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; } - hdr = &whdr->header; nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); @@ -1696,7 +1694,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(data); } /* @@ -1727,7 +1725,7 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, static void pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data = hdr->data; + struct nfs_pgio_data *data = &hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; @@ -1742,26 +1740,24 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_rw_header_free(hdr); + nfs_pgio_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rhdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } - hdr = &rhdr->header; nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index e818a475ca64..d9df4ab3737b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops; static struct kmem_cache *nfs_rdata_cachep; -static struct nfs_rw_header *nfs_readhdr_alloc(void) +static struct nfs_pgio_header *nfs_readhdr_alloc(void) { return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); } -static void nfs_readhdr_free(struct nfs_rw_header *rhdr) +static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) { kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -404,7 +404,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_rw_header), + sizeof(struct nfs_pgio_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 98ff061ccaf3..d694952f0071 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -70,18 +70,18 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -static struct nfs_rw_header *nfs_writehdr_alloc(void) +static struct nfs_pgio_header *nfs_writehdr_alloc(void) { - struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) memset(p, 0, sizeof(*p)); return p; } -static void nfs_writehdr_free(struct nfs_rw_header *whdr) +static void nfs_writehdr_free(struct nfs_pgio_header *hdr) { - mempool_free(whdr, nfs_wdata_mempool); + mempool_free(hdr, nfs_wdata_mempool); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -1655,7 +1655,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_rw_header), + sizeof(struct nfs_pgio_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7d9096d95d4a..43592651cd5a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -62,8 +62,8 @@ struct nfs_pageio_ops { struct nfs_rw_ops { const fmode_t rw_mode; - struct nfs_rw_header *(*rw_alloc_header)(void); - void (*rw_free_header)(struct nfs_rw_header *); + struct nfs_pgio_header *(*rw_alloc_header)(void); + void (*rw_free_header)(struct nfs_pgio_header *); void (*rw_release)(struct nfs_pgio_data *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9a1396e70310..e1c9437e8aac 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1257,13 +1257,27 @@ enum { NFS_IOHDR_NEED_RESCHED, }; -struct nfs_pgio_data; +struct nfs_pgio_data { + struct nfs_pgio_header *header; + struct list_head list; + struct rpc_task task; + struct nfs_fattr fattr; + struct nfs_writeverf verf; /* Used for writes */ + struct nfs_pgio_args args; /* argument struct */ + struct nfs_pgio_res res; /* result struct */ + unsigned long timestamp; /* For lease renewal */ + int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data); + __u64 mds_offset; /* Filelayout dense stripe */ + struct nfs_page_array pages; + struct nfs_client *ds_clp; /* pNFS data server */ + int ds_idx; /* ds index if ds_clp is set */ +}; struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; - struct nfs_pgio_data *data; + struct nfs_pgio_data data; atomic_t refcnt; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ @@ -1283,26 +1297,6 @@ struct nfs_pgio_header { unsigned long flags; }; -struct nfs_pgio_data { - struct nfs_pgio_header *header; - struct rpc_task task; - struct nfs_fattr fattr; - struct nfs_writeverf verf; /* Used for writes */ - struct nfs_pgio_args args; /* argument struct */ - struct nfs_pgio_res res; /* result struct */ - unsigned long timestamp; /* For lease renewal */ - int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data); - __u64 mds_offset; /* Filelayout dense stripe */ - struct nfs_page_array pages; - struct nfs_client *ds_clp; /* pNFS data server */ - int ds_idx; /* ds index if ds_clp is set */ -}; - -struct nfs_rw_header { - struct nfs_pgio_header header; - struct nfs_pgio_data rpc_data; -}; - struct nfs_mds_commit_info { atomic_t rpcs_out; unsigned long ncommit; -- cgit v1.2.3 From 823b0c9d9800e712374cda89ac3565bd29f6701b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:34 -0400 Subject: nfs: rename members of nfs_pgio_data Rename "verf" to "writeverf" and "pages" to "page_array" to prepare for merge of nfs_pgio_data and nfs_pgio_header. Reviewed-by: Christoph Hellwig Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 17 ++++++++++------- fs/nfs/objlayout/objlayout.c | 4 ++-- fs/nfs/pagelist.c | 12 ++++++------ fs/nfs/write.c | 9 +++++---- include/linux/nfs_xdr.h | 4 ++-- 5 files changed, 25 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9b431f44fad9..36b01cef849e 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -258,7 +258,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) const bool is_dio = (header->dreq != NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); + rdata->page_array.npages, f_offset, + (unsigned int)rdata->args.count); par = alloc_parallel(rdata); if (!par) @@ -268,7 +269,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) isect = (sector_t) (f_offset >> SECTOR_SHIFT); /* Code assumes extents are page-aligned */ - for (i = pg_index; i < rdata->pages.npages; i++) { + for (i = pg_index; i < rdata->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -317,7 +318,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ? cow_read : be; - bio = do_add_page_to_bio(bio, rdata->pages.npages - i, + bio = do_add_page_to_bio(bio, + rdata->page_array.npages - i, READ, isect, pages[i], be_read, bl_end_io_read, par, @@ -446,7 +448,7 @@ static void bl_end_par_io_write(void *data, int num_se) } wdata->task.tk_status = wdata->header->pnfs_error; - wdata->verf.committed = NFS_FILE_SYNC; + wdata->writeverf.committed = NFS_FILE_SYNC; INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); schedule_work(&wdata->task.u.tk_work); } @@ -699,7 +701,7 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); goto out_mds; } - /* At this point, wdata->pages is a (sequential) list of nfs_pages. + /* At this point, wdata->page_aray is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. */ @@ -791,7 +793,7 @@ next_page: /* Middle pages */ pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; - for (i = pg_index; i < wdata->pages.npages; i++) { + for (i = pg_index; i < wdata->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -862,7 +864,8 @@ next_page: } - bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, + bio = do_add_page_to_bio(bio, wdata->page_array.npages - i, + WRITE, isect, pages[i], be, bl_end_io_write, par, pg_offset, pg_len); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 765d3f54e986..31fed91a8bac 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -329,7 +329,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) oir->status = wdata->task.tk_status = status; if (status >= 0) { wdata->res.count = status; - wdata->verf.committed = oir->committed; + wdata->writeverf.committed = oir->committed; } else { wdata->header->pnfs_error = status; } @@ -337,7 +337,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, wdata->verf.committed, sync); + status, wdata->writeverf.committed, sync); if (sync) pnfs_ld_write_done(wdata); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e4cde476562f..5e70918f6c95 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -484,7 +484,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, unsigned int pagecount) { - if (nfs_pgarray_set(&hdr->data.pages, pagecount)) { + if (nfs_pgarray_set(&hdr->data.page_array, pagecount)) { hdr->data.header = hdr; atomic_inc(&hdr->refcnt); return true; @@ -501,8 +501,8 @@ void nfs_pgio_data_destroy(struct nfs_pgio_data *data) struct nfs_pgio_header *hdr = data->header; put_nfs_open_context(data->args.context); - if (data->pages.pagevec != data->pages.page_array) - kfree(data->pages.pagevec); + if (data->page_array.pagevec != data->page_array.page_array) + kfree(data->page_array.pagevec); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } @@ -530,7 +530,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, /* pnfs_set_layoutcommit needs this */ data->mds_offset = data->args.offset; data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; + data->args.pages = data->page_array.pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; @@ -548,7 +548,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, data->res.fattr = &data->fattr; data->res.count = count; data->res.eof = 0; - data->res.verf = &data->verf; + data->res.verf = &data->writeverf; nfs_fattr_init(&data->fattr); } @@ -717,7 +717,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, data = &hdr->data; nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->pages.pagevec; + pages = data->page_array.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d694952f0071..6afe0f679420 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -598,9 +598,9 @@ nfs_clear_request_commit(struct nfs_page *req) static inline int nfs_write_need_commit(struct nfs_pgio_data *data) { - if (data->verf.committed == NFS_DATA_SYNC) + if (data->writeverf.committed == NFS_DATA_SYNC) return data->header->lseg == NULL; - return data->verf.committed != NFS_FILE_SYNC; + return data->writeverf.committed != NFS_FILE_SYNC; } #else @@ -1095,8 +1095,9 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data) if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); - else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) + memcpy(&hdr->verf, &data->writeverf, sizeof(hdr->verf)); + else if (memcmp(&hdr->verf, &data->writeverf, + sizeof(hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e1c9437e8aac..bb18dba1aefe 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1262,13 +1262,13 @@ struct nfs_pgio_data { struct list_head list; struct rpc_task task; struct nfs_fattr fattr; - struct nfs_writeverf verf; /* Used for writes */ + struct nfs_writeverf writeverf; /* Used for writes */ struct nfs_pgio_args args; /* argument struct */ struct nfs_pgio_res res; /* result struct */ unsigned long timestamp; /* For lease renewal */ int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data); __u64 mds_offset; /* Filelayout dense stripe */ - struct nfs_page_array pages; + struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ int ds_idx; /* ds index if ds_clp is set */ }; -- cgit v1.2.3 From d45f60c67848b9f19160692581d78e5b4757a000 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:35 -0400 Subject: nfs: merge nfs_pgio_data into _header struct nfs_pgio_data only exists as a member of nfs_pgio_header, but is passed around everywhere, because there used to be multiple _data structs per _header. Many of these functions then use the _data to find a pointer to the _header. This patch cleans this up by merging the nfs_pgio_data structure into nfs_pgio_header and passing nfs_pgio_header around instead. Reviewed-by: Christoph Hellwig Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 98 +++++++++++----------- fs/nfs/direct.c | 8 +- fs/nfs/filelayout/filelayout.c | 170 +++++++++++++++++++-------------------- fs/nfs/internal.h | 6 +- fs/nfs/nfs3proc.c | 21 ++--- fs/nfs/nfs4_fs.h | 6 +- fs/nfs/nfs4proc.c | 105 ++++++++++++------------ fs/nfs/nfs4trace.h | 28 +++---- fs/nfs/objlayout/objio_osd.c | 24 +++--- fs/nfs/objlayout/objlayout.c | 81 +++++++++---------- fs/nfs/objlayout/objlayout.h | 8 +- fs/nfs/pagelist.c | 120 +++++++++++++-------------- fs/nfs/pnfs.c | 80 ++++++++---------- fs/nfs/pnfs.h | 10 +-- fs/nfs/proc.c | 27 ++++--- fs/nfs/read.c | 42 +++++----- fs/nfs/write.c | 56 ++++++------- include/linux/nfs_page.h | 9 ++- include/linux/nfs_xdr.h | 43 +++++----- 19 files changed, 460 insertions(+), 482 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 36b01cef849e..c3ccfe440390 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err) SetPageUptodate(bvec->bv_page); if (err) { - struct nfs_pgio_data *rdata = par->data; - struct nfs_pgio_header *header = rdata->header; + struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) header->pnfs_error = -EIO; @@ -224,44 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err) static void bl_read_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *rdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_pgio_data, task); - pnfs_ld_read_done(rdata); + hdr = container_of(task, struct nfs_pgio_header, task); + pnfs_ld_read_done(hdr); } static void bl_end_par_io_read(void *data, int unused) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - rdata->task.tk_status = rdata->header->pnfs_error; - INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); - schedule_work(&rdata->task.u.tk_work); + hdr->task.tk_status = hdr->pnfs_error; + INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup); + schedule_work(&hdr->task.u.tk_work); } static enum pnfs_try_status -bl_read_pagelist(struct nfs_pgio_data *rdata) +bl_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *header = rdata->header; + struct nfs_pgio_header *header = hdr; int i, hole; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, extent_length = 0; struct parallel_io *par; - loff_t f_offset = rdata->args.offset; - size_t bytes_left = rdata->args.count; + loff_t f_offset = hdr->args.offset; + size_t bytes_left = hdr->args.count; unsigned int pg_offset, pg_len; - struct page **pages = rdata->args.pages; - int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; + struct page **pages = hdr->args.pages; + int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT; const bool is_dio = (header->dreq != NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->page_array.npages, f_offset, - (unsigned int)rdata->args.count); + hdr->page_array.npages, f_offset, + (unsigned int)hdr->args.count); - par = alloc_parallel(rdata); + par = alloc_parallel(hdr); if (!par) goto use_mds; par->pnfs_callback = bl_end_par_io_read; @@ -269,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) isect = (sector_t) (f_offset >> SECTOR_SHIFT); /* Code assumes extents are page-aligned */ - for (i = pg_index; i < rdata->page_array.npages; i++) { + for (i = pg_index; i < hdr->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -319,7 +318,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) be_read = (hole && cow_read) ? cow_read : be; bio = do_add_page_to_bio(bio, - rdata->page_array.npages - i, + hdr->page_array.npages - i, READ, isect, pages[i], be_read, bl_end_io_read, par, @@ -334,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) extent_length -= PAGE_CACHE_SECTORS; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { - rdata->res.eof = 1; - rdata->res.count = header->inode->i_size - rdata->args.offset; + hdr->res.eof = 1; + hdr->res.count = header->inode->i_size - hdr->args.offset; } else { - rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; + hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset; } out: bl_put_extent(be); @@ -392,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) } if (unlikely(err)) { - struct nfs_pgio_data *data = par->data; - struct nfs_pgio_header *header = data->header; + struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) header->pnfs_error = -EIO; @@ -407,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_pgio_data *data = par->data; - struct nfs_pgio_header *header = data->header; + struct nfs_pgio_header *header = par->data; if (!uptodate) { if (!header->pnfs_error) @@ -425,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err) static void bl_write_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *wdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_pgio_data, task); - if (likely(!wdata->header->pnfs_error)) { + hdr = container_of(task, struct nfs_pgio_header, task); + if (likely(!hdr->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ - mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), - wdata->args.offset, wdata->args.count); + mark_extents_written(BLK_LSEG2EXT(hdr->lseg), + hdr->args.offset, hdr->args.count); } - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); } /* Called when last of bios associated with a bl_write_pagelist call finishes */ static void bl_end_par_io_write(void *data, int num_se) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(wdata->header->pnfs_error)) { - bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, + if (unlikely(hdr->pnfs_error)) { + bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval, num_se); } - wdata->task.tk_status = wdata->header->pnfs_error; - wdata->writeverf.committed = NFS_FILE_SYNC; - INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); - schedule_work(&wdata->task.u.tk_work); + hdr->task.tk_status = hdr->pnfs_error; + hdr->writeverf.committed = NFS_FILE_SYNC; + INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); + schedule_work(&hdr->task.u.tk_work); } /* FIXME STUB - mark intersection of layout and page as bad, so is not @@ -675,18 +672,17 @@ check_page: } static enum pnfs_try_status -bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) +bl_write_pagelist(struct nfs_pgio_header *header, int sync) { - struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, pg_index, last = 0; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; struct parallel_io *par = NULL; - loff_t offset = wdata->args.offset; - size_t count = wdata->args.count; + loff_t offset = header->args.offset; + size_t count = header->args.count; unsigned int pg_offset, pg_len, saved_len; - struct page **pages = wdata->args.pages; + struct page **pages = header->args.pages; struct page *page; pgoff_t index; u64 temp; @@ -701,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); goto out_mds; } - /* At this point, wdata->page_aray is a (sequential) list of nfs_pages. + /* At this point, header->page_aray is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. */ - par = alloc_parallel(wdata); + par = alloc_parallel(header); if (!par) goto out_mds; par->pnfs_callback = bl_end_par_io_write; @@ -792,8 +788,8 @@ next_page: bio = bl_submit_bio(WRITE, bio); /* Middle pages */ - pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; - for (i = pg_index; i < wdata->page_array.npages; i++) { + pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + for (i = pg_index; i < header->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -864,7 +860,7 @@ next_page: } - bio = do_add_page_to_bio(bio, wdata->page_array.npages - i, + bio = do_add_page_to_bio(bio, header->page_array.npages - i, WRITE, isect, pages[i], be, bl_end_io_write, par, @@ -893,7 +889,7 @@ next_page: } write_done: - wdata->res.count = wdata->args.count; + header->res.count = header->args.count; out: bl_put_extent(be); bl_put_extent(cow_read); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 179de67ca907..6c4c867ee04c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp, - hdr->data.ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, + hdr->ds_idx); WARN_ON_ONCE(verfp->committed >= 0); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); WARN_ON_ONCE(verfp->committed < 0); @@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp, - hdr->data.ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, + hdr->ds_idx); if (verfp->committed < 0) { nfs_direct_set_hdr_verf(dreq, hdr); return 0; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d2eba1c13b7e..537e7f7a0b48 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -84,19 +84,18 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } -static void filelayout_reset_write(struct nfs_pgio_data *data) +static void filelayout_reset_write(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; + struct rpc_task *task = &hdr->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, + hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->args.count, + (unsigned long long)hdr->args.offset); task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages, @@ -105,19 +104,18 @@ static void filelayout_reset_write(struct nfs_pgio_data *data) } } -static void filelayout_reset_read(struct nfs_pgio_data *data) +static void filelayout_reset_read(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; + struct rpc_task *task = &hdr->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, + hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->args.count, + (unsigned long long)hdr->args.offset); task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages, @@ -243,18 +241,17 @@ wait_on_recovery: /* NFS_PROTO call done callback routines */ static int filelayout_read_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; int err; - trace_nfs4_pnfs_read(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); + trace_nfs4_pnfs_read(hdr, task->tk_status); + err = filelayout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg); switch (err) { case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_read(data); + filelayout_reset_read(hdr); return task->tk_status; case -EAGAIN: rpc_restart_call_prepare(task); @@ -270,15 +267,14 @@ static int filelayout_read_done_cb(struct rpc_task *task, * rfc5661 is not clear about which credential should be used. */ static void -filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) +filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = wdata->header; if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || - wdata->res.verf->committed == NFS_FILE_SYNC) + hdr->res.verf->committed == NFS_FILE_SYNC) return; - pnfs_set_layoutcommit(wdata); + pnfs_set_layoutcommit(hdr); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -305,83 +301,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return; } - if (filelayout_reset_to_mds(rdata->header->lseg)) { + if (filelayout_reset_to_mds(hdr->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_read(rdata); + filelayout_reset_read(hdr); rpc_exit(task, 0); return; } - rdata->pgio_done_cb = filelayout_read_done_cb; + hdr->pgio_done_cb = filelayout_read_done_cb; - if (nfs41_setup_sequence(rdata->ds_clp->cl_session, - &rdata->args.seq_args, - &rdata->res.seq_res, + if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return; - if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { - nfs41_sequence_done(task, &rdata->res.seq_res); + nfs41_sequence_done(task, &hdr->res.seq_res); return; } /* Note this may cause RPC to be resent */ - rdata->header->mds_ops->rpc_call_done(task, data); + hdr->mds_ops->rpc_call_done(task, data); } static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_pgio_data *rdata = data; - struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(rdata->ds_clp); - rdata->header->mds_ops->rpc_release(data); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); } static int filelayout_write_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; int err; - trace_nfs4_pnfs_write(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); + trace_nfs4_pnfs_write(hdr, task->tk_status); + err = filelayout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg); switch (err) { case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_write(data); + filelayout_reset_write(hdr); return task->tk_status; case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; } - filelayout_set_layoutcommit(data); + filelayout_set_layoutcommit(hdr); return 0; } @@ -419,57 +414,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return; } - if (filelayout_reset_to_mds(wdata->header->lseg)) { + if (filelayout_reset_to_mds(hdr->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_write(wdata); + filelayout_reset_write(hdr); rpc_exit(task, 0); return; } - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, - &wdata->res.seq_res, + if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return; - if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { - nfs41_sequence_done(task, &wdata->res.seq_res); + nfs41_sequence_done(task, &hdr->res.seq_res); return; } /* Note this may cause RPC to be resent */ - wdata->header->mds_ops->rpc_call_done(task, data); + hdr->mds_ops->rpc_call_done(task, data); } static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_pgio_data *wdata = data; - struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(wdata->ds_clp); - wdata->header->mds_ops->rpc_release(data); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); } static void filelayout_commit_prepare(struct rpc_task *task, void *data) @@ -529,19 +524,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { }; static enum pnfs_try_status -filelayout_read_pagelist(struct nfs_pgio_data *data) +filelayout_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; + loff_t offset = hdr->args.offset; u32 j, idx; struct nfs_fh *fh; dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", __func__, hdr->inode->i_ino, - data->args.pgbase, (size_t)data->args.count, offset); + hdr->args.pgbase, (size_t)hdr->args.count, offset); /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); @@ -559,30 +553,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data) /* No multipath support. Use first DS */ atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; + hdr->ds_clp = ds->ds_clp; + hdr->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) - data->args.fh = fh; + hdr->args.fh = fh; - data->args.offset = filelayout_get_dserver_offset(lseg, offset); - data->mds_offset = offset; + hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); + hdr->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_pgio(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, hdr, &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } /* Perform async writes. */ static enum pnfs_try_status -filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) +filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) { - struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; + loff_t offset = hdr->args.offset; u32 j, idx; struct nfs_fh *fh; @@ -598,21 +591,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) return PNFS_NOT_ATTEMPTED; dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", - __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, + __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - data->pgio_done_cb = filelayout_write_done_cb; + hdr->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; + hdr->ds_clp = ds->ds_clp; + hdr->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) - data->args.fh = fh; - - data->args.offset = filelayout_get_dserver_offset(lseg, offset); + hdr->args.fh = fh; + hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_pgio(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, hdr, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5cda049c8f9b..3f3aedd2e8c9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -240,9 +240,9 @@ int nfs_iocounter_wait(struct nfs_io_counter *c); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); void nfs_pgio_header_free(struct nfs_pgio_header *); -void nfs_pgio_data_destroy(struct nfs_pgio_data *); +void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); -int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, +int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, const struct rpc_call_ops *, int, int); static inline void nfs_iocounter_init(struct nfs_io_counter *c) @@ -481,7 +481,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_pgio_data *); +extern void __nfs4_read_done_cb(struct nfs_pgio_header *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e7daa42bbc86..854959db0e5d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; nfs_invalidate_atime(inode); - nfs_refresh_inode(inode, &data->fattr); + nfs_refresh_inode(inode, &hdr->fattr); return 0; } -static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { rpc_call_start(task); return 0; } -static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); return 0; } -static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ba2affa51941..b8ea4a26998c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -337,11 +337,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, */ static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_pgio_data *wdata) + struct rpc_message *msg, struct nfs_pgio_header *hdr) { if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) - wdata->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; } #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) @@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_pgio_data *wdata) + struct rpc_message *msg, struct nfs_pgio_header *hdr) { } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bf3d97cc5a0..b0e5705599bf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4033,24 +4033,25 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_pgio_data *data) +void __nfs4_read_done_cb(struct nfs_pgio_header *hdr) { - nfs_invalidate_atime(data->header->inode); + nfs_invalidate_atime(hdr->inode); } -static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - trace_nfs4_read(data, task->tk_status); - if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { + trace_nfs4_read(hdr, task->tk_status); + if (nfs4_async_handle_error(task, server, + hdr->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; } - __nfs4_read_done_cb(data); + __nfs4_read_done_cb(hdr); if (task->tk_status > 0) - renew_lease(server, data->timestamp); + renew_lease(server, hdr->timestamp); return 0; } @@ -4068,54 +4069,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_read_stateid_changed(task, &data->args)) + if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->pgio_done_cb ? data->pgio_done_cb(task, data) : - nfs4_read_done_cb(task, data); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_read_done_cb(task, hdr); } -static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - data->timestamp = jiffies; - data->pgio_done_cb = nfs4_read_done_cb; + hdr->timestamp = jiffies; + hdr->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0); } -static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, + if (nfs4_setup_sequence(NFS_SERVER(hdr->inode), + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return 0; - if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, + hdr->rw_ops->rw_mode) == -EIO) return -EIO; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) return -EIO; return 0; } -static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; - trace_nfs4_write(data, task->tk_status); - if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { + trace_nfs4_write(hdr, task->tk_status); + if (nfs4_async_handle_error(task, NFS_SERVER(inode), + hdr->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; } if (task->tk_status >= 0) { - renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + renew_lease(NFS_SERVER(inode), hdr->timestamp); + nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr); } return 0; } @@ -4134,23 +4140,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_write_stateid_changed(task, &data->args)) + if (nfs4_write_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->pgio_done_cb ? data->pgio_done_cb(task, data) : - nfs4_write_done_cb(task, data); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_write_done_cb(task, hdr); } static -bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) +bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) { - const struct nfs_pgio_header *hdr = data->header; - /* Don't request attributes for pNFS or O_DIRECT writes */ - if (data->ds_clp != NULL || hdr->dreq != NULL) + if (hdr->ds_clp != NULL || hdr->dreq != NULL) return false; /* Otherwise, request attributes if and only if we don't hold * a delegation @@ -4158,23 +4162,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - if (!nfs4_write_need_cache_consistency_data(data)) { - data->args.bitmask = NULL; - data->res.fattr = NULL; + if (!nfs4_write_need_cache_consistency_data(hdr)) { + hdr->args.bitmask = NULL; + hdr->res.fattr = NULL; } else - data->args.bitmask = server->cache_consistency_bitmask; + hdr->args.bitmask = server->cache_consistency_bitmask; - if (!data->pgio_done_cb) - data->pgio_done_cb = nfs4_write_done_cb; - data->res.server = server; - data->timestamp = jiffies; + if (!hdr->pgio_done_cb) + hdr->pgio_done_cb = nfs4_write_done_cb; + hdr->res.server = server; + hdr->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 0a744f3a86f6..1c32adbe728d 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); DECLARE_EVENT_CLASS(nfs4_read_event, TP_PROTO( - const struct nfs_pgio_data *data, + const struct nfs_pgio_header *hdr, int error ), - TP_ARGS(data, error), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(dev_t, dev) @@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event, ), TP_fast_assign( - const struct inode *inode = data->header->inode; + const struct inode *inode = hdr->inode; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->offset = data->args.offset; - __entry->count = data->args.count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->error = error; ), @@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event, #define DEFINE_NFS4_READ_EVENT(name) \ DEFINE_EVENT(nfs4_read_event, name, \ TP_PROTO( \ - const struct nfs_pgio_data *data, \ + const struct nfs_pgio_header *hdr, \ int error \ ), \ - TP_ARGS(data, error)) + TP_ARGS(hdr, error)) DEFINE_NFS4_READ_EVENT(nfs4_read); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); @@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); DECLARE_EVENT_CLASS(nfs4_write_event, TP_PROTO( - const struct nfs_pgio_data *data, + const struct nfs_pgio_header *hdr, int error ), - TP_ARGS(data, error), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(dev_t, dev) @@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event, ), TP_fast_assign( - const struct inode *inode = data->header->inode; + const struct inode *inode = hdr->inode; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->offset = data->args.offset; - __entry->count = data->args.count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->error = error; ), @@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event, #define DEFINE_NFS4_WRITE_EVENT(name) \ DEFINE_EVENT(nfs4_write_event, name, \ TP_PROTO( \ - const struct nfs_pgio_data *data, \ + const struct nfs_pgio_header *hdr, \ int error \ ), \ - TP_ARGS(data, error)) + TP_ARGS(hdr, error)) DEFINE_NFS4_WRITE_EVENT(nfs4_write); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 611320753db2..ae05278b3761 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private) objlayout_read_done(&objios->oir, status, objios->sync); } -int objio_read_pagelist(struct nfs_pgio_data *rdata) +int objio_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; int ret; ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, - hdr->lseg, rdata->args.pages, rdata->args.pgbase, - rdata->args.offset, rdata->args.count, rdata, + hdr->lseg, hdr->args.pages, hdr->args.pgbase, + hdr->args.offset, hdr->args.count, hdr, GFP_KERNEL, &objios); if (unlikely(ret)) return ret; objios->ios->done = _read_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - rdata->args.offset, rdata->args.count); + hdr->args.offset, hdr->args.count); ret = ore_read(objios->ios); if (unlikely(ret)) objio_free_result(&objios->oir); @@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private) static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; - struct nfs_pgio_data *wdata = objios->oir.rpcdata; - struct address_space *mapping = wdata->header->inode->i_mapping; + struct nfs_pgio_header *hdr = objios->oir.rpcdata; + struct address_space *mapping = hdr->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; struct page *page; - loff_t i_size = i_size_read(wdata->header->inode); + loff_t i_size = i_size_read(hdr->inode); if (offset >= i_size) { *uptodate = true; @@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = { .put_page = &__r4w_put_page, }; -int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) +int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; int ret; ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, - hdr->lseg, wdata->args.pages, wdata->args.pgbase, - wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, + hdr->lseg, hdr->args.pages, hdr->args.pgbase, + hdr->args.offset, hdr->args.count, hdr, GFP_NOFS, &objios); if (unlikely(ret)) return ret; @@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) objios->ios->done = _write_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - wdata->args.offset, wdata->args.count); + hdr->args.offset, hdr->args.count); ret = ore_write(objios->ios); if (unlikely(ret)) { objio_free_result(&objios->oir); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 31fed91a8bac..86312787cee6 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, static void _rpc_read_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *rdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_pgio_data, task); + hdr = container_of(task, struct nfs_pgio_header, task); - pnfs_ld_read_done(rdata); + pnfs_ld_read_done(hdr); } void objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_pgio_data *rdata = oir->rpcdata; + struct nfs_pgio_header *hdr = oir->rpcdata; - oir->status = rdata->task.tk_status = status; + oir->status = hdr->task.tk_status = status; if (status >= 0) - rdata->res.count = status; + hdr->res.count = status; else - rdata->header->pnfs_error = status; + hdr->pnfs_error = status; objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, - status, rdata->res.eof, sync); + status, hdr->res.eof, sync); if (sync) - pnfs_ld_read_done(rdata); + pnfs_ld_read_done(hdr); else { - INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); - schedule_work(&rdata->task.u.tk_work); + INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete); + schedule_work(&hdr->task.u.tk_work); } } @@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async reads. */ enum pnfs_try_status -objlayout_read_pagelist(struct nfs_pgio_data *rdata) +objlayout_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; - loff_t offset = rdata->args.offset; - size_t count = rdata->args.count; + loff_t offset = hdr->args.offset; + size_t count = hdr->args.count; int err; loff_t eof; @@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata) if (unlikely(offset + count > eof)) { if (offset >= eof) { err = 0; - rdata->res.count = 0; - rdata->res.eof = 1; + hdr->res.count = 0; + hdr->res.eof = 1; /*FIXME: do we need to call pnfs_ld_read_done() */ goto out; } count = eof - offset; } - rdata->res.eof = (offset + count) >= eof; - _fix_verify_io_params(hdr->lseg, &rdata->args.pages, - &rdata->args.pgbase, - rdata->args.offset, rdata->args.count); + hdr->res.eof = (offset + count) >= eof; + _fix_verify_io_params(hdr->lseg, &hdr->args.pages, + &hdr->args.pgbase, + hdr->args.offset, hdr->args.count); dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", - __func__, inode->i_ino, offset, count, rdata->res.eof); + __func__, inode->i_ino, offset, count, hdr->res.eof); - err = objio_read_pagelist(rdata); + err = objio_read_pagelist(hdr); out: if (unlikely(err)) { hdr->pnfs_error = err; @@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata) static void _rpc_write_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *wdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_pgio_data, task); + hdr = container_of(task, struct nfs_pgio_header, task); - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); } void objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_pgio_data *wdata = oir->rpcdata; + struct nfs_pgio_header *hdr = oir->rpcdata; - oir->status = wdata->task.tk_status = status; + oir->status = hdr->task.tk_status = status; if (status >= 0) { - wdata->res.count = status; - wdata->writeverf.committed = oir->committed; + hdr->res.count = status; + hdr->writeverf.committed = oir->committed; } else { - wdata->header->pnfs_error = status; + hdr->pnfs_error = status; } objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, wdata->writeverf.committed, sync); + status, hdr->writeverf.committed, sync); if (sync) - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); else { - INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); - schedule_work(&wdata->task.u.tk_work); + INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete); + schedule_work(&hdr->task.u.tk_work); } } @@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async writes. */ enum pnfs_try_status -objlayout_write_pagelist(struct nfs_pgio_data *wdata, - int how) +objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_header *hdr = wdata->header; int err; - _fix_verify_io_params(hdr->lseg, &wdata->args.pages, - &wdata->args.pgbase, - wdata->args.offset, wdata->args.count); + _fix_verify_io_params(hdr->lseg, &hdr->args.pages, + &hdr->args.pgbase, + hdr->args.offset, hdr->args.count); - err = objio_write_pagelist(wdata, how); + err = objio_write_pagelist(hdr, how); if (unlikely(err)) { hdr->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 01e041029a6c..fd13f1d2f136 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg); */ extern void objio_free_result(struct objlayout_io_res *oir); -extern int objio_read_pagelist(struct nfs_pgio_data *rdata); -extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); +extern int objio_read_pagelist(struct nfs_pgio_header *rdata); +extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how); /* * callback API @@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( extern void objlayout_free_lseg(struct pnfs_layout_segment *); extern enum pnfs_try_status objlayout_read_pagelist( - struct nfs_pgio_data *); + struct nfs_pgio_header *); extern enum pnfs_try_status objlayout_write_pagelist( - struct nfs_pgio_data *, + struct nfs_pgio_header *, int how); extern void objlayout_encode_layoutcommit( diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5e70918f6c95..ecb3d4cdbc85 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -484,8 +484,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, unsigned int pagecount) { - if (nfs_pgarray_set(&hdr->data.page_array, pagecount)) { - hdr->data.header = hdr; + if (nfs_pgarray_set(&hdr->page_array, pagecount)) { atomic_inc(&hdr->refcnt); return true; } @@ -493,16 +492,14 @@ static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, } /** - * nfs_pgio_data_destroy - Properly free pageio data - * @data: The data to destroy + * nfs_pgio_data_destroy - Properly release pageio data + * @hdr: The header with data to destroy */ -void nfs_pgio_data_destroy(struct nfs_pgio_data *data) +void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - put_nfs_open_context(data->args.context); - if (data->page_array.pagevec != data->page_array.page_array) - kfree(data->page_array.pagevec); + put_nfs_open_context(hdr->args.context); + if (hdr->page_array.pagevec != hdr->page_array.page_array) + kfree(hdr->page_array.pagevec); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } @@ -510,31 +507,31 @@ EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call - * @data: The pageio data + * @hdr: The pageio hdr * @count: Number of bytes to read * @offset: Initial offset * @how: How to commit data (writes only) * @cinfo: Commit information for the call (writes only) */ -static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, +static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, unsigned int count, unsigned int offset, int how, struct nfs_commit_info *cinfo) { - struct nfs_page *req = data->header->req; + struct nfs_page *req = hdr->req; /* Set up the RPC argument and reply structs - * NB: take care not to mess about with data->commit et al. */ + * NB: take care not to mess about with hdr->commit et al. */ - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; + hdr->args.fh = NFS_FH(hdr->inode); + hdr->args.offset = req_offset(req) + offset; /* pnfs_set_layoutcommit needs this */ - data->mds_offset = data->args.offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->page_array.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - data->args.stable = NFS_UNSTABLE; + hdr->mds_offset = hdr->args.offset; + hdr->args.pgbase = req->wb_pgbase + offset; + hdr->args.pages = hdr->page_array.pagevec; + hdr->args.count = count; + hdr->args.context = get_nfs_open_context(req->wb_context); + hdr->args.lock_context = req->wb_lock_context; + hdr->args.stable = NFS_UNSTABLE; switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { case 0: break; @@ -542,59 +539,60 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, if (nfs_reqs_to_commit(cinfo)) break; default: - data->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; } - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - data->res.verf = &data->writeverf; - nfs_fattr_init(&data->fattr); + hdr->res.fattr = &hdr->fattr; + hdr->res.count = count; + hdr->res.eof = 0; + hdr->res.verf = &hdr->writeverf; + nfs_fattr_init(&hdr->fattr); } /** - * nfs_pgio_prepare - Prepare pageio data to go over the wire + * nfs_pgio_prepare - Prepare pageio hdr to go over the wire * @task: The current task - * @calldata: pageio data to prepare + * @calldata: pageio header to prepare */ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) { - struct nfs_pgio_data *data = calldata; + struct nfs_pgio_header *hdr = calldata; int err; - err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); + err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr); if (err) rpc_exit(task, err); } -int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, +int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, int how, int flags) { + struct inode *inode = hdr->inode; struct rpc_task *task; struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, + .rpc_argp = &hdr->args, + .rpc_resp = &hdr->res, + .rpc_cred = hdr->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, - .task = &data->task, + .task = &hdr->task, .rpc_message = &msg, .callback_ops = call_ops, - .callback_data = data, + .callback_data = hdr, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC | flags, }; int ret = 0; - data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); + hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); dprintk("NFS: %5u initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", - data->task.tk_pid, - data->header->inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(data->header->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->task.tk_pid, + inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode), + hdr->args.count, + (unsigned long long)hdr->args.offset); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { @@ -621,21 +619,21 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { set_bit(NFS_IOHDR_REDO, &hdr->flags); - nfs_pgio_data_destroy(&hdr->data); + nfs_pgio_data_destroy(hdr); desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } /** * nfs_pgio_release - Release pageio data - * @calldata: The pageio data to release + * @calldata: The pageio header to release */ static void nfs_pgio_release(void *calldata) { - struct nfs_pgio_data *data = calldata; - if (data->header->rw_ops->rw_release) - data->header->rw_ops->rw_release(data); - nfs_pgio_data_destroy(data); + struct nfs_pgio_header *hdr = calldata; + if (hdr->rw_ops->rw_release) + hdr->rw_ops->rw_release(hdr); + nfs_pgio_data_destroy(hdr); } /** @@ -676,22 +674,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_pgio_result - Basic pageio error handling * @task: The task that ran - * @calldata: Pageio data to check + * @calldata: Pageio header to check */ static void nfs_pgio_result(struct rpc_task *task, void *calldata) { - struct nfs_pgio_data *data = calldata; - struct inode *inode = data->header->inode; + struct nfs_pgio_header *hdr = calldata; + struct inode *inode = hdr->inode; dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, task->tk_status); - if (data->header->rw_ops->rw_done(task, data, inode) != 0) + if (hdr->rw_ops->rw_done(task, hdr, inode) != 0) return; if (task->tk_status < 0) - nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); + nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset); else - data->header->rw_ops->rw_result(task, data); + hdr->rw_ops->rw_result(task, hdr); } /* @@ -707,7 +705,6 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, { struct nfs_page *req; struct page **pages; - struct nfs_pgio_data *data; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; @@ -715,9 +712,8 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, desc->pg_count))) return nfs_pgio_error(desc, hdr); - data = &hdr->data; nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->page_array.pagevec; + pages = hdr->page_array.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -730,7 +726,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -751,7 +747,7 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - &hdr->data, desc->pg_rpc_callops, + hdr, desc->pg_rpc_callops, desc->pg_ioflags, 0); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 067104cce181..ecc911347750 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1502,9 +1502,8 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); -static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) +static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & @@ -1512,7 +1511,7 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, + hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages, hdr->completion_ops, hdr->dreq); @@ -1521,41 +1520,36 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_write_done(struct nfs_pgio_data *data) +void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - trace_nfs4_pnfs_write(data, hdr->pnfs_error); + trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); if (!hdr->pnfs_error) { - pnfs_set_layoutcommit(data); - hdr->mds_ops->rpc_call_done(&data->task, data); + pnfs_set_layoutcommit(hdr); + hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else - pnfs_ld_handle_write_error(data); - hdr->mds_ops->rpc_release(data); + pnfs_ld_handle_write_error(hdr); + hdr->mds_ops->rpc_release(hdr); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_destroy(data); + nfs_pgio_data_destroy(hdr); } static enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_pgio_data *wdata, +pnfs_try_to_write_data(struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg, int how) { - struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); @@ -1563,8 +1557,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata, hdr->mds_ops = call_ops; dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, - inode->i_ino, wdata->args.count, wdata->args.offset, how); - trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); + inode->i_ino, hdr->args.count, hdr->args.offset, how); + trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how); if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); @@ -1575,15 +1569,14 @@ static void pnfs_do_write(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data = &hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_write_through_mds(desc, data); + pnfs_write_through_mds(desc, hdr); pnfs_put_lseg(lseg); } @@ -1650,17 +1643,15 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); -static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) +static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, + hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages, hdr->completion_ops, hdr->dreq); @@ -1669,43 +1660,38 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_read_done(struct nfs_pgio_data *data) +void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - trace_nfs4_pnfs_read(data, hdr->pnfs_error); + trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { - __nfs4_read_done_cb(data); - hdr->mds_ops->rpc_call_done(&data->task, data); + __nfs4_read_done_cb(hdr); + hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else - pnfs_ld_handle_read_error(data); - hdr->mds_ops->rpc_release(data); + pnfs_ld_handle_read_error(hdr); + hdr->mds_ops->rpc_release(hdr); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &desc->pg_list); nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_destroy(data); + nfs_pgio_data_destroy(hdr); } /* * Call the appropriate parallel I/O subsystem read function. */ static enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_pgio_data *rdata, +pnfs_try_to_read_data(struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { - struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; @@ -1713,9 +1699,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, hdr->mds_ops = call_ops; dprintk("%s: Reading ino:%lu %u@%llu\n", - __func__, inode->i_ino, rdata->args.count, rdata->args.offset); + __func__, inode->i_ino, hdr->args.count, hdr->args.offset); - trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); + trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr); if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_READ); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); @@ -1725,15 +1711,14 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, static void pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data = &hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_read_through_mds(desc, data); + pnfs_read_through_mds(desc, hdr); pnfs_put_lseg(lseg); } @@ -1816,12 +1801,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) +pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; struct nfs_inode *nfsi = NFS_I(inode); - loff_t end_pos = wdata->mds_offset + wdata->res.count; + loff_t end_pos = hdr->mds_offset + hdr->res.count; bool mark_as_dirty = false; spin_lock(&inode->i_lock); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 4fb309a2b4c4..a4a58be94064 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type { * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS */ - enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); - enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); + enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *); + enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); void (*free_deviceid_node) (struct nfs4_deviceid_node *); @@ -213,13 +213,13 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); +void pnfs_set_layoutcommit(struct nfs_pgio_header *); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); -void pnfs_ld_write_done(struct nfs_pgio_data *); -void pnfs_ld_read_done(struct nfs_pgio_data *); +void pnfs_ld_write_done(struct nfs_pgio_header *); +void pnfs_ld_read_done(struct nfs_pgio_header *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c171ce1a8a30..b09cc23d6f43 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; nfs_invalidate_atime(inode); if (task->tk_status >= 0) { - nfs_refresh_inode(inode, data->res.fattr); + nfs_refresh_inode(inode, hdr->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ - if (data->args.offset + data->res.count >= data->res.fattr->size) - data->res.eof = 1; + if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size) + hdr->res.eof = 1; } return 0; } -static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { rpc_call_start(task); return 0; } -static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); return 0; } -static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ - data->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d9df4ab3737b..b1532b73fea3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -172,14 +172,15 @@ out: hdr->release(hdr); } -static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, +static void nfs_initiate_read(struct nfs_pgio_header *hdr, + struct rpc_message *msg, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; task_setup_data->flags |= swap_flags; - NFS_PROTO(inode)->read_setup(data, msg); + NFS_PROTO(inode)->read_setup(hdr, msg); } static void @@ -203,14 +204,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, +static int nfs_readpage_done(struct rpc_task *task, + struct nfs_pgio_header *hdr, struct inode *inode) { - int status = NFS_PROTO(inode)->read_done(task, data); + int status = NFS_PROTO(inode)->read_done(task, hdr); if (status != 0) return status; - nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); if (task->tk_status == -ESTALE) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); @@ -219,34 +221,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, return 0; } -static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_readpage_retry(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; /* This is a short read! */ - nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); + nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count == 0) { - nfs_set_pgio_error(data->header, -EIO, argp->offset); + nfs_set_pgio_error(hdr, -EIO, argp->offset); return; } - /* Yes, so retry the read at the end of the data */ - data->mds_offset += resp->count; + /* Yes, so retry the read at the end of the hdr */ + hdr->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; rpc_restart_call_prepare(task); } -static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_readpage_result(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - if (data->res.eof) { + if (hdr->res.eof) { loff_t bound; - bound = data->args.offset + data->res.count; + bound = hdr->args.offset + hdr->res.count; spin_lock(&hdr->lock); if (bound < hdr->io_start + hdr->good_bytes) { set_bit(NFS_IOHDR_EOF, &hdr->flags); @@ -254,8 +256,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat hdr->good_bytes = bound - hdr->io_start; } spin_unlock(&hdr->lock); - } else if (data->res.count != data->args.count) - nfs_readpage_retry(task, data); + } else if (hdr->res.count != hdr->args.count) + nfs_readpage_retry(task, hdr); } /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6afe0f679420..6a2d0986a3a3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -596,11 +596,11 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_pgio_data *data) +int nfs_write_need_commit(struct nfs_pgio_header *hdr) { - if (data->writeverf.committed == NFS_DATA_SYNC) - return data->header->lseg == NULL; - return data->writeverf.committed != NFS_FILE_SYNC; + if (hdr->writeverf.committed == NFS_DATA_SYNC) + return hdr->lseg == NULL; + return hdr->writeverf.committed != NFS_FILE_SYNC; } #else @@ -627,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_pgio_data *data) +int nfs_write_need_commit(struct nfs_pgio_header *hdr) { return 0; } @@ -1013,17 +1013,18 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, +static void nfs_initiate_write(struct nfs_pgio_header *hdr, + struct rpc_message *msg, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; int priority = flush_task_priority(how); task_setup_data->priority = priority; - NFS_PROTO(inode)->write_setup(data, msg); + NFS_PROTO(inode)->write_setup(hdr, msg); nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, - &task_setup_data->rpc_client, msg, data); + &task_setup_data->rpc_client, msg, hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1085,19 +1086,17 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_data *data) +static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - int status = data->task.tk_status; + int status = hdr->task.tk_status; - if ((status >= 0) && nfs_write_need_commit(data)) { + if ((status >= 0) && nfs_write_need_commit(hdr)) { spin_lock(&hdr->lock); if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&hdr->verf, &data->writeverf, sizeof(hdr->verf)); - else if (memcmp(&hdr->verf, &data->writeverf, - sizeof(hdr->verf))) + memcpy(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf)); + else if (memcmp(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } @@ -1131,7 +1130,8 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. */ -static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, +static int nfs_writeback_done(struct rpc_task *task, + struct nfs_pgio_header *hdr, struct inode *inode) { int status; @@ -1143,13 +1143,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, * another writer had changed the file, but some applications * depend on tighter cache coherency when writing. */ - status = NFS_PROTO(inode)->write_done(task, data); + status = NFS_PROTO(inode)->write_done(task, hdr); if (status != 0) return status; - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) - if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { + if (hdr->res.verf->committed < hdr->args.stable && + task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. @@ -1165,7 +1166,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(inode)->nfs_client->cl_hostname, - data->res.verf->committed, data->args.stable); + hdr->res.verf->committed, hdr->args.stable); complain = jiffies + 300 * HZ; } } @@ -1180,16 +1181,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, /* * This function is called when the WRITE call is complete. */ -static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_writeback_result(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ - nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count == 0) { @@ -1199,14 +1201,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da argp->count); complain = jiffies + 300 * HZ; } - nfs_set_pgio_error(data->header, -EIO, argp->offset); + nfs_set_pgio_error(hdr, -EIO, argp->offset); task->tk_status = -EIO; return; } /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ - data->mds_offset += resp->count; + hdr->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 43592651cd5a..d0fae7b78252 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -64,10 +64,11 @@ struct nfs_rw_ops { const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); - void (*rw_release)(struct nfs_pgio_data *); - int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); - void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); - void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *, + void (*rw_release)(struct nfs_pgio_header *); + int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, + struct inode *); + void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); + void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *, struct rpc_task_setup *, int); }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bb18dba1aefe..efeaf7690b51 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1257,27 +1257,10 @@ enum { NFS_IOHDR_NEED_RESCHED, }; -struct nfs_pgio_data { - struct nfs_pgio_header *header; - struct list_head list; - struct rpc_task task; - struct nfs_fattr fattr; - struct nfs_writeverf writeverf; /* Used for writes */ - struct nfs_pgio_args args; /* argument struct */ - struct nfs_pgio_res res; /* result struct */ - unsigned long timestamp; /* For lease renewal */ - int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data); - __u64 mds_offset; /* Filelayout dense stripe */ - struct nfs_page_array page_array; - struct nfs_client *ds_clp; /* pNFS data server */ - int ds_idx; /* ds index if ds_clp is set */ -}; - struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; - struct nfs_pgio_data data; atomic_t refcnt; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ @@ -1295,6 +1278,21 @@ struct nfs_pgio_header { int error; /* merge with pnfs_error */ unsigned long good_bytes; /* boundary of good data */ unsigned long flags; + + /* + * rpc data + */ + struct rpc_task task; + struct nfs_fattr fattr; + struct nfs_writeverf writeverf; /* Used for writes */ + struct nfs_pgio_args args; /* argument struct */ + struct nfs_pgio_res res; /* result struct */ + unsigned long timestamp; /* For lease renewal */ + int (*pgio_done_cb)(struct rpc_task *, struct nfs_pgio_header *); + __u64 mds_offset; /* Filelayout dense stripe */ + struct nfs_page_array page_array; + struct nfs_client *ds_clp; /* pNFS data server */ + int ds_idx; /* ds index if ds_clp is set */ }; struct nfs_mds_commit_info { @@ -1426,11 +1424,12 @@ struct nfs_rpc_ops { struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); - int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); - void (*read_setup) (struct nfs_pgio_data *, struct rpc_message *); - int (*read_done) (struct rpc_task *, struct nfs_pgio_data *); - void (*write_setup) (struct nfs_pgio_data *, struct rpc_message *); - int (*write_done) (struct rpc_task *, struct nfs_pgio_data *); + int (*pgio_rpc_prepare)(struct rpc_task *, + struct nfs_pgio_header *); + void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *); + int (*read_done)(struct rpc_task *, struct nfs_pgio_header *); + void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *); + int (*write_done)(struct rpc_task *, struct nfs_pgio_header *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); -- cgit v1.2.3 From c65e6254ca4db1584c5bf5f228ee26556477a9fd Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:36 -0400 Subject: nfs: remove unused writeverf code Remove duplicate writeverf structure from merge of nfs_pgio_header and nfs_pgio_data and remove writeverf related flags and logic to handle more than one RPC per nfs_pgio_header. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/direct.c | 25 ++++++++----------------- fs/nfs/internal.h | 1 + fs/nfs/objlayout/objlayout.c | 4 ++-- fs/nfs/pagelist.c | 2 +- fs/nfs/write.c | 27 +++++---------------------- include/linux/nfs_xdr.h | 3 --- 7 files changed, 18 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index c3ccfe440390..04ac32b339f8 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -445,7 +445,7 @@ static void bl_end_par_io_write(void *data, int num_se) } hdr->task.tk_status = hdr->pnfs_error; - hdr->writeverf.committed = NFS_FILE_SYNC; + hdr->verf.committed = NFS_FILE_SYNC; INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); schedule_work(&hdr->task.u.tk_work); } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6c4c867ee04c..2a3293a5dda0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - int bit = -1; + bool request_commit = false; struct nfs_page *req = nfs_list_entry(hdr->pages.next); if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->flags = 0; dreq->error = hdr->error; } - if (dreq->error != 0) - bit = NFS_IOHDR_ERROR; - else { + if (dreq->error == 0) { dreq->count += hdr->good_bytes; - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - bit = NFS_IOHDR_NEED_RESCHED; - } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) - bit = NFS_IOHDR_NEED_RESCHED; + request_commit = true; else if (dreq->flags == 0) { nfs_direct_set_hdr_verf(dreq, hdr); - bit = NFS_IOHDR_NEED_COMMIT; + request_commit = true; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { + request_commit = true; + if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - bit = NFS_IOHDR_NEED_RESCHED; - } else - bit = NFS_IOHDR_NEED_COMMIT; } } } @@ -760,9 +753,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); - switch (bit) { - case NFS_IOHDR_NEED_RESCHED: - case NFS_IOHDR_NEED_COMMIT: + if (request_commit) { kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); do_destroy = false; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3f3aedd2e8c9..da36257628c5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -441,6 +441,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst, void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); +int nfs_write_need_commit(struct nfs_pgio_header *); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 86312787cee6..697a16d11fac 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -328,7 +328,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) oir->status = hdr->task.tk_status = status; if (status >= 0) { hdr->res.count = status; - hdr->writeverf.committed = oir->committed; + hdr->verf.committed = oir->committed; } else { hdr->pnfs_error = status; } @@ -336,7 +336,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, hdr->writeverf.committed, sync); + status, hdr->verf.committed, sync); if (sync) pnfs_ld_write_done(hdr); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ecb3d4cdbc85..7dd0d5f101a4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -545,7 +545,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, hdr->res.fattr = &hdr->fattr; hdr->res.count = count; hdr->res.eof = 0; - hdr->res.verf = &hdr->writeverf; + hdr->res.verf = &hdr->verf; nfs_fattr_init(&hdr->fattr); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6a2d0986a3a3..8534ee5c207d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -595,12 +595,11 @@ nfs_clear_request_commit(struct nfs_page *req) } } -static inline int nfs_write_need_commit(struct nfs_pgio_header *hdr) { - if (hdr->writeverf.committed == NFS_DATA_SYNC) + if (hdr->verf.committed == NFS_DATA_SYNC) return hdr->lseg == NULL; - return hdr->writeverf.committed != NFS_FILE_SYNC; + return hdr->verf.committed != NFS_FILE_SYNC; } #else @@ -626,7 +625,6 @@ nfs_clear_request_commit(struct nfs_page *req) { } -static inline int nfs_write_need_commit(struct nfs_pgio_header *hdr) { return 0; @@ -654,11 +652,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_context_set_write_error(req->wb_context, hdr->error); goto remove_req; } - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { - nfs_mark_request_dirty(req); - goto next; - } - if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; @@ -668,7 +662,7 @@ remove_req: next: nfs_unlock_request(req); nfs_end_page_writeback(req); - do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); + do_destroy = !nfs_write_need_commit(hdr); nfs_release_request(req); } out: @@ -1088,18 +1082,7 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) { - int status = hdr->task.tk_status; - - if ((status >= 0) && nfs_write_need_commit(hdr)) { - spin_lock(&hdr->lock); - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) - ; /* Do nothing */ - else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf)); - else if (memcmp(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf))) - set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); - spin_unlock(&hdr->lock); - } + /* do nothing! */ } /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index efeaf7690b51..e1b7b3b7c40f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1253,8 +1253,6 @@ enum { NFS_IOHDR_ERROR = 0, NFS_IOHDR_EOF, NFS_IOHDR_REDO, - NFS_IOHDR_NEED_COMMIT, - NFS_IOHDR_NEED_RESCHED, }; struct nfs_pgio_header { @@ -1284,7 +1282,6 @@ struct nfs_pgio_header { */ struct rpc_task task; struct nfs_fattr fattr; - struct nfs_writeverf writeverf; /* Used for writes */ struct nfs_pgio_args args; /* argument struct */ struct nfs_pgio_res res; /* result struct */ unsigned long timestamp; /* For lease renewal */ -- cgit v1.2.3 From 4714fb51fd03a14d8c73001438283e7f7b752f1e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:37 -0400 Subject: nfs: remove pgio_header refcount, related cleanup The refcounting on nfs_pgio_header was related to there being (possibly) more than one nfs_pgio_data. Now that nfs_pgio_data has been merged into nfs_pgio_header, there is no reason to do this ref counting. Just call the completion callback on nfs_pgio_release/nfs_pgio_error. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 36 +++++++++++------------------------- fs/nfs/pnfs.c | 6 ------ include/linux/nfs_xdr.h | 1 - 3 files changed, 11 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7dd0d5f101a4..580fc0c982e6 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -459,7 +459,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) if (hdr) { INIT_LIST_HEAD(&hdr->pages); spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); hdr->rw_ops = ops; } return hdr; @@ -477,31 +476,18 @@ void nfs_pgio_header_free(struct nfs_pgio_header *hdr) EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** - * nfs_pgio_data_alloc - Allocate pageio data - * @hdr: The header making a request - * @pagecount: Number of pages to create - */ -static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, - unsigned int pagecount) -{ - if (nfs_pgarray_set(&hdr->page_array, pagecount)) { - atomic_inc(&hdr->refcnt); - return true; - } - return false; -} - -/** - * nfs_pgio_data_destroy - Properly release pageio data - * @hdr: The header with data to destroy + * nfs_pgio_data_destroy - make @hdr suitable for reuse + * + * Frees memory and releases refs from nfs_generic_pgio, so that it may + * be called again. + * + * @hdr: A header that has had nfs_generic_pgio called */ void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { put_nfs_open_context(hdr->args.context); if (hdr->page_array.pagevec != hdr->page_array.page_array) kfree(hdr->page_array.pagevec); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); } EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); @@ -620,6 +606,7 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, { set_bit(NFS_IOHDR_REDO, &hdr->flags); nfs_pgio_data_destroy(hdr); + hdr->completion_ops->completion(hdr); desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } @@ -634,6 +621,7 @@ static void nfs_pgio_release(void *calldata) if (hdr->rw_ops->rw_release) hdr->rw_ops->rw_release(hdr); nfs_pgio_data_destroy(hdr); + hdr->completion_ops->completion(hdr); } /** @@ -707,9 +695,10 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct page **pages; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; + unsigned int pagecount; - if (!nfs_pgio_data_init(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count))) + pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); + if (!nfs_pgarray_set(&hdr->page_array, pagecount)) return nfs_pgio_error(desc, hdr); nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); @@ -743,14 +732,11 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) return -ENOMEM; } nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), hdr, desc->pg_rpc_callops, desc->pg_ioflags, 0); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ecc911347750..ecbed4632d11 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1602,15 +1602,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_write(desc, hdr, desc->pg_ioflags); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); @@ -1745,15 +1742,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_read(desc, hdr); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e1b7b3b7c40f..81cbbf313272 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1259,7 +1259,6 @@ struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; - atomic_t refcnt; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ struct pnfs_layout_segment *lseg; -- cgit v1.2.3 From 53113ad35e4b9ce82d949c7c67c7b666fad5d907 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:38 -0400 Subject: pnfs: clean up *_resend_to_mds Clean up pnfs_read_done_resend_to_mds and pnfs_write_done_resend_to_mds: - instead of passing all arguments from a nfs_pgio_header, just pass the header - share the common code Reviewed-by: Christoph Hellwig Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 10 ++----- fs/nfs/pagelist.c | 32 +++++++++++++++++++++ fs/nfs/pnfs.c | 63 ++++++------------------------------------ fs/nfs/pnfs.h | 8 ++---- include/linux/nfs_page.h | 2 ++ 5 files changed, 47 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 537e7f7a0b48..504d58a51d35 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -97,10 +97,7 @@ static void filelayout_reset_write(struct nfs_pgio_header *hdr) hdr->args.count, (unsigned long long)hdr->args.offset); - task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + task->tk_status = pnfs_write_done_resend_to_mds(hdr); } } @@ -117,10 +114,7 @@ static void filelayout_reset_read(struct nfs_pgio_header *hdr) hdr->args.count, (unsigned long long)hdr->args.offset); - task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + task->tk_status = pnfs_read_done_resend_to_mds(hdr); } } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 580fc0c982e6..9c6c55359394 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -949,6 +949,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_pageio_add_request); +/* + * nfs_pageio_resend - Transfer requests to new descriptor and resend + * @hdr - the pgio header to move request from + * @desc - the pageio descriptor to add requests to + * + * Try to move each request (nfs_page) from @hdr to @desc then attempt + * to send them. + * + * Returns 0 on success and < 0 on error. + */ +int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + LIST_HEAD(failed); + + desc->pg_dreq = hdr->dreq; + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + nfs_list_remove_request(req); + if (!nfs_pageio_add_request(desc, req)) + nfs_list_add_request(req, &failed); + } + nfs_pageio_complete(desc); + if (!list_empty(&failed)) { + list_move(&failed, &hdr->pages); + return -EIO; + } + return 0; +} +EXPORT_SYMBOL_GPL(nfs_pageio_resend); + /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor * @desc: pointer to io descriptor diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ecbed4632d11..83ff8a05485a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1470,35 +1470,14 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -int pnfs_write_done_resend_to_mds(struct inode *inode, - struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) +int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) { struct nfs_pageio_descriptor pgio; - LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); - pgio.pg_dreq = dreq; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - - nfs_list_remove_request(req); - if (!nfs_pageio_add_request(&pgio, req)) - nfs_list_add_request(req, &failed); - } - nfs_pageio_complete(&pgio); - - if (!list_empty(&failed)) { - /* For some reason our attempt to resend pages. Mark the - * overall send request as having failed, and let - * nfs_writeback_release_full deal with the error. - */ - list_move(&failed, head); - return -EIO; - } - return 0; + nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, + hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); @@ -1511,10 +1490,7 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr); } /* @@ -1612,31 +1588,13 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -int pnfs_read_done_resend_to_mds(struct inode *inode, - struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) +int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr) { struct nfs_pageio_descriptor pgio; - LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read(&pgio, inode, true, compl_ops); - pgio.pg_dreq = dreq; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - - nfs_list_remove_request(req); - if (!nfs_pageio_add_request(&pgio, req)) - nfs_list_add_request(req, &failed); - } - nfs_pageio_complete(&pgio); - - if (!list_empty(&failed)) { - list_move(&failed, head); - return -EIO; - } - return 0; + nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); @@ -1648,10 +1606,7 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr); } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a4a58be94064..27ddecd3847f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -228,12 +228,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, gfp_t gfp_flags); void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); -int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); -int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); +int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); +int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d0fae7b78252..4b48548e700e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -112,6 +112,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); +extern int nfs_pageio_resend(struct nfs_pageio_descriptor *, + struct nfs_pgio_header *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, -- cgit v1.2.3 From d0d480cce8f522b37c2c1de38230fc9ad15fa506 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 25 Jun 2014 10:08:44 -0700 Subject: leds: add led-class attribute-group support Allow led-class devices to be created with optional attribute groups. This is needed in order to allow led drivers to create custom device attributes in a race-free manner. Signed-off-by: Johan Hovold Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 5 +++-- include/linux/leds.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index f37d63cf726b..aa29198fca3e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -210,8 +210,9 @@ static const struct dev_pm_ops leds_class_dev_pm_ops = { */ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) { - led_cdev->dev = device_create(leds_class, parent, 0, led_cdev, - "%s", led_cdev->name); + led_cdev->dev = device_create_with_groups(leds_class, parent, 0, + led_cdev, led_cdev->groups, + "%s", led_cdev->name); if (IS_ERR(led_cdev->dev)) return PTR_ERR(led_cdev->dev); diff --git a/include/linux/leds.h b/include/linux/leds.h index 0287ab296689..e43686472197 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -63,6 +63,8 @@ struct led_classdev { unsigned long *delay_off); struct device *dev; + const struct attribute_group **groups; + struct list_head node; /* LED Device list */ const char *default_trigger; /* Trigger to use */ -- cgit v1.2.3 From 3482f2c52b77bf6596e24aae82e204a0603eba66 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 27 Mar 2014 17:18:55 -0700 Subject: of: Create of_console_check() for selecting a console specified in /chosen The devicetree has a binding for specifying the console device in the /chosen node, but the kernel doesn't use it consistently. This change adds an API for testing if a device node is a console, and adds a preferred console entry if it is. At the same time this patch removes the of_device_is_stdout_path() API since it is unused. Signed-off-by: Grant Likely Tested-by: Sascha Hauer --- drivers/of/base.c | 23 +++++++++++++---------- include/linux/of.h | 6 +++--- 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index b9864806e9b8..df9b2bb7bb27 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -17,6 +17,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include #include #include #include @@ -2180,20 +2181,22 @@ const char *of_prop_next_string(struct property *prop, const char *cur) EXPORT_SYMBOL_GPL(of_prop_next_string); /** - * of_device_is_stdout_path - check if a device node matches the - * linux,stdout-path property - * - * Check if this device node matches the linux,stdout-path property - * in the chosen node. return true if yes, false otherwise. + * of_console_check() - Test and setup console for DT setup + * @dn - Pointer to device node + * @name - Name to use for preferred console without index. ex. "ttyS" + * @index - Index to use for preferred console. + * + * Check if the given device node matches the stdout-path property in the + * /chosen node. If it does then register it as the preferred console and return + * TRUE. Otherwise return FALSE. */ -int of_device_is_stdout_path(struct device_node *dn) +bool of_console_check(struct device_node *dn, char *name, int index) { - if (!of_stdout) + if (!dn || dn != of_stdout || console_set_on_cmdline) return false; - - return of_stdout == dn; + return add_preferred_console(name, index, NULL); } -EXPORT_SYMBOL_GPL(of_device_is_stdout_path); +EXPORT_SYMBOL_GPL(of_console_check); /** * of_find_next_cache_node - Find a node's subsidiary cache diff --git a/include/linux/of.h b/include/linux/of.h index 196b34c1ef4e..9d9734056e39 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -352,7 +352,7 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, */ const char *of_prop_next_string(struct property *prop, const char *cur); -int of_device_is_stdout_path(struct device_node *dn); +bool of_console_check(struct device_node *dn, char *name, int index); #else /* CONFIG_OF */ @@ -564,9 +564,9 @@ static inline int of_machine_is_compatible(const char *compat) return 0; } -static inline int of_device_is_stdout_path(struct device_node *dn) +static inline bool of_console_check(const struct device_node *dn, const char *name, int index) { - return 0; + return false; } static inline const __be32 *of_prop_next_u32(struct property *prop, -- cgit v1.2.3 From a752ee56ad84bf9a35b8323af1ad22b03c1df2c4 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 28 Mar 2014 08:12:18 -0700 Subject: tty: Update hypervisor tty drivers to use core stdout parsing code. The evh_bytechan, hvc_opal and hvc_vio drivers all open code the parsing of the stdout node in the device tree. This patch simplifies the driver by removing the duplicated functionality. Signed-off-by: Grant Likely --- drivers/of/base.c | 5 ++++- drivers/tty/ehv_bytechan.c | 43 ++++--------------------------------------- drivers/tty/hvc/hvc_opal.c | 15 +++------------ drivers/tty/hvc/hvc_vio.c | 29 ++++++++++------------------- include/linux/of.h | 1 + 5 files changed, 22 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index df9b2bb7bb27..e4f95ba0a3eb 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -36,7 +36,7 @@ struct device_node *of_allnodes; EXPORT_SYMBOL(of_allnodes); struct device_node *of_chosen; struct device_node *of_aliases; -static struct device_node *of_stdout; +struct device_node *of_stdout; static struct kset *of_kset; @@ -2063,9 +2063,12 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) of_chosen = of_find_node_by_path("/chosen@0"); if (of_chosen) { + /* linux,stdout-path and /aliases/stdout are for legacy compatibility */ const char *name = of_get_property(of_chosen, "stdout-path", NULL); if (!name) name = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (IS_ENABLED(CONFIG_PPC) && !name) + name = of_get_property(of_aliases, "stdout", NULL); if (name) of_stdout = of_find_node_by_path(name); } diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index 0419b69e270f..4f485e88f60c 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -108,55 +108,23 @@ static void disable_tx_interrupt(struct ehv_bc_data *bc) * * The byte channel to be used for the console is specified via a "stdout" * property in the /chosen node. - * - * For compatible with legacy device trees, we also look for a "stdout" alias. */ static int find_console_handle(void) { - struct device_node *np, *np2; + struct device_node *np = of_stdout; const char *sprop = NULL; const uint32_t *iprop; - np = of_find_node_by_path("/chosen"); - if (np) - sprop = of_get_property(np, "stdout-path", NULL); - - if (!np || !sprop) { - of_node_put(np); - np = of_find_node_by_name(NULL, "aliases"); - if (np) - sprop = of_get_property(np, "stdout", NULL); - } - - if (!sprop) { - of_node_put(np); - return 0; - } - /* We don't care what the aliased node is actually called. We only * care if it's compatible with "epapr,hv-byte-channel", because that - * indicates that it's a byte channel node. We use a temporary - * variable, 'np2', because we can't release 'np' until we're done with - * 'sprop'. + * indicates that it's a byte channel node. */ - np2 = of_find_node_by_path(sprop); - of_node_put(np); - np = np2; - if (!np) { - pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop); - return 0; - } - - /* Is it a byte channel? */ - if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) { - of_node_put(np); + if (!np || !of_device_is_compatible(np, "epapr,hv-byte-channel")) return 0; - } stdout_irq = irq_of_parse_and_map(np, 0); if (stdout_irq == NO_IRQ) { - pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop); - of_node_put(np); + pr_err("ehv-bc: no 'interrupts' property in %s node\n", np->full_name); return 0; } @@ -167,12 +135,9 @@ static int find_console_handle(void) if (!iprop) { pr_err("ehv-bc: no 'hv-handle' property in %s node\n", np->name); - of_node_put(np); return 0; } stdout_bc = be32_to_cpu(*iprop); - - of_node_put(np); return 1; } diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index a585079b4b38..a2cc5f834c63 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -342,22 +342,13 @@ static void udbg_init_opal_common(void) void __init hvc_opal_init_early(void) { - struct device_node *stdout_node = NULL; + struct device_node *stdout_node = of_node_get(of_stdout); const __be32 *termno; - const char *name = NULL; const struct hv_ops *ops; u32 index; - /* find the boot console from /chosen/stdout */ - if (of_chosen) - name = of_get_property(of_chosen, "linux,stdout-path", NULL); - if (name) { - stdout_node = of_find_node_by_path(name); - if (!stdout_node) { - pr_err("hvc_opal: Failed to locate default console!\n"); - return; - } - } else { + /* If the console wasn't in /chosen, try /ibm,opal */ + if (!stdout_node) { struct device_node *opal, *np; /* Current OPAL takeover doesn't provide the stdout diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index b594abfbf21e..5618b5fc7500 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -404,42 +404,35 @@ module_exit(hvc_vio_exit); void __init hvc_vio_init_early(void) { - struct device_node *stdout_node; const __be32 *termno; const char *name; const struct hv_ops *ops; /* find the boot console from /chosen/stdout */ - if (!of_chosen) + if (!of_stdout) return; - name = of_get_property(of_chosen, "linux,stdout-path", NULL); - if (name == NULL) - return; - stdout_node = of_find_node_by_path(name); - if (!stdout_node) - return; - name = of_get_property(stdout_node, "name", NULL); + name = of_get_property(of_stdout, "name", NULL); if (!name) { printk(KERN_WARNING "stdout node missing 'name' property!\n"); - goto out; + return; } /* Check if it's a virtual terminal */ if (strncmp(name, "vty", 3) != 0) - goto out; - termno = of_get_property(stdout_node, "reg", NULL); + return; + termno = of_get_property(of_stdout, "reg", NULL); if (termno == NULL) - goto out; + return; hvterm_priv0.termno = of_read_number(termno, 1); spin_lock_init(&hvterm_priv0.buf_lock); hvterm_privs[0] = &hvterm_priv0; /* Check the protocol */ - if (of_device_is_compatible(stdout_node, "hvterm1")) { + if (of_device_is_compatible(of_stdout, "hvterm1")) { hvterm_priv0.proto = HV_PROTOCOL_RAW; ops = &hvterm_raw_ops; } - else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) { + else if (of_device_is_compatible(of_stdout, "hvterm-protocol")) { hvterm_priv0.proto = HV_PROTOCOL_HVSI; ops = &hvterm_hvsi_ops; hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars, @@ -447,7 +440,7 @@ void __init hvc_vio_init_early(void) /* HVSI, perform the handshake now */ hvsilib_establish(&hvterm_priv0.hvsi); } else - goto out; + return; udbg_putc = udbg_hvc_putc; udbg_getc = udbg_hvc_getc; udbg_getc_poll = udbg_hvc_getc_poll; @@ -456,14 +449,12 @@ void __init hvc_vio_init_early(void) * backend for HVSI, only do udbg */ if (hvterm_priv0.proto == HV_PROTOCOL_HVSI) - goto out; + return; #endif /* Check whether the user has requested a different console. */ if (!strstr(cmd_line, "console=")) add_preferred_console("hvc", 0, NULL); hvc_instantiate(0, 0, ops); -out: - of_node_put(stdout_node); } /* call this from early_init() for a working debug console on diff --git a/include/linux/of.h b/include/linux/of.h index 9d9734056e39..f0d256273c83 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -113,6 +113,7 @@ static inline void of_node_put(struct device_node *node) { } extern struct device_node *of_allnodes; extern struct device_node *of_chosen; extern struct device_node *of_aliases; +extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; static inline bool of_have_populated_dt(void) -- cgit v1.2.3 From 7b8278358cc2b453ca6e75eedb3741cdb7e97236 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Wed, 18 Jun 2014 11:05:01 -0300 Subject: edac: add DDR4 and RDDR4 Haswell memory controller can make use of DDR4 and Registered DDR4 Cc: tony.luck@intel.com Signed-off-by: Aristeu Rozanski Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/edac_mc_sysfs.c | 4 +++- include/linux/edac.h | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 01fae8289cf0..a6cd36100663 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -108,7 +108,9 @@ static const char * const mem_types[] = { [MEM_RDDR2] = "Registered-DDR2", [MEM_XDR] = "XDR", [MEM_DDR3] = "Unbuffered-DDR3", - [MEM_RDDR3] = "Registered-DDR3" + [MEM_RDDR3] = "Registered-DDR3", + [MEM_DDR4] = "Unbuffered-DDR4", + [MEM_RDDR4] = "Registered-DDR4" }; static const char * const dev_types[] = { diff --git a/include/linux/edac.h b/include/linux/edac.h index 8e6c20af11a2..e1e68da6f35c 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -194,6 +194,9 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_DDR3: DDR3 RAM * @MEM_RDDR3: Registered DDR3 RAM * This is a variant of the DDR3 memories. + * @MEM_DDR4: DDR4 RAM + * @MEM_RDDR4: Registered DDR4 RAM + * This is a variant of the DDR4 memories. */ enum mem_type { MEM_EMPTY = 0, @@ -213,6 +216,8 @@ enum mem_type { MEM_XDR, MEM_DDR3, MEM_RDDR3, + MEM_DDR4, + MEM_RDDR4, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) -- cgit v1.2.3 From 11c32d7b6274cb0f554943d65bd4a126c4a86dcd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 22 May 2014 23:25:14 +0200 Subject: video: move Versatile CLCD helpers This moves the Versatile-specific helper code and panel database down into the drivers/video folder next to the CLCD driver itself, preserving the config symbol but also moving the header to platform data. This is necessary to rid the Integrator of this final inclusion dependency and get us one less user of the plat-versatile folder. Cc: Arnd Bergmann Cc: Jean-Christophe Plagniol-Villard Cc: linux-fbdev@vger.kernel.org Cc: Russell King Acked-by: Tomi Valkeinen Signed-off-by: Linus Walleij --- arch/arm/mach-integrator/Kconfig | 1 - arch/arm/mach-integrator/integrator_cp.c | 3 +- arch/arm/mach-realview/core.c | 2 +- arch/arm/mach-versatile/core.c | 2 +- arch/arm/mach-vexpress/ct-ca9x4.c | 3 +- arch/arm/plat-versatile/Kconfig | 3 - arch/arm/plat-versatile/Makefile | 1 - arch/arm/plat-versatile/clcd.c | 182 --------------------- arch/arm/plat-versatile/include/plat/clcd.h | 9 - drivers/video/fbdev/Kconfig | 7 + drivers/video/fbdev/Makefile | 1 + drivers/video/fbdev/amba-clcd-versatile.c | 182 +++++++++++++++++++++ include/linux/platform_data/video-clcd-versatile.h | 9 + 13 files changed, 203 insertions(+), 202 deletions(-) delete mode 100644 arch/arm/plat-versatile/clcd.c delete mode 100644 arch/arm/plat-versatile/include/plat/clcd.h create mode 100644 drivers/video/fbdev/amba-clcd-versatile.c create mode 100644 include/linux/platform_data/video-clcd-versatile.h (limited to 'include/linux') diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig index 64f8e2564a37..c455e974bbfe 100644 --- a/arch/arm/mach-integrator/Kconfig +++ b/arch/arm/mach-integrator/Kconfig @@ -17,7 +17,6 @@ config ARCH_INTEGRATOR_CP bool "Support Integrator/CP platform" select ARCH_CINTEGRATOR select ARM_TIMER_SP804 - select PLAT_VERSATILE_CLCD select SERIAL_AMBA_PL011 if TTY select SERIAL_AMBA_PL011_CONSOLE if TTY select SOC_BUS diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index a938242b0c95..0228165d2d64 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -36,8 +37,6 @@ #include #include -#include - #include "hardware.h" #include "cm.h" #include "common.h" diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 8c1b39a0caa0..850e506926df 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -48,7 +49,6 @@ #include #include -#include #include #include "core.h" diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index be83ba25f81b..08fb8c89f414 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -53,7 +54,6 @@ #include #include -#include #include #include "core.h" diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index 86150d7a2e7d..27bea049380a 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -29,8 +30,6 @@ #include #include -#include - static struct map_desc ct_ca9x4_io_desc[] __initdata = { { .virtual = V2T_PERIPH, diff --git a/arch/arm/plat-versatile/Kconfig b/arch/arm/plat-versatile/Kconfig index fce41e93b6a4..a301ca2c7d00 100644 --- a/arch/arm/plat-versatile/Kconfig +++ b/arch/arm/plat-versatile/Kconfig @@ -3,9 +3,6 @@ if PLAT_VERSATILE config PLAT_VERSATILE_CLOCK bool -config PLAT_VERSATILE_CLCD - bool - config PLAT_VERSATILE_SCHED_CLOCK def_bool y diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile index 2e0c472958ae..03c4900ac3f4 100644 --- a/arch/arm/plat-versatile/Makefile +++ b/arch/arm/plat-versatile/Makefile @@ -1,6 +1,5 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include obj-$(CONFIG_PLAT_VERSATILE_CLOCK) += clock.o -obj-$(CONFIG_PLAT_VERSATILE_CLCD) += clcd.o obj-$(CONFIG_PLAT_VERSATILE_SCHED_CLOCK) += sched-clock.o obj-$(CONFIG_SMP) += headsmp.o platsmp.o diff --git a/arch/arm/plat-versatile/clcd.c b/arch/arm/plat-versatile/clcd.c deleted file mode 100644 index 6628cc27efc5..000000000000 --- a/arch/arm/plat-versatile/clcd.c +++ /dev/null @@ -1,182 +0,0 @@ -#include -#include -#include -#include -#include - -static struct clcd_panel vga = { - .mode = { - .name = "VGA", - .refresh = 60, - .xres = 640, - .yres = 480, - .pixclock = 39721, - .left_margin = 40, - .right_margin = 24, - .upper_margin = 32, - .lower_margin = 11, - .hsync_len = 96, - .vsync_len = 2, - .sync = 0, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, - .bpp = 16, -}; - -static struct clcd_panel xvga = { - .mode = { - .name = "XVGA", - .refresh = 60, - .xres = 1024, - .yres = 768, - .pixclock = 15748, - .left_margin = 152, - .right_margin = 48, - .upper_margin = 23, - .lower_margin = 3, - .hsync_len = 104, - .vsync_len = 4, - .sync = 0, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, - .bpp = 16, -}; - -/* Sanyo TM38QV67A02A - 3.8 inch QVGA (320x240) Color TFT */ -static struct clcd_panel sanyo_tm38qv67a02a = { - .mode = { - .name = "Sanyo TM38QV67A02A", - .refresh = 116, - .xres = 320, - .yres = 240, - .pixclock = 100000, - .left_margin = 6, - .right_margin = 6, - .upper_margin = 5, - .lower_margin = 5, - .hsync_len = 6, - .vsync_len = 6, - .sync = 0, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_BCD, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551, - .bpp = 16, -}; - -static struct clcd_panel sanyo_2_5_in = { - .mode = { - .name = "Sanyo QVGA Portrait", - .refresh = 116, - .xres = 240, - .yres = 320, - .pixclock = 100000, - .left_margin = 20, - .right_margin = 10, - .upper_margin = 2, - .lower_margin = 2, - .hsync_len = 10, - .vsync_len = 2, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551, - .bpp = 16, -}; - -/* Epson L2F50113T00 - 2.2 inch 176x220 Color TFT */ -static struct clcd_panel epson_l2f50113t00 = { - .mode = { - .name = "Epson L2F50113T00", - .refresh = 390, - .xres = 176, - .yres = 220, - .pixclock = 62500, - .left_margin = 3, - .right_margin = 2, - .upper_margin = 1, - .lower_margin = 0, - .hsync_len = 3, - .vsync_len = 2, - .sync = 0, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551, - .bpp = 16, -}; - -static struct clcd_panel *panels[] = { - &vga, - &xvga, - &sanyo_tm38qv67a02a, - &sanyo_2_5_in, - &epson_l2f50113t00, -}; - -struct clcd_panel *versatile_clcd_get_panel(const char *name) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(panels); i++) - if (strcmp(panels[i]->mode.name, name) == 0) - break; - - if (i < ARRAY_SIZE(panels)) - return panels[i]; - - pr_err("CLCD: couldn't get parameters for panel %s\n", name); - - return NULL; -} - -int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize) -{ - dma_addr_t dma; - - fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize, - &dma, GFP_KERNEL); - if (!fb->fb.screen_base) { - pr_err("CLCD: unable to map framebuffer\n"); - return -ENOMEM; - } - - fb->fb.fix.smem_start = dma; - fb->fb.fix.smem_len = framesize; - - return 0; -} - -int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma) -{ - return dma_mmap_writecombine(&fb->dev->dev, vma, - fb->fb.screen_base, - fb->fb.fix.smem_start, - fb->fb.fix.smem_len); -} - -void versatile_clcd_remove_dma(struct clcd_fb *fb) -{ - dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, - fb->fb.screen_base, fb->fb.fix.smem_start); -} diff --git a/arch/arm/plat-versatile/include/plat/clcd.h b/arch/arm/plat-versatile/include/plat/clcd.h deleted file mode 100644 index 6bb6a1d2019b..000000000000 --- a/arch/arm/plat-versatile/include/plat/clcd.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef PLAT_CLCD_H -#define PLAT_CLCD_H - -struct clcd_panel *versatile_clcd_get_panel(const char *); -int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long); -int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *); -void versatile_clcd_remove_dma(struct clcd_fb *); - -#endif diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 59c98bfd5a8a..5edc7a054e03 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -290,6 +290,13 @@ config FB_ARMCLCD here and read . The module will be called amba-clcd. +# Helper logic selected only by the ARM Versatile platform family. +config PLAT_VERSATILE_CLCD + depends on FB_ARMCLCD + depends on (PLAT_VERSATILE || ARCH_INTEGRATOR) + default y + bool + config FB_ACORN bool "Acorn VIDC support" depends on (FB = y) && ARM && ARCH_ACORN diff --git a/drivers/video/fbdev/Makefile b/drivers/video/fbdev/Makefile index 0284f2a12538..0b2090d2e52e 100644 --- a/drivers/video/fbdev/Makefile +++ b/drivers/video/fbdev/Makefile @@ -78,6 +78,7 @@ obj-$(CONFIG_FB_ATMEL) += atmel_lcdfb.o obj-$(CONFIG_FB_PVR2) += pvr2fb.o obj-$(CONFIG_FB_VOODOO1) += sstfb.o obj-$(CONFIG_FB_ARMCLCD) += amba-clcd.o +obj-$(CONFIG_PLAT_VERSATILE_CLCD) += amba-clcd-versatile.o obj-$(CONFIG_FB_GOLDFISH) += goldfishfb.o obj-$(CONFIG_FB_68328) += 68328fb.o obj-$(CONFIG_FB_GBE) += gbefb.o diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c new file mode 100644 index 000000000000..7a8afcd4573e --- /dev/null +++ b/drivers/video/fbdev/amba-clcd-versatile.c @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include + +static struct clcd_panel vga = { + .mode = { + .name = "VGA", + .refresh = 60, + .xres = 640, + .yres = 480, + .pixclock = 39721, + .left_margin = 40, + .right_margin = 24, + .upper_margin = 32, + .lower_margin = 11, + .hsync_len = 96, + .vsync_len = 2, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, + .bpp = 16, +}; + +static struct clcd_panel xvga = { + .mode = { + .name = "XVGA", + .refresh = 60, + .xres = 1024, + .yres = 768, + .pixclock = 15748, + .left_margin = 152, + .right_margin = 48, + .upper_margin = 23, + .lower_margin = 3, + .hsync_len = 104, + .vsync_len = 4, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, + .bpp = 16, +}; + +/* Sanyo TM38QV67A02A - 3.8 inch QVGA (320x240) Color TFT */ +static struct clcd_panel sanyo_tm38qv67a02a = { + .mode = { + .name = "Sanyo TM38QV67A02A", + .refresh = 116, + .xres = 320, + .yres = 240, + .pixclock = 100000, + .left_margin = 6, + .right_margin = 6, + .upper_margin = 5, + .lower_margin = 5, + .hsync_len = 6, + .vsync_len = 6, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551, + .bpp = 16, +}; + +static struct clcd_panel sanyo_2_5_in = { + .mode = { + .name = "Sanyo QVGA Portrait", + .refresh = 116, + .xres = 240, + .yres = 320, + .pixclock = 100000, + .left_margin = 20, + .right_margin = 10, + .upper_margin = 2, + .lower_margin = 2, + .hsync_len = 10, + .vsync_len = 2, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551, + .bpp = 16, +}; + +/* Epson L2F50113T00 - 2.2 inch 176x220 Color TFT */ +static struct clcd_panel epson_l2f50113t00 = { + .mode = { + .name = "Epson L2F50113T00", + .refresh = 390, + .xres = 176, + .yres = 220, + .pixclock = 62500, + .left_margin = 3, + .right_margin = 2, + .upper_margin = 1, + .lower_margin = 0, + .hsync_len = 3, + .vsync_len = 2, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551, + .bpp = 16, +}; + +static struct clcd_panel *panels[] = { + &vga, + &xvga, + &sanyo_tm38qv67a02a, + &sanyo_2_5_in, + &epson_l2f50113t00, +}; + +struct clcd_panel *versatile_clcd_get_panel(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(panels); i++) + if (strcmp(panels[i]->mode.name, name) == 0) + break; + + if (i < ARRAY_SIZE(panels)) + return panels[i]; + + pr_err("CLCD: couldn't get parameters for panel %s\n", name); + + return NULL; +} + +int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize) +{ + dma_addr_t dma; + + fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize, + &dma, GFP_KERNEL); + if (!fb->fb.screen_base) { + pr_err("CLCD: unable to map framebuffer\n"); + return -ENOMEM; + } + + fb->fb.fix.smem_start = dma; + fb->fb.fix.smem_len = framesize; + + return 0; +} + +int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + return dma_mmap_writecombine(&fb->dev->dev, vma, + fb->fb.screen_base, + fb->fb.fix.smem_start, + fb->fb.fix.smem_len); +} + +void versatile_clcd_remove_dma(struct clcd_fb *fb) +{ + dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, + fb->fb.screen_base, fb->fb.fix.smem_start); +} diff --git a/include/linux/platform_data/video-clcd-versatile.h b/include/linux/platform_data/video-clcd-versatile.h new file mode 100644 index 000000000000..6bb6a1d2019b --- /dev/null +++ b/include/linux/platform_data/video-clcd-versatile.h @@ -0,0 +1,9 @@ +#ifndef PLAT_CLCD_H +#define PLAT_CLCD_H + +struct clcd_panel *versatile_clcd_get_panel(const char *); +int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long); +int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *); +void versatile_clcd_remove_dma(struct clcd_fb *); + +#endif -- cgit v1.2.3 From 5d98e61d337c181f199a6cb864569cc4e116ef4c Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 20 May 2014 20:59:23 +0800 Subject: I2C/ACPI: Add i2c ACPI operation region support ACPI 5.0 spec(5.5.2.4.5) defines GenericSerialBus(i2c, spi, uart) operation region. It allows ACPI aml code able to access such kind of devices to implement some ACPI standard method. ACPI Spec defines some access attribute to associate with i2c protocol. AttribQuick Read/Write Quick Protocol AttribSendReceive Send/Receive Byte Protocol AttribByte Read/Write Byte Protocol AttribWord Read/Write Word Protocol AttribBlock Read/Write Block Protocol AttribBytes Read/Write N-Bytes Protocol AttribProcessCall Process Call Protocol AttribBlockProcessCall Write Block-Read Block Process Call Protocol AttribRawBytes Raw Read/Write N-BytesProtocol AttribRawProcessBytes Raw Process Call Protocol On the Asus T100TA, Bios use GenericSerialBus operation region to access i2c device to get battery info. Sample code From Asus T100TA Scope (_SB.I2C1) { Name (UMPC, ResourceTemplate () { I2cSerialBus (0x0066, ControllerInitiated, 0x00061A80, AddressingMode7Bit, "\\_SB.I2C1", 0x00, ResourceConsumer, , ) }) ... OperationRegion (DVUM, GenericSerialBus, Zero, 0x0100) Field (DVUM, BufferAcc, NoLock, Preserve) { Connection (UMPC), Offset (0x81), AccessAs (BufferAcc, AttribBytes (0x3E)), FGC0, 8 } ... } Device (BATC) { Name (_HID, EisaId ("PNP0C0A")) // _HID: Hardware ID Name (_UID, One) // _UID: Unique ID ... Method (_BST, 0, NotSerialized) // _BST: Battery Status { If (LEqual (AVBL, One)) { Store (FGC0, BFFG) If (LNotEqual (STAT, One)) { ShiftRight (CHST, 0x04, Local0) And (Local0, 0x03, Local0) If (LOr (LEqual (Local0, One), LEqual (Local0, 0x02))) { Store (0x02, Local1) } ... } The i2c operation region is defined under I2C1 scope. _BST method under battery device BATC read battery status from the field "FCG0". The request would be sent to i2c operation region handler. This patch is to add i2c ACPI operation region support. Due to there are only "Byte" and "Bytes" protocol access on the Asus T100TA, other protocols have not been tested. About RawBytes and RawProcessBytes protocol, they needs specific drivers to interpret reference data from AML code according ACPI 5.0 SPEC(5.5.2.4.5.3.9 and 5.5.2.4.5.3.10). So far, not found such case and will add when find real case. Signed-off-by: Lan Tianyu Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/Makefile | 5 +- drivers/i2c/i2c-acpi.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/i2c/i2c-core.c | 2 + include/linux/acpi.h | 11 ++ include/linux/i2c.h | 10 ++ 5 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 drivers/i2c/i2c-acpi.c (limited to 'include/linux') diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index 1722f50f2473..80db3073aa84 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -2,8 +2,11 @@ # Makefile for the i2c core. # +i2ccore-y := i2c-core.o +i2ccore-$(CONFIG_ACPI) += i2c-acpi.o + obj-$(CONFIG_I2C_BOARDINFO) += i2c-boardinfo.o -obj-$(CONFIG_I2C) += i2c-core.o +obj-$(CONFIG_I2C) += i2ccore.o obj-$(CONFIG_I2C_SMBUS) += i2c-smbus.o obj-$(CONFIG_I2C_CHARDEV) += i2c-dev.o obj-$(CONFIG_I2C_MUX) += i2c-mux.o diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c new file mode 100644 index 000000000000..f7f4c89c09b3 --- /dev/null +++ b/drivers/i2c/i2c-acpi.c @@ -0,0 +1,273 @@ +/* + * I2C ACPI code + * + * Copyright (C) 2014 Intel Corp + * + * Author: Lan Tianyu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#define pr_fmt(fmt) "I2C/ACPI : " fmt + +#include +#include +#include +#include +#include + +struct acpi_i2c_handler_data { + struct acpi_connection_info info; + struct i2c_adapter *adapter; +}; + +struct gsb_buffer { + u8 status; + u8 len; + union { + u16 wdata; + u8 bdata; + u8 data[0]; + }; +} __packed; + +static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, + u8 cmd, u8 *data, u8 data_len) +{ + + struct i2c_msg msgs[2]; + int ret; + u8 *buffer; + + buffer = kzalloc(data_len, GFP_KERNEL); + if (!buffer) + return AE_NO_MEMORY; + + msgs[0].addr = client->addr; + msgs[0].flags = client->flags; + msgs[0].len = 1; + msgs[0].buf = &cmd; + + msgs[1].addr = client->addr; + msgs[1].flags = client->flags | I2C_M_RD; + msgs[1].len = data_len; + msgs[1].buf = buffer; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret < 0) + dev_err(&client->adapter->dev, "i2c read failed\n"); + else + memcpy(data, buffer, data_len); + + kfree(buffer); + return ret; +} + +static int acpi_gsb_i2c_write_bytes(struct i2c_client *client, + u8 cmd, u8 *data, u8 data_len) +{ + + struct i2c_msg msgs[1]; + u8 *buffer; + int ret = AE_OK; + + buffer = kzalloc(data_len + 1, GFP_KERNEL); + if (!buffer) + return AE_NO_MEMORY; + + buffer[0] = cmd; + memcpy(buffer + 1, data, data_len); + + msgs[0].addr = client->addr; + msgs[0].flags = client->flags; + msgs[0].len = data_len + 1; + msgs[0].buf = buffer; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret < 0) + dev_err(&client->adapter->dev, "i2c write failed\n"); + + kfree(buffer); + return ret; +} + +static acpi_status +acpi_i2c_space_handler(u32 function, acpi_physical_address command, + u32 bits, u64 *value64, + void *handler_context, void *region_context) +{ + struct gsb_buffer *gsb = (struct gsb_buffer *)value64; + struct acpi_i2c_handler_data *data = handler_context; + struct acpi_connection_info *info = &data->info; + struct acpi_resource_i2c_serialbus *sb; + struct i2c_adapter *adapter = data->adapter; + struct i2c_client client; + struct acpi_resource *ares; + u32 accessor_type = function >> 16; + u8 action = function & ACPI_IO_MASK; + acpi_status ret = AE_OK; + int status; + + ret = acpi_buffer_to_resource(info->connection, info->length, &ares); + if (ACPI_FAILURE(ret)) + return ret; + + if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) { + ret = AE_BAD_PARAMETER; + goto err; + } + + sb = &ares->data.i2c_serial_bus; + if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) { + ret = AE_BAD_PARAMETER; + goto err; + } + + memset(&client, 0, sizeof(client)); + client.adapter = adapter; + client.addr = sb->slave_address; + client.flags = 0; + + if (sb->access_mode == ACPI_I2C_10BIT_MODE) + client.flags |= I2C_CLIENT_TEN; + + switch (accessor_type) { + case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV: + if (action == ACPI_READ) { + status = i2c_smbus_read_byte(&client); + if (status >= 0) { + gsb->bdata = status; + status = 0; + } + } else { + status = i2c_smbus_write_byte(&client, gsb->bdata); + } + break; + + case ACPI_GSB_ACCESS_ATTRIB_BYTE: + if (action == ACPI_READ) { + status = i2c_smbus_read_byte_data(&client, command); + if (status >= 0) { + gsb->bdata = status; + status = 0; + } + } else { + status = i2c_smbus_write_byte_data(&client, command, + gsb->bdata); + } + break; + + case ACPI_GSB_ACCESS_ATTRIB_WORD: + if (action == ACPI_READ) { + status = i2c_smbus_read_word_data(&client, command); + if (status >= 0) { + gsb->wdata = status; + status = 0; + } + } else { + status = i2c_smbus_write_word_data(&client, command, + gsb->wdata); + } + break; + + case ACPI_GSB_ACCESS_ATTRIB_BLOCK: + if (action == ACPI_READ) { + status = i2c_smbus_read_block_data(&client, command, + gsb->data); + if (status >= 0) { + gsb->len = status; + status = 0; + } + } else { + status = i2c_smbus_write_block_data(&client, command, + gsb->len, gsb->data); + } + break; + + case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE: + if (action == ACPI_READ) { + status = acpi_gsb_i2c_read_bytes(&client, command, + gsb->data, info->access_length); + if (status > 0) + status = 0; + } else { + status = acpi_gsb_i2c_write_bytes(&client, command, + gsb->data, info->access_length); + } + break; + + default: + pr_info("protocol(0x%02x) is not supported.\n", accessor_type); + ret = AE_BAD_PARAMETER; + goto err; + } + + gsb->status = status; + + err: + ACPI_FREE(ares); + return ret; +} + + +int acpi_i2c_install_space_handler(struct i2c_adapter *adapter) +{ + acpi_handle handle = ACPI_HANDLE(adapter->dev.parent); + struct acpi_i2c_handler_data *data; + acpi_status status; + + if (!handle) + return -ENODEV; + + data = kzalloc(sizeof(struct acpi_i2c_handler_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->adapter = adapter; + status = acpi_bus_attach_private_data(handle, (void *)data); + if (ACPI_FAILURE(status)) { + kfree(data); + return -ENOMEM; + } + + status = acpi_install_address_space_handler(handle, + ACPI_ADR_SPACE_GSBUS, + &acpi_i2c_space_handler, + NULL, + data); + if (ACPI_FAILURE(status)) { + dev_err(&adapter->dev, "Error installing i2c space handler\n"); + acpi_bus_detach_private_data(handle); + kfree(data); + return -ENOMEM; + } + + return 0; +} + +void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) +{ + acpi_handle handle = ACPI_HANDLE(adapter->dev.parent); + struct acpi_i2c_handler_data *data; + acpi_status status; + + if (!handle) + return; + + acpi_remove_address_space_handler(handle, + ACPI_ADR_SPACE_GSBUS, + &acpi_i2c_space_handler); + + status = acpi_bus_get_private_data(handle, (void **)&data); + if (ACPI_SUCCESS(status)) + kfree(data); + + acpi_bus_detach_private_data(handle); +} diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 7c7f4b856bad..e25cb84cb297 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1293,6 +1293,7 @@ exit_recovery: /* create pre-declared device nodes */ of_i2c_register_devices(adap); acpi_i2c_register_devices(adap); + acpi_i2c_install_space_handler(adap); if (adap->nr < __i2c_first_dynamic_bus_num) i2c_scan_static_board_info(adap); @@ -1466,6 +1467,7 @@ void i2c_del_adapter(struct i2c_adapter *adap) return; } + acpi_i2c_remove_space_handler(adap); /* Tell drivers about this removal */ mutex_lock(&core_lock); bus_for_each_drv(&i2c_bus_type, NULL, adap, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 358c01b971db..40718e91e171 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -364,6 +364,17 @@ extern bool osc_sb_apei_support_acked; #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL 0x00000010 #define OSC_PCI_CONTROL_MASKS 0x0000001f +#define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 +#define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 +#define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 +#define ACPI_GSB_ACCESS_ATTRIB_WORD 0x00000008 +#define ACPI_GSB_ACCESS_ATTRIB_BLOCK 0x0000000A +#define ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE 0x0000000B +#define ACPI_GSB_ACCESS_ATTRIB_WORD_CALL 0x0000000C +#define ACPI_GSB_ACCESS_ATTRIB_BLOCK_CALL 0x0000000D +#define ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES 0x0000000E +#define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS 0x0000000F + extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b556e0ab946f..f7a939a2cb56 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -577,4 +577,14 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node } #endif /* CONFIG_OF */ +#ifdef CONFIG_ACPI +int acpi_i2c_install_space_handler(struct i2c_adapter *adapter); +void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter); +#else +static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) +{ } +static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter) +{ return 0; } +#endif + #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From da3c6647ee08711c7edc28d7fea4ad69fc5ffcca Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 20 May 2014 20:59:24 +0800 Subject: I2C/ACPI: Clean up I2C ACPI code and Add CONFIG_I2C_ACPI config Clean up ACPI related code in the i2c core and add CONFIG_I2C_ACPI to enable I2C ACPI code. Current there is a race between removing I2C ACPI operation region and ACPI AML code accessing. So make i2c core built-in if CONFIG_I2C_ACPI is set. Reviewed-by: Mika Westerberg Signed-off-by: Lan Tianyu Signed-off-by: Wolfram Sang --- drivers/i2c/Kconfig | 18 +++++++++- drivers/i2c/Makefile | 2 +- drivers/i2c/i2c-acpi.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/i2c/i2c-core.c | 95 -------------------------------------------------- include/linux/i2c.h | 4 ++- 5 files changed, 110 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index 7b7ea320a258..3e3b680dc007 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -2,7 +2,9 @@ # I2C subsystem configuration # -menuconfig I2C +menu "I2C support" + +config I2C tristate "I2C support" select RT_MUTEXES ---help--- @@ -21,6 +23,18 @@ menuconfig I2C This I2C support can also be built as a module. If so, the module will be called i2c-core. +config I2C_ACPI + bool "I2C ACPI support" + select I2C + depends on ACPI + default y + help + Say Y here if you want to enable ACPI I2C support. This includes support + for automatic enumeration of I2C slave devices and support for ACPI I2C + Operation Regions. Operation Regions allow firmware (BIOS) code to + access I2C slave devices, such as smart batteries through an I2C host + controller driver. + if I2C config I2C_BOARDINFO @@ -124,3 +138,5 @@ config I2C_DEBUG_BUS on. endif # I2C + +endmenu diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index 80db3073aa84..a1f590cbb435 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -3,7 +3,7 @@ # i2ccore-y := i2c-core.o -i2ccore-$(CONFIG_ACPI) += i2c-acpi.o +i2ccore-$(CONFIG_I2C_ACPI) += i2c-acpi.o obj-$(CONFIG_I2C_BOARDINFO) += i2c-boardinfo.o obj-$(CONFIG_I2C) += i2ccore.o diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c index f7f4c89c09b3..e8b61967334b 100644 --- a/drivers/i2c/i2c-acpi.c +++ b/drivers/i2c/i2c-acpi.c @@ -37,6 +37,95 @@ struct gsb_buffer { }; } __packed; +static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data) +{ + struct i2c_board_info *info = data; + + if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) { + struct acpi_resource_i2c_serialbus *sb; + + sb = &ares->data.i2c_serial_bus; + if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) { + info->addr = sb->slave_address; + if (sb->access_mode == ACPI_I2C_10BIT_MODE) + info->flags |= I2C_CLIENT_TEN; + } + } else if (info->irq < 0) { + struct resource r; + + if (acpi_dev_resource_interrupt(ares, 0, &r)) + info->irq = r.start; + } + + /* Tell the ACPI core to skip this resource */ + return 1; +} + +static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level, + void *data, void **return_value) +{ + struct i2c_adapter *adapter = data; + struct list_head resource_list; + struct i2c_board_info info; + struct acpi_device *adev; + int ret; + + if (acpi_bus_get_device(handle, &adev)) + return AE_OK; + if (acpi_bus_get_status(adev) || !adev->status.present) + return AE_OK; + + memset(&info, 0, sizeof(info)); + info.acpi_node.companion = adev; + info.irq = -1; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, + acpi_i2c_add_resource, &info); + acpi_dev_free_resource_list(&resource_list); + + if (ret < 0 || !info.addr) + return AE_OK; + + adev->power.flags.ignore_parent = true; + strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type)); + if (!i2c_new_device(adapter, &info)) { + adev->power.flags.ignore_parent = false; + dev_err(&adapter->dev, + "failed to add I2C device %s from ACPI\n", + dev_name(&adev->dev)); + } + + return AE_OK; +} + +/** + * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter + * @adap: pointer to adapter + * + * Enumerate all I2C slave devices behind this adapter by walking the ACPI + * namespace. When a device is found it will be added to the Linux device + * model and bound to the corresponding ACPI handle. + */ +void acpi_i2c_register_devices(struct i2c_adapter *adap) +{ + acpi_handle handle; + acpi_status status; + + if (!adap->dev.parent) + return; + + handle = ACPI_HANDLE(adap->dev.parent); + if (!handle) + return; + + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, + acpi_i2c_add_device, NULL, + adap, NULL); + if (ACPI_FAILURE(status)) + dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); +} + static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, u8 cmd, u8 *data, u8 data_len) { diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index e25cb84cb297..4ccff114b147 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1092,101 +1092,6 @@ EXPORT_SYMBOL(of_find_i2c_adapter_by_node); static void of_i2c_register_devices(struct i2c_adapter *adap) { } #endif /* CONFIG_OF */ -/* ACPI support code */ - -#if IS_ENABLED(CONFIG_ACPI) -static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data) -{ - struct i2c_board_info *info = data; - - if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) { - struct acpi_resource_i2c_serialbus *sb; - - sb = &ares->data.i2c_serial_bus; - if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) { - info->addr = sb->slave_address; - if (sb->access_mode == ACPI_I2C_10BIT_MODE) - info->flags |= I2C_CLIENT_TEN; - } - } else if (info->irq < 0) { - struct resource r; - - if (acpi_dev_resource_interrupt(ares, 0, &r)) - info->irq = r.start; - } - - /* Tell the ACPI core to skip this resource */ - return 1; -} - -static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level, - void *data, void **return_value) -{ - struct i2c_adapter *adapter = data; - struct list_head resource_list; - struct i2c_board_info info; - struct acpi_device *adev; - int ret; - - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - if (acpi_bus_get_status(adev) || !adev->status.present) - return AE_OK; - - memset(&info, 0, sizeof(info)); - info.acpi_node.companion = adev; - info.irq = -1; - - INIT_LIST_HEAD(&resource_list); - ret = acpi_dev_get_resources(adev, &resource_list, - acpi_i2c_add_resource, &info); - acpi_dev_free_resource_list(&resource_list); - - if (ret < 0 || !info.addr) - return AE_OK; - - adev->power.flags.ignore_parent = true; - strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type)); - if (!i2c_new_device(adapter, &info)) { - adev->power.flags.ignore_parent = false; - dev_err(&adapter->dev, - "failed to add I2C device %s from ACPI\n", - dev_name(&adev->dev)); - } - - return AE_OK; -} - -/** - * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter - * @adap: pointer to adapter - * - * Enumerate all I2C slave devices behind this adapter by walking the ACPI - * namespace. When a device is found it will be added to the Linux device - * model and bound to the corresponding ACPI handle. - */ -static void acpi_i2c_register_devices(struct i2c_adapter *adap) -{ - acpi_handle handle; - acpi_status status; - - if (!adap->dev.parent) - return; - - handle = ACPI_HANDLE(adap->dev.parent); - if (!handle) - return; - - status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, - acpi_i2c_add_device, NULL, - adap, NULL); - if (ACPI_FAILURE(status)) - dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); -} -#else -static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) {} -#endif /* CONFIG_ACPI */ - static int i2c_do_add_adapter(struct i2c_driver *driver, struct i2c_adapter *adap) { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f7a939a2cb56..ea507665896c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -577,10 +577,12 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node } #endif /* CONFIG_OF */ -#ifdef CONFIG_ACPI +#ifdef CONFIG_I2C_ACPI int acpi_i2c_install_space_handler(struct i2c_adapter *adapter); void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter); +void acpi_i2c_register_devices(struct i2c_adapter *adap); #else +static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { } static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) { } static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter) -- cgit v1.2.3 From 981409b25e2a99409b26daa67293ca1cfd5ea0a0 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 16 Nov 2012 14:46:04 +0530 Subject: fbdev: arm has __raw I/O accessors, use them in fb.h This removes the sparse warnings on arm platforms: warning: cast removes address space of expression Signed-off-by: Archit Taneja Signed-off-by: Tomi Valkeinen Cc: H Hartley Sweeten Cc: Alexander Shiyan Cc: Russell King --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index b6bfda99add3..09bb7a18d287 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -553,7 +553,7 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_memcpy_fromfb sbus_memcpy_fromio #define fb_memcpy_tofb sbus_memcpy_toio -#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__) +#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__) || defined(__arm__) #define fb_readb __raw_readb #define fb_readw __raw_readw -- cgit v1.2.3 From 780db2071ac4d167ee4154ad9c96088f1bba044b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 10:31:13 -0600 Subject: blk-mq: decouble blk-mq freezing from generic bypassing blk_mq freezing is entangled with generic bypassing which bypasses blkcg and io scheduler and lets IO requests fall through the block layer to the drivers in FIFO order. This allows forward progress on IOs with the advanced features disabled so that those features can be configured or altered without worrying about stalling IO which may lead to deadlock through memory allocation. However, generic bypassing doesn't quite fit blk-mq. blk-mq currently doesn't make use of blkcg or ioscheds and it maps bypssing to freezing, which blocks request processing and drains all the in-flight ones. This causes problems as bypassing assumes that request processing is online. blk-mq works around this by conditionally allowing request processing for the problem case - during queue initialization. Another weirdity is that except for during queue cleanup, bypassing started on the generic side prevents blk-mq from processing new requests but doesn't drain the in-flight ones. This shouldn't break anything but again highlights that something isn't quite right here. The root cause is conflating blk-mq freezing and generic bypassing which are two different mechanisms. The only intersecting purpose that they serve is during queue cleanup. Let's properly separate blk-mq freezing from generic bypassing and simply use it where necessary. * request_queue->mq_freeze_depth is added and blk_mq_[un]freeze_queue() now operate on this counter instead of ->bypass_depth. The replacement for QUEUE_FLAG_BYPASS isn't added but the counter is tested directly. This will be further updated by later changes. * blk_mq_drain_queue() is dropped and "__" prefix is dropped from blk_mq_freeze_queue(). Queue cleanup path now calls blk_mq_freeze_queue() directly. * blk_queue_enter()'s fast path condition is simplified to simply check @q->mq_freeze_depth. Previously, the condition was !blk_queue_dying(q) && (!blk_queue_bypass(q) || !blk_queue_init_done(q)) mq_freeze_depth is incremented right after dying is set and blk_queue_init_done() exception isn't necessary as blk-mq doesn't start frozen, which only leaves the blk_queue_bypass() test which can be replaced by @q->mq_freeze_depth test. This change simplifies the code and reduces confusion in the area. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. Bellinger Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq.c | 17 ++++++----------- block/blk-mq.h | 2 +- include/linux/blkdev.h | 1 + 4 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 0d0bdd65b2d7..c359d72e9d76 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -514,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. */ if (q->mq_ops) { - blk_mq_drain_queue(q); + blk_mq_freeze_queue(q); spin_lock_irq(lock); } else { spin_lock_irq(lock); diff --git a/block/blk-mq.c b/block/blk-mq.c index f4bdddd7ed99..1e324a123d40 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -84,15 +84,14 @@ static int blk_mq_queue_enter(struct request_queue *q) smp_mb(); /* we have problems freezing the queue if it's initializing */ - if (!blk_queue_dying(q) && - (!blk_queue_bypass(q) || !blk_queue_init_done(q))) + if (!q->mq_freeze_depth) return 0; __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); spin_lock_irq(q->queue_lock); ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !blk_queue_bypass(q) || blk_queue_dying(q), + !q->mq_freeze_depth || blk_queue_dying(q), *q->queue_lock); /* inc usage with lock hold to avoid freeze_queue runs here */ if (!ret && !blk_queue_dying(q)) @@ -129,11 +128,10 @@ void blk_mq_drain_queue(struct request_queue *q) * Guarantee no request is in use, so we can change any data structure of * the queue afterward. */ -static void blk_mq_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue(struct request_queue *q) { spin_lock_irq(q->queue_lock); - q->bypass_depth++; - queue_flag_set(QUEUE_FLAG_BYPASS, q); + q->mq_freeze_depth++; spin_unlock_irq(q->queue_lock); blk_mq_drain_queue(q); @@ -144,11 +142,8 @@ static void blk_mq_unfreeze_queue(struct request_queue *q) bool wake = false; spin_lock_irq(q->queue_lock); - if (!--q->bypass_depth) { - queue_flag_clear(QUEUE_FLAG_BYPASS, q); - wake = true; - } - WARN_ON_ONCE(q->bypass_depth < 0); + wake = !--q->mq_freeze_depth; + WARN_ON_ONCE(q->mq_freeze_depth < 0); spin_unlock_irq(q->queue_lock); if (wake) wake_up_all(&q->mq_freeze_wq); diff --git a/block/blk-mq.h b/block/blk-mq.h index 26460884c6cd..ca4964a6295d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,7 +28,7 @@ struct blk_mq_ctx { void __blk_mq_complete_request(struct request *rq); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); -void blk_mq_drain_queue(struct request_queue *q); +void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); void blk_mq_clone_flush_request(struct request *flush_rq, struct request *orig_rq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8699bcf5f099..c8f344ff74fe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -470,6 +470,7 @@ struct request_queue { struct mutex sysfs_lock; int bypass_depth; + int mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; -- cgit v1.2.3 From add703fda981b9719d37f371498b9f129acbd997 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 10:34:38 -0600 Subject: blk-mq: use percpu_ref for mq usage count Currently, blk-mq uses a percpu_counter to keep track of how many usages are in flight. The percpu_counter is drained while freezing to ensure that no usage is left in-flight after freezing is complete. blk_mq_queue_enter/exit() and blk_mq_[un]freeze_queue() implement this per-cpu gating mechanism. This type of code has relatively high chance of subtle bugs which are extremely difficult to trigger and it's way too hairy to be open coded in blk-mq. percpu_ref can serve the same purpose after the recent changes. This patch replaces the open-coded per-cpu usage counting and draining mechanism with percpu_ref. blk_mq_queue_enter() performs tryget_live on the ref and exit() performs put. blk_mq_freeze_queue() kills the ref and waits until the reference count reaches zero. blk_mq_unfreeze_queue() revives the ref and wakes up the waiters. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. Bellinger Cc: Kent Overstreet Signed-off-by: Jens Axboe --- block/blk-mq.c | 68 +++++++++++++++++++++----------------------------- include/linux/blkdev.h | 3 ++- 2 files changed, 31 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 22682fb4be65..5189cb1e478a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -78,34 +78,32 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, static int blk_mq_queue_enter(struct request_queue *q) { - int ret; - - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - smp_mb(); - - /* we have problems freezing the queue if it's initializing */ - if (!q->mq_freeze_depth) - return 0; - - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + while (true) { + int ret; - spin_lock_irq(q->queue_lock); - ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !q->mq_freeze_depth || blk_queue_dying(q), - *q->queue_lock); - /* inc usage with lock hold to avoid freeze_queue runs here */ - if (!ret && !blk_queue_dying(q)) - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - else if (blk_queue_dying(q)) - ret = -ENODEV; - spin_unlock_irq(q->queue_lock); + if (percpu_ref_tryget_live(&q->mq_usage_counter)) + return 0; - return ret; + ret = wait_event_interruptible(q->mq_freeze_wq, + !q->mq_freeze_depth || blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; + if (ret) + return ret; + } } static void blk_mq_queue_exit(struct request_queue *q) { - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + percpu_ref_put(&q->mq_usage_counter); +} + +static void blk_mq_usage_counter_release(struct percpu_ref *ref) +{ + struct request_queue *q = + container_of(ref, struct request_queue, mq_usage_counter); + + wake_up_all(&q->mq_freeze_wq); } /* @@ -118,18 +116,9 @@ void blk_mq_freeze_queue(struct request_queue *q) q->mq_freeze_depth++; spin_unlock_irq(q->queue_lock); - while (true) { - s64 count; - - spin_lock_irq(q->queue_lock); - count = percpu_counter_sum(&q->mq_usage_counter); - spin_unlock_irq(q->queue_lock); - - if (count == 0) - break; - blk_mq_start_hw_queues(q); - msleep(10); - } + percpu_ref_kill(&q->mq_usage_counter); + blk_mq_run_queues(q, false); + wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); } static void blk_mq_unfreeze_queue(struct request_queue *q) @@ -140,8 +129,10 @@ static void blk_mq_unfreeze_queue(struct request_queue *q) wake = !--q->mq_freeze_depth; WARN_ON_ONCE(q->mq_freeze_depth < 0); spin_unlock_irq(q->queue_lock); - if (wake) + if (wake) { + percpu_ref_reinit(&q->mq_usage_counter); wake_up_all(&q->mq_freeze_wq); + } } bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) @@ -1785,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_counter_init(&q->mq_usage_counter, 0)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); @@ -1878,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); blk_mq_free_hw_queues(q, set); - percpu_counter_destroy(&q->mq_usage_counter); + percpu_ref_exit(&q->mq_usage_counter); free_percpu(q->queue_ctx); kfree(q->queue_hw_ctx); @@ -2037,8 +2028,7 @@ static int __init blk_mq_init(void) { blk_mq_cpu_init(); - /* Must be called after percpu_counter_hotcpu_callback() */ - hotcpu_notifier(blk_mq_queue_reinit_notify, -10); + hotcpu_notifier(blk_mq_queue_reinit_notify, 0); return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8f344ff74fe..518b46555b80 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -484,7 +485,7 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; - struct percpu_counter mq_usage_counter; + struct percpu_ref mq_usage_counter; struct list_head all_q_node; struct blk_mq_tag_set *tag_set; -- cgit v1.2.3 From cbcd1054a1fd2aa980fc11ff28e436fc4aaa2d54 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 1 Jul 2014 10:36:47 -0600 Subject: bio-integrity: add "bip_max_vcnt" into struct bio_integrity_payload Commit 08778795 ("block: Fix nr_vecs for inline integrity vectors") from Martin introduces the function bip_integrity_vecs(get the useful vectors) to fix the issue about nr_vecs for inline integrity vectors that reported by David Milburn. But it seems that bip_integrity_vecs() will return the wrong number if the bio is not based on any bio_set for some reason(bio->bi_pool == NULL), because in that case, the bip_inline_vecs[0] is malloced directly. So here we add the bip_max_vcnt to record the count of vector slots, and cleanup the function bip_integrity_vecs(). Signed-off-by: Gu Zheng Cc: Martin K. Petersen Cc: Kent Overstreet Signed-off-by: Jens Axboe --- block/bio-integrity.c | 12 +++--------- include/linux/bio.h | 1 + 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 9e241063a616..bc423f7b02da 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; + bip->bip_max_vcnt = bvec_nr_vecs(idx); } else { bip->bip_vec = bip->bip_inline_vecs; + bip->bip_max_vcnt = inline_vecs; } bip->bip_slab = idx; @@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio) } EXPORT_SYMBOL(bio_integrity_free); -static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) -{ - if (bip->bip_slab == BIO_POOL_NONE) - return BIP_INLINE_VECS; - - return bvec_nr_vecs(bip->bip_slab); -} - /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update @@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { + if (bip->bip_vcnt >= bip->bip_max_vcnt) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } diff --git a/include/linux/bio.h b/include/linux/bio.h index d2633ee099d9..b39e5000ff58 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -308,6 +308,7 @@ struct bio_integrity_payload { unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ + unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned bip_owns_buf:1; /* should free bip_buf */ struct work_struct bip_work; /* I/O completion */ -- cgit v1.2.3 From 8b37e1bef5a6b60e949e28a4db3006e4b00bd758 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Sat, 14 Jun 2014 02:21:40 -0700 Subject: leds: convert blink timer to workqueue This patch converts the blink timer from led-core to workqueue which is more suitable for this kind of non-priority operations. Moreover, timer may lead to errors when a LED setting function use a scheduling function such as pinctrl which is using mutex. Signed-off-by: Vincent Donnefort Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 14 +++++++------- drivers/leds/led-core.c | 11 ++++++----- include/linux/leds.h | 3 +-- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index aa29198fca3e..129729d35478 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -15,10 +15,10 @@ #include #include #include -#include #include #include #include +#include #include "leds.h" static struct class *leds_class; @@ -97,9 +97,10 @@ static const struct attribute_group *led_groups[] = { NULL, }; -static void led_timer_function(unsigned long data) +static void led_work_function(struct work_struct *ws) { - struct led_classdev *led_cdev = (void *)data; + struct led_classdev *led_cdev = + container_of(ws, struct led_classdev, blink_work.work); unsigned long brightness; unsigned long delay; @@ -143,7 +144,8 @@ static void led_timer_function(unsigned long data) } } - mod_timer(&led_cdev->blink_timer, jiffies + msecs_to_jiffies(delay)); + queue_delayed_work(system_wq, &led_cdev->blink_work, + msecs_to_jiffies(delay)); } static void set_brightness_delayed(struct work_struct *ws) @@ -231,9 +233,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed); - init_timer(&led_cdev->blink_timer); - led_cdev->blink_timer.function = led_timer_function; - led_cdev->blink_timer.data = (unsigned long)led_cdev; + INIT_DELAYED_WORK(&led_cdev->blink_work, led_work_function); #ifdef CONFIG_LEDS_TRIGGERS led_trigger_set_default(led_cdev); diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 71b40d3bf776..4bb116867b88 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "leds.h" DECLARE_RWSEM(leds_list_lock); @@ -51,7 +52,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev, return; } - mod_timer(&led_cdev->blink_timer, jiffies + 1); + queue_delayed_work(system_wq, &led_cdev->blink_work, 1); } @@ -75,7 +76,7 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - del_timer_sync(&led_cdev->blink_timer); + cancel_delayed_work_sync(&led_cdev->blink_work); led_cdev->flags &= ~LED_BLINK_ONESHOT; led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP; @@ -90,7 +91,7 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev, int invert) { if ((led_cdev->flags & LED_BLINK_ONESHOT) && - timer_pending(&led_cdev->blink_timer)) + delayed_work_pending(&led_cdev->blink_work)) return; led_cdev->flags |= LED_BLINK_ONESHOT; @@ -107,7 +108,7 @@ EXPORT_SYMBOL(led_blink_set_oneshot); void led_stop_software_blink(struct led_classdev *led_cdev) { - del_timer_sync(&led_cdev->blink_timer); + cancel_delayed_work_sync(&led_cdev->blink_work); led_cdev->blink_delay_on = 0; led_cdev->blink_delay_off = 0; } @@ -116,7 +117,7 @@ EXPORT_SYMBOL_GPL(led_stop_software_blink); void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness) { - /* delay brightness setting if need to stop soft-blink timer */ + /* delay brightness setting if need to stop soft-blink work */ if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) { led_cdev->delayed_set_value = brightness; schedule_work(&led_cdev->set_brightness_work); diff --git a/include/linux/leds.h b/include/linux/leds.h index e43686472197..6a599dce7f9d 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -15,7 +15,6 @@ #include #include #include -#include #include struct device; @@ -69,7 +68,7 @@ struct led_classdev { const char *default_trigger; /* Trigger to use */ unsigned long blink_delay_on, blink_delay_off; - struct timer_list blink_timer; + struct delayed_work blink_work; int blink_brightness; struct work_struct set_brightness_work; -- cgit v1.2.3 From 75f353b61342b5847c7f6d8499fd6301dce09845 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 24 Jun 2014 16:13:47 +0100 Subject: of/platform: Fix of_platform_device_destroy iteration of devices of_platform_destroy does not work properly, since the tree population test was iterating on all devices having as its parent the given platform device. The check was intended to check whether any other platform or amba devices created by of_platform_populate were still populated, but instead checked for every kind of device. This is wrong, since platform devices typically create a subsystem regular device and set themselves as parents. Instead, go ahead and call the unregister functions for any devices created with of_platform_populate. The driver core will take care of unbinding drivers, and drivers are responsible for getting rid of any child devices that weren't created by of_platform_populate. Signed-off-by: Grant Likely Signed-off-by: Pantelis Antoniou --- drivers/of/platform.c | 32 +++++++++----------------------- include/linux/of.h | 1 + include/linux/of_platform.h | 7 ++----- 3 files changed, 12 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 500436f9be7f..0197725e033a 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -422,6 +422,7 @@ static int of_platform_bus_create(struct device_node *bus, break; } } + of_node_set_flag(bus, OF_POPULATED_BUS); return rc; } @@ -508,19 +509,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate); static int of_platform_device_destroy(struct device *dev, void *data) { - bool *children_left = data; - /* Do not touch devices not populated from the device tree */ - if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) { - *children_left = true; + if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) return 0; - } - /* Recurse, but don't touch this device if it has any children left */ - if (of_platform_depopulate(dev) != 0) { - *children_left = true; - return 0; - } + /* Recurse for any nodes that were treated as busses */ + if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS)) + device_for_each_child(dev, NULL, of_platform_device_destroy); if (dev->bus == &platform_bus_type) platform_device_unregister(to_platform_device(dev)); @@ -528,19 +523,15 @@ static int of_platform_device_destroy(struct device *dev, void *data) else if (dev->bus == &amba_bustype) amba_device_unregister(to_amba_device(dev)); #endif - else { - *children_left = true; - return 0; - } of_node_clear_flag(dev->of_node, OF_POPULATED); - + of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; } /** * of_platform_depopulate() - Remove devices populated from device tree - * @parent: device which childred will be removed + * @parent: device which children will be removed * * Complementary to of_platform_populate(), this function removes children * of the given device (and, recurrently, their children) that have been @@ -550,14 +541,9 @@ static int of_platform_device_destroy(struct device *dev, void *data) * Returns 0 when all children devices have been removed or * -EBUSY when some children remained. */ -int of_platform_depopulate(struct device *parent) +void of_platform_depopulate(struct device *parent) { - bool children_left = false; - - device_for_each_child(parent, &children_left, - of_platform_device_destroy); - - return children_left ? -EBUSY : 0; + device_for_each_child(parent, NULL, of_platform_device_destroy); } EXPORT_SYMBOL_GPL(of_platform_depopulate); diff --git a/include/linux/of.h b/include/linux/of.h index 196b34c1ef4e..abf829a1f150 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -204,6 +204,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ #define OF_DETACHED 2 /* node has been detached from the device tree */ #define OF_POPULATED 3 /* device already created for the node */ +#define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index d96e1badbee0..c2b0627a2317 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -72,7 +72,7 @@ extern int of_platform_populate(struct device_node *root, const struct of_device_id *matches, const struct of_dev_auxdata *lookup, struct device *parent); -extern int of_platform_depopulate(struct device *parent); +extern void of_platform_depopulate(struct device *parent); #else static inline int of_platform_populate(struct device_node *root, const struct of_device_id *matches, @@ -81,10 +81,7 @@ static inline int of_platform_populate(struct device_node *root, { return -ENODEV; } -static inline int of_platform_depopulate(struct device *parent) -{ - return -ENODEV; -} +static inline void of_platform_depopulate(struct device *parent) { } #endif #endif /* _LINUX_OF_PLATFORM_H */ -- cgit v1.2.3 From 1d0326b13bc9ecab5c784415165e6f78fb06ae5b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 20 Jun 2014 14:14:41 +0400 Subject: libceph: rename ceph_osd_request::r_linger_osd to r_linger_osd_item So that: req->r_osd_item --> osd->o_requests list req->r_linger_osd_item --> osd->o_linger_requests list Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 94ec69672164..7490a03ac163 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -117,7 +117,7 @@ struct ceph_osd_request { struct list_head r_req_lru_item; struct list_head r_osd_item; struct list_head r_linger_item; - struct list_head r_linger_osd; + struct list_head r_linger_osd_item; struct ceph_osd *r_osd; struct ceph_pg r_pgid; int r_pg_osds[CEPH_PG_MAX_SIZE]; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 05be0c181695..d5d2be3bd113 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -364,7 +364,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, RB_CLEAR_NODE(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); - INIT_LIST_HEAD(&req->r_linger_osd); + INIT_LIST_HEAD(&req->r_linger_osd_item); INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); @@ -916,7 +916,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, * list at the end to keep things in tid order. */ list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, - r_linger_osd) { + r_linger_osd_item) { /* * reregister request prior to unregistering linger so * that r_osd is preserved. @@ -1218,7 +1218,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc, ceph_osdc_get_request(req); list_add_tail(&req->r_linger_item, &osdc->req_linger); if (req->r_osd) - list_add_tail(&req->r_linger_osd, + list_add_tail(&req->r_linger_osd_item, &req->r_osd->o_linger_requests); } @@ -1228,7 +1228,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, dout("__unregister_linger_request %p\n", req); list_del_init(&req->r_linger_item); if (req->r_osd) { - list_del_init(&req->r_linger_osd); + list_del_init(&req->r_linger_osd_item); if (list_empty(&req->r_osd->o_requests) && list_empty(&req->r_osd->o_linger_requests)) { -- cgit v1.2.3 From 0215e44bb390a968d01404aa2f35af56f9b55fc8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 20 Jun 2014 14:14:41 +0400 Subject: libceph: move and add dout()s to ceph_msg_{get,put}() Add dout()s to ceph_msg_{get,put}(). Also move them to .c and turn kref release callback into a static function. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/messenger.h | 14 ++------------ net/ceph/messenger.c | 31 ++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index d21f2dba0731..40ae58e3e9db 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -285,19 +285,9 @@ extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, bool can_fail); -extern void ceph_msg_kfree(struct ceph_msg *m); - -static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) -{ - kref_get(&msg->kref); - return msg; -} -extern void ceph_msg_last_put(struct kref *kref); -static inline void ceph_msg_put(struct ceph_msg *msg) -{ - kref_put(&msg->kref, ceph_msg_last_put); -} +extern struct ceph_msg *ceph_msg_get(struct ceph_msg *msg); +extern void ceph_msg_put(struct ceph_msg *msg); extern void ceph_msg_dump(struct ceph_msg *msg); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1948d592aa54..8bffa5b90fef 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3269,24 +3269,21 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) /* * Free a generically kmalloc'd message. */ -void ceph_msg_kfree(struct ceph_msg *m) +static void ceph_msg_free(struct ceph_msg *m) { - dout("msg_kfree %p\n", m); + dout("%s %p\n", __func__, m); ceph_kvfree(m->front.iov_base); kmem_cache_free(ceph_msg_cache, m); } -/* - * Drop a msg ref. Destroy as needed. - */ -void ceph_msg_last_put(struct kref *kref) +static void ceph_msg_release(struct kref *kref) { struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); LIST_HEAD(data); struct list_head *links; struct list_head *next; - dout("ceph_msg_put last one on %p\n", m); + dout("%s %p\n", __func__, m); WARN_ON(!list_empty(&m->list_head)); /* drop middle, data, if any */ @@ -3308,9 +3305,25 @@ void ceph_msg_last_put(struct kref *kref) if (m->pool) ceph_msgpool_put(m->pool, m); else - ceph_msg_kfree(m); + ceph_msg_free(m); +} + +struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) +{ + dout("%s %p (was %d)\n", __func__, msg, + atomic_read(&msg->kref.refcount)); + kref_get(&msg->kref); + return msg; +} +EXPORT_SYMBOL(ceph_msg_get); + +void ceph_msg_put(struct ceph_msg *msg) +{ + dout("%s %p (was %d)\n", __func__, msg, + atomic_read(&msg->kref.refcount)); + kref_put(&msg->kref, ceph_msg_release); } -EXPORT_SYMBOL(ceph_msg_last_put); +EXPORT_SYMBOL(ceph_msg_put); void ceph_msg_dump(struct ceph_msg *msg) { -- cgit v1.2.3 From 9e94af202afd961da39f82b55ba83edd4ad30e98 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 20 Jun 2014 14:14:42 +0400 Subject: libceph: move and add dout()s to ceph_osdc_request_{get,put}() Add dout()s to ceph_osdc_request_{get,put}(). Also move them to .c and turn kref release callback into a static function. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 11 ++--------- net/ceph/osd_client.c | 26 ++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 7490a03ac163..a8d5652f589d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -328,15 +328,8 @@ extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static inline void ceph_osdc_get_request(struct ceph_osd_request *req) -{ - kref_get(&req->r_kref); -} -extern void ceph_osdc_release_request(struct kref *kref); -static inline void ceph_osdc_put_request(struct ceph_osd_request *req) -{ - kref_put(&req->r_kref, ceph_osdc_release_request); -} +extern void ceph_osdc_get_request(struct ceph_osd_request *req); +extern void ceph_osdc_put_request(struct ceph_osd_request *req); extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6202923b41ff..7406046212dc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -297,12 +297,15 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, /* * requests */ -void ceph_osdc_release_request(struct kref *kref) +static void ceph_osdc_release_request(struct kref *kref) { - struct ceph_osd_request *req; + struct ceph_osd_request *req = container_of(kref, + struct ceph_osd_request, r_kref); unsigned int which; - req = container_of(kref, struct ceph_osd_request, r_kref); + dout("%s %p (r_request %p r_reply %p)\n", __func__, req, + req->r_request, req->r_reply); + if (req->r_request) ceph_msg_put(req->r_request); if (req->r_reply) { @@ -320,7 +323,22 @@ void ceph_osdc_release_request(struct kref *kref) kmem_cache_free(ceph_osd_request_cache, req); } -EXPORT_SYMBOL(ceph_osdc_release_request); + +void ceph_osdc_get_request(struct ceph_osd_request *req) +{ + dout("%s %p (was %d)\n", __func__, req, + atomic_read(&req->r_kref.refcount)); + kref_get(&req->r_kref); +} +EXPORT_SYMBOL(ceph_osdc_get_request); + +void ceph_osdc_put_request(struct ceph_osd_request *req) +{ + dout("%s %p (was %d)\n", __func__, req, + atomic_read(&req->r_kref.refcount)); + kref_put(&req->r_kref, ceph_osdc_release_request); +} +EXPORT_SYMBOL(ceph_osdc_put_request); struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, -- cgit v1.2.3 From c9f9b93ddfd76498fe36d9f550bd26533a4ee6bf Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 19 Jun 2014 11:38:13 +0400 Subject: libceph: introduce ceph_osdc_cancel_request() Introduce ceph_osdc_cancel_request() intended for canceling requests from the higher layers (rbd and cephfs). Because higher layers are in charge and are supposed to know what and when they are canceling, the request is not completed, only unref'ed and removed from the libceph data structures. __cancel_request() is no longer called before __unregister_request(), because __unregister_request() unconditionally revokes r_request and there is no point in trying to do it twice. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 1 + net/ceph/osd_client.c | 31 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index a8d5652f589d..de09cad7b7c7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -334,6 +334,7 @@ extern void ceph_osdc_put_request(struct ceph_osd_request *req); extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, bool nofail); +extern void ceph_osdc_cancel_request(struct ceph_osd_request *req); extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); extern void ceph_osdc_sync(struct ceph_osd_client *osdc); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 89d7d8861d80..6c1ccf5590a3 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2467,6 +2467,25 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, } EXPORT_SYMBOL(ceph_osdc_start_request); +/* + * Unregister a registered request. The request is not completed (i.e. + * no callbacks or wakeups) - higher layers are supposed to know what + * they are canceling. + */ +void ceph_osdc_cancel_request(struct ceph_osd_request *req) +{ + struct ceph_osd_client *osdc = req->r_osdc; + + mutex_lock(&osdc->request_mutex); + if (req->r_linger) + __unregister_linger_request(osdc, req); + __unregister_request(osdc, req); + mutex_unlock(&osdc->request_mutex); + + dout("%s %p tid %llu canceled\n", __func__, req, req->r_tid); +} +EXPORT_SYMBOL(ceph_osdc_cancel_request); + /* * wait for a request to complete */ @@ -2475,18 +2494,18 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, { int rc; + dout("%s %p tid %llu\n", __func__, req, req->r_tid); + rc = wait_for_completion_interruptible(&req->r_completion); if (rc < 0) { - mutex_lock(&osdc->request_mutex); - __cancel_request(req); - __unregister_request(osdc, req); - mutex_unlock(&osdc->request_mutex); + dout("%s %p tid %llu interrupted\n", __func__, req, req->r_tid); + ceph_osdc_cancel_request(req); complete_request(req); - dout("wait_request tid %llu canceled/timed out\n", req->r_tid); return rc; } - dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); + dout("%s %p tid %llu result %d\n", __func__, req, req->r_tid, + req->r_result); return req->r_result; } EXPORT_SYMBOL(ceph_osdc_wait_request); -- cgit v1.2.3 From 2d05f082cbc73b837011225b165d64d25b47c940 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Jun 2014 16:21:45 +0400 Subject: libceph: nuke ceph_osdc_unregister_linger_request() Remove now unused ceph_osdc_unregister_linger_request(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 -- net/ceph/osd_client.c | 10 ---------- 2 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index de09cad7b7c7..03aeb27fcc69 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -325,8 +325,6 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req); extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6c1ccf5590a3..30f6faf3584f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1281,16 +1281,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, ceph_osdc_put_request(req); } -void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) -{ - mutex_lock(&osdc->request_mutex); - if (req->r_linger) - __unregister_linger_request(osdc, req); - mutex_unlock(&osdc->request_mutex); -} -EXPORT_SYMBOL(ceph_osdc_unregister_linger_request); - void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { -- cgit v1.2.3 From 8471bb73ba10ed6788b4f1e9b8a0f9dc6bdb05b5 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 21 May 2014 19:06:12 -0300 Subject: mtd: Introduce mtd_block_isreserved() In addition to mtd_block_isbad(), which checks if a block is bad or reserved, it's needed to check if a block is reserved only (but not bad). This commit adds an MTD interface for it, in a similar fashion to mtd_block_isbad(). While here, fix mtd_block_isbad() so the out-of-bounds checking is done before the callback check. Signed-off-by: Ezequiel Garcia Tested-by: Pekon Gupta Signed-off-by: Brian Norris --- drivers/mtd/mtdcore.c | 14 ++++++++++++-- drivers/mtd/mtdpart.c | 9 +++++++++ drivers/mtd/nand/nand_base.c | 18 ++++++++++++++++++ drivers/mtd/nand/nand_bbt.c | 14 ++++++++++++++ include/linux/mtd/mtd.h | 2 ++ include/linux/mtd/nand.h | 1 + 6 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 11857faa0d96..e4831b4159db 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1043,12 +1043,22 @@ int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) } EXPORT_SYMBOL_GPL(mtd_is_locked); -int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) +int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs) { - if (!mtd->_block_isbad) + if (ofs < 0 || ofs > mtd->size) + return -EINVAL; + if (!mtd->_block_isreserved) return 0; + return mtd->_block_isreserved(mtd, ofs); +} +EXPORT_SYMBOL_GPL(mtd_block_isreserved); + +int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) +{ if (ofs < 0 || ofs > mtd->size) return -EINVAL; + if (!mtd->_block_isbad) + return 0; return mtd->_block_isbad(mtd, ofs); } EXPORT_SYMBOL_GPL(mtd_block_isbad); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 1ca9aec141ff..921e8c647884 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -290,6 +290,13 @@ static void part_resume(struct mtd_info *mtd) part->master->_resume(part->master); } +static int part_block_isreserved(struct mtd_info *mtd, loff_t ofs) +{ + struct mtd_part *part = PART(mtd); + ofs += part->offset; + return part->master->_block_isreserved(part->master, ofs); +} + static int part_block_isbad(struct mtd_info *mtd, loff_t ofs) { struct mtd_part *part = PART(mtd); @@ -422,6 +429,8 @@ static struct mtd_part *allocate_partition(struct mtd_info *master, slave->mtd._unlock = part_unlock; if (master->_is_locked) slave->mtd._is_locked = part_is_locked; + if (master->_block_isreserved) + slave->mtd._block_isreserved = part_block_isreserved; if (master->_block_isbad) slave->mtd._block_isbad = part_block_isbad; if (master->_block_markbad) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 41167e9e991e..0c505dd1f522 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -484,6 +484,23 @@ static int nand_check_wp(struct mtd_info *mtd) return (chip->read_byte(mtd) & NAND_STATUS_WP) ? 0 : 1; } +/** + * nand_block_checkbad - [GENERIC] Check if a block is marked bad + * @mtd: MTD device structure + * @ofs: offset from device start + * + * Check if the block is mark as reserved. + */ +static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs) +{ + struct nand_chip *chip = mtd->priv; + + if (!chip->bbt) + return 0; + /* Return info from the table */ + return nand_isreserved_bbt(mtd, ofs); +} + /** * nand_block_checkbad - [GENERIC] Check if a block is marked bad * @mtd: MTD device structure @@ -4111,6 +4128,7 @@ int nand_scan_tail(struct mtd_info *mtd) mtd->_unlock = NULL; mtd->_suspend = nand_suspend; mtd->_resume = nand_resume; + mtd->_block_isreserved = nand_block_isreserved; mtd->_block_isbad = nand_block_isbad; mtd->_block_markbad = nand_block_markbad; mtd->writebufsize = mtd->writesize; diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 7f0c3b4c2a4f..443fa82cde6a 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -1310,6 +1310,20 @@ int nand_default_bbt(struct mtd_info *mtd) return nand_scan_bbt(mtd, this->badblock_pattern); } +/** + * nand_isreserved_bbt - [NAND Interface] Check if a block is reserved + * @mtd: MTD device structure + * @offs: offset in the device + */ +int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs) +{ + struct nand_chip *this = mtd->priv; + int block; + + block = (int)(offs >> this->bbt_erase_shift); + return bbt_get_entry(this, block) == BBT_BLOCK_RESERVED; +} + /** * nand_isbad_bbt - [NAND Interface] Check if a block is bad * @mtd: MTD device structure diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index a1b0b4c8fd79..031ff3a9a0bd 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -222,6 +222,7 @@ struct mtd_info { int (*_lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*_unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*_is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len); + int (*_block_isreserved) (struct mtd_info *mtd, loff_t ofs); int (*_block_isbad) (struct mtd_info *mtd, loff_t ofs); int (*_block_markbad) (struct mtd_info *mtd, loff_t ofs); int (*_suspend) (struct mtd_info *mtd); @@ -302,6 +303,7 @@ static inline void mtd_sync(struct mtd_info *mtd) int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len); +int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs); int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 2f0af2891f0f..1cff329ae13d 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -810,6 +810,7 @@ extern struct nand_manufacturers nand_manuf_ids[]; extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd); extern int nand_default_bbt(struct mtd_info *mtd); extern int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); +extern int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); extern int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); extern int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, int allowbbt); -- cgit v1.2.3 From 6b32fafee2bb5fcf0b3d3d04a9762d3a0212089e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 20 Jun 2014 14:37:38 +0200 Subject: dmaengine: shdma: Add more register documentation Also add a few definitions that were missing. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/sh/include/asm/dma-register.h | 36 +++++++++++++++++++----------------- drivers/dma/sh/shdmac.c | 12 ++++++------ include/linux/sh_dma.h | 24 +++++++++++++----------- 3 files changed, 38 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/include/asm/dma-register.h b/arch/sh/include/asm/dma-register.h index 51cd78feacff..c757b47e6b64 100644 --- a/arch/sh/include/asm/dma-register.h +++ b/arch/sh/include/asm/dma-register.h @@ -13,17 +13,17 @@ #ifndef DMA_REGISTER_H #define DMA_REGISTER_H -/* DMA register */ -#define SAR 0x00 -#define DAR 0x04 -#define TCR 0x08 -#define CHCR 0x0C -#define DMAOR 0x40 +/* DMA registers */ +#define SAR 0x00 /* Source Address Register */ +#define DAR 0x04 /* Destination Address Register */ +#define TCR 0x08 /* Transfer Count Register */ +#define CHCR 0x0C /* Channel Control Register */ +#define DMAOR 0x40 /* DMA Operation Register */ /* DMAOR definitions */ -#define DMAOR_AE 0x00000004 +#define DMAOR_AE 0x00000004 /* Address Error Flag */ #define DMAOR_NMIF 0x00000002 -#define DMAOR_DME 0x00000001 +#define DMAOR_DME 0x00000001 /* DMA Master Enable */ /* Definitions for the SuperH DMAC */ #define REQ_L 0x00000000 @@ -34,18 +34,20 @@ #define ACK_W 0x00020000 #define ACK_H 0x00000000 #define ACK_L 0x00010000 -#define DM_INC 0x00004000 -#define DM_DEC 0x00008000 -#define DM_FIX 0x0000c000 -#define SM_INC 0x00001000 -#define SM_DEC 0x00002000 -#define SM_FIX 0x00003000 +#define DM_INC 0x00004000 /* Destination addresses are incremented */ +#define DM_DEC 0x00008000 /* Destination addresses are decremented */ +#define DM_FIX 0x0000c000 /* Destination address is fixed */ +#define SM_INC 0x00001000 /* Source addresses are incremented */ +#define SM_DEC 0x00002000 /* Source addresses are decremented */ +#define SM_FIX 0x00003000 /* Source address is fixed */ #define RS_IN 0x00000200 #define RS_OUT 0x00000300 +#define RS_AUTO 0x00000400 /* Auto Request */ +#define RS_ERS 0x00000800 /* DMA extended resource selector */ #define TS_BLK 0x00000040 #define TM_BUR 0x00000020 -#define CHCR_DE 0x00000001 -#define CHCR_TE 0x00000002 -#define CHCR_IE 0x00000004 +#define CHCR_DE 0x00000001 /* DMA Enable */ +#define CHCR_TE 0x00000002 /* Transfer End Flag */ +#define CHCR_IE 0x00000004 /* Interrupt Enable */ #endif diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c index 146d5df926db..1a6f6595c6c1 100644 --- a/drivers/dma/sh/shdmac.c +++ b/drivers/dma/sh/shdmac.c @@ -38,12 +38,12 @@ #include "../dmaengine.h" #include "shdma.h" -/* DMA register */ -#define SAR 0x00 -#define DAR 0x04 -#define TCR 0x08 -#define CHCR 0x0C -#define DMAOR 0x40 +/* DMA registers */ +#define SAR 0x00 /* Source Address Register */ +#define DAR 0x04 /* Destination Address Register */ +#define TCR 0x08 /* Transfer Count Register */ +#define CHCR 0x0C /* Channel Control Register */ +#define DMAOR 0x40 /* DMA Operation Register */ #define TEND 0x18 /* USB-DMAC */ diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h index b7b43b82231e..56b97eed28a4 100644 --- a/include/linux/sh_dma.h +++ b/include/linux/sh_dma.h @@ -95,19 +95,21 @@ struct sh_dmae_pdata { }; /* DMAOR definitions */ -#define DMAOR_AE 0x00000004 +#define DMAOR_AE 0x00000004 /* Address Error Flag */ #define DMAOR_NMIF 0x00000002 -#define DMAOR_DME 0x00000001 +#define DMAOR_DME 0x00000001 /* DMA Master Enable */ /* Definitions for the SuperH DMAC */ -#define DM_INC 0x00004000 -#define DM_DEC 0x00008000 -#define DM_FIX 0x0000c000 -#define SM_INC 0x00001000 -#define SM_DEC 0x00002000 -#define SM_FIX 0x00003000 -#define CHCR_DE 0x00000001 -#define CHCR_TE 0x00000002 -#define CHCR_IE 0x00000004 +#define DM_INC 0x00004000 /* Destination addresses are incremented */ +#define DM_DEC 0x00008000 /* Destination addresses are decremented */ +#define DM_FIX 0x0000c000 /* Destination address is fixed */ +#define SM_INC 0x00001000 /* Source addresses are incremented */ +#define SM_DEC 0x00002000 /* Source addresses are decremented */ +#define SM_FIX 0x00003000 /* Source address is fixed */ +#define RS_AUTO 0x00000400 /* Auto Request */ +#define RS_ERS 0x00000800 /* DMA extended resource selector */ +#define CHCR_DE 0x00000001 /* DMA Enable */ +#define CHCR_TE 0x00000002 /* Transfer End Flag */ +#define CHCR_IE 0x00000004 /* Interrupt Enable */ #endif -- cgit v1.2.3 From 3a48edc4bd68f841c07c7bc86358d2f02133f247 Mon Sep 17 00:00:00 2001 From: Tim Kryger Date: Fri, 13 Jun 2014 10:13:56 -0700 Subject: mmc: sdhci: Use mmc core regulator infrastucture Switch the common SDHCI code over to use mmc_host's regulator pointers and remove the ones in the sdhci_host structure. Additionally, use the common mmc_regulator_get_supply function to get the regulators and set the ocr_avail mask. This change sets the ocr_avail directly based upon the voltage ranges supported which ensures ocr_avail is set correctly while allowing the use of regulators that can't provide exactly 1.8v, 3.0v, or 3.3v. Signed-off-by: Tim Kryger Signed-off-by: Markus Mayer Reviewed-by: Matt Porter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 97 ++++++++++++++++++----------------------------- include/linux/mmc/sdhci.h | 3 -- 2 files changed, 36 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 47055f3f01b8..ee524b06db14 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1223,6 +1223,7 @@ EXPORT_SYMBOL_GPL(sdhci_set_clock); static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) { + struct mmc_host *mmc = host->mmc; u8 pwr = 0; if (mode != MMC_POWER_OFF) { @@ -1284,9 +1285,9 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, mdelay(10); } - if (host->vmmc) { + if (!IS_ERR(mmc->supply.vmmc)) { spin_unlock_irq(&host->lock); - mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd); + mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, vdd); spin_lock_irq(&host->lock); } } @@ -1440,13 +1441,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) { unsigned long flags; u8 ctrl; + struct mmc_host *mmc = host->mmc; spin_lock_irqsave(&host->lock, flags); if (host->flags & SDHCI_DEVICE_DEAD) { spin_unlock_irqrestore(&host->lock, flags); - if (host->vmmc && ios->power_mode == MMC_POWER_OFF) - mmc_regulator_set_ocr(host->mmc, host->vmmc, 0); + if (!IS_ERR(mmc->supply.vmmc) && + ios->power_mode == MMC_POWER_OFF) + mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, 0); return; } @@ -1707,6 +1710,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, struct mmc_ios *ios) { + struct mmc_host *mmc = host->mmc; u16 ctrl; int ret; @@ -1725,8 +1729,9 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, ctrl &= ~SDHCI_CTRL_VDD_180; sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, 2700000, 3600000); + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 2700000, + 3600000); if (ret) { pr_warning("%s: Switching to 3.3V signalling voltage " " failed\n", mmc_hostname(host->mmc)); @@ -1746,8 +1751,8 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, return -EAGAIN; case MMC_SIGNAL_VOLTAGE_180: - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 1700000, 1950000); if (ret) { pr_warning("%s: Switching to 1.8V signalling voltage " @@ -1776,8 +1781,9 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, return -EAGAIN; case MMC_SIGNAL_VOLTAGE_120: - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, 1100000, 1300000); + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 1100000, + 1300000); if (ret) { pr_warning("%s: Switching to 1.2V signalling voltage " " failed\n", mmc_hostname(host->mmc)); @@ -2962,25 +2968,22 @@ int sdhci_add_host(struct sdhci_host *host) !(host->mmc->caps & MMC_CAP_NONREMOVABLE)) mmc->caps |= MMC_CAP_NEEDS_POLL; + /* If there are external regulators, get them */ + if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER) + return -EPROBE_DEFER; + /* If vqmmc regulator and no 1.8V signalling, then there's no UHS */ - host->vqmmc = regulator_get_optional(mmc_dev(mmc), "vqmmc"); - if (IS_ERR_OR_NULL(host->vqmmc)) { - if (PTR_ERR(host->vqmmc) < 0) { - pr_info("%s: no vqmmc regulator found\n", - mmc_hostname(mmc)); - host->vqmmc = NULL; - } - } else { - ret = regulator_enable(host->vqmmc); - if (!regulator_is_supported_voltage(host->vqmmc, 1700000, - 1950000)) + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_enable(mmc->supply.vqmmc); + if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000, + 1950000)) caps[1] &= ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50); if (ret) { pr_warn("%s: Failed to enable vqmmc regulator: %d\n", mmc_hostname(mmc), ret); - host->vqmmc = NULL; + mmc->supply.vqmmc = NULL; } } @@ -3041,34 +3044,6 @@ int sdhci_add_host(struct sdhci_host *host) ocr_avail = 0; - host->vmmc = regulator_get_optional(mmc_dev(mmc), "vmmc"); - if (IS_ERR_OR_NULL(host->vmmc)) { - if (PTR_ERR(host->vmmc) < 0) { - pr_info("%s: no vmmc regulator found\n", - mmc_hostname(mmc)); - host->vmmc = NULL; - } - } - -#ifdef CONFIG_REGULATOR - /* - * Voltage range check makes sense only if regulator reports - * any voltage value. - */ - if (host->vmmc && regulator_get_voltage(host->vmmc) > 0) { - ret = regulator_is_supported_voltage(host->vmmc, 2700000, - 3600000); - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_330))) - caps[0] &= ~SDHCI_CAN_VDD_330; - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_300))) - caps[0] &= ~SDHCI_CAN_VDD_300; - ret = regulator_is_supported_voltage(host->vmmc, 1700000, - 1950000); - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_180))) - caps[0] &= ~SDHCI_CAN_VDD_180; - } -#endif /* CONFIG_REGULATOR */ - /* * According to SD Host Controller spec v3.00, if the Host System * can afford more than 150mA, Host Driver should set XPC to 1. Also @@ -3077,8 +3052,8 @@ int sdhci_add_host(struct sdhci_host *host) * value. */ max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT); - if (!max_current_caps && host->vmmc) { - u32 curr = regulator_get_current_limit(host->vmmc); + if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) { + u32 curr = regulator_get_current_limit(mmc->supply.vmmc); if (curr > 0) { /* convert to SDHCI_MAX_CURRENT format */ @@ -3118,8 +3093,11 @@ int sdhci_add_host(struct sdhci_host *host) SDHCI_MAX_CURRENT_MULTIPLIER; } + if (mmc->ocr_avail) + ocr_avail &= mmc->ocr_avail; + if (host->ocr_mask) - ocr_avail = host->ocr_mask; + ocr_avail &= host->ocr_mask; mmc->ocr_avail = ocr_avail; mmc->ocr_avail_sdio = ocr_avail; @@ -3273,6 +3251,7 @@ EXPORT_SYMBOL_GPL(sdhci_add_host); void sdhci_remove_host(struct sdhci_host *host, int dead) { + struct mmc_host *mmc = host->mmc; unsigned long flags; if (dead) { @@ -3310,15 +3289,11 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) tasklet_kill(&host->finish_tasklet); - if (host->vmmc) { - regulator_disable(host->vmmc); - regulator_put(host->vmmc); - } + if (!IS_ERR(mmc->supply.vmmc)) + regulator_disable(mmc->supply.vmmc); - if (host->vqmmc) { - regulator_disable(host->vqmmc); - regulator_put(host->vqmmc); - } + if (!IS_ERR(mmc->supply.vqmmc)) + regulator_disable(mmc->supply.vqmmc); if (host->adma_desc) dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE, diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 08abe9941884..09ebe57d5ce9 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -104,9 +104,6 @@ struct sdhci_host { const struct sdhci_ops *ops; /* Low level hw interface */ - struct regulator *vmmc; /* Power regulator (vmmc) */ - struct regulator *vqmmc; /* Signaling regulator (vccq) */ - /* Internal data */ struct mmc_host *mmc; /* MMC structure */ u64 dma_mask; /* custom DMA mask */ -- cgit v1.2.3 From 2cd3a2a54656f9c480b1c7272fc07635d575841b Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Thu, 29 May 2014 10:28:00 +0200 Subject: mmc: omap_hsmmc: Enable SDIO interrupt There have been various patches floating around for enabling the SDIO IRQ for hsmmc, but none of them ever got merged. Probably the reason for not merging the SDIO interrupt patches has been the lack of wake-up path for SDIO on some omaps that has also needed remuxing the SDIO DAT1 line to a GPIO making the patches complex. This patch adds the minimal SDIO IRQ support to hsmmc for omaps that do have the wake-up path. For those omaps, the DAT1 line need to have the wake-up enable bit set, and the wake-up interrupt is the same as for the MMC controller. This patch has been tested on am3730 es1.2 with mwifiex connected to MMC3 with mwifiex waking to Ethernet traffic from off-idle mode. Note that for omaps that do not have the SDIO wake-up path, this patch will not work for idle modes and further patches for remuxing DAT1 to GPIO are needed. Based on earlier patches [1][2] by David Vrabel , Steve Sakoman For now, only support SDIO interrupt if we are booted with a separate wake-irq configued via device tree. This is because omaps need the wake-irq for idle states, and some omaps need special quirks. And we don't want to add new legacy mux platform init code callbacks any longer as we are moving to DT based booting anyways. To use it, you need to specify the wake-irq using the interrupts-extended property. [1] http://www.sakoman.com/cgi-bin/gitweb.cgi?p=linux.git;a=commitdiff_plain;h=010810d22f6f49ac03da4ba384969432e0320453 [2] http://comments.gmane.org/gmane.linux.kernel.mmc/20446 Acked-by: Balaji T K Signed-off-by: Andreas Fenkart Signed-off-by: Tony Lindgren Signed-off-by: Ulf Hansson --- .../devicetree/bindings/mmc/ti-omap-hsmmc.txt | 1 + drivers/mmc/host/omap_hsmmc.c | 201 +++++++++++++++++++-- include/linux/platform_data/mmc-omap.h | 1 + 3 files changed, 191 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt index ce8056116fb0..0233ba7951e5 100644 --- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt +++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt @@ -12,6 +12,7 @@ Required properties: Should be "ti,omap3-hsmmc", for OMAP3 controllers Should be "ti,omap3-pre-es3-hsmmc" for OMAP3 controllers pre ES3.0 Should be "ti,omap4-hsmmc", for OMAP4 controllers + Should be "ti,am33xx-hsmmc", for AM335x controllers - ti,hwmods: Must be "mmc", n is controller instance starting 1 Optional properties: diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 6b7b75585926..9446010a5dd9 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +108,7 @@ #define TC_EN (1 << 1) #define BWR_EN (1 << 4) #define BRR_EN (1 << 5) +#define CIRQ_EN (1 << 8) #define ERR_EN (1 << 15) #define CTO_EN (1 << 16) #define CCRC_EN (1 << 17) @@ -140,7 +143,6 @@ #define VDD_3V0 3000000 /* 300000 uV */ #define VDD_165_195 (ffs(MMC_VDD_165_195) - 1) -#define AUTO_CMD23 (1 << 1) /* Auto CMD23 support */ /* * One controller can have multiple slots, like on some omap boards using * omap.c controller driver. Luckily this is not currently done on any known @@ -194,6 +196,7 @@ struct omap_hsmmc_host { u32 sysctl; u32 capa; int irq; + int wake_irq; int use_dma, dma_ch; struct dma_chan *tx_chan; struct dma_chan *rx_chan; @@ -206,6 +209,9 @@ struct omap_hsmmc_host { int req_in_progress; unsigned long clk_rate; unsigned int flags; +#define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */ +#define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */ +#define HSMMC_WAKE_IRQ_ENABLED (1 << 2) struct omap_hsmmc_next next_data; struct omap_mmc_platform_data *pdata; }; @@ -510,27 +516,40 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host) static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host, struct mmc_command *cmd) { - unsigned int irq_mask; + u32 irq_mask = INT_EN_MASK; + unsigned long flags; if (host->use_dma) - irq_mask = INT_EN_MASK & ~(BRR_EN | BWR_EN); - else - irq_mask = INT_EN_MASK; + irq_mask &= ~(BRR_EN | BWR_EN); /* Disable timeout for erases */ if (cmd->opcode == MMC_ERASE) irq_mask &= ~DTO_EN; + spin_lock_irqsave(&host->irq_lock, flags); OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + + /* latch pending CIRQ, but don't signal MMC core */ + if (host->flags & HSMMC_SDIO_IRQ_ENABLED) + irq_mask |= CIRQ_EN; OMAP_HSMMC_WRITE(host->base, IE, irq_mask); + spin_unlock_irqrestore(&host->irq_lock, flags); } static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host) { - OMAP_HSMMC_WRITE(host->base, ISE, 0); - OMAP_HSMMC_WRITE(host->base, IE, 0); + u32 irq_mask = 0; + unsigned long flags; + + spin_lock_irqsave(&host->irq_lock, flags); + /* no transfer running but need to keep cirq if enabled */ + if (host->flags & HSMMC_SDIO_IRQ_ENABLED) + irq_mask |= CIRQ_EN; + OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + OMAP_HSMMC_WRITE(host->base, IE, irq_mask); OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + spin_unlock_irqrestore(&host->irq_lock, flags); } /* Calculate divisor for the given clock frequency */ @@ -681,7 +700,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host) && time_before(jiffies, timeout)) ; - omap_hsmmc_disable_irq(host); + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); /* Do not initialize card-specific things if the power is off */ if (host->power_mode == MMC_POWER_OFF) @@ -1118,8 +1139,12 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) int status; status = OMAP_HSMMC_READ(host->base, STAT); - while (status & INT_EN_MASK && host->req_in_progress) { - omap_hsmmc_do_irq(host, status); + while (status & (INT_EN_MASK | CIRQ_EN)) { + if (host->req_in_progress) + omap_hsmmc_do_irq(host, status); + + if (status & CIRQ_EN) + mmc_signal_sdio_irq(host->mmc); /* Flush posted write */ status = OMAP_HSMMC_READ(host->base, STAT); @@ -1128,6 +1153,22 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id) +{ + struct omap_hsmmc_host *host = dev_id; + + /* cirq is level triggered, disable to avoid infinite loop */ + spin_lock(&host->irq_lock); + if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { + disable_irq_nosync(host->wake_irq); + host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; + } + spin_unlock(&host->irq_lock); + pm_request_resume(host->dev); /* no use counter */ + + return IRQ_HANDLED; +} + static void set_sd_bus_power(struct omap_hsmmc_host *host) { unsigned long i; @@ -1639,6 +1680,79 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card) mmc_slot(host).init_card(card); } +static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable) +{ + struct omap_hsmmc_host *host = mmc_priv(mmc); + u32 irq_mask; + unsigned long flags; + + spin_lock_irqsave(&host->irq_lock, flags); + + irq_mask = OMAP_HSMMC_READ(host->base, ISE); + if (enable) { + host->flags |= HSMMC_SDIO_IRQ_ENABLED; + irq_mask |= CIRQ_EN; + } else { + host->flags &= ~HSMMC_SDIO_IRQ_ENABLED; + irq_mask &= ~CIRQ_EN; + } + OMAP_HSMMC_WRITE(host->base, IE, irq_mask); + + /* + * if enable, piggy back detection on current request + * but always disable immediately + */ + if (!host->req_in_progress || !enable) + OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + + /* flush posted write */ + OMAP_HSMMC_READ(host->base, IE); + + spin_unlock_irqrestore(&host->irq_lock, flags); +} + +static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) +{ + struct mmc_host *mmc = host->mmc; + int ret; + + /* + * For omaps with wake-up path, wakeirq will be irq from pinctrl and + * for other omaps, wakeirq will be from GPIO (dat line remuxed to + * gpio). wakeirq is needed to detect sdio irq in runtime suspend state + * with functional clock disabled. + */ + if (!host->dev->of_node || !host->wake_irq) + return -ENODEV; + + /* Prevent auto-enabling of IRQ */ + irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN); + ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + mmc_hostname(mmc), host); + if (ret) { + dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n"); + goto err; + } + + /* + * Some omaps don't have wake-up path from deeper idle states + * and need to remux SDIO DAT1 to GPIO for wake-up from idle. + */ + if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) { + ret = -ENODEV; + devm_free_irq(host->dev, host->wake_irq, host); + goto err; + } + + return 0; + +err: + dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n"); + host->wake_irq = 0; + return ret; +} + static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host) { u32 hctl, capa, value; @@ -1691,7 +1805,7 @@ static const struct mmc_host_ops omap_hsmmc_ops = { .get_cd = omap_hsmmc_get_cd, .get_ro = omap_hsmmc_get_ro, .init_card = omap_hsmmc_init_card, - /* NYET -- enable_sdio_irq */ + .enable_sdio_irq = omap_hsmmc_enable_sdio_irq, }; #ifdef CONFIG_DEBUG_FS @@ -1761,6 +1875,10 @@ static const struct omap_mmc_of_data omap3_pre_es3_mmc_of_data = { static const struct omap_mmc_of_data omap4_mmc_of_data = { .reg_offset = 0x100, }; +static const struct omap_mmc_of_data am33xx_mmc_of_data = { + .reg_offset = 0x100, + .controller_flags = OMAP_HSMMC_SWAKEUP_MISSING, +}; static const struct of_device_id omap_mmc_of_match[] = { { @@ -1777,6 +1895,10 @@ static const struct of_device_id omap_mmc_of_match[] = { .compatible = "ti,omap4-hsmmc", .data = &omap4_mmc_of_data, }, + { + .compatible = "ti,am33xx-hsmmc", + .data = &am33xx_mmc_of_data, + }, {}, }; MODULE_DEVICE_TABLE(of, omap_mmc_of_match); @@ -1913,6 +2035,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, host); + if (pdev->dev.of_node) + host->wake_irq = irq_of_parse_and_map(pdev->dev.of_node, 1); + mmc->ops = &omap_hsmmc_ops; mmc->f_min = OMAP_MMC_MIN_CLOCK; @@ -2066,6 +2191,18 @@ static int omap_hsmmc_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "pins are not configured from the driver\n"); + /* + * For now, only support SDIO interrupt if we have a separate + * wake-up interrupt configured from device tree. This is because + * the wake-up interrupt is needed for idle state and some + * platforms need special quirks. And we don't want to add new + * legacy mux platform init code callbacks any longer as we + * are moving to DT based booting anyways. + */ + ret = omap_hsmmc_configure_wake_irq(host); + if (!ret) + mmc->caps |= MMC_CAP_SDIO_IRQ; + omap_hsmmc_protect_card(host); mmc_add_host(mmc); @@ -2170,11 +2307,18 @@ static int omap_hsmmc_suspend(struct device *dev) pm_runtime_get_sync(host->dev); if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) { - omap_hsmmc_disable_irq(host); + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); OMAP_HSMMC_WRITE(host->base, HCTL, OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); } + /* do not wake up due to sdio irq */ + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) + disable_irq(host->wake_irq); + if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2200,6 +2344,10 @@ static int omap_hsmmc_resume(struct device *dev) omap_hsmmc_protect_card(host); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) + enable_irq(host->wake_irq); + pm_runtime_mark_last_busy(host->dev); pm_runtime_put_autosuspend(host->dev); return 0; @@ -2215,22 +2363,51 @@ static int omap_hsmmc_resume(struct device *dev) static int omap_hsmmc_runtime_suspend(struct device *dev) { struct omap_hsmmc_host *host; + unsigned long flags; host = platform_get_drvdata(to_platform_device(dev)); omap_hsmmc_context_save(host); dev_dbg(dev, "disabled\n"); + spin_lock_irqsave(&host->irq_lock, flags); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { + /* disable sdio irq handling to prevent race */ + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + + WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED); + enable_irq(host->wake_irq); + host->flags |= HSMMC_WAKE_IRQ_ENABLED; + } + spin_unlock_irqrestore(&host->irq_lock, flags); return 0; } static int omap_hsmmc_runtime_resume(struct device *dev) { struct omap_hsmmc_host *host; + unsigned long flags; host = platform_get_drvdata(to_platform_device(dev)); omap_hsmmc_context_restore(host); dev_dbg(dev, "enabled\n"); + spin_lock_irqsave(&host->irq_lock, flags); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { + /* sdio irq flag can't change while in runtime suspend */ + if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { + disable_irq_nosync(host->wake_irq); + host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; + } + + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN); + OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN); + } + spin_unlock_irqrestore(&host->irq_lock, flags); return 0; } diff --git a/include/linux/platform_data/mmc-omap.h b/include/linux/platform_data/mmc-omap.h index 2bf1b30cb5dc..51e70cf25cbc 100644 --- a/include/linux/platform_data/mmc-omap.h +++ b/include/linux/platform_data/mmc-omap.h @@ -28,6 +28,7 @@ */ #define OMAP_HSMMC_SUPPORTS_DUAL_VOLT BIT(0) #define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ BIT(1) +#define OMAP_HSMMC_SWAKEUP_MISSING BIT(2) struct mmc_card; -- cgit v1.2.3 From ec38846ad59d7b780540afcec101b24139933195 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 29 May 2014 01:20:16 +0200 Subject: backlight: atmel-pwm-bl: remove obsolete driver The atmel-pwm-bl driver is now obsolete. It is not used by any mainlined boards and is replaced by the generic pwm_bl with the pawm-atmel driver using the generic PWM framework. Signed-off-by: Alexandre Belloni Acked-by: Hans-Christian Egtvedt Acked-by: Jingoo Han Signed-off-by: Nicolas Ferre --- drivers/video/backlight/Kconfig | 11 -- drivers/video/backlight/Makefile | 1 - drivers/video/backlight/atmel-pwm-bl.c | 223 --------------------------------- include/linux/atmel-pwm-bl.h | 43 ------- 4 files changed, 278 deletions(-) delete mode 100644 drivers/video/backlight/atmel-pwm-bl.c delete mode 100644 include/linux/atmel-pwm-bl.h (limited to 'include/linux') diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 5d449059a556..c3c18339b8cb 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -178,17 +178,6 @@ config BACKLIGHT_ATMEL_LCDC If in doubt, it's safe to enable this option; it doesn't kick in unless the board's description says it's wired that way. -config BACKLIGHT_ATMEL_PWM - tristate "Atmel PWM backlight control" - depends on ATMEL_PWM - help - Say Y here if you want to use the PWM peripheral in Atmel AT91 and - AVR32 devices. This driver will need additional platform data to know - which PWM instance to use and how to configure it. - - To compile this driver as a module, choose M here: the module will be - called atmel-pwm-bl. - config BACKLIGHT_EP93XX tristate "Cirrus EP93xx Backlight Driver" depends on FB_EP93XX diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index bb820024f346..351451dbb607 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -25,7 +25,6 @@ obj-$(CONFIG_BACKLIGHT_ADP8860) += adp8860_bl.o obj-$(CONFIG_BACKLIGHT_ADP8870) += adp8870_bl.o obj-$(CONFIG_BACKLIGHT_APPLE) += apple_bl.o obj-$(CONFIG_BACKLIGHT_AS3711) += as3711_bl.o -obj-$(CONFIG_BACKLIGHT_ATMEL_PWM) += atmel-pwm-bl.o obj-$(CONFIG_BACKLIGHT_BD6107) += bd6107.o obj-$(CONFIG_BACKLIGHT_CARILLO_RANCH) += cr_bllcd.o obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c deleted file mode 100644 index 261b1a4ec3d8..000000000000 --- a/drivers/video/backlight/atmel-pwm-bl.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2008 Atmel Corporation - * - * Backlight driver using Atmel PWM peripheral. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct atmel_pwm_bl { - const struct atmel_pwm_bl_platform_data *pdata; - struct backlight_device *bldev; - struct platform_device *pdev; - struct pwm_channel pwmc; - int gpio_on; -}; - -static void atmel_pwm_bl_set_gpio_on(struct atmel_pwm_bl *pwmbl, int on) -{ - if (!gpio_is_valid(pwmbl->gpio_on)) - return; - - gpio_set_value(pwmbl->gpio_on, on ^ pwmbl->pdata->on_active_low); -} - -static int atmel_pwm_bl_set_intensity(struct backlight_device *bd) -{ - struct atmel_pwm_bl *pwmbl = bl_get_data(bd); - int intensity = bd->props.brightness; - int pwm_duty; - - if (bd->props.power != FB_BLANK_UNBLANK) - intensity = 0; - if (bd->props.fb_blank != FB_BLANK_UNBLANK) - intensity = 0; - - if (pwmbl->pdata->pwm_active_low) - pwm_duty = pwmbl->pdata->pwm_duty_min + intensity; - else - pwm_duty = pwmbl->pdata->pwm_duty_max - intensity; - - if (pwm_duty > pwmbl->pdata->pwm_duty_max) - pwm_duty = pwmbl->pdata->pwm_duty_max; - if (pwm_duty < pwmbl->pdata->pwm_duty_min) - pwm_duty = pwmbl->pdata->pwm_duty_min; - - if (!intensity) { - atmel_pwm_bl_set_gpio_on(pwmbl, 0); - pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty); - pwm_channel_disable(&pwmbl->pwmc); - } else { - pwm_channel_enable(&pwmbl->pwmc); - pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty); - atmel_pwm_bl_set_gpio_on(pwmbl, 1); - } - - return 0; -} - -static int atmel_pwm_bl_get_intensity(struct backlight_device *bd) -{ - struct atmel_pwm_bl *pwmbl = bl_get_data(bd); - u32 cdty; - u32 intensity; - - cdty = pwm_channel_readl(&pwmbl->pwmc, PWM_CDTY); - if (pwmbl->pdata->pwm_active_low) - intensity = cdty - pwmbl->pdata->pwm_duty_min; - else - intensity = pwmbl->pdata->pwm_duty_max - cdty; - - return intensity & 0xffff; -} - -static int atmel_pwm_bl_init_pwm(struct atmel_pwm_bl *pwmbl) -{ - unsigned long pwm_rate = pwmbl->pwmc.mck; - unsigned long prescale = DIV_ROUND_UP(pwm_rate, - (pwmbl->pdata->pwm_frequency * - pwmbl->pdata->pwm_compare_max)) - 1; - - /* - * Prescale must be power of two and maximum 0xf in size because of - * hardware limit. PWM speed will be: - * PWM module clock speed / (2 ^ prescale). - */ - prescale = fls(prescale); - if (prescale > 0xf) - prescale = 0xf; - - pwm_channel_writel(&pwmbl->pwmc, PWM_CMR, prescale); - pwm_channel_writel(&pwmbl->pwmc, PWM_CDTY, - pwmbl->pdata->pwm_duty_min + - pwmbl->bldev->props.brightness); - pwm_channel_writel(&pwmbl->pwmc, PWM_CPRD, - pwmbl->pdata->pwm_compare_max); - - dev_info(&pwmbl->pdev->dev, "Atmel PWM backlight driver (%lu Hz)\n", - pwmbl->pwmc.mck / pwmbl->pdata->pwm_compare_max / - (1 << prescale)); - - return pwm_channel_enable(&pwmbl->pwmc); -} - -static const struct backlight_ops atmel_pwm_bl_ops = { - .get_brightness = atmel_pwm_bl_get_intensity, - .update_status = atmel_pwm_bl_set_intensity, -}; - -static int atmel_pwm_bl_probe(struct platform_device *pdev) -{ - struct backlight_properties props; - const struct atmel_pwm_bl_platform_data *pdata; - struct backlight_device *bldev; - struct atmel_pwm_bl *pwmbl; - unsigned long flags; - int retval; - - pdata = dev_get_platdata(&pdev->dev); - if (!pdata) - return -ENODEV; - - if (pdata->pwm_compare_max < pdata->pwm_duty_max || - pdata->pwm_duty_min > pdata->pwm_duty_max || - pdata->pwm_frequency == 0) - return -EINVAL; - - pwmbl = devm_kzalloc(&pdev->dev, sizeof(struct atmel_pwm_bl), - GFP_KERNEL); - if (!pwmbl) - return -ENOMEM; - - pwmbl->pdev = pdev; - pwmbl->pdata = pdata; - pwmbl->gpio_on = pdata->gpio_on; - - retval = pwm_channel_alloc(pdata->pwm_channel, &pwmbl->pwmc); - if (retval) - return retval; - - if (gpio_is_valid(pwmbl->gpio_on)) { - /* Turn display off by default. */ - if (pdata->on_active_low) - flags = GPIOF_OUT_INIT_HIGH; - else - flags = GPIOF_OUT_INIT_LOW; - - retval = devm_gpio_request_one(&pdev->dev, pwmbl->gpio_on, - flags, "gpio_atmel_pwm_bl"); - if (retval) - goto err_free_pwm; - } - - memset(&props, 0, sizeof(struct backlight_properties)); - props.type = BACKLIGHT_RAW; - props.max_brightness = pdata->pwm_duty_max - pdata->pwm_duty_min; - bldev = devm_backlight_device_register(&pdev->dev, "atmel-pwm-bl", - &pdev->dev, pwmbl, &atmel_pwm_bl_ops, - &props); - if (IS_ERR(bldev)) { - retval = PTR_ERR(bldev); - goto err_free_pwm; - } - - pwmbl->bldev = bldev; - - platform_set_drvdata(pdev, pwmbl); - - /* Power up the backlight by default at middle intesity. */ - bldev->props.power = FB_BLANK_UNBLANK; - bldev->props.brightness = bldev->props.max_brightness / 2; - - retval = atmel_pwm_bl_init_pwm(pwmbl); - if (retval) - goto err_free_pwm; - - atmel_pwm_bl_set_intensity(bldev); - - return 0; - -err_free_pwm: - pwm_channel_free(&pwmbl->pwmc); - - return retval; -} - -static int atmel_pwm_bl_remove(struct platform_device *pdev) -{ - struct atmel_pwm_bl *pwmbl = platform_get_drvdata(pdev); - - atmel_pwm_bl_set_gpio_on(pwmbl, 0); - pwm_channel_disable(&pwmbl->pwmc); - pwm_channel_free(&pwmbl->pwmc); - - return 0; -} - -static struct platform_driver atmel_pwm_bl_driver = { - .driver = { - .name = "atmel-pwm-bl", - }, - /* REVISIT add suspend() and resume() */ - .probe = atmel_pwm_bl_probe, - .remove = atmel_pwm_bl_remove, -}; - -module_platform_driver(atmel_pwm_bl_driver); - -MODULE_AUTHOR("Hans-Christian egtvedt "); -MODULE_DESCRIPTION("Atmel PWM backlight driver"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:atmel-pwm-bl"); diff --git a/include/linux/atmel-pwm-bl.h b/include/linux/atmel-pwm-bl.h deleted file mode 100644 index 0153a47806c2..000000000000 --- a/include/linux/atmel-pwm-bl.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2007 Atmel Corporation - * - * Driver for the AT32AP700X PS/2 controller (PSIF). - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - */ - -#ifndef __INCLUDE_ATMEL_PWM_BL_H -#define __INCLUDE_ATMEL_PWM_BL_H - -/** - * struct atmel_pwm_bl_platform_data - * @pwm_channel: which PWM channel in the PWM module to use. - * @pwm_frequency: PWM frequency to generate, the driver will try to be as - * close as the prescaler allows. - * @pwm_compare_max: value to use in the PWM channel compare register. - * @pwm_duty_max: maximum duty cycle value, must be less than or equal to - * pwm_compare_max. - * @pwm_duty_min: minimum duty cycle value, must be less than pwm_duty_max. - * @pwm_active_low: set to one if the low part of the PWM signal increases the - * brightness of the backlight. - * @gpio_on: GPIO line to control the backlight on/off, set to -1 if not used. - * @on_active_low: set to one if the on/off signal is on when GPIO is low. - * - * This struct must be added to the platform device in the board code. It is - * used by the atmel-pwm-bl driver to setup the GPIO to control on/off and the - * PWM device. - */ -struct atmel_pwm_bl_platform_data { - unsigned int pwm_channel; - unsigned int pwm_frequency; - unsigned int pwm_compare_max; - unsigned int pwm_duty_max; - unsigned int pwm_duty_min; - unsigned int pwm_active_low; - int gpio_on; - unsigned int on_active_low; -}; - -#endif /* __INCLUDE_ATMEL_PWM_BL_H */ -- cgit v1.2.3 From f2a70e1fc1ccc0fcdf4ad12db7382134228fb552 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 29 May 2014 01:20:18 +0200 Subject: misc: atmel_pwm: remove obsolete driver The misc/atmel_pwm is not used by any mainlined boards and has been replaced by the pwm-driver using the generic PWM framework. Signed-off-by: Alexandre Belloni Acked-by: Greg Kroah-Hartman Signed-off-by: Nicolas Ferre --- drivers/misc/Kconfig | 10 -- drivers/misc/Makefile | 1 - drivers/misc/atmel_pwm.c | 402 ---------------------------------------------- include/linux/atmel_pwm.h | 70 -------- 4 files changed, 483 deletions(-) delete mode 100644 drivers/misc/atmel_pwm.c delete mode 100644 include/linux/atmel_pwm.h (limited to 'include/linux') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index ee9402324a23..b841180c7c74 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -51,16 +51,6 @@ config AD525X_DPOT_SPI To compile this driver as a module, choose M here: the module will be called ad525x_dpot-spi. -config ATMEL_PWM - tristate "Atmel AT32/AT91 PWM support" - depends on HAVE_CLK - depends on AVR32 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91SAM9G45 - help - This option enables device driver support for the PWM channels - on certain Atmel processors. Pulse Width Modulation is used for - purposes including software controlled power-efficient backlights - on LCD displays, motor control, and waveform generation. - config ATMEL_TCLIB bool "Atmel AT32/AT91 Timer/Counter Library" depends on (AVR32 || ARCH_AT91) diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index d59ce1261b38..5497d026e651 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -7,7 +7,6 @@ obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o obj-$(CONFIG_AD525X_DPOT_I2C) += ad525x_dpot-i2c.o obj-$(CONFIG_AD525X_DPOT_SPI) += ad525x_dpot-spi.o obj-$(CONFIG_INTEL_MID_PTI) += pti.o -obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o obj-$(CONFIG_BMP085) += bmp085.o diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c deleted file mode 100644 index a6dc56e1bc58..000000000000 --- a/drivers/misc/atmel_pwm.c +++ /dev/null @@ -1,402 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - - -/* - * This is a simple driver for the PWM controller found in various newer - * Atmel SOCs, including the AVR32 series and the AT91sam9263. - * - * Chips with current Linux ports have only 4 PWM channels, out of max 32. - * AT32UC3A and AT32UC3B chips have 7 channels (but currently no Linux). - * Docs are inconsistent about the width of the channel counter registers; - * it's at least 16 bits, but several places say 20 bits. - */ -#define PWM_NCHAN 4 /* max 32 */ - -struct pwm { - spinlock_t lock; - struct platform_device *pdev; - u32 mask; - int irq; - void __iomem *base; - struct clk *clk; - struct pwm_channel *channel[PWM_NCHAN]; - void (*handler[PWM_NCHAN])(struct pwm_channel *); -}; - - -/* global PWM controller registers */ -#define PWM_MR 0x00 -#define PWM_ENA 0x04 -#define PWM_DIS 0x08 -#define PWM_SR 0x0c -#define PWM_IER 0x10 -#define PWM_IDR 0x14 -#define PWM_IMR 0x18 -#define PWM_ISR 0x1c - -static inline void pwm_writel(const struct pwm *p, unsigned offset, u32 val) -{ - __raw_writel(val, p->base + offset); -} - -static inline u32 pwm_readl(const struct pwm *p, unsigned offset) -{ - return __raw_readl(p->base + offset); -} - -static inline void __iomem *pwmc_regs(const struct pwm *p, int index) -{ - return p->base + 0x200 + index * 0x20; -} - -static struct pwm *pwm; - -static void pwm_dumpregs(struct pwm_channel *ch, char *tag) -{ - struct device *dev = &pwm->pdev->dev; - - dev_dbg(dev, "%s: mr %08x, sr %08x, imr %08x\n", - tag, - pwm_readl(pwm, PWM_MR), - pwm_readl(pwm, PWM_SR), - pwm_readl(pwm, PWM_IMR)); - dev_dbg(dev, - "pwm ch%d - mr %08x, dty %u, prd %u, cnt %u\n", - ch->index, - pwm_channel_readl(ch, PWM_CMR), - pwm_channel_readl(ch, PWM_CDTY), - pwm_channel_readl(ch, PWM_CPRD), - pwm_channel_readl(ch, PWM_CCNT)); -} - - -/** - * pwm_channel_alloc - allocate an unused PWM channel - * @index: identifies the channel - * @ch: structure to be initialized - * - * Drivers allocate PWM channels according to the board's wiring, and - * matching board-specific setup code. Returns zero or negative errno. - */ -int pwm_channel_alloc(int index, struct pwm_channel *ch) -{ - unsigned long flags; - int status = 0; - - if (!pwm) - return -EPROBE_DEFER; - - if (!(pwm->mask & 1 << index)) - return -ENODEV; - - if (index < 0 || index >= PWM_NCHAN || !ch) - return -EINVAL; - memset(ch, 0, sizeof *ch); - - spin_lock_irqsave(&pwm->lock, flags); - if (pwm->channel[index]) - status = -EBUSY; - else { - clk_enable(pwm->clk); - - ch->regs = pwmc_regs(pwm, index); - ch->index = index; - - /* REVISIT: ap7000 seems to go 2x as fast as we expect!! */ - ch->mck = clk_get_rate(pwm->clk); - - pwm->channel[index] = ch; - pwm->handler[index] = NULL; - - /* channel and irq are always disabled when we return */ - pwm_writel(pwm, PWM_DIS, 1 << index); - pwm_writel(pwm, PWM_IDR, 1 << index); - } - spin_unlock_irqrestore(&pwm->lock, flags); - return status; -} -EXPORT_SYMBOL(pwm_channel_alloc); - -static int pwmcheck(struct pwm_channel *ch) -{ - int index; - - if (!pwm) - return -ENODEV; - if (!ch) - return -EINVAL; - index = ch->index; - if (index < 0 || index >= PWM_NCHAN || pwm->channel[index] != ch) - return -EINVAL; - - return index; -} - -/** - * pwm_channel_free - release a previously allocated channel - * @ch: the channel being released - * - * The channel is completely shut down (counter and IRQ disabled), - * and made available for re-use. Returns zero, or negative errno. - */ -int pwm_channel_free(struct pwm_channel *ch) -{ - unsigned long flags; - int t; - - spin_lock_irqsave(&pwm->lock, flags); - t = pwmcheck(ch); - if (t >= 0) { - pwm->channel[t] = NULL; - pwm->handler[t] = NULL; - - /* channel and irq are always disabled when we return */ - pwm_writel(pwm, PWM_DIS, 1 << t); - pwm_writel(pwm, PWM_IDR, 1 << t); - - clk_disable(pwm->clk); - t = 0; - } - spin_unlock_irqrestore(&pwm->lock, flags); - return t; -} -EXPORT_SYMBOL(pwm_channel_free); - -int __pwm_channel_onoff(struct pwm_channel *ch, int enabled) -{ - unsigned long flags; - int t; - - /* OMITTED FUNCTIONALITY: starting several channels in synch */ - - spin_lock_irqsave(&pwm->lock, flags); - t = pwmcheck(ch); - if (t >= 0) { - pwm_writel(pwm, enabled ? PWM_ENA : PWM_DIS, 1 << t); - t = 0; - pwm_dumpregs(ch, enabled ? "enable" : "disable"); - } - spin_unlock_irqrestore(&pwm->lock, flags); - - return t; -} -EXPORT_SYMBOL(__pwm_channel_onoff); - -/** - * pwm_clk_alloc - allocate and configure CLKA or CLKB - * @prescale: from 0..10, the power of two used to divide MCK - * @div: from 1..255, the linear divisor to use - * - * Returns PWM_CPR_CLKA, PWM_CPR_CLKB, or negative errno. The allocated - * clock will run with a period of (2^prescale * div) / MCK, or twice as - * long if center aligned PWM output is used. The clock must later be - * deconfigured using pwm_clk_free(). - */ -int pwm_clk_alloc(unsigned prescale, unsigned div) -{ - unsigned long flags; - u32 mr; - u32 val = (prescale << 8) | div; - int ret = -EBUSY; - - if (prescale >= 10 || div == 0 || div > 255) - return -EINVAL; - - spin_lock_irqsave(&pwm->lock, flags); - mr = pwm_readl(pwm, PWM_MR); - if ((mr & 0xffff) == 0) { - mr |= val; - ret = PWM_CPR_CLKA; - } else if ((mr & (0xffff << 16)) == 0) { - mr |= val << 16; - ret = PWM_CPR_CLKB; - } - if (ret > 0) - pwm_writel(pwm, PWM_MR, mr); - spin_unlock_irqrestore(&pwm->lock, flags); - return ret; -} -EXPORT_SYMBOL(pwm_clk_alloc); - -/** - * pwm_clk_free - deconfigure and release CLKA or CLKB - * - * Reverses the effect of pwm_clk_alloc(). - */ -void pwm_clk_free(unsigned clk) -{ - unsigned long flags; - u32 mr; - - spin_lock_irqsave(&pwm->lock, flags); - mr = pwm_readl(pwm, PWM_MR); - if (clk == PWM_CPR_CLKA) - pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 0)); - if (clk == PWM_CPR_CLKB) - pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 16)); - spin_unlock_irqrestore(&pwm->lock, flags); -} -EXPORT_SYMBOL(pwm_clk_free); - -/** - * pwm_channel_handler - manage channel's IRQ handler - * @ch: the channel - * @handler: the handler to use, possibly NULL - * - * If the handler is non-null, the handler will be called after every - * period of this PWM channel. If the handler is null, this channel - * won't generate an IRQ. - */ -int pwm_channel_handler(struct pwm_channel *ch, - void (*handler)(struct pwm_channel *ch)) -{ - unsigned long flags; - int t; - - spin_lock_irqsave(&pwm->lock, flags); - t = pwmcheck(ch); - if (t >= 0) { - pwm->handler[t] = handler; - pwm_writel(pwm, handler ? PWM_IER : PWM_IDR, 1 << t); - t = 0; - } - spin_unlock_irqrestore(&pwm->lock, flags); - - return t; -} -EXPORT_SYMBOL(pwm_channel_handler); - -static irqreturn_t pwm_irq(int id, void *_pwm) -{ - struct pwm *p = _pwm; - irqreturn_t handled = IRQ_NONE; - u32 irqstat; - int index; - - spin_lock(&p->lock); - - /* ack irqs, then handle them */ - irqstat = pwm_readl(pwm, PWM_ISR); - - while (irqstat) { - struct pwm_channel *ch; - void (*handler)(struct pwm_channel *ch); - - index = ffs(irqstat) - 1; - irqstat &= ~(1 << index); - ch = pwm->channel[index]; - handler = pwm->handler[index]; - if (handler && ch) { - spin_unlock(&p->lock); - handler(ch); - spin_lock(&p->lock); - handled = IRQ_HANDLED; - } - } - - spin_unlock(&p->lock); - return handled; -} - -static int __init pwm_probe(struct platform_device *pdev) -{ - struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - int irq = platform_get_irq(pdev, 0); - u32 *mp = pdev->dev.platform_data; - struct pwm *p; - int status = -EIO; - - if (pwm) - return -EBUSY; - if (!r || irq < 0 || !mp || !*mp) - return -ENODEV; - if (*mp & ~((1<dev, "mask 0x%x ... more than %d channels\n", - *mp, PWM_NCHAN); - return -EINVAL; - } - - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - spin_lock_init(&p->lock); - p->pdev = pdev; - p->mask = *mp; - p->irq = irq; - p->base = ioremap(r->start, resource_size(r)); - if (!p->base) - goto fail; - p->clk = clk_get(&pdev->dev, "pwm_clk"); - if (IS_ERR(p->clk)) { - status = PTR_ERR(p->clk); - p->clk = NULL; - goto fail; - } - - status = request_irq(irq, pwm_irq, 0, pdev->name, p); - if (status < 0) - goto fail; - - pwm = p; - platform_set_drvdata(pdev, p); - - return 0; - -fail: - if (p->clk) - clk_put(p->clk); - if (p->base) - iounmap(p->base); - - kfree(p); - return status; -} - -static int __exit pwm_remove(struct platform_device *pdev) -{ - struct pwm *p = platform_get_drvdata(pdev); - - if (p != pwm) - return -EINVAL; - - clk_enable(pwm->clk); - pwm_writel(pwm, PWM_DIS, (1 << PWM_NCHAN) - 1); - pwm_writel(pwm, PWM_IDR, (1 << PWM_NCHAN) - 1); - clk_disable(pwm->clk); - - pwm = NULL; - - free_irq(p->irq, p); - clk_put(p->clk); - iounmap(p->base); - kfree(p); - - return 0; -} - -static struct platform_driver atmel_pwm_driver = { - .driver = { - .name = "atmel_pwm", - .owner = THIS_MODULE, - }, - .remove = __exit_p(pwm_remove), - - /* NOTE: PWM can keep running in AVR32 "idle" and "frozen" states; - * and all AT91sam9263 states, albeit at reduced clock rate if - * MCK becomes the slow clock (i.e. what Linux labels STR). - */ -}; - -module_platform_driver_probe(atmel_pwm_driver, pwm_probe); - -MODULE_DESCRIPTION("Driver for AT32/AT91 PWM module"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:atmel_pwm"); diff --git a/include/linux/atmel_pwm.h b/include/linux/atmel_pwm.h deleted file mode 100644 index ea04abb3db8e..000000000000 --- a/include/linux/atmel_pwm.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef __LINUX_ATMEL_PWM_H -#define __LINUX_ATMEL_PWM_H - -/** - * struct pwm_channel - driver handle to a PWM channel - * @regs: base of this channel's registers - * @index: number of this channel (0..31) - * @mck: base clock rate, which can be prescaled and maybe subdivided - * - * Drivers initialize a pwm_channel structure using pwm_channel_alloc(). - * Then they configure its clock rate (derived from MCK), alignment, - * polarity, and duty cycle by writing directly to the channel registers, - * before enabling the channel by calling pwm_channel_enable(). - * - * After emitting a PWM signal for the desired length of time, drivers - * may then pwm_channel_disable() or pwm_channel_free(). Both of these - * disable the channel, but when it's freed the IRQ is deconfigured and - * the channel must later be re-allocated and reconfigured. - * - * Note that if the period or duty cycle need to be changed while the - * PWM channel is operating, drivers must use the PWM_CUPD double buffer - * mechanism, either polling until they change or getting implicitly - * notified through a once-per-period interrupt handler. - */ -struct pwm_channel { - void __iomem *regs; - unsigned index; - unsigned long mck; -}; - -extern int pwm_channel_alloc(int index, struct pwm_channel *ch); -extern int pwm_channel_free(struct pwm_channel *ch); - -extern int pwm_clk_alloc(unsigned prescale, unsigned div); -extern void pwm_clk_free(unsigned clk); - -extern int __pwm_channel_onoff(struct pwm_channel *ch, int enabled); - -#define pwm_channel_enable(ch) __pwm_channel_onoff((ch), 1) -#define pwm_channel_disable(ch) __pwm_channel_onoff((ch), 0) - -/* periodic interrupts, mostly for CUPD changes to period or cycle */ -extern int pwm_channel_handler(struct pwm_channel *ch, - void (*handler)(struct pwm_channel *ch)); - -/* per-channel registers (banked at pwm_channel->regs) */ -#define PWM_CMR 0x00 /* mode register */ -#define PWM_CPR_CPD (1 << 10) /* set: CUPD modifies period */ -#define PWM_CPR_CPOL (1 << 9) /* set: idle high */ -#define PWM_CPR_CALG (1 << 8) /* set: center align */ -#define PWM_CPR_CPRE (0xf << 0) /* mask: rate is mck/(2^pre) */ -#define PWM_CPR_CLKA (0xb << 0) /* rate CLKA */ -#define PWM_CPR_CLKB (0xc << 0) /* rate CLKB */ -#define PWM_CDTY 0x04 /* duty cycle (max of CPRD) */ -#define PWM_CPRD 0x08 /* period (count up from zero) */ -#define PWM_CCNT 0x0c /* counter (20 bits?) */ -#define PWM_CUPD 0x10 /* update CPRD (or CDTY) next period */ - -static inline void -pwm_channel_writel(struct pwm_channel *pwmc, unsigned offset, u32 val) -{ - __raw_writel(val, pwmc->regs + offset); -} - -static inline u32 pwm_channel_readl(struct pwm_channel *pwmc, unsigned offset) -{ - return __raw_readl(pwmc->regs + offset); -} - -#endif /* __LINUX_ATMEL_PWM_H */ -- cgit v1.2.3 From 8cd118308a8649c649533a0133af0ce731d223bb Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Fri, 6 Jun 2014 15:05:44 +0800 Subject: mfd: rtsx: Add dma transfer function rtsx driver using a single function for transfer data, dma map/unmap are placed in one fix function. We need map/unmap dma in different place(for mmc async driver), so add three function for dma map, dma transfer and dma unmap. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- drivers/mfd/rtsx_pcr.c | 76 ++++++++++++++++++++++++++++---------------- include/linux/mfd/rtsx_pci.h | 6 ++++ 2 files changed, 54 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 1d15735f9ef9..d01b8c249231 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -337,40 +337,64 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read, int timeout) { - struct completion trans_done; - u8 dir; - int err = 0, i, count; - long timeleft; - unsigned long flags; - struct scatterlist *sg; - enum dma_data_direction dma_dir; - u32 val; - dma_addr_t addr; - unsigned int len; + int err = 0, count; dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg); + count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read); + if (count < 1) + return -EINVAL; + dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count); + + err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout); + + rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); + +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - /* don't transfer data during abort processing */ if (pcr->remove_pci) return -EINVAL; if ((sglist == NULL) || (num_sg <= 0)) return -EINVAL; - if (read) { - dir = DEVICE_TO_HOST; - dma_dir = DMA_FROM_DEVICE; - } else { - dir = HOST_TO_DEVICE; - dma_dir = DMA_TO_DEVICE; - } + return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg); - count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir); - if (count < 1) { - dev_err(&(pcr->pci->dev), "scatterlist map failed\n"); +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg); + +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int count, bool read, int timeout) +{ + struct completion trans_done; + struct scatterlist *sg; + dma_addr_t addr; + long timeleft; + unsigned long flags; + unsigned int len; + int i, err = 0; + u32 val; + u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE; + + if (pcr->remove_pci) + return -ENODEV; + + if ((sglist == NULL) || (count < 1)) return -EINVAL; - } - dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count); val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; pcr->sgi = 0; @@ -400,12 +424,10 @@ int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, } spin_lock_irqsave(&pcr->lock, flags); - if (pcr->trans_result == TRANS_RESULT_FAIL) err = -EINVAL; else if (pcr->trans_result == TRANS_NO_DEVICE) err = -ENODEV; - spin_unlock_irqrestore(&pcr->lock, flags); out: @@ -413,8 +435,6 @@ out: pcr->done = NULL; spin_unlock_irqrestore(&pcr->lock, flags); - dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir); - if ((err < 0) && (err != -ENODEV)) rtsx_pci_stop_cmd(pcr); @@ -423,7 +443,7 @@ out: return err; } -EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); +EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer); int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) { diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index a3835976f7c6..74346d5e7899 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -943,6 +943,12 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read, int timeout); +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int count, bool read, int timeout); int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card); -- cgit v1.2.3 From bdbc736da636956a40435f5f41d2be6af544c3fb Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 2 Jun 2014 09:50:43 +0100 Subject: mfd: arizona: Lower ARIZONA_MAX_CORE_SUPPLIES to 2 There are no Arizona devices with 3 core supplies but we define a fix array with space for 3 core supplies. Lower the ARIZONA_MAX_CORE_SUPPLIES define to 2, to save a few bytes. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 6d9371f88875..70854d892760 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -18,7 +18,7 @@ #include #include -#define ARIZONA_MAX_CORE_SUPPLIES 3 +#define ARIZONA_MAX_CORE_SUPPLIES 2 enum arizona_type { WM5102 = 1, -- cgit v1.2.3 From 10f9edaeaa30468194e1dcd0e47e59b012f4cf8b Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 18 Jun 2014 21:05:40 +0400 Subject: mfd: mc13xxx: Use regmap irq framework for interrupts This patch convert mc13xxx MFD driver to use regmap irq framework for interrupt registration. Signed-off-by: Alexander Shiyan Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 1 + drivers/mfd/mc13xxx-core.c | 310 ++++++-------------------------------------- drivers/mfd/mc13xxx.h | 11 +- include/linux/mfd/mc13783.h | 1 - include/linux/mfd/mc13xxx.h | 23 +++- 5 files changed, 65 insertions(+), 281 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 207c433074af..defe58d65940 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -187,6 +187,7 @@ config MFD_MC13XXX tristate depends on (SPI_MASTER || I2C) select MFD_CORE + select REGMAP_IRQ help Enable support for the Freescale MC13783 and MC13892 PMICs. This driver provides common support for accessing the device, diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index acf5dd712eb2..2b6bc868cd3d 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -10,106 +10,18 @@ * Free Software Foundation. */ -#include #include -#include -#include -#include -#include -#include #include #include -#include +#include +#include #include "mc13xxx.h" #define MC13XXX_IRQSTAT0 0 -#define MC13XXX_IRQSTAT0_ADCDONEI (1 << 0) -#define MC13XXX_IRQSTAT0_ADCBISDONEI (1 << 1) -#define MC13XXX_IRQSTAT0_TSI (1 << 2) -#define MC13783_IRQSTAT0_WHIGHI (1 << 3) -#define MC13783_IRQSTAT0_WLOWI (1 << 4) -#define MC13XXX_IRQSTAT0_CHGDETI (1 << 6) -#define MC13783_IRQSTAT0_CHGOVI (1 << 7) -#define MC13XXX_IRQSTAT0_CHGREVI (1 << 8) -#define MC13XXX_IRQSTAT0_CHGSHORTI (1 << 9) -#define MC13XXX_IRQSTAT0_CCCVI (1 << 10) -#define MC13XXX_IRQSTAT0_CHGCURRI (1 << 11) -#define MC13XXX_IRQSTAT0_BPONI (1 << 12) -#define MC13XXX_IRQSTAT0_LOBATLI (1 << 13) -#define MC13XXX_IRQSTAT0_LOBATHI (1 << 14) -#define MC13783_IRQSTAT0_UDPI (1 << 15) -#define MC13783_IRQSTAT0_USBI (1 << 16) -#define MC13783_IRQSTAT0_IDI (1 << 19) -#define MC13783_IRQSTAT0_SE1I (1 << 21) -#define MC13783_IRQSTAT0_CKDETI (1 << 22) -#define MC13783_IRQSTAT0_UDMI (1 << 23) - #define MC13XXX_IRQMASK0 1 -#define MC13XXX_IRQMASK0_ADCDONEM MC13XXX_IRQSTAT0_ADCDONEI -#define MC13XXX_IRQMASK0_ADCBISDONEM MC13XXX_IRQSTAT0_ADCBISDONEI -#define MC13XXX_IRQMASK0_TSM MC13XXX_IRQSTAT0_TSI -#define MC13783_IRQMASK0_WHIGHM MC13783_IRQSTAT0_WHIGHI -#define MC13783_IRQMASK0_WLOWM MC13783_IRQSTAT0_WLOWI -#define MC13XXX_IRQMASK0_CHGDETM MC13XXX_IRQSTAT0_CHGDETI -#define MC13783_IRQMASK0_CHGOVM MC13783_IRQSTAT0_CHGOVI -#define MC13XXX_IRQMASK0_CHGREVM MC13XXX_IRQSTAT0_CHGREVI -#define MC13XXX_IRQMASK0_CHGSHORTM MC13XXX_IRQSTAT0_CHGSHORTI -#define MC13XXX_IRQMASK0_CCCVM MC13XXX_IRQSTAT0_CCCVI -#define MC13XXX_IRQMASK0_CHGCURRM MC13XXX_IRQSTAT0_CHGCURRI -#define MC13XXX_IRQMASK0_BPONM MC13XXX_IRQSTAT0_BPONI -#define MC13XXX_IRQMASK0_LOBATLM MC13XXX_IRQSTAT0_LOBATLI -#define MC13XXX_IRQMASK0_LOBATHM MC13XXX_IRQSTAT0_LOBATHI -#define MC13783_IRQMASK0_UDPM MC13783_IRQSTAT0_UDPI -#define MC13783_IRQMASK0_USBM MC13783_IRQSTAT0_USBI -#define MC13783_IRQMASK0_IDM MC13783_IRQSTAT0_IDI -#define MC13783_IRQMASK0_SE1M MC13783_IRQSTAT0_SE1I -#define MC13783_IRQMASK0_CKDETM MC13783_IRQSTAT0_CKDETI -#define MC13783_IRQMASK0_UDMM MC13783_IRQSTAT0_UDMI - #define MC13XXX_IRQSTAT1 3 -#define MC13XXX_IRQSTAT1_1HZI (1 << 0) -#define MC13XXX_IRQSTAT1_TODAI (1 << 1) -#define MC13783_IRQSTAT1_ONOFD1I (1 << 3) -#define MC13783_IRQSTAT1_ONOFD2I (1 << 4) -#define MC13783_IRQSTAT1_ONOFD3I (1 << 5) -#define MC13XXX_IRQSTAT1_SYSRSTI (1 << 6) -#define MC13XXX_IRQSTAT1_RTCRSTI (1 << 7) -#define MC13XXX_IRQSTAT1_PCI (1 << 8) -#define MC13XXX_IRQSTAT1_WARMI (1 << 9) -#define MC13XXX_IRQSTAT1_MEMHLDI (1 << 10) -#define MC13783_IRQSTAT1_PWRRDYI (1 << 11) -#define MC13XXX_IRQSTAT1_THWARNLI (1 << 12) -#define MC13XXX_IRQSTAT1_THWARNHI (1 << 13) -#define MC13XXX_IRQSTAT1_CLKI (1 << 14) -#define MC13783_IRQSTAT1_SEMAFI (1 << 15) -#define MC13783_IRQSTAT1_MC2BI (1 << 17) -#define MC13783_IRQSTAT1_HSDETI (1 << 18) -#define MC13783_IRQSTAT1_HSLI (1 << 19) -#define MC13783_IRQSTAT1_ALSPTHI (1 << 20) -#define MC13783_IRQSTAT1_AHSSHORTI (1 << 21) - #define MC13XXX_IRQMASK1 4 -#define MC13XXX_IRQMASK1_1HZM MC13XXX_IRQSTAT1_1HZI -#define MC13XXX_IRQMASK1_TODAM MC13XXX_IRQSTAT1_TODAI -#define MC13783_IRQMASK1_ONOFD1M MC13783_IRQSTAT1_ONOFD1I -#define MC13783_IRQMASK1_ONOFD2M MC13783_IRQSTAT1_ONOFD2I -#define MC13783_IRQMASK1_ONOFD3M MC13783_IRQSTAT1_ONOFD3I -#define MC13XXX_IRQMASK1_SYSRSTM MC13XXX_IRQSTAT1_SYSRSTI -#define MC13XXX_IRQMASK1_RTCRSTM MC13XXX_IRQSTAT1_RTCRSTI -#define MC13XXX_IRQMASK1_PCM MC13XXX_IRQSTAT1_PCI -#define MC13XXX_IRQMASK1_WARMM MC13XXX_IRQSTAT1_WARMI -#define MC13XXX_IRQMASK1_MEMHLDM MC13XXX_IRQSTAT1_MEMHLDI -#define MC13783_IRQMASK1_PWRRDYM MC13783_IRQSTAT1_PWRRDYI -#define MC13XXX_IRQMASK1_THWARNLM MC13XXX_IRQSTAT1_THWARNLI -#define MC13XXX_IRQMASK1_THWARNHM MC13XXX_IRQSTAT1_THWARNHI -#define MC13XXX_IRQMASK1_CLKM MC13XXX_IRQSTAT1_CLKI -#define MC13783_IRQMASK1_SEMAFM MC13783_IRQSTAT1_SEMAFI -#define MC13783_IRQMASK1_MC2BM MC13783_IRQSTAT1_MC2BI -#define MC13783_IRQMASK1_HSDETM MC13783_IRQSTAT1_HSDETI -#define MC13783_IRQMASK1_HSLM MC13783_IRQSTAT1_HSLI -#define MC13783_IRQMASK1_ALSPTHM MC13783_IRQSTAT1_ALSPTHI -#define MC13783_IRQMASK1_AHSSHORTM MC13783_IRQSTAT1_AHSSHORTI #define MC13XXX_REVISION 7 #define MC13XXX_REVISION_REVMETAL (0x07 << 0) @@ -189,45 +101,21 @@ EXPORT_SYMBOL(mc13xxx_reg_rmw); int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq) { - int ret; - unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - u32 mask; - - if (irq < 0 || irq >= MC13XXX_NUM_IRQ) - return -EINVAL; - - ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); - if (ret) - return ret; + int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq); - if (mask & irqbit) - /* already masked */ - return 0; + disable_irq_nosync(virq); - return mc13xxx_reg_write(mc13xxx, offmask, mask | irqbit); + return 0; } EXPORT_SYMBOL(mc13xxx_irq_mask); int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq) { - int ret; - unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - u32 mask; - - if (irq < 0 || irq >= MC13XXX_NUM_IRQ) - return -EINVAL; + int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq); - ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); - if (ret) - return ret; + enable_irq(virq); - if (!(mask & irqbit)) - /* already unmasked */ - return 0; - - return mc13xxx_reg_write(mc13xxx, offmask, mask & ~irqbit); + return 0; } EXPORT_SYMBOL(mc13xxx_irq_unmask); @@ -239,7 +127,7 @@ int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1; u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + if (irq < 0 || irq >= ARRAY_SIZE(mc13xxx->irqs)) return -EINVAL; if (enabled) { @@ -266,147 +154,26 @@ int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, } EXPORT_SYMBOL(mc13xxx_irq_status); -int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq) -{ - unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1; - unsigned int val = 1 << (irq < 24 ? irq : irq - 24); - - BUG_ON(irq < 0 || irq >= MC13XXX_NUM_IRQ); - - return mc13xxx_reg_write(mc13xxx, offstat, val); -} -EXPORT_SYMBOL(mc13xxx_irq_ack); - -int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, - irq_handler_t handler, const char *name, void *dev) -{ - BUG_ON(!mutex_is_locked(&mc13xxx->lock)); - BUG_ON(!handler); - - if (irq < 0 || irq >= MC13XXX_NUM_IRQ) - return -EINVAL; - - if (mc13xxx->irqhandler[irq]) - return -EBUSY; - - mc13xxx->irqhandler[irq] = handler; - mc13xxx->irqdata[irq] = dev; - - return 0; -} -EXPORT_SYMBOL(mc13xxx_irq_request_nounmask); - int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq, irq_handler_t handler, const char *name, void *dev) { - int ret; + int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq); - ret = mc13xxx_irq_request_nounmask(mc13xxx, irq, handler, name, dev); - if (ret) - return ret; - - ret = mc13xxx_irq_unmask(mc13xxx, irq); - if (ret) { - mc13xxx->irqhandler[irq] = NULL; - mc13xxx->irqdata[irq] = NULL; - return ret; - } - - return 0; + return devm_request_threaded_irq(mc13xxx->dev, virq, NULL, handler, + 0, name, dev); } EXPORT_SYMBOL(mc13xxx_irq_request); int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev) { - int ret; - BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq); - if (irq < 0 || irq >= MC13XXX_NUM_IRQ || !mc13xxx->irqhandler[irq] || - mc13xxx->irqdata[irq] != dev) - return -EINVAL; - - ret = mc13xxx_irq_mask(mc13xxx, irq); - if (ret) - return ret; - - mc13xxx->irqhandler[irq] = NULL; - mc13xxx->irqdata[irq] = NULL; + devm_free_irq(mc13xxx->dev, virq, dev); return 0; } EXPORT_SYMBOL(mc13xxx_irq_free); -static inline irqreturn_t mc13xxx_irqhandler(struct mc13xxx *mc13xxx, int irq) -{ - return mc13xxx->irqhandler[irq](irq, mc13xxx->irqdata[irq]); -} - -/* - * returns: number of handled irqs or negative error - * locking: holds mc13xxx->lock - */ -static int mc13xxx_irq_handle(struct mc13xxx *mc13xxx, - unsigned int offstat, unsigned int offmask, int baseirq) -{ - u32 stat, mask; - int ret = mc13xxx_reg_read(mc13xxx, offstat, &stat); - int num_handled = 0; - - if (ret) - return ret; - - ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); - if (ret) - return ret; - - while (stat & ~mask) { - int irq = __ffs(stat & ~mask); - - stat &= ~(1 << irq); - - if (likely(mc13xxx->irqhandler[baseirq + irq])) { - irqreturn_t handled; - - handled = mc13xxx_irqhandler(mc13xxx, baseirq + irq); - if (handled == IRQ_HANDLED) - num_handled++; - } else { - dev_err(mc13xxx->dev, - "BUG: irq %u but no handler\n", - baseirq + irq); - - mask |= 1 << irq; - - ret = mc13xxx_reg_write(mc13xxx, offmask, mask); - } - } - - return num_handled; -} - -static irqreturn_t mc13xxx_irq_thread(int irq, void *data) -{ - struct mc13xxx *mc13xxx = data; - irqreturn_t ret; - int handled = 0; - - mc13xxx_lock(mc13xxx); - - ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT0, - MC13XXX_IRQMASK0, 0); - if (ret > 0) - handled = 1; - - ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT1, - MC13XXX_IRQMASK1, 24); - if (ret > 0) - handled = 1; - - mc13xxx_unlock(mc13xxx); - - return IRQ_RETVAL(handled); -} - #define maskval(reg, mask) (((reg) & (mask)) >> __ffs(mask)) static void mc13xxx_print_revision(struct mc13xxx *mc13xxx, u32 revision) { @@ -475,8 +242,6 @@ static irqreturn_t mc13xxx_handler_adcdone(int irq, void *data) { struct mc13xxx_adcdone_data *adcdone_data = data; - mc13xxx_irq_ack(adcdone_data->mc13xxx, irq); - complete_all(&adcdone_data->done); return IRQ_HANDLED; @@ -544,7 +309,6 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__); mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, mc13xxx_handler_adcdone, __func__, &adcdone_data); - mc13xxx_irq_ack(mc13xxx, MC13XXX_IRQ_ADCDONE); mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, adc0); mc13xxx_reg_write(mc13xxx, MC13XXX_ADC1, adc1); @@ -599,7 +363,8 @@ static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx, if (!cell.name) return -ENOMEM; - return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0, NULL); + return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0, + regmap_irq_get_domain(mc13xxx->irq_data)); } static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) @@ -640,8 +405,8 @@ int mc13xxx_common_init(struct device *dev) { struct mc13xxx_platform_data *pdata = dev_get_platdata(dev); struct mc13xxx *mc13xxx = dev_get_drvdata(dev); - int ret; u32 revision; + int i, ret; mc13xxx->dev = dev; @@ -651,31 +416,32 @@ int mc13xxx_common_init(struct device *dev) mc13xxx->variant->print_revision(mc13xxx, revision); - /* mask all irqs */ - ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK0, 0x00ffffff); - if (ret) - return ret; + for (i = 0; i < ARRAY_SIZE(mc13xxx->irqs); i++) { + mc13xxx->irqs[i].reg_offset = i / MC13XXX_IRQ_PER_REG; + mc13xxx->irqs[i].mask = BIT(i % MC13XXX_IRQ_PER_REG); + } - ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK1, 0x00ffffff); + mc13xxx->irq_chip.name = dev_name(dev); + mc13xxx->irq_chip.status_base = MC13XXX_IRQSTAT0; + mc13xxx->irq_chip.mask_base = MC13XXX_IRQMASK0; + mc13xxx->irq_chip.ack_base = MC13XXX_IRQSTAT0; + mc13xxx->irq_chip.irq_reg_stride = MC13XXX_IRQSTAT1 - MC13XXX_IRQSTAT0; + mc13xxx->irq_chip.init_ack_masked = true; + mc13xxx->irq_chip.use_ack = true; + mc13xxx->irq_chip.num_regs = MC13XXX_IRQ_REG_CNT; + mc13xxx->irq_chip.irqs = mc13xxx->irqs; + mc13xxx->irq_chip.num_irqs = ARRAY_SIZE(mc13xxx->irqs); + + ret = regmap_add_irq_chip(mc13xxx->regmap, mc13xxx->irq, IRQF_ONESHOT, + 0, &mc13xxx->irq_chip, &mc13xxx->irq_data); if (ret) return ret; mutex_init(&mc13xxx->lock); - ret = request_threaded_irq(mc13xxx->irq, NULL, mc13xxx_irq_thread, - IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13xxx", mc13xxx); - if (ret) - return ret; - if (mc13xxx_probe_flags_dt(mc13xxx) < 0 && pdata) mc13xxx->flags = pdata->flags; - if (mc13xxx->flags & MC13XXX_USE_ADC) - mc13xxx_add_subdevice(mc13xxx, "%s-adc"); - - if (mc13xxx->flags & MC13XXX_USE_RTC) - mc13xxx_add_subdevice(mc13xxx, "%s-rtc"); - if (pdata) { mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator", &pdata->regulators, sizeof(pdata->regulators)); @@ -699,6 +465,12 @@ int mc13xxx_common_init(struct device *dev) mc13xxx_add_subdevice(mc13xxx, "%s-ts"); } + if (mc13xxx->flags & MC13XXX_USE_ADC) + mc13xxx_add_subdevice(mc13xxx, "%s-adc"); + + if (mc13xxx->flags & MC13XXX_USE_RTC) + mc13xxx_add_subdevice(mc13xxx, "%s-rtc"); + return 0; } EXPORT_SYMBOL_GPL(mc13xxx_common_init); @@ -707,8 +479,8 @@ int mc13xxx_common_exit(struct device *dev) { struct mc13xxx *mc13xxx = dev_get_drvdata(dev); - free_irq(mc13xxx->irq, mc13xxx); mfd_remove_devices(dev); + regmap_del_irq_chip(mc13xxx->irq, mc13xxx->irq_data); mutex_destroy(&mc13xxx->lock); return 0; diff --git a/drivers/mfd/mc13xxx.h b/drivers/mfd/mc13xxx.h index ae7f1659f5d1..33677d1dcf66 100644 --- a/drivers/mfd/mc13xxx.h +++ b/drivers/mfd/mc13xxx.h @@ -13,7 +13,9 @@ #include #include -#define MC13XXX_NUMREGS 0x3f +#define MC13XXX_NUMREGS 0x3f +#define MC13XXX_IRQ_REG_CNT 2 +#define MC13XXX_IRQ_PER_REG 24 struct mc13xxx; @@ -33,13 +35,14 @@ struct mc13xxx { struct device *dev; const struct mc13xxx_variant *variant; + struct regmap_irq irqs[MC13XXX_IRQ_PER_REG * MC13XXX_IRQ_REG_CNT]; + struct regmap_irq_chip irq_chip; + struct regmap_irq_chip_data *irq_data; + struct mutex lock; int irq; int flags; - irq_handler_t irqhandler[MC13XXX_NUM_IRQ]; - void *irqdata[MC13XXX_NUM_IRQ]; - int adcflags; }; diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index a8eeda773a7b..4ff6137d8d67 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -86,6 +86,5 @@ #define MC13783_IRQ_HSL 43 #define MC13783_IRQ_ALSPTH 44 #define MC13783_IRQ_AHSSHORT 45 -#define MC13783_NUM_IRQ MC13XXX_NUM_IRQ #endif /* ifndef __LINUX_MFD_MC13783_H */ diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index d63b1d309106..638222e43e48 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -23,15 +23,10 @@ int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq, irq_handler_t handler, const char *name, void *dev); -int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, - irq_handler_t handler, const char *name, void *dev); int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev); -int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq); -int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq); int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, int *enabled, int *pending); -int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq); int mc13xxx_get_flags(struct mc13xxx *mc13xxx); @@ -39,6 +34,22 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, unsigned int channel, u8 ato, bool atox, unsigned int *sample); +/* Deprecated calls */ +static inline int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq) +{ + return 0; +} + +static inline int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, + const char *name, void *dev) +{ + return mc13xxx_irq_request(mc13xxx, irq, handler, name, dev); +} + +int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq); +int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq); + #define MC13783_AUDIO_RX0 36 #define MC13783_AUDIO_RX1 37 #define MC13783_AUDIO_TX 38 @@ -68,8 +79,6 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, #define MC13XXX_IRQ_THWARNH 37 #define MC13XXX_IRQ_CLK 38 -#define MC13XXX_NUM_IRQ 46 - struct regulator_init_data; struct mc13xxx_regulator_init_data { -- cgit v1.2.3 From 54e8827d5f0e66d152ef63e7958030ef4880cd85 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 25 Jun 2014 16:14:44 +0900 Subject: mfd: sec-core: Add support for S2MPU02 device Add support for Samsung S2MPU02 PMIC device to the MFD sec-core driver. The S2MPU02 device includes PMIC/RTC/Clock devices. Signed-off-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- drivers/mfd/sec-core.c | 46 ++++++--- drivers/mfd/sec-irq.c | 110 +++++++++++++++++--- include/linux/mfd/samsung/core.h | 1 + include/linux/mfd/samsung/irq.h | 24 +++++ include/linux/mfd/samsung/s2mpu02.h | 201 ++++++++++++++++++++++++++++++++++++ 5 files changed, 352 insertions(+), 30 deletions(-) create mode 100644 include/linux/mfd/samsung/s2mpu02.h (limited to 'include/linux') diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c index be06d0abbf19..15ba847b3d23 100644 --- a/drivers/mfd/sec-core.c +++ b/drivers/mfd/sec-core.c @@ -89,6 +89,15 @@ static const struct mfd_cell s2mpa01_devs[] = { }, }; +static const struct mfd_cell s2mpu02_devs[] = { + { .name = "s2mpu02-pmic", }, + { .name = "s2mpu02-rtc", }, + { + .name = "s2mpu02-clk", + .of_compatible = "samsung,s2mpu02-clk", + } +}; + #ifdef CONFIG_OF static const struct of_device_id sec_dt_match[] = { { .compatible = "samsung,s5m8767-pmic", @@ -102,6 +111,9 @@ static const struct of_device_id sec_dt_match[] = { }, { .compatible = "samsung,s2mpa01-pmic", .data = (void *)S2MPA01, + }, { + .compatible = "samsung,s2mpu02-pmic", + .data = (void *)S2MPU02, }, { /* Sentinel */ }, @@ -250,9 +262,10 @@ static int sec_pmic_probe(struct i2c_client *i2c, { struct sec_platform_data *pdata = dev_get_platdata(&i2c->dev); const struct regmap_config *regmap; + const struct mfd_cell *sec_devs; struct sec_pmic_dev *sec_pmic; unsigned long device_type; - int ret; + int ret, num_sec_devs; sec_pmic = devm_kzalloc(&i2c->dev, sizeof(struct sec_pmic_dev), GFP_KERNEL); @@ -319,34 +332,39 @@ static int sec_pmic_probe(struct i2c_client *i2c, switch (sec_pmic->device_type) { case S5M8751X: - ret = mfd_add_devices(sec_pmic->dev, -1, s5m8751_devs, - ARRAY_SIZE(s5m8751_devs), NULL, 0, NULL); + sec_devs = s5m8751_devs; + num_sec_devs = ARRAY_SIZE(s5m8751_devs); break; case S5M8763X: - ret = mfd_add_devices(sec_pmic->dev, -1, s5m8763_devs, - ARRAY_SIZE(s5m8763_devs), NULL, 0, NULL); + sec_devs = s5m8763_devs; + num_sec_devs = ARRAY_SIZE(s5m8763_devs); break; case S5M8767X: - ret = mfd_add_devices(sec_pmic->dev, -1, s5m8767_devs, - ARRAY_SIZE(s5m8767_devs), NULL, 0, NULL); + sec_devs = s5m8767_devs; + num_sec_devs = ARRAY_SIZE(s5m8767_devs); break; case S2MPA01: - ret = mfd_add_devices(sec_pmic->dev, -1, s2mpa01_devs, - ARRAY_SIZE(s2mpa01_devs), NULL, 0, NULL); + sec_devs = s2mpa01_devs; + num_sec_devs = ARRAY_SIZE(s2mpa01_devs); break; case S2MPS11X: - ret = mfd_add_devices(sec_pmic->dev, -1, s2mps11_devs, - ARRAY_SIZE(s2mps11_devs), NULL, 0, NULL); + sec_devs = s2mps11_devs; + num_sec_devs = ARRAY_SIZE(s2mps11_devs); break; case S2MPS14X: - ret = mfd_add_devices(sec_pmic->dev, -1, s2mps14_devs, - ARRAY_SIZE(s2mps14_devs), NULL, 0, NULL); + sec_devs = s2mps14_devs; + num_sec_devs = ARRAY_SIZE(s2mps14_devs); + break; + case S2MPU02: + sec_devs = s2mpu02_devs; + num_sec_devs = ARRAY_SIZE(s2mpu02_devs); break; default: /* If this happens the probe function is problem */ BUG(); } - + ret = mfd_add_devices(sec_pmic->dev, -1, sec_devs, num_sec_devs, NULL, + 0, NULL); if (ret) goto err_mfd; diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c index 654e2c1dbf7a..f9a57869e3ec 100644 --- a/drivers/mfd/sec-irq.c +++ b/drivers/mfd/sec-irq.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -161,6 +162,77 @@ static const struct regmap_irq s2mps14_irqs[] = { }, }; +static const struct regmap_irq s2mpu02_irqs[] = { + [S2MPU02_IRQ_PWRONF] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_PWRONF_MASK, + }, + [S2MPU02_IRQ_PWRONR] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_PWRONR_MASK, + }, + [S2MPU02_IRQ_JIGONBF] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_JIGONBF_MASK, + }, + [S2MPU02_IRQ_JIGONBR] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_JIGONBR_MASK, + }, + [S2MPU02_IRQ_ACOKBF] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_ACOKBF_MASK, + }, + [S2MPU02_IRQ_ACOKBR] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_ACOKBR_MASK, + }, + [S2MPU02_IRQ_PWRON1S] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_PWRON1S_MASK, + }, + [S2MPU02_IRQ_MRB] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_MRB_MASK, + }, + [S2MPU02_IRQ_RTC60S] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_RTC60S_MASK, + }, + [S2MPU02_IRQ_RTCA1] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_RTCA1_MASK, + }, + [S2MPU02_IRQ_RTCA0] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_RTCA0_MASK, + }, + [S2MPU02_IRQ_SMPL] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_SMPL_MASK, + }, + [S2MPU02_IRQ_RTC1S] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_RTC1S_MASK, + }, + [S2MPU02_IRQ_WTSR] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_WTSR_MASK, + }, + [S2MPU02_IRQ_INT120C] = { + .reg_offset = 2, + .mask = S2MPS11_IRQ_INT120C_MASK, + }, + [S2MPU02_IRQ_INT140C] = { + .reg_offset = 2, + .mask = S2MPS11_IRQ_INT140C_MASK, + }, + [S2MPU02_IRQ_TSD] = { + .reg_offset = 2, + .mask = S2MPS14_IRQ_TSD_MASK, + }, +}; + static const struct regmap_irq s5m8767_irqs[] = { [S5M8767_IRQ_PWRR] = { .reg_offset = 0, @@ -327,6 +399,16 @@ static const struct regmap_irq_chip s2mps14_irq_chip = { .ack_base = S2MPS14_REG_INT1, }; +static const struct regmap_irq_chip s2mpu02_irq_chip = { + .name = "s2mpu02", + .irqs = s2mpu02_irqs, + .num_irqs = ARRAY_SIZE(s2mpu02_irqs), + .num_regs = 3, + .status_base = S2MPU02_REG_INT1, + .mask_base = S2MPU02_REG_INT1M, + .ack_base = S2MPU02_REG_INT1, +}; + static const struct regmap_irq_chip s5m8767_irq_chip = { .name = "s5m8767", .irqs = s5m8767_irqs, @@ -351,6 +433,7 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic) { int ret = 0; int type = sec_pmic->device_type; + const struct regmap_irq_chip *sec_irq_chip; if (!sec_pmic->irq) { dev_warn(sec_pmic->dev, @@ -361,28 +444,19 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic) switch (type) { case S5M8763X: - ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - sec_pmic->irq_base, &s5m8763_irq_chip, - &sec_pmic->irq_data); + sec_irq_chip = &s5m8763_irq_chip; break; case S5M8767X: - ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - sec_pmic->irq_base, &s5m8767_irq_chip, - &sec_pmic->irq_data); + sec_irq_chip = &s5m8767_irq_chip; break; case S2MPS11X: - ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - sec_pmic->irq_base, &s2mps11_irq_chip, - &sec_pmic->irq_data); + sec_irq_chip = &s2mps11_irq_chip; break; case S2MPS14X: - ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - sec_pmic->irq_base, &s2mps14_irq_chip, - &sec_pmic->irq_data); + sec_irq_chip = &s2mps14_irq_chip; + break; + case S2MPU02: + sec_irq_chip = &s2mpu02_irq_chip; break; default: dev_err(sec_pmic->dev, "Unknown device type %lu\n", @@ -390,6 +464,10 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic) return -EINVAL; } + ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + sec_pmic->irq_base, sec_irq_chip, + &sec_pmic->irq_data); if (ret != 0) { dev_err(sec_pmic->dev, "Failed to register IRQ chip: %d\n", ret); return ret; diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 47d84242940b..b5f73de81aad 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -21,6 +21,7 @@ enum sec_device_type { S2MPA01, S2MPS11X, S2MPS14X, + S2MPU02, }; /** diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h index 1224f447356b..f35af7361b60 100644 --- a/include/linux/mfd/samsung/irq.h +++ b/include/linux/mfd/samsung/irq.h @@ -129,6 +129,30 @@ enum s2mps14_irq { S2MPS14_IRQ_NR, }; +enum s2mpu02_irq { + S2MPU02_IRQ_PWRONF, + S2MPU02_IRQ_PWRONR, + S2MPU02_IRQ_JIGONBF, + S2MPU02_IRQ_JIGONBR, + S2MPU02_IRQ_ACOKBF, + S2MPU02_IRQ_ACOKBR, + S2MPU02_IRQ_PWRON1S, + S2MPU02_IRQ_MRB, + + S2MPU02_IRQ_RTC60S, + S2MPU02_IRQ_RTCA1, + S2MPU02_IRQ_RTCA0, + S2MPU02_IRQ_SMPL, + S2MPU02_IRQ_RTC1S, + S2MPU02_IRQ_WTSR, + + S2MPU02_IRQ_INT120C, + S2MPU02_IRQ_INT140C, + S2MPU02_IRQ_TSD, + + S2MPU02_IRQ_NR, +}; + /* Masks for interrupts are the same as in s2mps11 */ #define S2MPS14_IRQ_TSD_MASK (1 << 2) diff --git a/include/linux/mfd/samsung/s2mpu02.h b/include/linux/mfd/samsung/s2mpu02.h new file mode 100644 index 000000000000..47ae9bc583a7 --- /dev/null +++ b/include/linux/mfd/samsung/s2mpu02.h @@ -0,0 +1,201 @@ +/* + * s2mpu02.h + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __LINUX_MFD_S2MPU02_H +#define __LINUX_MFD_S2MPU02_H + +/* S2MPU02 registers */ +enum S2MPU02_reg { + S2MPU02_REG_ID, + S2MPU02_REG_INT1, + S2MPU02_REG_INT2, + S2MPU02_REG_INT3, + S2MPU02_REG_INT1M, + S2MPU02_REG_INT2M, + S2MPU02_REG_INT3M, + S2MPU02_REG_ST1, + S2MPU02_REG_ST2, + S2MPU02_REG_PWRONSRC, + S2MPU02_REG_OFFSRC, + S2MPU02_REG_BU_CHG, + S2MPU02_REG_RTCCTRL, + S2MPU02_REG_PMCTRL1, + S2MPU02_REG_RSVD1, + S2MPU02_REG_RSVD2, + S2MPU02_REG_RSVD3, + S2MPU02_REG_RSVD4, + S2MPU02_REG_RSVD5, + S2MPU02_REG_RSVD6, + S2MPU02_REG_RSVD7, + S2MPU02_REG_WRSTEN, + S2MPU02_REG_RSVD8, + S2MPU02_REG_RSVD9, + S2MPU02_REG_RSVD10, + S2MPU02_REG_B1CTRL1, + S2MPU02_REG_B1CTRL2, + S2MPU02_REG_B2CTRL1, + S2MPU02_REG_B2CTRL2, + S2MPU02_REG_B3CTRL1, + S2MPU02_REG_B3CTRL2, + S2MPU02_REG_B4CTRL1, + S2MPU02_REG_B4CTRL2, + S2MPU02_REG_B5CTRL1, + S2MPU02_REG_B5CTRL2, + S2MPU02_REG_B5CTRL3, + S2MPU02_REG_B5CTRL4, + S2MPU02_REG_B5CTRL5, + S2MPU02_REG_B6CTRL1, + S2MPU02_REG_B6CTRL2, + S2MPU02_REG_B7CTRL1, + S2MPU02_REG_B7CTRL2, + S2MPU02_REG_RAMP1, + S2MPU02_REG_RAMP2, + S2MPU02_REG_L1CTRL, + S2MPU02_REG_L2CTRL1, + S2MPU02_REG_L2CTRL2, + S2MPU02_REG_L2CTRL3, + S2MPU02_REG_L2CTRL4, + S2MPU02_REG_L3CTRL, + S2MPU02_REG_L4CTRL, + S2MPU02_REG_L5CTRL, + S2MPU02_REG_L6CTRL, + S2MPU02_REG_L7CTRL, + S2MPU02_REG_L8CTRL, + S2MPU02_REG_L9CTRL, + S2MPU02_REG_L10CTRL, + S2MPU02_REG_L11CTRL, + S2MPU02_REG_L12CTRL, + S2MPU02_REG_L13CTRL, + S2MPU02_REG_L14CTRL, + S2MPU02_REG_L15CTRL, + S2MPU02_REG_L16CTRL, + S2MPU02_REG_L17CTRL, + S2MPU02_REG_L18CTRL, + S2MPU02_REG_L19CTRL, + S2MPU02_REG_L20CTRL, + S2MPU02_REG_L21CTRL, + S2MPU02_REG_L22CTRL, + S2MPU02_REG_L23CTRL, + S2MPU02_REG_L24CTRL, + S2MPU02_REG_L25CTRL, + S2MPU02_REG_L26CTRL, + S2MPU02_REG_L27CTRL, + S2MPU02_REG_L28CTRL, + S2MPU02_REG_LDODSCH1, + S2MPU02_REG_LDODSCH2, + S2MPU02_REG_LDODSCH3, + S2MPU02_REG_LDODSCH4, + S2MPU02_REG_SELMIF, + S2MPU02_REG_RSVD11, + S2MPU02_REG_RSVD12, + S2MPU02_REG_RSVD13, + S2MPU02_REG_DVSSEL, + S2MPU02_REG_DVSPTR, + S2MPU02_REG_DVSDATA, +}; + +/* S2MPU02 regulator ids */ +enum S2MPU02_regulators { + S2MPU02_LDO1, + S2MPU02_LDO2, + S2MPU02_LDO3, + S2MPU02_LDO4, + S2MPU02_LDO5, + S2MPU02_LDO6, + S2MPU02_LDO7, + S2MPU02_LDO8, + S2MPU02_LDO9, + S2MPU02_LDO10, + S2MPU02_LDO11, + S2MPU02_LDO12, + S2MPU02_LDO13, + S2MPU02_LDO14, + S2MPU02_LDO15, + S2MPU02_LDO16, + S2MPU02_LDO17, + S2MPU02_LDO18, + S2MPU02_LDO19, + S2MPU02_LDO20, + S2MPU02_LDO21, + S2MPU02_LDO22, + S2MPU02_LDO23, + S2MPU02_LDO24, + S2MPU02_LDO25, + S2MPU02_LDO26, + S2MPU02_LDO27, + S2MPU02_LDO28, + S2MPU02_BUCK1, + S2MPU02_BUCK2, + S2MPU02_BUCK3, + S2MPU02_BUCK4, + S2MPU02_BUCK5, + S2MPU02_BUCK6, + S2MPU02_BUCK7, + + S2MPU02_REGULATOR_MAX, +}; + +/* Regulator constraints for BUCKx */ +#define S2MPU02_BUCK1234_MIN_600MV 600000 +#define S2MPU02_BUCK5_MIN_1081_25MV 1081250 +#define S2MPU02_BUCK6_MIN_1700MV 1700000 +#define S2MPU02_BUCK7_MIN_900MV 900000 + +#define S2MPU02_BUCK1234_STEP_6_25MV 6250 +#define S2MPU02_BUCK5_STEP_6_25MV 6250 +#define S2MPU02_BUCK6_STEP_2_50MV 2500 +#define S2MPU02_BUCK7_STEP_6_25MV 6250 + +#define S2MPU02_BUCK1234_START_SEL 0x00 +#define S2MPU02_BUCK5_START_SEL 0x4D +#define S2MPU02_BUCK6_START_SEL 0x28 +#define S2MPU02_BUCK7_START_SEL 0x30 + +#define S2MPU02_BUCK_RAMP_DELAY 12500 + +/* Regulator constraints for different types of LDOx */ +#define S2MPU02_LDO_MIN_900MV 900000 +#define S2MPU02_LDO_MIN_1050MV 1050000 +#define S2MPU02_LDO_MIN_1600MV 1600000 +#define S2MPU02_LDO_STEP_12_5MV 12500 +#define S2MPU02_LDO_STEP_25MV 25000 +#define S2MPU02_LDO_STEP_50MV 50000 + +#define S2MPU02_LDO_GROUP1_START_SEL 0x8 +#define S2MPU02_LDO_GROUP2_START_SEL 0xA +#define S2MPU02_LDO_GROUP3_START_SEL 0x10 + +#define S2MPU02_LDO_VSEL_MASK 0x3F +#define S2MPU02_BUCK_VSEL_MASK 0xFF +#define S2MPU02_ENABLE_MASK (0x03 << S2MPU02_ENABLE_SHIFT) +#define S2MPU02_ENABLE_SHIFT 6 + +/* On/Off controlled by PWREN */ +#define S2MPU02_ENABLE_SUSPEND (0x01 << S2MPU02_ENABLE_SHIFT) +#define S2MPU02_DISABLE_SUSPEND (0x11 << S2MPU02_ENABLE_SHIFT) +#define S2MPU02_LDO_N_VOLTAGES (S2MPU02_LDO_VSEL_MASK + 1) +#define S2MPU02_BUCK_N_VOLTAGES (S2MPU02_BUCK_VSEL_MASK + 1) + +/* RAMP delay for BUCK1234*/ +#define S2MPU02_BUCK1_RAMP_SHIFT 6 +#define S2MPU02_BUCK2_RAMP_SHIFT 4 +#define S2MPU02_BUCK3_RAMP_SHIFT 2 +#define S2MPU02_BUCK4_RAMP_SHIFT 0 +#define S2MPU02_BUCK1234_RAMP_MASK 0x3 + +#endif /* __LINUX_MFD_S2MPU02_H */ -- cgit v1.2.3 From ee98662ec914a23ab826b0c83797aa9414f737bc Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:13:58 -0700 Subject: mfd: cros_ec: Fix the comment on cros_ec_remove() This comment was incorrect, so update it. Signed-off-by: Bill Richardson Signed-off-by: Simon Glass Signed-off-by: Doug Anderson Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 887ef4f7bef7..7e9fe6e98d2f 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -148,8 +148,7 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, /** * cros_ec_remove - Remove a ChromeOS EC * - * Call this to deregister a ChromeOS EC. After this you should call - * cros_ec_free(). + * Call this to deregister a ChromeOS EC, then clean up any private data. * * @ec_dev: Device to register * @return 0 if ok, -ve on error -- cgit v1.2.3 From 2ce701ae4e351d9407ec0b30f5f9dd56b6de4292 Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:13:59 -0700 Subject: mfd: cros_ec: Allow static din/dout buffers with cros_ec_register() The lower-level driver may want to provide its own buffers. If so, there's no need to allocate new ones. This already happens to work just fine (since we check for size of 0 and use devm allocation), but it's good to document it. [dianders: Resolved conflicts; documented that no code changes needed on mainline] Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Reviewed-by: Simon Glass Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 7e9fe6e98d2f..2ee3190b691c 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -68,8 +68,8 @@ struct cros_ec_msg { * We use this alignment to keep ARM and x86 happy. Probably word * alignment would be OK, there might be a small performance advantage * to using dword. - * @din_size: size of din buffer - * @dout_size: size of dout buffer + * @din_size: size of din buffer to allocate (zero to use static din) + * @dout_size: size of dout buffer to allocate (zero to use static dout) * @command_send: send a command * @command_recv: receive a command * @ec_name: name of EC device (e.g. 'chromeos-ec') -- cgit v1.2.3 From 7e6cb5b4dbbc4b1d98289c88d0bc4092cac328be Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:00 -0700 Subject: mfd: cros_ec: Tweak struct cros_ec_device for clarity The members of struct cros_ec_device were improperly commented, and intermixed the private and public sections. This is just cleanup to make it more obvious what goes with what. [dianders: left lock in the structure but gave it the name that will eventually be used.] Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 2 +- drivers/mfd/cros_ec_i2c.c | 4 +-- drivers/mfd/cros_ec_spi.c | 10 +++---- include/linux/mfd/cros_ec.h | 65 ++++++++++++++++++++++++--------------------- 4 files changed, 43 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 38fe9bf0d169..04e053c71cc6 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -57,7 +57,7 @@ static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev, msg.in_buf = in_buf; msg.in_len = in_len; - return ec_dev->command_xfer(ec_dev, &msg); + return ec_dev->cmd_xfer(ec_dev, &msg); } static int cros_ec_command_recv(struct cros_ec_device *ec_dev, diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 4f71be99a183..777e529abb16 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -29,7 +29,7 @@ static inline struct cros_ec_device *to_ec_dev(struct device *dev) return i2c_get_clientdata(client); } -static int cros_ec_command_xfer(struct cros_ec_device *ec_dev, +static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, struct cros_ec_msg *msg) { struct i2c_client *client = ec_dev->priv; @@ -136,7 +136,7 @@ static int cros_ec_i2c_probe(struct i2c_client *client, ec_dev->dev = dev; ec_dev->priv = client; ec_dev->irq = client->irq; - ec_dev->command_xfer = cros_ec_command_xfer; + ec_dev->cmd_xfer = cros_ec_cmd_xfer_i2c; ec_dev->ec_name = client->name; ec_dev->phys_name = client->adapter->name; ec_dev->parent = &client->dev; diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index 1fcc65ecad0e..6c3075fb5dc7 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -73,7 +73,7 @@ * if no record * @end_of_msg_delay: used to set the delay_usecs on the spi_transfer that * is sent when we want to turn off CS at the end of a transaction. - * @lock: mutex to ensure only one user of cros_ec_command_spi_xfer at a time + * @lock: mutex to ensure only one user of cros_ec_cmd_xfer_spi at a time */ struct cros_ec_spi { struct spi_device *spi; @@ -210,13 +210,13 @@ static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev, } /** - * cros_ec_command_spi_xfer - Transfer a message over SPI and receive the reply + * cros_ec_cmd_xfer_spi - Transfer a message over SPI and receive the reply * * @ec_dev: ChromeOS EC device * @ec_msg: Message to transfer */ -static int cros_ec_command_spi_xfer(struct cros_ec_device *ec_dev, - struct cros_ec_msg *ec_msg) +static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, + struct cros_ec_msg *ec_msg) { struct cros_ec_spi *ec_spi = ec_dev->priv; struct spi_transfer trans; @@ -368,7 +368,7 @@ static int cros_ec_spi_probe(struct spi_device *spi) ec_dev->dev = dev; ec_dev->priv = ec_spi; ec_dev->irq = spi->irq; - ec_dev->command_xfer = cros_ec_command_spi_xfer; + ec_dev->cmd_xfer = cros_ec_cmd_xfer_spi; ec_dev->ec_name = ec_spi->spi->modalias; ec_dev->phys_name = dev_name(&ec_spi->spi->dev); ec_dev->parent = &ec_spi->spi->dev; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 2ee3190b691c..79a35857cc9e 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -16,7 +16,9 @@ #ifndef __LINUX_MFD_CROS_EC_H #define __LINUX_MFD_CROS_EC_H +#include #include +#include /* * Command interface between EC and AP, for LPC, I2C and SPI interfaces. @@ -55,34 +57,53 @@ struct cros_ec_msg { /** * struct cros_ec_device - Information about a ChromeOS EC device * + * @ec_name: name of EC device (e.g. 'chromeos-ec') + * @phys_name: name of physical comms layer (e.g. 'i2c-4') + * @dev: Device pointer + * @was_wake_device: true if this device was set to wake the system from + * sleep at the last suspend + * @event_notifier: interrupt event notifier for transport devices + * @command_send: send a command + * @command_recv: receive a response + * @command_sendrecv: send a command and receive a response + * * @name: Name of this EC interface * @priv: Private data * @irq: Interrupt to use - * @din: input buffer (from EC) - * @dout: output buffer (to EC) + * @din: input buffer (for data from EC) + * @dout: output buffer (for data to EC) * \note * These two buffers will always be dword-aligned and include enough * space for up to 7 word-alignment bytes also, so we can ensure that * the body of the message is always dword-aligned (64-bit). - * * We use this alignment to keep ARM and x86 happy. Probably word * alignment would be OK, there might be a small performance advantage * to using dword. * @din_size: size of din buffer to allocate (zero to use static din) * @dout_size: size of dout buffer to allocate (zero to use static dout) - * @command_send: send a command - * @command_recv: receive a command - * @ec_name: name of EC device (e.g. 'chromeos-ec') - * @phys_name: name of physical comms layer (e.g. 'i2c-4') * @parent: pointer to parent device (e.g. i2c or spi device) - * @dev: Device pointer - * dev_lock: Lock to prevent concurrent access * @wake_enabled: true if this device can wake the system from sleep - * @was_wake_device: true if this device was set to wake the system from - * sleep at the last suspend - * @event_notifier: interrupt event notifier for transport devices + * @lock: one transaction at a time + * @cmd_xfer: low-level channel to the EC */ struct cros_ec_device { + + /* These are used by other drivers that want to talk to the EC */ + const char *ec_name; + const char *phys_name; + struct device *dev; + bool was_wake_device; + struct class *cros_class; + struct blocking_notifier_head event_notifier; + int (*command_send)(struct cros_ec_device *ec, + uint16_t cmd, void *out_buf, int out_len); + int (*command_recv)(struct cros_ec_device *ec, + uint16_t cmd, void *in_buf, int in_len); + int (*command_sendrecv)(struct cros_ec_device *ec, + uint16_t cmd, void *out_buf, int out_len, + void *in_buf, int in_len); + + /* These are used to implement the platform-specific interface */ const char *name; void *priv; int irq; @@ -90,26 +111,10 @@ struct cros_ec_device { uint8_t *dout; int din_size; int dout_size; - int (*command_send)(struct cros_ec_device *ec, - uint16_t cmd, void *out_buf, int out_len); - int (*command_recv)(struct cros_ec_device *ec, - uint16_t cmd, void *in_buf, int in_len); - int (*command_sendrecv)(struct cros_ec_device *ec, - uint16_t cmd, void *out_buf, int out_len, - void *in_buf, int in_len); - int (*command_xfer)(struct cros_ec_device *ec, - struct cros_ec_msg *msg); - - const char *ec_name; - const char *phys_name; struct device *parent; - - /* These are --private-- fields - do not assign */ - struct device *dev; - struct mutex dev_lock; bool wake_enabled; - bool was_wake_device; - struct blocking_notifier_head event_notifier; + struct mutex lock; + int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_msg *msg); }; /** -- cgit v1.2.3 From 5d4773e27e8ab37491767a6ef99ffd7100fe6341 Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:02 -0700 Subject: mfd: cros_ec: Use struct cros_ec_command to communicate with the EC This is some internal structure reorganization / renaming to prepare for future patches that will add a userspace API to cros_ec. There should be no visible changes. Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 28 ++++++++++++++-------------- drivers/mfd/cros_ec_i2c.c | 24 ++++++++++++------------ drivers/mfd/cros_ec_spi.c | 16 ++++++++-------- include/linux/mfd/cros_ec.h | 35 ++++++++++++++++++----------------- 4 files changed, 52 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 04e053c71cc6..2e86c282f0b4 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -25,22 +25,22 @@ #include int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, - struct cros_ec_msg *msg) + struct cros_ec_command *msg) { uint8_t *out; int csum, i; - BUG_ON(msg->out_len > EC_PROTO2_MAX_PARAM_SIZE); + BUG_ON(msg->outsize > EC_PROTO2_MAX_PARAM_SIZE); out = ec_dev->dout; out[0] = EC_CMD_VERSION0 + msg->version; - out[1] = msg->cmd; - out[2] = msg->out_len; + out[1] = msg->command; + out[2] = msg->outsize; csum = out[0] + out[1] + out[2]; - for (i = 0; i < msg->out_len; i++) - csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->out_buf[i]; - out[EC_MSG_TX_HEADER_BYTES + msg->out_len] = (uint8_t)(csum & 0xff); + for (i = 0; i < msg->outsize; i++) + csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->outdata[i]; + out[EC_MSG_TX_HEADER_BYTES + msg->outsize] = (uint8_t)(csum & 0xff); - return EC_MSG_TX_PROTO_BYTES + msg->out_len; + return EC_MSG_TX_PROTO_BYTES + msg->outsize; } EXPORT_SYMBOL(cros_ec_prepare_tx); @@ -48,14 +48,14 @@ static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev, uint16_t cmd, void *out_buf, int out_len, void *in_buf, int in_len) { - struct cros_ec_msg msg; + struct cros_ec_command msg; msg.version = cmd >> 8; - msg.cmd = cmd & 0xff; - msg.out_buf = out_buf; - msg.out_len = out_len; - msg.in_buf = in_buf; - msg.in_len = in_len; + msg.command = cmd & 0xff; + msg.outdata = out_buf; + msg.outsize = out_len; + msg.indata = in_buf; + msg.insize = in_len; return ec_dev->cmd_xfer(ec_dev, &msg); } diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 777e529abb16..37ed12f99560 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -30,7 +30,7 @@ static inline struct cros_ec_device *to_ec_dev(struct device *dev) } static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, - struct cros_ec_msg *msg) + struct cros_ec_command *msg) { struct i2c_client *client = ec_dev->priv; int ret = -ENOMEM; @@ -50,7 +50,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, * allocate larger packet (one byte for checksum, one byte for * length, and one for result code) */ - packet_len = msg->in_len + 3; + packet_len = msg->insize + 3; in_buf = kzalloc(packet_len, GFP_KERNEL); if (!in_buf) goto done; @@ -61,7 +61,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, * allocate larger packet (one byte for checksum, one for * command code, one for length, and one for command version) */ - packet_len = msg->out_len + 4; + packet_len = msg->outsize + 4; out_buf = kzalloc(packet_len, GFP_KERNEL); if (!out_buf) goto done; @@ -69,16 +69,16 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, i2c_msg[0].buf = (char *)out_buf; out_buf[0] = EC_CMD_VERSION0 + msg->version; - out_buf[1] = msg->cmd; - out_buf[2] = msg->out_len; + out_buf[1] = msg->command; + out_buf[2] = msg->outsize; /* copy message payload and compute checksum */ sum = out_buf[0] + out_buf[1] + out_buf[2]; - for (i = 0; i < msg->out_len; i++) { - out_buf[3 + i] = msg->out_buf[i]; + for (i = 0; i < msg->outsize; i++) { + out_buf[3 + i] = msg->outdata[i]; sum += out_buf[3 + i]; } - out_buf[3 + msg->out_len] = sum; + out_buf[3 + msg->outsize] = sum; /* send command to EC and read answer */ ret = i2c_transfer(client->adapter, i2c_msg, 2); @@ -94,20 +94,20 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, /* check response error code */ if (i2c_msg[1].buf[0]) { dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", - msg->cmd, i2c_msg[1].buf[0]); + msg->command, i2c_msg[1].buf[0]); ret = -EINVAL; goto done; } /* copy response packet payload and compute checksum */ sum = in_buf[0] + in_buf[1]; - for (i = 0; i < msg->in_len; i++) { - msg->in_buf[i] = in_buf[2 + i]; + for (i = 0; i < msg->insize; i++) { + msg->indata[i] = in_buf[2 + i]; sum += in_buf[2 + i]; } dev_dbg(ec_dev->dev, "packet: %*ph, sum = %02x\n", i2c_msg[1].len, in_buf, sum); - if (sum != in_buf[2 + msg->in_len]) { + if (sum != in_buf[2 + msg->insize]) { dev_err(ec_dev->dev, "bad packet checksum\n"); ret = -EBADMSG; goto done; diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index 9d45d88813b8..bef7735ecfde 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -216,7 +216,7 @@ static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev, * @ec_msg: Message to transfer */ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, - struct cros_ec_msg *ec_msg) + struct cros_ec_command *ec_msg) { struct cros_ec_spi *ec_spi = ec_dev->priv; struct spi_transfer trans; @@ -261,7 +261,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, /* Get the response */ if (!ret) { ret = cros_ec_spi_receive_response(ec_dev, - ec_msg->in_len + EC_MSG_TX_PROTO_BYTES); + ec_msg->insize + EC_MSG_TX_PROTO_BYTES); } else { dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); } @@ -290,21 +290,21 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, if (ptr[0]) { if (ptr[0] == EC_RES_IN_PROGRESS) { dev_dbg(ec_dev->dev, "command 0x%02x in progress\n", - ec_msg->cmd); + ec_msg->command); ret = -EAGAIN; goto exit; } dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", - ec_msg->cmd, ptr[0]); + ec_msg->command, ptr[0]); debug_packet(ec_dev->dev, "in_err", ptr, len); ret = -EINVAL; goto exit; } len = ptr[1]; sum = ptr[0] + ptr[1]; - if (len > ec_msg->in_len) { + if (len > ec_msg->insize) { dev_err(ec_dev->dev, "packet too long (%d bytes, expected %d)", - len, ec_msg->in_len); + len, ec_msg->insize); ret = -ENOSPC; goto exit; } @@ -312,8 +312,8 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, /* copy response packet payload and compute checksum */ for (i = 0; i < len; i++) { sum += ptr[i + 2]; - if (ec_msg->in_len) - ec_msg->in_buf[i] = ptr[i + 2]; + if (ec_msg->insize) + ec_msg->indata[i] = ptr[i + 2]; } sum &= 0xff; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 79a35857cc9e..f27c03766069 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -35,23 +35,23 @@ enum { EC_MSG_TX_PROTO_BYTES, }; -/** - * struct cros_ec_msg - A message sent to the EC, and its reply - * +/* * @version: Command version number (often 0) - * @cmd: Command to send (EC_CMD_...) - * @out_buf: Outgoing payload (to EC) - * @outlen: Outgoing length - * @in_buf: Incoming payload (from EC) - * @in_len: Incoming length + * @command: Command to send (EC_CMD_...) + * @outdata: Outgoing data to EC + * @outsize: Outgoing length in bytes + * @indata: Where to put the incoming data from EC + * @insize: Incoming length in bytes (filled in by EC) + * @result: EC's response to the command (separate from communication failure) */ -struct cros_ec_msg { - u8 version; - u8 cmd; - uint8_t *out_buf; - int out_len; - uint8_t *in_buf; - int in_len; +struct cros_ec_command { + uint32_t version; + uint32_t command; + uint8_t *outdata; + uint32_t outsize; + uint8_t *indata; + uint32_t insize; + uint32_t result; }; /** @@ -114,7 +114,8 @@ struct cros_ec_device { struct device *parent; bool wake_enabled; struct mutex lock; - int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_msg *msg); + int (*cmd_xfer)(struct cros_ec_device *ec, + struct cros_ec_command *msg); }; /** @@ -148,7 +149,7 @@ int cros_ec_resume(struct cros_ec_device *ec_dev); * @msg: Message to write */ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, - struct cros_ec_msg *msg); + struct cros_ec_command *msg); /** * cros_ec_remove - Remove a ChromeOS EC -- cgit v1.2.3 From 533cec8f34778de10412dfabac991cf458ebf3c9 Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:03 -0700 Subject: mfd: cros_ec: cleanup: remove unused fields from struct cros_ec_device struct cros_ec_device has a superfluous "name" field. We can get all the debugging info we need from the existing ec_name and phys_name fields, so let's take out the extra field. The printout also has sufficient info in it without explicitly adding the transport. Before this change: cros-ec-spi spi2.0: Chrome EC (SPI) After this change: cros-ec-spi spi2.0: Chrome EC device registered Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Reviewed-by: Simon Glass Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 2 +- drivers/mfd/cros_ec_i2c.c | 1 - drivers/mfd/cros_ec_spi.c | 1 - include/linux/mfd/cros_ec.h | 2 -- 4 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 2e86c282f0b4..49ed8c340868 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -140,7 +140,7 @@ int cros_ec_register(struct cros_ec_device *ec_dev) goto fail_mfd; } - dev_info(dev, "Chrome EC (%s)\n", ec_dev->name); + dev_info(dev, "Chrome EC device registered\n"); return 0; diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 37ed12f99560..5bb32f5550b3 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -132,7 +132,6 @@ static int cros_ec_i2c_probe(struct i2c_client *client, return -ENOMEM; i2c_set_clientdata(client, ec_dev); - ec_dev->name = "I2C"; ec_dev->dev = dev; ec_dev->priv = client; ec_dev->irq = client->irq; diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index bef7735ecfde..6e929b5f3bd3 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -370,7 +370,6 @@ static int cros_ec_spi_probe(struct spi_device *spi) cros_ec_spi_dt_probe(ec_spi, dev); spi_set_drvdata(spi, ec_dev); - ec_dev->name = "SPI"; ec_dev->dev = dev; ec_dev->priv = ec_spi; ec_dev->irq = spi->irq; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index f27c03766069..2b0c5982dbc1 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -67,7 +67,6 @@ struct cros_ec_command { * @command_recv: receive a response * @command_sendrecv: send a command and receive a response * - * @name: Name of this EC interface * @priv: Private data * @irq: Interrupt to use * @din: input buffer (for data from EC) @@ -104,7 +103,6 @@ struct cros_ec_device { void *in_buf, int in_len); /* These are used to implement the platform-specific interface */ - const char *name; void *priv; int irq; uint8_t *din; -- cgit v1.2.3 From 5799f95a373a2752e5c732f531a6f40fe458b818 Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:04 -0700 Subject: mfd: cros_ec: cleanup: Remove EC wrapper functions Remove the three wrapper functions that talk to the EC without passing all the desired arguments and just use the underlying communication function that passes everything in a struct intead. This is internal code refactoring only. Nothing should change. Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Reviewed-by: Simon Glass Acked-by: Wolfram Sang Acked-by: Dmitry Torokhov Signed-off-by: Lee Jones --- drivers/i2c/busses/i2c-cros-ec-tunnel.c | 15 +++++++++++---- drivers/input/keyboard/cros_ec_keyb.c | 12 ++++++++++-- drivers/mfd/cros_ec.c | 32 -------------------------------- include/linux/mfd/cros_ec.h | 19 ++++++------------- 4 files changed, 27 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index 8e7a71487bb1..dd07818d03d0 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -183,6 +183,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[], u8 *request = NULL; u8 *response = NULL; int result; + struct cros_ec_command msg; request_len = ec_i2c_count_message(i2c_msgs, num); if (request_len < 0) { @@ -218,9 +219,15 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[], } ec_i2c_construct_message(request, i2c_msgs, num, bus_num); - result = bus->ec->command_sendrecv(bus->ec, EC_CMD_I2C_PASSTHRU, - request, request_len, - response, response_len); + + msg.version = 0; + msg.command = EC_CMD_I2C_PASSTHRU; + msg.outdata = request; + msg.outsize = request_len; + msg.indata = response; + msg.insize = response_len; + + result = bus->ec->cmd_xfer(bus->ec, &msg); if (result) goto exit; @@ -258,7 +265,7 @@ static int ec_i2c_probe(struct platform_device *pdev) u32 remote_bus; int err; - if (!ec->command_sendrecv) { + if (!ec->cmd_xfer) { dev_err(dev, "Missing sendrecv\n"); return -EINVAL; } diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index 408379669d3c..b8341ab99f55 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -191,8 +191,16 @@ static void cros_ec_keyb_close(struct input_dev *dev) static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state) { - return ckdev->ec->command_recv(ckdev->ec, EC_CMD_MKBP_STATE, - kb_state, ckdev->cols); + struct cros_ec_command msg = { + .version = 0, + .command = EC_CMD_MKBP_STATE, + .outdata = NULL, + .outsize = 0, + .indata = kb_state, + .insize = ckdev->cols, + }; + + return ckdev->ec->cmd_xfer(ckdev->ec, &msg); } static int cros_ec_keyb_work(struct notifier_block *nb, diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 49ed8c340868..4851ed2fbe31 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -44,34 +44,6 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, } EXPORT_SYMBOL(cros_ec_prepare_tx); -static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev, - uint16_t cmd, void *out_buf, int out_len, - void *in_buf, int in_len) -{ - struct cros_ec_command msg; - - msg.version = cmd >> 8; - msg.command = cmd & 0xff; - msg.outdata = out_buf; - msg.outsize = out_len; - msg.indata = in_buf; - msg.insize = in_len; - - return ec_dev->cmd_xfer(ec_dev, &msg); -} - -static int cros_ec_command_recv(struct cros_ec_device *ec_dev, - uint16_t cmd, void *buf, int buf_len) -{ - return cros_ec_command_sendrecv(ec_dev, cmd, NULL, 0, buf, buf_len); -} - -static int cros_ec_command_send(struct cros_ec_device *ec_dev, - uint16_t cmd, void *buf, int buf_len) -{ - return cros_ec_command_sendrecv(ec_dev, cmd, buf, buf_len, NULL, 0); -} - static irqreturn_t ec_irq_thread(int irq, void *data) { struct cros_ec_device *ec_dev = data; @@ -104,10 +76,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev) BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier); - ec_dev->command_send = cros_ec_command_send; - ec_dev->command_recv = cros_ec_command_recv; - ec_dev->command_sendrecv = cros_ec_command_sendrecv; - if (ec_dev->din_size) { ec_dev->din = devm_kzalloc(dev, ec_dev->din_size, GFP_KERNEL); if (!ec_dev->din) diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 2b0c5982dbc1..60c088055f3a 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -63,9 +63,10 @@ struct cros_ec_command { * @was_wake_device: true if this device was set to wake the system from * sleep at the last suspend * @event_notifier: interrupt event notifier for transport devices - * @command_send: send a command - * @command_recv: receive a response - * @command_sendrecv: send a command and receive a response + * @cmd_xfer: send command to EC and get response + * Returns 0 if the communication succeeded, but that doesn't mean the EC + * was happy with the command it got. Caller should check msg.result for + * the EC's result code. * * @priv: Private data * @irq: Interrupt to use @@ -83,7 +84,6 @@ struct cros_ec_command { * @parent: pointer to parent device (e.g. i2c or spi device) * @wake_enabled: true if this device can wake the system from sleep * @lock: one transaction at a time - * @cmd_xfer: low-level channel to the EC */ struct cros_ec_device { @@ -94,13 +94,8 @@ struct cros_ec_device { bool was_wake_device; struct class *cros_class; struct blocking_notifier_head event_notifier; - int (*command_send)(struct cros_ec_device *ec, - uint16_t cmd, void *out_buf, int out_len); - int (*command_recv)(struct cros_ec_device *ec, - uint16_t cmd, void *in_buf, int in_len); - int (*command_sendrecv)(struct cros_ec_device *ec, - uint16_t cmd, void *out_buf, int out_len, - void *in_buf, int in_len); + int (*cmd_xfer)(struct cros_ec_device *ec, + struct cros_ec_command *msg); /* These are used to implement the platform-specific interface */ void *priv; @@ -112,8 +107,6 @@ struct cros_ec_device { struct device *parent; bool wake_enabled; struct mutex lock; - int (*cmd_xfer)(struct cros_ec_device *ec, - struct cros_ec_command *msg); }; /** -- cgit v1.2.3 From 6db07b6336589ff480528173e41f8f6af3f0097f Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:05 -0700 Subject: mfd: cros_ec: Check result code from EC messages Just because the host was able to talk to the EC doesn't mean that the EC was happy with what it was told. Errors in communincation are not the same as error messages from the EC itself. This change lets the EC report its errors separately. [dianders: Added common function to cros_ec.c] Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 18 ++++++++++++++++++ drivers/mfd/cros_ec_i2c.c | 8 +++----- drivers/mfd/cros_ec_spi.c | 19 ++++++------------- include/linux/mfd/cros_ec.h | 12 ++++++++++++ 4 files changed, 39 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 4851ed2fbe31..83e30c663578 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -44,6 +44,24 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, } EXPORT_SYMBOL(cros_ec_prepare_tx); +int cros_ec_check_result(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg) +{ + switch (msg->result) { + case EC_RES_SUCCESS: + return 0; + case EC_RES_IN_PROGRESS: + dev_dbg(ec_dev->dev, "command 0x%02x in progress\n", + msg->command); + return -EAGAIN; + default: + dev_dbg(ec_dev->dev, "command 0x%02x returned %d\n", + msg->command, msg->result); + return 0; + } +} +EXPORT_SYMBOL(cros_ec_check_result); + static irqreturn_t ec_irq_thread(int irq, void *data) { struct cros_ec_device *ec_dev = data; diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 5bb32f5550b3..189e7d1d7742 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -92,12 +92,10 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, } /* check response error code */ - if (i2c_msg[1].buf[0]) { - dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", - msg->command, i2c_msg[1].buf[0]); - ret = -EINVAL; + msg->result = i2c_msg[1].buf[0]; + ret = cros_ec_check_result(ec_dev, msg); + if (ret) goto done; - } /* copy response packet payload and compute checksum */ sum = in_buf[0] + in_buf[1]; diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index 6e929b5f3bd3..da1da05cd546 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -285,21 +285,14 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, goto exit; } - /* check response error code */ ptr = ec_dev->din; - if (ptr[0]) { - if (ptr[0] == EC_RES_IN_PROGRESS) { - dev_dbg(ec_dev->dev, "command 0x%02x in progress\n", - ec_msg->command); - ret = -EAGAIN; - goto exit; - } - dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", - ec_msg->command, ptr[0]); - debug_packet(ec_dev->dev, "in_err", ptr, len); - ret = -EINVAL; + + /* check response error code */ + ec_msg->result = ptr[0]; + ret = cros_ec_check_result(ec_dev, ec_msg); + if (ret) goto exit; - } + len = ptr[1]; sum = ptr[0] + ptr[1]; if (len > ec_msg->insize) { diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 60c088055f3a..1f79f162abe4 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -142,6 +142,18 @@ int cros_ec_resume(struct cros_ec_device *ec_dev); int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); +/** + * cros_ec_check_result - Check ec_msg->result + * + * This is used by ChromeOS EC drivers to check the ec_msg->result for + * errors and to warn about them. + * + * @ec_dev: EC device + * @msg: Message to check + */ +int cros_ec_check_result(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg); + /** * cros_ec_remove - Remove a ChromeOS EC * -- cgit v1.2.3 From 12ebc8a50bc54e3a6fe207861fc6793181f9c2dc Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:06 -0700 Subject: mfd: cros_ec: ec_dev->cmd_xfer() returns number of bytes received from EC When communicating with the EC, the cmd_xfer() function should return the number of bytes it received from the EC, or negative on error. Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Reviewed-by: Simon Glass Acked-by: Wolfram Sang Signed-off-by: Lee Jones --- drivers/i2c/busses/i2c-cros-ec-tunnel.c | 2 +- drivers/mfd/cros_ec_i2c.c | 2 +- drivers/mfd/cros_ec_spi.c | 2 +- include/linux/mfd/cros_ec.h | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index dd07818d03d0..05e033c98115 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -228,7 +228,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[], msg.insize = response_len; result = bus->ec->cmd_xfer(bus->ec, &msg); - if (result) + if (result < 0) goto exit; result = ec_i2c_parse_response(response, i2c_msgs, &num); diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 189e7d1d7742..fd7a546d3478 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -111,7 +111,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, goto done; } - ret = 0; + ret = i2c_msg[1].buf[1]; done: kfree(in_buf); kfree(out_buf); diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index da1da05cd546..ac52e3653e90 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -320,7 +320,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, goto exit; } - ret = 0; + ret = len; exit: mutex_unlock(&ec_spi->lock); return ret; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 1f79f162abe4..0ebf26fddbbb 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -41,7 +41,7 @@ enum { * @outdata: Outgoing data to EC * @outsize: Outgoing length in bytes * @indata: Where to put the incoming data from EC - * @insize: Incoming length in bytes (filled in by EC) + * @insize: Max number of bytes to accept from EC * @result: EC's response to the command (separate from communication failure) */ struct cros_ec_command { @@ -64,9 +64,9 @@ struct cros_ec_command { * sleep at the last suspend * @event_notifier: interrupt event notifier for transport devices * @cmd_xfer: send command to EC and get response - * Returns 0 if the communication succeeded, but that doesn't mean the EC - * was happy with the command it got. Caller should check msg.result for - * the EC's result code. + * Returns the number of bytes received if the communication succeeded, but + * that doesn't mean the EC was happy with the command. The caller + * should check msg.result for the EC's result code. * * @priv: Private data * @irq: Interrupt to use -- cgit v1.2.3 From d1fd345e2087f0362c92bd3b0a1cea7fe636ac3a Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Wed, 18 Jun 2014 11:14:07 -0700 Subject: mfd: cros_ec: Move EC interrupt to cros_ec_keyb If we receive EC interrupts after the cros_ec driver has probed, but before the cros_ec_keyb driver has probed, the cros_ec IRQ handler will not run the cros_ec_keyb notifier and the EC will leave the IRQ line asserted. The cros_ec IRQ handler then returns IRQ_HANDLED and the resulting flood of interrupts causes the machine to hang. Since the EC interrupt is currently only used for the keyboard, move the setup and handling of the EC interrupt to the cros_ec_keyb driver. Signed-off-by: Andrew Bresticker Signed-off-by: Doug Anderson Acked-by: Dmitry Torokhov Signed-off-by: Lee Jones --- drivers/input/keyboard/cros_ec_keyb.c | 58 ++++++++++++++++++++--------------- drivers/mfd/cros_ec.c | 35 +-------------------- include/linux/mfd/cros_ec.h | 2 -- 3 files changed, 34 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index b8341ab99f55..791781ade4e7 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -24,8 +24,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -42,7 +42,6 @@ * @dev: Device pointer * @idev: Input device * @ec: Top level ChromeOS device to use to talk to EC - * @event_notifier: interrupt event notifier for transport devices */ struct cros_ec_keyb { unsigned int rows; @@ -55,7 +54,6 @@ struct cros_ec_keyb { struct device *dev; struct input_dev *idev; struct cros_ec_device *ec; - struct notifier_block notifier; }; @@ -173,22 +171,6 @@ static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev, input_sync(ckdev->idev); } -static int cros_ec_keyb_open(struct input_dev *dev) -{ - struct cros_ec_keyb *ckdev = input_get_drvdata(dev); - - return blocking_notifier_chain_register(&ckdev->ec->event_notifier, - &ckdev->notifier); -} - -static void cros_ec_keyb_close(struct input_dev *dev) -{ - struct cros_ec_keyb *ckdev = input_get_drvdata(dev); - - blocking_notifier_chain_unregister(&ckdev->ec->event_notifier, - &ckdev->notifier); -} - static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state) { struct cros_ec_command msg = { @@ -203,19 +185,41 @@ static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state) return ckdev->ec->cmd_xfer(ckdev->ec, &msg); } -static int cros_ec_keyb_work(struct notifier_block *nb, - unsigned long state, void *_notify) +static irqreturn_t cros_ec_keyb_irq(int irq, void *data) { + struct cros_ec_keyb *ckdev = data; + struct cros_ec_device *ec = ckdev->ec; int ret; - struct cros_ec_keyb *ckdev = container_of(nb, struct cros_ec_keyb, - notifier); uint8_t kb_state[ckdev->cols]; + if (device_may_wakeup(ec->dev)) + pm_wakeup_event(ec->dev, 0); + ret = cros_ec_keyb_get_state(ckdev, kb_state); if (ret >= 0) cros_ec_keyb_process(ckdev, kb_state, ret); + else + dev_err(ec->dev, "failed to get keyboard state: %d\n", ret); - return NOTIFY_DONE; + return IRQ_HANDLED; +} + +static int cros_ec_keyb_open(struct input_dev *dev) +{ + struct cros_ec_keyb *ckdev = input_get_drvdata(dev); + struct cros_ec_device *ec = ckdev->ec; + + return request_threaded_irq(ec->irq, NULL, cros_ec_keyb_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "cros_ec_keyb", ckdev); +} + +static void cros_ec_keyb_close(struct input_dev *dev) +{ + struct cros_ec_keyb *ckdev = input_get_drvdata(dev); + struct cros_ec_device *ec = ckdev->ec; + + free_irq(ec->irq, ckdev); } static int cros_ec_keyb_probe(struct platform_device *pdev) @@ -246,8 +250,12 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) if (!idev) return -ENOMEM; + if (!ec->irq) { + dev_err(dev, "no EC IRQ specified\n"); + return -EINVAL; + } + ckdev->ec = ec; - ckdev->notifier.notifier_call = cros_ec_keyb_work; ckdev->dev = dev; dev_set_drvdata(&pdev->dev, ckdev); diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 83e30c663578..4873f9c50452 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -62,18 +62,6 @@ int cros_ec_check_result(struct cros_ec_device *ec_dev, } EXPORT_SYMBOL(cros_ec_check_result); -static irqreturn_t ec_irq_thread(int irq, void *data) -{ - struct cros_ec_device *ec_dev = data; - - if (device_may_wakeup(ec_dev->dev)) - pm_wakeup_event(ec_dev->dev, 0); - - blocking_notifier_call_chain(&ec_dev->event_notifier, 1, ec_dev); - - return IRQ_HANDLED; -} - static const struct mfd_cell cros_devs[] = { { .name = "cros-ec-keyb", @@ -92,8 +80,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev) struct device *dev = ec_dev->dev; int err = 0; - BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier); - if (ec_dev->din_size) { ec_dev->din = devm_kzalloc(dev, ec_dev->din_size, GFP_KERNEL); if (!ec_dev->din) @@ -105,42 +91,23 @@ int cros_ec_register(struct cros_ec_device *ec_dev) return -ENOMEM; } - if (!ec_dev->irq) { - dev_dbg(dev, "no valid IRQ: %d\n", ec_dev->irq); - return err; - } - - err = request_threaded_irq(ec_dev->irq, NULL, ec_irq_thread, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - "chromeos-ec", ec_dev); - if (err) { - dev_err(dev, "request irq %d: error %d\n", ec_dev->irq, err); - return err; - } - err = mfd_add_devices(dev, 0, cros_devs, ARRAY_SIZE(cros_devs), NULL, ec_dev->irq, NULL); if (err) { dev_err(dev, "failed to add mfd devices\n"); - goto fail_mfd; + return err; } dev_info(dev, "Chrome EC device registered\n"); return 0; - -fail_mfd: - free_irq(ec_dev->irq, ec_dev); - - return err; } EXPORT_SYMBOL(cros_ec_register); int cros_ec_remove(struct cros_ec_device *ec_dev) { mfd_remove_devices(ec_dev->dev); - free_irq(ec_dev->irq, ec_dev); return 0; } diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 0ebf26fddbbb..fcbe9d129a9d 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -62,7 +62,6 @@ struct cros_ec_command { * @dev: Device pointer * @was_wake_device: true if this device was set to wake the system from * sleep at the last suspend - * @event_notifier: interrupt event notifier for transport devices * @cmd_xfer: send command to EC and get response * Returns the number of bytes received if the communication succeeded, but * that doesn't mean the EC was happy with the command. The caller @@ -93,7 +92,6 @@ struct cros_ec_device { struct device *dev; bool was_wake_device; struct class *cros_class; - struct blocking_notifier_head event_notifier; int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); -- cgit v1.2.3 From 01a0f4aaaefff9f57bb17e6cc514c84ba43a7335 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 2 Jul 2014 14:34:13 +0100 Subject: mfd: tps65910: Rid data size incompatibility warn when building for 64bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extinguishes: ../drivers/mfd/tps65910.c: In function ‘tps65910_parse_dt’: ../drivers/mfd/tps65910.c:404:14: warning: cast from pointer to integer of different size Signed-off-by: Lee Jones --- drivers/mfd/tps65910.c | 10 +++++----- include/linux/mfd/tps65910.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index f9e42ea1cb1a..f243e75d28f3 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -387,7 +387,7 @@ static const struct of_device_id tps65910_of_match[] = { MODULE_DEVICE_TABLE(of, tps65910_of_match); static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, - int *chip_id) + unsigned long *chip_id) { struct device_node *np = client->dev.of_node; struct tps65910_board *board_info; @@ -401,7 +401,7 @@ static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, return NULL; } - *chip_id = (int)match->data; + *chip_id = (unsigned long)match->data; board_info = devm_kzalloc(&client->dev, sizeof(*board_info), GFP_KERNEL); @@ -431,7 +431,7 @@ static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, #else static inline struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, - int *chip_id) + unsigned long *chip_id) { return NULL; } @@ -453,14 +453,14 @@ static void tps65910_power_off(void) } static int tps65910_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) + const struct i2c_device_id *id) { struct tps65910 *tps65910; struct tps65910_board *pmic_plat_data; struct tps65910_board *of_pmic_plat_data = NULL; struct tps65910_platform_data *init_data; + unsigned long chip_id = id->driver_data; int ret = 0; - int chip_id = id->driver_data; pmic_plat_data = dev_get_platdata(&i2c->dev); diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 16c2335c2856..6483a6fdce59 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -892,7 +892,7 @@ struct tps65910 { struct device *dev; struct i2c_client *i2c_client; struct regmap *regmap; - unsigned int id; + unsigned long id; /* Client devices */ struct tps65910_pmic *pmic; -- cgit v1.2.3 From 8fe39aac0578cbb0abf27e1be70ff581e0c1d836 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 22 Nov 2013 13:22:13 +0100 Subject: drbd: device->ldev is not guaranteed on an D_ATTACHING disk Some parts of the code assumed that get_ldev_if_state(device, D_ATTACHING) is sufficient to access the ldev member of the device object. That was wrong. ldev may not be there or might be freed at any time if the device has a disk state of D_ATTACHING. bm_rw() Documented that drbd_bm_read() is only called from drbd_adm_attach. drbd_bm_write() is only called when a reference is held, and it is documented that a caller has to hold a reference before calling drbd_bm_write() drbd_bm_write_page() Use get_ldev() instead of get_ldev_if_state(device, D_ATTACHING) drbd_bmio_set_n_write() No longer use get_ldev_if_state(device, D_ATTACHING). All callers hold a reference to ldev now. drbd_bmio_clear_n_write() All callers where holding a reference of ldev anyways. Remove the misleading get_ldev_if_state(device, D_ATTACHING) drbd_reconsider_max_bio_size() Removed the get_ldev_if_state(device, D_ATTACHING). All callers now pass a struct drbd_backing_dev* when they have a proper reference, or a NULL pointer. Before this fix, the receiver could trigger a NULL pointer deref when in drbd_reconsider_max_bio_size() drbd_bump_write_ordering() Used get_ldev_if_state(device, D_ATTACHING) with the wrong assumption. Remove it, and allow the caller to pass in a struct drbd_backing_dev* when the caller knows that accessing this bdev is safe. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 4 +++- drivers/block/drbd/drbd_int.h | 9 ++++---- drivers/block/drbd/drbd_main.c | 36 +++++++++++++------------------ drivers/block/drbd/drbd_nl.c | 41 +++++++++++++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 43 ++++++++++++++++++++++++++------------ include/linux/drbd.h | 2 +- 6 files changed, 79 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 1aa29f8fdfe1..ed310415020b 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1085,6 +1085,8 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la kfree(ctx); return -ENODEV; } + /* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from + drbd_adm_attach(), after device->ldev was assigned. */ if (!ctx->flags) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); @@ -1260,7 +1262,7 @@ int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold .kref = { ATOMIC_INIT(2) }, }; - if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + if (!get_ldev(device)) { /* put is in bm_aio_ctx_destroy() */ drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); kfree(ctx); return -ENODEV; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1ef2474e8f11..c87bc8e8fd82 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -984,8 +984,8 @@ extern int drbd_bitmap_io(struct drbd_device *device, extern int drbd_bitmap_io_from_worker(struct drbd_device *device, int (*io_fn)(struct drbd_device *), char *why, enum bm_flag flags); -extern int drbd_bmio_set_n_write(struct drbd_device *device); -extern int drbd_bmio_clear_n_write(struct drbd_device *device); +extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local); +extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local); extern void drbd_ldev_destroy(struct drbd_device *device); /* Meta data layout @@ -1313,7 +1313,7 @@ enum determine_dev_size { extern enum determine_dev_size drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); extern void resync_after_online_grow(struct drbd_device *); -extern void drbd_reconsider_max_bio_size(struct drbd_device *device); +extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev); extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force); @@ -1479,7 +1479,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device, generic_make_request(bio); } -void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_ordering_e wo); +void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, + enum write_ordering_e wo); /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 17b9a237f2e6..a6af93528d57 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3466,23 +3466,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_set_n_write(struct drbd_device *device) +int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) { int rv = -EIO; - if (get_ldev_if_state(device, D_ATTACHING)) { - drbd_md_set_flag(device, MDF_FULL_SYNC); - drbd_md_sync(device); - drbd_bm_set_all(device); - - rv = drbd_bm_write(device); + drbd_md_set_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); + drbd_bm_set_all(device); - if (!rv) { - drbd_md_clear_flag(device, MDF_FULL_SYNC); - drbd_md_sync(device); - } + rv = drbd_bm_write(device); - put_ldev(device); + if (!rv) { + drbd_md_clear_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); } return rv; @@ -3494,18 +3490,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device) * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_clear_n_write(struct drbd_device *device) +int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local) { - int rv = -EIO; - drbd_resume_al(device); - if (get_ldev_if_state(device, D_ATTACHING)) { - drbd_bm_clear_all(device); - rv = drbd_bm_write(device); - put_ldev(device); - } - - return rv; + drbd_bm_clear_all(device); + return drbd_bm_write(device); } static int w_bitmap_io(struct drbd_work *w, int unused) @@ -3603,6 +3592,9 @@ static int w_go_diskless(struct drbd_work *w, int unused) * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be * called from worker context. It MUST NOT be used while a previous such * work is still pending! + * + * Its worker function encloses the call of io_fn() by get_ldev() and + * put_ldev(). */ void drbd_queue_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 43fad2c1ba01..25f4b6f67c21 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1110,15 +1110,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } -static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size) +static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, + unsigned int max_bio_size) { struct request_queue * const q = device->rq_queue; unsigned int max_hw_sectors = max_bio_size >> 9; unsigned int max_segments = 0; struct request_queue *b = NULL; - if (get_ldev_if_state(device, D_ATTACHING)) { - b = device->ldev->backing_bdev->bd_disk->queue; + if (bdev) { + b = bdev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); rcu_read_lock(); @@ -1163,11 +1164,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_ b->backing_dev_info.ra_pages); q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; } - put_ldev(device); } } -void drbd_reconsider_max_bio_size(struct drbd_device *device) +void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) { unsigned int now, new, local, peer; @@ -1175,10 +1175,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ - if (get_ldev_if_state(device, D_ATTACHING)) { - local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9; + if (bdev) { + local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; device->local_max_bio_size = local; - put_ldev(device); } local = min(local, DRBD_MAX_BIO_SIZE); @@ -1211,7 +1210,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) if (new != now) drbd_info(device, "max BIO size = %u\n", new); - drbd_setup_queue_param(device, new); + drbd_setup_queue_param(device, bdev, new); } /* Starts the worker thread */ @@ -1399,7 +1398,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) else set_bit(MD_NO_FUA, &device->flags); - drbd_bump_write_ordering(device->resource, WO_bdev_flush); + drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush); drbd_md_sync(device); @@ -1704,7 +1703,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) new_disk_conf = NULL; new_plan = NULL; - drbd_bump_write_ordering(device->resource, WO_bdev_flush); + drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush); if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) set_bit(CRASHED_PRIMARY, &device->flags); @@ -1720,7 +1719,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) device->read_cnt = 0; device->writ_cnt = 0; - drbd_reconsider_max_bio_size(device); + drbd_reconsider_max_bio_size(device, device->ldev); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, @@ -2648,8 +2647,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mutex_lock(&adm_ctx.resource->adm_mutex); device = adm_ctx.device; + if (!get_ldev(device)) { + retcode = ERR_NO_DISK; + goto out; + } + + mutex_lock(&adm_ctx.resource->adm_mutex); /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. @@ -2673,6 +2677,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); drbd_resume_io(device); mutex_unlock(&adm_ctx.resource->adm_mutex); + put_ldev(device); out: drbd_adm_finish(&adm_ctx, info, retcode); return 0; @@ -2698,7 +2703,7 @@ out: return 0; } -static int drbd_bmio_set_susp_al(struct drbd_device *device) +static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local) { int rv; @@ -2719,8 +2724,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mutex_lock(&adm_ctx.resource->adm_mutex); device = adm_ctx.device; + if (!get_ldev(device)) { + retcode = ERR_NO_DISK; + goto out; + } + + mutex_lock(&adm_ctx.resource->adm_mutex); /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. @@ -2747,6 +2757,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); drbd_resume_io(device); mutex_unlock(&adm_ctx.resource->adm_mutex); + put_ldev(device); out: drbd_adm_finish(&adm_ctx, info, retcode); return 0; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c7084188c2ae..be0c3761cdc6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1168,7 +1168,7 @@ static void drbd_flush(struct drbd_connection *connection) /* would rather check on EOPNOTSUPP, but that is not reliable. * don't try again for ANY return value != 0 * if (rv == -EOPNOTSUPP) */ - drbd_bump_write_ordering(connection->resource, WO_drain_io); + drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io); } put_ldev(device); kref_put(&device->kref, drbd_destroy_device); @@ -1257,14 +1257,29 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio return rv; } +static enum write_ordering_e +max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo) +{ + struct disk_conf *dc; + + dc = rcu_dereference(bdev->disk_conf); + + if (wo == WO_bdev_flush && !dc->disk_flushes) + wo = WO_drain_io; + if (wo == WO_drain_io && !dc->disk_drain) + wo = WO_none; + + return wo; +} + /** * drbd_bump_write_ordering() - Fall back to an other write ordering method * @connection: DRBD connection. * @wo: Write ordering method to try. */ -void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_ordering_e wo) +void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, + enum write_ordering_e wo) { - struct disk_conf *dc; struct drbd_device *device; enum write_ordering_e pwo; int vnr; @@ -1278,17 +1293,18 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_orderin wo = min(pwo, wo); rcu_read_lock(); idr_for_each_entry(&resource->devices, device, vnr) { - if (!get_ldev_if_state(device, D_ATTACHING)) - continue; - dc = rcu_dereference(device->ldev->disk_conf); - - if (wo == WO_bdev_flush && !dc->disk_flushes) - wo = WO_drain_io; - if (wo == WO_drain_io && !dc->disk_drain) - wo = WO_none; - put_ldev(device); + if (get_ldev(device)) { + wo = max_allowed_wo(device->ldev, wo); + if (device->ldev == bdev) + bdev = NULL; + put_ldev(device); + } } rcu_read_unlock(); + + if (bdev) + wo = max_allowed_wo(bdev, wo); + resource->write_ordering = wo; if (pwo != resource->write_ordering || wo == WO_bdev_flush) drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); @@ -3709,7 +3725,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info } device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); - drbd_reconsider_max_bio_size(device); /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). In case we cleared the QUEUE_FLAG_DISCARD from our queue in drbd_reconsider_max_bio_size(), we can be sure that after @@ -3717,6 +3732,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(device)) { + drbd_reconsider_max_bio_size(device, device->ldev); dd = drbd_determine_dev_size(device, ddsf, NULL); put_ldev(device); if (dd == DS_ERROR) @@ -3724,6 +3740,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info drbd_md_sync(device); } else { /* I am diskless, need to accept the peer's size. */ + drbd_reconsider_max_bio_size(device, NULL); drbd_set_my_capacity(device, p_size); } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 3dbe9bd57a09..20ec8903b1e4 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -245,7 +245,7 @@ enum drbd_disk_state { D_DISKLESS, D_ATTACHING, /* In the process of reading the meta-data */ D_FAILED, /* Becomes D_DISKLESS as soon as we told it the peer */ - /* when >= D_FAILED it is legal to access mdev->bc */ + /* when >= D_FAILED it is legal to access mdev->ldev */ D_NEGOTIATING, /* Late attaching state, we need to talk to the peer */ D_INCONSISTENT, D_OUTDATED, -- cgit v1.2.3 From aaaba34576407857f6146ff6c330f06e63fb2bf2 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 18 Mar 2014 12:30:09 +0100 Subject: drbd: implement csums-after-crash-only Checksum based resync trades CPU cycles for network bandwidth, in situations where we expect much of the to-be-resynced blocks to be actually identical on both sides already. In a "network hickup" scenario, it won't help: all to-be-resynced blocks will typically be different. The use case is for the resync of *potentially* different blocks after crash recovery -- the crash recovery had marked larger areas (those covered by the activity log) as need-to-be-resynced, just in case. Most of those blocks will be identical. This option makes it possible to configure checksum based resync, but only actually use it for the first resync after primary crash. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_receiver.c | 2 ++ drivers/block/drbd/drbd_worker.c | 24 ++++++++++++++++++++---- include/linux/drbd_genl.h | 3 +++ include/linux/drbd_limits.h | 1 + 5 files changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index abf5aefd9790..fe6595a96a9a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -738,6 +738,8 @@ struct drbd_device { struct rb_root read_requests; struct rb_root write_requests; + /* use checksums for *this* resync */ + bool use_csums; /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ unsigned long rs_total; /* number of resync blocks that failed in this run */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5626c5babc3f..d326af67c27e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2555,6 +2555,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet peer_req->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ device->bm_resync_fo = BM_SECT_TO_BIT(sector); + /* remember to report stats in drbd_resync_finished */ + device->use_csums = true; } else if (pi->cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ atomic_add(size >> 9, &device->rs_sect_in); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2ff5fd49a3b1..6532a697cf49 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -698,8 +698,8 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (connection->agreed_pro_version >= 89 && - connection->csums_tfm) { + + if (device->use_csums) { switch (read_for_csum(peer_device, sector, size)) { case -EIO: /* Disk failure */ put_ldev(device); @@ -913,7 +913,7 @@ int drbd_resync_finished(struct drbd_device *device) if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { + if (device->use_csums && device->rs_total) { const unsigned long s = device->rs_same_csum; const unsigned long t = device->rs_total; const int ratio = @@ -1622,6 +1622,18 @@ static void do_start_resync(struct drbd_device *device) clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); } +static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) +{ + bool csums_after_crash_only; + rcu_read_lock(); + csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only; + rcu_read_unlock(); + return connection->agreed_pro_version >= 89 && /* supported? */ + connection->csums_tfm && /* configured? */ + (csums_after_crash_only == 0 /* use for each resync? */ + || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ +} + /** * drbd_start_resync() - Start the resync process * @device: DRBD device. @@ -1756,8 +1768,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) drbd_conn_str(ns.conn), (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), (unsigned long) device->rs_total); - if (side == C_SYNC_TARGET) + if (side == C_SYNC_TARGET) { device->bm_resync_fo = 0; + device->use_csums = use_checksum_based_resync(connection, device); + } else { + device->use_csums = 0; + } /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid * with w_send_oos, or the sync target will get confused as to diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 4193f5f2636c..71fc924c53fa 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -171,6 +171,9 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ + /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */ + /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */ + __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 17e50bb00521..9d2df1d51414 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -214,6 +214,7 @@ #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 #define DRBD_USE_RLE_DEF 1 +#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0 #define DRBD_AL_STRIPES_MIN 1 #define DRBD_AL_STRIPES_MAX 1024 -- cgit v1.2.3 From 5d0b17f1a29e8189d04aef447a3a53cfd05529b2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 18 Mar 2014 14:24:35 +0100 Subject: drbd: New net configuration option socket-check-timeout In setups involving a DRBD-proxy and connections that experience a lot of buffer-bloat it might be necessary to set ping-timeout to an unusual high value. By default DRBD uses the same value to wait if a newly established TCP-connection is stable. Since the DRBD-proxy is usually located in the same data center such a long wait time may hinder DRBD's connect process. In such setups socket-check-timeout should be set to at least to the round trip time between DRBD and DRBD-proxy. I.e. in most cases to 1. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 46 +++++++++++++++++++++++++------------- include/linux/drbd_genl.h | 1 + include/linux/drbd_limits.h | 5 +++++ 3 files changed, 36 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7da83f3a61eb..b89e6fb468c6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -819,7 +819,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke * drbd_socket_okay() - Free the socket if its connection is not okay * @sock: pointer to the pointer to the socket. */ -static int drbd_socket_okay(struct socket **sock) +static bool drbd_socket_okay(struct socket **sock) { int rr; char tb[4]; @@ -837,6 +837,30 @@ static int drbd_socket_okay(struct socket **sock) return false; } } + +static bool connection_established(struct drbd_connection *connection, + struct socket **sock1, + struct socket **sock2) +{ + struct net_conf *nc; + int timeout; + bool ok; + + if (!*sock1 || !*sock2) + return false; + + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; + rcu_read_unlock(); + schedule_timeout_interruptible(timeout); + + ok = drbd_socket_okay(sock1); + ok = drbd_socket_okay(sock2) && ok; + + return ok; +} + /* Gets called if a connection is established, or if a new minor gets created in a connection */ int drbd_connected(struct drbd_peer_device *peer_device) @@ -878,8 +902,8 @@ static int conn_connect(struct drbd_connection *connection) struct drbd_socket sock, msock; struct drbd_peer_device *peer_device; struct net_conf *nc; - int vnr, timeout, h, ok; - bool discard_my_data; + int vnr, timeout, h; + bool discard_my_data, ok; enum drbd_state_rv rv; struct accept_wait_data ad = { .connection = connection, @@ -923,17 +947,8 @@ static int conn_connect(struct drbd_connection *connection) } } - if (sock.socket && msock.socket) { - rcu_read_lock(); - nc = rcu_dereference(connection->net_conf); - timeout = nc->ping_timeo * HZ / 10; - rcu_read_unlock(); - schedule_timeout_interruptible(timeout); - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; - if (ok) - break; - } + if (connection_established(connection, &sock.socket, &msock.socket)) + break; retry: s = drbd_wait_for_connect(connection, &ad); @@ -979,8 +994,7 @@ randomize: goto out_release_sockets; } - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; + ok = connection_established(connection, &sock.socket, &msock.socket); } while (!ok); if (ad.s_listen) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 71fc924c53fa..7b131ed8f9c6 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -174,6 +174,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */ /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */ __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF) + __u32_field_def(34, 0 /* OPTIONAL */, sock_check_timeo, DRBD_SOCKET_CHECK_TIMEO_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 9d2df1d51414..8ac8c5d9a3ad 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -225,4 +225,9 @@ #define DRBD_AL_STRIPE_SIZE_MAX 16777216 #define DRBD_AL_STRIPE_SIZE_DEF 32 #define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */ + +#define DRBD_SOCKET_CHECK_TIMEO_MIN 0 +#define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX +#define DRBD_SOCKET_CHECK_TIMEO_DEF 0 +#define DRBD_SOCKET_CHECK_TIMEO_SCALE '1' #endif -- cgit v1.2.3 From bf0d6e4a1138e71cafdbbb99cde430eee50c4ff1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 6 May 2014 14:28:32 +0300 Subject: drbd: silence underflow warning in read_in_block() My static checker warns that "data_size" could be negative and underflow the limit check. The code looks suspicious but I don't know if it is a real bug. Signed-off-by: Dan Carpenter Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f972988291c5..9342b8da73ab 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1592,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, struct drbd_peer_request *peer_req; struct page *page; int dgs, ds, err; - int data_size = pi->size; + unsigned int data_size = pi->size; void *dig_in = peer_device->connection->int_dig_in; void *dig_vv = peer_device->connection->int_dig_vv; unsigned long *data; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 20ec8903b1e4..debb70d40547 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -52,7 +52,7 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.4.3" +#define REL_VERSION "8.4.5" #define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- cgit v1.2.3 From 2243a87d90b42eb38bc281957df3e57c712b5e56 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Mon, 9 Jun 2014 09:37:56 +0800 Subject: pinctrl: avoid duplicated calling enable_pinmux_setting for a pin What the patch does: 1. Call pinmux_disable_setting ahead of pinmux_enable_setting each time pinctrl_select_state is called 2. Remove the HW disable operation in pinmux_disable_setting function. 3. Remove the disable ops in struct pinmux_ops 4. Remove all the disable ops users in current code base. Notes: 1. Great thanks for the suggestion from Linus, Tony Lindgren and Stephen Warren and Everyone that shared comments on this patch. 2. The patch also includes comment fixes from Stephen Warren. The reason why we do this: 1. To avoid duplicated calling of the enable_setting operation without disabling operation inbetween which will let the pin descriptor desc->mux_usecount increase monotonously. 2. The HW pin disable operation is not useful for any of the existing platforms. And this can be used to avoid the HW glitch after using the item #1 modification. In the following case, the issue can be reproduced: 1. There is a driver that need to switch pin state dynamically, e.g. between "sleep" and "default" state 2. The pin setting configuration in a DTS node may be like this: component a { pinctrl-names = "default", "sleep"; pinctrl-0 = <&a_grp_setting &c_grp_setting>; pinctrl-1 = <&b_grp_setting &c_grp_setting>; } The "c_grp_setting" config node is totally identical, maybe like following one: c_grp_setting: c_grp_setting { pinctrl-single,pins = ; } 3. When switching the pin state in the following official pinctrl sequence: pin = pinctrl_get(); state = pinctrl_lookup_state(wanted_state); pinctrl_select_state(state); pinctrl_put(); Test Result: 1. The switch is completed as expected, that is: the device's pin configuration is changed according to the description in the "wanted_state" group setting 2. The "desc->mux_usecount" of the corresponding pins in "c_group" is increased without being decreased, because the "desc" is for each physical pin while the setting is for each setting node in the DTS. Thus, if the "c_grp_setting" in pinctrl-0 is not disabled ahead of enabling "c_grp_setting" in pinctrl-1, the desc->mux_usecount will keep increasing without any chance to be decreased. According to the comments in the original code, only the setting, in old state but not in new state, will be "disabled" (calling pinmux_disable_setting), which is correct logic but not intact. We still need consider case that the setting is in both old state and new state. We can do this in the following two ways: 1. Avoid to "enable"(calling pinmux_enable_setting) the "same pin setting" repeatedly 2. "Disable"(calling pinmux_disable_setting) the "same pin setting", actually two setting instances, ahead of enabling them. Analysis: 1. The solution #2 is better because it can avoid too much iteration. 2. If we disable all of the settings in the old state and one of the setting(s) exist in the new state, the pins mux function change may happen when some SoC vendors defined the "pinctrl-single,function-off" in their DTS file. old_setting => disabled_setting => new_setting. 3. In the pinmux framework, when a pin state is switched, the setting in the old state should be marked as "disabled". Conclusion: 1. To Remove the HW disabling operation to above the glitch mentioned above. 2. Handle the issue mentioned above by disabling all of the settings in old state and then enable the all of the settings in new state. Signed-off-by: Fan Wu Acked-by: Stephen Warren Acked-by: Patrice Chotard Acked-by: Heiko Stuebner Acked-by: Maxime Coquelin Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 24 +++------------ drivers/pinctrl/pinctrl-abx500.c | 15 --------- drivers/pinctrl/pinctrl-adi2.c | 30 ------------------ drivers/pinctrl/pinctrl-at91.c | 21 ------------- drivers/pinctrl/pinctrl-bcm2835.c | 11 ------- drivers/pinctrl/pinctrl-exynos5440.c | 8 ----- drivers/pinctrl/pinctrl-msm.c | 25 --------------- drivers/pinctrl/pinctrl-nomadik.c | 16 ---------- drivers/pinctrl/pinctrl-rockchip.c | 18 ----------- drivers/pinctrl/pinctrl-samsung.c | 8 ----- drivers/pinctrl/pinctrl-single.c | 56 --------------------------------- drivers/pinctrl/pinctrl-st.c | 6 ---- drivers/pinctrl/pinctrl-tb10x.c | 17 ---------- drivers/pinctrl/pinctrl-tegra.c | 13 -------- drivers/pinctrl/pinctrl-tz1090-pdc.c | 28 ----------------- drivers/pinctrl/pinctrl-tz1090.c | 58 ----------------------------------- drivers/pinctrl/pinctrl-u300.c | 14 --------- drivers/pinctrl/pinmux.c | 4 --- drivers/pinctrl/sh-pfc/pinctrl.c | 22 ------------- drivers/pinctrl/sirf/pinctrl-sirf.c | 10 ------ drivers/pinctrl/spear/pinctrl-spear.c | 7 ----- drivers/pinctrl/vt8500/pinctrl-wmt.c | 12 -------- include/linux/pinctrl/pinmux.h | 2 -- 23 files changed, 5 insertions(+), 420 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index e09474ecde23..e4f65510c87e 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -992,29 +992,15 @@ int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *state) if (p->state) { /* - * The set of groups with a mux configuration in the old state - * may not be identical to the set of groups with a mux setting - * in the new state. While this might be unusual, it's entirely - * possible for the "user"-supplied mapping table to be written - * that way. For each group that was configured in the old state - * but not in the new state, this code puts that group into a - * safe/disabled state. + * For each pinmux setting in the old state, forget SW's record + * of mux owner for that pingroup. Any pingroups which are + * still owned by the new state will be re-acquired by the call + * to pinmux_enable_setting() in the loop below. */ list_for_each_entry(setting, &p->state->settings, node) { - bool found = false; if (setting->type != PIN_MAP_TYPE_MUX_GROUP) continue; - list_for_each_entry(setting2, &state->settings, node) { - if (setting2->type != PIN_MAP_TYPE_MUX_GROUP) - continue; - if (setting2->data.mux.group == - setting->data.mux.group) { - found = true; - break; - } - } - if (!found) - pinmux_disable_setting(setting); + pinmux_disable_setting(setting); } } diff --git a/drivers/pinctrl/pinctrl-abx500.c b/drivers/pinctrl/pinctrl-abx500.c index 163da9c3ea0e..f3f8b24efe54 100644 --- a/drivers/pinctrl/pinctrl-abx500.c +++ b/drivers/pinctrl/pinctrl-abx500.c @@ -737,20 +737,6 @@ static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function, return ret; } -static void abx500_pmx_disable(struct pinctrl_dev *pctldev, - unsigned function, unsigned group) -{ - struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev); - const struct abx500_pingroup *g; - - g = &pct->soc->groups[group]; - if (g->altsetting < 0) - return; - - /* FIXME: poke out the mux, set the pin to some default state? */ - dev_dbg(pct->dev, "disable group %s, %u pins\n", g->name, g->npins); -} - static int abx500_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -799,7 +785,6 @@ static const struct pinmux_ops abx500_pinmux_ops = { .get_function_name = abx500_pmx_get_func_name, .get_function_groups = abx500_pmx_get_func_groups, .enable = abx500_pmx_enable, - .disable = abx500_pmx_disable, .gpio_request_enable = abx500_gpio_request_enable, .gpio_disable_free = abx500_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c index 5c44feb54ebb..b02ee4f882c0 100644 --- a/drivers/pinctrl/pinctrl-adi2.c +++ b/drivers/pinctrl/pinctrl-adi2.c @@ -652,35 +652,6 @@ static int adi_pinmux_enable(struct pinctrl_dev *pctldev, unsigned func_id, return 0; } -static void adi_pinmux_disable(struct pinctrl_dev *pctldev, unsigned func_id, - unsigned group_id) -{ - struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev); - struct gpio_port *port; - struct pinctrl_gpio_range *range; - unsigned long flags; - unsigned short *mux, pin; - - mux = (unsigned short *)pinctrl->soc->groups[group_id].mux; - - while (*mux) { - pin = P_IDENT(*mux); - - range = pinctrl_find_gpio_range_from_pin(pctldev, pin); - if (range == NULL) /* should not happen */ - return; - - port = container_of(range->gc, struct gpio_port, chip); - - spin_lock_irqsave(&port->lock, flags); - - port_setup(port, pin_to_offset(range, pin), true); - mux++; - - spin_unlock_irqrestore(&port->lock, flags); - } -} - static int adi_pinmux_get_funcs_count(struct pinctrl_dev *pctldev) { struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev); @@ -728,7 +699,6 @@ static int adi_pinmux_request_gpio(struct pinctrl_dev *pctldev, static struct pinmux_ops adi_pinmux_ops = { .enable = adi_pinmux_enable, - .disable = adi_pinmux_disable, .get_functions_count = adi_pinmux_get_funcs_count, .get_function_name = adi_pinmux_get_func_name, .get_function_groups = adi_pinmux_get_groups, diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 421493cb490c..bd57ab514aa4 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -611,26 +611,6 @@ static int at91_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static void at91_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); - const struct at91_pmx_pin *pins_conf = info->groups[group].pins_conf; - const struct at91_pmx_pin *pin; - uint32_t npins = info->groups[group].npins; - int i; - unsigned mask; - void __iomem *pio; - - for (i = 0; i < npins; i++) { - pin = &pins_conf[i]; - at91_pin_dbg(info->dev, pin); - pio = pin_to_controller(info, pin->bank); - mask = pin_to_mask(pin->pin); - at91_mux_gpio_enable(pio, mask, 1); - } -} - static int at91_pmx_get_funcs_count(struct pinctrl_dev *pctldev) { struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); @@ -705,7 +685,6 @@ static const struct pinmux_ops at91_pmx_ops = { .get_function_name = at91_pmx_get_func_name, .get_function_groups = at91_pmx_get_groups, .enable = at91_pmx_enable, - .disable = at91_pmx_disable, .gpio_request_enable = at91_gpio_request_enable, .gpio_disable_free = at91_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-bcm2835.c b/drivers/pinctrl/pinctrl-bcm2835.c index 3d907de9bc91..5bcfd7ace0cd 100644 --- a/drivers/pinctrl/pinctrl-bcm2835.c +++ b/drivers/pinctrl/pinctrl-bcm2835.c @@ -841,16 +841,6 @@ static int bcm2835_pmx_enable(struct pinctrl_dev *pctldev, return 0; } -static void bcm2835_pmx_disable(struct pinctrl_dev *pctldev, - unsigned func_selector, - unsigned group_selector) -{ - struct bcm2835_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); - - /* disable by setting to GPIO_IN */ - bcm2835_pinctrl_fsel_set(pc, group_selector, BCM2835_FSEL_GPIO_IN); -} - static void bcm2835_pmx_gpio_disable_free(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -880,7 +870,6 @@ static const struct pinmux_ops bcm2835_pmx_ops = { .get_function_name = bcm2835_pmx_get_function_name, .get_function_groups = bcm2835_pmx_get_function_groups, .enable = bcm2835_pmx_enable, - .disable = bcm2835_pmx_disable, .gpio_disable_free = bcm2835_pmx_gpio_disable_free, .gpio_set_direction = bcm2835_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-exynos5440.c b/drivers/pinctrl/pinctrl-exynos5440.c index 8fe2ab0a7698..4b145b5db7a6 100644 --- a/drivers/pinctrl/pinctrl-exynos5440.c +++ b/drivers/pinctrl/pinctrl-exynos5440.c @@ -371,13 +371,6 @@ static int exynos5440_pinmux_enable(struct pinctrl_dev *pctldev, unsigned select return 0; } -/* disable a specified pinmux by writing to registers */ -static void exynos5440_pinmux_disable(struct pinctrl_dev *pctldev, - unsigned selector, unsigned group) -{ - exynos5440_pinmux_setup(pctldev, selector, group, false); -} - /* * The calls to gpio_direction_output() and gpio_direction_input() * leads to this function call (via the pinctrl_gpio_direction_{input|output}() @@ -395,7 +388,6 @@ static const struct pinmux_ops exynos5440_pinmux_ops = { .get_function_name = exynos5440_pinmux_get_fname, .get_function_groups = exynos5440_pinmux_get_groups, .enable = exynos5440_pinmux_enable, - .disable = exynos5440_pinmux_disable, .gpio_set_direction = exynos5440_pinmux_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-msm.c b/drivers/pinctrl/pinctrl-msm.c index df6dda4ce803..bdfaba4430f2 100644 --- a/drivers/pinctrl/pinctrl-msm.c +++ b/drivers/pinctrl/pinctrl-msm.c @@ -165,36 +165,11 @@ static int msm_pinmux_enable(struct pinctrl_dev *pctldev, return 0; } -static void msm_pinmux_disable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) -{ - struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); - const struct msm_pingroup *g; - unsigned long flags; - u32 val; - - g = &pctrl->soc->groups[group]; - - if (WARN_ON(g->mux_bit < 0)) - return; - - spin_lock_irqsave(&pctrl->lock, flags); - - /* Clear the mux bits to select gpio mode */ - val = readl(pctrl->regs + g->ctl_reg); - val &= ~(0x7 << g->mux_bit); - writel(val, pctrl->regs + g->ctl_reg); - - spin_unlock_irqrestore(&pctrl->lock, flags); -} - static const struct pinmux_ops msm_pinmux_ops = { .get_functions_count = msm_get_functions_count, .get_function_name = msm_get_function_name, .get_function_groups = msm_get_function_groups, .enable = msm_pinmux_enable, - .disable = msm_pinmux_disable, }; static int msm_config_reg(struct msm_pinctrl *pctrl, diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c index 8f6f16ef73f3..a564251fe093 100644 --- a/drivers/pinctrl/pinctrl-nomadik.c +++ b/drivers/pinctrl/pinctrl-nomadik.c @@ -1765,21 +1765,6 @@ out_glitch: return ret; } -static void nmk_pmx_disable(struct pinctrl_dev *pctldev, - unsigned function, unsigned group) -{ - struct nmk_pinctrl *npct = pinctrl_dev_get_drvdata(pctldev); - const struct nmk_pingroup *g; - - g = &npct->soc->groups[group]; - - if (g->altsetting < 0) - return; - - /* Poke out the mux, set the pin to some default state? */ - dev_dbg(npct->dev, "disable group %s, %u pins\n", g->name, g->npins); -} - static int nmk_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -1826,7 +1811,6 @@ static const struct pinmux_ops nmk_pinmux_ops = { .get_function_name = nmk_pmx_get_func_name, .get_function_groups = nmk_pmx_get_func_groups, .enable = nmk_pmx_enable, - .disable = nmk_pmx_disable, .gpio_request_enable = nmk_gpio_request_enable, .gpio_disable_free = nmk_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index bb805d5e9ff0..51f67a6eadcb 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -657,23 +657,6 @@ static int rockchip_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static void rockchip_pmx_disable(struct pinctrl_dev *pctldev, - unsigned selector, unsigned group) -{ - struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); - const unsigned int *pins = info->groups[group].pins; - struct rockchip_pin_bank *bank; - int cnt; - - dev_dbg(info->dev, "disable function %s group %s\n", - info->functions[selector].name, info->groups[group].name); - - for (cnt = 0; cnt < info->groups[group].npins; cnt++) { - bank = pin_to_bank(info, pins[cnt]); - rockchip_set_mux(bank, pins[cnt] - bank->pin_base, 0); - } -} - /* * The calls to gpio_direction_output() and gpio_direction_input() * leads to this function call (via the pinctrl_gpio_direction_{input|output}() @@ -716,7 +699,6 @@ static const struct pinmux_ops rockchip_pmx_ops = { .get_function_name = rockchip_pmx_get_func_name, .get_function_groups = rockchip_pmx_get_groups, .enable = rockchip_pmx_enable, - .disable = rockchip_pmx_disable, .gpio_set_direction = rockchip_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-samsung.c b/drivers/pinctrl/pinctrl-samsung.c index 3e61d0f8f146..089abde35d44 100644 --- a/drivers/pinctrl/pinctrl-samsung.c +++ b/drivers/pinctrl/pinctrl-samsung.c @@ -333,13 +333,6 @@ static int samsung_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -/* disable a specified pinmux by writing to registers */ -static void samsung_pinmux_disable(struct pinctrl_dev *pctldev, - unsigned selector, unsigned group) -{ - samsung_pinmux_setup(pctldev, selector, group, false); -} - /* * The calls to gpio_direction_output() and gpio_direction_input() * leads to this function call (via the pinctrl_gpio_direction_{input|output}() @@ -390,7 +383,6 @@ static const struct pinmux_ops samsung_pinmux_ops = { .get_function_name = samsung_pinmux_get_fname, .get_function_groups = samsung_pinmux_get_groups, .enable = samsung_pinmux_enable, - .disable = samsung_pinmux_disable, .gpio_set_direction = samsung_pinmux_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 2960557bfed9..ff6a2bda52e5 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -488,61 +488,6 @@ static int pcs_enable(struct pinctrl_dev *pctldev, unsigned fselector, return 0; } -static void pcs_disable(struct pinctrl_dev *pctldev, unsigned fselector, - unsigned group) -{ - struct pcs_device *pcs; - struct pcs_function *func; - int i; - - pcs = pinctrl_dev_get_drvdata(pctldev); - /* If function mask is null, needn't disable it. */ - if (!pcs->fmask) - return; - - func = radix_tree_lookup(&pcs->ftree, fselector); - if (!func) { - dev_err(pcs->dev, "%s could not find function%i\n", - __func__, fselector); - return; - } - - /* - * Ignore disable if function-off is not specified. Some hardware - * does not have clearly defined disable function. For pin specific - * off modes, you can use alternate named states as described in - * pinctrl-bindings.txt. - */ - if (pcs->foff == PCS_OFF_DISABLED) { - dev_dbg(pcs->dev, "ignoring disable for %s function%i\n", - func->name, fselector); - return; - } - - dev_dbg(pcs->dev, "disabling function%i %s\n", - fselector, func->name); - - for (i = 0; i < func->nvals; i++) { - struct pcs_func_vals *vals; - unsigned long flags; - unsigned val, mask; - - vals = &func->vals[i]; - raw_spin_lock_irqsave(&pcs->lock, flags); - val = pcs->read(vals->reg); - - if (pcs->bits_per_mux) - mask = vals->mask; - else - mask = pcs->fmask; - - val &= ~mask; - val |= pcs->foff << pcs->fshift; - pcs->write(val, vals->reg); - raw_spin_unlock_irqrestore(&pcs->lock, flags); - } -} - static int pcs_request_gpio(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned pin) { @@ -575,7 +520,6 @@ static const struct pinmux_ops pcs_pinmux_ops = { .get_function_name = pcs_get_function_name, .get_function_groups = pcs_get_function_groups, .enable = pcs_enable, - .disable = pcs_disable, .gpio_request_enable = pcs_request_gpio, }; diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 1bd6363bc95e..e1919cd43117 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -930,11 +930,6 @@ static int st_pmx_enable(struct pinctrl_dev *pctldev, unsigned fselector, return 0; } -static void st_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ -} - static int st_pmx_set_gpio_direction(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned gpio, bool input) @@ -957,7 +952,6 @@ static struct pinmux_ops st_pmxops = { .get_function_name = st_pmx_get_fname, .get_function_groups = st_pmx_get_groups, .enable = st_pmx_enable, - .disable = st_pmx_disable, .gpio_set_direction = st_pmx_set_gpio_direction, }; diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index 26ca6855f478..71c5d4f0c538 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -738,22 +738,6 @@ static int tb10x_pctl_enable(struct pinctrl_dev *pctl, return 0; } -static void tb10x_pctl_disable(struct pinctrl_dev *pctl, - unsigned func_selector, unsigned group_selector) -{ - struct tb10x_pinctrl *state = pinctrl_dev_get_drvdata(pctl); - const struct tb10x_pinfuncgrp *grp = &state->pingroups[group_selector]; - - if (grp->port < 0) - return; - - mutex_lock(&state->mutex); - - state->ports[grp->port].count--; - - mutex_unlock(&state->mutex); -} - static struct pinmux_ops tb10x_pinmux_ops = { .get_functions_count = tb10x_get_functions_count, .get_function_name = tb10x_get_function_name, @@ -761,7 +745,6 @@ static struct pinmux_ops tb10x_pinmux_ops = { .gpio_request_enable = tb10x_gpio_request_enable, .gpio_disable_free = tb10x_gpio_disable_free, .enable = tb10x_pctl_enable, - .disable = tb10x_pctl_disable, }; static struct pinctrl_desc tb10x_pindesc = { diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c index 2d43bff74f59..150af5503c09 100644 --- a/drivers/pinctrl/pinctrl-tegra.c +++ b/drivers/pinctrl/pinctrl-tegra.c @@ -290,24 +290,11 @@ static int tegra_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, return 0; } -static void tegra_pinctrl_disable(struct pinctrl_dev *pctldev, - unsigned function, unsigned group) -{ - struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); - const struct tegra_pingroup *g; - - g = &pmx->soc->groups[group]; - - if (WARN_ON(g->mux_reg < 0)) - return; -} - static const struct pinmux_ops tegra_pinmux_ops = { .get_functions_count = tegra_pinctrl_get_funcs_count, .get_function_name = tegra_pinctrl_get_func_name, .get_function_groups = tegra_pinctrl_get_func_groups, .enable = tegra_pinctrl_enable, - .disable = tegra_pinctrl_disable, }; static int tegra_pinconf_reg(struct tegra_pmx *pmx, diff --git a/drivers/pinctrl/pinctrl-tz1090-pdc.c b/drivers/pinctrl/pinctrl-tz1090-pdc.c index 5bf01c28925e..41e81a35cabb 100644 --- a/drivers/pinctrl/pinctrl-tz1090-pdc.c +++ b/drivers/pinctrl/pinctrl-tz1090-pdc.c @@ -574,33 +574,6 @@ static int tz1090_pdc_pinctrl_enable(struct pinctrl_dev *pctldev, return 0; } -static void tz1090_pdc_pinctrl_disable(struct pinctrl_dev *pctldev, - unsigned int function, - unsigned int group) -{ - struct tz1090_pdc_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); - const struct tz1090_pdc_pingroup *grp = &tz1090_pdc_groups[group]; - - dev_dbg(pctldev->dev, "%s(func=%u (%s), group=%u (%s))\n", - __func__, - function, tz1090_pdc_functions[function].name, - group, tz1090_pdc_groups[group].name); - - /* is it even a mux? */ - if (grp->drv) - return; - - /* does this group even control the function? */ - if (function != grp->func) - return; - - /* record the pin being unmuxed and update mux bit */ - spin_lock(&pmx->lock); - pmx->mux_en &= ~BIT(grp->pins[0]); - tz1090_pdc_pinctrl_mux(pmx, grp); - spin_unlock(&pmx->lock); -} - static const struct tz1090_pdc_pingroup *find_mux_group( struct tz1090_pdc_pmx *pmx, unsigned int pin) @@ -662,7 +635,6 @@ static struct pinmux_ops tz1090_pdc_pinmux_ops = { .get_function_name = tz1090_pdc_pinctrl_get_func_name, .get_function_groups = tz1090_pdc_pinctrl_get_func_groups, .enable = tz1090_pdc_pinctrl_enable, - .disable = tz1090_pdc_pinctrl_disable, .gpio_request_enable = tz1090_pdc_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pdc_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-tz1090.c b/drivers/pinctrl/pinctrl-tz1090.c index bc9cd7a7602e..24082216842e 100644 --- a/drivers/pinctrl/pinctrl-tz1090.c +++ b/drivers/pinctrl/pinctrl-tz1090.c @@ -1478,63 +1478,6 @@ mux_pins: return 0; } -/** - * tz1090_pinctrl_disable() - Disable a function on a pin group. - * @pctldev: Pin control data - * @function: Function index to disable - * @group: Group index to disable - * - * Disable a particular function on a group of pins. The per GPIO pin pseudo pin - * groups can be used (in which case the pin will be taken out of peripheral - * mode. Some convenience pin groups can also be used in which case the effect - * is the same as enabling the function on each individual pin in the group. - */ -static void tz1090_pinctrl_disable(struct pinctrl_dev *pctldev, - unsigned int function, unsigned int group) -{ - struct tz1090_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); - struct tz1090_pingroup *grp; - unsigned int pin_num, mux_group, i, npins; - const unsigned int *pins; - - /* group of pins? */ - if (group < ARRAY_SIZE(tz1090_groups)) { - grp = &tz1090_groups[group]; - npins = grp->npins; - pins = grp->pins; - /* - * All pins in the group must belong to the same mux group, - * which allows us to just use the mux group of the first pin. - * By explicitly listing permitted pingroups for each function - * the pinmux core should ensure this is always the case. - */ - } else { - pin_num = group - ARRAY_SIZE(tz1090_groups); - npins = 1; - pins = &pin_num; - } - mux_group = tz1090_mux_pins[*pins]; - - /* no mux group, but can still be individually muxed to peripheral */ - if (mux_group >= TZ1090_MUX_GROUP_MAX) { - if (function == TZ1090_MUX_PERIP) - goto unmux_pins; - return; - } - - /* mux group already set to a different function? */ - grp = &tz1090_mux_groups[mux_group]; - dev_dbg(pctldev->dev, "%s: unmuxing %u pin(s) in '%s' from '%s'\n", - __func__, npins, grp->name, tz1090_functions[function].name); - - /* subtract pins from ref count and unmux individually */ - WARN_ON(grp->func_count < npins); - grp->func_count -= npins; -unmux_pins: - for (i = 0; i < npins; ++i) - tz1090_pinctrl_perip_select(pmx, pins[i], false); -} - /** * tz1090_pinctrl_gpio_request_enable() - Put pin in GPIO mode. * @pctldev: Pin control data @@ -1575,7 +1518,6 @@ static struct pinmux_ops tz1090_pinmux_ops = { .get_function_name = tz1090_pinctrl_get_func_name, .get_function_groups = tz1090_pinctrl_get_func_groups, .enable = tz1090_pinctrl_enable, - .disable = tz1090_pinctrl_disable, .gpio_request_enable = tz1090_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c index 209a01b8bd3b..0959bb36450f 100644 --- a/drivers/pinctrl/pinctrl-u300.c +++ b/drivers/pinctrl/pinctrl-u300.c @@ -970,19 +970,6 @@ static int u300_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static void u300_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - struct u300_pmx *upmx; - - /* There is nothing to do with the power pins */ - if (selector == 0) - return; - - upmx = pinctrl_dev_get_drvdata(pctldev); - u300_pmx_endisable(upmx, selector, false); -} - static int u300_pmx_get_funcs_count(struct pinctrl_dev *pctldev) { return ARRAY_SIZE(u300_pmx_functions); @@ -1008,7 +995,6 @@ static const struct pinmux_ops u300_pmx_ops = { .get_function_name = u300_pmx_get_func_name, .get_function_groups = u300_pmx_get_groups, .enable = u300_pmx_enable, - .disable = u300_pmx_disable, }; static int u300_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin, diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 051e8592990e..c055daf9a80f 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -471,7 +471,6 @@ void pinmux_disable_setting(struct pinctrl_setting const *setting) { struct pinctrl_dev *pctldev = setting->pctldev; const struct pinctrl_ops *pctlops = pctldev->desc->pctlops; - const struct pinmux_ops *ops = pctldev->desc->pmxops; int ret = 0; const unsigned *pins = NULL; unsigned num_pins = 0; @@ -518,9 +517,6 @@ void pinmux_disable_setting(struct pinctrl_setting const *setting) pins[i], desc->name, gname); } } - - if (ops->disable) - ops->disable(pctldev, setting->data.mux.func, setting->data.mux.group); } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index e758af95c209..11db3ee39d40 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -345,27 +345,6 @@ done: return ret; } -static void sh_pfc_func_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev); - struct sh_pfc *pfc = pmx->pfc; - const struct sh_pfc_pin_group *grp = &pfc->info->groups[group]; - unsigned long flags; - unsigned int i; - - spin_lock_irqsave(&pfc->lock, flags); - - for (i = 0; i < grp->nr_pins; ++i) { - int idx = sh_pfc_get_pin_index(pfc, grp->pins[i]); - struct sh_pfc_pin_config *cfg = &pmx->configs[idx]; - - cfg->type = PINMUX_TYPE_NONE; - } - - spin_unlock_irqrestore(&pfc->lock, flags); -} - static int sh_pfc_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -464,7 +443,6 @@ static const struct pinmux_ops sh_pfc_pinmux_ops = { .get_function_name = sh_pfc_get_function_name, .get_function_groups = sh_pfc_get_function_groups, .enable = sh_pfc_func_enable, - .disable = sh_pfc_func_disable, .gpio_request_enable = sh_pfc_gpio_request_enable, .gpio_disable_free = sh_pfc_gpio_disable_free, .gpio_set_direction = sh_pfc_gpio_set_direction, diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index 014f5b1fee55..4c1d7c68666d 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -186,15 +186,6 @@ static int sirfsoc_pinmux_enable(struct pinctrl_dev *pmxdev, unsigned selector, return 0; } -static void sirfsoc_pinmux_disable(struct pinctrl_dev *pmxdev, unsigned selector, - unsigned group) -{ - struct sirfsoc_pmx *spmx; - - spmx = pinctrl_dev_get_drvdata(pmxdev); - sirfsoc_pinmux_endisable(spmx, selector, false); -} - static int sirfsoc_pinmux_get_funcs_count(struct pinctrl_dev *pmxdev) { return sirfsoc_pmxfunc_cnt; @@ -240,7 +231,6 @@ static int sirfsoc_pinmux_request_gpio(struct pinctrl_dev *pmxdev, static struct pinmux_ops sirfsoc_pinmux_ops = { .enable = sirfsoc_pinmux_enable, - .disable = sirfsoc_pinmux_disable, .get_functions_count = sirfsoc_pinmux_get_funcs_count, .get_function_name = sirfsoc_pinmux_get_func_name, .get_function_groups = sirfsoc_pinmux_get_groups, diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c index 58bf6867aa17..f72cc4e192bd 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.c +++ b/drivers/pinctrl/spear/pinctrl-spear.c @@ -274,12 +274,6 @@ static int spear_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, return spear_pinctrl_endisable(pctldev, function, group, true); } -static void spear_pinctrl_disable(struct pinctrl_dev *pctldev, - unsigned function, unsigned group) -{ - spear_pinctrl_endisable(pctldev, function, group, false); -} - /* gpio with pinmux */ static struct spear_gpio_pingroup *get_gpio_pingroup(struct spear_pmx *pmx, unsigned pin) @@ -345,7 +339,6 @@ static const struct pinmux_ops spear_pinmux_ops = { .get_function_name = spear_pinctrl_get_func_name, .get_function_groups = spear_pinctrl_get_func_groups, .enable = spear_pinctrl_enable, - .disable = spear_pinctrl_disable, .gpio_request_enable = gpio_request_enable, .gpio_disable_free = gpio_disable_free, }; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 2c61281bebd7..8c976c21eeee 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -141,17 +141,6 @@ static int wmt_pmx_enable(struct pinctrl_dev *pctldev, return wmt_set_pinmux(data, func_selector, pinnum); } -static void wmt_pmx_disable(struct pinctrl_dev *pctldev, - unsigned func_selector, - unsigned group_selector) -{ - struct wmt_pinctrl_data *data = pinctrl_dev_get_drvdata(pctldev); - u32 pinnum = data->pins[group_selector].number; - - /* disable by setting GPIO_IN */ - wmt_set_pinmux(data, WMT_FSEL_GPIO_IN, pinnum); -} - static void wmt_pmx_gpio_disable_free(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -180,7 +169,6 @@ static struct pinmux_ops wmt_pinmux_ops = { .get_function_name = wmt_pmx_get_function_name, .get_function_groups = wmt_pmx_get_function_groups, .enable = wmt_pmx_enable, - .disable = wmt_pmx_disable, .gpio_disable_free = wmt_pmx_gpio_disable_free, .gpio_set_direction = wmt_pmx_gpio_set_direction, }; diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index c15395031cb3..3097aafbeb24 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -70,8 +70,6 @@ struct pinmux_ops { unsigned * const num_groups); int (*enable) (struct pinctrl_dev *pctldev, unsigned func_selector, unsigned group_selector); - void (*disable) (struct pinctrl_dev *pctldev, unsigned func_selector, - unsigned group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset); -- cgit v1.2.3 From c14deddec1fbd8c9757c53a49dbfd2dc83265f21 Mon Sep 17 00:00:00 2001 From: "grmoore@altera.com" Date: Tue, 29 Apr 2014 10:29:51 -0500 Subject: mtd: spi-nor: add support for flag status register on Micron chips Some new Micron flash chips require reading the flag status register to determine when operations have completed. Furthermore, chips with multi-die stacks of the 65nm 256Mb QSPI also require reading the status register before reading the flag status register. This patch adds support for the flag status register in the n25q512ax3 and n25q00 Micron QSPI flash chips. Signed-off-by: Graham Moore Signed-off-by: Brian Norris --- drivers/mtd/spi-nor/spi-nor.c | 52 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spi-nor.h | 4 ++++ 2 files changed, 56 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index c713c8656710..7da3a7067c35 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -47,6 +47,25 @@ static int read_sr(struct spi_nor *nor) return val; } +/* + * Read the flag status register, returning its value in the location + * Return the status register value. + * Returns negative if error occurred. + */ +static int read_fsr(struct spi_nor *nor) +{ + int ret; + u8 val; + + ret = nor->read_reg(nor, SPINOR_OP_RDFSR, &val, 1); + if (ret < 0) { + pr_err("error %d reading FSR\n", ret); + return ret; + } + + return val; +} + /* * Read configuration register, returning its value in the * location. Return the configuration register value. @@ -165,6 +184,32 @@ static int spi_nor_wait_till_ready(struct spi_nor *nor) return -ETIMEDOUT; } +static int spi_nor_wait_till_fsr_ready(struct spi_nor *nor) +{ + unsigned long deadline; + int sr; + int fsr; + + deadline = jiffies + MAX_READY_WAIT_JIFFIES; + + do { + cond_resched(); + + sr = read_sr(nor); + if (sr < 0) { + break; + } else if (!(sr & SR_WIP)) { + fsr = read_fsr(nor); + if (fsr < 0) + break; + if (fsr & FSR_READY) + return 0; + } + } while (!time_after_eq(jiffies, deadline)); + + return -ETIMEDOUT; +} + /* * Service routine to read status register until ready, or timeout occurs. * Returns non-zero if error. @@ -402,6 +447,7 @@ struct flash_info { #define SECT_4K_PMC 0x10 /* SPINOR_OP_BE_4K_PMC works uniformly */ #define SPI_NOR_DUAL_READ 0x20 /* Flash supports Dual Read */ #define SPI_NOR_QUAD_READ 0x40 /* Flash supports Quad Read */ +#define USE_FSR 0x80 /* use flag status register */ }; #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ @@ -488,6 +534,8 @@ const struct spi_device_id spi_nor_ids[] = { { "n25q128a13", INFO(0x20ba18, 0, 64 * 1024, 256, 0) }, { "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K) }, { "n25q512a", INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K) }, + { "n25q512ax3", INFO(0x20ba20, 0, 64 * 1024, 1024, USE_FSR) }, + { "n25q00", INFO(0x20ba21, 0, 64 * 1024, 2048, USE_FSR) }, /* PMC */ { "pm25lv512", INFO(0, 0, 32 * 1024, 2, SECT_4K_PMC) }, @@ -965,6 +1013,10 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, else mtd->_write = spi_nor_write; + if ((info->flags & USE_FSR) && + nor->wait_till_ready == spi_nor_wait_till_ready) + nor->wait_till_ready = spi_nor_wait_till_fsr_ready; + /* prefer "small sector" erase if possible */ if (info->flags & SECT_4K) { nor->erase_opcode = SPINOR_OP_BE_4K; diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 53241842a7ab..9e6294f32ba8 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -34,6 +34,7 @@ #define SPINOR_OP_SE 0xd8 /* Sector erase (usually 64KiB) */ #define SPINOR_OP_RDID 0x9f /* Read JEDEC ID */ #define SPINOR_OP_RDCR 0x35 /* Read configuration register */ +#define SPINOR_OP_RDFSR 0x70 /* Read flag status register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ #define SPINOR_OP_READ4 0x13 /* Read data bytes (low frequency) */ @@ -66,6 +67,9 @@ #define SR_QUAD_EN_MX 0x40 /* Macronix Quad I/O */ +/* Flag Status Register bits */ +#define FSR_READY 0x80 + /* Configuration Register bits. */ #define CR_QUAD_EN_SPAN 0x2 /* Spansion Quad I/O */ -- cgit v1.2.3 From 2004c726b9d9a9670b7f837190be9c8dfa7a0e9d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 21 Jun 2014 20:52:15 -0400 Subject: auth_gss: fetch the acceptor name out of the downcall If rpc.gssd sends us an acceptor name string trailing the context token, stash it as part of the context. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth_gss.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index f1cfd4c85cd0..cbc6875fb9cf 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -71,6 +71,7 @@ struct gss_cl_ctx { spinlock_t gc_seq_lock; struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; + struct xdr_netobj gc_acceptor; u32 gc_win; unsigned long gc_expiry; struct rcu_head gc_rcu; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index b6e440baccc3..e34af68603bd 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -262,9 +262,22 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(ret); goto err; } - dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u\n", - __func__, ctx->gc_expiry, now, timeout); - return q; + + /* is there any trailing data? */ + if (q == end) { + p = q; + goto done; + } + + /* pull in acceptor name (if there is one) */ + p = simple_get_netobj(q, end, &ctx->gc_acceptor); + if (IS_ERR(p)) + goto err; +done: + dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n", + __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, + ctx->gc_acceptor.data); + return p; err: dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); return p; @@ -1225,6 +1238,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx) gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); + kfree(ctx->gc_acceptor.data); kfree(ctx); } -- cgit v1.2.3 From a0337d1ddb5a4bd609e3ff0955551cb240340340 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 21 Jun 2014 20:52:16 -0400 Subject: sunrpc: add a new "stringify_acceptor" rpc_credop ...and add an new rpc_auth function to call it when it exists. This is only applicable for AUTH_GSS mechanisms, so we only specify this for those sorts of credentials. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 ++ net/sunrpc/auth.c | 9 ++++++ net/sunrpc/auth_gss/auth_gss.c | 62 ++++++++++++++++++++++++++++-------------- 3 files changed, 53 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 790be1472792..c683b9a06913 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -140,6 +140,7 @@ struct rpc_credops { void *, __be32 *, void *); int (*crkey_timeout)(struct rpc_cred *); bool (*crkey_to_expire)(struct rpc_cred *); + char * (*crstringify_acceptor)(struct rpc_cred *); }; extern const struct rpc_authops authunix_ops; @@ -182,6 +183,7 @@ void rpcauth_clear_credcache(struct rpc_cred_cache *); int rpcauth_key_timeout_notify(struct rpc_auth *, struct rpc_cred *); bool rpcauth_cred_key_to_expire(struct rpc_cred *); +char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline struct rpc_cred * get_rpccred(struct rpc_cred *cred) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f77366717420..1481efff6aa2 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -363,6 +363,15 @@ rpcauth_cred_key_to_expire(struct rpc_cred *cred) } EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire); +char * +rpcauth_stringify_acceptor(struct rpc_cred *cred) +{ + if (!cred->cr_ops->crstringify_acceptor) + return NULL; + return cred->cr_ops->crstringify_acceptor(cred); +} +EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor); + /* * Destroy a list of credentials */ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e34af68603bd..73854314fb85 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1346,6 +1346,26 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred) return err; } +static char * +gss_stringify_acceptor(struct rpc_cred *cred) +{ + char *string; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct xdr_netobj *acceptor = &gss_cred->gc_ctx->gc_acceptor; + + /* no point if there's no string */ + if (!acceptor->len) + return NULL; + + string = kmalloc(acceptor->len + 1, GFP_KERNEL); + if (!string) + return string; + + memcpy(string, acceptor->data, acceptor->len); + string[acceptor->len] = '\0'; + return string; +} + /* * Returns -EACCES if GSS context is NULL or will expire within the * timeout (miliseconds) @@ -1923,29 +1943,31 @@ static const struct rpc_authops authgss_ops = { }; static const struct rpc_credops gss_credops = { - .cr_name = "AUTH_GSS", - .crdestroy = gss_destroy_cred, - .cr_init = gss_cred_init, - .crbind = rpcauth_generic_bind_cred, - .crmatch = gss_match, - .crmarshal = gss_marshal, - .crrefresh = gss_refresh, - .crvalidate = gss_validate, - .crwrap_req = gss_wrap_req, - .crunwrap_resp = gss_unwrap_resp, - .crkey_timeout = gss_key_timeout, + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_cred, + .cr_init = gss_cred_init, + .crbind = rpcauth_generic_bind_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, + .crkey_timeout = gss_key_timeout, + .crstringify_acceptor = gss_stringify_acceptor, }; static const struct rpc_credops gss_nullops = { - .cr_name = "AUTH_GSS", - .crdestroy = gss_destroy_nullcred, - .crbind = rpcauth_generic_bind_cred, - .crmatch = gss_match, - .crmarshal = gss_marshal, - .crrefresh = gss_refresh_null, - .crvalidate = gss_validate, - .crwrap_req = gss_wrap_req, - .crunwrap_resp = gss_unwrap_resp, + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_nullcred, + .crbind = rpcauth_generic_bind_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh_null, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, + .crstringify_acceptor = gss_stringify_acceptor, }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { -- cgit v1.2.3 From f11b2a1cfbf5dd783eb55cb470509d06e20d1c78 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 21 Jun 2014 20:52:17 -0400 Subject: nfs4: copy acceptor name from context to nfs_client The current CB_COMPOUND handling code tries to compare the principal name of the request with the cl_hostname in the client. This is not guaranteed to ever work, particularly if the client happened to mount a CNAME of the server or a non-fqdn. Fix this by instead comparing the cr_principal string with the acceptor name that we get from gssd. In the event that gssd didn't send one down (i.e. it was too old), then we fall back to trying to use the cl_hostname as we do today. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 12 ++++++++++++ fs/nfs/client.c | 1 + fs/nfs/nfs4proc.c | 33 ++++++++++++++++++++++++++++++++- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 5 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 073b4cf67ed9..54de482143cc 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) if (p == NULL) return 0; + /* + * Did we get the acceptor from userland during the SETCLIENID + * negotiation? + */ + if (clp->cl_acceptor) + return !strcmp(p, clp->cl_acceptor); + + /* + * Otherwise try to verify it using the cl_hostname. Note that this + * doesn't work if a non-canonical hostname was used in the devname. + */ + /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ if (memcmp(p, "nfs@", 4) != 0) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b213ee8fb012..168aa0df2658 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -252,6 +252,7 @@ void nfs_free_client(struct nfs_client *clp) put_net(clp->cl_net); put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); + kfree(clp->cl_acceptor); kfree(clp); dprintk("<-- nfs_free_client()\n"); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0b8490eab486..b7babb3b8a4d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4936,6 +4936,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len) return scnprintf(buf, len, "tcp"); } +static void nfs4_setclientid_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_setclientid *sc = calldata; + + if (task->tk_status == 0) + sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred); +} + +static const struct rpc_call_ops nfs4_setclientid_ops = { + .rpc_call_done = nfs4_setclientid_done, +}; + /** * nfs4_proc_setclientid - Negotiate client ID * @clp: state data structure @@ -4962,6 +4974,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; + struct rpc_task *task; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_setclientid_ops, + .callback_data = &setclientid, + .flags = RPC_TASK_TIMEOUT, + }; int status; /* nfs_client_id4 */ @@ -4988,7 +5008,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, dprintk("NFS call setclientid auth=%s, '%.*s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, setclientid.sc_name_len, setclientid.sc_name); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + status = PTR_ERR(task); + goto out; + } + status = task->tk_status; + if (setclientid.sc_cred) { + clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); + put_rpccred(setclientid.sc_cred); + } + rpc_put_task(task); +out: trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); return status; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1150ea41b626..922be2e050f5 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -45,6 +45,7 @@ struct nfs_client { struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ + char * cl_acceptor; /* GSSAPI acceptor name */ struct list_head cl_share_link; /* link in global client list */ struct list_head cl_superblocks; /* List of nfs_server structs */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 81cbbf313272..0040629894df 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -993,6 +993,7 @@ struct nfs4_setclientid { unsigned int sc_uaddr_len; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; u32 sc_cb_ident; + struct rpc_cred *sc_cred; }; struct nfs4_setclientid_res { -- cgit v1.2.3 From b9ba6f94b2382ef832f97122976b73004f714714 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 4 Jun 2014 12:23:19 +0800 Subject: quota: remove dqptr_sem Remove dqptr_sem to make quota code scalable: Remove the dqptr_sem, accessing inode->i_dquot now protected by dquot_srcu, and changing inode->i_dquot is now serialized by dq_data_lock. Signed-off-by: Lai Siyao Signed-off-by: Niu Yawei Signed-off-by: Jan Kara --- fs/quota/dquot.c | 114 ++++++++++++++++++++++---------------------------- fs/super.c | 1 - include/linux/quota.h | 1 - 3 files changed, 49 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index fb2d2e2a89e7..f2d0eee9d1f1 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -96,13 +96,16 @@ * Note that some things (eg. sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock * - * Any operation working on dquots via inode pointers must hold dqptr_sem. If - * operation is just reading pointers from inode (or not using them at all) the - * read lock is enough. If pointers are altered function must hold write lock. + * Operation accessing dquots via inode pointers are protected by dquot_srcu. + * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and + * synchronize_srcu(&dquot_srcu) is called after clearing pointers from + * inode and before dropping dquot references to avoid use of dquots after + * they are freed. dq_data_lock is used to serialize the pointer setting and + * clearing operations. * Special care needs to be taken about S_NOQUOTA inode flag (marking that * inode is a quota file). Functions adding pointers from inode to dquots have - * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they - * have to do all pointer modifications before dropping dqptr_sem. This makes + * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they + * have to do all pointer modifications before dropping dq_data_lock. This makes * sure they cannot race with quotaon which first sets S_NOQUOTA flag and * then drops all pointers to dquots from an inode. * @@ -116,21 +119,15 @@ * spinlock to internal buffers before writing. * * Lock ordering (including related VFS locks) is the following: - * dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock > - * dqio_mutex + * dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc. - * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem > - * dqptr_sem. But filesystem has to count with the fact that functions such as - * dquot_alloc_space() acquire dqptr_sem and they usually have to be called - * from inside a transaction to keep filesystem consistency after a crash. Also - * filesystems usually want to do some IO on dquot from ->mark_dirty which is - * called with dqptr_sem held. */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); +DEFINE_STATIC_SRCU(dquot_srcu); void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...) @@ -964,7 +961,6 @@ static void add_dquot_ref(struct super_block *sb, int type) /* * Remove references to dquots from inode and add dquot to list for freeing * if we have the last reference to dquot - * We can't race with anybody because we hold dqptr_sem for writing... */ static void remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) @@ -1024,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type, * We have to scan also I_NEW inodes because they can already * have quota pointer initialized. Luckily, we need to touch * only quota pointers and these have separate locking - * (dqptr_sem). + * (dq_data_lock). */ + spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; remove_inode_dquot_ref(inode, type, tofree_head); } + spin_unlock(&dq_data_lock); } spin_unlock(&inode_sb_list_lock); #ifdef CONFIG_QUOTA_DEBUG @@ -1048,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type) LIST_HEAD(tofree_head); if (sb->dq_op) { - down_write(&sb_dqopt(sb)->dqptr_sem); remove_dquot_ref(sb, type, &tofree_head); - up_write(&sb_dqopt(sb)->dqptr_sem); + synchronize_srcu(&dquot_srcu); put_dquot_list(&tofree_head); } } @@ -1381,9 +1378,6 @@ static int dquot_active(const struct inode *inode) /* * Initialize quota pointers in inode * - * We do things in a bit complicated way but by that we avoid calling - * dqget() and thus filesystem callbacks under dqptr_sem. - * * It is better to call this function outside of any transaction as it * might need a lot of space in journal for dquot structure allocation. */ @@ -1394,8 +1388,6 @@ static void __dquot_initialize(struct inode *inode, int type) struct super_block *sb = inode->i_sb; qsize_t rsv; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return; @@ -1429,7 +1421,7 @@ static void __dquot_initialize(struct inode *inode, int type) if (!init_needed) return; - down_write(&sb_dqopt(sb)->dqptr_sem); + spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) goto out_err; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type) * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); - if (unlikely(rsv)) { - spin_lock(&dq_data_lock); + if (unlikely(rsv)) dquot_resv_space(inode->i_dquot[cnt], rsv); - spin_unlock(&dq_data_lock); - } } } out_err: - up_write(&sb_dqopt(sb)->dqptr_sem); + spin_unlock(&dq_data_lock); /* Drop unused references */ dqput_all(got); } @@ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode) EXPORT_SYMBOL(dquot_initialize); /* - * Release all quotas referenced by inode + * Release all quotas referenced by inode. + * + * This function only be called on inode free or converting + * a file to quota file, no other users for the i_dquot in + * both cases, so we needn't call synchronize_srcu() after + * clearing i_dquot. */ static void __dquot_drop(struct inode *inode) { int cnt; struct dquot *put[MAXQUOTAS]; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { put[cnt] = inode->i_dquot[cnt]; inode->i_dquot[cnt] = NULL; } - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_unlock(&dq_data_lock); dqput_all(put); } @@ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) */ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { - int cnt, ret = 0; + int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots = inode->i_dquot; int reserve = flags & DQUOT_SPACE_RESERVE; - /* - * First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex - */ if (!dquot_active(inode)) { inode_incr_space(inode, number, reserve); goto out; @@ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) @@ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) goto out_flush_warn; mark_all_dquot_dirty(dquots); out_flush_warn: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); out: return ret; @@ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space); */ int dquot_alloc_inode(const struct inode *inode) { - int cnt, ret = 0; + int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots = inode->i_dquot; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) @@ -1685,7 +1674,7 @@ warn_put_all: spin_unlock(&dq_data_lock); if (ret == 0) mark_all_dquot_dirty(dquots); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); return ret; } @@ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode); */ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { - int cnt; + int cnt, index; if (!dquot_active(inode)) { inode_claim_rsv_space(inode, number); return 0; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); return 0; } EXPORT_SYMBOL(dquot_claim_space_nodirty); @@ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { - int cnt; + int cnt, index; if (!dquot_active(inode)) { inode_reclaim_rsv_space(inode, number); return; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) inode_reclaim_rsv_space(inode, number); spin_unlock(&dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); return; } EXPORT_SYMBOL(dquot_reclaim_space_nodirty); @@ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots = inode->i_dquot; - int reserve = flags & DQUOT_SPACE_RESERVE; + int reserve = flags & DQUOT_SPACE_RESERVE, index; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) { inode_decr_space(inode, number, reserve); return; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; @@ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) goto out_unlock; mark_all_dquot_dirty(dquots); out_unlock: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(__dquot_free_space); @@ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots = inode->i_dquot; + int index; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; @@ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode) } spin_unlock(&dq_data_lock); mark_all_dquot_dirty(dquots); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(dquot_free_inode); @@ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode); * This operation can block, but only after everything is updated * A transaction must be started when entering this function. * + * We are holding reference on transfer_from & transfer_to, no need to + * protect them by srcu_read_lock(). */ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { @@ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) struct dquot_warn warn_from_inodes[MAXQUOTAS]; struct dquot_warn warn_from_space[MAXQUOTAS]; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return 0; /* Initialize the arrays */ @@ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; } - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + + spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_unlock(&dq_data_lock); return 0; } - spin_lock(&dq_data_lock); cur_space = inode_get_bytes(inode); rsv_space = inode_get_rsv_space(inode); space = cur_space + rsv_space; @@ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) inode->i_dquot[cnt] = transfer_to[cnt]; } spin_unlock(&dq_data_lock); - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); mark_all_dquot_dirty(transfer_from); mark_all_dquot_dirty(transfer_to); @@ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) return 0; over_quota: spin_unlock(&dq_data_lock); - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); flush_warnings(warn_to); return ret; } diff --git a/fs/super.c b/fs/super.c index d20d5b11dedf..872b26bf06dd 100644 --- a/fs/super.c +++ b/fs/super.c @@ -218,7 +218,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); mutex_init(&s->s_dquot.dqio_mutex); mutex_init(&s->s_dquot.dqonoff_mutex); - init_rwsem(&s->s_dquot.dqptr_sem); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; diff --git a/include/linux/quota.h b/include/linux/quota.h index 0f3c5d38da1f..80d345a3524c 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -390,7 +390,6 @@ struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */ - struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */ struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ -- cgit v1.2.3 From a509ea840b8e29e512764803e30b805c7ea89038 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 9 Jul 2014 17:45:10 +0200 Subject: ARM: mvebu: extend PMSU code to support dynamic frequency scaling This commit adds the necessary code in the Marvell EBU PMSU driver to support dynamic frequency scaling. In essence, what this new code does is that it: * registers the frequency operating points supported by the CPU; * registers a clock notifier of the CPU clocks. The notifier function listens to the newly introduced APPLY_RATE_CHANGE event, and uses that to finalize the frequency transition by doing the part of the procedure that involves the PMSU; * registers a platform device for the cpufreq-generic driver, which will take care of the CPU frequency transitions. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1404920715-19834-3-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/pmsu.c | 162 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/mvebu-pmsu.h | 20 ++++++ 2 files changed, 182 insertions(+) create mode 100644 include/linux/mvebu-pmsu.h (limited to 'include/linux') diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index 53a55c8520bf..db7d9ab298b6 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -18,20 +18,26 @@ #define pr_fmt(fmt) "mvebu-pmsu: " fmt +#include #include +#include #include #include #include +#include #include #include +#include #include #include +#include #include #include #include #include #include #include "common.h" +#include "armada-370-xp.h" static void __iomem *pmsu_mp_base; @@ -57,6 +63,10 @@ static void __iomem *pmsu_mp_base; #define PMSU_STATUS_AND_MASK_IRQ_MASK BIT(24) #define PMSU_STATUS_AND_MASK_FIQ_MASK BIT(25) +#define PMSU_EVENT_STATUS_AND_MASK(cpu) ((cpu * 0x100) + 0x120) +#define PMSU_EVENT_STATUS_AND_MASK_DFS_DONE BIT(1) +#define PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK BIT(17) + #define PMSU_BOOT_ADDR_REDIRECT_OFFSET(cpu) ((cpu * 0x100) + 0x124) /* PMSU fabric registers */ @@ -296,3 +306,155 @@ int __init armada_370_xp_cpu_pm_init(void) arch_initcall(armada_370_xp_cpu_pm_init); early_initcall(armada_370_xp_pmsu_init); + +static void mvebu_pmsu_dfs_request_local(void *data) +{ + u32 reg; + u32 cpu = smp_processor_id(); + unsigned long flags; + + local_irq_save(flags); + + /* Prepare to enter idle */ + reg = readl(pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu)); + reg |= PMSU_STATUS_AND_MASK_CPU_IDLE_WAIT | + PMSU_STATUS_AND_MASK_IRQ_MASK | + PMSU_STATUS_AND_MASK_FIQ_MASK; + writel(reg, pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu)); + + /* Request the DFS transition */ + reg = readl(pmsu_mp_base + PMSU_CONTROL_AND_CONFIG(cpu)); + reg |= PMSU_CONTROL_AND_CONFIG_DFS_REQ; + writel(reg, pmsu_mp_base + PMSU_CONTROL_AND_CONFIG(cpu)); + + /* The fact of entering idle will trigger the DFS transition */ + wfi(); + + /* + * We're back from idle, the DFS transition has completed, + * clear the idle wait indication. + */ + reg = readl(pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu)); + reg &= ~PMSU_STATUS_AND_MASK_CPU_IDLE_WAIT; + writel(reg, pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu)); + + local_irq_restore(flags); +} + +int mvebu_pmsu_dfs_request(int cpu) +{ + unsigned long timeout; + int hwcpu = cpu_logical_map(cpu); + u32 reg; + + /* Clear any previous DFS DONE event */ + reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + reg &= ~PMSU_EVENT_STATUS_AND_MASK_DFS_DONE; + writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + + /* Mask the DFS done interrupt, since we are going to poll */ + reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + reg |= PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK; + writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + + /* Trigger the DFS on the appropriate CPU */ + smp_call_function_single(cpu, mvebu_pmsu_dfs_request_local, + NULL, false); + + /* Poll until the DFS done event is generated */ + timeout = jiffies + HZ; + while (time_before(jiffies, timeout)) { + reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + if (reg & PMSU_EVENT_STATUS_AND_MASK_DFS_DONE) + break; + udelay(10); + } + + if (time_after(jiffies, timeout)) + return -ETIME; + + /* Restore the DFS mask to its original state */ + reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + reg &= ~PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK; + writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + + return 0; +} + +static int __init armada_xp_pmsu_cpufreq_init(void) +{ + struct device_node *np; + struct resource res; + int ret, cpu; + + if (!of_machine_is_compatible("marvell,armadaxp")) + return 0; + + /* + * In order to have proper cpufreq handling, we need to ensure + * that the Device Tree description of the CPU clock includes + * the definition of the PMU DFS registers. If not, we do not + * register the clock notifier and the cpufreq driver. This + * piece of code is only for compatibility with old Device + * Trees. + */ + np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock"); + if (!np) + return 0; + + ret = of_address_to_resource(np, 1, &res); + if (ret) { + pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n"); + of_node_put(np); + return 0; + } + + of_node_put(np); + + /* + * For each CPU, this loop registers the operating points + * supported (which are the nominal CPU frequency and half of + * it), and registers the clock notifier that will take care + * of doing the PMSU part of a frequency transition. + */ + for_each_possible_cpu(cpu) { + struct device *cpu_dev; + struct clk *clk; + int ret; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + pr_err("Cannot get CPU %d\n", cpu); + continue; + } + + clk = clk_get(cpu_dev, 0); + if (!clk) { + pr_err("Cannot get clock for CPU %d\n", cpu); + return -ENODEV; + } + + /* + * In case of a failure of dev_pm_opp_add(), we don't + * bother with cleaning up the registered OPP (there's + * no function to do so), and simply cancel the + * registration of the cpufreq device. + */ + ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0); + if (ret) { + clk_put(clk); + return ret; + } + + ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0); + if (ret) { + clk_put(clk); + return ret; + } + } + + platform_device_register_simple("cpufreq-generic", -1, NULL, 0); + return 0; +} + +device_initcall(armada_xp_pmsu_cpufreq_init); diff --git a/include/linux/mvebu-pmsu.h b/include/linux/mvebu-pmsu.h new file mode 100644 index 000000000000..b918d07efe23 --- /dev/null +++ b/include/linux/mvebu-pmsu.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2012 Marvell + * + * Thomas Petazzoni + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __MVEBU_PMSU_H__ +#define __MVEBU_PMSU_H__ + +#ifdef CONFIG_MACH_MVEBU_V7 +int mvebu_pmsu_dfs_request(int cpu); +#else +static inline int mvebu_pmsu_dfs_request(int cpu) { return -ENODEV; } +#endif + +#endif /* __MVEBU_PMSU_H__ */ -- cgit v1.2.3 From 8fe8bc7773303e3c49be348c3180bc9785104dfc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 9 Jul 2014 16:25:23 +0200 Subject: i2c: s6000: remove duplicate driver It turned out that the s6000 simply has a designware IP core and should use the designated driver for it which is way more maintained and feature complete. There are currently no users in tree, and not even a toolchain for s6000 seems to be available. So, simply remove this duplicate. If someone needs assistance in converting to the designware driver, the i2c list will be there to help. Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 10 - drivers/i2c/busses/Makefile | 1 - drivers/i2c/busses/i2c-s6000.c | 404 ----------------------------------------- drivers/i2c/busses/i2c-s6000.h | 79 -------- include/linux/i2c/s6000.h | 10 - 5 files changed, 504 deletions(-) delete mode 100644 drivers/i2c/busses/i2c-s6000.c delete mode 100644 drivers/i2c/busses/i2c-s6000.h delete mode 100644 include/linux/i2c/s6000.h (limited to 'include/linux') diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 9f7d5859cf65..d25dd120c011 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -700,16 +700,6 @@ config I2C_S3C2410 Say Y here to include support for I2C controller in the Samsung SoCs. -config I2C_S6000 - tristate "S6000 I2C support" - depends on XTENSA_VARIANT_S6000 - help - This driver supports the on chip I2C device on the - S6000 xtensa processor family. - - To compile this driver as a module, choose M here. The module - will be called i2c-s6000. - config I2C_SH7760 tristate "Renesas SH7760 I2C Controller" depends on CPU_SUBTYPE_SH7760 diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index dd9a7f8e873f..1958b490105e 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -68,7 +68,6 @@ obj-$(CONFIG_I2C_QUP) += i2c-qup.o obj-$(CONFIG_I2C_RIIC) += i2c-riic.o obj-$(CONFIG_I2C_RK3X) += i2c-rk3x.o obj-$(CONFIG_I2C_S3C2410) += i2c-s3c2410.o -obj-$(CONFIG_I2C_S6000) += i2c-s6000.o obj-$(CONFIG_I2C_SH7760) += i2c-sh7760.o obj-$(CONFIG_I2C_SH_MOBILE) += i2c-sh_mobile.o obj-$(CONFIG_I2C_SIMTEC) += i2c-simtec.o diff --git a/drivers/i2c/busses/i2c-s6000.c b/drivers/i2c/busses/i2c-s6000.c deleted file mode 100644 index dd186a037684..000000000000 --- a/drivers/i2c/busses/i2c-s6000.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * drivers/i2c/busses/i2c-s6000.c - * - * Description: Driver for S6000 Family I2C Interface - * Copyright (c) 2008 emlix GmbH - * Author: Oskar Schirmer - * - * Partially based on i2c-bfin-twi.c driver by - * Copyright (c) 2005-2007 Analog Devices, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "i2c-s6000.h" - -#define DRV_NAME "i2c-s6000" - -#define POLL_TIMEOUT (2 * HZ) - -struct s6i2c_if { - u8 __iomem *reg; /* memory mapped registers */ - int irq; - spinlock_t lock; - struct i2c_msg *msgs; /* messages currently handled */ - int msgs_num; /* nb of msgs to do */ - int msgs_push; /* nb of msgs read/written */ - int msgs_done; /* nb of msgs finally handled */ - unsigned push; /* nb of bytes read/written in msg */ - unsigned done; /* nb of bytes finally handled */ - int timeout_count; /* timeout retries left */ - struct timer_list timeout_timer; - struct i2c_adapter adap; - struct completion complete; - struct clk *clk; - struct resource *res; -}; - -static inline u16 i2c_rd16(struct s6i2c_if *iface, unsigned n) -{ - return readw(iface->reg + (n)); -} - -static inline void i2c_wr16(struct s6i2c_if *iface, unsigned n, u16 v) -{ - writew(v, iface->reg + (n)); -} - -static inline u32 i2c_rd32(struct s6i2c_if *iface, unsigned n) -{ - return readl(iface->reg + (n)); -} - -static inline void i2c_wr32(struct s6i2c_if *iface, unsigned n, u32 v) -{ - writel(v, iface->reg + (n)); -} - -static struct s6i2c_if s6i2c_if; - -static void s6i2c_handle_interrupt(struct s6i2c_if *iface) -{ - if (i2c_rd16(iface, S6_I2C_INTRSTAT) & (1 << S6_I2C_INTR_TXABRT)) { - i2c_rd16(iface, S6_I2C_CLRTXABRT); - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - complete(&iface->complete); - return; - } - if (iface->msgs_done >= iface->msgs_num) { - dev_err(&iface->adap.dev, "s6i2c: spurious I2C irq: %04x\n", - i2c_rd16(iface, S6_I2C_INTRSTAT)); - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - return; - } - while ((iface->msgs_push < iface->msgs_num) - && (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_TFNF))) { - struct i2c_msg *m = &iface->msgs[iface->msgs_push]; - if (!(m->flags & I2C_M_RD)) - i2c_wr16(iface, S6_I2C_DATACMD, m->buf[iface->push]); - else - i2c_wr16(iface, S6_I2C_DATACMD, - 1 << S6_I2C_DATACMD_READ); - if (++iface->push >= m->len) { - iface->push = 0; - iface->msgs_push += 1; - } - } - do { - struct i2c_msg *m = &iface->msgs[iface->msgs_done]; - if (!(m->flags & I2C_M_RD)) { - if (iface->msgs_done < iface->msgs_push) - iface->msgs_done += 1; - else - break; - } else if (i2c_rd16(iface, S6_I2C_STATUS) - & (1 << S6_I2C_STATUS_RFNE)) { - m->buf[iface->done] = i2c_rd16(iface, S6_I2C_DATACMD); - if (++iface->done >= m->len) { - iface->done = 0; - iface->msgs_done += 1; - } - } else{ - break; - } - } while (iface->msgs_done < iface->msgs_num); - if (iface->msgs_done >= iface->msgs_num) { - i2c_wr16(iface, S6_I2C_INTRMASK, 1 << S6_I2C_INTR_TXABRT); - complete(&iface->complete); - } else if (iface->msgs_push >= iface->msgs_num) { - i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXABRT) | - (1 << S6_I2C_INTR_RXFULL)); - } else { - i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXABRT) | - (1 << S6_I2C_INTR_TXEMPTY) | - (1 << S6_I2C_INTR_RXFULL)); - } -} - -static irqreturn_t s6i2c_interrupt_entry(int irq, void *dev_id) -{ - struct s6i2c_if *iface = dev_id; - if (!(i2c_rd16(iface, S6_I2C_STATUS) & ((1 << S6_I2C_INTR_RXUNDER) - | (1 << S6_I2C_INTR_RXOVER) - | (1 << S6_I2C_INTR_RXFULL) - | (1 << S6_I2C_INTR_TXOVER) - | (1 << S6_I2C_INTR_TXEMPTY) - | (1 << S6_I2C_INTR_RDREQ) - | (1 << S6_I2C_INTR_TXABRT) - | (1 << S6_I2C_INTR_RXDONE) - | (1 << S6_I2C_INTR_ACTIVITY) - | (1 << S6_I2C_INTR_STOPDET) - | (1 << S6_I2C_INTR_STARTDET) - | (1 << S6_I2C_INTR_GENCALL)))) - return IRQ_NONE; - - spin_lock(&iface->lock); - del_timer(&iface->timeout_timer); - s6i2c_handle_interrupt(iface); - spin_unlock(&iface->lock); - return IRQ_HANDLED; -} - -static void s6i2c_timeout(unsigned long data) -{ - struct s6i2c_if *iface = (struct s6i2c_if *)data; - unsigned long flags; - - spin_lock_irqsave(&iface->lock, flags); - s6i2c_handle_interrupt(iface); - if (--iface->timeout_count > 0) { - iface->timeout_timer.expires = jiffies + POLL_TIMEOUT; - add_timer(&iface->timeout_timer); - } else { - complete(&iface->complete); - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - } - spin_unlock_irqrestore(&iface->lock, flags); -} - -static int s6i2c_master_xfer(struct i2c_adapter *adap, - struct i2c_msg *msgs, int num) -{ - struct s6i2c_if *iface = adap->algo_data; - int i; - if (num == 0) - return 0; - if (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_ACTIVITY)) - yield(); - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - i2c_rd16(iface, S6_I2C_CLRINTR); - for (i = 0; i < num; i++) { - if (msgs[i].flags & I2C_M_TEN) { - dev_err(&adap->dev, - "s6i2c: 10 bits addr not supported\n"); - return -EINVAL; - } - if (msgs[i].len == 0) { - dev_err(&adap->dev, - "s6i2c: zero length message not supported\n"); - return -EINVAL; - } - if (msgs[i].addr != msgs[0].addr) { - dev_err(&adap->dev, - "s6i2c: multiple xfer cannot change target\n"); - return -EINVAL; - } - } - - iface->msgs = msgs; - iface->msgs_num = num; - iface->msgs_push = 0; - iface->msgs_done = 0; - iface->push = 0; - iface->done = 0; - iface->timeout_count = 10; - i2c_wr16(iface, S6_I2C_TAR, msgs[0].addr); - i2c_wr16(iface, S6_I2C_ENABLE, 1); - i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXEMPTY) | - (1 << S6_I2C_INTR_TXABRT)); - - iface->timeout_timer.expires = jiffies + POLL_TIMEOUT; - add_timer(&iface->timeout_timer); - wait_for_completion(&iface->complete); - del_timer_sync(&iface->timeout_timer); - while (i2c_rd32(iface, S6_I2C_TXFLR) > 0) - schedule(); - while (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_ACTIVITY)) - schedule(); - - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - i2c_wr16(iface, S6_I2C_ENABLE, 0); - return iface->msgs_done; -} - -static u32 s6i2c_functionality(struct i2c_adapter *adap) -{ - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; -} - -static struct i2c_algorithm s6i2c_algorithm = { - .master_xfer = s6i2c_master_xfer, - .functionality = s6i2c_functionality, -}; - -static u16 nanoseconds_on_clk(struct s6i2c_if *iface, u32 ns) -{ - u32 dividend = ((clk_get_rate(iface->clk) / 1000) * ns) / 1000000; - if (dividend > 0xffff) - return 0xffff; - return dividend; -} - -static int s6i2c_probe(struct platform_device *dev) -{ - struct s6i2c_if *iface = &s6i2c_if; - struct i2c_adapter *p_adap; - const char *clock; - int bus_num, rc; - spin_lock_init(&iface->lock); - init_completion(&iface->complete); - iface->irq = platform_get_irq(dev, 0); - if (iface->irq < 0) { - rc = iface->irq; - goto err_out; - } - iface->res = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (!iface->res) { - rc = -ENXIO; - goto err_out; - } - iface->res = request_mem_region(iface->res->start, - resource_size(iface->res), - dev->dev.bus_id); - if (!iface->res) { - rc = -EBUSY; - goto err_out; - } - iface->reg = ioremap_nocache(iface->res->start, - resource_size(iface->res)); - if (!iface->reg) { - rc = -ENOMEM; - goto err_reg; - } - - clock = 0; - bus_num = -1; - if (dev_get_platdata(&dev->dev)) { - struct s6_i2c_platform_data *pdata = - dev_get_platdata(&dev->dev); - bus_num = pdata->bus_num; - clock = pdata->clock; - } - iface->clk = clk_get(&dev->dev, clock); - if (IS_ERR(iface->clk)) { - rc = PTR_ERR(iface->clk); - goto err_map; - } - rc = clk_enable(iface->clk); - if (rc < 0) - goto err_clk_put; - init_timer(&iface->timeout_timer); - iface->timeout_timer.function = s6i2c_timeout; - iface->timeout_timer.data = (unsigned long)iface; - - p_adap = &iface->adap; - strlcpy(p_adap->name, dev->name, sizeof(p_adap->name)); - p_adap->algo = &s6i2c_algorithm; - p_adap->algo_data = iface; - p_adap->nr = bus_num; - p_adap->class = 0; - p_adap->dev.parent = &dev->dev; - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - rc = request_irq(iface->irq, s6i2c_interrupt_entry, - IRQF_SHARED, dev->name, iface); - if (rc) { - dev_err(&p_adap->dev, "s6i2c: can't get IRQ %d\n", iface->irq); - goto err_clk_dis; - } - - i2c_wr16(iface, S6_I2C_ENABLE, 0); - udelay(1); - i2c_wr32(iface, S6_I2C_SRESET, 1 << S6_I2C_SRESET_IC_SRST); - i2c_wr16(iface, S6_I2C_CLRTXABRT, 1); - i2c_wr16(iface, S6_I2C_CON, - (1 << S6_I2C_CON_MASTER) | - (S6_I2C_CON_SPEED_NORMAL << S6_I2C_CON_SPEED) | - (0 << S6_I2C_CON_10BITSLAVE) | - (0 << S6_I2C_CON_10BITMASTER) | - (1 << S6_I2C_CON_RESTARTENA) | - (1 << S6_I2C_CON_SLAVEDISABLE)); - i2c_wr16(iface, S6_I2C_SSHCNT, nanoseconds_on_clk(iface, 4000)); - i2c_wr16(iface, S6_I2C_SSLCNT, nanoseconds_on_clk(iface, 4700)); - i2c_wr16(iface, S6_I2C_FSHCNT, nanoseconds_on_clk(iface, 600)); - i2c_wr16(iface, S6_I2C_FSLCNT, nanoseconds_on_clk(iface, 1300)); - i2c_wr16(iface, S6_I2C_RXTL, 0); - i2c_wr16(iface, S6_I2C_TXTL, 0); - - platform_set_drvdata(dev, iface); - rc = i2c_add_numbered_adapter(p_adap); - if (rc) - goto err_irq_free; - return 0; - -err_irq_free: - free_irq(iface->irq, iface); -err_clk_dis: - clk_disable(iface->clk); -err_clk_put: - clk_put(iface->clk); -err_map: - iounmap(iface->reg); -err_reg: - release_mem_region(iface->res->start, - resource_size(iface->res)); -err_out: - return rc; -} - -static int s6i2c_remove(struct platform_device *pdev) -{ - struct s6i2c_if *iface = platform_get_drvdata(pdev); - i2c_wr16(iface, S6_I2C_ENABLE, 0); - i2c_del_adapter(&iface->adap); - free_irq(iface->irq, iface); - clk_disable(iface->clk); - clk_put(iface->clk); - iounmap(iface->reg); - release_mem_region(iface->res->start, - resource_size(iface->res)); - return 0; -} - -static struct platform_driver s6i2c_driver = { - .probe = s6i2c_probe, - .remove = s6i2c_remove, - .driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - }, -}; - -static int __init s6i2c_init(void) -{ - pr_info("I2C: S6000 I2C driver\n"); - return platform_driver_register(&s6i2c_driver); -} - -static void __exit s6i2c_exit(void) -{ - platform_driver_unregister(&s6i2c_driver); -} - -MODULE_DESCRIPTION("I2C-Bus adapter routines for S6000 I2C"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:" DRV_NAME); - -subsys_initcall(s6i2c_init); -module_exit(s6i2c_exit); diff --git a/drivers/i2c/busses/i2c-s6000.h b/drivers/i2c/busses/i2c-s6000.h deleted file mode 100644 index 4936f9f2256f..000000000000 --- a/drivers/i2c/busses/i2c-s6000.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * drivers/i2c/busses/i2c-s6000.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2008 Emlix GmbH - * Author: Oskar Schirmer - */ - -#ifndef __DRIVERS_I2C_BUSSES_I2C_S6000_H -#define __DRIVERS_I2C_BUSSES_I2C_S6000_H - -#define S6_I2C_CON 0x000 -#define S6_I2C_CON_MASTER 0 -#define S6_I2C_CON_SPEED 1 -#define S6_I2C_CON_SPEED_NORMAL 1 -#define S6_I2C_CON_SPEED_FAST 2 -#define S6_I2C_CON_SPEED_MASK 3 -#define S6_I2C_CON_10BITSLAVE 3 -#define S6_I2C_CON_10BITMASTER 4 -#define S6_I2C_CON_RESTARTENA 5 -#define S6_I2C_CON_SLAVEDISABLE 6 -#define S6_I2C_TAR 0x004 -#define S6_I2C_TAR_GCORSTART 10 -#define S6_I2C_TAR_SPECIAL 11 -#define S6_I2C_SAR 0x008 -#define S6_I2C_HSMADDR 0x00C -#define S6_I2C_DATACMD 0x010 -#define S6_I2C_DATACMD_READ 8 -#define S6_I2C_SSHCNT 0x014 -#define S6_I2C_SSLCNT 0x018 -#define S6_I2C_FSHCNT 0x01C -#define S6_I2C_FSLCNT 0x020 -#define S6_I2C_INTRSTAT 0x02C -#define S6_I2C_INTRMASK 0x030 -#define S6_I2C_RAWINTR 0x034 -#define S6_I2C_INTR_RXUNDER 0 -#define S6_I2C_INTR_RXOVER 1 -#define S6_I2C_INTR_RXFULL 2 -#define S6_I2C_INTR_TXOVER 3 -#define S6_I2C_INTR_TXEMPTY 4 -#define S6_I2C_INTR_RDREQ 5 -#define S6_I2C_INTR_TXABRT 6 -#define S6_I2C_INTR_RXDONE 7 -#define S6_I2C_INTR_ACTIVITY 8 -#define S6_I2C_INTR_STOPDET 9 -#define S6_I2C_INTR_STARTDET 10 -#define S6_I2C_INTR_GENCALL 11 -#define S6_I2C_RXTL 0x038 -#define S6_I2C_TXTL 0x03C -#define S6_I2C_CLRINTR 0x040 -#define S6_I2C_CLRRXUNDER 0x044 -#define S6_I2C_CLRRXOVER 0x048 -#define S6_I2C_CLRTXOVER 0x04C -#define S6_I2C_CLRRDREQ 0x050 -#define S6_I2C_CLRTXABRT 0x054 -#define S6_I2C_CLRRXDONE 0x058 -#define S6_I2C_CLRACTIVITY 0x05C -#define S6_I2C_CLRSTOPDET 0x060 -#define S6_I2C_CLRSTARTDET 0x064 -#define S6_I2C_CLRGENCALL 0x068 -#define S6_I2C_ENABLE 0x06C -#define S6_I2C_STATUS 0x070 -#define S6_I2C_STATUS_ACTIVITY 0 -#define S6_I2C_STATUS_TFNF 1 -#define S6_I2C_STATUS_TFE 2 -#define S6_I2C_STATUS_RFNE 3 -#define S6_I2C_STATUS_RFF 4 -#define S6_I2C_TXFLR 0x074 -#define S6_I2C_RXFLR 0x078 -#define S6_I2C_SRESET 0x07C -#define S6_I2C_SRESET_IC_SRST 0 -#define S6_I2C_SRESET_IC_MASTER_SRST 1 -#define S6_I2C_SRESET_IC_SLAVE_SRST 2 -#define S6_I2C_TXABRTSOURCE 0x080 - -#endif diff --git a/include/linux/i2c/s6000.h b/include/linux/i2c/s6000.h deleted file mode 100644 index d9b34bfdae76..000000000000 --- a/include/linux/i2c/s6000.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __LINUX_I2C_S6000_H -#define __LINUX_I2C_S6000_H - -struct s6_i2c_platform_data { - const char *clock; /* the clock to use */ - int bus_num; /* the bus number to register */ -}; - -#endif - -- cgit v1.2.3 From 306a7f9139318a28063282a15b9f9ebacf09c9b9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 17 Jul 2014 13:17:24 +0200 Subject: ARM: tegra: Move includes to include/soc/tegra In order to not clutter the include/linux directory with SoC specific headers, move the Tegra-specific headers out into a separate directory. Signed-off-by: Thierry Reding --- arch/arm/mach-tegra/fuse.c | 3 +- arch/arm/mach-tegra/pmc.c | 3 +- arch/arm/mach-tegra/powergate.c | 3 +- drivers/amba/tegra-ahb.c | 3 +- drivers/clk/tegra/clk-periph-gate.c | 3 +- drivers/clk/tegra/clk-tegra30.c | 5 +- drivers/clk/tegra/clk.c | 3 +- drivers/gpu/drm/tegra/gr3d.c | 3 +- drivers/gpu/drm/tegra/sor.c | 3 +- drivers/iommu/tegra-smmu.c | 3 +- drivers/pci/host/pci-tegra.c | 5 +- include/linux/tegra-ahb.h | 19 ----- include/linux/tegra-cpuidle.h | 25 ------- include/linux/tegra-powergate.h | 134 ------------------------------------ include/linux/tegra-soc.h | 22 ------ include/soc/tegra/ahb.h | 19 +++++ include/soc/tegra/cpuidle.h | 25 +++++++ include/soc/tegra/fuse.h | 22 ++++++ include/soc/tegra/powergate.h | 134 ++++++++++++++++++++++++++++++++++++ 19 files changed, 225 insertions(+), 212 deletions(-) delete mode 100644 include/linux/tegra-ahb.h delete mode 100644 include/linux/tegra-cpuidle.h delete mode 100644 include/linux/tegra-powergate.h delete mode 100644 include/linux/tegra-soc.h create mode 100644 include/soc/tegra/ahb.h create mode 100644 include/soc/tegra/cpuidle.h create mode 100644 include/soc/tegra/fuse.h create mode 100644 include/soc/tegra/powergate.h (limited to 'include/linux') diff --git a/arch/arm/mach-tegra/fuse.c b/arch/arm/mach-tegra/fuse.c index c9ac23b385be..930fef861227 100644 --- a/arch/arm/mach-tegra/fuse.c +++ b/arch/arm/mach-tegra/fuse.c @@ -23,7 +23,8 @@ #include #include #include -#include + +#include #include "fuse.h" #include "iomap.h" diff --git a/arch/arm/mach-tegra/pmc.c b/arch/arm/mach-tegra/pmc.c index 7c7123e7557b..0f457801eaca 100644 --- a/arch/arm/mach-tegra/pmc.c +++ b/arch/arm/mach-tegra/pmc.c @@ -20,7 +20,8 @@ #include #include #include -#include + +#include #include "flowctrl.h" #include "fuse.h" diff --git a/arch/arm/mach-tegra/powergate.c b/arch/arm/mach-tegra/powergate.c index 4cefc5cd6bed..7b148bc6c995 100644 --- a/arch/arm/mach-tegra/powergate.c +++ b/arch/arm/mach-tegra/powergate.c @@ -29,7 +29,8 @@ #include #include #include -#include + +#include #include "fuse.h" #include "iomap.h" diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 558a239954e8..d8961ef4d2e7 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -25,7 +25,8 @@ #include #include #include -#include + +#include #define DRV_NAME "tegra-ahb" diff --git a/drivers/clk/tegra/clk-periph-gate.c b/drivers/clk/tegra/clk-periph-gate.c index 507015314827..0aa8830ae7cc 100644 --- a/drivers/clk/tegra/clk-periph-gate.c +++ b/drivers/clk/tegra/clk-periph-gate.c @@ -20,7 +20,8 @@ #include #include #include -#include + +#include #include "clk.h" diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 8b10c38b6e3c..5679ffdb3f8c 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -22,8 +22,11 @@ #include #include #include -#include + +#include + #include + #include "clk.h" #include "clk-id.h" diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c index c0a7d7723510..f4503ba97400 100644 --- a/drivers/clk/tegra/clk.c +++ b/drivers/clk/tegra/clk.c @@ -19,7 +19,8 @@ #include #include #include -#include + +#include #include "clk.h" diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 30f5ba9bd6d0..69974851e564 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -12,7 +12,8 @@ #include #include #include -#include + +#include #include "drm.h" #include "gem.h" diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 27c979b50111..eafd0b8a71d2 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -11,7 +11,8 @@ #include #include #include -#include + +#include #include diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 605b5b46a903..230d06c9328b 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -35,7 +35,8 @@ #include #include #include -#include + +#include #include #include diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 083cf37ca047..a2f0f88be382 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -41,11 +41,12 @@ #include #include #include -#include -#include #include #include +#include +#include + #include #include #include diff --git a/include/linux/tegra-ahb.h b/include/linux/tegra-ahb.h deleted file mode 100644 index f1cd075ceee1..000000000000 --- a/include/linux/tegra-ahb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef __LINUX_AHB_H__ -#define __LINUX_AHB_H__ - -extern int tegra_ahb_enable_smmu(struct device_node *ahb); - -#endif /* __LINUX_AHB_H__ */ diff --git a/include/linux/tegra-cpuidle.h b/include/linux/tegra-cpuidle.h deleted file mode 100644 index 9c6286bbf662..000000000000 --- a/include/linux/tegra-cpuidle.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef __LINUX_TEGRA_CPUIDLE_H__ -#define __LINUX_TEGRA_CPUIDLE_H__ - -#ifdef CONFIG_CPU_IDLE -void tegra_cpuidle_pcie_irqs_in_use(void); -#else -static inline void tegra_cpuidle_pcie_irqs_in_use(void) -{ -} -#endif - -#endif diff --git a/include/linux/tegra-powergate.h b/include/linux/tegra-powergate.h deleted file mode 100644 index 46f0a07812b4..000000000000 --- a/include/linux/tegra-powergate.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2010 Google, Inc - * - * Author: - * Colin Cross - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef _MACH_TEGRA_POWERGATE_H_ -#define _MACH_TEGRA_POWERGATE_H_ - -struct clk; -struct reset_control; - -#define TEGRA_POWERGATE_CPU 0 -#define TEGRA_POWERGATE_3D 1 -#define TEGRA_POWERGATE_VENC 2 -#define TEGRA_POWERGATE_PCIE 3 -#define TEGRA_POWERGATE_VDEC 4 -#define TEGRA_POWERGATE_L2 5 -#define TEGRA_POWERGATE_MPE 6 -#define TEGRA_POWERGATE_HEG 7 -#define TEGRA_POWERGATE_SATA 8 -#define TEGRA_POWERGATE_CPU1 9 -#define TEGRA_POWERGATE_CPU2 10 -#define TEGRA_POWERGATE_CPU3 11 -#define TEGRA_POWERGATE_CELP 12 -#define TEGRA_POWERGATE_3D1 13 -#define TEGRA_POWERGATE_CPU0 14 -#define TEGRA_POWERGATE_C0NC 15 -#define TEGRA_POWERGATE_C1NC 16 -#define TEGRA_POWERGATE_SOR 17 -#define TEGRA_POWERGATE_DIS 18 -#define TEGRA_POWERGATE_DISB 19 -#define TEGRA_POWERGATE_XUSBA 20 -#define TEGRA_POWERGATE_XUSBB 21 -#define TEGRA_POWERGATE_XUSBC 22 -#define TEGRA_POWERGATE_VIC 23 -#define TEGRA_POWERGATE_IRAM 24 - -#define TEGRA_POWERGATE_3D0 TEGRA_POWERGATE_3D - -#define TEGRA_IO_RAIL_CSIA 0 -#define TEGRA_IO_RAIL_CSIB 1 -#define TEGRA_IO_RAIL_DSI 2 -#define TEGRA_IO_RAIL_MIPI_BIAS 3 -#define TEGRA_IO_RAIL_PEX_BIAS 4 -#define TEGRA_IO_RAIL_PEX_CLK1 5 -#define TEGRA_IO_RAIL_PEX_CLK2 6 -#define TEGRA_IO_RAIL_USB0 9 -#define TEGRA_IO_RAIL_USB1 10 -#define TEGRA_IO_RAIL_USB2 11 -#define TEGRA_IO_RAIL_USB_BIAS 12 -#define TEGRA_IO_RAIL_NAND 13 -#define TEGRA_IO_RAIL_UART 14 -#define TEGRA_IO_RAIL_BB 15 -#define TEGRA_IO_RAIL_AUDIO 17 -#define TEGRA_IO_RAIL_HSIC 19 -#define TEGRA_IO_RAIL_COMP 22 -#define TEGRA_IO_RAIL_HDMI 28 -#define TEGRA_IO_RAIL_PEX_CNTRL 32 -#define TEGRA_IO_RAIL_SDMMC1 33 -#define TEGRA_IO_RAIL_SDMMC3 34 -#define TEGRA_IO_RAIL_SDMMC4 35 -#define TEGRA_IO_RAIL_CAM 36 -#define TEGRA_IO_RAIL_RES 37 -#define TEGRA_IO_RAIL_HV 38 -#define TEGRA_IO_RAIL_DSIB 39 -#define TEGRA_IO_RAIL_DSIC 40 -#define TEGRA_IO_RAIL_DSID 41 -#define TEGRA_IO_RAIL_CSIE 44 -#define TEGRA_IO_RAIL_LVDS 57 -#define TEGRA_IO_RAIL_SYS_DDC 58 - -#ifdef CONFIG_ARCH_TEGRA -int tegra_powergate_is_powered(int id); -int tegra_powergate_power_on(int id); -int tegra_powergate_power_off(int id); -int tegra_powergate_remove_clamping(int id); - -/* Must be called with clk disabled, and returns with clk enabled */ -int tegra_powergate_sequence_power_up(int id, struct clk *clk, - struct reset_control *rst); - -int tegra_io_rail_power_on(int id); -int tegra_io_rail_power_off(int id); -#else -static inline int tegra_powergate_is_powered(int id) -{ - return -ENOSYS; -} - -static inline int tegra_powergate_power_on(int id) -{ - return -ENOSYS; -} - -static inline int tegra_powergate_power_off(int id) -{ - return -ENOSYS; -} - -static inline int tegra_powergate_remove_clamping(int id) -{ - return -ENOSYS; -} - -static inline int tegra_powergate_sequence_power_up(int id, struct clk *clk, - struct reset_control *rst) -{ - return -ENOSYS; -} - -static inline int tegra_io_rail_power_on(int id) -{ - return -ENOSYS; -} - -static inline int tegra_io_rail_power_off(int id) -{ - return -ENOSYS; -} -#endif - -#endif /* _MACH_TEGRA_POWERGATE_H_ */ diff --git a/include/linux/tegra-soc.h b/include/linux/tegra-soc.h deleted file mode 100644 index 95f611d78f3a..000000000000 --- a/include/linux/tegra-soc.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __LINUX_TEGRA_SOC_H_ -#define __LINUX_TEGRA_SOC_H_ - -u32 tegra_read_chipid(void); - -#endif /* __LINUX_TEGRA_SOC_H_ */ diff --git a/include/soc/tegra/ahb.h b/include/soc/tegra/ahb.h new file mode 100644 index 000000000000..504eb6f957e5 --- /dev/null +++ b/include/soc/tegra/ahb.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __SOC_TEGRA_AHB_H__ +#define __SOC_TEGRA_AHB_H__ + +extern int tegra_ahb_enable_smmu(struct device_node *ahb); + +#endif /* __SOC_TEGRA_AHB_H__ */ diff --git a/include/soc/tegra/cpuidle.h b/include/soc/tegra/cpuidle.h new file mode 100644 index 000000000000..ea04f4225638 --- /dev/null +++ b/include/soc/tegra/cpuidle.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __SOC_TEGRA_CPUIDLE_H__ +#define __SOC_TEGRA_CPUIDLE_H__ + +#ifdef CONFIG_CPU_IDLE +void tegra_cpuidle_pcie_irqs_in_use(void); +#else +static inline void tegra_cpuidle_pcie_irqs_in_use(void) +{ +} +#endif + +#endif /* __SOC_TEGRA_CPUIDLE_H__ */ diff --git a/include/soc/tegra/fuse.h b/include/soc/tegra/fuse.h new file mode 100644 index 000000000000..85f555c89ad5 --- /dev/null +++ b/include/soc/tegra/fuse.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __SOC_TEGRA_FUSE_H__ +#define __SOC_TEGRA_FUSE_H__ + +u32 tegra_read_chipid(void); + +#endif /* __SOC_TEGRA_FUSE_H__ */ diff --git a/include/soc/tegra/powergate.h b/include/soc/tegra/powergate.h new file mode 100644 index 000000000000..c16912ed1a8d --- /dev/null +++ b/include/soc/tegra/powergate.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2010 Google, Inc + * + * Author: + * Colin Cross + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __SOC_TEGRA_POWERGATE_H__ +#define __SOC_TEGRA_POWERGATE_H__ + +struct clk; +struct reset_control; + +#define TEGRA_POWERGATE_CPU 0 +#define TEGRA_POWERGATE_3D 1 +#define TEGRA_POWERGATE_VENC 2 +#define TEGRA_POWERGATE_PCIE 3 +#define TEGRA_POWERGATE_VDEC 4 +#define TEGRA_POWERGATE_L2 5 +#define TEGRA_POWERGATE_MPE 6 +#define TEGRA_POWERGATE_HEG 7 +#define TEGRA_POWERGATE_SATA 8 +#define TEGRA_POWERGATE_CPU1 9 +#define TEGRA_POWERGATE_CPU2 10 +#define TEGRA_POWERGATE_CPU3 11 +#define TEGRA_POWERGATE_CELP 12 +#define TEGRA_POWERGATE_3D1 13 +#define TEGRA_POWERGATE_CPU0 14 +#define TEGRA_POWERGATE_C0NC 15 +#define TEGRA_POWERGATE_C1NC 16 +#define TEGRA_POWERGATE_SOR 17 +#define TEGRA_POWERGATE_DIS 18 +#define TEGRA_POWERGATE_DISB 19 +#define TEGRA_POWERGATE_XUSBA 20 +#define TEGRA_POWERGATE_XUSBB 21 +#define TEGRA_POWERGATE_XUSBC 22 +#define TEGRA_POWERGATE_VIC 23 +#define TEGRA_POWERGATE_IRAM 24 + +#define TEGRA_POWERGATE_3D0 TEGRA_POWERGATE_3D + +#define TEGRA_IO_RAIL_CSIA 0 +#define TEGRA_IO_RAIL_CSIB 1 +#define TEGRA_IO_RAIL_DSI 2 +#define TEGRA_IO_RAIL_MIPI_BIAS 3 +#define TEGRA_IO_RAIL_PEX_BIAS 4 +#define TEGRA_IO_RAIL_PEX_CLK1 5 +#define TEGRA_IO_RAIL_PEX_CLK2 6 +#define TEGRA_IO_RAIL_USB0 9 +#define TEGRA_IO_RAIL_USB1 10 +#define TEGRA_IO_RAIL_USB2 11 +#define TEGRA_IO_RAIL_USB_BIAS 12 +#define TEGRA_IO_RAIL_NAND 13 +#define TEGRA_IO_RAIL_UART 14 +#define TEGRA_IO_RAIL_BB 15 +#define TEGRA_IO_RAIL_AUDIO 17 +#define TEGRA_IO_RAIL_HSIC 19 +#define TEGRA_IO_RAIL_COMP 22 +#define TEGRA_IO_RAIL_HDMI 28 +#define TEGRA_IO_RAIL_PEX_CNTRL 32 +#define TEGRA_IO_RAIL_SDMMC1 33 +#define TEGRA_IO_RAIL_SDMMC3 34 +#define TEGRA_IO_RAIL_SDMMC4 35 +#define TEGRA_IO_RAIL_CAM 36 +#define TEGRA_IO_RAIL_RES 37 +#define TEGRA_IO_RAIL_HV 38 +#define TEGRA_IO_RAIL_DSIB 39 +#define TEGRA_IO_RAIL_DSIC 40 +#define TEGRA_IO_RAIL_DSID 41 +#define TEGRA_IO_RAIL_CSIE 44 +#define TEGRA_IO_RAIL_LVDS 57 +#define TEGRA_IO_RAIL_SYS_DDC 58 + +#ifdef CONFIG_ARCH_TEGRA +int tegra_powergate_is_powered(int id); +int tegra_powergate_power_on(int id); +int tegra_powergate_power_off(int id); +int tegra_powergate_remove_clamping(int id); + +/* Must be called with clk disabled, and returns with clk enabled */ +int tegra_powergate_sequence_power_up(int id, struct clk *clk, + struct reset_control *rst); + +int tegra_io_rail_power_on(int id); +int tegra_io_rail_power_off(int id); +#else +static inline int tegra_powergate_is_powered(int id) +{ + return -ENOSYS; +} + +static inline int tegra_powergate_power_on(int id) +{ + return -ENOSYS; +} + +static inline int tegra_powergate_power_off(int id) +{ + return -ENOSYS; +} + +static inline int tegra_powergate_remove_clamping(int id) +{ + return -ENOSYS; +} + +static inline int tegra_powergate_sequence_power_up(int id, struct clk *clk, + struct reset_control *rst) +{ + return -ENOSYS; +} + +static inline int tegra_io_rail_power_on(int id) +{ + return -ENOSYS; +} + +static inline int tegra_io_rail_power_off(int id) +{ + return -ENOSYS; +} +#endif + +#endif /* __SOC_TEGRA_POWERGATE_H__ */ -- cgit v1.2.3 From ae4b884fc6316b3190be19448cea24b020c1cad6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Jul 2014 12:59:36 -0400 Subject: nfsd: silence sparse warning about accessing credentials sparse says: fs/nfsd/auth.c:31:38: warning: incorrect type in argument 1 (different address spaces) fs/nfsd/auth.c:31:38: expected struct cred const *cred fs/nfsd/auth.c:31:38: got struct cred const [noderef] *real_cred Add a new accessor for the ->real_cred and use that to fetch the pointer. Accessing current->real_cred directly is actually quite safe since we know that they can't go away so this is mostly a cosmetic fixup to silence sparse. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/auth.c | 2 +- include/linux/cred.h | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 72f44823adbb..9d46a0bdd9f9 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) validate_process_creds(); /* discard any old override before preparing the new set */ - revert_creds(get_cred(current->real_cred)); + revert_creds(get_cred(current_real_cred())); new = prepare_creds(); if (!new) return -ENOMEM; diff --git a/include/linux/cred.h b/include/linux/cred.h index f61d6c8f5ef3..b2d0820837c4 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -258,6 +258,15 @@ static inline void put_cred(const struct cred *_cred) #define current_cred() \ rcu_dereference_protected(current->cred, 1) +/** + * current_real_cred - Access the current task's objective credentials + * + * Access the objective credentials of the current task. RCU-safe, + * since nobody else can modify it. + */ +#define current_real_cred() \ + rcu_dereference_protected(current->real_cred, 1) + /** * __task_cred - Access a task's objective credentials * @task: The task to query -- cgit v1.2.3 From e1b243772d455f3b25b410a15a5677a9e74ffa37 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Tue, 20 May 2014 20:43:51 +0400 Subject: ARM: i.MX: Remove registration helper for i.MX1 USB UDC imx_udc driver was removed from the kernel of about 10 months ago. This patch removes a registration helper for this driver and orphaned driver header. Signed-off-by: Alexander Shiyan Signed-off-by: Shawn Guo --- arch/arm/mach-imx/devices/Kconfig | 3 -- arch/arm/mach-imx/devices/Makefile | 1 - arch/arm/mach-imx/devices/devices-common.h | 16 ------ arch/arm/mach-imx/devices/platform-imx_udc.c | 75 ---------------------------- include/linux/platform_data/usb-imx_udc.h | 23 --------- 5 files changed, 118 deletions(-) delete mode 100644 arch/arm/mach-imx/devices/platform-imx_udc.c delete mode 100644 include/linux/platform_data/usb-imx_udc.h (limited to 'include/linux') diff --git a/arch/arm/mach-imx/devices/Kconfig b/arch/arm/mach-imx/devices/Kconfig index 2d260a5a307c..dd7e0b45fb18 100644 --- a/arch/arm/mach-imx/devices/Kconfig +++ b/arch/arm/mach-imx/devices/Kconfig @@ -43,9 +43,6 @@ config IMX_HAVE_PLATFORM_IMX_SSI config IMX_HAVE_PLATFORM_IMX_UART bool -config IMX_HAVE_PLATFORM_IMX_UDC - bool - config IMX_HAVE_PLATFORM_IPU_CORE bool diff --git a/arch/arm/mach-imx/devices/Makefile b/arch/arm/mach-imx/devices/Makefile index 1cbc14cd80d1..6bb144dd680e 100644 --- a/arch/arm/mach-imx/devices/Makefile +++ b/arch/arm/mach-imx/devices/Makefile @@ -16,7 +16,6 @@ obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_KEYPAD) += platform-imx-keypad.o obj-$(CONFIG_IMX_HAVE_PLATFORM_PATA_IMX) += platform-pata_imx.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_SSI) += platform-imx-ssi.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UART) += platform-imx-uart.o -obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UDC) += platform-imx_udc.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IPU_CORE) += platform-ipu-core.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MX1_CAMERA) += platform-mx1-camera.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MX2_CAMERA) += platform-mx2-camera.o diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 61352a80bb59..69bafc884dfa 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -176,22 +176,6 @@ struct platform_device *__init imx_add_imx_uart_1irq( const struct imx_imx_uart_1irq_data *data, const struct imxuart_platform_data *pdata); -#include -struct imx_imx_udc_data { - resource_size_t iobase; - resource_size_t iosize; - resource_size_t irq0; - resource_size_t irq1; - resource_size_t irq2; - resource_size_t irq3; - resource_size_t irq4; - resource_size_t irq5; - resource_size_t irq6; -}; -struct platform_device *__init imx_add_imx_udc( - const struct imx_imx_udc_data *data, - const struct imxusb_platform_data *pdata); - #include #include struct imx_ipu_core_data { diff --git a/arch/arm/mach-imx/devices/platform-imx_udc.c b/arch/arm/mach-imx/devices/platform-imx_udc.c deleted file mode 100644 index 5ced7e4e2c71..000000000000 --- a/arch/arm/mach-imx/devices/platform-imx_udc.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2010 Pengutronix - * Uwe Kleine-Koenig - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. - */ -#include "../hardware.h" -#include "devices-common.h" - -#define imx_imx_udc_data_entry_single(soc, _size) \ - { \ - .iobase = soc ## _USBD_BASE_ADDR, \ - .iosize = _size, \ - .irq0 = soc ## _INT_USBD0, \ - .irq1 = soc ## _INT_USBD1, \ - .irq2 = soc ## _INT_USBD2, \ - .irq3 = soc ## _INT_USBD3, \ - .irq4 = soc ## _INT_USBD4, \ - .irq5 = soc ## _INT_USBD5, \ - .irq6 = soc ## _INT_USBD6, \ - } - -#define imx_imx_udc_data_entry(soc, _size) \ - [_id] = imx_imx_udc_data_entry_single(soc, _size) - -#ifdef CONFIG_SOC_IMX1 -const struct imx_imx_udc_data imx1_imx_udc_data __initconst = - imx_imx_udc_data_entry_single(MX1, SZ_4K); -#endif /* ifdef CONFIG_SOC_IMX1 */ - -struct platform_device *__init imx_add_imx_udc( - const struct imx_imx_udc_data *data, - const struct imxusb_platform_data *pdata) -{ - struct resource res[] = { - { - .start = data->iobase, - .end = data->iobase + data->iosize - 1, - .flags = IORESOURCE_MEM, - }, { - .start = data->irq0, - .end = data->irq0, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq1, - .end = data->irq1, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq2, - .end = data->irq2, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq3, - .end = data->irq3, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq4, - .end = data->irq4, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq5, - .end = data->irq5, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq6, - .end = data->irq6, - .flags = IORESOURCE_IRQ, - }, - }; - - return imx_add_platform_device("imx_udc", 0, - res, ARRAY_SIZE(res), pdata, sizeof(*pdata)); -} diff --git a/include/linux/platform_data/usb-imx_udc.h b/include/linux/platform_data/usb-imx_udc.h deleted file mode 100644 index be273371f34a..000000000000 --- a/include/linux/platform_data/usb-imx_udc.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2008 Darius Augulis - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __ASM_ARCH_MXC_USB -#define __ASM_ARCH_MXC_USB - -struct imxusb_platform_data { - int (*init)(struct device *); - void (*exit)(struct device *); -}; - -#endif /* __ASM_ARCH_MXC_USB */ -- cgit v1.2.3 From 641dfe8b73e81aa38cbbeab72a480462a4987963 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 19 May 2014 20:41:52 +0800 Subject: ARM: imx: move EHCI platform defines out of platform_data header The platform_data header usb-ehci-mxc.h has a lot of stuff used by only IMX platform code. They shouldn't be really in this header but a IMX platform local header. Create ehci.h and move these stuff into it. Signed-off-by: Shawn Guo --- arch/arm/mach-imx/ehci-imx25.c | 1 + arch/arm/mach-imx/ehci-imx27.c | 1 + arch/arm/mach-imx/ehci-imx31.c | 1 + arch/arm/mach-imx/ehci-imx35.c | 1 + arch/arm/mach-imx/ehci-imx5.c | 1 + arch/arm/mach-imx/ehci.h | 50 +++++++++++++++++++++++++++++ arch/arm/mach-imx/mach-armadillo5x0.c | 1 + arch/arm/mach-imx/mach-cpuimx27.c | 1 + arch/arm/mach-imx/mach-cpuimx35.c | 1 + arch/arm/mach-imx/mach-eukrea_cpuimx25.c | 1 + arch/arm/mach-imx/mach-imx27_visstrim_m10.c | 1 + arch/arm/mach-imx/mach-mx25_3ds.c | 1 + arch/arm/mach-imx/mach-mx27_3ds.c | 1 + arch/arm/mach-imx/mach-mx31_3ds.c | 1 + arch/arm/mach-imx/mach-mx31lilly.c | 1 + arch/arm/mach-imx/mach-mx31lite.c | 1 + arch/arm/mach-imx/mach-mx31moboard.c | 1 + arch/arm/mach-imx/mach-mx35_3ds.c | 1 + arch/arm/mach-imx/mach-pca100.c | 1 + arch/arm/mach-imx/mach-pcm037.c | 1 + arch/arm/mach-imx/mach-pcm038.c | 1 + arch/arm/mach-imx/mach-pcm043.c | 1 + arch/arm/mach-imx/mach-vpr200.c | 1 + arch/arm/mach-imx/mx31moboard-devboard.c | 1 + arch/arm/mach-imx/mx31moboard-marxbot.c | 1 + arch/arm/mach-imx/mx31moboard-smartbot.c | 1 + include/linux/platform_data/usb-ehci-mxc.h | 46 -------------------------- 27 files changed, 75 insertions(+), 46 deletions(-) create mode 100644 arch/arm/mach-imx/ehci.h (limited to 'include/linux') diff --git a/arch/arm/mach-imx/ehci-imx25.c b/arch/arm/mach-imx/ehci-imx25.c index 134c190e3003..42a5a3d14c5f 100644 --- a/arch/arm/mach-imx/ehci-imx25.c +++ b/arch/arm/mach-imx/ehci-imx25.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define USBCTRL_OTGBASE_OFFSET 0x600 diff --git a/arch/arm/mach-imx/ehci-imx27.c b/arch/arm/mach-imx/ehci-imx27.c index 448d9115539d..c56974346c16 100644 --- a/arch/arm/mach-imx/ehci-imx27.c +++ b/arch/arm/mach-imx/ehci-imx27.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define USBCTRL_OTGBASE_OFFSET 0x600 diff --git a/arch/arm/mach-imx/ehci-imx31.c b/arch/arm/mach-imx/ehci-imx31.c index 05de4e1e39d7..bede21d9b981 100644 --- a/arch/arm/mach-imx/ehci-imx31.c +++ b/arch/arm/mach-imx/ehci-imx31.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define USBCTRL_OTGBASE_OFFSET 0x600 diff --git a/arch/arm/mach-imx/ehci-imx35.c b/arch/arm/mach-imx/ehci-imx35.c index 554e7cccff53..f424a543755c 100644 --- a/arch/arm/mach-imx/ehci-imx35.c +++ b/arch/arm/mach-imx/ehci-imx35.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define USBCTRL_OTGBASE_OFFSET 0x600 diff --git a/arch/arm/mach-imx/ehci-imx5.c b/arch/arm/mach-imx/ehci-imx5.c index e49710b10c68..74bfdd970bfe 100644 --- a/arch/arm/mach-imx/ehci-imx5.c +++ b/arch/arm/mach-imx/ehci-imx5.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define MXC_OTG_OFFSET 0 diff --git a/arch/arm/mach-imx/ehci.h b/arch/arm/mach-imx/ehci.h new file mode 100644 index 000000000000..4f42ca380b26 --- /dev/null +++ b/arch/arm/mach-imx/ehci.h @@ -0,0 +1,50 @@ +#ifndef __MACH_IMX_EHCI_H +#define __MACH_IMX_EHCI_H + +/* values for portsc field */ +#define MXC_EHCI_PHY_LOW_POWER_SUSPEND (1 << 23) +#define MXC_EHCI_FORCE_FS (1 << 24) +#define MXC_EHCI_UTMI_8BIT (0 << 28) +#define MXC_EHCI_UTMI_16BIT (1 << 28) +#define MXC_EHCI_SERIAL (1 << 29) +#define MXC_EHCI_MODE_UTMI (0 << 30) +#define MXC_EHCI_MODE_PHILIPS (1 << 30) +#define MXC_EHCI_MODE_ULPI (2 << 30) +#define MXC_EHCI_MODE_SERIAL (3 << 30) + +/* values for flags field */ +#define MXC_EHCI_INTERFACE_DIFF_UNI (0 << 0) +#define MXC_EHCI_INTERFACE_DIFF_BI (1 << 0) +#define MXC_EHCI_INTERFACE_SINGLE_UNI (2 << 0) +#define MXC_EHCI_INTERFACE_SINGLE_BI (3 << 0) +#define MXC_EHCI_INTERFACE_MASK (0xf) + +#define MXC_EHCI_POWER_PINS_ENABLED (1 << 5) +#define MXC_EHCI_PWR_PIN_ACTIVE_HIGH (1 << 6) +#define MXC_EHCI_OC_PIN_ACTIVE_LOW (1 << 7) +#define MXC_EHCI_TTL_ENABLED (1 << 8) + +#define MXC_EHCI_INTERNAL_PHY (1 << 9) +#define MXC_EHCI_IPPUE_DOWN (1 << 10) +#define MXC_EHCI_IPPUE_UP (1 << 11) +#define MXC_EHCI_WAKEUP_ENABLED (1 << 12) +#define MXC_EHCI_ITC_NO_THRESHOLD (1 << 13) + +#define MXC_USBCTRL_OFFSET 0 +#define MXC_USB_PHY_CTR_FUNC_OFFSET 0x8 +#define MXC_USB_PHY_CTR_FUNC2_OFFSET 0xc +#define MXC_USBH2CTRL_OFFSET 0x14 + +#define MX5_USBOTHER_REGS_OFFSET 0x800 + +/* USB_PHY_CTRL_FUNC2*/ +#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_MASK 0x3 +#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_SHIFT 0 + +int mx51_initialize_usb_hw(int port, unsigned int flags); +int mx25_initialize_usb_hw(int port, unsigned int flags); +int mx31_initialize_usb_hw(int port, unsigned int flags); +int mx35_initialize_usb_hw(int port, unsigned int flags); +int mx27_initialize_usb_hw(int port, unsigned int flags); + +#endif /* __MACH_IMX_EHCI_H */ diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c index 39406b7e3228..a7e9bd26a552 100644 --- a/arch/arm/mach-imx/mach-armadillo5x0.c +++ b/arch/arm/mach-imx/mach-armadillo5x0.c @@ -50,6 +50,7 @@ #include "common.h" #include "devices-imx31.h" #include "crmregs-imx3.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c index 75b7b6aa2720..e6d4b9929571 100644 --- a/arch/arm/mach-imx/mach-cpuimx27.c +++ b/arch/arm/mach-imx/mach-cpuimx27.c @@ -36,6 +36,7 @@ #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "eukrea-baseboards.h" #include "hardware.h" #include "iomux-mx27.h" diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c index 1ffa27169045..62a6e02f4763 100644 --- a/arch/arm/mach-imx/mach-cpuimx35.c +++ b/arch/arm/mach-imx/mach-cpuimx35.c @@ -39,6 +39,7 @@ #include "common.h" #include "devices-imx35.h" +#include "ehci.h" #include "eukrea-baseboards.h" #include "hardware.h" #include "iomux-mx35.h" diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c index e978dda1434c..b2ee6e009fe4 100644 --- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c +++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c @@ -35,6 +35,7 @@ #include "common.h" #include "devices-imx25.h" +#include "ehci.h" #include "eukrea-baseboards.h" #include "hardware.h" #include "iomux-mx25.h" diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c index b61bd8ed5568..ede2bdbb5dd5 100644 --- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c +++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c @@ -43,6 +43,7 @@ #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx27.h" diff --git a/arch/arm/mach-imx/mach-mx25_3ds.c b/arch/arm/mach-imx/mach-mx25_3ds.c index ea1fa199c148..0d01e367b062 100644 --- a/arch/arm/mach-imx/mach-mx25_3ds.c +++ b/arch/arm/mach-imx/mach-mx25_3ds.c @@ -39,6 +39,7 @@ #include "common.h" #include "devices-imx25.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx25.h" #include "mx25.h" diff --git a/arch/arm/mach-imx/mach-mx27_3ds.c b/arch/arm/mach-imx/mach-mx27_3ds.c index 435a5428a678..9ef4640f3660 100644 --- a/arch/arm/mach-imx/mach-mx27_3ds.c +++ b/arch/arm/mach-imx/mach-mx27_3ds.c @@ -40,6 +40,7 @@ #include "3ds_debugboard.h" #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx27.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx31_3ds.c b/arch/arm/mach-imx/mach-mx31_3ds.c index 4217871a9653..453f41a2c5a9 100644 --- a/arch/arm/mach-imx/mach-mx31_3ds.c +++ b/arch/arm/mach-imx/mach-mx31_3ds.c @@ -40,6 +40,7 @@ #include "3ds_debugboard.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx31lilly.c b/arch/arm/mach-imx/mach-mx31lilly.c index eee042fa2768..e9549a3c0223 100644 --- a/arch/arm/mach-imx/mach-mx31lilly.c +++ b/arch/arm/mach-imx/mach-mx31lilly.c @@ -45,6 +45,7 @@ #include "board-mx31lilly.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx31lite.c b/arch/arm/mach-imx/mach-mx31lite.c index fa15d0b6118d..57eac6f45fab 100644 --- a/arch/arm/mach-imx/mach-mx31lite.c +++ b/arch/arm/mach-imx/mach-mx31lite.c @@ -42,6 +42,7 @@ #include "board-mx31lite.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c index 08730f238449..6bed57040973 100644 --- a/arch/arm/mach-imx/mach-mx31moboard.c +++ b/arch/arm/mach-imx/mach-mx31moboard.c @@ -47,6 +47,7 @@ #include "board-mx31moboard.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx35_3ds.c b/arch/arm/mach-imx/mach-mx35_3ds.c index 4e8b184d773b..72cd77d21f63 100644 --- a/arch/arm/mach-imx/mach-mx35_3ds.c +++ b/arch/arm/mach-imx/mach-mx35_3ds.c @@ -50,6 +50,7 @@ #include "3ds_debugboard.h" #include "common.h" #include "devices-imx35.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx35.h" diff --git a/arch/arm/mach-imx/mach-pca100.c b/arch/arm/mach-imx/mach-pca100.c index 12212378c672..2d1c50bd8bdf 100644 --- a/arch/arm/mach-imx/mach-pca100.c +++ b/arch/arm/mach-imx/mach-pca100.c @@ -36,6 +36,7 @@ #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx27.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-pcm037.c b/arch/arm/mach-imx/mach-pcm037.c index 81b8affb9448..8eb1570f7851 100644 --- a/arch/arm/mach-imx/mach-pcm037.c +++ b/arch/arm/mach-imx/mach-pcm037.c @@ -45,6 +45,7 @@ #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "pcm037.h" diff --git a/arch/arm/mach-imx/mach-pcm038.c b/arch/arm/mach-imx/mach-pcm038.c index 6c56fb5553c7..ee862ad6b6fc 100644 --- a/arch/arm/mach-imx/mach-pcm038.c +++ b/arch/arm/mach-imx/mach-pcm038.c @@ -36,6 +36,7 @@ #include "board-pcm038.h" #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx27.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-pcm043.c b/arch/arm/mach-imx/mach-pcm043.c index c62b5d261345..b623bcaca76c 100644 --- a/arch/arm/mach-imx/mach-pcm043.c +++ b/arch/arm/mach-imx/mach-pcm043.c @@ -35,6 +35,7 @@ #include "common.h" #include "devices-imx35.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx35.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-vpr200.c b/arch/arm/mach-imx/mach-vpr200.c index 872b3c6ba408..97836e94451c 100644 --- a/arch/arm/mach-imx/mach-vpr200.c +++ b/arch/arm/mach-imx/mach-vpr200.c @@ -34,6 +34,7 @@ #include "common.h" #include "devices-imx35.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx35.h" diff --git a/arch/arm/mach-imx/mx31moboard-devboard.c b/arch/arm/mach-imx/mx31moboard-devboard.c index 52d5b1574721..1e4ea1640a2a 100644 --- a/arch/arm/mach-imx/mx31moboard-devboard.c +++ b/arch/arm/mach-imx/mx31moboard-devboard.c @@ -24,6 +24,7 @@ #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mx31moboard-marxbot.c b/arch/arm/mach-imx/mx31moboard-marxbot.c index a4f43e90f3c1..699d01a4fef8 100644 --- a/arch/arm/mach-imx/mx31moboard-marxbot.c +++ b/arch/arm/mach-imx/mx31moboard-marxbot.c @@ -28,6 +28,7 @@ #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mx31moboard-smartbot.c b/arch/arm/mach-imx/mx31moboard-smartbot.c index 04ae45dbfaa7..4b3d66eb8d34 100644 --- a/arch/arm/mach-imx/mx31moboard-smartbot.c +++ b/arch/arm/mach-imx/mx31moboard-smartbot.c @@ -28,6 +28,7 @@ #include "board-mx31moboard.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/include/linux/platform_data/usb-ehci-mxc.h b/include/linux/platform_data/usb-ehci-mxc.h index 7eb9d1329671..157e71f79f99 100644 --- a/include/linux/platform_data/usb-ehci-mxc.h +++ b/include/linux/platform_data/usb-ehci-mxc.h @@ -1,46 +1,6 @@ #ifndef __INCLUDE_ASM_ARCH_MXC_EHCI_H #define __INCLUDE_ASM_ARCH_MXC_EHCI_H -/* values for portsc field */ -#define MXC_EHCI_PHY_LOW_POWER_SUSPEND (1 << 23) -#define MXC_EHCI_FORCE_FS (1 << 24) -#define MXC_EHCI_UTMI_8BIT (0 << 28) -#define MXC_EHCI_UTMI_16BIT (1 << 28) -#define MXC_EHCI_SERIAL (1 << 29) -#define MXC_EHCI_MODE_UTMI (0 << 30) -#define MXC_EHCI_MODE_PHILIPS (1 << 30) -#define MXC_EHCI_MODE_ULPI (2 << 30) -#define MXC_EHCI_MODE_SERIAL (3 << 30) - -/* values for flags field */ -#define MXC_EHCI_INTERFACE_DIFF_UNI (0 << 0) -#define MXC_EHCI_INTERFACE_DIFF_BI (1 << 0) -#define MXC_EHCI_INTERFACE_SINGLE_UNI (2 << 0) -#define MXC_EHCI_INTERFACE_SINGLE_BI (3 << 0) -#define MXC_EHCI_INTERFACE_MASK (0xf) - -#define MXC_EHCI_POWER_PINS_ENABLED (1 << 5) -#define MXC_EHCI_PWR_PIN_ACTIVE_HIGH (1 << 6) -#define MXC_EHCI_OC_PIN_ACTIVE_LOW (1 << 7) -#define MXC_EHCI_TTL_ENABLED (1 << 8) - -#define MXC_EHCI_INTERNAL_PHY (1 << 9) -#define MXC_EHCI_IPPUE_DOWN (1 << 10) -#define MXC_EHCI_IPPUE_UP (1 << 11) -#define MXC_EHCI_WAKEUP_ENABLED (1 << 12) -#define MXC_EHCI_ITC_NO_THRESHOLD (1 << 13) - -#define MXC_USBCTRL_OFFSET 0 -#define MXC_USB_PHY_CTR_FUNC_OFFSET 0x8 -#define MXC_USB_PHY_CTR_FUNC2_OFFSET 0xc -#define MXC_USBH2CTRL_OFFSET 0x14 - -#define MX5_USBOTHER_REGS_OFFSET 0x800 - -/* USB_PHY_CTRL_FUNC2*/ -#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_MASK 0x3 -#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_SHIFT 0 - struct mxc_usbh_platform_data { int (*init)(struct platform_device *pdev); int (*exit)(struct platform_device *pdev); @@ -49,11 +9,5 @@ struct mxc_usbh_platform_data { struct usb_phy *otg; }; -int mx51_initialize_usb_hw(int port, unsigned int flags); -int mx25_initialize_usb_hw(int port, unsigned int flags); -int mx31_initialize_usb_hw(int port, unsigned int flags); -int mx35_initialize_usb_hw(int port, unsigned int flags); -int mx27_initialize_usb_hw(int port, unsigned int flags); - #endif /* __INCLUDE_ASM_ARCH_MXC_EHCI_H */ -- cgit v1.2.3 From 35e3bc535d0437ca5f32985a294703ce48c75d88 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sat, 5 Jul 2014 08:51:38 +0400 Subject: ARM: i.MX: Remove i.MX1 camera support i.MX1 camera driver has been removed by the commit 90b055898e. This patch removes remaining support files for this camera. Signed-off-by: Alexander Shiyan Signed-off-by: Shawn Guo --- arch/arm/mach-imx/Makefile | 3 -- arch/arm/mach-imx/devices/Kconfig | 3 -- arch/arm/mach-imx/devices/Makefile | 1 - arch/arm/mach-imx/devices/devices-common.h | 10 ------ arch/arm/mach-imx/devices/platform-mx1-camera.c | 42 ------------------------- arch/arm/mach-imx/mx1-camera-fiq-ksym.c | 18 ----------- arch/arm/mach-imx/mx1-camera-fiq.S | 35 --------------------- include/linux/platform_data/camera-mx1.h | 35 --------------------- 8 files changed, 147 deletions(-) delete mode 100644 arch/arm/mach-imx/devices/platform-mx1-camera.c delete mode 100644 arch/arm/mach-imx/mx1-camera-fiq-ksym.c delete mode 100644 arch/arm/mach-imx/mx1-camera-fiq.S delete mode 100644 include/linux/platform_data/camera-mx1.h (limited to 'include/linux') diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index a3d9712567c9..d1c1d38f3741 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -40,9 +40,6 @@ obj-y += ssi-fiq.o obj-y += ssi-fiq-ksym.o endif -# Support for CMOS sensor interface -obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o - # i.MX1 based machines obj-$(CONFIG_ARCH_MX1ADS) += mach-mx1ads.o obj-$(CONFIG_MACH_SCB9328) += mach-scb9328.o diff --git a/arch/arm/mach-imx/devices/Kconfig b/arch/arm/mach-imx/devices/Kconfig index 119f699649a9..73a1dce6cac1 100644 --- a/arch/arm/mach-imx/devices/Kconfig +++ b/arch/arm/mach-imx/devices/Kconfig @@ -45,9 +45,6 @@ config IMX_HAVE_PLATFORM_IMX_UART config IMX_HAVE_PLATFORM_IPU_CORE bool -config IMX_HAVE_PLATFORM_MX1_CAMERA - bool - config IMX_HAVE_PLATFORM_MX2_CAMERA bool diff --git a/arch/arm/mach-imx/devices/Makefile b/arch/arm/mach-imx/devices/Makefile index 6bb144dd680e..8fdb12b4ca7e 100644 --- a/arch/arm/mach-imx/devices/Makefile +++ b/arch/arm/mach-imx/devices/Makefile @@ -17,7 +17,6 @@ obj-$(CONFIG_IMX_HAVE_PLATFORM_PATA_IMX) += platform-pata_imx.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_SSI) += platform-imx-ssi.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UART) += platform-imx-uart.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IPU_CORE) += platform-ipu-core.o -obj-$(CONFIG_IMX_HAVE_PLATFORM_MX1_CAMERA) += platform-mx1-camera.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MX2_CAMERA) += platform-mx2-camera.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MXC_EHCI) += platform-mxc-ehci.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MXC_MMC) += platform-mxc-mmc.o diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 69bafc884dfa..67f7fb13050d 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -192,16 +192,6 @@ struct platform_device *__init imx_add_mx3_sdc_fb( const struct imx_ipu_core_data *data, struct mx3fb_platform_data *pdata); -#include -struct imx_mx1_camera_data { - resource_size_t iobase; - resource_size_t iosize; - resource_size_t irq; -}; -struct platform_device *__init imx_add_mx1_camera( - const struct imx_mx1_camera_data *data, - const struct mx1_camera_pdata *pdata); - #include struct imx_mx2_camera_data { const char *devid; diff --git a/arch/arm/mach-imx/devices/platform-mx1-camera.c b/arch/arm/mach-imx/devices/platform-mx1-camera.c deleted file mode 100644 index 2c6788131080..000000000000 --- a/arch/arm/mach-imx/devices/platform-mx1-camera.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2010 Pengutronix - * Uwe Kleine-Koenig - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. - */ -#include "../hardware.h" -#include "devices-common.h" - -#define imx_mx1_camera_data_entry_single(soc, _size) \ - { \ - .iobase = soc ## _CSI ## _BASE_ADDR, \ - .iosize = _size, \ - .irq = soc ## _INT_CSI, \ - } - -#ifdef CONFIG_SOC_IMX1 -const struct imx_mx1_camera_data imx1_mx1_camera_data __initconst = - imx_mx1_camera_data_entry_single(MX1, 10); -#endif /* ifdef CONFIG_SOC_IMX1 */ - -struct platform_device *__init imx_add_mx1_camera( - const struct imx_mx1_camera_data *data, - const struct mx1_camera_pdata *pdata) -{ - struct resource res[] = { - { - .start = data->iobase, - .end = data->iobase + data->iosize - 1, - .flags = IORESOURCE_MEM, - }, { - .start = data->irq, - .end = data->irq, - .flags = IORESOURCE_IRQ, - }, - }; - return imx_add_platform_device_dmamask("mx1-camera", 0, - res, ARRAY_SIZE(res), - pdata, sizeof(*pdata), DMA_BIT_MASK(32)); -} diff --git a/arch/arm/mach-imx/mx1-camera-fiq-ksym.c b/arch/arm/mach-imx/mx1-camera-fiq-ksym.c deleted file mode 100644 index fb38436ca67f..000000000000 --- a/arch/arm/mach-imx/mx1-camera-fiq-ksym.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Exported ksyms of ARCH_MX1 - * - * Copyright (C) 2008, Darius Augulis - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include - -/* IMX camera FIQ handler */ -EXPORT_SYMBOL(mx1_camera_sof_fiq_start); -EXPORT_SYMBOL(mx1_camera_sof_fiq_end); diff --git a/arch/arm/mach-imx/mx1-camera-fiq.S b/arch/arm/mach-imx/mx1-camera-fiq.S deleted file mode 100644 index 9c69aa65bf17..000000000000 --- a/arch/arm/mach-imx/mx1-camera-fiq.S +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2008 Paulius Zaleckas - * - * Based on linux/arch/arm/lib/floppydma.S - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include - - .text - .global mx1_camera_sof_fiq_end - .global mx1_camera_sof_fiq_start -mx1_camera_sof_fiq_start: - @ enable dma - ldr r12, [r9] - orr r12, r12, #0x00000001 - str r12, [r9] - @ unmask DMA interrupt - ldr r12, [r8] - bic r12, r12, r13 - str r12, [r8] - @ disable SOF interrupt - ldr r12, [r10] - bic r12, r12, #0x00010000 - str r12, [r10] - @ clear SOF flag - mov r12, #0x00010000 - str r12, [r11] - @ return from FIQ - subs pc, lr, #4 -mx1_camera_sof_fiq_end: diff --git a/include/linux/platform_data/camera-mx1.h b/include/linux/platform_data/camera-mx1.h deleted file mode 100644 index 4fd6c70314b4..000000000000 --- a/include/linux/platform_data/camera-mx1.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * mx1_camera.h - i.MX1/i.MXL camera driver header file - * - * Copyright (c) 2008, Paulius Zaleckas - * Copyright (C) 2009, Darius Augulis - * - * Based on PXA camera.h file: - * Copyright (C) 2003, Intel Corporation - * Copyright (C) 2008, Guennadi Liakhovetski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __ASM_ARCH_CAMERA_H_ -#define __ASM_ARCH_CAMERA_H_ - -#define MX1_CAMERA_DATA_HIGH 1 -#define MX1_CAMERA_PCLK_RISING 2 -#define MX1_CAMERA_VSYNC_HIGH 4 - -extern unsigned char mx1_camera_sof_fiq_start, mx1_camera_sof_fiq_end; - -/** - * struct mx1_camera_pdata - i.MX1/i.MXL camera platform data - * @mclk_10khz: master clock frequency in 10kHz units - * @flags: MX1 camera platform flags - */ -struct mx1_camera_pdata { - unsigned long mclk_10khz; - unsigned long flags; -}; - -#endif /* __ASM_ARCH_CAMERA_H_ */ -- cgit v1.2.3 From 3c45ddf823d679a820adddd53b52c6699c9a05ac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 16 Jul 2014 15:38:32 -0400 Subject: svcrdma: Select NFSv4.1 backchannel transport based on forward channel The current code always selects XPRT_TRANSPORT_BC_TCP for the back channel, even when the forward channel was not TCP (eg, RDMA). When a 4.1 mount is attempted with RDMA, the server panics in the TCP BC code when trying to send CB_NULL. Instead, construct the transport protocol number from the forward channel transport or'd with XPRT_TRANSPORT_BC. Transports that do not support bi-directional RPC will not have registered a "BC" transport, causing create_backchannel_client() to fail immediately. Fixes: https://bugzilla.linux-nfs.org/show_bug.cgi?id=265 Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 3 ++- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svcsock.c | 2 ++ net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 5 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a88a93e09d69..564d72304613 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; - args.protocol = XPRT_TRANSPORT_BC_TCP; + args.protocol = conn->cb_xprt->xpt_class->xcl_ident | + XPRT_TRANSPORT_BC; args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 7235040a19b2..5d9d6f84b382 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -33,6 +33,7 @@ struct svc_xprt_class { struct svc_xprt_ops *xcl_ops; struct list_head xcl_list; u32 xcl_max_payload; + int xcl_ident; }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b507cd327d9b..b2437ee93657 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -692,6 +692,7 @@ static struct svc_xprt_class svc_udp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_udp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, + .xcl_ident = XPRT_TRANSPORT_UDP, }; static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) @@ -1292,6 +1293,7 @@ static struct svc_xprt_class svc_tcp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_tcp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_TCP, }; void svc_init_xprt_sock(void) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c3b2b3369e52..51c63165073c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1306,7 +1306,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) } } spin_unlock(&xprt_list_lock); - printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + dprintk("RPC: transport (%d) not supported\n", args->ident); return ERR_PTR(-EIO); found: diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e7323fbbd348..06a5d9235107 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -92,6 +92,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_RDMA, }; struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) -- cgit v1.2.3 From 4362175dd65d1816a18ac3f14107d788d5fced27 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 14 Jul 2014 18:29:16 +0100 Subject: mfd: ab8500-debugfs: BIG clean-up When checkpatch is run on ab8500-debugfs.c it screamed blue murder! This patch fixes up all of the errors/warnings reported: WARNING: line over 80 characters + err = seq_printf(s, " [0x%02X/0x%02X]: 0x%02X\n", WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(... to printk(KERN_INFO ... + printk(KERN_INFO" [0x%02X/0x%02X]: 0x%02X\n", WARNING: Prefer seq_puts to seq_printf + seq_printf(s, AB8500_NAME_STRING " register values:\n"); WARNING: Prefer seq_puts to seq_printf + seq_printf(s, AB8500_NAME_STRING " register values:\n"); WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(... to printk(KERN_INFO ... + printk(KERN_INFO"ab8500 register values:\n"); WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(... to printk(KERN_INFO ... + printk(KERN_INFO" bank 0x%02X:\n", i); WARNING: externs should be avoided in .c files +extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); WARNING: quoted string split across lines + pr_info("Saving all ABB registers at \"ab8500_complete_register_dump\" " + "for crash analyze.\n"); WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... + printk(KERN_ERR "abx500_set_reg failed %d, %d", err, __LINE__); WARNING: Prefer seq_puts to seq_printf + seq_printf(s, "name: number: number of: wake:\n"); WARNING: line over 80 characters + return single_open(file, ab8500_print_modem_registers, inode->i_private); WARNING: line over 80 characters + return single_open(file, ab8500_gpadc_btemp_ball_print, inode->i_private); WARNING: line over 80 characters + return single_open(file, ab8500_gpadc_main_bat_v_print, inode->i_private); WARNING: line over 80 characters + vbat_true_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS, WARNING: line over 80 characters +static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, void *p) WARNING: line over 80 characters +static const struct file_operations ab8540_gpadc_vbat_true_meas_and_ibat_fops = { WARNING: line over 80 characters + vmain_l, vmain_h, btemp_l, btemp_h, vbat_l, vbat_h, ibat_l, ibat_h); WARNING: quoted string split across lines + dev_err(dev, "debugfs error input: " + "should be egal to 1, 4, 8 or 16\n"); WARNING: Missing a blank line after declarations + char *s = b; + if ((*s == '0') && ((*(s+1) == 'x') || (*(s+1) == 'X'))) { WARNING: simple_strtoul is obsolete, use kstrtoul instead + loc.mask = simple_strtoul(b, &b, 0); WARNING: simple_strtol is obsolete, use kstrtol instead + loc.shift = simple_strtol(b, &b, 0); WARNING: simple_strtoul is obsolete, use kstrtoul instead + loc.bank = simple_strtoul(b, &b, 0); WARNING: simple_strtoul is obsolete, use kstrtoul instead + loc.addr = simple_strtoul(b, &b, 0); WARNING: simple_strtoul is obsolete, use kstrtoul instead + val = simple_strtoul(b, &b, 0); WARNING: quoted string split across lines + pr_warn("HWREG request: %s, %s, addr=0x%08X, mask=0x%X, shift=%d" + "value=0x%X\n", (write) ? "write" : "read", WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... + printk(KERN_ERR "sysfs_create_file failed %d\n", err); WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... + printk(KERN_ERR "request_threaded_irq failed %d, %lu\n", ERROR: code indent should use tabs where possible + err, user_val);$ WARNING: please, no spaces at the start of a line + err, user_val);$ WARNING: Missing a blank line after declarations + struct resource *res; + debug_bank = AB8500_MISC; ERROR: space required after that ',' (ctx:VxV) + sizeof(*dev_attr)*num_irqs,GFP_KERNEL); ^ WARNING: return of an errno should typically be -ve (return -ENXIO) + return ENXIO; WARNING: line over 80 characters + file = debugfs_create_file("register-bank", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("register-address", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("register-value", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("irq-subscribe", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("irq-unsubscribe", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("all-modem-registers", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("main_charger_v", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("main_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("usb_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("xtal_temp", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_xtal_temp_fops); WARNING: line over 80 characters + file = debugfs_create_file("vbattruemeas", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("otp_calib", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_otp_calib_fops); total: 2 errors, 44 warnings, 3230 lines checked Signed-off-by: Lee Jones --- drivers/mfd/ab8500-debugfs.c | 288 +++++++++++++++++++++++--------------- include/linux/mfd/abx500/ab8500.h | 1 + 2 files changed, 173 insertions(+), 116 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index f7f271c16f2c..b2c7e3b1edfa 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -135,10 +135,10 @@ struct ab8500_prcmu_ranges { /* hwreg- "mask" and "shift" entries ressources */ struct hwreg_cfg { u32 bank; /* target bank */ - u32 addr; /* target address */ + unsigned long addr; /* target address */ uint fmt; /* format */ - uint mask; /* read/write mask, applied before any bit shift */ - int shift; /* bit shift (read:right shift, write:left shift */ + unsigned long mask; /* read/write mask, applied before any bit shift */ + long shift; /* bit shift (read:right shift, write:left shift */ }; /* fmt bit #0: 0=hexa, 1=dec */ #define REG_FMT_DEC(c) ((c)->fmt & 0x1) @@ -1304,16 +1304,17 @@ static int ab8500_registers_print(struct device *dev, u32 bank, } if (s) { - err = seq_printf(s, " [0x%02X/0x%02X]: 0x%02X\n", - bank, reg, value); + err = seq_printf(s, + " [0x%02X/0x%02X]: 0x%02X\n", + bank, reg, value); if (err < 0) { /* Error is not returned here since * the output is wanted in any case */ return 0; } } else { - printk(KERN_INFO" [0x%02X/0x%02X]: 0x%02X\n", - bank, reg, value); + dev_info(dev, " [0x%02X/0x%02X]: 0x%02X\n", + bank, reg, value); } } } @@ -1325,7 +1326,7 @@ static int ab8500_print_bank_registers(struct seq_file *s, void *p) struct device *dev = s->private; u32 bank = debug_bank; - seq_printf(s, AB8500_NAME_STRING " register values:\n"); + seq_puts(s, AB8500_NAME_STRING " register values:\n"); seq_printf(s, " bank 0x%02X:\n", bank); @@ -1351,7 +1352,7 @@ static int ab8500_print_all_banks(struct seq_file *s, void *p) struct device *dev = s->private; unsigned int i; - seq_printf(s, AB8500_NAME_STRING " register values:\n"); + seq_puts(s, AB8500_NAME_STRING " register values:\n"); for (i = 0; i < AB8500_NUM_BANKS; i++) { seq_printf(s, " bank 0x%02X:\n", i); @@ -1366,10 +1367,10 @@ void ab8500_dump_all_banks(struct device *dev) { unsigned int i; - printk(KERN_INFO"ab8500 register values:\n"); + dev_info(dev, "ab8500 register values:\n"); for (i = 1; i < AB8500_NUM_BANKS; i++) { - printk(KERN_INFO" bank 0x%02X:\n", i); + dev_info(dev, " bank 0x%02X:\n", i); ab8500_registers_print(dev, i, NULL); } } @@ -1383,8 +1384,6 @@ static struct ab8500_register_dump u8 value; } ab8500_complete_register_dump[DUMP_MAX_REGS]; -extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); - /* This shall only be called upon kernel panic! */ void ab8500_dump_all_banks_to_mem(void) { @@ -1392,8 +1391,7 @@ void ab8500_dump_all_banks_to_mem(void) u8 bank; int err = 0; - pr_info("Saving all ABB registers at \"ab8500_complete_register_dump\" " - "for crash analyze.\n"); + pr_info("Saving all ABB registers for crash analysis.\n"); for (bank = 0; bank < AB8500_NUM_BANKS; bank++) { for (i = 0; i < debug_ranges[bank].num_ranges; i++) { @@ -1563,7 +1561,7 @@ static ssize_t ab8500_val_write(struct file *file, err = abx500_set_register_interruptible(dev, (u8)debug_bank, debug_address, (u8)user_val); if (err < 0) { - printk(KERN_ERR "abx500_set_reg failed %d, %d", err, __LINE__); + pr_err("abx500_set_reg failed %d, %d", err, __LINE__); return -EINVAL; } @@ -1595,7 +1593,7 @@ static int ab8500_interrupts_print(struct seq_file *s, void *p) { int line; - seq_printf(s, "name: number: number of: wake:\n"); + seq_puts(s, "name: number: number of: wake:\n"); for (line = 0; line < num_interrupt_lines; line++) { struct irq_desc *desc = irq_to_desc(line + irq_first); @@ -1721,7 +1719,8 @@ static int ab8500_print_modem_registers(struct seq_file *s, void *p) static int ab8500_modem_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_print_modem_registers, inode->i_private); + return single_open(file, ab8500_print_modem_registers, + inode->i_private); } static const struct file_operations ab8500_modem_fops = { @@ -1750,7 +1749,8 @@ static int ab8500_gpadc_bat_ctrl_print(struct seq_file *s, void *p) static int ab8500_gpadc_bat_ctrl_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_bat_ctrl_print, inode->i_private); + return single_open(file, ab8500_gpadc_bat_ctrl_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_bat_ctrl_fops = { @@ -1780,7 +1780,8 @@ static int ab8500_gpadc_btemp_ball_print(struct seq_file *s, void *p) static int ab8500_gpadc_btemp_ball_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_btemp_ball_print, inode->i_private); + return single_open(file, ab8500_gpadc_btemp_ball_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_btemp_ball_fops = { @@ -1961,7 +1962,8 @@ static int ab8500_gpadc_main_bat_v_print(struct seq_file *s, void *p) static int ab8500_gpadc_main_bat_v_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_main_bat_v_print, inode->i_private); + return single_open(file, ab8500_gpadc_main_bat_v_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_main_bat_v_fops = { @@ -2081,7 +2083,8 @@ static int ab8500_gpadc_bk_bat_v_print(struct seq_file *s, void *p) static int ab8500_gpadc_bk_bat_v_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_bk_bat_v_print, inode->i_private); + return single_open(file, ab8500_gpadc_bk_bat_v_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_bk_bat_v_fops = { @@ -2110,7 +2113,8 @@ static int ab8500_gpadc_die_temp_print(struct seq_file *s, void *p) static int ab8500_gpadc_die_temp_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_die_temp_print, inode->i_private); + return single_open(file, ab8500_gpadc_die_temp_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_die_temp_fops = { @@ -2189,8 +2193,9 @@ static int ab8540_gpadc_vbat_true_meas_print(struct seq_file *s, void *p) gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); vbat_true_meas_raw = ab8500_gpadc_read_raw(gpadc, VBAT_TRUE_MEAS, avg_sample, trig_edge, trig_timer, conv_type); - vbat_true_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS, - vbat_true_meas_raw); + vbat_true_meas_convert = + ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS, + vbat_true_meas_raw); return seq_printf(s, "%d,0x%X\n", vbat_true_meas_convert, vbat_true_meas_raw); @@ -2284,7 +2289,8 @@ static const struct file_operations ab8540_gpadc_vbat_meas_and_ibat_fops = { .owner = THIS_MODULE, }; -static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, void *p) +static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, + void *p) { int vbat_true_meas_raw; int vbat_true_meas_convert; @@ -2313,7 +2319,8 @@ static int ab8540_gpadc_vbat_true_meas_and_ibat_open(struct inode *inode, inode->i_private); } -static const struct file_operations ab8540_gpadc_vbat_true_meas_and_ibat_fops = { +static const struct file_operations +ab8540_gpadc_vbat_true_meas_and_ibat_fops = { .open = ab8540_gpadc_vbat_true_meas_and_ibat_open, .read = seq_read, .llseek = seq_lseek, @@ -2367,14 +2374,15 @@ static int ab8540_gpadc_otp_cal_print(struct seq_file *s, void *p) ab8540_gpadc_get_otp(gpadc, &vmain_l, &vmain_h, &btemp_l, &btemp_h, &vbat_l, &vbat_h, &ibat_l, &ibat_h); return seq_printf(s, "VMAIN_L:0x%X\n" - "VMAIN_H:0x%X\n" - "BTEMP_L:0x%X\n" - "BTEMP_H:0x%X\n" - "VBAT_L:0x%X\n" - "VBAT_H:0x%X\n" - "IBAT_L:0x%X\n" - "IBAT_H:0x%X\n", - vmain_l, vmain_h, btemp_l, btemp_h, vbat_l, vbat_h, ibat_l, ibat_h); + "VMAIN_H:0x%X\n" + "BTEMP_L:0x%X\n" + "BTEMP_H:0x%X\n" + "VBAT_L:0x%X\n" + "VBAT_H:0x%X\n" + "IBAT_L:0x%X\n" + "IBAT_H:0x%X\n", + vmain_l, vmain_h, btemp_l, btemp_h, + vbat_l, vbat_h, ibat_l, ibat_h); } static int ab8540_gpadc_otp_cal_open(struct inode *inode, struct file *file) @@ -2418,8 +2426,8 @@ static ssize_t ab8500_gpadc_avg_sample_write(struct file *file, || (user_avg_sample == SAMPLE_16)) { avg_sample = (u8) user_avg_sample; } else { - dev_err(dev, "debugfs error input: " - "should be egal to 1, 4, 8 or 16\n"); + dev_err(dev, + "debugfs err input: should be egal to 1, 4, 8 or 16\n"); return -EINVAL; } @@ -2578,6 +2586,7 @@ static const struct file_operations ab8500_gpadc_conv_type_fops = { static int strval_len(char *b) { char *s = b; + if ((*s == '0') && ((*(s+1) == 'x') || (*(s+1) == 'X'))) { s += 2; for (; *s && (*s != ' ') && (*s != '\n'); s++) { @@ -2642,13 +2651,17 @@ static ssize_t hwreg_common_write(char *b, struct hwreg_cfg *cfg, b += (*(b+2) == ' ') ? 3 : 6; if (strval_len(b) == 0) return -EINVAL; - loc.mask = simple_strtoul(b, &b, 0); + ret = kstrtoul(b, 0, &loc.mask); + if (ret) + return ret; } else if ((!strncmp(b, "-s ", 3)) || (!strncmp(b, "-shift ", 7))) { b += (*(b+2) == ' ') ? 3 : 7; if (strval_len(b) == 0) return -EINVAL; - loc.shift = simple_strtol(b, &b, 0); + ret = kstrtol(b, 0, &loc.shift); + if (ret) + return ret; } else { return -EINVAL; } @@ -2656,29 +2669,36 @@ static ssize_t hwreg_common_write(char *b, struct hwreg_cfg *cfg, /* get arg BANK and ADDRESS */ if (strval_len(b) == 0) return -EINVAL; - loc.bank = simple_strtoul(b, &b, 0); + ret = kstrtouint(b, 0, &loc.bank); + if (ret) + return ret; while (*b == ' ') b++; if (strval_len(b) == 0) return -EINVAL; - loc.addr = simple_strtoul(b, &b, 0); + ret = kstrtoul(b, 0, &loc.addr); + if (ret) + return ret; if (write) { while (*b == ' ') b++; if (strval_len(b) == 0) return -EINVAL; - val = simple_strtoul(b, &b, 0); + ret = kstrtouint(b, 0, &val); + if (ret) + return ret; } /* args are ok, update target cfg (mainly for read) */ *cfg = loc; #ifdef ABB_HWREG_DEBUG - pr_warn("HWREG request: %s, %s, addr=0x%08X, mask=0x%X, shift=%d" - "value=0x%X\n", (write) ? "write" : "read", - REG_FMT_DEC(cfg) ? "decimal" : "hexa", - cfg->addr, cfg->mask, cfg->shift, val); + pr_warn("HWREG request: %s, %s,\n" + " addr=0x%08X, mask=0x%X, shift=%d" "value=0x%X\n", + (write) ? "write" : "read", + REG_FMT_DEC(cfg) ? "decimal" : "hexa", + cfg->addr, cfg->mask, cfg->shift, val); #endif if (!write) @@ -2814,7 +2834,7 @@ static ssize_t ab8500_subscribe_write(struct file *file, dev_attr[irq_index]->attr.mode = S_IRUGO; err = sysfs_create_file(&dev->kobj, &dev_attr[irq_index]->attr); if (err < 0) { - printk(KERN_ERR "sysfs_create_file failed %d\n", err); + pr_info("sysfs_create_file failed %d\n", err); return err; } @@ -2822,8 +2842,8 @@ static ssize_t ab8500_subscribe_write(struct file *file, IRQF_SHARED | IRQF_NO_SUSPEND, "ab8500-debug", &dev->kobj); if (err < 0) { - printk(KERN_ERR "request_threaded_irq failed %d, %lu\n", - err, user_val); + pr_info("request_threaded_irq failed %d, %lu\n", + err, user_val); sysfs_remove_file(&dev->kobj, &dev_attr[irq_index]->attr); return err; } @@ -2945,6 +2965,7 @@ static int ab8500_debug_probe(struct platform_device *plf) struct dentry *file; struct ab8500 *ab8500; struct resource *res; + debug_bank = AB8500_MISC; debug_address = AB8500_REV_REG & 0x00FF; @@ -2957,7 +2978,7 @@ static int ab8500_debug_probe(struct platform_device *plf) return -ENOMEM; dev_attr = devm_kzalloc(&plf->dev, - sizeof(*dev_attr)*num_irqs,GFP_KERNEL); + sizeof(*dev_attr)*num_irqs, GFP_KERNEL); if (!dev_attr) return -ENOMEM; @@ -2968,23 +2989,20 @@ static int ab8500_debug_probe(struct platform_device *plf) res = platform_get_resource_byname(plf, 0, "IRQ_AB8500"); if (!res) { - dev_err(&plf->dev, "AB8500 irq not found, err %d\n", - irq_first); - return ENXIO; + dev_err(&plf->dev, "AB8500 irq not found, err %d\n", irq_first); + return -ENXIO; } irq_ab8500 = res->start; irq_first = platform_get_irq_byname(plf, "IRQ_FIRST"); if (irq_first < 0) { - dev_err(&plf->dev, "First irq not found, err %d\n", - irq_first); + dev_err(&plf->dev, "First irq not found, err %d\n", irq_first); return irq_first; } irq_last = platform_get_irq_byname(plf, "IRQ_LAST"); if (irq_last < 0) { - dev_err(&plf->dev, "Last irq not found, err %d\n", - irq_last); + dev_err(&plf->dev, "Last irq not found, err %d\n", irq_last); return irq_last; } @@ -2993,37 +3011,41 @@ static int ab8500_debug_probe(struct platform_device *plf) goto err; ab8500_gpadc_dir = debugfs_create_dir(AB8500_ADC_NAME_STRING, - ab8500_dir); + ab8500_dir); if (!ab8500_gpadc_dir) goto err; - file = debugfs_create_file("all-bank-registers", S_IRUGO, - ab8500_dir, &plf->dev, &ab8500_registers_fops); + file = debugfs_create_file("all-bank-registers", S_IRUGO, ab8500_dir, + &plf->dev, &ab8500_registers_fops); if (!file) goto err; - file = debugfs_create_file("all-banks", S_IRUGO, - ab8500_dir, &plf->dev, &ab8500_all_banks_fops); + file = debugfs_create_file("all-banks", S_IRUGO, ab8500_dir, + &plf->dev, &ab8500_all_banks_fops); if (!file) goto err; - file = debugfs_create_file("register-bank", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_bank_fops); + file = debugfs_create_file("register-bank", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_dir, &plf->dev, &ab8500_bank_fops); if (!file) goto err; - file = debugfs_create_file("register-address", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_address_fops); + file = debugfs_create_file("register-address", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_dir, &plf->dev, &ab8500_address_fops); if (!file) goto err; - file = debugfs_create_file("register-value", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_val_fops); + file = debugfs_create_file("register-value", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_dir, &plf->dev, &ab8500_val_fops); if (!file) goto err; - file = debugfs_create_file("irq-subscribe", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_subscribe_fops); + file = debugfs_create_file("irq-subscribe", + (S_IRUGO | S_IWUSR | S_IWGRP), ab8500_dir, + &plf->dev, &ab8500_subscribe_fops); if (!file) goto err; @@ -3041,150 +3063,184 @@ static int ab8500_debug_probe(struct platform_device *plf) num_interrupt_lines = AB8540_NR_IRQS; } - file = debugfs_create_file("interrupts", (S_IRUGO), - ab8500_dir, &plf->dev, &ab8500_interrupts_fops); + file = debugfs_create_file("interrupts", (S_IRUGO), ab8500_dir, + &plf->dev, &ab8500_interrupts_fops); if (!file) goto err; - file = debugfs_create_file("irq-unsubscribe", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_unsubscribe_fops); + file = debugfs_create_file("irq-unsubscribe", + (S_IRUGO | S_IWUSR | S_IWGRP), ab8500_dir, + &plf->dev, &ab8500_unsubscribe_fops); if (!file) goto err; file = debugfs_create_file("hwreg", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_hwreg_fops); + ab8500_dir, &plf->dev, &ab8500_hwreg_fops); if (!file) goto err; - file = debugfs_create_file("all-modem-registers", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_modem_fops); + file = debugfs_create_file("all-modem-registers", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_dir, &plf->dev, &ab8500_modem_fops); if (!file) goto err; file = debugfs_create_file("bat_ctrl", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_bat_ctrl_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_bat_ctrl_fops); if (!file) goto err; file = debugfs_create_file("btemp_ball", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_btemp_ball_fops); + ab8500_gpadc_dir, + &plf->dev, &ab8500_gpadc_btemp_ball_fops); if (!file) goto err; - file = debugfs_create_file("main_charger_v", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_charger_v_fops); + file = debugfs_create_file("main_charger_v", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_main_charger_v_fops); if (!file) goto err; - file = debugfs_create_file("acc_detect1", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_acc_detect1_fops); + file = debugfs_create_file("acc_detect1", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_acc_detect1_fops); if (!file) goto err; - file = debugfs_create_file("acc_detect2", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_acc_detect2_fops); + file = debugfs_create_file("acc_detect2", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_acc_detect2_fops); if (!file) goto err; file = debugfs_create_file("adc_aux1", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_aux1_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_aux1_fops); if (!file) goto err; file = debugfs_create_file("adc_aux2", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_aux2_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_aux2_fops); if (!file) goto err; file = debugfs_create_file("main_bat_v", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_bat_v_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_main_bat_v_fops); if (!file) goto err; file = debugfs_create_file("vbus_v", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_vbus_v_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_vbus_v_fops); if (!file) goto err; - file = debugfs_create_file("main_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_charger_c_fops); + file = debugfs_create_file("main_charger_c", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_main_charger_c_fops); if (!file) goto err; - file = debugfs_create_file("usb_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_usb_charger_c_fops); + file = debugfs_create_file("usb_charger_c", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, + &plf->dev, &ab8500_gpadc_usb_charger_c_fops); if (!file) goto err; file = debugfs_create_file("bk_bat_v", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_bk_bat_v_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_bk_bat_v_fops); if (!file) goto err; file = debugfs_create_file("die_temp", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_die_temp_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_die_temp_fops); if (!file) goto err; file = debugfs_create_file("usb_id", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_usb_id_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_usb_id_fops); if (!file) goto err; if (is_ab8540(ab8500)) { - file = debugfs_create_file("xtal_temp", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_xtal_temp_fops); + file = debugfs_create_file("xtal_temp", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8540_gpadc_xtal_temp_fops); if (!file) goto err; - file = debugfs_create_file("vbattruemeas", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, - &ab8540_gpadc_vbat_true_meas_fops); + file = debugfs_create_file("vbattruemeas", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8540_gpadc_vbat_true_meas_fops); if (!file) goto err; file = debugfs_create_file("batctrl_and_ibat", - (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, - &plf->dev, &ab8540_gpadc_bat_ctrl_and_ibat_fops); + (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, + &plf->dev, + &ab8540_gpadc_bat_ctrl_and_ibat_fops); if (!file) goto err; file = debugfs_create_file("vbatmeas_and_ibat", - (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, - &plf->dev, - &ab8540_gpadc_vbat_meas_and_ibat_fops); + (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, + &ab8540_gpadc_vbat_meas_and_ibat_fops); if (!file) goto err; file = debugfs_create_file("vbattruemeas_and_ibat", - (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, - &plf->dev, - &ab8540_gpadc_vbat_true_meas_and_ibat_fops); + (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, + &plf->dev, + &ab8540_gpadc_vbat_true_meas_and_ibat_fops); if (!file) goto err; file = debugfs_create_file("battemp_and_ibat", - (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, + (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_bat_temp_and_ibat_fops); if (!file) goto err; - file = debugfs_create_file("otp_calib", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_otp_calib_fops); + file = debugfs_create_file("otp_calib", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, + &plf->dev, &ab8540_gpadc_otp_calib_fops); if (!file) goto err; } file = debugfs_create_file("avg_sample", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_avg_sample_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_avg_sample_fops); if (!file) goto err; file = debugfs_create_file("trig_edge", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_trig_edge_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_trig_edge_fops); if (!file) goto err; file = debugfs_create_file("trig_timer", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_trig_timer_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_trig_timer_fops); if (!file) goto err; file = debugfs_create_file("conv_type", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_conv_type_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_conv_type_fops); if (!file) goto err; diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 4e7fe7417fc9..9475fee2bfc5 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -505,6 +505,7 @@ static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab) void ab8500_override_turn_on_stat(u8 mask, u8 set); #ifdef CONFIG_AB8500_DEBUG +extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); void ab8500_dump_all_banks(struct device *dev); void ab8500_debug_register_interrupt(int line); #else -- cgit v1.2.3 From 6f1c1e71d933f58a6248f1681aededdd407f32a8 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 4 Jul 2014 22:24:04 +0200 Subject: mfd: max77686: Convert to use regmap_irq By using the generic IRQ support in the Register map API, it is possible to get rid max77686-irq.c and simplify the code. Suggested-by: Krzysztof Kozlowski Signed-off-by: Javier Martinez Canillas Reviewed-by: Doug Anderson Tested-by: Doug Anderson Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 1 + drivers/mfd/Makefile | 2 +- drivers/mfd/max77686-irq.c | 319 ----------------------------------- drivers/mfd/max77686.c | 97 ++++++++++- drivers/rtc/rtc-max77686.c | 27 +-- include/linux/mfd/max77686-private.h | 31 +++- include/linux/mfd/max77686.h | 2 - 7 files changed, 123 insertions(+), 356 deletions(-) delete mode 100644 drivers/mfd/max77686-irq.c (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index b8d9ca0b68e2..30102042dcaf 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -384,6 +384,7 @@ config MFD_MAX77686 depends on I2C=y select MFD_CORE select REGMAP_I2C + select REGMAP_IRQ select IRQ_DOMAIN help Say yes here to add support for Maxim Semiconductor MAX77686. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 4e2bc255b8b0..f00148782d9b 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -115,7 +115,7 @@ da9063-objs := da9063-core.o da9063-irq.o da9063-i2c.o obj-$(CONFIG_MFD_DA9063) += da9063.o obj-$(CONFIG_MFD_MAX14577) += max14577.o -obj-$(CONFIG_MFD_MAX77686) += max77686.o max77686-irq.o +obj-$(CONFIG_MFD_MAX77686) += max77686.o obj-$(CONFIG_MFD_MAX77693) += max77693.o obj-$(CONFIG_MFD_MAX8907) += max8907.o max8925-objs := max8925-core.o max8925-i2c.o diff --git a/drivers/mfd/max77686-irq.c b/drivers/mfd/max77686-irq.c deleted file mode 100644 index cdc3280e2ec7..000000000000 --- a/drivers/mfd/max77686-irq.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * max77686-irq.c - Interrupt controller support for MAX77686 - * - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * Chiwoong Byun - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This driver is based on max8997-irq.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -enum { - MAX77686_DEBUG_IRQ_INFO = 1 << 0, - MAX77686_DEBUG_IRQ_MASK = 1 << 1, - MAX77686_DEBUG_IRQ_INT = 1 << 2, -}; - -static int debug_mask = 0; -module_param(debug_mask, int, 0); -MODULE_PARM_DESC(debug_mask, "Set debug_mask : 0x0=off 0x1=IRQ_INFO 0x2=IRQ_MASK 0x4=IRQ_INI)"); - -static const u8 max77686_mask_reg[] = { - [PMIC_INT1] = MAX77686_REG_INT1MSK, - [PMIC_INT2] = MAX77686_REG_INT2MSK, - [RTC_INT] = MAX77686_RTC_INTM, -}; - -static struct regmap *max77686_get_regmap(struct max77686_dev *max77686, - enum max77686_irq_source src) -{ - switch (src) { - case PMIC_INT1 ... PMIC_INT2: - return max77686->regmap; - case RTC_INT: - return max77686->rtc_regmap; - default: - return ERR_PTR(-EINVAL); - } -} - -struct max77686_irq_data { - int mask; - enum max77686_irq_source group; -}; - -#define DECLARE_IRQ(idx, _group, _mask) \ - [(idx)] = { .group = (_group), .mask = (_mask) } -static const struct max77686_irq_data max77686_irqs[] = { - DECLARE_IRQ(MAX77686_PMICIRQ_PWRONF, PMIC_INT1, 1 << 0), - DECLARE_IRQ(MAX77686_PMICIRQ_PWRONR, PMIC_INT1, 1 << 1), - DECLARE_IRQ(MAX77686_PMICIRQ_JIGONBF, PMIC_INT1, 1 << 2), - DECLARE_IRQ(MAX77686_PMICIRQ_JIGONBR, PMIC_INT1, 1 << 3), - DECLARE_IRQ(MAX77686_PMICIRQ_ACOKBF, PMIC_INT1, 1 << 4), - DECLARE_IRQ(MAX77686_PMICIRQ_ACOKBR, PMIC_INT1, 1 << 5), - DECLARE_IRQ(MAX77686_PMICIRQ_ONKEY1S, PMIC_INT1, 1 << 6), - DECLARE_IRQ(MAX77686_PMICIRQ_MRSTB, PMIC_INT1, 1 << 7), - DECLARE_IRQ(MAX77686_PMICIRQ_140C, PMIC_INT2, 1 << 0), - DECLARE_IRQ(MAX77686_PMICIRQ_120C, PMIC_INT2, 1 << 1), - DECLARE_IRQ(MAX77686_RTCIRQ_RTC60S, RTC_INT, 1 << 0), - DECLARE_IRQ(MAX77686_RTCIRQ_RTCA1, RTC_INT, 1 << 1), - DECLARE_IRQ(MAX77686_RTCIRQ_RTCA2, RTC_INT, 1 << 2), - DECLARE_IRQ(MAX77686_RTCIRQ_SMPL, RTC_INT, 1 << 3), - DECLARE_IRQ(MAX77686_RTCIRQ_RTC1S, RTC_INT, 1 << 4), - DECLARE_IRQ(MAX77686_RTCIRQ_WTSR, RTC_INT, 1 << 5), -}; - -static void max77686_irq_lock(struct irq_data *data) -{ - struct max77686_dev *max77686 = irq_get_chip_data(data->irq); - - if (debug_mask & MAX77686_DEBUG_IRQ_MASK) - pr_info("%s\n", __func__); - - mutex_lock(&max77686->irqlock); -} - -static void max77686_irq_sync_unlock(struct irq_data *data) -{ - struct max77686_dev *max77686 = irq_get_chip_data(data->irq); - int i; - - for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++) { - u8 mask_reg = max77686_mask_reg[i]; - struct regmap *map = max77686_get_regmap(max77686, i); - - if (debug_mask & MAX77686_DEBUG_IRQ_MASK) - pr_debug("%s: mask_reg[%d]=0x%x, cur=0x%x\n", - __func__, i, mask_reg, max77686->irq_masks_cur[i]); - - if (mask_reg == MAX77686_REG_INVALID || - IS_ERR_OR_NULL(map)) - continue; - - max77686->irq_masks_cache[i] = max77686->irq_masks_cur[i]; - - regmap_write(map, max77686_mask_reg[i], - max77686->irq_masks_cur[i]); - } - - mutex_unlock(&max77686->irqlock); -} - -static const inline struct max77686_irq_data *to_max77686_irq(int irq) -{ - struct irq_data *data = irq_get_irq_data(irq); - return &max77686_irqs[data->hwirq]; -} - -static void max77686_irq_mask(struct irq_data *data) -{ - struct max77686_dev *max77686 = irq_get_chip_data(data->irq); - const struct max77686_irq_data *irq_data = to_max77686_irq(data->irq); - - max77686->irq_masks_cur[irq_data->group] |= irq_data->mask; - - if (debug_mask & MAX77686_DEBUG_IRQ_MASK) - pr_info("%s: group=%d, cur=0x%x\n", - __func__, irq_data->group, - max77686->irq_masks_cur[irq_data->group]); -} - -static void max77686_irq_unmask(struct irq_data *data) -{ - struct max77686_dev *max77686 = irq_get_chip_data(data->irq); - const struct max77686_irq_data *irq_data = to_max77686_irq(data->irq); - - max77686->irq_masks_cur[irq_data->group] &= ~irq_data->mask; - - if (debug_mask & MAX77686_DEBUG_IRQ_MASK) - pr_info("%s: group=%d, cur=0x%x\n", - __func__, irq_data->group, - max77686->irq_masks_cur[irq_data->group]); -} - -static struct irq_chip max77686_irq_chip = { - .name = "max77686", - .irq_bus_lock = max77686_irq_lock, - .irq_bus_sync_unlock = max77686_irq_sync_unlock, - .irq_mask = max77686_irq_mask, - .irq_unmask = max77686_irq_unmask, -}; - -static irqreturn_t max77686_irq_thread(int irq, void *data) -{ - struct max77686_dev *max77686 = data; - unsigned int irq_reg[MAX77686_IRQ_GROUP_NR] = {}; - unsigned int irq_src; - int ret; - int i, cur_irq; - - ret = regmap_read(max77686->regmap, MAX77686_REG_INTSRC, &irq_src); - if (ret < 0) { - dev_err(max77686->dev, "Failed to read interrupt source: %d\n", - ret); - return IRQ_NONE; - } - - if (debug_mask & MAX77686_DEBUG_IRQ_INT) - pr_info("%s: irq_src=0x%x\n", __func__, irq_src); - - if (irq_src == MAX77686_IRQSRC_PMIC) { - ret = regmap_bulk_read(max77686->regmap, - MAX77686_REG_INT1, irq_reg, 2); - if (ret < 0) { - dev_err(max77686->dev, "Failed to read interrupt source: %d\n", - ret); - return IRQ_NONE; - } - - if (debug_mask & MAX77686_DEBUG_IRQ_INT) - pr_info("%s: int1=0x%x, int2=0x%x\n", __func__, - irq_reg[PMIC_INT1], irq_reg[PMIC_INT2]); - } - - if (irq_src & MAX77686_IRQSRC_RTC) { - ret = regmap_read(max77686->rtc_regmap, - MAX77686_RTC_INT, &irq_reg[RTC_INT]); - if (ret < 0) { - dev_err(max77686->dev, "Failed to read interrupt source: %d\n", - ret); - return IRQ_NONE; - } - - if (debug_mask & MAX77686_DEBUG_IRQ_INT) - pr_info("%s: rtc int=0x%x\n", __func__, - irq_reg[RTC_INT]); - - } - - for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++) - irq_reg[i] &= ~max77686->irq_masks_cur[i]; - - for (i = 0; i < MAX77686_IRQ_NR; i++) { - if (irq_reg[max77686_irqs[i].group] & max77686_irqs[i].mask) { - cur_irq = irq_find_mapping(max77686->irq_domain, i); - if (cur_irq) - handle_nested_irq(cur_irq); - } - } - - return IRQ_HANDLED; -} - -static int max77686_irq_domain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hw) -{ - struct max77686_dev *max77686 = d->host_data; - - irq_set_chip_data(irq, max77686); - irq_set_chip_and_handler(irq, &max77686_irq_chip, handle_edge_irq); - irq_set_nested_thread(irq, 1); -#ifdef CONFIG_ARM - set_irq_flags(irq, IRQF_VALID); -#else - irq_set_noprobe(irq); -#endif - return 0; -} - -static struct irq_domain_ops max77686_irq_domain_ops = { - .map = max77686_irq_domain_map, -}; - -int max77686_irq_init(struct max77686_dev *max77686) -{ - struct irq_domain *domain; - int i; - int ret; - int val; - struct regmap *map; - - mutex_init(&max77686->irqlock); - - if (max77686->irq_gpio && !max77686->irq) { - max77686->irq = gpio_to_irq(max77686->irq_gpio); - - if (debug_mask & MAX77686_DEBUG_IRQ_INT) { - ret = gpio_request(max77686->irq_gpio, "pmic_irq"); - if (ret < 0) { - dev_err(max77686->dev, - "Failed to request gpio %d with ret:" - "%d\n", max77686->irq_gpio, ret); - return IRQ_NONE; - } - - gpio_direction_input(max77686->irq_gpio); - val = gpio_get_value(max77686->irq_gpio); - gpio_free(max77686->irq_gpio); - pr_info("%s: gpio_irq=%x\n", __func__, val); - } - } - - if (!max77686->irq) { - dev_err(max77686->dev, "irq is not specified\n"); - return -ENODEV; - } - - /* Mask individual interrupt sources */ - for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++) { - max77686->irq_masks_cur[i] = 0xff; - max77686->irq_masks_cache[i] = 0xff; - map = max77686_get_regmap(max77686, i); - - if (IS_ERR_OR_NULL(map)) - continue; - if (max77686_mask_reg[i] == MAX77686_REG_INVALID) - continue; - - regmap_write(map, max77686_mask_reg[i], 0xff); - } - domain = irq_domain_add_linear(NULL, MAX77686_IRQ_NR, - &max77686_irq_domain_ops, max77686); - if (!domain) { - dev_err(max77686->dev, "could not create irq domain\n"); - return -ENODEV; - } - max77686->irq_domain = domain; - - ret = request_threaded_irq(max77686->irq, NULL, max77686_irq_thread, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - "max77686-irq", max77686); - - if (ret) - dev_err(max77686->dev, "Failed to request IRQ %d: %d\n", - max77686->irq, ret); - - - if (debug_mask & MAX77686_DEBUG_IRQ_INFO) - pr_info("%s-\n", __func__); - - return 0; -} - -void max77686_irq_exit(struct max77686_dev *max77686) -{ - if (max77686->irq) - free_irq(max77686->irq, max77686); -} diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index ce869acf27ae..3cb41d02cd3d 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -46,6 +48,54 @@ static struct regmap_config max77686_regmap_config = { .val_bits = 8, }; +static struct regmap_config max77686_rtc_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static const struct regmap_irq max77686_irqs[] = { + /* INT1 interrupts */ + { .reg_offset = 0, .mask = MAX77686_INT1_PWRONF_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_PWRONR_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_JIGONBF_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_JIGONBR_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_ACOKBF_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_ACOKBR_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_ONKEY1S_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_MRSTB_MSK, }, + /* INT2 interrupts */ + { .reg_offset = 1, .mask = MAX77686_INT2_140C_MSK, }, + { .reg_offset = 1, .mask = MAX77686_INT2_120C_MSK, }, +}; + +static const struct regmap_irq_chip max77686_irq_chip = { + .name = "max77686-pmic", + .status_base = MAX77686_REG_INT1, + .mask_base = MAX77686_REG_INT1MSK, + .num_regs = 2, + .irqs = max77686_irqs, + .num_irqs = ARRAY_SIZE(max77686_irqs), +}; + +static const struct regmap_irq max77686_rtc_irqs[] = { + /* RTC interrupts */ + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTC60S_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA1_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA2_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_SMPL_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTC1S_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_WTSR_MSK, }, +}; + +static const struct regmap_irq_chip max77686_rtc_irq_chip = { + .name = "max77686-rtc", + .status_base = MAX77686_RTC_INT, + .mask_base = MAX77686_RTC_INTM, + .num_regs = 1, + .irqs = max77686_rtc_irqs, + .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), +}; + #ifdef CONFIG_OF static const struct of_device_id max77686_pmic_dt_match[] = { {.compatible = "maxim,max77686", .data = NULL}, @@ -101,7 +151,6 @@ static int max77686_i2c_probe(struct i2c_client *i2c, max77686->type = id->driver_data; max77686->wakeup = pdata->wakeup; - max77686->irq_gpio = pdata->irq_gpio; max77686->irq = i2c->irq; max77686->regmap = devm_regmap_init_i2c(i2c, &max77686_regmap_config); @@ -117,8 +166,7 @@ static int max77686_i2c_probe(struct i2c_client *i2c, dev_err(max77686->dev, "device not found on this channel (this is not an error)\n"); return -ENODEV; - } else - dev_info(max77686->dev, "device found\n"); + } max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); if (!max77686->rtc) { @@ -127,15 +175,48 @@ static int max77686_i2c_probe(struct i2c_client *i2c, } i2c_set_clientdata(max77686->rtc, max77686); - max77686_irq_init(max77686); + max77686->rtc_regmap = devm_regmap_init_i2c(max77686->rtc, + &max77686_rtc_regmap_config); + if (IS_ERR(max77686->rtc_regmap)) { + ret = PTR_ERR(max77686->rtc_regmap); + dev_err(max77686->dev, "failed to allocate RTC regmap: %d\n", + ret); + goto err_unregister_i2c; + } + + ret = regmap_add_irq_chip(max77686->regmap, max77686->irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT | + IRQF_SHARED, 0, &max77686_irq_chip, + &max77686->irq_data); + if (ret != 0) { + dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret); + goto err_unregister_i2c; + } + ret = regmap_add_irq_chip(max77686->rtc_regmap, max77686->irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT | + IRQF_SHARED, 0, &max77686_rtc_irq_chip, + &max77686->rtc_irq_data); + if (ret != 0) { + dev_err(&i2c->dev, "failed to add RTC irq chip: %d\n", ret); + goto err_del_irqc; + } ret = mfd_add_devices(max77686->dev, -1, max77686_devs, ARRAY_SIZE(max77686_devs), NULL, 0, NULL); if (ret < 0) { - mfd_remove_devices(max77686->dev); - i2c_unregister_device(max77686->rtc); + dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret); + goto err_del_rtc_irqc; } + return 0; + +err_del_rtc_irqc: + regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data); +err_del_irqc: + regmap_del_irq_chip(max77686->irq, max77686->irq_data); +err_unregister_i2c: + i2c_unregister_device(max77686->rtc); + return ret; } @@ -144,6 +225,10 @@ static int max77686_i2c_remove(struct i2c_client *i2c) struct max77686_dev *max77686 = i2c_get_clientdata(i2c); mfd_remove_devices(max77686->dev); + + regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data); + regmap_del_irq_chip(max77686->irq, max77686->irq_data); + i2c_unregister_device(max77686->rtc); return 0; diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 9efe118a28ba..d20a7f0786eb 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -492,16 +492,11 @@ static int max77686_rtc_init_reg(struct max77686_rtc_info *info) return ret; } -static struct regmap_config max77686_rtc_regmap_config = { - .reg_bits = 8, - .val_bits = 8, -}; - static int max77686_rtc_probe(struct platform_device *pdev) { struct max77686_dev *max77686 = dev_get_drvdata(pdev->dev.parent); struct max77686_rtc_info *info; - int ret, virq; + int ret; dev_info(&pdev->dev, "%s\n", __func__); @@ -514,14 +509,7 @@ static int max77686_rtc_probe(struct platform_device *pdev) info->dev = &pdev->dev; info->max77686 = max77686; info->rtc = max77686->rtc; - info->max77686->rtc_regmap = devm_regmap_init_i2c(info->max77686->rtc, - &max77686_rtc_regmap_config); - if (IS_ERR(info->max77686->rtc_regmap)) { - ret = PTR_ERR(info->max77686->rtc_regmap); - dev_err(info->max77686->dev, "Failed to allocate register map: %d\n", - ret); - return ret; - } + platform_set_drvdata(pdev, info); ret = max77686_rtc_init_reg(info); @@ -550,15 +538,16 @@ static int max77686_rtc_probe(struct platform_device *pdev) ret = -EINVAL; goto err_rtc; } - virq = irq_create_mapping(max77686->irq_domain, MAX77686_RTCIRQ_RTCA1); - if (!virq) { + + info->virq = regmap_irq_get_virq(max77686->rtc_irq_data, + MAX77686_RTCIRQ_RTCA1); + if (!info->virq) { ret = -ENXIO; goto err_rtc; } - info->virq = virq; - ret = devm_request_threaded_irq(&pdev->dev, virq, NULL, - max77686_rtc_alarm_irq, 0, "rtc-alarm0", info); + ret = devm_request_threaded_irq(&pdev->dev, info->virq, NULL, + max77686_rtc_alarm_irq, 0, "rtc-alarm1", info); if (ret < 0) dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", info->virq, ret); diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 8c75a9c8dfab..8e177806cba1 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -181,9 +181,6 @@ enum max77686_rtc_reg { MAX77686_ALARM2_DATE = 0x1B, }; -#define MAX77686_IRQSRC_PMIC (0) -#define MAX77686_IRQSRC_RTC (1 << 0) - enum max77686_irq_source { PMIC_INT1 = 0, PMIC_INT2, @@ -205,16 +202,33 @@ enum max77686_irq { MAX77686_PMICIRQ_140C, MAX77686_PMICIRQ_120C, - MAX77686_RTCIRQ_RTC60S, + MAX77686_RTCIRQ_RTC60S = 0, MAX77686_RTCIRQ_RTCA1, MAX77686_RTCIRQ_RTCA2, MAX77686_RTCIRQ_SMPL, MAX77686_RTCIRQ_RTC1S, MAX77686_RTCIRQ_WTSR, - - MAX77686_IRQ_NR, }; +#define MAX77686_INT1_PWRONF_MSK BIT(0) +#define MAX77686_INT1_PWRONR_MSK BIT(1) +#define MAX77686_INT1_JIGONBF_MSK BIT(2) +#define MAX77686_INT1_JIGONBR_MSK BIT(3) +#define MAX77686_INT1_ACOKBF_MSK BIT(4) +#define MAX77686_INT1_ACOKBR_MSK BIT(5) +#define MAX77686_INT1_ONKEY1S_MSK BIT(6) +#define MAX77686_INT1_MRSTB_MSK BIT(7) + +#define MAX77686_INT2_140C_MSK BIT(0) +#define MAX77686_INT2_120C_MSK BIT(1) + +#define MAX77686_RTCINT_RTC60S_MSK BIT(0) +#define MAX77686_RTCINT_RTCA1_MSK BIT(1) +#define MAX77686_RTCINT_RTCA2_MSK BIT(2) +#define MAX77686_RTCINT_SMPL_MSK BIT(3) +#define MAX77686_RTCINT_RTC1S_MSK BIT(4) +#define MAX77686_RTCINT_WTSR_MSK BIT(5) + struct max77686_dev { struct device *dev; struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */ @@ -224,11 +238,10 @@ struct max77686_dev { struct regmap *regmap; /* regmap for mfd */ struct regmap *rtc_regmap; /* regmap for rtc */ - - struct irq_domain *irq_domain; + struct regmap_irq_chip_data *irq_data; + struct regmap_irq_chip_data *rtc_irq_data; int irq; - int irq_gpio; bool wakeup; struct mutex irqlock; int irq_masks_cur[MAX77686_IRQ_GROUP_NR]; diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h index 46c0f320ed76..4cbcc13e8a2a 100644 --- a/include/linux/mfd/max77686.h +++ b/include/linux/mfd/max77686.h @@ -89,8 +89,6 @@ struct max77686_opmode_data { }; struct max77686_platform_data { - /* IRQ */ - int irq_gpio; int ono; int wakeup; -- cgit v1.2.3 From c708a98f01068fe07f77448031f9f5317423e777 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Wed, 25 Jun 2014 11:00:12 +0100 Subject: thermal: document struct thermal_zone_device and thermal_governor Document struct thermal_zone_device and struct thermal_governor fields and their use by the thermal framework code. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Zhang Rui --- include/linux/thermal.h | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f7e11c7ea7d9..0305cde21a74 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -158,6 +158,42 @@ struct thermal_attr { char name[THERMAL_NAME_LENGTH]; }; +/** + * struct thermal_zone_device - structure for a thermal zone + * @id: unique id number for each thermal zone + * @type: the thermal zone device type + * @device: &struct device for this thermal zone + * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature + * @trip_type_attrs: attributes for trip points for sysfs: trip type + * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis + * @devdata: private pointer for device private data + * @trips: number of trip points the thermal zone supports + * @passive_delay: number of milliseconds to wait between polls when + * performing passive cooling. Currenty only used by the + * step-wise governor + * @polling_delay: number of milliseconds to wait between polls when + * checking whether trip points have been crossed (0 for + * interrupt driven systems) + * @temperature: current temperature. This is only for core code, + * drivers should use thermal_zone_get_temp() to get the + * current temperature + * @last_temperature: previous temperature read + * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION + * @passive: 1 if you've crossed a passive trip point, 0 otherwise. + * Currenty only used by the step-wise governor. + * @forced_passive: If > 0, temperature at which to switch on all ACPI + * processor cooling devices. Currently only used by the + * step-wise governor. + * @ops: operations this &thermal_zone_device supports + * @tzp: thermal zone parameters + * @governor: pointer to the governor for this thermal zone + * @thermal_instances: list of &struct thermal_instance of this thermal zone + * @idr: &struct idr to generate unique id for this zone's cooling + * devices + * @lock: lock to protect thermal_instances list + * @node: node in thermal_tz_list (in thermal_core.c) + * @poll_queue: delayed work for polling + */ struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -179,12 +215,18 @@ struct thermal_zone_device { struct thermal_governor *governor; struct list_head thermal_instances; struct idr idr; - struct mutex lock; /* protect thermal_instances list */ + struct mutex lock; struct list_head node; struct delayed_work poll_queue; }; -/* Structure that holds thermal governor information */ +/** + * struct thermal_governor - structure that holds thermal governor information + * @name: name of the governor + * @throttle: callback called for every trip point even if temperature is + * below the trip point temperature + * @governor_list: node in thermal_governor_list (in thermal_core.c) + */ struct thermal_governor { char name[THERMAL_NAME_LENGTH]; int (*throttle)(struct thermal_zone_device *tz, int trip); -- cgit v1.2.3 From bb5fd0b6daaf0da0b1e78c699b8582984373d3f4 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Fri, 11 Jul 2014 09:49:41 +0200 Subject: mtd: nand: define struct nand_timings Define a struct containing the standard NAND timings as described in NAND datasheets. Signed-off-by: Boris BREZILLON Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 1cff329ae13d..cdda207c16e1 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -948,4 +948,53 @@ static inline int jedec_feature(struct nand_chip *chip) return chip->jedec_version ? le16_to_cpu(chip->jedec_params.features) : 0; } + +/** + * struct nand_sdr_timings - SDR NAND chip timings + * + * This struct defines the timing requirements of a SDR NAND chip. + * These informations can be found in every NAND datasheets and the timings + * meaning are described in the ONFI specifications: + * www.onfi.org/~/media/ONFI/specs/onfi_3_1_spec.pdf (chapter 4.15 Timing + * Parameters) + * + * All these timings are expressed in picoseconds. + */ + +struct nand_sdr_timings { + u32 tALH_min; + u32 tADL_min; + u32 tALS_min; + u32 tAR_min; + u32 tCEA_max; + u32 tCEH_min; + u32 tCH_min; + u32 tCHZ_max; + u32 tCLH_min; + u32 tCLR_min; + u32 tCLS_min; + u32 tCOH_min; + u32 tCS_min; + u32 tDH_min; + u32 tDS_min; + u32 tFEAT_max; + u32 tIR_min; + u32 tITC_max; + u32 tRC_min; + u32 tREA_max; + u32 tREH_min; + u32 tRHOH_min; + u32 tRHW_min; + u32 tRHZ_max; + u32 tRLOH_min; + u32 tRP_min; + u32 tRR_min; + u64 tRST_max; + u32 tWB_max; + u32 tWC_min; + u32 tWH_min; + u32 tWHR_min; + u32 tWP_min; + u32 tWW_min; +}; #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 974647ea8a13021a91d558df61d598bcabf73439 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Fri, 11 Jul 2014 09:49:42 +0200 Subject: mtd: nand: add ONFI timing mode to nand_timings converter Add a converter to retrieve NAND timings from an ONFI NAND timing mode. At the moment, only SDR NAND timings are supported. Signed-off-by: Boris BREZILLON Signed-off-by: Brian Norris --- drivers/mtd/nand/Makefile | 2 +- drivers/mtd/nand/nand_timings.c | 253 ++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/nand.h | 3 + 3 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/nand_timings.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 542b5689eb63..a035e7cc6d46 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -50,4 +50,4 @@ obj-$(CONFIG_MTD_NAND_GPMI_NAND) += gpmi-nand/ obj-$(CONFIG_MTD_NAND_XWAY) += xway_nand.o obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH) += bcm47xxnflash/ -nand-objs := nand_base.o nand_bbt.o +nand-objs := nand_base.o nand_bbt.o nand_timings.o diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c new file mode 100644 index 000000000000..8b36253420fa --- /dev/null +++ b/drivers/mtd/nand/nand_timings.c @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2014 Free Electrons + * + * Author: Boris BREZILLON + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include + +static const struct nand_sdr_timings onfi_sdr_timings[] = { + /* Mode 0 */ + { + .tADL_min = 200000, + .tALH_min = 20000, + .tALS_min = 50000, + .tAR_min = 25000, + .tCEA_max = 100000, + .tCEH_min = 20000, + .tCH_min = 20000, + .tCHZ_max = 100000, + .tCLH_min = 20000, + .tCLR_min = 20000, + .tCLS_min = 50000, + .tCOH_min = 0, + .tCS_min = 70000, + .tDH_min = 20000, + .tDS_min = 40000, + .tFEAT_max = 1000000, + .tIR_min = 10000, + .tITC_max = 1000000, + .tRC_min = 100000, + .tREA_max = 40000, + .tREH_min = 30000, + .tRHOH_min = 0, + .tRHW_min = 200000, + .tRHZ_max = 200000, + .tRLOH_min = 0, + .tRP_min = 50000, + .tRST_max = 250000000000, + .tWB_max = 200000, + .tRR_min = 40000, + .tWC_min = 100000, + .tWH_min = 30000, + .tWHR_min = 120000, + .tWP_min = 50000, + .tWW_min = 100000, + }, + /* Mode 1 */ + { + .tADL_min = 100000, + .tALH_min = 10000, + .tALS_min = 25000, + .tAR_min = 10000, + .tCEA_max = 45000, + .tCEH_min = 20000, + .tCH_min = 10000, + .tCHZ_max = 50000, + .tCLH_min = 10000, + .tCLR_min = 10000, + .tCLS_min = 25000, + .tCOH_min = 15000, + .tCS_min = 35000, + .tDH_min = 10000, + .tDS_min = 20000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 50000, + .tREA_max = 30000, + .tREH_min = 15000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 0, + .tRP_min = 25000, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tWC_min = 45000, + .tWH_min = 15000, + .tWHR_min = 80000, + .tWP_min = 25000, + .tWW_min = 100000, + }, + /* Mode 2 */ + { + .tADL_min = 100000, + .tALH_min = 10000, + .tALS_min = 15000, + .tAR_min = 10000, + .tCEA_max = 30000, + .tCEH_min = 20000, + .tCH_min = 10000, + .tCHZ_max = 50000, + .tCLH_min = 10000, + .tCLR_min = 10000, + .tCLS_min = 15000, + .tCOH_min = 15000, + .tCS_min = 25000, + .tDH_min = 5000, + .tDS_min = 15000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 35000, + .tREA_max = 25000, + .tREH_min = 15000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 0, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tRP_min = 17000, + .tWC_min = 35000, + .tWH_min = 15000, + .tWHR_min = 80000, + .tWP_min = 17000, + .tWW_min = 100000, + }, + /* Mode 3 */ + { + .tADL_min = 100000, + .tALH_min = 5000, + .tALS_min = 10000, + .tAR_min = 10000, + .tCEA_max = 25000, + .tCEH_min = 20000, + .tCH_min = 5000, + .tCHZ_max = 50000, + .tCLH_min = 5000, + .tCLR_min = 10000, + .tCLS_min = 10000, + .tCOH_min = 15000, + .tCS_min = 25000, + .tDH_min = 5000, + .tDS_min = 10000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 30000, + .tREA_max = 20000, + .tREH_min = 10000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 0, + .tRP_min = 15000, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tWC_min = 30000, + .tWH_min = 10000, + .tWHR_min = 80000, + .tWP_min = 15000, + .tWW_min = 100000, + }, + /* Mode 4 */ + { + .tADL_min = 70000, + .tALH_min = 5000, + .tALS_min = 10000, + .tAR_min = 10000, + .tCEA_max = 25000, + .tCEH_min = 20000, + .tCH_min = 5000, + .tCHZ_max = 30000, + .tCLH_min = 5000, + .tCLR_min = 10000, + .tCLS_min = 10000, + .tCOH_min = 15000, + .tCS_min = 20000, + .tDH_min = 5000, + .tDS_min = 10000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 25000, + .tREA_max = 20000, + .tREH_min = 10000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 5000, + .tRP_min = 12000, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tWC_min = 25000, + .tWH_min = 10000, + .tWHR_min = 80000, + .tWP_min = 12000, + .tWW_min = 100000, + }, + /* Mode 5 */ + { + .tADL_min = 70000, + .tALH_min = 5000, + .tALS_min = 10000, + .tAR_min = 10000, + .tCEA_max = 25000, + .tCEH_min = 20000, + .tCH_min = 5000, + .tCHZ_max = 30000, + .tCLH_min = 5000, + .tCLR_min = 10000, + .tCLS_min = 10000, + .tCOH_min = 15000, + .tCS_min = 15000, + .tDH_min = 5000, + .tDS_min = 7000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 20000, + .tREA_max = 16000, + .tREH_min = 7000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 5000, + .tRP_min = 10000, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tWC_min = 20000, + .tWH_min = 7000, + .tWHR_min = 80000, + .tWP_min = 10000, + .tWW_min = 100000, + }, +}; + +/** + * onfi_async_timing_mode_to_sdr_timings - [NAND Interface] Retrieve NAND + * timings according to the given ONFI timing mode + * @mode: ONFI timing mode + */ +const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode) +{ + if (mode < 0 || mode >= ARRAY_SIZE(onfi_sdr_timings)) + return ERR_PTR(-EINVAL); + + return &onfi_sdr_timings[mode]; +} +EXPORT_SYMBOL(onfi_async_timing_mode_to_sdr_timings); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index cdda207c16e1..3083c53e0270 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -997,4 +997,7 @@ struct nand_sdr_timings { u32 tWP_min; u32 tWW_min; }; + +/* get timing characteristics from ONFI timing mode. */ +const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 76be4a54157ab0059fb29d8d516db46d239812e2 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 12 Jun 2014 17:15:22 +0530 Subject: ARM: OMAP2+: DMA: remove requirement of irq for platform-dma driver we have currently 2 DMA drivers that try to co-exist. drivers/dma/omap-dma.c which registers it's own IRQ and is device tree aware and uses arch/arm/plat-omap/dma.c instance created by arch/arm/mach-omap2/dma.c to maintain channel usage (omap_request_dma). Currently both try to register interrupts and mach-omap2/plat-omap dma.c attempts to use the IRQ number registered by hwmod to register it's own interrupt handler. Now, there is no reasonable way of static allocating DMA irq in GIC SPI when we use crossbar. However, since the dma_chan structure is freed as a result of IRQ not being present due to devm allocation, maintaining information of channel by platform code fails at a later point in time when that region of memory is reused. So, if hwmod does not indicate an IRQ number, then, assume that dma-engine will take care of the interrupt handling. Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/dma.c | 3 +++ arch/arm/plat-omap/dma.c | 5 +++-- include/linux/omap-dma.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c index a6d2cf1f8d02..e1a56d87599e 100644 --- a/arch/arm/mach-omap2/dma.c +++ b/arch/arm/mach-omap2/dma.c @@ -259,6 +259,9 @@ static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused) if (cpu_is_omap34xx() && (omap_type() != OMAP2_DEVICE_TYPE_GP)) d->dev_caps |= HS_CHANNELS_RESERVED; + if (platform_get_irq_byname(pdev, "0") < 0) + d->dev_caps |= DMA_ENGINE_HANDLE_IRQ; + /* Check the capabilities register for descriptor loading feature */ if (dma_read(CAPS_0, 0) & DMA_HAS_DESCRIPTOR_CAPS) dma_common_ch_end = CCDN; diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index b5608b1f9fbd..7aae0e5b188c 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -2100,7 +2100,7 @@ static int omap_system_dma_probe(struct platform_device *pdev) omap_dma_set_global_params(DMA_DEFAULT_ARB_RATE, DMA_DEFAULT_FIFO_DEPTH, 0); - if (dma_omap2plus()) { + if (dma_omap2plus() && !(d->dev_caps & DMA_ENGINE_HANDLE_IRQ)) { strcpy(irq_name, "0"); dma_irq = platform_get_irq_byname(pdev, irq_name); if (dma_irq < 0) { @@ -2145,7 +2145,8 @@ static int omap_system_dma_remove(struct platform_device *pdev) char irq_name[4]; strcpy(irq_name, "0"); dma_irq = platform_get_irq_byname(pdev, irq_name); - remove_irq(dma_irq, &omap24xx_dma_irq); + if (dma_irq >= 0) + remove_irq(dma_irq, &omap24xx_dma_irq); } else { int irq_rel = 0; for ( ; irq_rel < dma_chan_count; irq_rel++) { diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h index 88e6ea4a5d36..6f06f8bc612c 100644 --- a/include/linux/omap-dma.h +++ b/include/linux/omap-dma.h @@ -130,6 +130,7 @@ #define IS_WORD_16 BIT(0xd) #define ENABLE_16XX_MODE BIT(0xe) #define HS_CHANNELS_RESERVED BIT(0xf) +#define DMA_ENGINE_HANDLE_IRQ BIT(0x10) /* Defines for DMA Capabilities */ #define DMA_HAS_TRANSPARENT_CAPS (0x1 << 18) -- cgit v1.2.3 From 14c8a620ba436511b1347c592633befa49535176 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 23 Jul 2014 10:47:49 +0200 Subject: gpio: drop retval check enforcing from gpiochip_remove() As we start to decomission the return value from gpiochip_remove() the compilers emit warnings due to the function being tagged __must_check. So drop this until we remove the return value altogether. Cc: Abdoulaye Berthe Suggested-by: Stephen Rothwell Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 573e4f3243d0..ca3024554a2d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -141,7 +141,7 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, /* add/remove chips */ extern int gpiochip_add(struct gpio_chip *chip); -extern int __must_check gpiochip_remove(struct gpio_chip *chip); +extern int gpiochip_remove(struct gpio_chip *chip); extern struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)); -- cgit v1.2.3 From ec4c4d877becf1c224f45347f4fc0016765e00d0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Jul 2014 09:58:06 +0200 Subject: video: fix up versatile CLCD helper move commit 11c32d7b6274cb0f ("video: move Versatile CLCD helpers") moved files out of the plat-versatile directory but in the process got a few of the dependencies wrong: - If CONFIG_FB is not set, the file no longer gets built, resulting in a link error - If CONFIG_FB or CONFIG_FB_ARMCLCD are disabled, we also get a Kconfig warning for incorrect dependencies due to the symbol being 'select'ed from the platform Kconfig. - When the file is not built, we also get a link error for missing symbols. This patch should fix all three, by removing the 'select' statements, changing the Kconfig description of the symbol to be enabled in exactly the right configurations, and adding inline stub functions for the case when the framebuffer driver is disabled. Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij --- arch/arm/Kconfig | 2 -- arch/arm/mach-vexpress/Kconfig | 1 - drivers/video/fbdev/Kconfig | 6 ++---- include/linux/platform_data/video-clcd-versatile.h | 18 ++++++++++++++++++ 4 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c32064de77d8..11f18a04c066 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -332,7 +332,6 @@ config ARCH_REALVIEW select ICST select NEED_MACH_MEMORY_H select PLAT_VERSATILE - select PLAT_VERSATILE_CLCD help This enables support for ARM Ltd RealView boards. @@ -347,7 +346,6 @@ config ARCH_VERSATILE select HAVE_MACH_CLKDEV select ICST select PLAT_VERSATILE - select PLAT_VERSATILE_CLCD select PLAT_VERSATILE_CLOCK select VERSATILE_FPGA_IRQ help diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index d8b9330f896a..e9166dfc4756 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -13,7 +13,6 @@ menuconfig ARCH_VEXPRESS select ICST select NO_IOPORT_MAP select PLAT_VERSATILE - select PLAT_VERSATILE_CLCD select POWER_RESET select POWER_RESET_VEXPRESS select POWER_SUPPLY diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 92026d31bb48..bdf463072247 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -292,10 +292,8 @@ config FB_ARMCLCD # Helper logic selected only by the ARM Versatile platform family. config PLAT_VERSATILE_CLCD - depends on FB_ARMCLCD - depends on (PLAT_VERSATILE || ARCH_INTEGRATOR) - default y - bool + def_bool ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS + depends on FB_ARMCLCD && FB=y config FB_ACORN bool "Acorn VIDC support" diff --git a/include/linux/platform_data/video-clcd-versatile.h b/include/linux/platform_data/video-clcd-versatile.h index 6bb6a1d2019b..09ccf182af4d 100644 --- a/include/linux/platform_data/video-clcd-versatile.h +++ b/include/linux/platform_data/video-clcd-versatile.h @@ -1,9 +1,27 @@ #ifndef PLAT_CLCD_H #define PLAT_CLCD_H +#ifdef CONFIG_PLAT_VERSATILE_CLCD struct clcd_panel *versatile_clcd_get_panel(const char *); int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long); int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *); void versatile_clcd_remove_dma(struct clcd_fb *); +#else +static inline struct clcd_panel *versatile_clcd_get_panel(const char *s) +{ + return NULL; +} +static inline int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize) +{ + return -ENODEV; +} +static inline int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vm) +{ + return -ENODEV; +} +static inline void versatile_clcd_remove_dma(struct clcd_fb *fb) +{ +} +#endif #endif -- cgit v1.2.3 From 1bd6b601fe196b6fbce2c93536ce0f3f53577cec Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 22 Jul 2014 16:17:41 +0900 Subject: gpio: make gpiochip_get_desc() gpiolib-private As GPIO descriptors are not going to remain unique anymore, having this function public is not safe. Restrain its use to gpiolib since we have no user outside of it. Signed-off-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 2 +- drivers/gpio/gpiolib.c | 1 - drivers/gpio/gpiolib.h | 2 ++ include/linux/gpio/driver.h | 3 --- 4 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 3e2fae205bee..7cfdc2278905 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -23,7 +23,7 @@ #include #include -struct gpio_desc; +#include "gpiolib.h" /* Private data structure for of_gpiochip_find_and_xlate */ struct gg_data { diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index c5509359ba88..38d176e31379 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -82,7 +82,6 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, return &chip->desc[hwnum]; } -EXPORT_SYMBOL_GPL(gpiochip_get_desc); /** * Convert a GPIO descriptor to the integer namespace. diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 98020c393eb3..acbb9335f08c 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -51,6 +51,8 @@ void gpiochip_free_own_desc(struct gpio_desc *desc); struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np, const char *list_name, int index, enum of_gpio_flags *flags); +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum); + extern struct spinlock gpio_lock; extern struct list_head gpio_chips; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ca3024554a2d..88f92dfae545 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -151,9 +151,6 @@ void gpiod_unlock_as_irq(struct gpio_desc *desc); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); -struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, - u16 hwnum); - enum gpio_lookup_flags { GPIO_ACTIVE_HIGH = (0 << 0), GPIO_ACTIVE_LOW = (1 << 0), -- cgit v1.2.3 From d74be6dfea1b96cfb4bd79d9254fa9d21ed5f131 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 22 Jul 2014 16:17:42 +0900 Subject: gpio: remove gpiod_lock/unlock_as_irq() gpio_lock/unlock_as_irq() are working with (chip, offset) arguments and are thus not using the old integer namespace. Therefore, there is no reason to have gpiod variants of these functions working with descriptors, especially since the (chip, offset) tuple is more suitable to the users of these functions (GPIO drivers, whereas GPIO descriptors are targeted at GPIO consumers). Signed-off-by: Alexandre Courbot Signed-off-by: Linus Walleij --- Documentation/gpio/driver.txt | 4 ++-- drivers/gpio/gpiolib-acpi.c | 6 +++--- drivers/gpio/gpiolib-legacy.c | 12 ------------ drivers/gpio/gpiolib-sysfs.c | 4 ++-- drivers/gpio/gpiolib.c | 30 ++++++++++++++++-------------- include/asm-generic/gpio.h | 3 --- include/linux/gpio/driver.h | 4 ++-- 7 files changed, 25 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt index fa9a0a8b3734..224dbbcd1804 100644 --- a/Documentation/gpio/driver.txt +++ b/Documentation/gpio/driver.txt @@ -157,12 +157,12 @@ Locking IRQ usage Input GPIOs can be used as IRQ signals. When this happens, a driver is requested to mark the GPIO as being used as an IRQ: - int gpiod_lock_as_irq(struct gpio_desc *desc) + int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset) This will prevent the use of non-irq related GPIO APIs until the GPIO IRQ lock is released: - void gpiod_unlock_as_irq(struct gpio_desc *desc) + void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) When implementing an irqchip inside a GPIO driver, these two functions should typically be called in the .startup() and .shutdown() callbacks from the diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 4a987917c186..d2e8600df02c 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -157,7 +157,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, gpiod_direction_input(desc); - ret = gpiod_lock_as_irq(desc); + ret = gpio_lock_as_irq(chip, pin); if (ret) { dev_err(chip->dev, "Failed to lock GPIO as interrupt\n"); goto fail_free_desc; @@ -212,7 +212,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, fail_free_event: kfree(event); fail_unlock_irq: - gpiod_unlock_as_irq(desc); + gpio_unlock_as_irq(chip, pin); fail_free_desc: gpiochip_free_own_desc(desc); @@ -263,7 +263,7 @@ static void acpi_gpiochip_free_interrupts(struct acpi_gpio_chip *acpi_gpio) desc = gpiochip_get_desc(chip, event->pin); if (WARN_ON(IS_ERR(desc))) continue; - gpiod_unlock_as_irq(desc); + gpio_unlock_as_irq(chip, event->pin); gpiochip_free_own_desc(desc); list_del(&event->node); kfree(event); diff --git a/drivers/gpio/gpiolib-legacy.c b/drivers/gpio/gpiolib-legacy.c index c684d94cdbb4..078ae6c2df79 100644 --- a/drivers/gpio/gpiolib-legacy.c +++ b/drivers/gpio/gpiolib-legacy.c @@ -100,15 +100,3 @@ void gpio_free_array(const struct gpio *array, size_t num) gpio_free((array++)->gpio); } EXPORT_SYMBOL_GPL(gpio_free_array); - -int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset) -{ - return gpiod_lock_as_irq(gpiochip_get_desc(chip, offset)); -} -EXPORT_SYMBOL_GPL(gpio_lock_as_irq); - -void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) -{ - return gpiod_unlock_as_irq(gpiochip_get_desc(chip, offset)); -} -EXPORT_SYMBOL_GPL(gpio_unlock_as_irq); diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index f150aa288fa1..be45a9283c28 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -161,7 +161,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev, desc->flags &= ~GPIO_TRIGGER_MASK; if (!gpio_flags) { - gpiod_unlock_as_irq(desc); + gpio_unlock_as_irq(desc->chip, gpio_chip_hwgpio(desc)); ret = 0; goto free_id; } @@ -200,7 +200,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev, if (ret < 0) goto free_id; - ret = gpiod_lock_as_irq(desc); + ret = gpio_lock_as_irq(desc->chip, gpio_chip_hwgpio(desc)); if (ret < 0) { gpiod_warn(desc, "failed to flag the GPIO for IRQ\n"); goto free_id; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 38d176e31379..7582207c92e7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1428,44 +1428,46 @@ int gpiod_to_irq(const struct gpio_desc *desc) EXPORT_SYMBOL_GPL(gpiod_to_irq); /** - * gpiod_lock_as_irq() - lock a GPIO to be used as IRQ - * @gpio: the GPIO line to lock as used for IRQ + * gpio_lock_as_irq() - lock a GPIO to be used as IRQ + * @chip: the chip the GPIO to lock belongs to + * @offset: the offset of the GPIO to lock as IRQ * * This is used directly by GPIO drivers that want to lock down * a certain GPIO line to be used for IRQs. */ -int gpiod_lock_as_irq(struct gpio_desc *desc) +int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset) { - if (!desc) + if (offset >= chip->ngpio) return -EINVAL; - if (test_bit(FLAG_IS_OUT, &desc->flags)) { - gpiod_err(desc, + if (test_bit(FLAG_IS_OUT, &chip->desc[offset].flags)) { + chip_err(chip, "%s: tried to flag a GPIO set as output for IRQ\n", __func__); return -EIO; } - set_bit(FLAG_USED_AS_IRQ, &desc->flags); + set_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags); return 0; } -EXPORT_SYMBOL_GPL(gpiod_lock_as_irq); +EXPORT_SYMBOL_GPL(gpio_lock_as_irq); /** - * gpiod_unlock_as_irq() - unlock a GPIO used as IRQ - * @gpio: the GPIO line to unlock from IRQ usage + * gpio_unlock_as_irq() - unlock a GPIO used as IRQ + * @chip: the chip the GPIO to lock belongs to + * @offset: the offset of the GPIO to lock as IRQ * * This is used directly by GPIO drivers that want to indicate * that a certain GPIO is no longer used exclusively for IRQ. */ -void gpiod_unlock_as_irq(struct gpio_desc *desc) +void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) { - if (!desc) + if (offset >= chip->ngpio) return; - clear_bit(FLAG_USED_AS_IRQ, &desc->flags); + clear_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags); } -EXPORT_SYMBOL_GPL(gpiod_unlock_as_irq); +EXPORT_SYMBOL_GPL(gpio_unlock_as_irq); /** * gpiod_get_raw_value_cansleep() - return a gpio's raw value diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 23e364538ab5..c1d4105e1c1d 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -110,9 +110,6 @@ static inline int __gpio_to_irq(unsigned gpio) return gpiod_to_irq(gpio_to_desc(gpio)); } -extern int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset); -extern void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); - extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); extern int gpio_request_array(const struct gpio *array, size_t num); extern void gpio_free_array(const struct gpio *array, size_t num); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 88f92dfae545..c66c91682d9e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -146,8 +146,8 @@ extern struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)); /* lock/unlock as IRQ */ -int gpiod_lock_as_irq(struct gpio_desc *desc); -void gpiod_unlock_as_irq(struct gpio_desc *desc); +int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset); +void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); -- cgit v1.2.3 From d9bb5a43277d2dcc514fa693f741bbc38e2e2271 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Jul 2014 17:48:04 -0400 Subject: svcrdma: Double the default credit limit The RDMA credit limit controls how many concurrent RPCs are allowed per connection. An NFS/RDMA client and server exchange their credit limits in the RPC/RDMA headers. The Linux client and the Solaris client and server allow 32 credits. The Linux server allows only 16, which limits its performance. Set the server's default credit limit to 32, like the other well- known implementations, so the out-of-the-shrinkwrap performance of the Linux server is better. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 5cf99a016368..975da754c778 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -174,8 +174,7 @@ struct svcxprt_rdma { * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ #define RPCRDMA_ORD (64/4) #define RPCRDMA_SQ_DEPTH_MULT 8 -#define RPCRDMA_MAX_THREADS 16 -#define RPCRDMA_MAX_REQUESTS 16 +#define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 /* svc_rdma_marshal.c */ -- cgit v1.2.3 From 50a77c658b80e7e3303e3bcec195b30e2b62d513 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Wed, 23 Jul 2014 12:38:48 -0700 Subject: Input: atmel_mxt_ts - download device config using firmware loader The existing implementation which encodes the configuration as a binary blob in platform data is unsatisfactory since it requires a kernel recompile for the configuration to be changed, and it doesn't deal well with firmware changes that move values around on the chip. Atmel define an ASCII format for the configuration which can be exported from their tools. This patch implements a parser for that format which loads the configuration via the firmware loader and sends it to the MXT chip. Signed-off-by: Nick Dyer Acked-by: Benson Leung Acked-by: Yufeng Shen Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 278 ++++++++++++++++++++++-------- drivers/platform/chrome/chromeos_laptop.c | 4 - include/linux/i2c/atmel_mxt_ts.h | 3 - 3 files changed, 204 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 4317273c2138..c6b5e7dbd942 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -2,6 +2,7 @@ * Atmel maXTouch Touchscreen driver * * Copyright (C) 2010 Samsung Electronics Co.Ltd + * Copyright (C) 2011-2014 Atmel Corporation * Copyright (C) 2012 Google, Inc. * * Author: Joonyoung Shim @@ -30,8 +31,10 @@ #define MXT_VER_21 21 #define MXT_VER_22 22 -/* Firmware */ +/* Firmware files */ #define MXT_FW_NAME "maxtouch.fw" +#define MXT_CFG_NAME "maxtouch.cfg" +#define MXT_CFG_MAGIC "OBP_RAW V1" /* Registers */ #define MXT_INFO 0x00 @@ -298,37 +301,6 @@ static bool mxt_object_readable(unsigned int type) } } -static bool mxt_object_writable(unsigned int type) -{ - switch (type) { - case MXT_GEN_COMMAND_T6: - case MXT_GEN_POWER_T7: - case MXT_GEN_ACQUIRE_T8: - case MXT_TOUCH_MULTI_T9: - case MXT_TOUCH_KEYARRAY_T15: - case MXT_TOUCH_PROXIMITY_T23: - case MXT_TOUCH_PROXKEY_T52: - case MXT_PROCI_GRIPFACE_T20: - case MXT_PROCG_NOISE_T22: - case MXT_PROCI_ONETOUCH_T24: - case MXT_PROCI_TWOTOUCH_T27: - case MXT_PROCI_GRIP_T40: - case MXT_PROCI_PALM_T41: - case MXT_PROCI_TOUCHSUPPRESSION_T42: - case MXT_PROCI_STYLUS_T47: - case MXT_PROCG_NOISESUPPRESSION_T48: - case MXT_SPT_COMMSCONFIG_T18: - case MXT_SPT_GPIOPWM_T19: - case MXT_SPT_SELFTEST_T25: - case MXT_SPT_CTECONFIG_T28: - case MXT_SPT_DIGITIZER_T43: - case MXT_SPT_CTECONFIG_T46: - return true; - default: - return false; - } -} - static void mxt_dump_message(struct device *dev, struct mxt_message *message) { @@ -606,7 +578,7 @@ mxt_get_object(struct mxt_data *data, u8 type) return object; } - dev_err(&data->client->dev, "Invalid object type T%u\n", type); + dev_warn(&data->client->dev, "Invalid object type T%u\n", type); return NULL; } @@ -877,58 +849,197 @@ static void mxt_update_crc(struct mxt_data *data, u8 cmd, u8 value) mxt_wait_for_completion(data, &data->crc_completion, MXT_CRC_TIMEOUT); } -static int mxt_check_reg_init(struct mxt_data *data) +/* + * mxt_update_cfg - download configuration to chip + * + * Atmel Raw Config File Format + * + * The first four lines of the raw config file contain: + * 1) Version + * 2) Chip ID Information (first 7 bytes of device memory) + * 3) Chip Information Block 24-bit CRC Checksum + * 4) Chip Configuration 24-bit CRC Checksum + * + * The rest of the file consists of one line per object instance: + * + * + * - 2-byte object type as hex + * - 2-byte object instance number as hex + * - 2-byte object size as hex + * - array of 1-byte hex values + */ +static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg) { - const struct mxt_platform_data *pdata = data->pdata; - struct mxt_object *object; struct device *dev = &data->client->dev; - int index = 0; - int i, size; + struct mxt_info cfg_info; + struct mxt_object *object; int ret; + int offset; + int pos; + int i; + u32 info_crc, config_crc; + unsigned int type, instance, size; + u8 val; + u16 reg; - if (!pdata->config) { - dev_dbg(dev, "No cfg data defined, skipping reg init\n"); - return 0; + mxt_update_crc(data, MXT_COMMAND_REPORTALL, 1); + + if (strncmp(cfg->data, MXT_CFG_MAGIC, strlen(MXT_CFG_MAGIC))) { + dev_err(dev, "Unrecognised config file\n"); + ret = -EINVAL; + goto release; } - mxt_update_crc(data, MXT_COMMAND_REPORTALL, 1); + pos = strlen(MXT_CFG_MAGIC); + + /* Load information block and check */ + for (i = 0; i < sizeof(struct mxt_info); i++) { + ret = sscanf(cfg->data + pos, "%hhx%n", + (unsigned char *)&cfg_info + i, + &offset); + if (ret != 1) { + dev_err(dev, "Bad format\n"); + ret = -EINVAL; + goto release; + } - if (data->config_crc == pdata->config_crc) { - dev_info(dev, "Config CRC 0x%06X: OK\n", data->config_crc); - return 0; + pos += offset; + } + + if (cfg_info.family_id != data->info.family_id) { + dev_err(dev, "Family ID mismatch!\n"); + ret = -EINVAL; + goto release; } - dev_info(dev, "Config CRC 0x%06X: does not match 0x%06X\n", - data->config_crc, pdata->config_crc); + if (cfg_info.variant_id != data->info.variant_id) { + dev_err(dev, "Variant ID mismatch!\n"); + ret = -EINVAL; + goto release; + } - for (i = 0; i < data->info.object_num; i++) { - object = data->object_table + i; + if (cfg_info.version != data->info.version) + dev_err(dev, "Warning: version mismatch!\n"); - if (!mxt_object_writable(object->type)) + if (cfg_info.build != data->info.build) + dev_err(dev, "Warning: build num mismatch!\n"); + + ret = sscanf(cfg->data + pos, "%x%n", &info_crc, &offset); + if (ret != 1) { + dev_err(dev, "Bad format: failed to parse Info CRC\n"); + ret = -EINVAL; + goto release; + } + pos += offset; + + /* Check config CRC */ + ret = sscanf(cfg->data + pos, "%x%n", &config_crc, &offset); + if (ret != 1) { + dev_err(dev, "Bad format: failed to parse Config CRC\n"); + ret = -EINVAL; + goto release; + } + pos += offset; + + if (data->config_crc == config_crc) { + dev_dbg(dev, "Config CRC 0x%06X: OK\n", config_crc); + ret = 0; + goto release; + } + + dev_info(dev, "Config CRC 0x%06X: does not match file 0x%06X\n", + data->config_crc, config_crc); + + while (pos < cfg->size) { + /* Read type, instance, length */ + ret = sscanf(cfg->data + pos, "%x %x %x%n", + &type, &instance, &size, &offset); + if (ret == 0) { + /* EOF */ + ret = 1; + goto release; + } else if (ret != 3) { + dev_err(dev, "Bad format: failed to parse object\n"); + ret = -EINVAL; + goto release; + } + pos += offset; + + object = mxt_get_object(data, type); + if (!object) { + /* Skip object */ + for (i = 0; i < size; i++) { + ret = sscanf(cfg->data + pos, "%hhx%n", + &val, + &offset); + pos += offset; + } continue; + } - size = mxt_obj_size(object) * mxt_obj_instances(object); - if (index + size > pdata->config_length) { - dev_err(dev, "Not enough config data!\n"); - return -EINVAL; + if (size > mxt_obj_size(object)) { + dev_err(dev, "Discarding %zu byte(s) in T%u\n", + size - mxt_obj_size(object), type); } - ret = __mxt_write_reg(data->client, object->start_address, - size, &pdata->config[index]); - if (ret) - return ret; - index += size; + if (instance >= mxt_obj_instances(object)) { + dev_err(dev, "Object instances exceeded!\n"); + ret = -EINVAL; + goto release; + } + + reg = object->start_address + mxt_obj_size(object) * instance; + + for (i = 0; i < size; i++) { + ret = sscanf(cfg->data + pos, "%hhx%n", + &val, + &offset); + if (ret != 1) { + dev_err(dev, "Bad format in T%d\n", type); + ret = -EINVAL; + goto release; + } + pos += offset; + + if (i > mxt_obj_size(object)) + continue; + + ret = mxt_write_reg(data->client, reg + i, val); + if (ret) + goto release; + + } + + /* + * If firmware is upgraded, new bytes may be added to end of + * objects. It is generally forward compatible to zero these + * bytes - previous behaviour will be retained. However + * this does invalidate the CRC and will force a config + * download every time until the configuration is updated. + */ + if (size < mxt_obj_size(object)) { + dev_info(dev, "Zeroing %zu byte(s) in T%d\n", + mxt_obj_size(object) - size, type); + + for (i = size + 1; i < mxt_obj_size(object); i++) { + ret = mxt_write_reg(data->client, reg + i, 0); + if (ret) + goto release; + } + } } mxt_update_crc(data, MXT_COMMAND_BACKUPNV, MXT_BACKUP_VALUE); ret = mxt_soft_reset(data); if (ret) - return ret; + goto release; dev_info(dev, "Config successfully updated\n"); - return 0; +release: + release_firmware(cfg); + return ret; } static int mxt_make_highchg(struct mxt_data *data) @@ -1204,10 +1315,17 @@ err_free_mem: return error; } +static int mxt_configure_objects(struct mxt_data *data, + const struct firmware *cfg); + +static void mxt_config_cb(const struct firmware *cfg, void *ctx) +{ + mxt_configure_objects(ctx, cfg); +} + static int mxt_initialize(struct mxt_data *data) { struct i2c_client *client = data->client; - struct mxt_info *info = &data->info; int error; error = mxt_get_info(data); @@ -1225,28 +1343,40 @@ static int mxt_initialize(struct mxt_data *data) if (error) goto err_free_object_table; - /* Check register init values */ - error = mxt_check_reg_init(data); - if (error) { - dev_err(&client->dev, "Error %d initializing configuration\n", - error); - goto err_free_object_table; + request_firmware_nowait(THIS_MODULE, true, MXT_CFG_NAME, + &data->client->dev, GFP_KERNEL, data, + mxt_config_cb); + + return 0; + +err_free_object_table: + mxt_free_object_table(data); + return error; +} + +static int mxt_configure_objects(struct mxt_data *data, + const struct firmware *cfg) +{ + struct device *dev = &data->client->dev; + struct mxt_info *info = &data->info; + int error; + + if (cfg) { + error = mxt_update_cfg(data, cfg); + if (error) + dev_warn(dev, "Error %d updating config\n", error); } error = mxt_initialize_t9_input_device(data); if (error) - goto err_free_object_table; + return error; - dev_info(&client->dev, + dev_info(dev, "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n", info->family_id, info->variant_id, info->version >> 4, info->version & 0xf, info->build, info->object_num); return 0; - -err_free_object_table: - mxt_free_object_table(data); - return error; } /* Firmware Version is returned as Major.Minor.Build */ diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c index 7f1a2e2711bd..67b316b2a2bd 100644 --- a/drivers/platform/chrome/chromeos_laptop.c +++ b/drivers/platform/chrome/chromeos_laptop.c @@ -97,8 +97,6 @@ static struct mxt_platform_data atmel_224s_tp_platform_data = { .irqflags = IRQF_TRIGGER_FALLING, .t19_num_keys = ARRAY_SIZE(mxt_t19_keys), .t19_keymap = mxt_t19_keys, - .config = NULL, - .config_length = 0, }; static struct i2c_board_info atmel_224s_tp_device = { @@ -109,8 +107,6 @@ static struct i2c_board_info atmel_224s_tp_device = { static struct mxt_platform_data atmel_1664s_platform_data = { .irqflags = IRQF_TRIGGER_FALLING, - .config = NULL, - .config_length = 0, }; static struct i2c_board_info atmel_1664s_device = { diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index 3891dc1de21c..02bf6ea31701 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -17,9 +17,6 @@ /* The platform data for the Atmel maXTouch touchscreen driver */ struct mxt_platform_data { - const u8 *config; - size_t config_length; - u32 config_crc; unsigned long irqflags; u8 t19_num_keys; const unsigned int *t19_keymap; -- cgit v1.2.3 From 8a2b22a2595bf89d4396530edf8388159fad9d83 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 23 Jul 2014 17:05:06 -0600 Subject: of: Make devicetree sysfs update functions consistent. All of the DT modification functions are split into two parts, the first part manipulates the DT data structure, and the second part updates sysfs, but the code isn't very consistent about how the second half is called. They don't all enforce the same rules about when it is valid to update sysfs, and there isn't any clarity on locking. The transactional DT modification feature that is coming also needs access to these functions so that it can perform all the structure changes together, and then all the sysfs updates as a second stage instead of doing each one at a time. Fix up the second have by creating a separate __of_*_sysfs() function for each of the helpers. The new functions have consistent naming (ie. of_node_add() becomes __of_attach_node_sysfs()) and all of them now defer if of_init hasn't been called yet. Callers of the new functions must hold the of_mutex to ensure there are no race conditions with of_init(). The mutex ensures that there will only ever be one writer to the tree at any given time. There can still be any number of readers and the raw_spin_lock is still used to make sure access to the data structure is still consistent. Finally, put the function prototypes into of_private.h so they are accessible to the transaction code. Signed-off-by: Pantelis Antoniou [grant.likely: Changed suffix from _post to _sysfs to match existing code] [grant.likely: Reorganized to eliminate trivial wrappers] Signed-off-by: Grant Likely --- drivers/of/base.c | 96 +++++++++++++++++++++++++------------------------ drivers/of/dynamic.c | 12 +++++-- drivers/of/of_private.h | 10 ++++++ include/linux/of.h | 2 -- 4 files changed, 69 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index b403f9d98461..ad4929cbd876 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -37,10 +37,13 @@ struct device_node *of_chosen; struct device_node *of_aliases; static struct device_node *of_stdout; -static struct kset *of_kset; +struct kset *of_kset; /* - * Used to protect the of_aliases, to hold off addition of nodes to sysfs + * Used to protect the of_aliases, to hold off addition of nodes to sysfs. + * This mutex must be held whenever modifications are being made to the + * device tree. The of_{attach,detach}_node() and + * of_{add,remove,update}_property() helpers make sure this happens. */ DEFINE_MUTEX(of_mutex); @@ -127,13 +130,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name) return name; } -static int __of_add_property_sysfs(struct device_node *np, struct property *pp) +int __of_add_property_sysfs(struct device_node *np, struct property *pp) { int rc; /* Important: Don't leak passwords */ bool secure = strncmp(pp->name, "security-", 9) == 0; + if (!of_kset || !of_node_is_attached(np)) + return 0; + sysfs_bin_attr_init(&pp->attr); pp->attr.attr.name = safe_name(&np->kobj, pp->name); pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO; @@ -145,12 +151,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp) return rc; } -static int __of_node_add(struct device_node *np) +int __of_attach_node_sysfs(struct device_node *np) { const char *name; struct property *pp; int rc; + if (!of_kset) + return 0; + np->kobj.kset = of_kset; if (!np->parent) { /* Nodes without parents are new top level trees */ @@ -172,26 +181,6 @@ static int __of_node_add(struct device_node *np) return 0; } -int of_node_add(struct device_node *np) -{ - int rc = 0; - - BUG_ON(!of_node_is_initialized(np)); - - /* - * Grab the mutex here so that in a race condition between of_init() and - * of_node_add(), node addition will still be consistent. - */ - mutex_lock(&of_mutex); - if (of_kset) - rc = __of_node_add(np); - else - /* This scenario may be perfectly valid, but report it anyway */ - pr_info("of_node_add(%s) before of_init()\n", np->full_name); - mutex_unlock(&of_mutex); - return rc; -} - static int __init of_init(void) { struct device_node *np; @@ -204,7 +193,7 @@ static int __init of_init(void) return -ENOMEM; } for_each_of_allnodes(np) - __of_node_add(np); + __of_attach_node_sysfs(np); mutex_unlock(&of_mutex); /* Symlink in /proc as required by userspace ABI */ @@ -1689,15 +1678,17 @@ int of_add_property(struct device_node *np, struct property *prop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_add_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - if (of_node_is_attached(np)) + if (!rc) __of_add_property_sysfs(np, prop); + mutex_unlock(&of_mutex); + return rc; } @@ -1720,6 +1711,13 @@ int __of_remove_property(struct device_node *np, struct property *prop) return 0; } +void __of_remove_property_sysfs(struct device_node *np, struct property *prop) +{ + /* at early boot, bail here and defer setup to of_init() */ + if (of_kset && of_node_is_attached(np)) + sysfs_remove_bin_file(&np->kobj, &prop->attr); +} + /** * of_remove_property - Remove a property from a node. * @@ -1737,20 +1735,18 @@ int of_remove_property(struct device_node *np, struct property *prop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_remove_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - - /* at early boot, bail hear and defer setup to of_init() */ - if (!of_kset) - return 0; + if (!rc) + __of_remove_property_sysfs(np, prop); - sysfs_remove_bin_file(&np->kobj, &prop->attr); + mutex_unlock(&of_mutex); - return 0; + return rc; } int __of_update_property(struct device_node *np, struct property *newprop, @@ -1779,6 +1775,18 @@ int __of_update_property(struct device_node *np, struct property *newprop, return 0; } +void __of_update_property_sysfs(struct device_node *np, struct property *newprop, + struct property *oldprop) +{ + /* At early boot, bail out and defer setup to of_init() */ + if (!of_kset) + return; + + if (oldprop) + sysfs_remove_bin_file(&np->kobj, &oldprop->attr); + __of_add_property_sysfs(np, newprop); +} + /* * of_update_property - Update a property in a node, if the property does * not exist, add it. @@ -1801,22 +1809,18 @@ int of_update_property(struct device_node *np, struct property *newprop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_update_property(np, newprop, &oldprop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - /* At early boot, bail out and defer setup to of_init() */ - if (!of_kset) - return 0; + if (!rc) + __of_update_property_sysfs(np, newprop, oldprop); - /* Update the sysfs attribute */ - if (oldprop) - sysfs_remove_bin_file(&np->kobj, &oldprop->attr); - __of_add_property_sysfs(np, newprop); + mutex_unlock(&of_mutex); - return 0; + return rc; } static void of_alias_add(struct alias_prop *ap, struct device_node *np, diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 75fcc66fcefd..c875787fa394 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -41,11 +41,13 @@ void of_node_put(struct device_node *node) } EXPORT_SYMBOL(of_node_put); -static void of_node_remove(struct device_node *np) +void __of_detach_node_sysfs(struct device_node *np) { struct property *pp; BUG_ON(!of_node_is_initialized(np)); + if (!of_kset) + return; /* only remove properties if on sysfs */ if (of_node_is_attached(np)) { @@ -115,11 +117,13 @@ int of_attach_node(struct device_node *np) if (rc) return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_attach_node(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); - of_node_add(np); + __of_attach_node_sysfs(np); + mutex_unlock(&of_mutex); return 0; } @@ -174,11 +178,13 @@ int of_detach_node(struct device_node *np) if (rc) return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_detach_node(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); - of_node_remove(np); + __of_detach_node_sysfs(np); + mutex_unlock(&of_mutex); return rc; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 0f6089722af9..0d99ba8caeed 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -33,6 +33,8 @@ struct alias_prop { extern struct mutex of_mutex; extern struct list_head aliases_lookup; +extern struct kset *of_kset; + static inline struct device_node *kobj_to_device_node(struct kobject *kobj) { @@ -62,11 +64,19 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags); struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags); extern int __of_add_property(struct device_node *np, struct property *prop); +extern int __of_add_property_sysfs(struct device_node *np, + struct property *prop); extern int __of_remove_property(struct device_node *np, struct property *prop); +extern void __of_remove_property_sysfs(struct device_node *np, + struct property *prop); extern int __of_update_property(struct device_node *np, struct property *newprop, struct property **oldprop); +extern void __of_update_property_sysfs(struct device_node *np, + struct property *newprop, struct property *oldprop); extern void __of_attach_node(struct device_node *np); +extern int __of_attach_node_sysfs(struct device_node *np); extern void __of_detach_node(struct device_node *np); +extern void __of_detach_node_sysfs(struct device_node *np); #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/include/linux/of.h b/include/linux/of.h index abf829a1f150..705fa12fca7f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -74,8 +74,6 @@ struct of_phandle_args { uint32_t args[MAX_PHANDLE_ARGS]; }; -extern int of_node_add(struct device_node *node); - /* initialize a node */ extern struct kobj_type of_node_ktype; static inline void of_node_init(struct device_node *node) -- cgit v1.2.3 From 259092a35c7e11f1d4616b0f5b3ba7b851fe4fa6 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 16 Jul 2014 12:48:23 -0600 Subject: of: Reorder device tree changes and notifiers Currently, devicetree reconfig notifiers get emitted before the change is applied to the tree, but that behaviour is problematic if the receiver wants the determine the new state of the tree. The current users don't care, but the changeset code to follow will be making multiple changes at once. Reorder notifiers to get emitted after the change has been applied to the tree so that callbacks see the new tree state. At the same time, fixup the existing callbacks to expect the new order. There are a few callbacks that compare the old and new values of a changed property. Put both property pointers into the of_prop_reconfig structure. The current notifiers also allow the notifier callback to fail and cancel the change to the tree, but that feature isn't actually used. It really isn't valid to ignore a tree modification provided by firmware anyway, so remove the ability to cancel a change to the tree. Signed-off-by: Grant Likely Cc: Nathan Fontenot --- arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +- drivers/crypto/nx/nx-842.c | 30 +++++++------------------ drivers/of/base.c | 21 ++++++++--------- drivers/of/dynamic.c | 18 +++++++-------- drivers/of/of_private.h | 4 ++-- include/linux/of.h | 1 + 6 files changed, 29 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7995135170a3..ac01e188faef 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -194,7 +194,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) if (!memblock_size) return -EINVAL; - p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL); + p = (u32 *) pr->old_prop->value; if (!p) return -EINVAL; diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 502edf0a2933..c897c3a5ee17 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop) goto error_out; } - /* Set ptr to new property if provided */ - if (new_prop) { - /* Single property */ - if (!strncmp(new_prop->name, "status", new_prop->length)) { - status = new_prop; - - } else if (!strncmp(new_prop->name, "ibm,max-sg-len", - new_prop->length)) { - maxsglen = new_prop; - - } else if (!strncmp(new_prop->name, "ibm,max-sync-cop", - new_prop->length)) { - maxsyncop = new_prop; - - } else { - /* - * Skip the update, the property being updated - * has no impact. - */ - goto out; - } - } + /* + * If this is a property update, there are only certain properties that + * we care about. Bail if it isn't in the below list + */ + if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) + goto out; /* Perform property updates */ ret = nx842_OF_upd_status(new_devdata, status); diff --git a/drivers/of/base.c b/drivers/of/base.c index ededf8e33145..a7ad1013edfa 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1674,10 +1674,6 @@ int of_add_property(struct device_node *np, struct property *prop) unsigned long flags; int rc; - rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1689,6 +1685,9 @@ int of_add_property(struct device_node *np, struct property *prop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL); + return rc; } @@ -1731,10 +1730,6 @@ int of_remove_property(struct device_node *np, struct property *prop) unsigned long flags; int rc; - rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1746,6 +1741,9 @@ int of_remove_property(struct device_node *np, struct property *prop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL); + return rc; } @@ -1805,10 +1803,6 @@ int of_update_property(struct device_node *np, struct property *newprop) if (!newprop->name) return -EINVAL; - rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1820,6 +1814,9 @@ int of_update_property(struct device_node *np, struct property *newprop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop); + return rc; } diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 7c020b9a3317..7bd5501736a6 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -83,7 +83,7 @@ int of_reconfig_notify(unsigned long action, void *p) } int of_property_notify(int action, struct device_node *np, - struct property *prop) + struct property *prop, struct property *oldprop) { struct of_prop_reconfig pr; @@ -93,6 +93,7 @@ int of_property_notify(int action, struct device_node *np, pr.dn = np; pr.prop = prop; + pr.old_prop = oldprop; return of_reconfig_notify(action, &pr); } @@ -125,11 +126,6 @@ void __of_attach_node(struct device_node *np) int of_attach_node(struct device_node *np) { unsigned long flags; - int rc; - - rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); - if (rc) - return rc; mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -138,6 +134,9 @@ int of_attach_node(struct device_node *np) __of_attach_node_sysfs(np); mutex_unlock(&of_mutex); + + of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); + return 0; } @@ -188,10 +187,6 @@ int of_detach_node(struct device_node *np) unsigned long flags; int rc = 0; - rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_detach_node(np); @@ -199,6 +194,9 @@ int of_detach_node(struct device_node *np) __of_detach_node_sysfs(np); mutex_unlock(&of_mutex); + + of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); + return rc; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 8129c0e58d70..f69ccb1fa308 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -43,11 +43,11 @@ static inline struct device_node *kobj_to_device_node(struct kobject *kobj) #if defined(CONFIG_OF_DYNAMIC) extern int of_property_notify(int action, struct device_node *np, - struct property *prop); + struct property *prop, struct property *old_prop); extern void of_node_release(struct kobject *kobj); #else /* CONFIG_OF_DYNAMIC */ static inline int of_property_notify(int action, struct device_node *np, - struct property *prop) + struct property *prop, struct property *old_prop) { return 0; } diff --git a/include/linux/of.h b/include/linux/of.h index 705fa12fca7f..400f18cb4fff 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -321,6 +321,7 @@ extern int of_update_property(struct device_node *np, struct property *newprop); struct of_prop_reconfig { struct device_node *dn; struct property *prop; + struct property *old_prop; }; extern int of_reconfig_notifier_register(struct notifier_block *); -- cgit v1.2.3 From 201c910bd6898d81d4ac6685d0f421b7e10f3c5d Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 4 Jul 2014 19:58:49 +0300 Subject: of: Transactional DT support. Introducing DT transactional support. A DT transaction is a method which allows one to apply changes in the live tree, in such a way that either the full set of changes take effect, or the state of the tree can be rolled-back to the state it was before it was attempted. An applied transaction can be rolled-back at any time. Documentation is in Documentation/devicetree/changesets.txt Signed-off-by: Pantelis Antoniou [glikely: Removed device notifiers and reworked to be more consistent] Signed-off-by: Grant Likely --- Documentation/devicetree/changesets.txt | 40 ++++ drivers/of/dynamic.c | 344 ++++++++++++++++++++++++++++++++ drivers/of/of_private.h | 9 + drivers/of/selftest.c | 51 +++++ drivers/of/testcase-data/testcases.dtsi | 10 + include/linux/of.h | 76 +++++++ 6 files changed, 530 insertions(+) create mode 100644 Documentation/devicetree/changesets.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/changesets.txt b/Documentation/devicetree/changesets.txt new file mode 100644 index 000000000000..935ba5acc34e --- /dev/null +++ b/Documentation/devicetree/changesets.txt @@ -0,0 +1,40 @@ +A DT changeset is a method which allows one to apply changes +in the live tree in such a way that either the full set of changes +will be applied, or none of them will be. If an error occurs partway +through applying the changeset, then the tree will be rolled back to the +previous state. A changeset can also be removed after it has been +applied. + +When a changeset is applied, all of the changes get applied to the tree +at once before emitting OF_RECONFIG notifiers. This is so that the +receiver sees a complete and consistent state of the tree when it +receives the notifier. + +The sequence of a changeset is as follows. + +1. of_changeset_init() - initializes a changeset + +2. A number of DT tree change calls, of_changeset_attach_node(), +of_changeset_detach_node(), of_changeset_add_property(), +of_changeset_remove_property, of_changeset_update_property() to prepare +a set of changes. No changes to the active tree are made at this point. +All the change operations are recorded in the of_changeset 'entries' +list. + +3. mutex_lock(of_mutex) - starts a changeset; The global of_mutex +ensures there can only be one editor at a time. + +4. of_changeset_apply() - Apply the changes to the tree. Either the +entire changeset will get applied, or if there is an error the tree will +be restored to the previous state + +5. mutex_unlock(of_mutex) - All operations complete, release the mutex + +If a successfully applied changeset needs to be removed, it can be done +with the following sequence. + +1. mutex_lock(of_mutex) + +2. of_changeset_revert() + +3. mutex_unlock(of_mutex) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 7bd5501736a6..c1002b7be786 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -314,3 +314,347 @@ struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags) kfree(node); return NULL; } + +static void __of_changeset_entry_destroy(struct of_changeset_entry *ce) +{ + of_node_put(ce->np); + list_del(&ce->node); + kfree(ce); +} + +#ifdef DEBUG +static void __of_changeset_entry_dump(struct of_changeset_entry *ce) +{ + switch (ce->action) { + case OF_RECONFIG_ADD_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "ADD_PROPERTY ", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_REMOVE_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "REMOVE_PROPERTY", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "UPDATE_PROPERTY", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_ATTACH_NODE: + pr_debug("%p: %s %s\n", + ce, "ATTACH_NODE ", ce->np->full_name); + break; + case OF_RECONFIG_DETACH_NODE: + pr_debug("%p: %s %s\n", + ce, "DETACH_NODE ", ce->np->full_name); + break; + } +} +#else +static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce) +{ + /* empty */ +} +#endif + +static void __of_changeset_entry_invert(struct of_changeset_entry *ce, + struct of_changeset_entry *rce) +{ + memcpy(rce, ce, sizeof(*rce)); + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + rce->action = OF_RECONFIG_DETACH_NODE; + break; + case OF_RECONFIG_DETACH_NODE: + rce->action = OF_RECONFIG_ATTACH_NODE; + break; + case OF_RECONFIG_ADD_PROPERTY: + rce->action = OF_RECONFIG_REMOVE_PROPERTY; + break; + case OF_RECONFIG_REMOVE_PROPERTY: + rce->action = OF_RECONFIG_ADD_PROPERTY; + break; + case OF_RECONFIG_UPDATE_PROPERTY: + rce->old_prop = ce->prop; + rce->prop = ce->old_prop; + break; + } +} + +static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert) +{ + struct of_changeset_entry ce_inverted; + int ret; + + if (revert) { + __of_changeset_entry_invert(ce, &ce_inverted); + ce = &ce_inverted; + } + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + case OF_RECONFIG_DETACH_NODE: + ret = of_reconfig_notify(ce->action, ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + case OF_RECONFIG_REMOVE_PROPERTY: + case OF_RECONFIG_UPDATE_PROPERTY: + ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop); + break; + default: + pr_err("%s: invalid devicetree changeset action: %i\n", __func__, + (int)ce->action); + return; + } + + if (ret) + pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name); +} + +static int __of_changeset_entry_apply(struct of_changeset_entry *ce) +{ + struct property *old_prop, **propp; + unsigned long flags; + int ret = 0; + + __of_changeset_entry_dump(ce); + + raw_spin_lock_irqsave(&devtree_lock, flags); + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + __of_attach_node(ce->np); + break; + case OF_RECONFIG_DETACH_NODE: + __of_detach_node(ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + /* If the property is in deadprops then it must be removed */ + for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) { + if (*propp == ce->prop) { + *propp = ce->prop->next; + ce->prop->next = NULL; + break; + } + } + + ret = __of_add_property(ce->np, ce->prop); + if (ret) { + pr_err("%s: add_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + case OF_RECONFIG_REMOVE_PROPERTY: + ret = __of_remove_property(ce->np, ce->prop); + if (ret) { + pr_err("%s: remove_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + + case OF_RECONFIG_UPDATE_PROPERTY: + /* If the property is in deadprops then it must be removed */ + for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) { + if (*propp == ce->prop) { + *propp = ce->prop->next; + ce->prop->next = NULL; + break; + } + } + + ret = __of_update_property(ce->np, ce->prop, &old_prop); + if (ret) { + pr_err("%s: update_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + default: + ret = -EINVAL; + } + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + if (ret) + return ret; + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + __of_attach_node_sysfs(ce->np); + break; + case OF_RECONFIG_DETACH_NODE: + __of_detach_node_sysfs(ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + /* ignore duplicate names */ + __of_add_property_sysfs(ce->np, ce->prop); + break; + case OF_RECONFIG_REMOVE_PROPERTY: + __of_remove_property_sysfs(ce->np, ce->prop); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + __of_update_property_sysfs(ce->np, ce->prop, ce->old_prop); + break; + } + + return 0; +} + +static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce) +{ + struct of_changeset_entry ce_inverted; + + __of_changeset_entry_invert(ce, &ce_inverted); + return __of_changeset_entry_apply(&ce_inverted); +} + +/** + * of_changeset_init - Initialize a changeset for use + * + * @ocs: changeset pointer + * + * Initialize a changeset structure + */ +void of_changeset_init(struct of_changeset *ocs) +{ + memset(ocs, 0, sizeof(*ocs)); + INIT_LIST_HEAD(&ocs->entries); +} + +/** + * of_changeset_destroy - Destroy a changeset + * + * @ocs: changeset pointer + * + * Destroys a changeset. Note that if a changeset is applied, + * its changes to the tree cannot be reverted. + */ +void of_changeset_destroy(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce, *cen; + + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) + __of_changeset_entry_destroy(ce); +} + +/** + * of_changeset_apply - Applies a changeset + * + * @ocs: changeset pointer + * + * Applies a changeset to the live tree. + * Any side-effects of live tree state changes are applied here on + * sucess, like creation/destruction of devices and side-effects + * like creation of sysfs properties and directories. + * Returns 0 on success, a negative error value in case of an error. + * On error the partially applied effects are reverted. + */ +int of_changeset_apply(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce; + int ret; + + /* perform the rest of the work */ + pr_debug("of_changeset: applying...\n"); + list_for_each_entry(ce, &ocs->entries, node) { + ret = __of_changeset_entry_apply(ce); + if (ret) { + pr_err("%s: Error applying changeset (%d)\n", __func__, ret); + list_for_each_entry_continue_reverse(ce, &ocs->entries, node) + __of_changeset_entry_revert(ce); + return ret; + } + } + pr_debug("of_changeset: applied, emitting notifiers.\n"); + + /* drop the global lock while emitting notifiers */ + mutex_unlock(&of_mutex); + list_for_each_entry(ce, &ocs->entries, node) + __of_changeset_entry_notify(ce, 0); + mutex_lock(&of_mutex); + pr_debug("of_changeset: notifiers sent.\n"); + + return 0; +} + +/** + * of_changeset_revert - Reverts an applied changeset + * + * @ocs: changeset pointer + * + * Reverts a changeset returning the state of the tree to what it + * was before the application. + * Any side-effects like creation/destruction of devices and + * removal of sysfs properties and directories are applied. + * Returns 0 on success, a negative error value in case of an error. + */ +int of_changeset_revert(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce; + int ret; + + pr_debug("of_changeset: reverting...\n"); + list_for_each_entry_reverse(ce, &ocs->entries, node) { + ret = __of_changeset_entry_revert(ce); + if (ret) { + pr_err("%s: Error reverting changeset (%d)\n", __func__, ret); + list_for_each_entry_continue(ce, &ocs->entries, node) + __of_changeset_entry_apply(ce); + return ret; + } + } + pr_debug("of_changeset: reverted, emitting notifiers.\n"); + + /* drop the global lock while emitting notifiers */ + mutex_unlock(&of_mutex); + list_for_each_entry_reverse(ce, &ocs->entries, node) + __of_changeset_entry_notify(ce, 1); + mutex_lock(&of_mutex); + pr_debug("of_changeset: notifiers sent.\n"); + + return 0; +} + +/** + * of_changeset_action - Perform a changeset action + * + * @ocs: changeset pointer + * @action: action to perform + * @np: Pointer to device node + * @prop: Pointer to property + * + * On action being one of: + * + OF_RECONFIG_ATTACH_NODE + * + OF_RECONFIG_DETACH_NODE, + * + OF_RECONFIG_ADD_PROPERTY + * + OF_RECONFIG_REMOVE_PROPERTY, + * + OF_RECONFIG_UPDATE_PROPERTY + * Returns 0 on success, a negative error value in case of an error. + */ +int of_changeset_action(struct of_changeset *ocs, unsigned long action, + struct device_node *np, struct property *prop) +{ + struct of_changeset_entry *ce; + + ce = kzalloc(sizeof(*ce), GFP_KERNEL); + if (!ce) { + pr_err("%s: Failed to allocate\n", __func__); + return -ENOMEM; + } + /* get a reference to the node */ + ce->action = action; + ce->np = of_node_get(np); + ce->prop = prop; + + if (action == OF_RECONFIG_UPDATE_PROPERTY && prop) + ce->old_prop = of_find_property(np, prop->name, NULL); + + /* add it to the list */ + list_add_tail(&ce->node, &ocs->entries); + return 0; +} diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index f69ccb1fa308..858e0a5d9a11 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -81,4 +81,13 @@ extern int __of_attach_node_sysfs(struct device_node *np); extern void __of_detach_node(struct device_node *np); extern void __of_detach_node_sysfs(struct device_node *np); +/* iterators for transactions, used for overlays */ +/* forward iterator */ +#define for_each_transaction_entry(_oft, _te) \ + list_for_each_entry(_te, &(_oft)->te_list, node) + +/* reverse iterator */ +#define for_each_transaction_entry_reverse(_oft, _te) \ + list_for_each_entry_reverse(_te, &(_oft)->te_list, node) + #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index ee2166f0f36a..04e39a183e53 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -293,6 +293,56 @@ static void __init of_selftest_property_copy(void) #endif } +static void __init of_selftest_changeset(void) +{ +#ifdef CONFIG_OF_DYNAMIC + struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" }; + struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" }; + struct property *ppremove; + struct device_node *n1, *n2, *n21, *nremove, *parent; + struct of_changeset chgset; + + of_changeset_init(&chgset); + n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL); + selftest(n1, "testcase setup failure\n"); + n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL); + selftest(n2, "testcase setup failure\n"); + n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL); + selftest(n21, "testcase setup failure %p\n", n21); + nremove = of_find_node_by_path("/testcase-data/changeset/node-remove"); + selftest(nremove, "testcase setup failure\n"); + ppadd = __of_prop_dup(&padd, GFP_KERNEL); + selftest(ppadd, "testcase setup failure\n"); + ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL); + selftest(ppupdate, "testcase setup failure\n"); + parent = nremove->parent; + n1->parent = parent; + n2->parent = parent; + n21->parent = n2; + n2->child = n21; + ppremove = of_find_property(parent, "prop-remove", NULL); + selftest(ppremove, "failed to find removal prop"); + + of_changeset_init(&chgset); + selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n"); + selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n"); + selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n"); + selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n"); + selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n"); + selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n"); + selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n"); + mutex_lock(&of_mutex); + selftest(!of_changeset_apply(&chgset), "apply failed\n"); + mutex_unlock(&of_mutex); + + mutex_lock(&of_mutex); + selftest(!of_changeset_revert(&chgset), "revert failed\n"); + mutex_unlock(&of_mutex); + + of_changeset_destroy(&chgset); +#endif +} + static void __init of_selftest_parse_interrupts(void) { struct device_node *np; @@ -561,6 +611,7 @@ static int __init of_selftest(void) of_selftest_parse_phandle_with_args(); of_selftest_property_match_string(); of_selftest_property_copy(); + of_selftest_changeset(); of_selftest_parse_interrupts(); of_selftest_parse_interrupts_extended(); of_selftest_match_node(); diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi index 6d8d980ac858..669bb07df142 100644 --- a/drivers/of/testcase-data/testcases.dtsi +++ b/drivers/of/testcase-data/testcases.dtsi @@ -1,3 +1,13 @@ +/ { + testcase-data { + changeset { + prop-update = "hello"; + prop-remove = "world"; + node-remove { + }; + }; + }; +}; #include "tests-phandle.dtsi" #include "tests-interrupts.dtsi" #include "tests-match.dtsi" diff --git a/include/linux/of.h b/include/linux/of.h index 400f18cb4fff..bc91fbb13ce8 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -786,4 +786,80 @@ typedef void (*of_init_fn_1)(struct device_node *); #define OF_DECLARE_2(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_2) +/** + * struct of_changeset_entry - Holds a changeset entry + * + * @node: list_head for the log list + * @action: notifier action + * @np: pointer to the device node affected + * @prop: pointer to the property affected + * @old_prop: hold a pointer to the original property + * + * Every modification of the device tree during a changeset + * is held in a list of of_changeset_entry structures. + * That way we can recover from a partial application, or we can + * revert the changeset + */ +struct of_changeset_entry { + struct list_head node; + unsigned long action; + struct device_node *np; + struct property *prop; + struct property *old_prop; +}; + +/** + * struct of_changeset - changeset tracker structure + * + * @entries: list_head for the changeset entries + * + * changesets are a convenient way to apply bulk changes to the + * live tree. In case of an error, changes are rolled-back. + * changesets live on after initial application, and if not + * destroyed after use, they can be reverted in one single call. + */ +struct of_changeset { + struct list_head entries; +}; + +#ifdef CONFIG_OF_DYNAMIC +extern void of_changeset_init(struct of_changeset *ocs); +extern void of_changeset_destroy(struct of_changeset *ocs); +extern int of_changeset_apply(struct of_changeset *ocs); +extern int of_changeset_revert(struct of_changeset *ocs); +extern int of_changeset_action(struct of_changeset *ocs, + unsigned long action, struct device_node *np, + struct property *prop); + +static inline int of_changeset_attach_node(struct of_changeset *ocs, + struct device_node *np) +{ + return of_changeset_action(ocs, OF_RECONFIG_ATTACH_NODE, np, NULL); +} + +static inline int of_changeset_detach_node(struct of_changeset *ocs, + struct device_node *np) +{ + return of_changeset_action(ocs, OF_RECONFIG_DETACH_NODE, np, NULL); +} + +static inline int of_changeset_add_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_ADD_PROPERTY, np, prop); +} + +static inline int of_changeset_remove_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_REMOVE_PROPERTY, np, prop); +} + +static inline int of_changeset_update_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop); +} +#endif + #endif /* _LINUX_OF_H */ -- cgit v1.2.3 From f7d4ad98fdd08932ffda2354c62e2e2ee059adcc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 22 Jul 2014 08:01:01 -0700 Subject: gpiolib: Export gpiochip_request_own_desc and gpiochip_free_own_desc Both functions were introduced to let gpio drivers request their own gpio pins. Without exporting the functions, this can however only be used by gpio drivers built into the kernel. Secondary impact is that the functions can not currently be used by platform initialization code associated with the gpio-pca953x driver. This code permits auto-export of gpio pins through platform data, but if this functionality is used, the module can no longer be unloaded due to the problem solved with the introduction of gpiochip_request_own_desc and gpiochip_free_own_desc. Export both function so they can be used from modules and from platform initialization code. Reviewed-by: Alexandre Courbot Reviewed-by: Mika Westerberg Signed-off-by: Guenter Roeck Signed-off-by: Linus Walleij --- Documentation/gpio/driver.txt | 21 +++++++++++++++++++++ drivers/gpio/gpiolib.c | 2 ++ drivers/gpio/gpiolib.h | 3 --- include/linux/gpio/driver.h | 3 +++ 4 files changed, 26 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt index 224dbbcd1804..18790c237977 100644 --- a/Documentation/gpio/driver.txt +++ b/Documentation/gpio/driver.txt @@ -167,3 +167,24 @@ is released: When implementing an irqchip inside a GPIO driver, these two functions should typically be called in the .startup() and .shutdown() callbacks from the irqchip. + + +Requesting self-owned GPIO pins +------------------------------- + +Sometimes it is useful to allow a GPIO chip driver to request its own GPIO +descriptors through the gpiolib API. Using gpio_request() for this purpose +does not help since it pins the module to the kernel forever (it calls +try_module_get()). A GPIO driver can use the following functions instead +to request and free descriptors without being pinned to the kernel forever. + + int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) + + void gpiochip_free_own_desc(struct gpio_desc *desc) + +Descriptors requested with gpiochip_request_own_desc() must be released with +gpiochip_free_own_desc(). + +These functions must be used with care since they do not affect module use +count. Do not use the functions to request gpio descriptors not owned by the +calling driver. diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 412d64e93cfb..768f0831db18 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -897,6 +897,7 @@ int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) return __gpiod_request(desc, label); } +EXPORT_SYMBOL_GPL(gpiochip_request_own_desc); /** * gpiochip_free_own_desc - Free GPIO requested by the chip driver @@ -910,6 +911,7 @@ void gpiochip_free_own_desc(struct gpio_desc *desc) if (desc) __gpiod_free(desc); } +EXPORT_SYMBOL_GPL(gpiochip_free_own_desc); /* Drivers MUST set GPIO direction before making get/set calls. In * some cases this is done in early boot, before IRQs are enabled. diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index acbb9335f08c..7fcb645ded4c 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -45,9 +45,6 @@ acpi_get_gpiod_by_index(struct device *dev, int index, } #endif -int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label); -void gpiochip_free_own_desc(struct gpio_desc *desc); - struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np, const char *list_name, int index, enum of_gpio_flags *flags); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c66c91682d9e..4c463fb0155e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -220,6 +220,9 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip, #endif /* CONFIG_GPIO_IRQCHIP */ +int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label); +void gpiochip_free_own_desc(struct gpio_desc *desc); + #else /* CONFIG_GPIOLIB */ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) -- cgit v1.2.3 From f892afb07eeecf575179c4747952644a82a92a36 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 16 Jun 2014 11:31:05 +0800 Subject: dmaengine: imx-sdma: Add a new DMATYPE for Shared Peripheral ASRC Shared Peripheral ASRC, running on SPBA, needs to use shp sciprts for DMA transfer. So this patch just adds a new DMATYPE for it. Signed-off-by: Nicolin Chen Acked-by: Shawn Guo Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt | 1 + drivers/dma/imx-sdma.c | 5 +++++ include/linux/platform_data/dma-imx.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt index e577196a12c0..4659fd952301 100644 --- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt +++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt @@ -47,6 +47,7 @@ The full ID of peripheral types can be found below. 20 ASRC 21 ESAI 22 SSI Dual FIFO (needs firmware ver >= 2) + 23 Shared ASRC The third cell specifies the transfer priority as below. diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 8269c200b53b..de584e605db5 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -750,6 +750,11 @@ static void sdma_get_pc(struct sdma_channel *sdmac, emi_2_per = sdma->script_addrs->asrc_2_mcu_addr; per_2_per = sdma->script_addrs->per_2_per_addr; break; + case IMX_DMATYPE_ASRC_SP: + per_2_emi = sdma->script_addrs->shp_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + per_2_per = sdma->script_addrs->per_2_per_addr; + break; case IMX_DMATYPE_MSHC: per_2_emi = sdma->script_addrs->mshc_2_mcu_addr; emi_2_per = sdma->script_addrs->mcu_2_mshc_addr; diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h index bcbc6c3c14c0..7aa0e89d1bcc 100644 --- a/include/linux/platform_data/dma-imx.h +++ b/include/linux/platform_data/dma-imx.h @@ -40,6 +40,7 @@ enum sdma_peripheral_type { IMX_DMATYPE_ASRC, /* ASRC */ IMX_DMATYPE_ESAI, /* ESAI */ IMX_DMATYPE_SSI_DUAL, /* SSI Dual FIFO */ + IMX_DMATYPE_ASRC_SP, /* Shared ASRC */ }; enum imx_dma_prio { -- cgit v1.2.3 From 0c9dbebdb6611d2cd75d025ec09035c3e8ce2160 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 11 Jul 2014 18:18:26 +0200 Subject: dmaengine: Remove unused definition of DMA_MAX_COOKIE As of commit commit f04cd40701deace2efb9edd7120e59366bda2118 ("fsldma: fix controller lockups"), its last (and only ever) user is gone. Signed-off-by: Geert Uytterhoeven Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d2c5cc7c583c..4eb2f82aed1d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -37,7 +37,6 @@ */ typedef s32 dma_cookie_t; #define DMA_MIN_COOKIE 1 -#define DMA_MAX_COOKIE INT_MAX static inline int dma_submit_error(dma_cookie_t cookie) { -- cgit v1.2.3 From a259f3896a39ec7cbcd5f630a6ec95bdcbc080d2 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 24 Jul 2014 14:39:24 +0200 Subject: mfd: max77686: Add Maxim 77802 PMIC support Maxim MAX77802 is a power management chip that contains 10 high efficiency Buck regulators, 32 Low-dropout (LDO) regulators used to power up application processors and peripherals, a 2-channel 32kHz clock outputs, a Real-Time-Clock (RTC) and a I2C interface to program the individual regulators, clocks outputs and the RTC. This patch adds support for MAX77802 to the MAX77686 driver and is based on a driver added to the Chrome OS kernel 3.8 by Simon Glass. Signed-off-by: Javier Martinez Canillas Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 6 +- drivers/mfd/max77686.c | 197 ++++++++++++++++++++++++++++----- include/linux/mfd/max77686-private.h | 208 ++++++++++++++++++++++++++++++++++- include/linux/mfd/max77686.h | 57 +++++++++- 4 files changed, 436 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 30102042dcaf..de5abf244746 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -380,15 +380,15 @@ config MFD_MAX14577 of the device. config MFD_MAX77686 - bool "Maxim Semiconductor MAX77686 PMIC Support" + bool "Maxim Semiconductor MAX77686/802 PMIC Support" depends on I2C=y select MFD_CORE select REGMAP_I2C select REGMAP_IRQ select IRQ_DOMAIN help - Say yes here to add support for Maxim Semiconductor MAX77686. - This is a Power Management IC with RTC on chip. + Say yes here to add support for Maxim Semiconductor MAX77686 and + MAX77802 which are Power Management IC with an RTC on chip. This driver provides common support for accessing the device; additional drivers must be enabled in order to use the functionality of the device. diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index f2bd69915987..c65332291bb4 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -1,5 +1,5 @@ /* - * max77686.c - mfd core driver for the Maxim 77686 + * max77686.c - mfd core driver for the Maxim 77686/802 * * Copyright (C) 2012 Samsung Electronics * Chiwoong Byun @@ -43,6 +43,74 @@ static const struct mfd_cell max77686_devs[] = { { .name = "max77686-clk", }, }; +static const struct mfd_cell max77802_devs[] = { + { .name = "max77802-pmic", }, + { .name = "max77802-clk", }, + { .name = "max77802-rtc", }, +}; + +static bool max77802_pmic_is_accessible_reg(struct device *dev, + unsigned int reg) +{ + return (reg >= MAX77802_REG_DEVICE_ID && reg < MAX77802_REG_PMIC_END); +} + +static bool max77802_rtc_is_accessible_reg(struct device *dev, + unsigned int reg) +{ + return (reg >= MAX77802_RTC_INT && reg < MAX77802_RTC_END); +} + +static bool max77802_is_accessible_reg(struct device *dev, unsigned int reg) +{ + return (max77802_pmic_is_accessible_reg(dev, reg) || + max77802_rtc_is_accessible_reg(dev, reg)); +} + +static bool max77802_pmic_is_precious_reg(struct device *dev, unsigned int reg) +{ + return (reg == MAX77802_REG_INTSRC || reg == MAX77802_REG_INT1 || + reg == MAX77802_REG_INT2); +} + +static bool max77802_rtc_is_precious_reg(struct device *dev, unsigned int reg) +{ + return (reg == MAX77802_RTC_INT || + reg == MAX77802_RTC_UPDATE0 || + reg == MAX77802_RTC_UPDATE1); +} + +static bool max77802_is_precious_reg(struct device *dev, unsigned int reg) +{ + return (max77802_pmic_is_precious_reg(dev, reg) || + max77802_rtc_is_precious_reg(dev, reg)); +} + +static bool max77802_pmic_is_volatile_reg(struct device *dev, unsigned int reg) +{ + return (max77802_is_precious_reg(dev, reg) || + reg == MAX77802_REG_STATUS1 || reg == MAX77802_REG_STATUS2 || + reg == MAX77802_REG_PWRON); +} + +static bool max77802_rtc_is_volatile_reg(struct device *dev, unsigned int reg) +{ + return (max77802_rtc_is_precious_reg(dev, reg) || + reg == MAX77802_RTC_SEC || + reg == MAX77802_RTC_MIN || + reg == MAX77802_RTC_HOUR || + reg == MAX77802_RTC_WEEKDAY || + reg == MAX77802_RTC_MONTH || + reg == MAX77802_RTC_YEAR || + reg == MAX77802_RTC_DATE); +} + +static bool max77802_is_volatile_reg(struct device *dev, unsigned int reg) +{ + return (max77802_pmic_is_volatile_reg(dev, reg) || + max77802_rtc_is_volatile_reg(dev, reg)); +} + static struct regmap_config max77686_regmap_config = { .reg_bits = 8, .val_bits = 8, @@ -53,6 +121,17 @@ static struct regmap_config max77686_rtc_regmap_config = { .val_bits = 8, }; +static struct regmap_config max77802_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .writeable_reg = max77802_is_accessible_reg, + .readable_reg = max77802_is_accessible_reg, + .precious_reg = max77802_is_precious_reg, + .volatile_reg = max77802_is_volatile_reg, + .name = "max77802-pmic", + .cache_type = REGCACHE_RBTREE, +}; + static const struct regmap_irq max77686_irqs[] = { /* INT1 interrupts */ { .reg_offset = 0, .mask = MAX77686_INT1_PWRONF_MSK, }, @@ -96,9 +175,34 @@ static const struct regmap_irq_chip max77686_rtc_irq_chip = { .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), }; +static const struct regmap_irq_chip max77802_irq_chip = { + .name = "max77802-pmic", + .status_base = MAX77802_REG_INT1, + .mask_base = MAX77802_REG_INT1MSK, + .num_regs = 2, + .irqs = max77686_irqs, /* same masks as 77686 */ + .num_irqs = ARRAY_SIZE(max77686_irqs), +}; + +static const struct regmap_irq_chip max77802_rtc_irq_chip = { + .name = "max77802-rtc", + .status_base = MAX77802_RTC_INT, + .mask_base = MAX77802_RTC_INTM, + .num_regs = 1, + .irqs = max77686_rtc_irqs, /* same masks as 77686 */ + .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), +}; + static const struct of_device_id max77686_pmic_dt_match[] = { - {.compatible = "maxim,max77686", .data = NULL}, - {}, + { + .compatible = "maxim,max77686", + .data = (void *)TYPE_MAX77686, + }, + { + .compatible = "maxim,max77802", + .data = (void *)TYPE_MAX77802, + }, + { }, }; static struct max77686_platform_data *max77686_i2c_parse_dt_pdata(struct device @@ -119,8 +223,15 @@ static int max77686_i2c_probe(struct i2c_client *i2c, { struct max77686_dev *max77686 = NULL; struct max77686_platform_data *pdata = dev_get_platdata(&i2c->dev); + const struct of_device_id *match; unsigned int data; int ret = 0; + const struct regmap_config *config; + const struct regmap_irq_chip *irq_chip; + const struct regmap_irq_chip *rtc_irq_chip; + struct regmap **rtc_regmap; + const struct mfd_cell *cells; + int n_devs; if (IS_ENABLED(CONFIG_OF) && i2c->dev.of_node && !pdata) pdata = max77686_i2c_parse_dt_pdata(&i2c->dev); @@ -135,15 +246,40 @@ static int max77686_i2c_probe(struct i2c_client *i2c, if (!max77686) return -ENOMEM; + if (i2c->dev.of_node) { + match = of_match_node(max77686_pmic_dt_match, i2c->dev.of_node); + if (!match) + return -EINVAL; + + max77686->type = (int)match->data; + } else { + max77686->type = id->driver_data; + } + i2c_set_clientdata(i2c, max77686); max77686->dev = &i2c->dev; max77686->i2c = i2c; - max77686->type = id->driver_data; max77686->wakeup = pdata->wakeup; max77686->irq = i2c->irq; - max77686->regmap = devm_regmap_init_i2c(i2c, &max77686_regmap_config); + if (max77686->type == TYPE_MAX77686) { + config = &max77686_regmap_config; + irq_chip = &max77686_irq_chip; + rtc_irq_chip = &max77686_rtc_irq_chip; + rtc_regmap = &max77686->rtc_regmap; + cells = max77686_devs; + n_devs = ARRAY_SIZE(max77686_devs); + } else { + config = &max77802_regmap_config; + irq_chip = &max77802_irq_chip; + rtc_irq_chip = &max77802_rtc_irq_chip; + rtc_regmap = &max77686->regmap; + cells = max77802_devs; + n_devs = ARRAY_SIZE(max77802_devs); + } + + max77686->regmap = devm_regmap_init_i2c(i2c, config); if (IS_ERR(max77686->regmap)) { ret = PTR_ERR(max77686->regmap); dev_err(max77686->dev, "Failed to allocate register map: %d\n", @@ -158,41 +294,46 @@ static int max77686_i2c_probe(struct i2c_client *i2c, return -ENODEV; } - max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); - if (!max77686->rtc) { - dev_err(max77686->dev, "Failed to allocate I2C device for RTC\n"); - return -ENODEV; - } - i2c_set_clientdata(max77686->rtc, max77686); - - max77686->rtc_regmap = devm_regmap_init_i2c(max77686->rtc, - &max77686_rtc_regmap_config); - if (IS_ERR(max77686->rtc_regmap)) { - ret = PTR_ERR(max77686->rtc_regmap); - dev_err(max77686->dev, "failed to allocate RTC regmap: %d\n", - ret); - goto err_unregister_i2c; + if (max77686->type == TYPE_MAX77686) { + max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); + if (!max77686->rtc) { + dev_err(max77686->dev, + "Failed to allocate I2C device for RTC\n"); + return -ENODEV; + } + i2c_set_clientdata(max77686->rtc, max77686); + + max77686->rtc_regmap = + devm_regmap_init_i2c(max77686->rtc, + &max77686_rtc_regmap_config); + if (IS_ERR(max77686->rtc_regmap)) { + ret = PTR_ERR(max77686->rtc_regmap); + dev_err(max77686->dev, + "failed to allocate RTC regmap: %d\n", + ret); + goto err_unregister_i2c; + } } ret = regmap_add_irq_chip(max77686->regmap, max77686->irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT | - IRQF_SHARED, 0, &max77686_irq_chip, + IRQF_SHARED, 0, irq_chip, &max77686->irq_data); if (ret) { dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret); goto err_unregister_i2c; } - ret = regmap_add_irq_chip(max77686->rtc_regmap, max77686->irq, + + ret = regmap_add_irq_chip(*rtc_regmap, max77686->irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT | - IRQF_SHARED, 0, &max77686_rtc_irq_chip, + IRQF_SHARED, 0, rtc_irq_chip, &max77686->rtc_irq_data); if (ret) { dev_err(&i2c->dev, "failed to add RTC irq chip: %d\n", ret); goto err_del_irqc; } - ret = mfd_add_devices(max77686->dev, -1, max77686_devs, - ARRAY_SIZE(max77686_devs), NULL, 0, NULL); + ret = mfd_add_devices(max77686->dev, -1, cells, n_devs, NULL, 0, NULL); if (ret < 0) { dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret); goto err_del_rtc_irqc; @@ -205,7 +346,8 @@ err_del_rtc_irqc: err_del_irqc: regmap_del_irq_chip(max77686->irq, max77686->irq_data); err_unregister_i2c: - i2c_unregister_device(max77686->rtc); + if (max77686->type == TYPE_MAX77686) + i2c_unregister_device(max77686->rtc); return ret; } @@ -219,7 +361,8 @@ static int max77686_i2c_remove(struct i2c_client *i2c) regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data); regmap_del_irq_chip(max77686->irq, max77686->irq_data); - i2c_unregister_device(max77686->rtc); + if (max77686->type == TYPE_MAX77686) + i2c_unregister_device(max77686->rtc); return 0; } @@ -294,6 +437,6 @@ static void __exit max77686_i2c_exit(void) } module_exit(max77686_i2c_exit); -MODULE_DESCRIPTION("MAXIM 77686 multi-function core driver"); +MODULE_DESCRIPTION("MAXIM 77686/802 multi-function core driver"); MODULE_AUTHOR("Chiwoong Byun "); MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 8e177806cba1..0d60b38e5b5c 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -1,5 +1,5 @@ /* - * max77686-private.h - Voltage regulator driver for the Maxim 77686 + * max77686-private.h - Voltage regulator driver for the Maxim 77686/802 * * Copyright (C) 2012 Samsung Electrnoics * Chiwoong Byun @@ -28,6 +28,7 @@ #define MAX77686_REG_INVALID (0xff) +/* MAX77686 PMIC registers */ enum max77686_pmic_reg { MAX77686_REG_DEVICE_ID = 0x00, MAX77686_REG_INTSRC = 0x01, @@ -181,6 +182,210 @@ enum max77686_rtc_reg { MAX77686_ALARM2_DATE = 0x1B, }; +/* MAX77802 PMIC registers */ +enum max77802_pmic_reg { + MAX77802_REG_DEVICE_ID = 0x00, + MAX77802_REG_INTSRC = 0x01, + MAX77802_REG_INT1 = 0x02, + MAX77802_REG_INT2 = 0x03, + + MAX77802_REG_INT1MSK = 0x04, + MAX77802_REG_INT2MSK = 0x05, + + MAX77802_REG_STATUS1 = 0x06, + MAX77802_REG_STATUS2 = 0x07, + + MAX77802_REG_PWRON = 0x08, + /* Reserved: 0x09 */ + MAX77802_REG_MRSTB = 0x0A, + MAX77802_REG_EPWRHOLD = 0x0B, + /* Reserved: 0x0C-0x0D */ + MAX77802_REG_BOOSTCTRL = 0x0E, + MAX77802_REG_BOOSTOUT = 0x0F, + + MAX77802_REG_BUCK1CTRL = 0x10, + MAX77802_REG_BUCK1DVS1 = 0x11, + MAX77802_REG_BUCK1DVS2 = 0x12, + MAX77802_REG_BUCK1DVS3 = 0x13, + MAX77802_REG_BUCK1DVS4 = 0x14, + MAX77802_REG_BUCK1DVS5 = 0x15, + MAX77802_REG_BUCK1DVS6 = 0x16, + MAX77802_REG_BUCK1DVS7 = 0x17, + MAX77802_REG_BUCK1DVS8 = 0x18, + /* Reserved: 0x19 */ + MAX77802_REG_BUCK2CTRL1 = 0x1A, + MAX77802_REG_BUCK2CTRL2 = 0x1B, + MAX77802_REG_BUCK2PHTRAN = 0x1C, + MAX77802_REG_BUCK2DVS1 = 0x1D, + MAX77802_REG_BUCK2DVS2 = 0x1E, + MAX77802_REG_BUCK2DVS3 = 0x1F, + MAX77802_REG_BUCK2DVS4 = 0x20, + MAX77802_REG_BUCK2DVS5 = 0x21, + MAX77802_REG_BUCK2DVS6 = 0x22, + MAX77802_REG_BUCK2DVS7 = 0x23, + MAX77802_REG_BUCK2DVS8 = 0x24, + /* Reserved: 0x25-0x26 */ + MAX77802_REG_BUCK3CTRL1 = 0x27, + MAX77802_REG_BUCK3DVS1 = 0x28, + MAX77802_REG_BUCK3DVS2 = 0x29, + MAX77802_REG_BUCK3DVS3 = 0x2A, + MAX77802_REG_BUCK3DVS4 = 0x2B, + MAX77802_REG_BUCK3DVS5 = 0x2C, + MAX77802_REG_BUCK3DVS6 = 0x2D, + MAX77802_REG_BUCK3DVS7 = 0x2E, + MAX77802_REG_BUCK3DVS8 = 0x2F, + /* Reserved: 0x30-0x36 */ + MAX77802_REG_BUCK4CTRL1 = 0x37, + MAX77802_REG_BUCK4DVS1 = 0x38, + MAX77802_REG_BUCK4DVS2 = 0x39, + MAX77802_REG_BUCK4DVS3 = 0x3A, + MAX77802_REG_BUCK4DVS4 = 0x3B, + MAX77802_REG_BUCK4DVS5 = 0x3C, + MAX77802_REG_BUCK4DVS6 = 0x3D, + MAX77802_REG_BUCK4DVS7 = 0x3E, + MAX77802_REG_BUCK4DVS8 = 0x3F, + /* Reserved: 0x40 */ + MAX77802_REG_BUCK5CTRL = 0x41, + MAX77802_REG_BUCK5OUT = 0x42, + /* Reserved: 0x43 */ + MAX77802_REG_BUCK6CTRL = 0x44, + MAX77802_REG_BUCK6DVS1 = 0x45, + MAX77802_REG_BUCK6DVS2 = 0x46, + MAX77802_REG_BUCK6DVS3 = 0x47, + MAX77802_REG_BUCK6DVS4 = 0x48, + MAX77802_REG_BUCK6DVS5 = 0x49, + MAX77802_REG_BUCK6DVS6 = 0x4A, + MAX77802_REG_BUCK6DVS7 = 0x4B, + MAX77802_REG_BUCK6DVS8 = 0x4C, + /* Reserved: 0x4D */ + MAX77802_REG_BUCK7CTRL = 0x4E, + MAX77802_REG_BUCK7OUT = 0x4F, + /* Reserved: 0x50 */ + MAX77802_REG_BUCK8CTRL = 0x51, + MAX77802_REG_BUCK8OUT = 0x52, + /* Reserved: 0x53 */ + MAX77802_REG_BUCK9CTRL = 0x54, + MAX77802_REG_BUCK9OUT = 0x55, + /* Reserved: 0x56 */ + MAX77802_REG_BUCK10CTRL = 0x57, + MAX77802_REG_BUCK10OUT = 0x58, + + /* Reserved: 0x59-0x5F */ + + MAX77802_REG_LDO1CTRL1 = 0x60, + MAX77802_REG_LDO2CTRL1 = 0x61, + MAX77802_REG_LDO3CTRL1 = 0x62, + MAX77802_REG_LDO4CTRL1 = 0x63, + MAX77802_REG_LDO5CTRL1 = 0x64, + MAX77802_REG_LDO6CTRL1 = 0x65, + MAX77802_REG_LDO7CTRL1 = 0x66, + MAX77802_REG_LDO8CTRL1 = 0x67, + MAX77802_REG_LDO9CTRL1 = 0x68, + MAX77802_REG_LDO10CTRL1 = 0x69, + MAX77802_REG_LDO11CTRL1 = 0x6A, + MAX77802_REG_LDO12CTRL1 = 0x6B, + MAX77802_REG_LDO13CTRL1 = 0x6C, + MAX77802_REG_LDO14CTRL1 = 0x6D, + MAX77802_REG_LDO15CTRL1 = 0x6E, + /* Reserved: 0x6F */ + MAX77802_REG_LDO17CTRL1 = 0x70, + MAX77802_REG_LDO18CTRL1 = 0x71, + MAX77802_REG_LDO19CTRL1 = 0x72, + MAX77802_REG_LDO20CTRL1 = 0x73, + MAX77802_REG_LDO21CTRL1 = 0x74, + MAX77802_REG_LDO22CTRL1 = 0x75, + MAX77802_REG_LDO23CTRL1 = 0x76, + MAX77802_REG_LDO24CTRL1 = 0x77, + MAX77802_REG_LDO25CTRL1 = 0x78, + MAX77802_REG_LDO26CTRL1 = 0x79, + MAX77802_REG_LDO27CTRL1 = 0x7A, + MAX77802_REG_LDO28CTRL1 = 0x7B, + MAX77802_REG_LDO29CTRL1 = 0x7C, + MAX77802_REG_LDO30CTRL1 = 0x7D, + /* Reserved: 0x7E */ + MAX77802_REG_LDO32CTRL1 = 0x7F, + MAX77802_REG_LDO33CTRL1 = 0x80, + MAX77802_REG_LDO34CTRL1 = 0x81, + MAX77802_REG_LDO35CTRL1 = 0x82, + /* Reserved: 0x83-0x8F */ + MAX77802_REG_LDO1CTRL2 = 0x90, + MAX77802_REG_LDO2CTRL2 = 0x91, + MAX77802_REG_LDO3CTRL2 = 0x92, + MAX77802_REG_LDO4CTRL2 = 0x93, + MAX77802_REG_LDO5CTRL2 = 0x94, + MAX77802_REG_LDO6CTRL2 = 0x95, + MAX77802_REG_LDO7CTRL2 = 0x96, + MAX77802_REG_LDO8CTRL2 = 0x97, + MAX77802_REG_LDO9CTRL2 = 0x98, + MAX77802_REG_LDO10CTRL2 = 0x99, + MAX77802_REG_LDO11CTRL2 = 0x9A, + MAX77802_REG_LDO12CTRL2 = 0x9B, + MAX77802_REG_LDO13CTRL2 = 0x9C, + MAX77802_REG_LDO14CTRL2 = 0x9D, + MAX77802_REG_LDO15CTRL2 = 0x9E, + /* Reserved: 0x9F */ + MAX77802_REG_LDO17CTRL2 = 0xA0, + MAX77802_REG_LDO18CTRL2 = 0xA1, + MAX77802_REG_LDO19CTRL2 = 0xA2, + MAX77802_REG_LDO20CTRL2 = 0xA3, + MAX77802_REG_LDO21CTRL2 = 0xA4, + MAX77802_REG_LDO22CTRL2 = 0xA5, + MAX77802_REG_LDO23CTRL2 = 0xA6, + MAX77802_REG_LDO24CTRL2 = 0xA7, + MAX77802_REG_LDO25CTRL2 = 0xA8, + MAX77802_REG_LDO26CTRL2 = 0xA9, + MAX77802_REG_LDO27CTRL2 = 0xAA, + MAX77802_REG_LDO28CTRL2 = 0xAB, + MAX77802_REG_LDO29CTRL2 = 0xAC, + MAX77802_REG_LDO30CTRL2 = 0xAD, + /* Reserved: 0xAE */ + MAX77802_REG_LDO32CTRL2 = 0xAF, + MAX77802_REG_LDO33CTRL2 = 0xB0, + MAX77802_REG_LDO34CTRL2 = 0xB1, + MAX77802_REG_LDO35CTRL2 = 0xB2, + /* Reserved: 0xB3 */ + + MAX77802_REG_BBAT_CHG = 0xB4, + MAX77802_REG_32KHZ = 0xB5, + + MAX77802_REG_PMIC_END = 0xB6, +}; + +enum max77802_rtc_reg { + MAX77802_RTC_INT = 0xC0, + MAX77802_RTC_INTM = 0xC1, + MAX77802_RTC_CONTROLM = 0xC2, + MAX77802_RTC_CONTROL = 0xC3, + MAX77802_RTC_UPDATE0 = 0xC4, + MAX77802_RTC_UPDATE1 = 0xC5, + MAX77802_WTSR_SMPL_CNTL = 0xC6, + MAX77802_RTC_SEC = 0xC7, + MAX77802_RTC_MIN = 0xC8, + MAX77802_RTC_HOUR = 0xC9, + MAX77802_RTC_WEEKDAY = 0xCA, + MAX77802_RTC_MONTH = 0xCB, + MAX77802_RTC_YEAR = 0xCC, + MAX77802_RTC_DATE = 0xCD, + MAX77802_RTC_AE1 = 0xCE, + MAX77802_ALARM1_SEC = 0xCF, + MAX77802_ALARM1_MIN = 0xD0, + MAX77802_ALARM1_HOUR = 0xD1, + MAX77802_ALARM1_WEEKDAY = 0xD2, + MAX77802_ALARM1_MONTH = 0xD3, + MAX77802_ALARM1_YEAR = 0xD4, + MAX77802_ALARM1_DATE = 0xD5, + MAX77802_RTC_AE2 = 0xD6, + MAX77802_ALARM2_SEC = 0xD7, + MAX77802_ALARM2_MIN = 0xD8, + MAX77802_ALARM2_HOUR = 0xD9, + MAX77802_ALARM2_WEEKDAY = 0xDA, + MAX77802_ALARM2_MONTH = 0xDB, + MAX77802_ALARM2_YEAR = 0xDC, + MAX77802_ALARM2_DATE = 0xDD, + + MAX77802_RTC_END = 0xDF, +}; + enum max77686_irq_source { PMIC_INT1 = 0, PMIC_INT2, @@ -250,6 +455,7 @@ struct max77686_dev { enum max77686_types { TYPE_MAX77686, + TYPE_MAX77802, }; extern int max77686_irq_init(struct max77686_dev *max77686); diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h index 4cbcc13e8a2a..7e6dc4b2b795 100644 --- a/include/linux/mfd/max77686.h +++ b/include/linux/mfd/max77686.h @@ -1,5 +1,5 @@ /* - * max77686.h - Driver for the Maxim 77686 + * max77686.h - Driver for the Maxim 77686/802 * * Copyright (C) 2012 Samsung Electrnoics * Chiwoong Byun @@ -71,6 +71,54 @@ enum max77686_regulators { MAX77686_REG_MAX, }; +/* MAX77802 regulator IDs */ +enum max77802_regulators { + MAX77802_BUCK1 = 0, + MAX77802_BUCK2, + MAX77802_BUCK3, + MAX77802_BUCK4, + MAX77802_BUCK5, + MAX77802_BUCK6, + MAX77802_BUCK7, + MAX77802_BUCK8, + MAX77802_BUCK9, + MAX77802_BUCK10, + MAX77802_LDO1, + MAX77802_LDO2, + MAX77802_LDO3, + MAX77802_LDO4, + MAX77802_LDO5, + MAX77802_LDO6, + MAX77802_LDO7, + MAX77802_LDO8, + MAX77802_LDO9, + MAX77802_LDO10, + MAX77802_LDO11, + MAX77802_LDO12, + MAX77802_LDO13, + MAX77802_LDO14, + MAX77802_LDO15, + MAX77802_LDO17, + MAX77802_LDO18, + MAX77802_LDO19, + MAX77802_LDO20, + MAX77802_LDO21, + MAX77802_LDO23, + MAX77802_LDO24, + MAX77802_LDO25, + MAX77802_LDO26, + MAX77802_LDO27, + MAX77802_LDO28, + MAX77802_LDO29, + MAX77802_LDO30, + MAX77802_LDO32, + MAX77802_LDO33, + MAX77802_LDO34, + MAX77802_LDO35, + + MAX77802_REG_MAX, +}; + struct max77686_regulator_data { int id; struct regulator_init_data *initdata; @@ -83,6 +131,13 @@ enum max77686_opmode { MAX77686_OPMODE_STANDBY, }; +enum max77802_opmode { + MAX77802_OPMODE_OFF, + MAX77802_OPMODE_STANDBY, + MAX77802_OPMODE_LP, + MAX77802_OPMODE_NORMAL, +}; + struct max77686_opmode_data { int id; int mode; -- cgit v1.2.3 From ec8bd56699cb4371994437583a285b855b6f5e3a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 24 Jul 2014 17:07:16 +0100 Subject: mfd: max77686: Ensure device type IDs are architecture agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extinguishes: ../drivers/mfd/max77686.c: In function ‘max77686_i2c_probe’: ../drivers/mfd/max77686.c:254:20: warning: cast from pointer to integer of different size Signed-off-by: Lee Jones --- drivers/mfd/max77686.c | 5 ++--- include/linux/mfd/max77686-private.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index c65332291bb4..86e552348db4 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -251,10 +251,9 @@ static int max77686_i2c_probe(struct i2c_client *i2c, if (!match) return -EINVAL; - max77686->type = (int)match->data; - } else { + max77686->type = (unsigned long)match->data; + } else max77686->type = id->driver_data; - } i2c_set_clientdata(i2c, max77686); max77686->dev = &i2c->dev; diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 0d60b38e5b5c..960b92ad450d 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -439,7 +439,7 @@ struct max77686_dev { struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */ struct i2c_client *rtc; /* slave addr 0x0c */ - int type; + unsigned long type; struct regmap *regmap; /* regmap for mfd */ struct regmap *rtc_regmap; /* regmap for rtc */ -- cgit v1.2.3 From 16369efb1f6006ec79babe53f388eed431533596 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Wed, 25 Jun 2014 14:52:59 +0400 Subject: dmaengine: of: add common xlate function for matching by channel id This patch adds a new common OF dma xlate callback function which will match a channel by it's id. The binding expects one integer argument which it will use to lookup the channel by the id. Unlike of_dma_simple_xlate this function is able to handle a system with multiple DMA controllers. When registering the of dma provider with of_dma_controller_register a pointer to the dma_device struct which is associated with the dt node needs to passed as the data parameter. New function will use this pointer to match only channels which belong to the specified DMA controller. Signed-off-by: Alexander Popov Signed-off-by: Vinod Koul --- drivers/dma/of-dma.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/of_dma.h | 4 ++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index e8fe9dc455f4..d5fbeaa1e7ba 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -218,3 +218,38 @@ struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, &dma_spec->args[0]); } EXPORT_SYMBOL_GPL(of_dma_simple_xlate); + +/** + * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id + * @dma_spec: pointer to DMA specifier as found in the device tree + * @of_dma: pointer to DMA controller data + * + * This function can be used as the of xlate callback for DMA driver which wants + * to match the channel based on the channel id. When using this xlate function + * the #dma-cells propety of the DMA controller dt node needs to be set to 1. + * The data parameter of of_dma_controller_register must be a pointer to the + * dma_device struct the function should match upon. + * + * Returns pointer to appropriate dma channel on success or NULL on error. + */ +struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_device *dev = ofdma->of_dma_data; + struct dma_chan *chan, *candidate = NULL; + + if (!dev || dma_spec->args_count != 1) + return NULL; + + list_for_each_entry(chan, &dev->channels, device_node) + if (chan->chan_id == dma_spec->args[0]) { + candidate = chan; + break; + } + + if (!candidate) + return NULL; + + return dma_get_slave_channel(candidate); +} +EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id); diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index ae36298ba076..56bc026c143f 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -41,6 +41,8 @@ extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name); extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma); +extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, + struct of_dma *ofdma); #else static inline int of_dma_controller_register(struct device_node *np, struct dma_chan *(*of_dma_xlate) @@ -66,6 +68,8 @@ static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_s return NULL; } +#define of_dma_xlate_by_chan_id NULL + #endif #endif /* __LINUX_OF_DMA_H */ -- cgit v1.2.3 From 29b4739134c73a2873adec93346f09bb76d6a794 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 24 Jul 2014 12:52:23 -0700 Subject: Input: wacom - switch from an USB driver to a HID driver All USB Wacom tablets are actually HID devices. For historical reasons, they are handled as plain USB devices. The current code makes more and more reference to the HID subsystem like implementing its own HID report descriptor parser to handle new devices. From the user point of view, we can transparently switch from this state to a driver handled in the HID subsystem and clean up a lot of USB specific code in the wacom.ko driver. The other benefit once the USB dependecies have been removed is that we can use a tool like uhid to make regression tests and allow further cleanup or new implementations without risking breaking current behaviors. To match the current handling of devices in wacom_wac.c, we rely on the hid_type set by usbhid. usbhid sets the hid_type to HID_TYPE_USBMOUSE when it sees a USB boot mouse protocol declared and HID_TYPE_USBNONE when the device is plain HID. There is thus a one to one matching between the list of supported devices before and after the switch from USB to HID. Signed-off-by: Benjamin Tissoires Reviewed-by: Jason Gerecke Tested-by: Jason Gerecke Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-core.c | 10 +- drivers/hid/hid-wacom.c | 2 +- drivers/input/tablet/wacom.h | 6 +- drivers/input/tablet/wacom_sys.c | 222 ++++++++++++++------------------------- drivers/input/tablet/wacom_wac.c | 80 +++++++------- drivers/input/tablet/wacom_wac.h | 4 +- include/linux/hid.h | 5 + 7 files changed, 141 insertions(+), 188 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 8ed66fd1ea87..1ce751db5a7a 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -787,6 +787,15 @@ static int hid_scan_report(struct hid_device *hid) /* hid-rmi should take care of them, not hid-generic */ hid->group = HID_GROUP_RMI; + /* + * Vendor specific handlings + */ + switch (hid->vendor) { + case USB_VENDOR_ID_WACOM: + hid->group = HID_GROUP_WACOM; + break; + } + vfree(parser); return 0; } @@ -2339,7 +2348,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_SKIP) }, { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_CYCLOPS) }, { HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LCSPEC) }, - { HID_USB_DEVICE(USB_VENDOR_ID_WACOM, HID_ANY_ID) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_4_PHIDGETSERVO_20) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_1_PHIDGETSERVO_20) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_8_8_4_IF_KIT) }, diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c index 902013ec041b..4874f4ec43f5 100644 --- a/drivers/hid/hid-wacom.c +++ b/drivers/hid/hid-wacom.c @@ -960,7 +960,7 @@ static const struct hid_device_id wacom_devices[] = { MODULE_DEVICE_TABLE(hid, wacom_devices); static struct hid_driver wacom_driver = { - .name = "wacom", + .name = "hid-wacom", .id_table = wacom_devices, .probe = wacom_probe, .remove = wacom_remove, diff --git a/drivers/input/tablet/wacom.h b/drivers/input/tablet/wacom.h index caa59cab2c5f..b9212390db71 100644 --- a/drivers/input/tablet/wacom.h +++ b/drivers/input/tablet/wacom.h @@ -106,14 +106,12 @@ MODULE_LICENSE(DRIVER_LICENSE); #define USB_VENDOR_ID_LENOVO 0x17ef struct wacom { - dma_addr_t data_dma; struct usb_device *usbdev; struct usb_interface *intf; - struct urb *irq; struct wacom_wac wacom_wac; + struct hid_device *hdev; struct mutex lock; struct work_struct work; - bool open; char phys[32]; struct wacom_led { u8 select[2]; /* status led selector (0..3) */ @@ -130,7 +128,7 @@ static inline void wacom_schedule_work(struct wacom_wac *wacom_wac) schedule_work(&wacom->work); } -extern const struct usb_device_id wacom_ids[]; +extern const struct hid_device_id wacom_ids[]; void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len); void wacom_setup_device_quirks(struct wacom_features *features); diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index c34791e7f0b9..5ceeab6e95f1 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -83,86 +83,40 @@ static int wacom_set_report(struct usb_interface *intf, u8 type, u8 id, return retval; } -static void wacom_sys_irq(struct urb *urb) +static int wacom_raw_event(struct hid_device *hdev, struct hid_report *report, + u8 *raw_data, int size) { - struct wacom *wacom = urb->context; - struct device *dev = &wacom->intf->dev; - int retval; + struct wacom *wacom = hid_get_drvdata(hdev); - switch (urb->status) { - case 0: - /* success */ - break; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - /* this urb is terminated, clean up */ - dev_dbg(dev, "%s - urb shutting down with status: %d\n", - __func__, urb->status); - return; - default: - dev_dbg(dev, "%s - nonzero urb status received: %d\n", - __func__, urb->status); - goto exit; - } + if (size > WACOM_PKGLEN_MAX) + return 1; - wacom_wac_irq(&wacom->wacom_wac, urb->actual_length); + memcpy(wacom->wacom_wac.data, raw_data, size); - exit: - usb_mark_last_busy(wacom->usbdev); - retval = usb_submit_urb(urb, GFP_ATOMIC); - if (retval) - dev_err(dev, "%s - usb_submit_urb failed with result %d\n", - __func__, retval); + wacom_wac_irq(&wacom->wacom_wac, size); + + return 0; } static int wacom_open(struct input_dev *dev) { struct wacom *wacom = input_get_drvdata(dev); - int retval = 0; - - if (usb_autopm_get_interface(wacom->intf) < 0) - return -EIO; + int retval; mutex_lock(&wacom->lock); - - if (wacom->open) - goto out; - - if (usb_submit_urb(wacom->irq, GFP_KERNEL)) { - retval = -EIO; - goto out; - } - - wacom->open = true; - wacom->intf->needs_remote_wakeup = 1; - -out: + retval = hid_hw_open(wacom->hdev); mutex_unlock(&wacom->lock); - usb_autopm_put_interface(wacom->intf); + return retval; } static void wacom_close(struct input_dev *dev) { struct wacom *wacom = input_get_drvdata(dev); - int autopm_error; - - autopm_error = usb_autopm_get_interface(wacom->intf); mutex_lock(&wacom->lock); - if (!wacom->open) - goto out; - - usb_kill_urb(wacom->irq); - wacom->open = false; - wacom->intf->needs_remote_wakeup = 0; - -out: + hid_hw_close(wacom->hdev); mutex_unlock(&wacom->lock); - - if (!autopm_error) - usb_autopm_put_interface(wacom->intf); } /* @@ -807,7 +761,8 @@ out: static ssize_t wacom_led_select_store(struct device *dev, int set_id, const char *buf, size_t count) { - struct wacom *wacom = dev_get_drvdata(dev); + struct hid_device *hdev = dev_get_drvdata(dev); + struct wacom *wacom = hid_get_drvdata(hdev); unsigned int id; int err; @@ -834,7 +789,8 @@ static ssize_t wacom_led##SET_ID##_select_store(struct device *dev, \ static ssize_t wacom_led##SET_ID##_select_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ - struct wacom *wacom = dev_get_drvdata(dev); \ + struct hid_device *hdev = dev_get_drvdata(dev); \ + struct wacom *wacom = hid_get_drvdata(hdev); \ return snprintf(buf, 2, "%d\n", wacom->led.select[SET_ID]); \ } \ static DEVICE_ATTR(status_led##SET_ID##_select, S_IWUSR | S_IRUSR, \ @@ -868,7 +824,8 @@ static ssize_t wacom_luminance_store(struct wacom *wacom, u8 *dest, static ssize_t wacom_##name##_luminance_store(struct device *dev, \ struct device_attribute *attr, const char *buf, size_t count) \ { \ - struct wacom *wacom = dev_get_drvdata(dev); \ + struct hid_device *hdev = dev_get_drvdata(dev); \ + struct wacom *wacom = hid_get_drvdata(hdev); \ \ return wacom_luminance_store(wacom, &wacom->led.field, \ buf, count); \ @@ -883,7 +840,8 @@ DEVICE_LUMINANCE_ATTR(buttons, img_lum); static ssize_t wacom_button_image_store(struct device *dev, int button_id, const char *buf, size_t count) { - struct wacom *wacom = dev_get_drvdata(dev); + struct hid_device *hdev = dev_get_drvdata(dev); + struct wacom *wacom = hid_get_drvdata(hdev); int err; if (count != 1024) @@ -1201,6 +1159,7 @@ static void wacom_wireless_work(struct work_struct *work) struct wacom *wacom = container_of(work, struct wacom, work); struct usb_device *usbdev = wacom->usbdev; struct wacom_wac *wacom_wac = &wacom->wacom_wac; + struct hid_device *hdev1, *hdev2; struct wacom *wacom1, *wacom2; struct wacom_wac *wacom_wac1, *wacom_wac2; int error; @@ -1213,32 +1172,34 @@ static void wacom_wireless_work(struct work_struct *work) wacom_destroy_battery(wacom); /* Stylus interface */ - wacom1 = usb_get_intfdata(usbdev->config->interface[1]); + hdev1 = usb_get_intfdata(usbdev->config->interface[1]); + wacom1 = hid_get_drvdata(hdev1); wacom_wac1 = &(wacom1->wacom_wac); wacom_unregister_inputs(wacom1); /* Touch interface */ - wacom2 = usb_get_intfdata(usbdev->config->interface[2]); + hdev2 = usb_get_intfdata(usbdev->config->interface[2]); + wacom2 = hid_get_drvdata(hdev2); wacom_wac2 = &(wacom2->wacom_wac); wacom_unregister_inputs(wacom2); if (wacom_wac->pid == 0) { dev_info(&wacom->intf->dev, "wireless tablet disconnected\n"); } else { - const struct usb_device_id *id = wacom_ids; + const struct hid_device_id *id = wacom_ids; dev_info(&wacom->intf->dev, "wireless tablet connected with PID %x\n", wacom_wac->pid); - while (id->match_flags) { - if (id->idVendor == USB_VENDOR_ID_WACOM && - id->idProduct == wacom_wac->pid) + while (id->bus) { + if (id->vendor == USB_VENDOR_ID_WACOM && + id->product == wacom_wac->pid) break; id++; } - if (!id->match_flags) { + if (!id->bus) { dev_info(&wacom->intf->dev, "ignoring unknown PID.\n"); return; @@ -1246,7 +1207,7 @@ static void wacom_wireless_work(struct work_struct *work) /* Stylus interface */ wacom_wac1->features = - *((struct wacom_features *)id->driver_info); + *((struct wacom_features *)id->driver_data); wacom_wac1->features.device_type = BTN_TOOL_PEN; snprintf(wacom_wac1->name, WACOM_NAME_MAX, "%s (WL) Pen", wacom_wac1->features.name); @@ -1262,7 +1223,7 @@ static void wacom_wireless_work(struct work_struct *work) if (wacom_wac1->features.touch_max || wacom_wac1->features.type == INTUOSHT) { wacom_wac2->features = - *((struct wacom_features *)id->driver_info); + *((struct wacom_features *)id->driver_data); wacom_wac2->features.pktlen = WACOM_PKGLEN_BBTOUCH3; wacom_wac2->features.device_type = BTN_TOOL_FINGER; wacom_wac2->features.x_max = wacom_wac2->features.y_max = 4096; @@ -1323,8 +1284,10 @@ static void wacom_calculate_res(struct wacom_features *features) features->unitExpo); } -static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *id) +static int wacom_probe(struct hid_device *hdev, + const struct hid_device_id *id) { + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *endpoint; struct wacom *wacom; @@ -1332,34 +1295,29 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i struct wacom_features *features; int error; - if (!id->driver_info) + if (!id->driver_data) return -EINVAL; wacom = kzalloc(sizeof(struct wacom), GFP_KERNEL); if (!wacom) return -ENOMEM; + hid_set_drvdata(hdev, wacom); + wacom->hdev = hdev; + wacom_wac = &wacom->wacom_wac; - wacom_wac->features = *((struct wacom_features *)id->driver_info); + wacom_wac->features = *((struct wacom_features *)id->driver_data); features = &wacom_wac->features; if (features->pktlen > WACOM_PKGLEN_MAX) { error = -EINVAL; goto fail1; } - wacom_wac->data = usb_alloc_coherent(dev, WACOM_PKGLEN_MAX, - GFP_KERNEL, &wacom->data_dma); - if (!wacom_wac->data) { - error = -ENOMEM; + if (features->check_for_hid_type && features->hid_type != hdev->type) { + error = -ENODEV; goto fail1; } - wacom->irq = usb_alloc_urb(0, GFP_KERNEL); - if (!wacom->irq) { - error = -ENOMEM; - goto fail2; - } - wacom->usbdev = dev; wacom->intf = intf; mutex_init(&wacom->lock); @@ -1375,7 +1333,7 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i /* Retrieve the physical and logical size for touch devices */ error = wacom_retrieve_hid_descriptor(intf, features); if (error) - goto fail3; + goto fail1; /* * Intuos5 has no useful data about its touch interface in its @@ -1423,38 +1381,38 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i other_dev = dev; error = wacom_add_shared_data(wacom_wac, other_dev); if (error) - goto fail3; + goto fail1; } - usb_fill_int_urb(wacom->irq, dev, - usb_rcvintpipe(dev, endpoint->bEndpointAddress), - wacom_wac->data, features->pktlen, - wacom_sys_irq, wacom, endpoint->bInterval); - wacom->irq->transfer_dma = wacom->data_dma; - wacom->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; - error = wacom_initialize_leds(wacom); if (error) - goto fail4; + goto fail2; if (!(features->quirks & WACOM_QUIRK_NO_INPUT)) { error = wacom_register_inputs(wacom); if (error) - goto fail5; + goto fail3; } /* Note that if query fails it is not a hard failure */ wacom_query_tablet_data(intf, features); - usb_set_intfdata(intf, wacom); + /* Regular HID work starts now */ + error = hid_parse(hdev); + if (error) { + hid_err(hdev, "parse failed\n"); + goto fail4; + } - if (features->quirks & WACOM_QUIRK_MONITOR) { - if (usb_submit_urb(wacom->irq, GFP_KERNEL)) { - error = -EIO; - goto fail5; - } + error = hid_hw_start(hdev, HID_CONNECT_HIDRAW); + if (error) { + hid_err(hdev, "hw start failed\n"); + goto fail4; } + if (features->quirks & WACOM_QUIRK_MONITOR) + error = hid_hw_open(hdev); + if (wacom_wac->features.type == INTUOSHT && wacom_wac->features.touch_max) { if (wacom_wac->features.device_type == BTN_TOOL_FINGER) wacom_wac->shared->touch_input = wacom_wac->input; @@ -1462,78 +1420,60 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i return 0; - fail5: wacom_destroy_leds(wacom); - fail4: wacom_remove_shared_data(wacom_wac); - fail3: usb_free_urb(wacom->irq); - fail2: usb_free_coherent(dev, WACOM_PKGLEN_MAX, wacom_wac->data, wacom->data_dma); + fail4: wacom_unregister_inputs(wacom); + fail3: wacom_destroy_leds(wacom); + fail2: wacom_remove_shared_data(wacom_wac); fail1: kfree(wacom); + hid_set_drvdata(hdev, NULL); return error; } -static void wacom_disconnect(struct usb_interface *intf) +static void wacom_remove(struct hid_device *hdev) { - struct wacom *wacom = usb_get_intfdata(intf); + struct wacom *wacom = hid_get_drvdata(hdev); - usb_set_intfdata(intf, NULL); + hid_hw_stop(hdev); - usb_kill_urb(wacom->irq); cancel_work_sync(&wacom->work); wacom_unregister_inputs(wacom); wacom_destroy_battery(wacom); wacom_destroy_leds(wacom); - usb_free_urb(wacom->irq); - usb_free_coherent(interface_to_usbdev(intf), WACOM_PKGLEN_MAX, - wacom->wacom_wac.data, wacom->data_dma); wacom_remove_shared_data(&wacom->wacom_wac); - kfree(wacom); -} - -static int wacom_suspend(struct usb_interface *intf, pm_message_t message) -{ - struct wacom *wacom = usb_get_intfdata(intf); - - mutex_lock(&wacom->lock); - usb_kill_urb(wacom->irq); - mutex_unlock(&wacom->lock); - return 0; + hid_set_drvdata(hdev, NULL); + kfree(wacom); } -static int wacom_resume(struct usb_interface *intf) +static int wacom_resume(struct hid_device *hdev) { - struct wacom *wacom = usb_get_intfdata(intf); + struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_features *features = &wacom->wacom_wac.features; - int rv = 0; mutex_lock(&wacom->lock); /* switch to wacom mode first */ - wacom_query_tablet_data(intf, features); + wacom_query_tablet_data(wacom->intf, features); wacom_led_control(wacom); - if ((wacom->open || (features->quirks & WACOM_QUIRK_MONITOR)) && - usb_submit_urb(wacom->irq, GFP_NOIO) < 0) - rv = -EIO; - mutex_unlock(&wacom->lock); - return rv; + return 0; } -static int wacom_reset_resume(struct usb_interface *intf) +static int wacom_reset_resume(struct hid_device *hdev) { - return wacom_resume(intf); + return wacom_resume(hdev); } -static struct usb_driver wacom_driver = { +static struct hid_driver wacom_driver = { .name = "wacom", .id_table = wacom_ids, .probe = wacom_probe, - .disconnect = wacom_disconnect, - .suspend = wacom_suspend, + .remove = wacom_remove, +#ifdef CONFIG_PM .resume = wacom_resume, .reset_resume = wacom_reset_resume, - .supports_autosuspend = 1, +#endif + .raw_event = wacom_raw_event, }; - -module_usb_driver(wacom_driver); +module_hid_driver(wacom_driver); diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index f170277b6f28..1c95ce78d749 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -2197,15 +2197,18 @@ static const struct wacom_features wacom_features_0x2A = static const struct wacom_features wacom_features_0x314 = { "Wacom Intuos Pro S", WACOM_PKGLEN_INTUOS, 31496, 19685, 2047, 63, INTUOSPS, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, - .touch_max = 16 }; + .touch_max = 16, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0x315 = { "Wacom Intuos Pro M", WACOM_PKGLEN_INTUOS, 44704, 27940, 2047, 63, INTUOSPM, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, - .touch_max = 16 }; + .touch_max = 16, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0x317 = { "Wacom Intuos Pro L", WACOM_PKGLEN_INTUOS, 65024, 40640, 2047, 63, INTUOSPL, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, - .touch_max = 16 }; + .touch_max = 16, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0xF4 = { "Wacom Cintiq 24HD", WACOM_PKGLEN_INTUOS, 104280, 65400, 2047, 63, WACOM_24HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 }; @@ -2215,7 +2218,8 @@ static const struct wacom_features wacom_features_0xF8 = .oVid = USB_VENDOR_ID_WACOM, .oPid = 0xf6 }; static const struct wacom_features wacom_features_0xF6 = { "Wacom Cintiq 24HD touch", .type = WACOM_24HDT, /* Touch */ - .oVid = USB_VENDOR_ID_WACOM, .oPid = 0xf8, .touch_max = 10 }; + .oVid = USB_VENDOR_ID_WACOM, .oPid = 0xf8, .touch_max = 10, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0x3F = { "Wacom Cintiq 21UX", WACOM_PKGLEN_INTUOS, 87200, 65600, 1023, 63, CINTIQ, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES }; @@ -2233,7 +2237,8 @@ static const struct wacom_features wacom_features_0xC7 = 0, PL, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0xCE = { "Wacom DTU2231", WACOM_PKGLEN_GRAPHIRE, 47864, 27011, 511, - 0, DTU, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; + 0, DTU, WACOM_INTUOS_RES, WACOM_INTUOS_RES, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBMOUSE }; static const struct wacom_features wacom_features_0xF0 = { "Wacom DTU1631", WACOM_PKGLEN_GRAPHIRE, 34623, 19553, 511, 0, DTU, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2249,7 +2254,8 @@ static const struct wacom_features wacom_features_0x59 = /* Pen */ .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5D }; static const struct wacom_features wacom_features_0x5D = /* Touch */ { "Wacom DTH2242", .type = WACOM_24HDT, - .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x59, .touch_max = 10 }; + .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x59, .touch_max = 10, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0xCC = { "Wacom Cintiq 21UX2", WACOM_PKGLEN_INTUOS, 87000, 65400, 2047, 63, WACOM_21UX2, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 200, 200 }; @@ -2262,7 +2268,8 @@ static const struct wacom_features wacom_features_0x5B = .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5e }; static const struct wacom_features wacom_features_0x5E = { "Wacom Cintiq 22HDT", .type = WACOM_24HDT, - .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5b, .touch_max = 10 }; + .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x5b, .touch_max = 10, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0x90 = { "Wacom ISDv4 90", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 255, 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2400,14 +2407,17 @@ static const struct wacom_features wacom_features_0x301 = static const struct wacom_features wacom_features_0x302 = { "Wacom Intuos PT S", WACOM_PKGLEN_BBPEN, 15200, 9500, 1023, 31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, - .touch_max = 16 }; + .touch_max = 16, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0x303 = { "Wacom Intuos PT M", WACOM_PKGLEN_BBPEN, 21600, 13500, 1023, 31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, - .touch_max = 16 }; + .touch_max = 16, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0x30E = { "Wacom Intuos S", WACOM_PKGLEN_BBPEN, 15200, 9500, 1023, - 31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; + 31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0x6004 = { "ISD-V4", WACOM_PKGLEN_GRAPHIRE, 12800, 8000, 255, 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2417,22 +2427,18 @@ static const struct wacom_features wacom_features_0x0307 = .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x309 }; static const struct wacom_features wacom_features_0x0309 = { "Wacom ISDv5 309", .type = WACOM_24HDT, /* Touch */ - .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x0307, .touch_max = 10 }; - -#define USB_DEVICE_WACOM(prod) \ - USB_DEVICE(USB_VENDOR_ID_WACOM, prod), \ - .driver_info = (kernel_ulong_t)&wacom_features_##prod + .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x0307, .touch_max = 10, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; -#define USB_DEVICE_DETAILED(prod, class, sub, proto) \ - USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_WACOM, prod, class, \ - sub, proto), \ - .driver_info = (kernel_ulong_t)&wacom_features_##prod +#define USB_DEVICE_WACOM(prod) \ + HID_DEVICE(BUS_USB, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\ + .driver_data = (kernel_ulong_t)&wacom_features_##prod #define USB_DEVICE_LENOVO(prod) \ - USB_DEVICE(USB_VENDOR_ID_LENOVO, prod), \ - .driver_info = (kernel_ulong_t)&wacom_features_##prod + HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, prod), \ + .driver_data = (kernel_ulong_t)&wacom_features_##prod -const struct usb_device_id wacom_ids[] = { +const struct hid_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x00) }, { USB_DEVICE_WACOM(0x10) }, { USB_DEVICE_WACOM(0x11) }, @@ -2478,9 +2484,9 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x45) }, { USB_DEVICE_WACOM(0x57) }, { USB_DEVICE_WACOM(0x59) }, - { USB_DEVICE_DETAILED(0x5D, USB_CLASS_HID, 0, 0) }, + { USB_DEVICE_WACOM(0x5D) }, { USB_DEVICE_WACOM(0x5B) }, - { USB_DEVICE_DETAILED(0x5E, USB_CLASS_HID, 0, 0) }, + { USB_DEVICE_WACOM(0x5E) }, { USB_DEVICE_WACOM(0xB0) }, { USB_DEVICE_WACOM(0xB1) }, { USB_DEVICE_WACOM(0xB2) }, @@ -2502,13 +2508,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0xC5) }, { USB_DEVICE_WACOM(0xC6) }, { USB_DEVICE_WACOM(0xC7) }, - /* - * DTU-2231 has two interfaces on the same configuration, - * only one is used. - */ - { USB_DEVICE_DETAILED(0xCE, USB_CLASS_HID, - USB_INTERFACE_SUBCLASS_BOOT, - USB_INTERFACE_PROTOCOL_MOUSE) }, + { USB_DEVICE_WACOM(0xCE) }, { USB_DEVICE_WACOM(0x84) }, { USB_DEVICE_WACOM(0xD0) }, { USB_DEVICE_WACOM(0xD1) }, @@ -2546,13 +2546,13 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x116) }, { USB_DEVICE_WACOM(0x300) }, { USB_DEVICE_WACOM(0x301) }, - { USB_DEVICE_DETAILED(0x302, USB_CLASS_HID, 0, 0) }, - { USB_DEVICE_DETAILED(0x303, USB_CLASS_HID, 0, 0) }, - { USB_DEVICE_DETAILED(0x30E, USB_CLASS_HID, 0, 0) }, + { USB_DEVICE_WACOM(0x302) }, + { USB_DEVICE_WACOM(0x303) }, + { USB_DEVICE_WACOM(0x30E) }, { USB_DEVICE_WACOM(0x304) }, - { USB_DEVICE_DETAILED(0x314, USB_CLASS_HID, 0, 0) }, - { USB_DEVICE_DETAILED(0x315, USB_CLASS_HID, 0, 0) }, - { USB_DEVICE_DETAILED(0x317, USB_CLASS_HID, 0, 0) }, + { USB_DEVICE_WACOM(0x314) }, + { USB_DEVICE_WACOM(0x315) }, + { USB_DEVICE_WACOM(0x317) }, { USB_DEVICE_WACOM(0x4001) }, { USB_DEVICE_WACOM(0x4004) }, { USB_DEVICE_WACOM(0x5000) }, @@ -2560,12 +2560,12 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x47) }, { USB_DEVICE_WACOM(0xF4) }, { USB_DEVICE_WACOM(0xF8) }, - { USB_DEVICE_DETAILED(0xF6, USB_CLASS_HID, 0, 0) }, + { USB_DEVICE_WACOM(0xF6) }, { USB_DEVICE_WACOM(0xFA) }, { USB_DEVICE_WACOM(0xFB) }, { USB_DEVICE_WACOM(0x0307) }, - { USB_DEVICE_DETAILED(0x0309, USB_CLASS_HID, 0, 0) }, + { USB_DEVICE_WACOM(0x0309) }, { USB_DEVICE_LENOVO(0x6004) }, { } }; -MODULE_DEVICE_TABLE(usb, wacom_ids); +MODULE_DEVICE_TABLE(hid, wacom_ids); diff --git a/drivers/input/tablet/wacom_wac.h b/drivers/input/tablet/wacom_wac.h index f48164c780f4..8821a518abf6 100644 --- a/drivers/input/tablet/wacom_wac.h +++ b/drivers/input/tablet/wacom_wac.h @@ -137,6 +137,8 @@ struct wacom_features { unsigned touch_max; int oVid; int oPid; + bool check_for_hid_type; + int hid_type; }; struct wacom_shared { @@ -151,7 +153,7 @@ struct wacom_shared { struct wacom_wac { char name[WACOM_NAME_MAX]; char pad_name[WACOM_NAME_MAX]; - unsigned char *data; + unsigned char data[WACOM_PKGLEN_MAX]; int tool[2]; int id[2]; __u32 serial[2]; diff --git a/include/linux/hid.h b/include/linux/hid.h index 77632cf159c0..07fa80671db0 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -310,6 +310,11 @@ struct hid_item { */ #define HID_GROUP_RMI 0x0100 +/* + * Vendor specific HID device groups + */ +#define HID_GROUP_WACOM 0x0101 + /* * This is the global environment of the parser. This information is * persistent for main-items. The global environment can be saved and -- cgit v1.2.3 From 3a611c3cfba2106aed3187b90903855e776e2761 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 27 Jul 2014 07:23:01 +0930 Subject: modules: Fix build error in moduleloader.h Fengguang Wu's build bot detected that if moduleloader.h is included in a C file (used by ftrace and kprobes to access module_alloc() when available), that it can fail to build if CONFIG_MODULES and CONFIG_MODULES_USE_ELF_REL is not defined. This is because there's a printk() that dereferences struct module to print the name of the module. But as struct module does not exist when CONFIG_MODULES is not defined we get this error: include/linux/moduleloader.h: In function 'apply_relocate': >> include/linux/moduleloader.h:48:63: error: dereferencing pointer to >> incomplete type printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name); ^ Reported-by: kbuild test robot Based-on-the-true-story-by: Steven Rostedt Confirms-rustys-story-ends-the-same-by: Steven Rostedt Signed-off-by: Rusty Russell --- include/linux/moduleloader.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 560ca53a75fa..7eeb9bbfb816 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -45,7 +45,8 @@ static inline int apply_relocate(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name); + printk(KERN_ERR "module %s: REL relocation unsupported\n", + module_name(me)); return -ENOEXEC; } #endif @@ -67,7 +68,8 @@ static inline int apply_relocate_add(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name); + printk(KERN_ERR "module %s: REL relocation unsupported\n", + module_name(me)); return -ENOEXEC; } #endif -- cgit v1.2.3 From 9b20a352d78a7651aa68a9220f77ccb03009d892 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Sun, 27 Jul 2014 07:24:01 +0930 Subject: module: add within_module() function It is just a small optimization that allows to replace few occurrences of within_module_init() || within_module_core() with a single call. Signed-off-by: Petr Mladek Signed-off-by: Rusty Russell --- include/linux/module.h | 5 +++++ kernel/module.c | 12 ++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index f520a767c86c..61d8fb2d0873 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -408,6 +408,11 @@ static inline int within_module_init(unsigned long addr, const struct module *mo addr < (unsigned long)mod->module_init + mod->init_size; } +static inline int within_module(unsigned long addr, const struct module *mod) +{ + return within_module_init(addr, mod) || within_module_core(addr, mod); +} + /* Search for module by name: must hold module_mutex. */ struct module *find_module(const char *name); diff --git a/kernel/module.c b/kernel/module.c index 81e727cf6df9..e87fdd2fc3c2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3448,8 +3448,7 @@ const char *module_address_lookup(unsigned long addr, list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if (within_module_init(addr, mod) || - within_module_core(addr, mod)) { + if (within_module(addr, mod)) { if (modname) *modname = mod->name; ret = get_ksymbol(mod, addr, size, offset); @@ -3473,8 +3472,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if (within_module_init(addr, mod) || - within_module_core(addr, mod)) { + if (within_module(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, NULL, NULL); @@ -3499,8 +3497,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if (within_module_init(addr, mod) || - within_module_core(addr, mod)) { + if (within_module(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, size, offset); @@ -3764,8 +3761,7 @@ struct module *__module_address(unsigned long addr) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if (within_module_core(addr, mod) - || within_module_init(addr, mod)) + if (within_module(addr, mod)) return mod; } return NULL; -- cgit v1.2.3 From 76681c8faa07f9e07caa3cc69f235c8719b2a6ea Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Sun, 27 Jul 2014 07:25:01 +0930 Subject: module: return bool from within_module*() The within_module*() functions return only true or false. Let's use bool as the return type. Note that it should not change kABI because these are inline functions. Signed-off-by: Petr Mladek Signed-off-by: Rusty Russell --- include/linux/module.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 61d8fb2d0873..71f282a4e307 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -396,19 +396,21 @@ bool is_module_address(unsigned long addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); -static inline int within_module_core(unsigned long addr, const struct module *mod) +static inline bool within_module_core(unsigned long addr, + const struct module *mod) { return (unsigned long)mod->module_core <= addr && addr < (unsigned long)mod->module_core + mod->core_size; } -static inline int within_module_init(unsigned long addr, const struct module *mod) +static inline bool within_module_init(unsigned long addr, + const struct module *mod) { return (unsigned long)mod->module_init <= addr && addr < (unsigned long)mod->module_init + mod->init_size; } -static inline int within_module(unsigned long addr, const struct module *mod) +static inline bool within_module(unsigned long addr, const struct module *mod) { return within_module_init(addr, mod) || within_module_core(addr, mod); } -- cgit v1.2.3 From 37549e94c77a94a9c32b5ae3313a3801cb66adf9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 27 Jul 2014 07:26:01 +0930 Subject: sysfs: disallow world-writable files. This check was introduced in 2006 by Alexey Dobriyan (9774a1f54f173) for module parameters; we removed it when we unified the check into VERIFY_OCTAL_PERMISSIONS() as sysfs didn't have the same requirement. Now all those users are fixed, reintroduce it. Cc: Alexey Dobriyan Cc: Dave Jones Cc: Joe Perches Signed-off-by: Rusty Russell --- include/linux/kernel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4c52907a6d8b..43e1c6a9683e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -849,5 +849,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } /* User perms >= group perms >= other perms */ \ BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \ BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \ + /* Other writable? Generally considered a bad idea. */ \ + BUILD_BUG_ON_ZERO((perms) & 2) + \ (perms)) #endif -- cgit v1.2.3 From 9cb42e2a8ed06e91ce9d2c59fbae8d1185ebe2f7 Mon Sep 17 00:00:00 2001 From: "Opensource [Steve Twiss]" Date: Mon, 21 Jul 2014 11:39:33 +0100 Subject: mfd: da9063: Add support for AD silicon variant Add register definitions for DA9063 AD (0x3) silicon variant ID the ability to choose the silicon variant at run-time using regmap configuration. This patch also adds RTC support for the AD silicon changes. It adds both BB and AD support as regmap ranges and then makes the distinction between the two tables at run-time. This allows both AD and BB silicon variants to be supported at the same time. Suggested-by: Philipp Zabel Signed-off-by: Opensource [Steve Twiss] Signed-off-by: Lee Jones --- drivers/mfd/da9063-core.c | 6 +- drivers/mfd/da9063-i2c.c | 134 ++++++++++++++++++++++++++++------- drivers/rtc/rtc-da9063.c | 54 +++++++++----- include/linux/mfd/da9063/core.h | 3 +- include/linux/mfd/da9063/registers.h | 129 +++++++++++++++++++++------------ 5 files changed, 236 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c index e70ae315abc7..93db8bb8c8f0 100644 --- a/drivers/mfd/da9063-core.c +++ b/drivers/mfd/da9063-core.c @@ -153,9 +153,9 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq) "Device detected (chip-ID: 0x%02X, var-ID: 0x%02X)\n", model, variant_id); - if (variant_code != PMIC_DA9063_BB) { - dev_err(da9063->dev, "Unknown chip variant code: 0x%02X\n", - variant_code); + if (variant_code < PMIC_DA9063_BB && variant_code != PMIC_DA9063_AD) { + dev_err(da9063->dev, + "Cannot support variant code: 0x%02X\n", variant_code); return -ENODEV; } diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c index 8db5c805c64f..21fd8d9a217b 100644 --- a/drivers/mfd/da9063-i2c.c +++ b/drivers/mfd/da9063-i2c.c @@ -25,10 +25,10 @@ #include #include -static const struct regmap_range da9063_readable_ranges[] = { +static const struct regmap_range da9063_ad_readable_ranges[] = { { .range_min = DA9063_REG_PAGE_CON, - .range_max = DA9063_REG_SECOND_D, + .range_max = DA9063_AD_REG_SECOND_D, }, { .range_min = DA9063_REG_SEQ, .range_max = DA9063_REG_ID_32_31, @@ -37,14 +37,14 @@ static const struct regmap_range da9063_readable_ranges[] = { .range_max = DA9063_REG_AUTO3_LOW, }, { .range_min = DA9063_REG_T_OFFSET, - .range_max = DA9063_REG_GP_ID_19, + .range_max = DA9063_AD_REG_GP_ID_19, }, { .range_min = DA9063_REG_CHIP_ID, .range_max = DA9063_REG_CHIP_VARIANT, }, }; -static const struct regmap_range da9063_writeable_ranges[] = { +static const struct regmap_range da9063_ad_writeable_ranges[] = { { .range_min = DA9063_REG_PAGE_CON, .range_max = DA9063_REG_PAGE_CON, @@ -53,7 +53,7 @@ static const struct regmap_range da9063_writeable_ranges[] = { .range_max = DA9063_REG_VSYS_MON, }, { .range_min = DA9063_REG_COUNT_S, - .range_max = DA9063_REG_ALARM_Y, + .range_max = DA9063_AD_REG_ALARM_Y, }, { .range_min = DA9063_REG_SEQ, .range_max = DA9063_REG_ID_32_31, @@ -62,14 +62,14 @@ static const struct regmap_range da9063_writeable_ranges[] = { .range_max = DA9063_REG_AUTO3_LOW, }, { .range_min = DA9063_REG_CONFIG_I, - .range_max = DA9063_REG_MON_REG_4, + .range_max = DA9063_AD_REG_MON_REG_4, }, { - .range_min = DA9063_REG_GP_ID_0, - .range_max = DA9063_REG_GP_ID_19, + .range_min = DA9063_AD_REG_GP_ID_0, + .range_max = DA9063_AD_REG_GP_ID_19, }, }; -static const struct regmap_range da9063_volatile_ranges[] = { +static const struct regmap_range da9063_ad_volatile_ranges[] = { { .range_min = DA9063_REG_STATUS_A, .range_max = DA9063_REG_EVENT_D, @@ -81,26 +81,104 @@ static const struct regmap_range da9063_volatile_ranges[] = { .range_max = DA9063_REG_ADC_MAN, }, { .range_min = DA9063_REG_ADC_RES_L, - .range_max = DA9063_REG_SECOND_D, + .range_max = DA9063_AD_REG_SECOND_D, }, { - .range_min = DA9063_REG_MON_REG_5, - .range_max = DA9063_REG_MON_REG_6, + .range_min = DA9063_AD_REG_MON_REG_5, + .range_max = DA9063_AD_REG_MON_REG_6, }, }; -static const struct regmap_access_table da9063_readable_table = { - .yes_ranges = da9063_readable_ranges, - .n_yes_ranges = ARRAY_SIZE(da9063_readable_ranges), +static const struct regmap_access_table da9063_ad_readable_table = { + .yes_ranges = da9063_ad_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_ad_readable_ranges), }; -static const struct regmap_access_table da9063_writeable_table = { - .yes_ranges = da9063_writeable_ranges, - .n_yes_ranges = ARRAY_SIZE(da9063_writeable_ranges), +static const struct regmap_access_table da9063_ad_writeable_table = { + .yes_ranges = da9063_ad_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_ad_writeable_ranges), }; -static const struct regmap_access_table da9063_volatile_table = { - .yes_ranges = da9063_volatile_ranges, - .n_yes_ranges = ARRAY_SIZE(da9063_volatile_ranges), +static const struct regmap_access_table da9063_ad_volatile_table = { + .yes_ranges = da9063_ad_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_ad_volatile_ranges), +}; + +static const struct regmap_range da9063_bb_readable_ranges[] = { + { + .range_min = DA9063_REG_PAGE_CON, + .range_max = DA9063_BB_REG_SECOND_D, + }, { + .range_min = DA9063_REG_SEQ, + .range_max = DA9063_REG_ID_32_31, + }, { + .range_min = DA9063_REG_SEQ_A, + .range_max = DA9063_REG_AUTO3_LOW, + }, { + .range_min = DA9063_REG_T_OFFSET, + .range_max = DA9063_BB_REG_GP_ID_19, + }, { + .range_min = DA9063_REG_CHIP_ID, + .range_max = DA9063_REG_CHIP_VARIANT, + }, +}; + +static const struct regmap_range da9063_bb_writeable_ranges[] = { + { + .range_min = DA9063_REG_PAGE_CON, + .range_max = DA9063_REG_PAGE_CON, + }, { + .range_min = DA9063_REG_FAULT_LOG, + .range_max = DA9063_REG_VSYS_MON, + }, { + .range_min = DA9063_REG_COUNT_S, + .range_max = DA9063_BB_REG_ALARM_Y, + }, { + .range_min = DA9063_REG_SEQ, + .range_max = DA9063_REG_ID_32_31, + }, { + .range_min = DA9063_REG_SEQ_A, + .range_max = DA9063_REG_AUTO3_LOW, + }, { + .range_min = DA9063_REG_CONFIG_I, + .range_max = DA9063_BB_REG_MON_REG_4, + }, { + .range_min = DA9063_BB_REG_GP_ID_0, + .range_max = DA9063_BB_REG_GP_ID_19, + }, +}; + +static const struct regmap_range da9063_bb_volatile_ranges[] = { + { + .range_min = DA9063_REG_STATUS_A, + .range_max = DA9063_REG_EVENT_D, + }, { + .range_min = DA9063_REG_CONTROL_F, + .range_max = DA9063_REG_CONTROL_F, + }, { + .range_min = DA9063_REG_ADC_MAN, + .range_max = DA9063_REG_ADC_MAN, + }, { + .range_min = DA9063_REG_ADC_RES_L, + .range_max = DA9063_BB_REG_SECOND_D, + }, { + .range_min = DA9063_BB_REG_MON_REG_5, + .range_max = DA9063_BB_REG_MON_REG_6, + }, +}; + +static const struct regmap_access_table da9063_bb_readable_table = { + .yes_ranges = da9063_bb_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_bb_readable_ranges), +}; + +static const struct regmap_access_table da9063_bb_writeable_table = { + .yes_ranges = da9063_bb_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_bb_writeable_ranges), +}; + +static const struct regmap_access_table da9063_bb_volatile_table = { + .yes_ranges = da9063_bb_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_bb_volatile_ranges), }; static const struct regmap_range_cfg da9063_range_cfg[] = { @@ -123,10 +201,6 @@ static struct regmap_config da9063_regmap_config = { .max_register = DA9063_REG_CHIP_VARIANT, .cache_type = REGCACHE_RBTREE, - - .rd_table = &da9063_readable_table, - .wr_table = &da9063_writeable_table, - .volatile_table = &da9063_volatile_table, }; static int da9063_i2c_probe(struct i2c_client *i2c, @@ -143,6 +217,16 @@ static int da9063_i2c_probe(struct i2c_client *i2c, da9063->dev = &i2c->dev; da9063->chip_irq = i2c->irq; + if (da9063->variant_code == PMIC_DA9063_AD) { + da9063_regmap_config.rd_table = &da9063_ad_readable_table; + da9063_regmap_config.wr_table = &da9063_ad_writeable_table; + da9063_regmap_config.volatile_table = &da9063_ad_volatile_table; + } else { + da9063_regmap_config.rd_table = &da9063_bb_readable_table; + da9063_regmap_config.wr_table = &da9063_bb_writeable_table; + da9063_regmap_config.volatile_table = &da9063_bb_volatile_table; + } + da9063->regmap = devm_regmap_init_i2c(i2c, &da9063_regmap_config); if (IS_ERR(da9063->regmap)) { ret = PTR_ERR(da9063->regmap); diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index 595393098b09..731ed1a97f59 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -29,6 +29,8 @@ #define YEARS_FROM_DA9063(year) ((year) + 100) #define MONTHS_FROM_DA9063(month) ((month) - 1) +#define RTC_ALARM_DATA_LEN (DA9063_AD_REG_ALARM_Y - DA9063_AD_REG_ALARM_MI + 1) + #define RTC_DATA_LEN (DA9063_REG_COUNT_Y - DA9063_REG_COUNT_S + 1) #define RTC_SEC 0 #define RTC_MIN 1 @@ -42,6 +44,10 @@ struct da9063_rtc { struct da9063 *hw; struct rtc_time alarm_time; bool rtc_sync; + int alarm_year; + int alarm_start; + int alarm_len; + int data_start; }; static void da9063_data_to_tm(u8 *data, struct rtc_time *tm) @@ -83,7 +89,7 @@ static int da9063_rtc_stop_alarm(struct device *dev) { struct da9063_rtc *rtc = dev_get_drvdata(dev); - return regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y, + return regmap_update_bits(rtc->hw->regmap, rtc->alarm_year, DA9063_ALARM_ON, 0); } @@ -91,7 +97,7 @@ static int da9063_rtc_start_alarm(struct device *dev) { struct da9063_rtc *rtc = dev_get_drvdata(dev); - return regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y, + return regmap_update_bits(rtc->hw->regmap, rtc->alarm_year, DA9063_ALARM_ON, DA9063_ALARM_ON); } @@ -151,8 +157,9 @@ static int da9063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) int ret; unsigned int val; - ret = regmap_bulk_read(rtc->hw->regmap, DA9063_REG_ALARM_S, - &data[RTC_SEC], RTC_DATA_LEN); + data[RTC_SEC] = 0; + ret = regmap_bulk_read(rtc->hw->regmap, rtc->alarm_start, + &data[rtc->data_start], rtc->alarm_len); if (ret < 0) return ret; @@ -186,14 +193,14 @@ static int da9063_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) return ret; } - ret = regmap_bulk_write(rtc->hw->regmap, DA9063_REG_ALARM_S, - data, RTC_DATA_LEN); + ret = regmap_bulk_write(rtc->hw->regmap, rtc->alarm_start, + &data[rtc->data_start], rtc->alarm_len); if (ret < 0) { dev_err(dev, "Failed to write alarm: %d\n", ret); return ret; } - rtc->alarm_time = alrm->time; + da9063_data_to_tm(data, &rtc->alarm_time); if (alrm->enabled) { ret = da9063_rtc_start_alarm(dev); @@ -218,7 +225,7 @@ static irqreturn_t da9063_alarm_event(int irq, void *data) { struct da9063_rtc *rtc = data; - regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y, + regmap_update_bits(rtc->hw->regmap, rtc->alarm_year, DA9063_ALARM_ON, 0); rtc->rtc_sync = true; @@ -257,7 +264,23 @@ static int da9063_rtc_probe(struct platform_device *pdev) goto err; } - ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_S, + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); + if (!rtc) + return -ENOMEM; + + if (da9063->variant_code == PMIC_DA9063_AD) { + rtc->alarm_year = DA9063_AD_REG_ALARM_Y; + rtc->alarm_start = DA9063_AD_REG_ALARM_MI; + rtc->alarm_len = RTC_ALARM_DATA_LEN; + rtc->data_start = RTC_MIN; + } else { + rtc->alarm_year = DA9063_BB_REG_ALARM_Y; + rtc->alarm_start = DA9063_BB_REG_ALARM_S; + rtc->alarm_len = RTC_DATA_LEN; + rtc->data_start = RTC_SEC; + } + + ret = regmap_update_bits(da9063->regmap, rtc->alarm_start, DA9063_ALARM_STATUS_TICK | DA9063_ALARM_STATUS_ALARM, 0); if (ret < 0) { @@ -265,7 +288,7 @@ static int da9063_rtc_probe(struct platform_device *pdev) goto err; } - ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_S, + ret = regmap_update_bits(da9063->regmap, rtc->alarm_start, DA9063_ALARM_STATUS_ALARM, DA9063_ALARM_STATUS_ALARM); if (ret < 0) { @@ -273,25 +296,22 @@ static int da9063_rtc_probe(struct platform_device *pdev) goto err; } - ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_Y, + ret = regmap_update_bits(da9063->regmap, rtc->alarm_year, DA9063_TICK_ON, 0); if (ret < 0) { dev_err(&pdev->dev, "Failed to disable TICKs\n"); goto err; } - ret = regmap_bulk_read(da9063->regmap, DA9063_REG_ALARM_S, - data, RTC_DATA_LEN); + data[RTC_SEC] = 0; + ret = regmap_bulk_read(da9063->regmap, rtc->alarm_start, + &data[rtc->data_start], rtc->alarm_len); if (ret < 0) { dev_err(&pdev->dev, "Failed to read initial alarm data: %d\n", ret); goto err; } - rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); - if (!rtc) - return -ENOMEM; - platform_set_drvdata(pdev, rtc); irq_alarm = platform_get_irq_byname(pdev, "ALARM"); diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 00a9aac5d1e8..b92a3262f8f6 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -34,7 +34,8 @@ enum da9063_models { }; enum da9063_variant_codes { - PMIC_DA9063_BB = 0x5 + PMIC_DA9063_AD = 0x3, + PMIC_DA9063_BB = 0x5, }; /* Interrupts */ diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index 09a85c699da1..2e0ba6d5fbc3 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -104,16 +104,27 @@ #define DA9063_REG_COUNT_D 0x43 #define DA9063_REG_COUNT_MO 0x44 #define DA9063_REG_COUNT_Y 0x45 -#define DA9063_REG_ALARM_S 0x46 -#define DA9063_REG_ALARM_MI 0x47 -#define DA9063_REG_ALARM_H 0x48 -#define DA9063_REG_ALARM_D 0x49 -#define DA9063_REG_ALARM_MO 0x4A -#define DA9063_REG_ALARM_Y 0x4B -#define DA9063_REG_SECOND_A 0x4C -#define DA9063_REG_SECOND_B 0x4D -#define DA9063_REG_SECOND_C 0x4E -#define DA9063_REG_SECOND_D 0x4F + +#define DA9063_AD_REG_ALARM_MI 0x46 +#define DA9063_AD_REG_ALARM_H 0x47 +#define DA9063_AD_REG_ALARM_D 0x48 +#define DA9063_AD_REG_ALARM_MO 0x49 +#define DA9063_AD_REG_ALARM_Y 0x4A +#define DA9063_AD_REG_SECOND_A 0x4B +#define DA9063_AD_REG_SECOND_B 0x4C +#define DA9063_AD_REG_SECOND_C 0x4D +#define DA9063_AD_REG_SECOND_D 0x4E + +#define DA9063_BB_REG_ALARM_S 0x46 +#define DA9063_BB_REG_ALARM_MI 0x47 +#define DA9063_BB_REG_ALARM_H 0x48 +#define DA9063_BB_REG_ALARM_D 0x49 +#define DA9063_BB_REG_ALARM_MO 0x4A +#define DA9063_BB_REG_ALARM_Y 0x4B +#define DA9063_BB_REG_SECOND_A 0x4C +#define DA9063_BB_REG_SECOND_B 0x4D +#define DA9063_BB_REG_SECOND_C 0x4E +#define DA9063_BB_REG_SECOND_D 0x4F /* Sequencer Control Registers */ #define DA9063_REG_SEQ 0x81 @@ -223,37 +234,67 @@ #define DA9063_REG_CONFIG_J 0x10F #define DA9063_REG_CONFIG_K 0x110 #define DA9063_REG_CONFIG_L 0x111 -#define DA9063_REG_CONFIG_M 0x112 -#define DA9063_REG_CONFIG_N 0x113 - -#define DA9063_REG_MON_REG_1 0x114 -#define DA9063_REG_MON_REG_2 0x115 -#define DA9063_REG_MON_REG_3 0x116 -#define DA9063_REG_MON_REG_4 0x117 -#define DA9063_REG_MON_REG_5 0x11E -#define DA9063_REG_MON_REG_6 0x11F -#define DA9063_REG_TRIM_CLDR 0x120 + +#define DA9063_AD_REG_MON_REG_1 0x112 +#define DA9063_AD_REG_MON_REG_2 0x113 +#define DA9063_AD_REG_MON_REG_3 0x114 +#define DA9063_AD_REG_MON_REG_4 0x115 +#define DA9063_AD_REG_MON_REG_5 0x116 +#define DA9063_AD_REG_MON_REG_6 0x117 +#define DA9063_AD_REG_TRIM_CLDR 0x118 + +#define DA9063_AD_REG_GP_ID_0 0x119 +#define DA9063_AD_REG_GP_ID_1 0x11A +#define DA9063_AD_REG_GP_ID_2 0x11B +#define DA9063_AD_REG_GP_ID_3 0x11C +#define DA9063_AD_REG_GP_ID_4 0x11D +#define DA9063_AD_REG_GP_ID_5 0x11E +#define DA9063_AD_REG_GP_ID_6 0x11F +#define DA9063_AD_REG_GP_ID_7 0x120 +#define DA9063_AD_REG_GP_ID_8 0x121 +#define DA9063_AD_REG_GP_ID_9 0x122 +#define DA9063_AD_REG_GP_ID_10 0x123 +#define DA9063_AD_REG_GP_ID_11 0x124 +#define DA9063_AD_REG_GP_ID_12 0x125 +#define DA9063_AD_REG_GP_ID_13 0x126 +#define DA9063_AD_REG_GP_ID_14 0x127 +#define DA9063_AD_REG_GP_ID_15 0x128 +#define DA9063_AD_REG_GP_ID_16 0x129 +#define DA9063_AD_REG_GP_ID_17 0x12A +#define DA9063_AD_REG_GP_ID_18 0x12B +#define DA9063_AD_REG_GP_ID_19 0x12C + +#define DA9063_BB_REG_CONFIG_M 0x112 +#define DA9063_BB_REG_CONFIG_N 0x113 + +#define DA9063_BB_REG_MON_REG_1 0x114 +#define DA9063_BB_REG_MON_REG_2 0x115 +#define DA9063_BB_REG_MON_REG_3 0x116 +#define DA9063_BB_REG_MON_REG_4 0x117 +#define DA9063_BB_REG_MON_REG_5 0x11E +#define DA9063_BB_REG_MON_REG_6 0x11F +#define DA9063_BB_REG_TRIM_CLDR 0x120 /* General Purpose Registers */ -#define DA9063_REG_GP_ID_0 0x121 -#define DA9063_REG_GP_ID_1 0x122 -#define DA9063_REG_GP_ID_2 0x123 -#define DA9063_REG_GP_ID_3 0x124 -#define DA9063_REG_GP_ID_4 0x125 -#define DA9063_REG_GP_ID_5 0x126 -#define DA9063_REG_GP_ID_6 0x127 -#define DA9063_REG_GP_ID_7 0x128 -#define DA9063_REG_GP_ID_8 0x129 -#define DA9063_REG_GP_ID_9 0x12A -#define DA9063_REG_GP_ID_10 0x12B -#define DA9063_REG_GP_ID_11 0x12C -#define DA9063_REG_GP_ID_12 0x12D -#define DA9063_REG_GP_ID_13 0x12E -#define DA9063_REG_GP_ID_14 0x12F -#define DA9063_REG_GP_ID_15 0x130 -#define DA9063_REG_GP_ID_16 0x131 -#define DA9063_REG_GP_ID_17 0x132 -#define DA9063_REG_GP_ID_18 0x133 -#define DA9063_REG_GP_ID_19 0x134 +#define DA9063_BB_REG_GP_ID_0 0x121 +#define DA9063_BB_REG_GP_ID_1 0x122 +#define DA9063_BB_REG_GP_ID_2 0x123 +#define DA9063_BB_REG_GP_ID_3 0x124 +#define DA9063_BB_REG_GP_ID_4 0x125 +#define DA9063_BB_REG_GP_ID_5 0x126 +#define DA9063_BB_REG_GP_ID_6 0x127 +#define DA9063_BB_REG_GP_ID_7 0x128 +#define DA9063_BB_REG_GP_ID_8 0x129 +#define DA9063_BB_REG_GP_ID_9 0x12A +#define DA9063_BB_REG_GP_ID_10 0x12B +#define DA9063_BB_REG_GP_ID_11 0x12C +#define DA9063_BB_REG_GP_ID_12 0x12D +#define DA9063_BB_REG_GP_ID_13 0x12E +#define DA9063_BB_REG_GP_ID_14 0x12F +#define DA9063_BB_REG_GP_ID_15 0x130 +#define DA9063_BB_REG_GP_ID_16 0x131 +#define DA9063_BB_REG_GP_ID_17 0x132 +#define DA9063_BB_REG_GP_ID_18 0x133 +#define DA9063_BB_REG_GP_ID_19 0x134 /* Chip ID and variant */ #define DA9063_REG_CHIP_ID 0x181 @@ -404,10 +445,10 @@ /* DA9063_REG_CONTROL_B (addr=0x0F) */ #define DA9063_CHG_SEL 0x01 #define DA9063_WATCHDOG_PD 0x02 -#define DA9063_RESET_BLINKING 0x04 +#define DA9063_BB_RESET_BLINKING 0x04 #define DA9063_NRES_MODE 0x08 #define DA9063_NONKEY_LOCK 0x10 -#define DA9063_BUCK_SLOWSTART 0x80 +#define DA9063_BB_BUCK_SLOWSTART 0x80 /* DA9063_REG_CONTROL_C (addr=0x10) */ #define DA9063_DEBOUNCING_MASK 0x07 @@ -467,7 +508,7 @@ #define DA9063_GPADC_PAUSE 0x02 #define DA9063_PMIF_DIS 0x04 #define DA9063_HS2WIRE_DIS 0x08 -#define DA9063_CLDR_PAUSE 0x10 +#define DA9063_BB_CLDR_PAUSE 0x10 #define DA9063_BBAT_DIS 0x20 #define DA9063_OUT_32K_PAUSE 0x40 #define DA9063_PMCONT_DIS 0x80 @@ -844,7 +885,7 @@ #define DA9063_MONITOR 0x40 /* DA9063_REG_ALARM_S (addr=0x46) */ -#define DA9063_ALARM_S_MASK 0x3F +#define DA9063_BB_ALARM_S_MASK 0x3F #define DA9063_ALARM_STATUS_ALARM 0x80 #define DA9063_ALARM_STATUS_TICK 0x40 /* DA9063_REG_ALARM_MI (addr=0x47) */ -- cgit v1.2.3 From 3d2108dae4e1768c06718cdce19f8f0089ce310e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 15 Jul 2014 11:21:46 +0100 Subject: mfd: wm5110: Add in the output done interrupts wm5110 has interrupts to signal that an output has fully enabled. This patch adds in these interrupts although use is not made of them yet. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm5110-tables.c | 18 +++++ include/linux/mfd/arizona/core.h | 10 ++- include/linux/mfd/arizona/registers.h | 120 ++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index b26be6e5e3f4..4729007055d8 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -416,6 +416,24 @@ static const struct regmap_irq wm5110_irqs[ARIZONA_NUM_IRQ] = { [ARIZONA_IRQ_ISRC2_CFG_ERR] = { .reg_offset = 3, .mask = ARIZONA_ISRC2_CFG_ERR_EINT1 }, + [ARIZONA_IRQ_HP3R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP3R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP3L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP3L_DONE_EINT1 + }, + [ARIZONA_IRQ_HP2R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP2R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP2L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP2L_DONE_EINT1 + }, + [ARIZONA_IRQ_HP1R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP1R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP1L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP1L_DONE_EINT1 + }, [ARIZONA_IRQ_BOOT_DONE] = { .reg_offset = 4, .mask = ARIZONA_BOOT_DONE_EINT1 diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 70854d892760..6a62fc99f399 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -78,8 +78,14 @@ enum arizona_type { #define ARIZONA_IRQ_FLL1_CLOCK_OK 49 #define ARIZONA_IRQ_MICD_CLAMP_RISE 50 #define ARIZONA_IRQ_MICD_CLAMP_FALL 51 - -#define ARIZONA_NUM_IRQ 52 +#define ARIZONA_IRQ_HP3R_DONE 52 +#define ARIZONA_IRQ_HP3L_DONE 53 +#define ARIZONA_IRQ_HP2R_DONE 54 +#define ARIZONA_IRQ_HP2L_DONE 55 +#define ARIZONA_IRQ_HP1R_DONE 56 +#define ARIZONA_IRQ_HP1L_DONE 57 + +#define ARIZONA_NUM_IRQ 58 struct snd_soc_dapm_context; diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index 7204d8138b24..df93563ea8c5 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -4795,6 +4795,30 @@ #define ARIZONA_ISRC2_CFG_ERR_EINT1_MASK 0x0040 /* ISRC2_CFG_ERR_EINT1 */ #define ARIZONA_ISRC2_CFG_ERR_EINT1_SHIFT 6 /* ISRC2_CFG_ERR_EINT1 */ #define ARIZONA_ISRC2_CFG_ERR_EINT1_WIDTH 1 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_HP3R_DONE_EINT1 0x0020 /* HP3R_DONE_EINT1 */ +#define ARIZONA_HP3R_DONE_EINT1_MASK 0x0020 /* HP3R_DONE_EINT1 */ +#define ARIZONA_HP3R_DONE_EINT1_SHIFT 5 /* HP3R_DONE_EINT1 */ +#define ARIZONA_HP3R_DONE_EINT1_WIDTH 1 /* HP3R_DONE_EINT1 */ +#define ARIZONA_HP3L_DONE_EINT1 0x0010 /* HP3L_DONE_EINT1 */ +#define ARIZONA_HP3L_DONE_EINT1_MASK 0x0010 /* HP3L_DONE_EINT1 */ +#define ARIZONA_HP3L_DONE_EINT1_SHIFT 4 /* HP3L_DONE_EINT1 */ +#define ARIZONA_HP3L_DONE_EINT1_WIDTH 1 /* HP3L_DONE_EINT1 */ +#define ARIZONA_HP2R_DONE_EINT1 0x0008 /* HP2R_DONE_EINT1 */ +#define ARIZONA_HP2R_DONE_EINT1_MASK 0x0008 /* HP2R_DONE_EINT1 */ +#define ARIZONA_HP2R_DONE_EINT1_SHIFT 3 /* HP2R_DONE_EINT1 */ +#define ARIZONA_HP2R_DONE_EINT1_WIDTH 1 /* HP2R_DONE_EINT1 */ +#define ARIZONA_HP2L_DONE_EINT1 0x0004 /* HP2L_DONE_EINT1 */ +#define ARIZONA_HP2L_DONE_EINT1_MASK 0x0004 /* HP2L_DONE_EINT1 */ +#define ARIZONA_HP2L_DONE_EINT1_SHIFT 2 /* HP2L_DONE_EINT1 */ +#define ARIZONA_HP2L_DONE_EINT1_WIDTH 1 /* HP2L_DONE_EINT1 */ +#define ARIZONA_HP1R_DONE_EINT1 0x0002 /* HP1R_DONE_EINT1 */ +#define ARIZONA_HP1R_DONE_EINT1_MASK 0x0002 /* HP1R_DONE_EINT1 */ +#define ARIZONA_HP1R_DONE_EINT1_SHIFT 1 /* HP1R_DONE_EINT1 */ +#define ARIZONA_HP1R_DONE_EINT1_WIDTH 1 /* HP1R_DONE_EINT1 */ +#define ARIZONA_HP1L_DONE_EINT1 0x0001 /* HP1L_DONE_EINT1 */ +#define ARIZONA_HP1L_DONE_EINT1_MASK 0x0001 /* HP1L_DONE_EINT1 */ +#define ARIZONA_HP1L_DONE_EINT1_SHIFT 0 /* HP1L_DONE_EINT1 */ +#define ARIZONA_HP1L_DONE_EINT1_WIDTH 1 /* HP1L_DONE_EINT1 */ /* * R3332 (0xD04) - Interrupt Status 5 @@ -4963,6 +4987,30 @@ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_MASK 0x0040 /* IM_ISRC2_CFG_ERR_EINT1 */ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_SHIFT 6 /* IM_ISRC2_CFG_ERR_EINT1 */ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_WIDTH 1 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_IM_HP3R_DONE_EINT1 0x0020 /* IM_HP3R_DONE_EINT1 */ +#define ARIZONA_IM_HP3R_DONE_EINT1_MASK 0x0020 /* IM_HP3R_DONE_EINT1 */ +#define ARIZONA_IM_HP3R_DONE_EINT1_SHIFT 5 /* IM_HP3R_DONE_EINT1 */ +#define ARIZONA_IM_HP3R_DONE_EINT1_WIDTH 1 /* IM_HP3R_DONE_EINT1 */ +#define ARIZONA_IM_HP3L_DONE_EINT1 0x0010 /* IM_HP3L_DONE_EINT1 */ +#define ARIZONA_IM_HP3L_DONE_EINT1_MASK 0x0010 /* IM_HP3L_DONE_EINT1 */ +#define ARIZONA_IM_HP3L_DONE_EINT1_SHIFT 4 /* IM_HP3L_DONE_EINT1 */ +#define ARIZONA_IM_HP3L_DONE_EINT1_WIDTH 1 /* IM_HP3L_DONE_EINT1 */ +#define ARIZONA_IM_HP2R_DONE_EINT1 0x0008 /* IM_HP2R_DONE_EINT1 */ +#define ARIZONA_IM_HP2R_DONE_EINT1_MASK 0x0008 /* IM_HP2R_DONE_EINT1 */ +#define ARIZONA_IM_HP2R_DONE_EINT1_SHIFT 3 /* IM_HP2R_DONE_EINT1 */ +#define ARIZONA_IM_HP2R_DONE_EINT1_WIDTH 1 /* IM_HP2R_DONE_EINT1 */ +#define ARIZONA_IM_HP2L_DONE_EINT1 0x0004 /* IM_HP2L_DONE_EINT1 */ +#define ARIZONA_IM_HP2L_DONE_EINT1_MASK 0x0004 /* IM_HP2L_DONE_EINT1 */ +#define ARIZONA_IM_HP2L_DONE_EINT1_SHIFT 2 /* IM_HP2L_DONE_EINT1 */ +#define ARIZONA_IM_HP2L_DONE_EINT1_WIDTH 1 /* IM_HP2L_DONE_EINT1 */ +#define ARIZONA_IM_HP1R_DONE_EINT1 0x0002 /* IM_HP1R_DONE_EINT1 */ +#define ARIZONA_IM_HP1R_DONE_EINT1_MASK 0x0002 /* IM_HP1R_DONE_EINT1 */ +#define ARIZONA_IM_HP1R_DONE_EINT1_SHIFT 1 /* IM_HP1R_DONE_EINT1 */ +#define ARIZONA_IM_HP1R_DONE_EINT1_WIDTH 1 /* IM_HP1R_DONE_EINT1 */ +#define ARIZONA_IM_HP1L_DONE_EINT1 0x0001 /* IM_HP1L_DONE_EINT1 */ +#define ARIZONA_IM_HP1L_DONE_EINT1_MASK 0x0001 /* IM_HP1L_DONE_EINT1 */ +#define ARIZONA_IM_HP1L_DONE_EINT1_SHIFT 0 /* IM_HP1L_DONE_EINT1 */ +#define ARIZONA_IM_HP1L_DONE_EINT1_WIDTH 1 /* IM_HP1L_DONE_EINT1 */ /* * R3340 (0xD0C) - Interrupt Status 5 Mask @@ -5139,6 +5187,30 @@ #define ARIZONA_ISRC2_CFG_ERR_EINT2_MASK 0x0040 /* ISRC2_CFG_ERR_EINT2 */ #define ARIZONA_ISRC2_CFG_ERR_EINT2_SHIFT 6 /* ISRC2_CFG_ERR_EINT2 */ #define ARIZONA_ISRC2_CFG_ERR_EINT2_WIDTH 1 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_HP3R_DONE_EINT2 0x0020 /* HP3R_DONE_EINT2 */ +#define ARIZONA_HP3R_DONE_EINT2_MASK 0x0020 /* HP3R_DONE_EINT2 */ +#define ARIZONA_HP3R_DONE_EINT2_SHIFT 5 /* HP3R_DONE_EINT2 */ +#define ARIZONA_HP3R_DONE_EINT2_WIDTH 1 /* HP3R_DONE_EINT2 */ +#define ARIZONA_HP3L_DONE_EINT2 0x0010 /* HP3L_DONE_EINT2 */ +#define ARIZONA_HP3L_DONE_EINT2_MASK 0x0010 /* HP3L_DONE_EINT2 */ +#define ARIZONA_HP3L_DONE_EINT2_SHIFT 4 /* HP3L_DONE_EINT2 */ +#define ARIZONA_HP3L_DONE_EINT2_WIDTH 1 /* HP3L_DONE_EINT2 */ +#define ARIZONA_HP2R_DONE_EINT2 0x0008 /* HP2R_DONE_EINT2 */ +#define ARIZONA_HP2R_DONE_EINT2_MASK 0x0008 /* HP2R_DONE_EINT2 */ +#define ARIZONA_HP2R_DONE_EINT2_SHIFT 3 /* HP2R_DONE_EINT2 */ +#define ARIZONA_HP2R_DONE_EINT2_WIDTH 1 /* HP2R_DONE_EINT2 */ +#define ARIZONA_HP2L_DONE_EINT2 0x0004 /* HP2L_DONE_EINT2 */ +#define ARIZONA_HP2L_DONE_EINT2_MASK 0x0004 /* HP2L_DONE_EINT2 */ +#define ARIZONA_HP2L_DONE_EINT2_SHIFT 2 /* HP2L_DONE_EINT2 */ +#define ARIZONA_HP2L_DONE_EINT2_WIDTH 1 /* HP2L_DONE_EINT2 */ +#define ARIZONA_HP1R_DONE_EINT2 0x0002 /* HP1R_DONE_EINT2 */ +#define ARIZONA_HP1R_DONE_EINT2_MASK 0x0002 /* HP1R_DONE_EINT2 */ +#define ARIZONA_HP1R_DONE_EINT2_SHIFT 1 /* HP1R_DONE_EINT2 */ +#define ARIZONA_HP1R_DONE_EINT2_WIDTH 1 /* HP1R_DONE_EINT2 */ +#define ARIZONA_HP1L_DONE_EINT2 0x0001 /* HP1L_DONE_EINT2 */ +#define ARIZONA_HP1L_DONE_EINT2_MASK 0x0001 /* HP1L_DONE_EINT2 */ +#define ARIZONA_HP1L_DONE_EINT2_SHIFT 0 /* HP1L_DONE_EINT2 */ +#define ARIZONA_HP1L_DONE_EINT2_WIDTH 1 /* HP1L_DONE_EINT2 */ /* * R3348 (0xD14) - IRQ2 Status 5 @@ -5307,6 +5379,30 @@ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_MASK 0x0040 /* IM_ISRC2_CFG_ERR_EINT2 */ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_SHIFT 6 /* IM_ISRC2_CFG_ERR_EINT2 */ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_WIDTH 1 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_IM_HP3R_DONE_EINT2 0x0020 /* IM_HP3R_DONE_EINT2 */ +#define ARIZONA_IM_HP3R_DONE_EINT2_MASK 0x0020 /* IM_HP3R_DONE_EINT2 */ +#define ARIZONA_IM_HP3R_DONE_EINT2_SHIFT 5 /* IM_HP3R_DONE_EINT2 */ +#define ARIZONA_IM_HP3R_DONE_EINT2_WIDTH 1 /* IM_HP3R_DONE_EINT2 */ +#define ARIZONA_IM_HP3L_DONE_EINT2 0x0010 /* IM_HP3L_DONE_EINT2 */ +#define ARIZONA_IM_HP3L_DONE_EINT2_MASK 0x0010 /* IM_HP3L_DONE_EINT2 */ +#define ARIZONA_IM_HP3L_DONE_EINT2_SHIFT 4 /* IM_HP3L_DONE_EINT2 */ +#define ARIZONA_IM_HP3L_DONE_EINT2_WIDTH 1 /* IM_HP3L_DONE_EINT2 */ +#define ARIZONA_IM_HP2R_DONE_EINT2 0x0008 /* IM_HP2R_DONE_EINT2 */ +#define ARIZONA_IM_HP2R_DONE_EINT2_MASK 0x0008 /* IM_HP2R_DONE_EINT2 */ +#define ARIZONA_IM_HP2R_DONE_EINT2_SHIFT 3 /* IM_HP2R_DONE_EINT2 */ +#define ARIZONA_IM_HP2R_DONE_EINT2_WIDTH 1 /* IM_HP2R_DONE_EINT2 */ +#define ARIZONA_IM_HP2L_DONE_EINT2 0x0004 /* IM_HP2L_DONE_EINT2 */ +#define ARIZONA_IM_HP2L_DONE_EINT2_MASK 0x0004 /* IM_HP2L_DONE_EINT2 */ +#define ARIZONA_IM_HP2L_DONE_EINT2_SHIFT 2 /* IM_HP2L_DONE_EINT2 */ +#define ARIZONA_IM_HP2L_DONE_EINT2_WIDTH 1 /* IM_HP2L_DONE_EINT2 */ +#define ARIZONA_IM_HP1R_DONE_EINT2 0x0002 /* IM_HP1R_DONE_EINT2 */ +#define ARIZONA_IM_HP1R_DONE_EINT2_MASK 0x0002 /* IM_HP1R_DONE_EINT2 */ +#define ARIZONA_IM_HP1R_DONE_EINT2_SHIFT 1 /* IM_HP1R_DONE_EINT2 */ +#define ARIZONA_IM_HP1R_DONE_EINT2_WIDTH 1 /* IM_HP1R_DONE_EINT2 */ +#define ARIZONA_IM_HP1L_DONE_EINT2 0x0001 /* IM_HP1L_DONE_EINT2 */ +#define ARIZONA_IM_HP1L_DONE_EINT2_MASK 0x0001 /* IM_HP1L_DONE_EINT2 */ +#define ARIZONA_IM_HP1L_DONE_EINT2_SHIFT 0 /* IM_HP1L_DONE_EINT2 */ +#define ARIZONA_IM_HP1L_DONE_EINT2_WIDTH 1 /* IM_HP1L_DONE_EINT2 */ /* * R3356 (0xD1C) - IRQ2 Status 5 Mask @@ -5464,6 +5560,30 @@ #define ARIZONA_ISRC2_CFG_ERR_STS_MASK 0x0040 /* ISRC2_CFG_ERR_STS */ #define ARIZONA_ISRC2_CFG_ERR_STS_SHIFT 6 /* ISRC2_CFG_ERR_STS */ #define ARIZONA_ISRC2_CFG_ERR_STS_WIDTH 1 /* ISRC2_CFG_ERR_STS */ +#define ARIZONA_HP3R_DONE_STS 0x0020 /* HP3R_DONE_STS */ +#define ARIZONA_HP3R_DONE_STS_MASK 0x0020 /* HP3R_DONE_STS */ +#define ARIZONA_HP3R_DONE_STS_SHIFT 5 /* HP3R_DONE_STS */ +#define ARIZONA_HP3R_DONE_STS_WIDTH 1 /* HP3R_DONE_STS */ +#define ARIZONA_HP3L_DONE_STS 0x0010 /* HP3L_DONE_STS */ +#define ARIZONA_HP3L_DONE_STS_MASK 0x0010 /* HP3L_DONE_STS */ +#define ARIZONA_HP3L_DONE_STS_SHIFT 4 /* HP3L_DONE_STS */ +#define ARIZONA_HP3L_DONE_STS_WIDTH 1 /* HP3L_DONE_STS */ +#define ARIZONA_HP2R_DONE_STS 0x0008 /* HP2R_DONE_STS */ +#define ARIZONA_HP2R_DONE_STS_MASK 0x0008 /* HP2R_DONE_STS */ +#define ARIZONA_HP2R_DONE_STS_SHIFT 3 /* HP2R_DONE_STS */ +#define ARIZONA_HP2R_DONE_STS_WIDTH 1 /* HP2R_DONE_STS */ +#define ARIZONA_HP2L_DONE_STS 0x0004 /* HP2L_DONE_STS */ +#define ARIZONA_HP2L_DONE_STS_MASK 0x0004 /* HP2L_DONE_STS */ +#define ARIZONA_HP2L_DONE_STS_SHIFT 2 /* HP2L_DONE_STS */ +#define ARIZONA_HP2L_DONE_STS_WIDTH 1 /* HP2L_DONE_STS */ +#define ARIZONA_HP1R_DONE_STS 0x0002 /* HP1R_DONE_STS */ +#define ARIZONA_HP1R_DONE_STS_MASK 0x0002 /* HP1R_DONE_STS */ +#define ARIZONA_HP1R_DONE_STS_SHIFT 1 /* HP1R_DONE_STS */ +#define ARIZONA_HP1R_DONE_STS_WIDTH 1 /* HP1R_DONE_STS */ +#define ARIZONA_HP1L_DONE_STS 0x0001 /* HP1L_DONE_STS */ +#define ARIZONA_HP1L_DONE_STS_MASK 0x0001 /* HP1L_DONE_STS */ +#define ARIZONA_HP1L_DONE_STS_SHIFT 0 /* HP1L_DONE_STS */ +#define ARIZONA_HP1L_DONE_STS_WIDTH 1 /* HP1L_DONE_STS */ /* * R3363 (0xD23) - Interrupt Raw Status 5 -- cgit v1.2.3 From c0fe2c5b3f730e3d56d37f7b731a5b1191a4e8bf Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 15 Jul 2014 11:21:47 +0100 Subject: mfd: arizona: Rename thermal shutdown interrupt Newer versions of the IP introduce short circuit protection which will also shutdown the speaker. Rename the interrupt and associated register bits associated with thermal events to better fit the function and avoid conflict with future interrupt additions. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm5102-tables.c | 8 ++-- drivers/mfd/wm5110-tables.c | 8 ++-- drivers/mfd/wm8997-tables.c | 8 ++-- include/linux/mfd/arizona/core.h | 4 +- include/linux/mfd/arizona/registers.h | 80 +++++++++++++++++------------------ sound/soc/codecs/arizona.c | 10 ++--- 6 files changed, 59 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index c8a993bd17ae..fb4d4bb0f47d 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -138,11 +138,11 @@ static const struct regmap_irq wm5102_irqs[ARIZONA_NUM_IRQ] = { .reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT_WARN] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1 }, [ARIZONA_IRQ_HPDET] = { .reg_offset = 2, .mask = ARIZONA_HPDET_EINT1 diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 4729007055d8..2822768f2df1 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -340,11 +340,11 @@ static const struct regmap_irq wm5110_irqs[ARIZONA_NUM_IRQ] = { .reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT_WARN] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1 }, [ARIZONA_IRQ_HPDET] = { .reg_offset = 2, .mask = ARIZONA_HPDET_EINT1 diff --git a/drivers/mfd/wm8997-tables.c b/drivers/mfd/wm8997-tables.c index 04bc6d5ff285..510da3b52324 100644 --- a/drivers/mfd/wm8997-tables.c +++ b/drivers/mfd/wm8997-tables.c @@ -65,11 +65,11 @@ static const struct regmap_irq wm8997_irqs[ARIZONA_NUM_IRQ] = { [ARIZONA_IRQ_GP2] = { .reg_offset = 0, .mask = ARIZONA_GP2_EINT1 }, [ARIZONA_IRQ_GP1] = { .reg_offset = 0, .mask = ARIZONA_GP1_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT_WARN] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1 }, [ARIZONA_IRQ_HPDET] = { .reg_offset = 2, .mask = ARIZONA_HPDET_EINT1 diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 6a62fc99f399..819edf5d1edf 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -46,8 +46,8 @@ enum arizona_type { #define ARIZONA_IRQ_DSP_IRQ6 17 #define ARIZONA_IRQ_DSP_IRQ7 18 #define ARIZONA_IRQ_DSP_IRQ8 19 -#define ARIZONA_IRQ_SPK_SHUTDOWN_WARN 20 -#define ARIZONA_IRQ_SPK_SHUTDOWN 21 +#define ARIZONA_IRQ_SPK_OVERHEAT_WARN 20 +#define ARIZONA_IRQ_SPK_OVERHEAT 21 #define ARIZONA_IRQ_MICDET 22 #define ARIZONA_IRQ_HPDET 23 #define ARIZONA_IRQ_WSEQ_DONE 24 diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index df93563ea8c5..f7d6f9e91da1 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -4691,14 +4691,14 @@ /* * R3330 (0xD02) - Interrupt Status 3 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1 0x8000 /* SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_MASK 0x8000 /* SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_SHIFT 15 /* SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_WIDTH 1 /* SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_EINT1 0x4000 /* SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_EINT1_MASK 0x4000 /* SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_EINT1_SHIFT 14 /* SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_EINT1_WIDTH 1 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT1 0x8000 /* SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_MASK 0x8000 /* SPK_OVERHEAD_WARN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_SHIFT 15 /* SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_WIDTH 1 /* SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_EINT1 0x4000 /* SPK_OVERHEAT_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_EINT1_MASK 0x4000 /* SPK_OVERHEAT_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_EINT1_SHIFT 14 /* SPK_OVERHEAT_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_EINT1_WIDTH 1 /* SPK_OVERHEAT_EINT1 */ #define ARIZONA_HPDET_EINT1 0x2000 /* HPDET_EINT1 */ #define ARIZONA_HPDET_EINT1_MASK 0x2000 /* HPDET_EINT1 */ #define ARIZONA_HPDET_EINT1_SHIFT 13 /* HPDET_EINT1 */ @@ -4883,14 +4883,14 @@ /* * R3338 (0xD0A) - Interrupt Status 3 Mask */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1 0x8000 /* IM_SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_MASK 0x8000 /* IM_SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_SHIFT 15 /* IM_SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_WIDTH 1 /* IM_SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT1 0x4000 /* IM_SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_MASK 0x4000 /* IM_SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_SHIFT 14 /* IM_SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_WIDTH 1 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1 0x8000 /* IM_SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_MASK 0x8000 /* IM_SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_SHIFT 15 /* IM_SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_WIDTH 1 /* IM_SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT1 0x4000 /* IM_SPK_OVERHEAT_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT1_MASK 0x4000 /* IM_SPK_OVERHEAT_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT1_SHIFT 14 /* IM_SPK_OVERHEAT_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT1_WIDTH 1 /* IM_SPK_OVERHEAT_EINT1 */ #define ARIZONA_IM_HPDET_EINT1 0x2000 /* IM_HPDET_EINT1 */ #define ARIZONA_IM_HPDET_EINT1_MASK 0x2000 /* IM_HPDET_EINT1 */ #define ARIZONA_IM_HPDET_EINT1_SHIFT 13 /* IM_HPDET_EINT1 */ @@ -5083,14 +5083,14 @@ /* * R3346 (0xD12) - IRQ2 Status 3 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2 0x8000 /* SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_MASK 0x8000 /* SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_SHIFT 15 /* SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_WIDTH 1 /* SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_EINT2 0x4000 /* SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_EINT2_MASK 0x4000 /* SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_EINT2_SHIFT 14 /* SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_EINT2_WIDTH 1 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT2 0x8000 /* SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_MASK 0x8000 /* SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_SHIFT 15 /* SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_WIDTH 1 /* SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_EINT2 0x4000 /* SPK_OVERHEAT_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_EINT2_MASK 0x4000 /* SPK_OVERHEAT_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_EINT2_SHIFT 14 /* SPK_OVERHEAT_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_EINT2_WIDTH 1 /* SPK_OVERHEAT_EINT2 */ #define ARIZONA_HPDET_EINT2 0x2000 /* HPDET_EINT2 */ #define ARIZONA_HPDET_EINT2_MASK 0x2000 /* HPDET_EINT2 */ #define ARIZONA_HPDET_EINT2_SHIFT 13 /* HPDET_EINT2 */ @@ -5275,14 +5275,14 @@ /* * R3354 (0xD1A) - IRQ2 Status 3 Mask */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2 0x8000 /* IM_SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_MASK 0x8000 /* IM_SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_SHIFT 15 /* IM_SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_WIDTH 1 /* IM_SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT2 0x4000 /* IM_SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_MASK 0x4000 /* IM_SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_SHIFT 14 /* IM_SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_WIDTH 1 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2 0x8000 /* IM_SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_MASK 0x8000 /* IM_SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_SHIFT 15 /* IM_SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_WIDTH 1 /* IM_SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT2 0x4000 /* IM_SPK_OVERHEAT_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT2_MASK 0x4000 /* IM_SPK_OVERHEAT_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT2_SHIFT 14 /* IM_SPK_OVERHEAT_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT2_WIDTH 1 /* IM_SPK_OVERHEAT_EINT2 */ #define ARIZONA_IM_HPDET_EINT2 0x2000 /* IM_HPDET_EINT2 */ #define ARIZONA_IM_HPDET_EINT2_MASK 0x2000 /* IM_HPDET_EINT2 */ #define ARIZONA_IM_HPDET_EINT2_SHIFT 13 /* IM_HPDET_EINT2 */ @@ -5456,14 +5456,14 @@ /* * R3361 (0xD21) - Interrupt Raw Status 3 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_STS 0x8000 /* SPK_SHUTDOWN_WARN_STS */ -#define ARIZONA_SPK_SHUTDOWN_WARN_STS_MASK 0x8000 /* SPK_SHUTDOWN_WARN_STS */ -#define ARIZONA_SPK_SHUTDOWN_WARN_STS_SHIFT 15 /* SPK_SHUTDOWN_WARN_STS */ -#define ARIZONA_SPK_SHUTDOWN_WARN_STS_WIDTH 1 /* SPK_SHUTDOWN_WARN_STS */ -#define ARIZONA_SPK_SHUTDOWN_STS 0x4000 /* SPK_SHUTDOWN_STS */ -#define ARIZONA_SPK_SHUTDOWN_STS_MASK 0x4000 /* SPK_SHUTDOWN_STS */ -#define ARIZONA_SPK_SHUTDOWN_STS_SHIFT 14 /* SPK_SHUTDOWN_STS */ -#define ARIZONA_SPK_SHUTDOWN_STS_WIDTH 1 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK_OVERHEAT_WARN_STS 0x8000 /* SPK_OVERHEAT_WARN_STS */ +#define ARIZONA_SPK_OVERHEAT_WARN_STS_MASK 0x8000 /* SPK_OVERHEAT_WARN_STS */ +#define ARIZONA_SPK_OVERHEAT_WARN_STS_SHIFT 15 /* SPK_OVERHEAT_WARN_STS */ +#define ARIZONA_SPK_OVERHEAT_WARN_STS_WIDTH 1 /* SPK_OVERHEAT_WARN_STS */ +#define ARIZONA_SPK_OVERHEAT_STS 0x4000 /* SPK_OVERHEAT_STS */ +#define ARIZONA_SPK_OVERHEAT_STS_MASK 0x4000 /* SPK_OVERHEAT_STS */ +#define ARIZONA_SPK_OVERHEAT_STS_SHIFT 14 /* SPK_OVERHEAT_STS */ +#define ARIZONA_SPK_OVERHEAT_STS_WIDTH 1 /* SPK_OVERHEAT_STS */ #define ARIZONA_HPDET_STS 0x2000 /* HPDET_STS */ #define ARIZONA_HPDET_STS_MASK 0x2000 /* HPDET_STS */ #define ARIZONA_HPDET_STS_SHIFT 13 /* HPDET_STS */ diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 29e198f57d4c..1b14105d8da3 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -107,7 +107,7 @@ static int arizona_spk_ev(struct snd_soc_dapm_widget *w, break; case SND_SOC_DAPM_POST_PMU: val = snd_soc_read(codec, ARIZONA_INTERRUPT_RAW_STATUS_3); - if (val & ARIZONA_SPK_SHUTDOWN_STS) { + if (val & ARIZONA_SPK_OVERHEAT_STS) { dev_crit(arizona->dev, "Speaker not enabled due to temperature\n"); return -EBUSY; @@ -159,7 +159,7 @@ static irqreturn_t arizona_thermal_warn(int irq, void *data) if (ret != 0) { dev_err(arizona->dev, "Failed to read thermal status: %d\n", ret); - } else if (val & ARIZONA_SPK_SHUTDOWN_WARN_STS) { + } else if (val & ARIZONA_SPK_OVERHEAT_WARN_STS) { dev_crit(arizona->dev, "Thermal warning\n"); } @@ -177,7 +177,7 @@ static irqreturn_t arizona_thermal_shutdown(int irq, void *data) if (ret != 0) { dev_err(arizona->dev, "Failed to read thermal status: %d\n", ret); - } else if (val & ARIZONA_SPK_SHUTDOWN_STS) { + } else if (val & ARIZONA_SPK_OVERHEAT_STS) { dev_crit(arizona->dev, "Thermal shutdown\n"); ret = regmap_update_bits(arizona->regmap, ARIZONA_OUTPUT_ENABLES_1, @@ -223,7 +223,7 @@ int arizona_init_spk(struct snd_soc_codec *codec) break; } - ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_SHUTDOWN_WARN, + ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_OVERHEAT_WARN, "Thermal warning", arizona_thermal_warn, arizona); if (ret != 0) @@ -231,7 +231,7 @@ int arizona_init_spk(struct snd_soc_codec *codec) "Failed to get thermal warning IRQ: %d\n", ret); - ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_SHUTDOWN, + ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_OVERHEAT, "Thermal shutdown", arizona_thermal_shutdown, arizona); if (ret != 0) -- cgit v1.2.3 From 3215501fc90e109c7b854423e02eb05bc638b555 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 15 Jul 2014 11:21:48 +0100 Subject: mfd: wm5110: Add new interrupt register definitions Newer versions of the IP have a lot of new interrupts and move several existing interrupts. This patch adds the register definitions and regmap hookup for these interrupts. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/arizona-irq.c | 10 +- drivers/mfd/arizona.h | 1 + drivers/mfd/wm5110-tables.c | 213 +++++++++++++ include/linux/mfd/arizona/core.h | 21 +- include/linux/mfd/arizona/registers.h | 585 ++++++++++++++++++++++++++++++++++ 5 files changed, 827 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 17102f589100..e780bc40165d 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -203,7 +203,15 @@ int arizona_irq_init(struct arizona *arizona) #ifdef CONFIG_MFD_WM5110 case WM5110: aod = &wm5110_aod; - irq = &wm5110_irq; + + switch (arizona->rev) { + case 0 ... 2: + irq = &wm5110_irq; + break; + default: + irq = &wm5110_revd_irq; + break; + } ctrlif_error = false; break; diff --git a/drivers/mfd/arizona.h b/drivers/mfd/arizona.h index 2951498ab9a1..fbe2843271c5 100644 --- a/drivers/mfd/arizona.h +++ b/drivers/mfd/arizona.h @@ -36,6 +36,7 @@ extern const struct regmap_irq_chip wm5102_irq; extern const struct regmap_irq_chip wm5110_aod; extern const struct regmap_irq_chip wm5110_irq; +extern const struct regmap_irq_chip wm5110_revd_irq; extern const struct regmap_irq_chip wm8997_aod; extern const struct regmap_irq_chip wm8997_irq; diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 2822768f2df1..9b98ee559188 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -457,6 +457,209 @@ const struct regmap_irq_chip wm5110_irq = { }; EXPORT_SYMBOL_GPL(wm5110_irq); +static const struct regmap_irq wm5110_revd_irqs[ARIZONA_NUM_IRQ] = { + [ARIZONA_IRQ_GP4] = { .reg_offset = 0, .mask = ARIZONA_GP4_EINT1 }, + [ARIZONA_IRQ_GP3] = { .reg_offset = 0, .mask = ARIZONA_GP3_EINT1 }, + [ARIZONA_IRQ_GP2] = { .reg_offset = 0, .mask = ARIZONA_GP2_EINT1 }, + [ARIZONA_IRQ_GP1] = { .reg_offset = 0, .mask = ARIZONA_GP1_EINT1 }, + + [ARIZONA_IRQ_DSP4_RAM_RDY] = { + .reg_offset = 1, .mask = ARIZONA_DSP4_RAM_RDY_EINT1 + }, + [ARIZONA_IRQ_DSP3_RAM_RDY] = { + .reg_offset = 1, .mask = ARIZONA_DSP3_RAM_RDY_EINT1 + }, + [ARIZONA_IRQ_DSP2_RAM_RDY] = { + .reg_offset = 1, .mask = ARIZONA_DSP2_RAM_RDY_EINT1 + }, + [ARIZONA_IRQ_DSP1_RAM_RDY] = { + .reg_offset = 1, .mask = ARIZONA_DSP1_RAM_RDY_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ8] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ8_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ7] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ7_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ6] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ6_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ5] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ5_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ4] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ4_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ3] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ3_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ2] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ2_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ1] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1 + }, + + [ARIZONA_IRQ_SPK_OVERHEAT_WARN] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1 + }, + [ARIZONA_IRQ_SPK_OVERHEAT] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1 + }, + [ARIZONA_IRQ_HPDET] = { + .reg_offset = 2, .mask = ARIZONA_HPDET_EINT1 + }, + [ARIZONA_IRQ_MICDET] = { + .reg_offset = 2, .mask = ARIZONA_MICDET_EINT1 + }, + [ARIZONA_IRQ_WSEQ_DONE] = { + .reg_offset = 2, .mask = ARIZONA_WSEQ_DONE_EINT1 + }, + [ARIZONA_IRQ_DRC2_SIG_DET] = { + .reg_offset = 2, .mask = ARIZONA_DRC2_SIG_DET_EINT1 + }, + [ARIZONA_IRQ_DRC1_SIG_DET] = { + .reg_offset = 2, .mask = ARIZONA_DRC1_SIG_DET_EINT1 + }, + [ARIZONA_IRQ_ASRC2_LOCK] = { + .reg_offset = 2, .mask = ARIZONA_ASRC2_LOCK_EINT1 + }, + [ARIZONA_IRQ_ASRC1_LOCK] = { + .reg_offset = 2, .mask = ARIZONA_ASRC1_LOCK_EINT1 + }, + [ARIZONA_IRQ_UNDERCLOCKED] = { + .reg_offset = 2, .mask = ARIZONA_UNDERCLOCKED_EINT1 + }, + [ARIZONA_IRQ_OVERCLOCKED] = { + .reg_offset = 2, .mask = ARIZONA_OVERCLOCKED_EINT1 + }, + [ARIZONA_IRQ_FLL2_LOCK] = { + .reg_offset = 2, .mask = ARIZONA_FLL2_LOCK_EINT1 + }, + [ARIZONA_IRQ_FLL1_LOCK] = { + .reg_offset = 2, .mask = ARIZONA_FLL1_LOCK_EINT1 + }, + [ARIZONA_IRQ_CLKGEN_ERR] = { + .reg_offset = 2, .mask = ARIZONA_CLKGEN_ERR_EINT1 + }, + [ARIZONA_IRQ_CLKGEN_ERR_ASYNC] = { + .reg_offset = 2, .mask = ARIZONA_CLKGEN_ERR_ASYNC_EINT1 + }, + + [ARIZONA_IRQ_CTRLIF_ERR] = { + .reg_offset = 3, .mask = ARIZONA_V2_CTRLIF_ERR_EINT1 + }, + [ARIZONA_IRQ_MIXER_DROPPED_SAMPLES] = { + .reg_offset = 3, .mask = ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1 + }, + [ARIZONA_IRQ_ASYNC_CLK_ENA_LOW] = { + .reg_offset = 3, .mask = ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1 + }, + [ARIZONA_IRQ_SYSCLK_ENA_LOW] = { + .reg_offset = 3, .mask = ARIZONA_V2_SYSCLK_ENA_LOW_EINT1 + }, + [ARIZONA_IRQ_ISRC1_CFG_ERR] = { + .reg_offset = 3, .mask = ARIZONA_V2_ISRC1_CFG_ERR_EINT1 + }, + [ARIZONA_IRQ_ISRC2_CFG_ERR] = { + .reg_offset = 3, .mask = ARIZONA_V2_ISRC2_CFG_ERR_EINT1 + }, + [ARIZONA_IRQ_ISRC3_CFG_ERR] = { + .reg_offset = 3, .mask = ARIZONA_V2_ISRC3_CFG_ERR_EINT1 + }, + [ARIZONA_IRQ_HP3R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP3R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP3L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP3L_DONE_EINT1 + }, + [ARIZONA_IRQ_HP2R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP2R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP2L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP2L_DONE_EINT1 + }, + [ARIZONA_IRQ_HP1R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP1R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP1L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP1L_DONE_EINT1 + }, + + [ARIZONA_IRQ_BOOT_DONE] = { + .reg_offset = 4, .mask = ARIZONA_BOOT_DONE_EINT1 + }, + [ARIZONA_IRQ_ASRC_CFG_ERR] = { + .reg_offset = 4, .mask = ARIZONA_V2_ASRC_CFG_ERR_EINT1 + }, + [ARIZONA_IRQ_FLL2_CLOCK_OK] = { + .reg_offset = 4, .mask = ARIZONA_FLL2_CLOCK_OK_EINT1 + }, + [ARIZONA_IRQ_FLL1_CLOCK_OK] = { + .reg_offset = 4, .mask = ARIZONA_FLL1_CLOCK_OK_EINT1 + }, + + [ARIZONA_IRQ_DSP_SHARED_WR_COLL] = { + .reg_offset = 5, .mask = ARIZONA_DSP_SHARED_WR_COLL_EINT1 + }, + [ARIZONA_IRQ_SPK_SHUTDOWN] = { + .reg_offset = 5, .mask = ARIZONA_SPK_SHUTDOWN_EINT1 + }, + [ARIZONA_IRQ_SPK1R_SHORT] = { + .reg_offset = 5, .mask = ARIZONA_SPK1R_SHORT_EINT1 + }, + [ARIZONA_IRQ_SPK1L_SHORT] = { + .reg_offset = 5, .mask = ARIZONA_SPK1L_SHORT_EINT1 + }, + [ARIZONA_IRQ_HP3R_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP3R_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP3R_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP3R_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP3L_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP3L_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP3L_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP3L_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP2R_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP2R_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP2R_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP2R_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP2L_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP2L_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP2L_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP2L_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP1R_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP1R_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP1R_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP1R_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP1L_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP1L_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP1L_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP1L_SC_POS_EINT1 + }, +}; + +const struct regmap_irq_chip wm5110_revd_irq = { + .name = "wm5110 IRQ", + .status_base = ARIZONA_INTERRUPT_STATUS_1, + .mask_base = ARIZONA_INTERRUPT_STATUS_1_MASK, + .ack_base = ARIZONA_INTERRUPT_STATUS_1, + .num_regs = 6, + .irqs = wm5110_revd_irqs, + .num_irqs = ARRAY_SIZE(wm5110_revd_irqs), +}; +EXPORT_SYMBOL_GPL(wm5110_revd_irq); + static const struct reg_default wm5110_reg_default[] = { { 0x00000008, 0x0019 }, /* R8 - Ctrl IF SPI CFG 1 */ { 0x00000009, 0x0001 }, /* R9 - Ctrl IF I2C1 CFG 1 */ @@ -1286,12 +1489,14 @@ static const struct reg_default wm5110_reg_default[] = { { 0x00000D0A, 0xFFFF }, /* R3338 - Interrupt Status 3 Mask */ { 0x00000D0B, 0xFFFF }, /* R3339 - Interrupt Status 4 Mask */ { 0x00000D0C, 0xFEFF }, /* R3340 - Interrupt Status 5 Mask */ + { 0x00000D0D, 0xFFFF }, /* R3341 - Interrupt Status 6 Mask */ { 0x00000D0F, 0x0000 }, /* R3343 - Interrupt Control */ { 0x00000D18, 0xFFFF }, /* R3352 - IRQ2 Status 1 Mask */ { 0x00000D19, 0xFFFF }, /* R3353 - IRQ2 Status 2 Mask */ { 0x00000D1A, 0xFFFF }, /* R3354 - IRQ2 Status 3 Mask */ { 0x00000D1B, 0xFFFF }, /* R3355 - IRQ2 Status 4 Mask */ { 0x00000D1C, 0xFFFF }, /* R3356 - IRQ2 Status 5 Mask */ + { 0x00000D1D, 0xFFFF }, /* R3357 - IRQ2 Status 6 Mask */ { 0x00000D1F, 0x0000 }, /* R3359 - IRQ2 Control */ { 0x00000D53, 0xFFFF }, /* R3411 - AOD IRQ Mask IRQ1 */ { 0x00000D54, 0xFFFF }, /* R3412 - AOD IRQ Mask IRQ2 */ @@ -2323,22 +2528,26 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_INTERRUPT_STATUS_3: case ARIZONA_INTERRUPT_STATUS_4: case ARIZONA_INTERRUPT_STATUS_5: + case ARIZONA_INTERRUPT_STATUS_6: case ARIZONA_INTERRUPT_STATUS_1_MASK: case ARIZONA_INTERRUPT_STATUS_2_MASK: case ARIZONA_INTERRUPT_STATUS_3_MASK: case ARIZONA_INTERRUPT_STATUS_4_MASK: case ARIZONA_INTERRUPT_STATUS_5_MASK: + case ARIZONA_INTERRUPT_STATUS_6_MASK: case ARIZONA_INTERRUPT_CONTROL: case ARIZONA_IRQ2_STATUS_1: case ARIZONA_IRQ2_STATUS_2: case ARIZONA_IRQ2_STATUS_3: case ARIZONA_IRQ2_STATUS_4: case ARIZONA_IRQ2_STATUS_5: + case ARIZONA_IRQ2_STATUS_6: case ARIZONA_IRQ2_STATUS_1_MASK: case ARIZONA_IRQ2_STATUS_2_MASK: case ARIZONA_IRQ2_STATUS_3_MASK: case ARIZONA_IRQ2_STATUS_4_MASK: case ARIZONA_IRQ2_STATUS_5_MASK: + case ARIZONA_IRQ2_STATUS_6_MASK: case ARIZONA_IRQ2_CONTROL: case ARIZONA_INTERRUPT_RAW_STATUS_2: case ARIZONA_INTERRUPT_RAW_STATUS_3: @@ -2347,6 +2556,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_INTERRUPT_RAW_STATUS_6: case ARIZONA_INTERRUPT_RAW_STATUS_7: case ARIZONA_INTERRUPT_RAW_STATUS_8: + case ARIZONA_INTERRUPT_RAW_STATUS_9: case ARIZONA_IRQ_PIN_STATUS: case ARIZONA_AOD_WKUP_AND_TRIG: case ARIZONA_AOD_IRQ1: @@ -2622,11 +2832,13 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_INTERRUPT_STATUS_3: case ARIZONA_INTERRUPT_STATUS_4: case ARIZONA_INTERRUPT_STATUS_5: + case ARIZONA_INTERRUPT_STATUS_6: case ARIZONA_IRQ2_STATUS_1: case ARIZONA_IRQ2_STATUS_2: case ARIZONA_IRQ2_STATUS_3: case ARIZONA_IRQ2_STATUS_4: case ARIZONA_IRQ2_STATUS_5: + case ARIZONA_IRQ2_STATUS_6: case ARIZONA_INTERRUPT_RAW_STATUS_2: case ARIZONA_INTERRUPT_RAW_STATUS_3: case ARIZONA_INTERRUPT_RAW_STATUS_4: @@ -2634,6 +2846,7 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_INTERRUPT_RAW_STATUS_6: case ARIZONA_INTERRUPT_RAW_STATUS_7: case ARIZONA_INTERRUPT_RAW_STATUS_8: + case ARIZONA_INTERRUPT_RAW_STATUS_9: case ARIZONA_IRQ_PIN_STATUS: case ARIZONA_AOD_WKUP_AND_TRIG: case ARIZONA_AOD_IRQ1: diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 819edf5d1edf..8bc7601cca68 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -84,8 +84,25 @@ enum arizona_type { #define ARIZONA_IRQ_HP2L_DONE 55 #define ARIZONA_IRQ_HP1R_DONE 56 #define ARIZONA_IRQ_HP1L_DONE 57 - -#define ARIZONA_NUM_IRQ 58 +#define ARIZONA_IRQ_ISRC3_CFG_ERR 58 +#define ARIZONA_IRQ_DSP_SHARED_WR_COLL 59 +#define ARIZONA_IRQ_SPK_SHUTDOWN 60 +#define ARIZONA_IRQ_SPK1R_SHORT 61 +#define ARIZONA_IRQ_SPK1L_SHORT 62 +#define ARIZONA_IRQ_HP3R_SC_NEG 63 +#define ARIZONA_IRQ_HP3R_SC_POS 64 +#define ARIZONA_IRQ_HP3L_SC_NEG 65 +#define ARIZONA_IRQ_HP3L_SC_POS 66 +#define ARIZONA_IRQ_HP2R_SC_NEG 67 +#define ARIZONA_IRQ_HP2R_SC_POS 68 +#define ARIZONA_IRQ_HP2L_SC_NEG 69 +#define ARIZONA_IRQ_HP2L_SC_POS 70 +#define ARIZONA_IRQ_HP1R_SC_NEG 71 +#define ARIZONA_IRQ_HP1R_SC_POS 72 +#define ARIZONA_IRQ_HP1L_SC_NEG 73 +#define ARIZONA_IRQ_HP1L_SC_POS 74 + +#define ARIZONA_NUM_IRQ 75 struct snd_soc_dapm_context; diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index f7d6f9e91da1..dbd23c36de21 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -878,22 +878,26 @@ #define ARIZONA_INTERRUPT_STATUS_3 0xD02 #define ARIZONA_INTERRUPT_STATUS_4 0xD03 #define ARIZONA_INTERRUPT_STATUS_5 0xD04 +#define ARIZONA_INTERRUPT_STATUS_6 0xD05 #define ARIZONA_INTERRUPT_STATUS_1_MASK 0xD08 #define ARIZONA_INTERRUPT_STATUS_2_MASK 0xD09 #define ARIZONA_INTERRUPT_STATUS_3_MASK 0xD0A #define ARIZONA_INTERRUPT_STATUS_4_MASK 0xD0B #define ARIZONA_INTERRUPT_STATUS_5_MASK 0xD0C +#define ARIZONA_INTERRUPT_STATUS_6_MASK 0xD0D #define ARIZONA_INTERRUPT_CONTROL 0xD0F #define ARIZONA_IRQ2_STATUS_1 0xD10 #define ARIZONA_IRQ2_STATUS_2 0xD11 #define ARIZONA_IRQ2_STATUS_3 0xD12 #define ARIZONA_IRQ2_STATUS_4 0xD13 #define ARIZONA_IRQ2_STATUS_5 0xD14 +#define ARIZONA_IRQ2_STATUS_6 0xD15 #define ARIZONA_IRQ2_STATUS_1_MASK 0xD18 #define ARIZONA_IRQ2_STATUS_2_MASK 0xD19 #define ARIZONA_IRQ2_STATUS_3_MASK 0xD1A #define ARIZONA_IRQ2_STATUS_4_MASK 0xD1B #define ARIZONA_IRQ2_STATUS_5_MASK 0xD1C +#define ARIZONA_IRQ2_STATUS_6_MASK 0xD1D #define ARIZONA_IRQ2_CONTROL 0xD1F #define ARIZONA_INTERRUPT_RAW_STATUS_2 0xD20 #define ARIZONA_INTERRUPT_RAW_STATUS_3 0xD21 @@ -902,6 +906,7 @@ #define ARIZONA_INTERRUPT_RAW_STATUS_6 0xD24 #define ARIZONA_INTERRUPT_RAW_STATUS_7 0xD25 #define ARIZONA_INTERRUPT_RAW_STATUS_8 0xD26 +#define ARIZONA_INTERRUPT_RAW_STATUS_9 0xD28 #define ARIZONA_IRQ_PIN_STATUS 0xD40 #define ARIZONA_ADSP2_IRQ0 0xD41 #define ARIZONA_AOD_WKUP_AND_TRIG 0xD50 @@ -4820,6 +4825,53 @@ #define ARIZONA_HP1L_DONE_EINT1_SHIFT 0 /* HP1L_DONE_EINT1 */ #define ARIZONA_HP1L_DONE_EINT1_WIDTH 1 /* HP1L_DONE_EINT1 */ +/* + * R3331 (0xD03) - Interrupt Status 4 (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_AIF3_ERR_EINT1 0x8000 /* AIF3_ERR_EINT1 */ +#define ARIZONA_V2_AIF3_ERR_EINT1_MASK 0x8000 /* AIF3_ERR_EINT1 */ +#define ARIZONA_V2_AIF3_ERR_EINT1_SHIFT 15 /* AIF3_ERR_EINT1 */ +#define ARIZONA_V2_AIF3_ERR_EINT1_WIDTH 1 /* AIF3_ERR_EINT1 */ +#define ARIZONA_V2_AIF2_ERR_EINT1 0x4000 /* AIF2_ERR_EINT1 */ +#define ARIZONA_V2_AIF2_ERR_EINT1_MASK 0x4000 /* AIF2_ERR_EINT1 */ +#define ARIZONA_V2_AIF2_ERR_EINT1_SHIFT 14 /* AIF2_ERR_EINT1 */ +#define ARIZONA_V2_AIF2_ERR_EINT1_WIDTH 1 /* AIF2_ERR_EINT1 */ +#define ARIZONA_V2_AIF1_ERR_EINT1 0x2000 /* AIF1_ERR_EINT1 */ +#define ARIZONA_V2_AIF1_ERR_EINT1_MASK 0x2000 /* AIF1_ERR_EINT1 */ +#define ARIZONA_V2_AIF1_ERR_EINT1_SHIFT 13 /* AIF1_ERR_EINT1 */ +#define ARIZONA_V2_AIF1_ERR_EINT1_WIDTH 1 /* AIF1_ERR_EINT1 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT1 0x1000 /* CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT1_MASK 0x1000 /* CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT1_SHIFT 12 /* CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT1_WIDTH 1 /* CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1 0x0800 /* MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_MASK 0x0800 /* MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_SHIFT 11 /* MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_WIDTH 1 /* MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1 0x0400 /* ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_MASK 0x0400 /* ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_SHIFT 10 /* ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_WIDTH 1 /* ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1 0x0200 /* SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_MASK 0x0200 /* SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_SHIFT 9 /* SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_WIDTH 1 /* SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1 0x0100 /* ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_MASK 0x0100 /* ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_SHIFT 8 /* ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_WIDTH 1 /* ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1 0x0080 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_MASK 0x0080 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_SHIFT 7 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_WIDTH 1 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1 0x0040 /* ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_MASK 0x0040 /* ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_SHIFT 6 /* ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_WIDTH 1 /* ISRC3_CFG_ERR_EINT1 */ + /* * R3332 (0xD04) - Interrupt Status 5 */ @@ -4844,6 +4896,85 @@ #define ARIZONA_FLL1_CLOCK_OK_EINT1_SHIFT 0 /* FLL1_CLOCK_OK_EINT1 */ #define ARIZONA_FLL1_CLOCK_OK_EINT1_WIDTH 1 /* FLL1_CLOCK_OK_EINT1 */ +/* + * R3332 (0xD05) - Interrupt Status 5 (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT1 0x0008 /* ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_MASK 0x0008 /* ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_SHIFT 3 /* ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_WIDTH 1 /* ASRC_CFG_ERR_EINT1 */ + +/* + * R3333 (0xD05) - Interrupt Status 6 + */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT1 0x8000 /* DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT1_MASK 0x8000 /* DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT1_SHIFT 15 /* DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT1_WIDTH 1 /* DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_SPK_SHUTDOWN_EINT1 0x4000 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK_SHUTDOWN_EINT1_MASK 0x4000 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK_SHUTDOWN_EINT1_SHIFT 14 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK_SHUTDOWN_EINT1_WIDTH 1 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK1R_SHORT_EINT1 0x2000 /* SPK1R_SHORT_EINT1 */ +#define ARIZONA_SPK1R_SHORT_EINT1_MASK 0x2000 /* SPK1R_SHORT_EINT1 */ +#define ARIZONA_SPK1R_SHORT_EINT1_SHIFT 13 /* SPK1R_SHORT_EINT1 */ +#define ARIZONA_SPK1R_SHORT_EINT1_WIDTH 1 /* SPK1R_SHORT_EINT1 */ +#define ARIZONA_SPK1L_SHORT_EINT1 0x1000 /* SPK1L_SHORT_EINT1 */ +#define ARIZONA_SPK1L_SHORT_EINT1_MASK 0x1000 /* SPK1L_SHORT_EINT1 */ +#define ARIZONA_SPK1L_SHORT_EINT1_SHIFT 12 /* SPK1L_SHORT_EINT1 */ +#define ARIZONA_SPK1L_SHORT_EINT1_WIDTH 1 /* SPK1L_SHORT_EINT1 */ +#define ARIZONA_HP3R_SC_NEG_EINT1 0x0800 /* HP3R_SC_NEG_EINT1 */ +#define ARIZONA_HP3R_SC_NEG_EINT1_MASK 0x0800 /* HP3R_SC_NEG_EINT1 */ +#define ARIZONA_HP3R_SC_NEG_EINT1_SHIFT 11 /* HP3R_SC_NEG_EINT1 */ +#define ARIZONA_HP3R_SC_NEG_EINT1_WIDTH 1 /* HP3R_SC_NEG_EINT1 */ +#define ARIZONA_HP3R_SC_POS_EINT1 0x0400 /* HP3R_SC_POS_EINT1 */ +#define ARIZONA_HP3R_SC_POS_EINT1_MASK 0x0400 /* HP3R_SC_POS_EINT1 */ +#define ARIZONA_HP3R_SC_POS_EINT1_SHIFT 10 /* HP3R_SC_POS_EINT1 */ +#define ARIZONA_HP3R_SC_POS_EINT1_WIDTH 1 /* HP3R_SC_POS_EINT1 */ +#define ARIZONA_HP3L_SC_NEG_EINT1 0x0200 /* HP3L_SC_NEG_EINT1 */ +#define ARIZONA_HP3L_SC_NEG_EINT1_MASK 0x0200 /* HP3L_SC_NEG_EINT1 */ +#define ARIZONA_HP3L_SC_NEG_EINT1_SHIFT 9 /* HP3L_SC_NEG_EINT1 */ +#define ARIZONA_HP3L_SC_NEG_EINT1_WIDTH 1 /* HP3L_SC_NEG_EINT1 */ +#define ARIZONA_HP3L_SC_POS_EINT1 0x0100 /* HP3L_SC_POS_EINT1 */ +#define ARIZONA_HP3L_SC_POS_EINT1_MASK 0x0100 /* HP3L_SC_POS_EINT1 */ +#define ARIZONA_HP3L_SC_POS_EINT1_SHIFT 8 /* HP3L_SC_POS_EINT1 */ +#define ARIZONA_HP3L_SC_POS_EINT1_WIDTH 1 /* HP3L_SC_POS_EINT1 */ +#define ARIZONA_HP2R_SC_NEG_EINT1 0x0080 /* HP2R_SC_NEG_EINT1 */ +#define ARIZONA_HP2R_SC_NEG_EINT1_MASK 0x0080 /* HP2R_SC_NEG_EINT1 */ +#define ARIZONA_HP2R_SC_NEG_EINT1_SHIFT 7 /* HP2R_SC_NEG_EINT1 */ +#define ARIZONA_HP2R_SC_NEG_EINT1_WIDTH 1 /* HP2R_SC_NEG_EINT1 */ +#define ARIZONA_HP2R_SC_POS_EINT1 0x0040 /* HP2R_SC_POS_EINT1 */ +#define ARIZONA_HP2R_SC_POS_EINT1_MASK 0x0040 /* HP2R_SC_POS_EINT1 */ +#define ARIZONA_HP2R_SC_POS_EINT1_SHIFT 6 /* HP2R_SC_POS_EINT1 */ +#define ARIZONA_HP2R_SC_POS_EINT1_WIDTH 1 /* HP2R_SC_POS_EINT1 */ +#define ARIZONA_HP2L_SC_NEG_EINT1 0x0020 /* HP2L_SC_NEG_EINT1 */ +#define ARIZONA_HP2L_SC_NEG_EINT1_MASK 0x0020 /* HP2L_SC_NEG_EINT1 */ +#define ARIZONA_HP2L_SC_NEG_EINT1_SHIFT 5 /* HP2L_SC_NEG_EINT1 */ +#define ARIZONA_HP2L_SC_NEG_EINT1_WIDTH 1 /* HP2L_SC_NEG_EINT1 */ +#define ARIZONA_HP2L_SC_POS_EINT1 0x0010 /* HP2L_SC_POS_EINT1 */ +#define ARIZONA_HP2L_SC_POS_EINT1_MASK 0x0010 /* HP2L_SC_POS_EINT1 */ +#define ARIZONA_HP2L_SC_POS_EINT1_SHIFT 4 /* HP2L_SC_POS_EINT1 */ +#define ARIZONA_HP2L_SC_POS_EINT1_WIDTH 1 /* HP2L_SC_POS_EINT1 */ +#define ARIZONA_HP1R_SC_NEG_EINT1 0x0008 /* HP1R_SC_NEG_EINT1 */ +#define ARIZONA_HP1R_SC_NEG_EINT1_MASK 0x0008 /* HP1R_SC_NEG_EINT1 */ +#define ARIZONA_HP1R_SC_NEG_EINT1_SHIFT 3 /* HP1R_SC_NEG_EINT1 */ +#define ARIZONA_HP1R_SC_NEG_EINT1_WIDTH 1 /* HP1R_SC_NEG_EINT1 */ +#define ARIZONA_HP1R_SC_POS_EINT1 0x0004 /* HP1R_SC_POS_EINT1 */ +#define ARIZONA_HP1R_SC_POS_EINT1_MASK 0x0004 /* HP1R_SC_POS_EINT1 */ +#define ARIZONA_HP1R_SC_POS_EINT1_SHIFT 2 /* HP1R_SC_POS_EINT1 */ +#define ARIZONA_HP1R_SC_POS_EINT1_WIDTH 1 /* HP1R_SC_POS_EINT1 */ +#define ARIZONA_HP1L_SC_NEG_EINT1 0x0002 /* HP1L_SC_NEG_EINT1 */ +#define ARIZONA_HP1L_SC_NEG_EINT1_MASK 0x0002 /* HP1L_SC_NEG_EINT1 */ +#define ARIZONA_HP1L_SC_NEG_EINT1_SHIFT 1 /* HP1L_SC_NEG_EINT1 */ +#define ARIZONA_HP1L_SC_NEG_EINT1_WIDTH 1 /* HP1L_SC_NEG_EINT1 */ +#define ARIZONA_HP1L_SC_POS_EINT1 0x0001 /* HP1L_SC_POS_EINT1 */ +#define ARIZONA_HP1L_SC_POS_EINT1_MASK 0x0001 /* HP1L_SC_POS_EINT1 */ +#define ARIZONA_HP1L_SC_POS_EINT1_SHIFT 0 /* HP1L_SC_POS_EINT1 */ +#define ARIZONA_HP1L_SC_POS_EINT1_WIDTH 1 /* HP1L_SC_POS_EINT1 */ + /* * R3336 (0xD08) - Interrupt Status 1 Mask */ @@ -5012,6 +5143,53 @@ #define ARIZONA_IM_HP1L_DONE_EINT1_SHIFT 0 /* IM_HP1L_DONE_EINT1 */ #define ARIZONA_IM_HP1L_DONE_EINT1_WIDTH 1 /* IM_HP1L_DONE_EINT1 */ +/* + * R3339 (0xD0B) - Interrupt Status 4 Mask (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT1 0x8000 /* IM_AIF3_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT1_MASK 0x8000 /* IM_AIF3_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT1_SHIFT 15 /* IM_AIF3_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT1_WIDTH 1 /* IM_AIF3_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT1 0x4000 /* IM_AIF2_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT1_MASK 0x4000 /* IM_AIF2_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT1_SHIFT 14 /* IM_AIF2_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT1_WIDTH 1 /* IM_AIF2_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT1 0x2000 /* IM_AIF1_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT1_MASK 0x2000 /* IM_AIF1_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT1_SHIFT 13 /* IM_AIF1_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT1_WIDTH 1 /* IM_AIF1_ERR_EINT1 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1 0x1000 /* IM_CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_MASK 0x1000 /* IM_CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_SHIFT 12 /* IM_CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_WIDTH 1 /* IM_CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1 0x0800 /* IM_MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_MASK 0x0800 /* IM_MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_SHIFT 11 /* IM_MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_WIDTH 1 /* IM_MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1 0x0400 /* IM_ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_MASK 0x0400 /* IM_ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_SHIFT 10 /* IM_ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_WIDTH 1 /* IM_ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1 0x0200 /* IM_SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_MASK 0x0200 /* IM_SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_SHIFT 9 /* IM_SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_WIDTH 1 /* IM_SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1 0x0100 /* IM_ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_MASK 0x0100 /* IM_ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_SHIFT 8 /* IM_ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_WIDTH 1 /* IM_ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1 0x0080 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_MASK 0x0080 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_SHIFT 7 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_WIDTH 1 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1 0x0040 /* IM_ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_MASK 0x0040 /* IM_ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_SHIFT 6 /* IM_ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_WIDTH 1 /* IM_ISRC3_CFG_ERR_EINT1 */ + /* * R3340 (0xD0C) - Interrupt Status 5 Mask */ @@ -5036,6 +5214,85 @@ #define ARIZONA_IM_FLL1_CLOCK_OK_EINT1_SHIFT 0 /* IM_FLL1_CLOCK_OK_EINT1 */ #define ARIZONA_IM_FLL1_CLOCK_OK_EINT1_WIDTH 1 /* IM_FLL1_CLOCK_OK_EINT1 */ +/* + * R3340 (0xD0C) - Interrupt Status 5 Mask (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1 0x0008 /* IM_ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_MASK 0x0008 /* IM_ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_SHIFT 3 /* IM_ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_WIDTH 1 /* IM_ASRC_CFG_ERR_EINT1 */ + +/* + * R3341 (0xD0D) - Interrupt Status 6 Mask + */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1 0x8000 /* IM_DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_MASK 0x8000 /* IM_DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_SHIFT 15 /* IM_DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_WIDTH 1 /* IM_DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT1 0x4000 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_MASK 0x4000 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_SHIFT 14 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_WIDTH 1 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT1 0x2000 /* IM_SPK1R_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT1_MASK 0x2000 /* IM_SPK1R_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT1_SHIFT 13 /* IM_SPK1R_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT1_WIDTH 1 /* IM_SPK1R_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT1 0x1000 /* IM_SPK1L_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT1_MASK 0x1000 /* IM_SPK1L_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT1_SHIFT 12 /* IM_SPK1L_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT1_WIDTH 1 /* IM_SPK1L_SHORT_EINT1 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT1 0x0800 /* IM_HP3R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT1_MASK 0x0800 /* IM_HP3R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT1_SHIFT 11 /* IM_HP3R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT1_WIDTH 1 /* IM_HP3R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT1 0x0400 /* IM_HP3R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT1_MASK 0x0400 /* IM_HP3R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT1_SHIFT 10 /* IM_HP3R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT1_WIDTH 1 /* IM_HP3R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT1 0x0200 /* IM_HP3L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT1_MASK 0x0200 /* IM_HP3L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT1_SHIFT 9 /* IM_HP3L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT1_WIDTH 1 /* IM_HP3L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT1 0x0100 /* IM_HP3L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT1_MASK 0x0100 /* IM_HP3L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT1_SHIFT 8 /* IM_HP3L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT1_WIDTH 1 /* IM_HP3L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT1 0x0080 /* IM_HP2R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT1_MASK 0x0080 /* IM_HP2R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT1_SHIFT 7 /* IM_HP2R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT1_WIDTH 1 /* IM_HP2R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT1 0x0040 /* IM_HP2R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT1_MASK 0x0040 /* IM_HP2R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT1_SHIFT 6 /* IM_HP2R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT1_WIDTH 1 /* IM_HP2R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT1 0x0020 /* IM_HP2L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT1_MASK 0x0020 /* IM_HP2L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT1_SHIFT 5 /* IM_HP2L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT1_WIDTH 1 /* IM_HP2L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT1 0x0010 /* IM_HP2L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT1_MASK 0x0010 /* IM_HP2L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT1_SHIFT 4 /* IM_HP2L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT1_WIDTH 1 /* IM_HP2L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT1 0x0008 /* IM_HP1R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT1_MASK 0x0008 /* IM_HP1R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT1_SHIFT 3 /* IM_HP1R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT1_WIDTH 1 /* IM_HP1R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT1 0x0004 /* IM_HP1R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT1_MASK 0x0004 /* IM_HP1R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT1_SHIFT 2 /* IM_HP1R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT1_WIDTH 1 /* IM_HP1R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT1 0x0002 /* IM_HP1L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT1_MASK 0x0002 /* IM_HP1L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT1_SHIFT 1 /* IM_HP1L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT1_WIDTH 1 /* IM_HP1L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT1 0x0001 /* IM_HP1L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT1_MASK 0x0001 /* IM_HP1L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT1_SHIFT 0 /* IM_HP1L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT1_WIDTH 1 /* IM_HP1L_SC_POS_EINT1 */ + /* * R3343 (0xD0F) - Interrupt Control */ @@ -5212,6 +5469,53 @@ #define ARIZONA_HP1L_DONE_EINT2_SHIFT 0 /* HP1L_DONE_EINT2 */ #define ARIZONA_HP1L_DONE_EINT2_WIDTH 1 /* HP1L_DONE_EINT2 */ +/* + * R3347 (0xD13) - IRQ2 Status 4 (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_AIF3_ERR_EINT2 0x8000 /* AIF3_ERR_EINT2 */ +#define ARIZONA_V2_AIF3_ERR_EINT2_MASK 0x8000 /* AIF3_ERR_EINT2 */ +#define ARIZONA_V2_AIF3_ERR_EINT2_SHIFT 15 /* AIF3_ERR_EINT2 */ +#define ARIZONA_V2_AIF3_ERR_EINT2_WIDTH 1 /* AIF3_ERR_EINT2 */ +#define ARIZONA_V2_AIF2_ERR_EINT2 0x4000 /* AIF2_ERR_EINT2 */ +#define ARIZONA_V2_AIF2_ERR_EINT2_MASK 0x4000 /* AIF2_ERR_EINT2 */ +#define ARIZONA_V2_AIF2_ERR_EINT2_SHIFT 14 /* AIF2_ERR_EINT2 */ +#define ARIZONA_V2_AIF2_ERR_EINT2_WIDTH 1 /* AIF2_ERR_EINT2 */ +#define ARIZONA_V2_AIF1_ERR_EINT2 0x2000 /* AIF1_ERR_EINT2 */ +#define ARIZONA_V2_AIF1_ERR_EINT2_MASK 0x2000 /* AIF1_ERR_EINT2 */ +#define ARIZONA_V2_AIF1_ERR_EINT2_SHIFT 13 /* AIF1_ERR_EINT2 */ +#define ARIZONA_V2_AIF1_ERR_EINT2_WIDTH 1 /* AIF1_ERR_EINT2 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT2 0x1000 /* CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT2_MASK 0x1000 /* CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT2_SHIFT 12 /* CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT2_WIDTH 1 /* CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2 0x0800 /* MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_MASK 0x0800 /* MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_SHIFT 11 /* MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_WIDTH 1 /* MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2 0x0400 /* ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_MASK 0x0400 /* ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_SHIFT 10 /* ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_WIDTH 1 /* ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2 0x0200 /* SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_MASK 0x0200 /* SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_SHIFT 9 /* SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_WIDTH 1 /* SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2 0x0100 /* ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_MASK 0x0100 /* ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_SHIFT 8 /* ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_WIDTH 1 /* ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2 0x0080 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_MASK 0x0080 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_SHIFT 7 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_WIDTH 1 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2 0x0040 /* ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_MASK 0x0040 /* ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_SHIFT 6 /* ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_WIDTH 1 /* ISRC3_CFG_ERR_EINT2 */ + /* * R3348 (0xD14) - IRQ2 Status 5 */ @@ -5236,6 +5540,85 @@ #define ARIZONA_FLL1_CLOCK_OK_EINT2_SHIFT 0 /* FLL1_CLOCK_OK_EINT2 */ #define ARIZONA_FLL1_CLOCK_OK_EINT2_WIDTH 1 /* FLL1_CLOCK_OK_EINT2 */ +/* + * R3348 (0xD14) - IRQ2 Status 5 (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT2 0x0008 /* ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_MASK 0x0008 /* ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_SHIFT 3 /* ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_WIDTH 1 /* ASRC_CFG_ERR_EINT2 */ + +/* + * R3349 (0xD15) - IRQ2 Status 6 + */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT2 0x8000 /* DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_MASK 0x8000 /* DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_SHIFT 15 /* DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_WIDTH 1 /* DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_SPK_SHUTDOWN_EINT2 0x4000 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK_SHUTDOWN_EINT2_MASK 0x4000 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK_SHUTDOWN_EINT2_SHIFT 14 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK_SHUTDOWN_EINT2_WIDTH 1 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK1R_SHORT_EINT2 0x2000 /* SPK1R_SHORT_EINT2 */ +#define ARIZONA_SPK1R_SHORT_EINT2_MASK 0x2000 /* SPK1R_SHORT_EINT2 */ +#define ARIZONA_SPK1R_SHORT_EINT2_SHIFT 13 /* SPK1R_SHORT_EINT2 */ +#define ARIZONA_SPK1R_SHORT_EINT2_WIDTH 1 /* SPK1R_SHORT_EINT2 */ +#define ARIZONA_SPK1L_SHORT_EINT2 0x1000 /* SPK1L_SHORT_EINT2 */ +#define ARIZONA_SPK1L_SHORT_EINT2_MASK 0x1000 /* SPK1L_SHORT_EINT2 */ +#define ARIZONA_SPK1L_SHORT_EINT2_SHIFT 12 /* SPK1L_SHORT_EINT2 */ +#define ARIZONA_SPK1L_SHORT_EINT2_WIDTH 1 /* SPK1L_SHORT_EINT2 */ +#define ARIZONA_HP3R_SC_NEG_EINT2 0x0800 /* HP3R_SC_NEG_EINT2 */ +#define ARIZONA_HP3R_SC_NEG_EINT2_MASK 0x0800 /* HP3R_SC_NEG_EINT2 */ +#define ARIZONA_HP3R_SC_NEG_EINT2_SHIFT 11 /* HP3R_SC_NEG_EINT2 */ +#define ARIZONA_HP3R_SC_NEG_EINT2_WIDTH 1 /* HP3R_SC_NEG_EINT2 */ +#define ARIZONA_HP3R_SC_POS_EINT2 0x0400 /* HP3R_SC_POS_EINT2 */ +#define ARIZONA_HP3R_SC_POS_EINT2_MASK 0x0400 /* HP3R_SC_POS_EINT2 */ +#define ARIZONA_HP3R_SC_POS_EINT2_SHIFT 10 /* HP3R_SC_POS_EINT2 */ +#define ARIZONA_HP3R_SC_POS_EINT2_WIDTH 1 /* HP3R_SC_POS_EINT2 */ +#define ARIZONA_HP3L_SC_NEG_EINT2 0x0200 /* HP3L_SC_NEG_EINT2 */ +#define ARIZONA_HP3L_SC_NEG_EINT2_MASK 0x0200 /* HP3L_SC_NEG_EINT2 */ +#define ARIZONA_HP3L_SC_NEG_EINT2_SHIFT 9 /* HP3L_SC_NEG_EINT2 */ +#define ARIZONA_HP3L_SC_NEG_EINT2_WIDTH 1 /* HP3L_SC_NEG_EINT2 */ +#define ARIZONA_HP3L_SC_POS_EINT2 0x0100 /* HP3L_SC_POS_EINT2 */ +#define ARIZONA_HP3L_SC_POS_EINT2_MASK 0x0100 /* HP3L_SC_POS_EINT2 */ +#define ARIZONA_HP3L_SC_POS_EINT2_SHIFT 8 /* HP3L_SC_POS_EINT2 */ +#define ARIZONA_HP3L_SC_POS_EINT2_WIDTH 1 /* HP3L_SC_POS_EINT2 */ +#define ARIZONA_HP2R_SC_NEG_EINT2 0x0080 /* HP2R_SC_NEG_EINT2 */ +#define ARIZONA_HP2R_SC_NEG_EINT2_MASK 0x0080 /* HP2R_SC_NEG_EINT2 */ +#define ARIZONA_HP2R_SC_NEG_EINT2_SHIFT 7 /* HP2R_SC_NEG_EINT2 */ +#define ARIZONA_HP2R_SC_NEG_EINT2_WIDTH 1 /* HP2R_SC_NEG_EINT2 */ +#define ARIZONA_HP2R_SC_POS_EINT2 0x0040 /* HP2R_SC_POS_EINT2 */ +#define ARIZONA_HP2R_SC_POS_EINT2_MASK 0x0040 /* HP2R_SC_POS_EINT2 */ +#define ARIZONA_HP2R_SC_POS_EINT2_SHIFT 6 /* HP2R_SC_POS_EINT2 */ +#define ARIZONA_HP2R_SC_POS_EINT2_WIDTH 1 /* HP2R_SC_POS_EINT2 */ +#define ARIZONA_HP2L_SC_NEG_EINT2 0x0020 /* HP2L_SC_NEG_EINT2 */ +#define ARIZONA_HP2L_SC_NEG_EINT2_MASK 0x0020 /* HP2L_SC_NEG_EINT2 */ +#define ARIZONA_HP2L_SC_NEG_EINT2_SHIFT 5 /* HP2L_SC_NEG_EINT2 */ +#define ARIZONA_HP2L_SC_NEG_EINT2_WIDTH 1 /* HP2L_SC_NEG_EINT2 */ +#define ARIZONA_HP2L_SC_POS_EINT2 0x0010 /* HP2L_SC_POS_EINT2 */ +#define ARIZONA_HP2L_SC_POS_EINT2_MASK 0x0010 /* HP2L_SC_POS_EINT2 */ +#define ARIZONA_HP2L_SC_POS_EINT2_SHIFT 4 /* HP2L_SC_POS_EINT2 */ +#define ARIZONA_HP2L_SC_POS_EINT2_WIDTH 1 /* HP2L_SC_POS_EINT2 */ +#define ARIZONA_HP1R_SC_NEG_EINT2 0x0008 /* HP1R_SC_NEG_EINT2 */ +#define ARIZONA_HP1R_SC_NEG_EINT2_MASK 0x0008 /* HP1R_SC_NEG_EINT2 */ +#define ARIZONA_HP1R_SC_NEG_EINT2_SHIFT 3 /* HP1R_SC_NEG_EINT2 */ +#define ARIZONA_HP1R_SC_NEG_EINT2_WIDTH 1 /* HP1R_SC_NEG_EINT2 */ +#define ARIZONA_HP1R_SC_POS_EINT2 0x0004 /* HP1R_SC_POS_EINT2 */ +#define ARIZONA_HP1R_SC_POS_EINT2_MASK 0x0004 /* HP1R_SC_POS_EINT2 */ +#define ARIZONA_HP1R_SC_POS_EINT2_SHIFT 2 /* HP1R_SC_POS_EINT2 */ +#define ARIZONA_HP1R_SC_POS_EINT2_WIDTH 1 /* HP1R_SC_POS_EINT2 */ +#define ARIZONA_HP1L_SC_NEG_EINT2 0x0002 /* HP1L_SC_NEG_EINT2 */ +#define ARIZONA_HP1L_SC_NEG_EINT2_MASK 0x0002 /* HP1L_SC_NEG_EINT2 */ +#define ARIZONA_HP1L_SC_NEG_EINT2_SHIFT 1 /* HP1L_SC_NEG_EINT2 */ +#define ARIZONA_HP1L_SC_NEG_EINT2_WIDTH 1 /* HP1L_SC_NEG_EINT2 */ +#define ARIZONA_HP1L_SC_POS_EINT2 0x0001 /* HP1L_SC_POS_EINT2 */ +#define ARIZONA_HP1L_SC_POS_EINT2_MASK 0x0001 /* HP1L_SC_POS_EINT2 */ +#define ARIZONA_HP1L_SC_POS_EINT2_SHIFT 0 /* HP1L_SC_POS_EINT2 */ +#define ARIZONA_HP1L_SC_POS_EINT2_WIDTH 1 /* HP1L_SC_POS_EINT2 */ + /* * R3352 (0xD18) - IRQ2 Status 1 Mask */ @@ -5404,6 +5787,53 @@ #define ARIZONA_IM_HP1L_DONE_EINT2_SHIFT 0 /* IM_HP1L_DONE_EINT2 */ #define ARIZONA_IM_HP1L_DONE_EINT2_WIDTH 1 /* IM_HP1L_DONE_EINT2 */ +/* + * R3355 (0xD1B) - IRQ2 Status 4 Mask (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT2 0x8000 /* IM_AIF3_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT2_MASK 0x8000 /* IM_AIF3_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT2_SHIFT 15 /* IM_AIF3_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT2_WIDTH 1 /* IM_AIF3_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT2 0x4000 /* IM_AIF2_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT2_MASK 0x4000 /* IM_AIF2_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT2_SHIFT 14 /* IM_AIF2_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT2_WIDTH 1 /* IM_AIF2_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT2 0x2000 /* IM_AIF1_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT2_MASK 0x2000 /* IM_AIF1_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT2_SHIFT 13 /* IM_AIF1_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT2_WIDTH 1 /* IM_AIF1_ERR_EINT2 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2 0x1000 /* IM_CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_MASK 0x1000 /* IM_CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_SHIFT 12 /* IM_CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_WIDTH 1 /* IM_CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2 0x0800 /* IM_MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_MASK 0x0800 /* IM_MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_SHIFT 11 /* IM_MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_WIDTH 1 /* IM_MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2 0x0400 /* IM_ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_MASK 0x0400 /* IM_ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_SHIFT 10 /* IM_ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_WIDTH 1 /* IM_ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2 0x0200 /* IM_SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_MASK 0x0200 /* IM_SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_SHIFT 9 /* IM_SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_WIDTH 1 /* IM_SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2 0x0100 /* IM_ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_MASK 0x0100 /* IM_ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_SHIFT 8 /* IM_ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_WIDTH 1 /* IM_ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2 0x0080 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_MASK 0x0080 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_SHIFT 7 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_WIDTH 1 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2 0x0040 /* IM_ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_MASK 0x0040 /* IM_ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_SHIFT 6 /* IM_ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_WIDTH 1 /* IM_ISRC3_CFG_ERR_EINT2 */ + /* * R3356 (0xD1C) - IRQ2 Status 5 Mask */ @@ -5429,6 +5859,85 @@ #define ARIZONA_IM_FLL1_CLOCK_OK_EINT2_SHIFT 0 /* IM_FLL1_CLOCK_OK_EINT2 */ #define ARIZONA_IM_FLL1_CLOCK_OK_EINT2_WIDTH 1 /* IM_FLL1_CLOCK_OK_EINT2 */ +/* + * R3340 (0xD0C) - Interrupt Status 5 Mask (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2 0x0008 /* IM_ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_MASK 0x0008 /* IM_ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_SHIFT 3 /* IM_ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_WIDTH 1 /* IM_ASRC_CFG_ERR_EINT2 */ + +/* + * R3357 (0xD1D) - IRQ2 Status 6 Mask + */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2 0x8000 /* IM_DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_MASK 0x8000 /* IM_DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_SHIFT 15 /* IM_DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_WIDTH 1 /* IM_DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT2 0x4000 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_MASK 0x4000 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_SHIFT 14 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_WIDTH 1 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT2 0x2000 /* IM_SPK1R_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT2_MASK 0x2000 /* IM_SPK1R_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT2_SHIFT 13 /* IM_SPK1R_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT2_WIDTH 1 /* IM_SPK1R_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT2 0x1000 /* IM_SPK1L_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT2_MASK 0x1000 /* IM_SPK1L_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT2_SHIFT 12 /* IM_SPK1L_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT2_WIDTH 1 /* IM_SPK1L_SHORT_EINT2 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT2 0x0800 /* IM_HP3R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT2_MASK 0x0800 /* IM_HP3R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT2_SHIFT 11 /* IM_HP3R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT2_WIDTH 1 /* IM_HP3R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT2 0x0400 /* IM_HP3R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT2_MASK 0x0400 /* IM_HP3R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT2_SHIFT 10 /* IM_HP3R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT2_WIDTH 1 /* IM_HP3R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT2 0x0200 /* IM_HP3L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT2_MASK 0x0200 /* IM_HP3L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT2_SHIFT 9 /* IM_HP3L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT2_WIDTH 1 /* IM_HP3L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT2 0x0100 /* IM_HP3L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT2_MASK 0x0100 /* IM_HP3L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT2_SHIFT 8 /* IM_HP3L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT2_WIDTH 1 /* IM_HP3L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT2 0x0080 /* IM_HP2R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT2_MASK 0x0080 /* IM_HP2R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT2_SHIFT 7 /* IM_HP2R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT2_WIDTH 1 /* IM_HP2R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT2 0x0040 /* IM_HP2R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT2_MASK 0x0040 /* IM_HP2R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT2_SHIFT 6 /* IM_HP2R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT2_WIDTH 1 /* IM_HP2R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT2 0x0020 /* IM_HP2L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT2_MASK 0x0020 /* IM_HP2L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT2_SHIFT 5 /* IM_HP2L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT2_WIDTH 1 /* IM_HP2L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT2 0x0010 /* IM_HP2L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT2_MASK 0x0010 /* IM_HP2L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT2_SHIFT 4 /* IM_HP2L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT2_WIDTH 1 /* IM_HP2L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT2 0x0008 /* IM_HP1R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT2_MASK 0x0008 /* IM_HP1R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT2_SHIFT 3 /* IM_HP1R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT2_WIDTH 1 /* IM_HP1R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT2 0x0004 /* IM_HP1R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT2_MASK 0x0004 /* IM_HP1R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT2_SHIFT 2 /* IM_HP1R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT2_WIDTH 1 /* IM_HP1R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT2 0x0002 /* IM_HP1L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT2_MASK 0x0002 /* IM_HP1L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT2_SHIFT 1 /* IM_HP1L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT2_WIDTH 1 /* IM_HP1L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT2 0x0001 /* IM_HP1L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT2_MASK 0x0001 /* IM_HP1L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT2_SHIFT 0 /* IM_HP1L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT2_WIDTH 1 /* IM_HP1L_SC_POS_EINT2 */ + /* * R3359 (0xD1F) - IRQ2 Control */ @@ -5700,6 +6209,10 @@ #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_MASK 0x0008 /* ADSP2_1_OVERCLOCKED_STS */ #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_SHIFT 3 /* ADSP2_1_OVERCLOCKED_STS */ #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_WIDTH 1 /* ADSP2_1_OVERCLOCKED_STS */ +#define ARIZONA_ISRC3_OVERCLOCKED_STS 0x0004 /* ISRC3_OVERCLOCKED_STS */ +#define ARIZONA_ISRC3_OVERCLOCKED_STS_MASK 0x0004 /* ISRC3_OVERCLOCKED_STS */ +#define ARIZONA_ISRC3_OVERCLOCKED_STS_SHIFT 2 /* ISRC3_OVERCLOCKED_STS */ +#define ARIZONA_ISRC3_OVERCLOCKED_STS_WIDTH 1 /* ISRC3_OVERCLOCKED_STS */ #define ARIZONA_ISRC2_OVERCLOCKED_STS 0x0002 /* ISRC2_OVERCLOCKED_STS */ #define ARIZONA_ISRC2_OVERCLOCKED_STS_MASK 0x0002 /* ISRC2_OVERCLOCKED_STS */ #define ARIZONA_ISRC2_OVERCLOCKED_STS_SHIFT 1 /* ISRC2_OVERCLOCKED_STS */ @@ -5724,6 +6237,10 @@ #define ARIZONA_AIF1_UNDERCLOCKED_STS_MASK 0x0100 /* AIF1_UNDERCLOCKED_STS */ #define ARIZONA_AIF1_UNDERCLOCKED_STS_SHIFT 8 /* AIF1_UNDERCLOCKED_STS */ #define ARIZONA_AIF1_UNDERCLOCKED_STS_WIDTH 1 /* AIF1_UNDERCLOCKED_STS */ +#define ARIZONA_ISRC3_UNDERCLOCKED_STS 0x0080 /* ISRC3_UNDERCLOCKED_STS */ +#define ARIZONA_ISRC3_UNDERCLOCKED_STS_MASK 0x0080 /* ISRC3_UNDERCLOCKED_STS */ +#define ARIZONA_ISRC3_UNDERCLOCKED_STS_SHIFT 7 /* ISRC3_UNDERCLOCKED_STS */ +#define ARIZONA_ISRC3_UNDERCLOCKED_STS_WIDTH 1 /* ISRC3_UNDERCLOCKED_STS */ #define ARIZONA_ISRC2_UNDERCLOCKED_STS 0x0040 /* ISRC2_UNDERCLOCKED_STS */ #define ARIZONA_ISRC2_UNDERCLOCKED_STS_MASK 0x0040 /* ISRC2_UNDERCLOCKED_STS */ #define ARIZONA_ISRC2_UNDERCLOCKED_STS_SHIFT 6 /* ISRC2_UNDERCLOCKED_STS */ @@ -5753,6 +6270,74 @@ #define ARIZONA_MIXER_UNDERCLOCKED_STS_SHIFT 0 /* MIXER_UNDERCLOCKED_STS */ #define ARIZONA_MIXER_UNDERCLOCKED_STS_WIDTH 1 /* MIXER_UNDERCLOCKED_STS */ +/* + * R3368 (0xD28) - Interrupt Raw Status 9 + */ +#define ARIZONA_DSP_SHARED_WR_COLL_STS 0x8000 /* DSP_SHARED_WR_COLL_STS */ +#define ARIZONA_DSP_SHARED_WR_COLL_STS_MASK 0x8000 /* DSP_SHARED_WR_COLL_STS */ +#define ARIZONA_DSP_SHARED_WR_COLL_STS_SHIFT 15 /* DSP_SHARED_WR_COLL_STS */ +#define ARIZONA_DSP_SHARED_WR_COLL_STS_WIDTH 1 /* DSP_SHARED_WR_COLL_STS */ +#define ARIZONA_SPK_SHUTDOWN_STS 0x4000 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK_SHUTDOWN_STS_MASK 0x4000 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK_SHUTDOWN_STS_SHIFT 14 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK_SHUTDOWN_STS_WIDTH 1 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK1R_SHORT_STS 0x2000 /* SPK1R_SHORT_STS */ +#define ARIZONA_SPK1R_SHORT_STS_MASK 0x2000 /* SPK1R_SHORT_STS */ +#define ARIZONA_SPK1R_SHORT_STS_SHIFT 13 /* SPK1R_SHORT_STS */ +#define ARIZONA_SPK1R_SHORT_STS_WIDTH 1 /* SPK1R_SHORT_STS */ +#define ARIZONA_SPK1L_SHORT_STS 0x1000 /* SPK1L_SHORT_STS */ +#define ARIZONA_SPK1L_SHORT_STS_MASK 0x1000 /* SPK1L_SHORT_STS */ +#define ARIZONA_SPK1L_SHORT_STS_SHIFT 12 /* SPK1L_SHORT_STS */ +#define ARIZONA_SPK1L_SHORT_STS_WIDTH 1 /* SPK1L_SHORT_STS */ +#define ARIZONA_HP3R_SC_NEG_STS 0x0800 /* HP3R_SC_NEG_STS */ +#define ARIZONA_HP3R_SC_NEG_STS_MASK 0x0800 /* HP3R_SC_NEG_STS */ +#define ARIZONA_HP3R_SC_NEG_STS_SHIFT 11 /* HP3R_SC_NEG_STS */ +#define ARIZONA_HP3R_SC_NEG_STS_WIDTH 1 /* HP3R_SC_NEG_STS */ +#define ARIZONA_HP3R_SC_POS_STS 0x0400 /* HP3R_SC_POS_STS */ +#define ARIZONA_HP3R_SC_POS_STS_MASK 0x0400 /* HP3R_SC_POS_STS */ +#define ARIZONA_HP3R_SC_POS_STS_SHIFT 10 /* HP3R_SC_POS_STS */ +#define ARIZONA_HP3R_SC_POS_STS_WIDTH 1 /* HP3R_SC_POS_STS */ +#define ARIZONA_HP3L_SC_NEG_STS 0x0200 /* HP3L_SC_NEG_STS */ +#define ARIZONA_HP3L_SC_NEG_STS_MASK 0x0200 /* HP3L_SC_NEG_STS */ +#define ARIZONA_HP3L_SC_NEG_STS_SHIFT 9 /* HP3L_SC_NEG_STS */ +#define ARIZONA_HP3L_SC_NEG_STS_WIDTH 1 /* HP3L_SC_NEG_STS */ +#define ARIZONA_HP3L_SC_POS_STS 0x0100 /* HP3L_SC_POS_STS */ +#define ARIZONA_HP3L_SC_POS_STS_MASK 0x0100 /* HP3L_SC_POS_STS */ +#define ARIZONA_HP3L_SC_POS_STS_SHIFT 8 /* HP3L_SC_POS_STS */ +#define ARIZONA_HP3L_SC_POS_STS_WIDTH 1 /* HP3L_SC_POS_STS */ +#define ARIZONA_HP2R_SC_NEG_STS 0x0080 /* HP2R_SC_NEG_STS */ +#define ARIZONA_HP2R_SC_NEG_STS_MASK 0x0080 /* HP2R_SC_NEG_STS */ +#define ARIZONA_HP2R_SC_NEG_STS_SHIFT 7 /* HP2R_SC_NEG_STS */ +#define ARIZONA_HP2R_SC_NEG_STS_WIDTH 1 /* HP2R_SC_NEG_STS */ +#define ARIZONA_HP2R_SC_POS_STS 0x0040 /* HP2R_SC_POS_STS */ +#define ARIZONA_HP2R_SC_POS_STS_MASK 0x0040 /* HP2R_SC_POS_STS */ +#define ARIZONA_HP2R_SC_POS_STS_SHIFT 6 /* HP2R_SC_POS_STS */ +#define ARIZONA_HP2R_SC_POS_STS_WIDTH 1 /* HP2R_SC_POS_STS */ +#define ARIZONA_HP2L_SC_NEG_STS 0x0020 /* HP2L_SC_NEG_STS */ +#define ARIZONA_HP2L_SC_NEG_STS_MASK 0x0020 /* HP2L_SC_NEG_STS */ +#define ARIZONA_HP2L_SC_NEG_STS_SHIFT 5 /* HP2L_SC_NEG_STS */ +#define ARIZONA_HP2L_SC_NEG_STS_WIDTH 1 /* HP2L_SC_NEG_STS */ +#define ARIZONA_HP2L_SC_POS_STS 0x0010 /* HP2L_SC_POS_STS */ +#define ARIZONA_HP2L_SC_POS_STS_MASK 0x0010 /* HP2L_SC_POS_STS */ +#define ARIZONA_HP2L_SC_POS_STS_SHIFT 4 /* HP2L_SC_POS_STS */ +#define ARIZONA_HP2L_SC_POS_STS_WIDTH 1 /* HP2L_SC_POS_STS */ +#define ARIZONA_HP1R_SC_NEG_STS 0x0008 /* HP1R_SC_NEG_STS */ +#define ARIZONA_HP1R_SC_NEG_STS_MASK 0x0008 /* HP1R_SC_NEG_STS */ +#define ARIZONA_HP1R_SC_NEG_STS_SHIFT 3 /* HP1R_SC_NEG_STS */ +#define ARIZONA_HP1R_SC_NEG_STS_WIDTH 1 /* HP1R_SC_NEG_STS */ +#define ARIZONA_HP1R_SC_POS_STS 0x0004 /* HP1R_SC_POS_STS */ +#define ARIZONA_HP1R_SC_POS_STS_MASK 0x0004 /* HP1R_SC_POS_STS */ +#define ARIZONA_HP1R_SC_POS_STS_SHIFT 2 /* HP1R_SC_POS_STS */ +#define ARIZONA_HP1R_SC_POS_STS_WIDTH 1 /* HP1R_SC_POS_STS */ +#define ARIZONA_HP1L_SC_NEG_STS 0x0002 /* HP1L_SC_NEG_STS */ +#define ARIZONA_HP1L_SC_NEG_STS_MASK 0x0002 /* HP1L_SC_NEG_STS */ +#define ARIZONA_HP1L_SC_NEG_STS_SHIFT 1 /* HP1L_SC_NEG_STS */ +#define ARIZONA_HP1L_SC_NEG_STS_WIDTH 1 /* HP1L_SC_NEG_STS */ +#define ARIZONA_HP1L_SC_POS_STS 0x0001 /* HP1L_SC_POS_STS */ +#define ARIZONA_HP1L_SC_POS_STS_MASK 0x0001 /* HP1L_SC_POS_STS */ +#define ARIZONA_HP1L_SC_POS_STS_SHIFT 0 /* HP1L_SC_POS_STS */ +#define ARIZONA_HP1L_SC_POS_STS_WIDTH 1 /* HP1L_SC_POS_STS */ + /* * R3392 (0xD40) - IRQ Pin Status */ -- cgit v1.2.3 From 30a2af3a320d5c0598cde08ba6e5d22a724f82e4 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 15 Jul 2014 11:21:50 +0100 Subject: mfd: arizona: Only free the CTRLIF_ERR IRQ if we requested it We only request the control interface error IRQ if we set ctrlif_error, as such we should only free it in that situation. Otherwise we will attempt to free an IRQ we never requested and get a warning from the IRQ core. This patch moves the ctrlif_error variable into the arizona structure and checks it in all cases we free the control interface error IRQ. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/arizona-irq.c | 19 ++++++++++++------- include/linux/mfd/arizona/core.h | 2 ++ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index e780bc40165d..d420dbc0e2b0 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -188,16 +188,17 @@ int arizona_irq_init(struct arizona *arizona) int flags = IRQF_ONESHOT; int ret, i; const struct regmap_irq_chip *aod, *irq; - bool ctrlif_error = true; struct irq_data *irq_data; + arizona->ctrlif_error = true; + switch (arizona->type) { #ifdef CONFIG_MFD_WM5102 case WM5102: aod = &wm5102_aod; irq = &wm5102_irq; - ctrlif_error = false; + arizona->ctrlif_error = false; break; #endif #ifdef CONFIG_MFD_WM5110 @@ -213,7 +214,7 @@ int arizona_irq_init(struct arizona *arizona) break; } - ctrlif_error = false; + arizona->ctrlif_error = false; break; #endif #ifdef CONFIG_MFD_WM8997 @@ -221,7 +222,7 @@ int arizona_irq_init(struct arizona *arizona) aod = &wm8997_aod; irq = &wm8997_irq; - ctrlif_error = false; + arizona->ctrlif_error = false; break; #endif default: @@ -308,7 +309,7 @@ int arizona_irq_init(struct arizona *arizona) } /* Handle control interface errors in the core */ - if (ctrlif_error) { + if (arizona->ctrlif_error) { i = arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR); ret = request_threaded_irq(i, NULL, arizona_ctrlif_err, IRQF_ONESHOT, @@ -353,7 +354,9 @@ int arizona_irq_init(struct arizona *arizona) return 0; err_main_irq: - free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), arizona); + if (arizona->ctrlif_error) + free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), + arizona); err_ctrlif: free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_BOOT_DONE), arizona); err_boot_done: @@ -369,7 +372,9 @@ err: int arizona_irq_exit(struct arizona *arizona) { - free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), arizona); + if (arizona->ctrlif_error) + free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), + arizona); free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_BOOT_DONE), arizona); regmap_del_irq_chip(irq_create_mapping(arizona->virq, 1), arizona->irq_chip); diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 8bc7601cca68..fdd8b7b82db5 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -132,6 +132,8 @@ struct arizona { struct mutex clk_lock; int clk32k_ref; + bool ctrlif_error; + struct snd_soc_dapm_context *dapm; }; -- cgit v1.2.3 From 0a6d315827eedc733d404ecff3cd4cc0e6437865 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Jul 2014 20:08:55 +0200 Subject: gpio: split gpiod board registration into machine header As per example from the regulator subsystem: put all defines and functions related to registering board info for GPIO descriptors into a separate header. Cc: Andrew Victor Cc: Nicolas Ferre Cc: Jean-Christophe Plagniol-Villard Cc: Ralf Baechle Cc: Thierry Reding Acked-by: Stephen Warren Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- Documentation/gpio/board.txt | 2 +- arch/arm/mach-at91/at91rm9200_devices.c | 2 +- arch/arm/mach-tegra/board-paz00.c | 2 +- arch/mips/jz4740/board-qi_lb60.c | 1 + drivers/gpio/gpiolib.c | 1 + include/linux/gpio/driver.h | 54 ------------------------------ include/linux/gpio/machine.h | 58 +++++++++++++++++++++++++++++++++ 7 files changed, 63 insertions(+), 57 deletions(-) create mode 100644 include/linux/gpio/machine.h (limited to 'include/linux') diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index ba169faad5c6..4452786225b8 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -60,7 +60,7 @@ Platform Data Finally, GPIOs can be bound to devices and functions using platform data. Board files that desire to do so need to include the following header: - #include + #include GPIOs are mapped by the means of tables of lookups, containing instances of the gpiod_lookup structure. Two macros are defined to help declaring such mappings: diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c index 3f4bb58aea54..74f1eaf97801 100644 --- a/arch/arm/mach-at91/at91rm9200_devices.c +++ b/arch/arm/mach-at91/at91rm9200_devices.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index 9c6029ba526f..91fd858ced0d 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -18,7 +18,7 @@ */ #include -#include +#include #include #include "board.h" diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index 088e92a79ae6..c454525e7695 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 768f0831db18..18b069e6ba03 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "gpiolib.h" diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4c463fb0155e..e78a2373e374 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -151,60 +151,6 @@ void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); -enum gpio_lookup_flags { - GPIO_ACTIVE_HIGH = (0 << 0), - GPIO_ACTIVE_LOW = (1 << 0), - GPIO_OPEN_DRAIN = (1 << 1), - GPIO_OPEN_SOURCE = (1 << 2), -}; - -/** - * struct gpiod_lookup - lookup table - * @chip_label: name of the chip the GPIO belongs to - * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO - * @con_id: name of the GPIO from the device's point of view - * @idx: index of the GPIO in case several GPIOs share the same name - * @flags: mask of GPIO_* values - * - * gpiod_lookup is a lookup table for associating GPIOs to specific devices and - * functions using platform data. - */ -struct gpiod_lookup { - const char *chip_label; - u16 chip_hwnum; - const char *con_id; - unsigned int idx; - enum gpio_lookup_flags flags; -}; - -struct gpiod_lookup_table { - struct list_head list; - const char *dev_id; - struct gpiod_lookup table[]; -}; - -/* - * Simple definition of a single GPIO under a con_id - */ -#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _con_id, _flags) \ - GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, 0, _flags) - -/* - * Use this macro if you need to have several GPIOs under the same con_id. - * Each GPIO needs to use a different index and can be accessed using - * gpiod_get_index() - */ -#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, _idx, _flags) \ -{ \ - .chip_label = _chip_label, \ - .chip_hwnum = _chip_hwnum, \ - .con_id = _con_id, \ - .idx = _idx, \ - .flags = _flags, \ -} - -void gpiod_add_lookup_table(struct gpiod_lookup_table *table); - #ifdef CONFIG_GPIOLIB_IRQCHIP void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h new file mode 100644 index 000000000000..b8ad87fab4ce --- /dev/null +++ b/include/linux/gpio/machine.h @@ -0,0 +1,58 @@ +#ifndef __LINUX_GPIO_MACHINE_H +#define __LINUX_GPIO_MACHINE_H + +enum gpio_lookup_flags { + GPIO_ACTIVE_HIGH = (0 << 0), + GPIO_ACTIVE_LOW = (1 << 0), + GPIO_OPEN_DRAIN = (1 << 1), + GPIO_OPEN_SOURCE = (1 << 2), +}; + +/** + * struct gpiod_lookup - lookup table + * @chip_label: name of the chip the GPIO belongs to + * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO + * @con_id: name of the GPIO from the device's point of view + * @idx: index of the GPIO in case several GPIOs share the same name + * @flags: mask of GPIO_* values + * + * gpiod_lookup is a lookup table for associating GPIOs to specific devices and + * functions using platform data. + */ +struct gpiod_lookup { + const char *chip_label; + u16 chip_hwnum; + const char *con_id; + unsigned int idx; + enum gpio_lookup_flags flags; +}; + +struct gpiod_lookup_table { + struct list_head list; + const char *dev_id; + struct gpiod_lookup table[]; +}; + +/* + * Simple definition of a single GPIO under a con_id + */ +#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _con_id, _flags) \ + GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, 0, _flags) + +/* + * Use this macro if you need to have several GPIOs under the same con_id. + * Each GPIO needs to use a different index and can be accessed using + * gpiod_get_index() + */ +#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, _idx, _flags) \ +{ \ + .chip_label = _chip_label, \ + .chip_hwnum = _chip_hwnum, \ + .con_id = _con_id, \ + .idx = _idx, \ + .flags = _flags, \ +} + +void gpiod_add_lookup_table(struct gpiod_lookup_table *table); + +#endif /* __LINUX_GPIO_MACHINE_H */ -- cgit v1.2.3 From 39b2bbe3d715cf5013b5c48695ccdd25bd3bf120 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 25 Jul 2014 23:38:36 +0900 Subject: gpio: add flags argument to gpiod_get*() functions The huge majority of GPIOs have their direction and initial value set right after being obtained by one of the gpiod_get() functions. The integer GPIO API had gpio_request_one() that took a convenience flags parameter allowing to specify an direction and value applied to the returned GPIO. This feature greatly simplifies client code and ensures errors are always handled properly. A similar feature has been requested for the gpiod API. Since setting the direction of a GPIO is so often the very next action done after obtaining its descriptor, we prefer to extend the existing functions instead of introducing new functions that would raise the number of gpiod getters to 16 (!). The drawback of this approach is that all gpiod clients need to be updated. To limit the pain, temporary macros are introduced that allow gpiod_get*() to be called with or without the extra flags argument. They will be removed once all consumer code has been updated. Signed-off-by: Alexandre Courbot Reviewed-by: Mark Brown Signed-off-by: Linus Walleij --- Documentation/gpio/consumer.txt | 26 ++++++++++--- drivers/gpio/devres.c | 40 ++++++++++++-------- drivers/gpio/gpiolib.c | 67 +++++++++++++++++++++++----------- include/linux/gpio/consumer.h | 81 +++++++++++++++++++++++++++++++++-------- 4 files changed, 155 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt index d8abfc31abbe..76546324e968 100644 --- a/Documentation/gpio/consumer.txt +++ b/Documentation/gpio/consumer.txt @@ -29,13 +29,24 @@ gpiod_get() functions. Like many other kernel subsystems, gpiod_get() takes the device that will use the GPIO and the function the requested GPIO is supposed to fulfill: - struct gpio_desc *gpiod_get(struct device *dev, const char *con_id) + struct gpio_desc *gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) If a function is implemented by using several GPIOs together (e.g. a simple LED device that displays digits), an additional index argument can be specified: struct gpio_desc *gpiod_get_index(struct device *dev, - const char *con_id, unsigned int idx) + const char *con_id, unsigned int idx, + enum gpiod_flags flags) + +The flags parameter is used to optionally specify a direction and initial value +for the GPIO. Values can be: + +* GPIOD_ASIS or 0 to not initialize the GPIO at all. The direction must be set + later with one of the dedicated functions. +* GPIOD_IN to initialize the GPIO as input. +* GPIOD_OUT_LOW to initialize the GPIO as output with a value of 0. +* GPIOD_OUT_HIGH to initialize the GPIO as output with a value of 1. Both functions return either a valid GPIO descriptor, or an error code checkable with IS_ERR() (they will never return a NULL pointer). -ENOENT will be returned @@ -46,11 +57,13 @@ errors and an absence of GPIO for optional GPIO parameters. Device-managed variants of these functions are also defined: - struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id) + struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) struct gpio_desc *devm_gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx) + unsigned int idx, + enum gpiod_flags flags) A GPIO descriptor can be disposed of using the gpiod_put() function: @@ -67,8 +80,9 @@ Using GPIOs Setting Direction ----------------- -The first thing a driver must do with a GPIO is setting its direction. This is -done by invoking one of the gpiod_direction_*() functions: +The first thing a driver must do with a GPIO is setting its direction. If no +direction-setting flags have been given to gpiod_get*(), this is done by +invoking one of the gpiod_direction_*() functions: int gpiod_direction_input(struct gpio_desc *desc) int gpiod_direction_output(struct gpio_desc *desc, int value) diff --git a/drivers/gpio/devres.c b/drivers/gpio/devres.c index 65978cf85f79..41b2f40578d5 100644 --- a/drivers/gpio/devres.c +++ b/drivers/gpio/devres.c @@ -39,47 +39,53 @@ static int devm_gpiod_match(struct device *dev, void *res, void *data) * devm_gpiod_get - Resource-managed gpiod_get() * @dev: GPIO consumer * @con_id: function within the GPIO consumer + * @flags: optional GPIO initialization flags * * Managed gpiod_get(). GPIO descriptors returned from this function are * automatically disposed on driver detach. See gpiod_get() for detailed * information about behavior and return values. */ -struct gpio_desc *__must_check devm_gpiod_get(struct device *dev, - const char *con_id) +struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { - return devm_gpiod_get_index(dev, con_id, 0); + return devm_gpiod_get_index(dev, con_id, 0, flags); } -EXPORT_SYMBOL(devm_gpiod_get); +EXPORT_SYMBOL(__devm_gpiod_get); /** * devm_gpiod_get_optional - Resource-managed gpiod_get_optional() * @dev: GPIO consumer * @con_id: function within the GPIO consumer + * @flags: optional GPIO initialization flags * * Managed gpiod_get_optional(). GPIO descriptors returned from this function * are automatically disposed on driver detach. See gpiod_get_optional() for * detailed information about behavior and return values. */ -struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev, - const char *con_id) +struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { - return devm_gpiod_get_index_optional(dev, con_id, 0); + return devm_gpiod_get_index_optional(dev, con_id, 0, flags); } -EXPORT_SYMBOL(devm_gpiod_get_optional); +EXPORT_SYMBOL(__devm_gpiod_get_optional); /** * devm_gpiod_get_index - Resource-managed gpiod_get_index() * @dev: GPIO consumer * @con_id: function within the GPIO consumer * @idx: index of the GPIO to obtain in the consumer + * @flags: optional GPIO initialization flags * * Managed gpiod_get_index(). GPIO descriptors returned from this function are * automatically disposed on driver detach. See gpiod_get_index() for detailed * information about behavior and return values. */ -struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx) + unsigned int idx, + enum gpiod_flags flags) { struct gpio_desc **dr; struct gpio_desc *desc; @@ -89,7 +95,7 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, if (!dr) return ERR_PTR(-ENOMEM); - desc = gpiod_get_index(dev, con_id, idx); + desc = gpiod_get_index(dev, con_id, idx, flags); if (IS_ERR(desc)) { devres_free(dr); return desc; @@ -100,26 +106,28 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, return desc; } -EXPORT_SYMBOL(devm_gpiod_get_index); +EXPORT_SYMBOL(__devm_gpiod_get_index); /** * devm_gpiod_get_index_optional - Resource-managed gpiod_get_index_optional() * @dev: GPIO consumer * @con_id: function within the GPIO consumer * @index: index of the GPIO to obtain in the consumer + * @flags: optional GPIO initialization flags * * Managed gpiod_get_index_optional(). GPIO descriptors returned from this * function are automatically disposed on driver detach. See * gpiod_get_index_optional() for detailed information about behavior and * return values. */ -struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev, +struct gpio_desc *__must_check __devm_gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index) + unsigned int index, + enum gpiod_flags flags) { struct gpio_desc *desc; - desc = devm_gpiod_get_index(dev, con_id, index); + desc = devm_gpiod_get_index(dev, con_id, index, flags); if (IS_ERR(desc)) { if (PTR_ERR(desc) == -ENOENT) return NULL; @@ -127,7 +135,7 @@ struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev, return desc; } -EXPORT_SYMBOL(devm_gpiod_get_index_optional); +EXPORT_SYMBOL(__devm_gpiod_get_index_optional); /** * devm_gpiod_put - Resource-managed gpiod_put() diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 330227581a25..15cc0bb65dda 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1582,38 +1582,43 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id, * gpiod_get - obtain a GPIO for a given GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer + * @flags: optional GPIO initialization flags * * Return the GPIO descriptor corresponding to the function con_id of device * dev, -ENOENT if no GPIO has been assigned to the requested function, or * another IS_ERR() code if an error occured while trying to acquire the GPIO. */ -struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id) +struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) { - return gpiod_get_index(dev, con_id, 0); + return gpiod_get_index(dev, con_id, 0, flags); } -EXPORT_SYMBOL_GPL(gpiod_get); +EXPORT_SYMBOL_GPL(__gpiod_get); /** * gpiod_get_optional - obtain an optional GPIO for a given GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer + * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get(), except that when no GPIO was assigned to * the requested function it will return NULL. This is convenient for drivers * that need to handle optional GPIOs. */ -struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, - const char *con_id) +struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { - return gpiod_get_index_optional(dev, con_id, 0); + return gpiod_get_index_optional(dev, con_id, 0, flags); } -EXPORT_SYMBOL_GPL(gpiod_get_optional); +EXPORT_SYMBOL_GPL(__gpiod_get_optional); /** * gpiod_get_index - obtain a GPIO from a multi-index GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @idx: index of the GPIO to obtain in the consumer + * @flags: optional GPIO initialization flags * * This variant of gpiod_get() allows to access GPIOs other than the first * defined one for functions that define several GPIOs. @@ -1622,23 +1627,24 @@ EXPORT_SYMBOL_GPL(gpiod_get_optional); * requested function and/or index, or another IS_ERR() code if an error * occured while trying to acquire the GPIO. */ -struct gpio_desc *__must_check gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check __gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx) + unsigned int idx, + enum gpiod_flags flags) { struct gpio_desc *desc = NULL; int status; - enum gpio_lookup_flags flags = 0; + enum gpio_lookup_flags lookupflags = 0; dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id); /* Using device tree? */ if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node) { dev_dbg(dev, "using device tree for GPIO lookup\n"); - desc = of_find_gpio(dev, con_id, idx, &flags); + desc = of_find_gpio(dev, con_id, idx, &lookupflags); } else if (IS_ENABLED(CONFIG_ACPI) && dev && ACPI_HANDLE(dev)) { dev_dbg(dev, "using ACPI for GPIO lookup\n"); - desc = acpi_find_gpio(dev, con_id, idx, &flags); + desc = acpi_find_gpio(dev, con_id, idx, &lookupflags); } /* @@ -1647,7 +1653,7 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, */ if (!desc || desc == ERR_PTR(-ENOENT)) { dev_dbg(dev, "using lookup tables for GPIO lookup"); - desc = gpiod_find(dev, con_id, idx, &flags); + desc = gpiod_find(dev, con_id, idx, &lookupflags); } if (IS_ERR(desc)) { @@ -1660,16 +1666,33 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, if (status < 0) return ERR_PTR(status); - if (flags & GPIO_ACTIVE_LOW) + if (lookupflags & GPIO_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); - if (flags & GPIO_OPEN_DRAIN) + if (lookupflags & GPIO_OPEN_DRAIN) set_bit(FLAG_OPEN_DRAIN, &desc->flags); - if (flags & GPIO_OPEN_SOURCE) + if (lookupflags & GPIO_OPEN_SOURCE) set_bit(FLAG_OPEN_SOURCE, &desc->flags); + /* No particular flag request, return here... */ + if (flags & GPIOD_FLAGS_BIT_DIR_SET) + return desc; + + /* Process flags */ + if (flags & GPIOD_FLAGS_BIT_DIR_OUT) + status = gpiod_direction_output(desc, + flags & GPIOD_FLAGS_BIT_DIR_VAL); + else + status = gpiod_direction_input(desc); + + if (status < 0) { + dev_dbg(dev, "setup of GPIO %s failed\n", con_id); + gpiod_put(desc); + return ERR_PTR(status); + } + return desc; } -EXPORT_SYMBOL_GPL(gpiod_get_index); +EXPORT_SYMBOL_GPL(__gpiod_get_index); /** * gpiod_get_index_optional - obtain an optional GPIO from a multi-index GPIO @@ -1677,18 +1700,20 @@ EXPORT_SYMBOL_GPL(gpiod_get_index); * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @index: index of the GPIO to obtain in the consumer + * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get_index(), except that when no GPIO with the * specified index was assigned to the requested function it will return NULL. * This is convenient for drivers that need to handle optional GPIOs. */ -struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, +struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index) + unsigned int index, + enum gpiod_flags flags) { struct gpio_desc *desc; - desc = gpiod_get_index(dev, con_id, index); + desc = gpiod_get_index(dev, con_id, index, flags); if (IS_ERR(desc)) { if (PTR_ERR(desc) == -ENOENT) return NULL; @@ -1696,7 +1721,7 @@ struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, return desc; } -EXPORT_SYMBOL_GPL(gpiod_get_index_optional); +EXPORT_SYMBOL_GPL(__gpiod_get_index_optional); /** * gpiod_put - dispose of a GPIO descriptor diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 05e53ccb708b..b7ce0c64c6f3 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -18,30 +18,79 @@ struct gpio_desc; #ifdef CONFIG_GPIOLIB +#define GPIOD_FLAGS_BIT_DIR_SET BIT(0) +#define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) +#define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) + +/** + * Optional flags that can be passed to one of gpiod_* to configure direction + * and output value. These values cannot be OR'd. + */ +enum gpiod_flags { + GPIOD_ASIS = 0, + GPIOD_IN = GPIOD_FLAGS_BIT_DIR_SET, + GPIOD_OUT_LOW = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT, + GPIOD_OUT_HIGH = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT | + GPIOD_FLAGS_BIT_DIR_VAL, +}; + /* Acquire and dispose GPIOs */ -struct gpio_desc *__must_check gpiod_get(struct device *dev, - const char *con_id); -struct gpio_desc *__must_check gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check __gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +#define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags) +#define gpiod_get(varargs...) __gpiod_get(varargs, 0) +struct gpio_desc *__must_check __gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx); -struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, - const char *con_id); -struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, + unsigned int idx, + enum gpiod_flags flags); +#define __gpiod_get_index(dev, con_id, index, flags, ...) \ + __gpiod_get_index(dev, con_id, index, flags) +#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, 0) +struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +#define __gpiod_get_optional(dev, con_id, flags, ...) \ + __gpiod_get_optional(dev, con_id, flags) +#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, 0) +struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index); + unsigned int index, + enum gpiod_flags flags); +#define __gpiod_get_index_optional(dev, con_id, index, flags, ...) \ + __gpiod_get_index_optional(dev, con_id, index, flags) +#define gpiod_get_index_optional(varargs...) \ + __gpiod_get_index_optional(varargs, 0) void gpiod_put(struct gpio_desc *desc); -struct gpio_desc *__must_check devm_gpiod_get(struct device *dev, - const char *con_id); -struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +#define __devm_gpiod_get(dev, con_id, flags, ...) \ + __devm_gpiod_get(dev, con_id, flags) +#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, 0) +struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx); -struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev, - const char *con_id); + unsigned int idx, + enum gpiod_flags flags); +#define __devm_gpiod_get_index(dev, con_id, index, flags, ...) \ + __devm_gpiod_get_index(dev, con_id, index, flags) +#define devm_gpiod_get_index(varargs...) __devm_gpiod_get_index(varargs, 0) +struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +#define __devm_gpiod_get_optional(dev, con_id, flags, ...) \ + __devm_gpiod_get_optional(dev, con_id, flags) +#define devm_gpiod_get_optional(varargs...) \ + __devm_gpiod_get_optional(varargs, 0) struct gpio_desc *__must_check -devm_gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index); +__devm_gpiod_get_index_optional(struct device *dev, const char *con_id, + unsigned int index, enum gpiod_flags flags); +#define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...) \ + __devm_gpiod_get_index_optional(dev, con_id, index, flags) +#define devm_gpiod_get_index_optional(varargs...) \ + __devm_gpiod_get_index_optional(varargs, 0) void devm_gpiod_put(struct device *dev, struct gpio_desc *desc); -- cgit v1.2.3 From eb3fe7def66511120766c8fc05ee9631cce7fe6f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Jul 2014 13:46:37 +0300 Subject: ARM: edma: Add edma_assign_channel_eventq() to move channel to a give queue In some cases it is desired to move a channel to a specific event queue. Such a use case is audio, where it is preferred that it is served with highest priority compared to other DMA clients. Signed-off-by: Peter Ujfalusi Acked-by: Sekhar Nori Signed-off-by: Vinod Koul --- arch/arm/common/edma.c | 28 ++++++++++++++++++++++++++++ include/linux/platform_data/edma.h | 2 ++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index f834aae7720f..88099175fc56 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -1414,6 +1414,34 @@ void edma_clear_event(unsigned channel) } EXPORT_SYMBOL(edma_clear_event); +/* + * edma_assign_channel_eventq - move given channel to desired eventq + * Arguments: + * channel - channel number + * eventq_no - queue to move the channel + * + * Can be used to move a channel to a selected event queue. + */ +void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel >= edma_cc[ctlr]->num_channels) + return; + + /* default to low priority queue */ + if (eventq_no == EVENTQ_DEFAULT) + eventq_no = edma_cc[ctlr]->default_queue; + if (eventq_no >= edma_cc[ctlr]->num_tc) + return; + + map_dmach_queue(ctlr, channel, eventq_no); +} +EXPORT_SYMBOL(edma_assign_channel_eventq); + static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, struct edma *edma_cc) { diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index eb8d5627d080..bdb2710e2aab 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -150,6 +150,8 @@ void edma_clear_event(unsigned channel); void edma_pause(unsigned channel); void edma_resume(unsigned channel); +void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no); + struct edma_rsv_info { const s16 (*rsv_chans)[2]; -- cgit v1.2.3 From 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:27:35 +0200 Subject: KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini --- arch/arm/kvm/arm.c | 2 +- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/mips/kvm/mips.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 6 +++--- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37c0f9e..cb77f999badd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -184,7 +184,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 6a4309bb821a..0729ba6acddf 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d687c6e3258d..3ca79aa011df 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -885,7 +885,7 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8e0356835960..d870bacc2f75 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -391,7 +391,7 @@ void kvm_arch_sync_events(struct kvm *kvm) { } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; /* FIXME!! diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f3e14fe91a4..00268cacdf4c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -146,7 +146,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a8691b0ed76..5a62d91c96e7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2616,7 +2616,7 @@ out: return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec4e3bd83d47..5065b953e6e8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -602,7 +602,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); -int kvm_dev_ioctl_check_extension(long ext); +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4b6c01b477f9..e28f3caa539d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2571,7 +2571,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_dev_ioctl_check_extension_generic(long arg) +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -2595,7 +2595,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) default: break; } - return kvm_dev_ioctl_check_extension(arg); + return kvm_vm_ioctl_check_extension(kvm, arg); } static long kvm_dev_ioctl(struct file *filp, @@ -2614,7 +2614,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension_generic(arg); + r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; -- cgit v1.2.3 From 36874c7e219fa080141d49fd7bb9bbbdad0507c5 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 28 Jul 2014 10:01:07 -0700 Subject: Input: pixcir_i2c_ts - support up to 5 fingers and hardware tracking IDs Some variants of the Pixcir touch controller support up to 5 simultaneous fingers and hardware tracking IDs. Prepare the driver for that. Signed-off-by: Roger Quadros Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/pixcir_i2c_ts.c | 74 ++++++++++++++++++++++++------- include/linux/input/pixcir_ts.h | 12 +++++ 2 files changed, 69 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index 0b016814e6b6..eddda520a1a7 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -27,18 +27,20 @@ #include #include -#define PIXCIR_MAX_SLOTS 2 +#define PIXCIR_MAX_SLOTS 5 /* Max fingers supported by driver */ struct pixcir_i2c_ts_data { struct i2c_client *client; struct input_dev *input; - const struct pixcir_ts_platform_data *chip; + const struct pixcir_ts_platform_data *pdata; bool running; + int max_fingers; /* Max fingers supported in this instance */ }; struct pixcir_touch { int x; int y; + int id; }; struct pixcir_report_data { @@ -49,13 +51,21 @@ struct pixcir_report_data { static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata, struct pixcir_report_data *report) { - u8 rdbuf[10], wrbuf[1] = { 0 }; + u8 rdbuf[2 + PIXCIR_MAX_SLOTS * 5]; + u8 wrbuf[1] = { 0 }; u8 *bufptr; u8 touch; int ret, i; + int readsize; + const struct pixcir_i2c_chip_data *chip = &tsdata->pdata->chip; memset(report, 0, sizeof(struct pixcir_report_data)); + i = chip->has_hw_ids ? 1 : 0; + readsize = 2 + tsdata->max_fingers * (4 + i); + if (readsize > sizeof(rdbuf)) + readsize = sizeof(rdbuf); + ret = i2c_master_send(tsdata->client, wrbuf, sizeof(wrbuf)); if (ret != sizeof(wrbuf)) { dev_err(&tsdata->client->dev, @@ -64,7 +74,7 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata, return; } - ret = i2c_master_recv(tsdata->client, rdbuf, sizeof(rdbuf)); + ret = i2c_master_recv(tsdata->client, rdbuf, readsize); if (ret != sizeof(rdbuf)) { dev_err(&tsdata->client->dev, "%s: i2c_master_recv failed(), ret=%d\n", @@ -73,8 +83,8 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata, } touch = rdbuf[0] & 0x7; - if (touch > PIXCIR_MAX_SLOTS) - touch = PIXCIR_MAX_SLOTS; + if (touch > tsdata->max_fingers) + touch = tsdata->max_fingers; report->num_touches = touch; bufptr = &rdbuf[2]; @@ -83,7 +93,12 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata, report->touches[i].x = (bufptr[1] << 8) | bufptr[0]; report->touches[i].y = (bufptr[3] << 8) | bufptr[2]; - bufptr = bufptr + 4; + if (chip->has_hw_ids) { + report->touches[i].id = bufptr[4]; + bufptr = bufptr + 5; + } else { + bufptr = bufptr + 4; + } } } @@ -95,22 +110,35 @@ static void pixcir_ts_report(struct pixcir_i2c_ts_data *ts, struct pixcir_touch *touch; int n, i, slot; struct device *dev = &ts->client->dev; + const struct pixcir_i2c_chip_data *chip = &ts->pdata->chip; n = report->num_touches; if (n > PIXCIR_MAX_SLOTS) n = PIXCIR_MAX_SLOTS; - for (i = 0; i < n; i++) { - touch = &report->touches[i]; - pos[i].x = touch->x; - pos[i].y = touch->y; - } + if (!chip->has_hw_ids) { + for (i = 0; i < n; i++) { + touch = &report->touches[i]; + pos[i].x = touch->x; + pos[i].y = touch->y; + } - input_mt_assign_slots(ts->input, slots, pos, n); + input_mt_assign_slots(ts->input, slots, pos, n); + } for (i = 0; i < n; i++) { touch = &report->touches[i]; - slot = slots[i]; + + if (chip->has_hw_ids) { + slot = input_mt_get_slot_by_key(ts->input, touch->id); + if (slot < 0) { + dev_dbg(dev, "no free slot for id 0x%x\n", + touch->id); + continue; + } + } else { + slot = slots[i]; + } input_mt_slot(ts->input, slot); input_mt_report_slot_state(ts->input, @@ -130,7 +158,7 @@ static void pixcir_ts_report(struct pixcir_i2c_ts_data *ts, static irqreturn_t pixcir_ts_isr(int irq, void *dev_id) { struct pixcir_i2c_ts_data *tsdata = dev_id; - const struct pixcir_ts_platform_data *pdata = tsdata->chip; + const struct pixcir_ts_platform_data *pdata = tsdata->pdata; struct pixcir_report_data report; while (tsdata->running) { @@ -399,6 +427,11 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, return -EINVAL; } + if (!pdata->chip.max_fingers) { + dev_err(dev, "Invalid max_fingers in pdata\n"); + return -EINVAL; + } + tsdata = devm_kzalloc(dev, sizeof(*tsdata), GFP_KERNEL); if (!tsdata) return -ENOMEM; @@ -411,7 +444,7 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, tsdata->client = client; tsdata->input = input; - tsdata->chip = pdata; + tsdata->pdata = pdata; input->name = client->name; input->id.bustype = BUS_I2C; @@ -427,7 +460,14 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, input_set_abs_params(input, ABS_MT_POSITION_X, 0, pdata->x_max, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, pdata->y_max, 0, 0); - error = input_mt_init_slots(input, PIXCIR_MAX_SLOTS, + tsdata->max_fingers = tsdata->pdata->chip.max_fingers; + if (tsdata->max_fingers > PIXCIR_MAX_SLOTS) { + tsdata->max_fingers = PIXCIR_MAX_SLOTS; + dev_info(dev, "Limiting maximum fingers to %d\n", + tsdata->max_fingers); + } + + error = input_mt_init_slots(input, tsdata->max_fingers, INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); if (error) { dev_err(dev, "Error initializing Multi-Touch slots\n"); diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h index 160cf353aa39..7bae83b7c396 100644 --- a/include/linux/input/pixcir_ts.h +++ b/include/linux/input/pixcir_ts.h @@ -43,10 +43,22 @@ enum pixcir_int_mode { #define PIXCIR_INT_ENABLE (1UL << 3) #define PIXCIR_INT_POL_HIGH (1UL << 2) +/** + * struct pixcir_irc_chip_data - chip related data + * @max_fingers: Max number of fingers reported simultaneously by h/w + * @has_hw_ids: Hardware supports finger tracking IDs + * + */ +struct pixcir_i2c_chip_data { + u8 max_fingers; + bool has_hw_ids; +}; + struct pixcir_ts_platform_data { int x_max; int y_max; int gpio_attb; /* GPIO connected to ATTB line */ + struct pixcir_i2c_chip_data chip; }; #endif -- cgit v1.2.3 From 518776800c094a518ae6d303660b57f1400eb1eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Jul 2014 23:59:33 -0400 Subject: SUNRPC: Allow svc_reserve() to notify TCP socket that space has been freed Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 2 ++ net/sunrpc/svcsock.c | 9 +++++++++ 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 5d9d6f84b382..ce6e4182a5b2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,6 +25,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_adjust_wspace)(struct svc_xprt *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9cfa391e2bd0..6666c6745858 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -448,6 +448,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space) atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; + if (xprt->xpt_ops->xpo_adjust_wspace) + xprt->xpt_ops->xpo_adjust_wspace(xprt); svc_xprt_enqueue(xprt); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 88db211d4264..c24a8ff33f8f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -475,6 +475,14 @@ static void svc_tcp_write_space(struct sock *sk) svc_write_space(sk); } +static void svc_tcp_adjust_wspace(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + + if (svc_tcp_has_wspace(xprt)) + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -1289,6 +1297,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_adjust_wspace = svc_tcp_adjust_wspace, }; static struct svc_xprt_class svc_tcp_class = { -- cgit v1.2.3 From 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 Feb 2014 19:13:49 -0800 Subject: namespaces: Use task_lock and not rcu to protect nsproxy The synchronous syncrhonize_rcu in switch_task_namespaces makes setns a sufficiently expensive system call that people have complained. Upon inspect nsproxy no longer needs rcu protection for remote reads. remote reads are rare. So optimize for same process reads and write by switching using rask_lock instead. This yields a simpler to understand lock, and a faster setns system call. In particular this fixes a performance regression observed by Rafael David Tinoco . This is effectively a revert of Pavel Emelyanov's commit cf7b708c8d1d7a27736771bcf4c457b332b0f818 Make access to task's nsproxy lighter from 2007. The race this originialy fixed no longer exists as do_notify_parent uses task_active_pid_ns(parent) instead of parent->nsproxy. Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 6 +++--- fs/proc/proc_net.c | 4 +++- fs/proc_namespace.c | 8 +++----- include/linux/nsproxy.h | 16 ++++++---------- ipc/namespace.c | 6 +++--- kernel/nsproxy.c | 15 ++++----------- kernel/utsname.c | 6 +++--- net/core/net_namespace.c | 10 ++++++---- 8 files changed, 31 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 182bc41cd887..7187d01329c3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2972,13 +2972,13 @@ static void *mntns_get(struct task_struct *task) struct mnt_namespace *ns = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) { ns = nsproxy->mnt_ns; get_mnt_ns(ns); } - rcu_read_unlock(); + task_unlock(task); return ns; } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4677bb7dc7c2..a63af3e0a612 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir) rcu_read_lock(); task = pid_task(proc_pid(dir), PIDTYPE_PID); if (task != NULL) { - ns = task_nsproxy(task); + task_lock(task); + ns = task->nsproxy; if (ns != NULL) net = get_net(ns->net_ns); + task_unlock(task); } rcu_read_unlock(); diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 1a81373947f3..73ca1740d839 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file, if (!task) goto err; - rcu_read_lock(); - nsp = task_nsproxy(task); + task_lock(task); + nsp = task->nsproxy; if (!nsp || !nsp->mnt_ns) { - rcu_read_unlock(); + task_unlock(task); put_task_struct(task); goto err; } ns = nsp->mnt_ns; get_mnt_ns(ns); - rcu_read_unlock(); - task_lock(task); if (!task->fs) { task_unlock(task); put_task_struct(task); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index b4ec59d159ac..35fa08fd7739 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -40,32 +40,28 @@ extern struct nsproxy init_nsproxy; * the namespaces access rules are: * * 1. only current task is allowed to change tsk->nsproxy pointer or - * any pointer on the nsproxy itself + * any pointer on the nsproxy itself. Current must hold the task_lock + * when changing tsk->nsproxy. * * 2. when accessing (i.e. reading) current task's namespaces - no * precautions should be taken - just dereference the pointers * * 3. the access to other task namespaces is performed like this - * rcu_read_lock(); - * nsproxy = task_nsproxy(tsk); + * task_lock(task); + * nsproxy = task->nsproxy; * if (nsproxy != NULL) { * / * * * work with the namespaces here * * e.g. get the reference on one of them * * / * } / * - * * NULL task_nsproxy() means that this task is + * * NULL task->nsproxy means that this task is * * almost dead (zombie) * * / - * rcu_read_unlock(); + * task_unlock(task); * */ -static inline struct nsproxy *task_nsproxy(struct task_struct *tsk) -{ - return rcu_dereference(tsk->nsproxy); -} - int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); diff --git a/ipc/namespace.c b/ipc/namespace.c index 59451c1e214d..b54468e48e32 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -154,11 +154,11 @@ static void *ipcns_get(struct task_struct *task) struct ipc_namespace *ns = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) ns = get_ipc_ns(nsproxy->ipc_ns); - rcu_read_unlock(); + task_unlock(task); return ns; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 8e7811086b82..ef42d0ab3115 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -204,20 +204,13 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) might_sleep(); + task_lock(p); ns = p->nsproxy; + p->nsproxy = new; + task_unlock(p); - rcu_assign_pointer(p->nsproxy, new); - - if (ns && atomic_dec_and_test(&ns->count)) { - /* - * wait for others to get what they want from this nsproxy. - * - * cannot release this nsproxy via the call_rcu() since - * put_mnt_ns() will want to sleep - */ - synchronize_rcu(); + if (ns && atomic_dec_and_test(&ns->count)) free_nsproxy(ns); - } } void exit_task_namespaces(struct task_struct *p) diff --git a/kernel/utsname.c b/kernel/utsname.c index fd393124e507..883aaaa7de8a 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -93,13 +93,13 @@ static void *utsns_get(struct task_struct *task) struct uts_namespace *ns = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) { ns = nsproxy->uts_ns; get_uts_ns(ns); } - rcu_read_unlock(); + task_unlock(task); return ns; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 85b62691f4f2..7c6b51a58968 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -373,9 +373,11 @@ struct net *get_net_ns_by_pid(pid_t pid) tsk = find_task_by_vpid(pid); if (tsk) { struct nsproxy *nsproxy; - nsproxy = task_nsproxy(tsk); + task_lock(tsk); + nsproxy = tsk->nsproxy; if (nsproxy) net = get_net(nsproxy->net_ns); + task_unlock(tsk); } rcu_read_unlock(); return net; @@ -632,11 +634,11 @@ static void *netns_get(struct task_struct *task) struct net *net = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) net = get_net(nsproxy->net_ns); - rcu_read_unlock(); + task_unlock(task); return net; } -- cgit v1.2.3 From a779ca5fa766e270b9e11c162d877295e2904f4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:26:04 -0400 Subject: xprtrdma: Remove RPCRDMA_PERSISTENT_REGISTRATION macro Clean up. RPCRDMA_PERSISTENT_REGISTRATION was a compile-time switch between RPCRDMA_REGISTER mode and RPCRDMA_ALLPHYSICAL mode. Since RPCRDMA_REGISTER has been removed, there's no need for the extra conditional compilation. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 2 -- net/sunrpc/xprtrdma/verbs.c | 13 ------------- 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c2f04e1ae159..64a0a0a97b23 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -62,8 +62,6 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ /* memory registration strategies */ -#define RPCRDMA_PERSISTENT_REGISTRATION (1) - enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1208ab2e655f..c2253d4c64df 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -561,12 +561,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (!ia->ri_id->device->alloc_fmr) { dprintk("RPC: %s: MTHCAFMR registration " "not supported by HCA\n", __func__); -#if RPCRDMA_PERSISTENT_REGISTRATION memreg = RPCRDMA_ALLPHYSICAL; -#else - rc = -ENOMEM; - goto out2; -#endif } } @@ -581,20 +576,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) switch (memreg) { case RPCRDMA_FRMR: break; -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: mem_priv = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; goto register_setup; -#endif case RPCRDMA_MTHCAFMR: if (ia->ri_have_dma_lkey) break; mem_priv = IB_ACCESS_LOCAL_WRITE; -#if RPCRDMA_PERSISTENT_REGISTRATION register_setup: -#endif ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); if (IS_ERR(ia->ri_bind_mem)) { printk(KERN_ALERT "%s: ib_get_dma_mr for " @@ -1905,7 +1896,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, switch (ia->ri_memreg_strategy) { -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: rpcrdma_map_one(ia, seg, writing); seg->mr_rkey = ia->ri_bind_mem->rkey; @@ -1913,7 +1903,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, seg->mr_nsegs = 1; nsegs = 1; break; -#endif /* Registration using frmr registration */ case RPCRDMA_FRMR: @@ -1943,13 +1932,11 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, switch (ia->ri_memreg_strategy) { -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: read_lock(&ia->ri_qplock); rpcrdma_unmap_one(ia, seg); read_unlock(&ia->ri_qplock); break; -#endif case RPCRDMA_FRMR: rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); -- cgit v1.2.3 From a6138db815df5ee542d848318e5dae681590fccd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jul 2014 16:26:53 -0700 Subject: mnt: Only change user settable mount flags in remount Kenton Varda discovered that by remounting a read-only bind mount read-only in a user namespace the MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user to the remount a read-only mount read-write. Correct this by replacing the mask of mount flags to preserve with a mask of mount flags that may be changed, and preserve all others. This ensures that any future bugs with this mask and remount will fail in an easy to detect way where new mount flags simply won't change. Cc: stable@vger.kernel.org Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 +- include/linux/mount.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 7187d01329c3..cb40449ea0df 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1937,7 +1937,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags, err = do_remount_sb(sb, flags, data, 0); if (!err) { lock_mount_hash(); - mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; + mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; mnt->mnt.mnt_flags = mnt_flags; touch_mnt_namespace(mnt->mnt_ns); unlock_mount_hash(); diff --git a/include/linux/mount.h b/include/linux/mount.h index 839bac270904..b637a89e1fae 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -42,7 +42,9 @@ struct mnt_namespace; * flag, consider how it interacts with shared mounts. */ #define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ + | MNT_READONLY) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) -- cgit v1.2.3 From 9566d6742852c527bf5af38af5cbb878dad75705 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jul 2014 17:26:07 -0700 Subject: mnt: Correct permission checks in do_remount While invesgiating the issue where in "mount --bind -oremount,ro ..." would result in later "mount --bind -oremount,rw" succeeding even if the mount started off locked I realized that there are several additional mount flags that should be locked and are not. In particular MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, and the atime flags in addition to MNT_READONLY should all be locked. These flags are all per superblock, can all be changed with MS_BIND, and should not be changable if set by a more privileged user. The following additions to the current logic are added in this patch. - nosuid may not be clearable by a less privileged user. - nodev may not be clearable by a less privielged user. - noexec may not be clearable by a less privileged user. - atime flags may not be changeable by a less privileged user. The logic with atime is that always setting atime on access is a global policy and backup software and auditing software could break if atime bits are not updated (when they are configured to be updated), and serious performance degradation could result (DOS attack) if atime updates happen when they have been explicitly disabled. Therefore an unprivileged user should not be able to mess with the atime bits set by a more privileged user. The additional restrictions are implemented with the addition of MNT_LOCK_NOSUID, MNT_LOCK_NODEV, MNT_LOCK_NOEXEC, and MNT_LOCK_ATIME mnt flags. Taken together these changes and the fixes for MNT_LOCK_READONLY should make it safe for an unprivileged user to create a user namespace and to call "mount --bind -o remount,... ..." without the danger of mount flags being changed maliciously. Cc: stable@vger.kernel.org Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 36 +++++++++++++++++++++++++++++++++--- include/linux/mount.h | 5 +++++ 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 1105a577a14f..dd9c93b5a9d5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -890,8 +890,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); /* Don't allow unprivileged users to change mount flags */ - if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) - mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + if (flag & CL_UNPRIVILEGED) { + mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; + + if (mnt->mnt.mnt_flags & MNT_READONLY) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + + if (mnt->mnt.mnt_flags & MNT_NODEV) + mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; + + if (mnt->mnt.mnt_flags & MNT_NOSUID) + mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; + + if (mnt->mnt.mnt_flags & MNT_NOEXEC) + mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; + } /* Don't allow unprivileged users to reveal what is under a mount */ if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) @@ -1931,6 +1944,23 @@ static int do_remount(struct path *path, int flags, int mnt_flags, !(mnt_flags & MNT_READONLY)) { return -EPERM; } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(mnt_flags & MNT_NODEV)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && + !(mnt_flags & MNT_NOSUID)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && + !(mnt_flags & MNT_NOEXEC)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { + return -EPERM; + } + err = security_sb_remount(sb, data); if (err) return err; @@ -2129,7 +2159,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, */ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { flags |= MS_NODEV; - mnt_flags |= MNT_NODEV; + mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } } diff --git a/include/linux/mount.h b/include/linux/mount.h index b637a89e1fae..b0c1e6574e7f 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -45,12 +45,17 @@ struct mnt_namespace; #define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ | MNT_READONLY) +#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) #define MNT_INTERNAL 0x4000 +#define MNT_LOCK_ATIME 0x040000 +#define MNT_LOCK_NOEXEC 0x080000 +#define MNT_LOCK_NOSUID 0x100000 +#define MNT_LOCK_NODEV 0x200000 #define MNT_LOCK_READONLY 0x400000 #define MNT_LOCKED 0x800000 #define MNT_DOOMED 0x1000000 -- cgit v1.2.3 From 9dcfee01930e6cc1e84d28c232664f0c19a1f86c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 14 Jul 2014 10:28:04 +0200 Subject: drivers: of: add automated assignment of reserved regions to client devices This patch adds code for automated assignment of reserved memory regions to struct device. reserved_mem->ops->device_init()/device_cleanup() callbacks are called to perform reserved memory driver specific initialization and cleanup Based on previous code provided by Josh Cartwright Signed-off-by: Marek Szyprowski Signed-off-by: Grant Likely --- drivers/of/of_reserved_mem.c | 70 +++++++++++++++++++++++++++++++++++++++++ include/linux/of_reserved_mem.h | 7 +++++ 2 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 632aae861375..59fb12e84e6b 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -206,8 +206,16 @@ void __init fdt_init_reserved_mem(void) for (i = 0; i < reserved_mem_count; i++) { struct reserved_mem *rmem = &reserved_mem[i]; unsigned long node = rmem->fdt_node; + int len; + const __be32 *prop; int err = 0; + prop = of_get_flat_dt_prop(node, "phandle", &len); + if (!prop) + prop = of_get_flat_dt_prop(node, "linux,phandle", &len); + if (prop) + rmem->phandle = of_read_number(prop, len/4); + if (rmem->size == 0) err = __reserved_mem_alloc_size(node, rmem->name, &rmem->base, &rmem->size); @@ -215,3 +223,65 @@ void __init fdt_init_reserved_mem(void) __reserved_mem_init_node(rmem); } } + +static inline struct reserved_mem *__find_rmem(struct device_node *node) +{ + unsigned int i; + + if (!node->phandle) + return NULL; + + for (i = 0; i < reserved_mem_count; i++) + if (reserved_mem[i].phandle == node->phandle) + return &reserved_mem[i]; + return NULL; +} + +/** + * of_reserved_mem_device_init() - assign reserved memory region to given device + * + * This function assign memory region pointed by "memory-region" device tree + * property to the given device. + */ +void of_reserved_mem_device_init(struct device *dev) +{ + struct reserved_mem *rmem; + struct device_node *np; + + np = of_parse_phandle(dev->of_node, "memory-region", 0); + if (!np) + return; + + rmem = __find_rmem(np); + of_node_put(np); + + if (!rmem || !rmem->ops || !rmem->ops->device_init) + return; + + rmem->ops->device_init(rmem, dev); + dev_info(dev, "assigned reserved memory node %s\n", rmem->name); +} + +/** + * of_reserved_mem_device_release() - release reserved memory device structures + * + * This function releases structures allocated for memory region handling for + * the given device. + */ +void of_reserved_mem_device_release(struct device *dev) +{ + struct reserved_mem *rmem; + struct device_node *np; + + np = of_parse_phandle(dev->of_node, "memory-region", 0); + if (!np) + return; + + rmem = __find_rmem(np); + of_node_put(np); + + if (!rmem || !rmem->ops || !rmem->ops->device_release) + return; + + rmem->ops->device_release(rmem, dev); +} diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index 4669ddfdd5af..5b5efae09135 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -8,6 +8,7 @@ struct reserved_mem_ops; struct reserved_mem { const char *name; unsigned long fdt_node; + unsigned long phandle; const struct reserved_mem_ops *ops; phys_addr_t base; phys_addr_t size; @@ -27,10 +28,16 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) #ifdef CONFIG_OF_RESERVED_MEM +void of_reserved_mem_device_init(struct device *dev); +void of_reserved_mem_device_release(struct device *dev); + void fdt_init_reserved_mem(void); void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size); #else +static inline void of_reserved_mem_device_init(struct device *dev) { } +static inline void of_reserved_mem_device_release(struct device *pdev) { } + static inline void fdt_init_reserved_mem(void) { } static inline void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size) { } -- cgit v1.2.3 From e630664c8383f300c4146d7613d61e5a8eb1f8e3 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 31 Jul 2014 11:01:29 +0300 Subject: mlx4_core: Add helper functions to support MR re-registration Add few helper functions to support a mechanism of getting an MPT, modifying it and updating the HCA with the modified object. The code takes 2 paths, one for directly changing the MPT (and sometimes its related MTTs) and another one which queries the MPT and updates the HCA via fw command SW2HW_MPT. The first path is used in native mode; the second path is slower and is used only in SRIOV. Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 + drivers/net/ethernet/mellanox/mlx4/mr.c | 160 +++++++++++++++++++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 26 +++- include/linux/mlx4/device.h | 16 +++ 4 files changed, 202 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1d8af7336807..b40d587974fa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -279,6 +279,8 @@ struct mlx4_icm_table { #define MLX4_MPT_FLAG_PHYSICAL (1 << 9) #define MLX4_MPT_FLAG_REGION (1 << 8) +#define MLX4_MPT_PD_MASK (0x1FFFFUL) +#define MLX4_MPT_PD_VF_MASK (0xFE0000UL) #define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) #define MLX4_MPT_PD_FLAG_RAE (1 << 28) #define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 2839abb878a6..7d717eccb7b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } +int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry ***mpt_entry) +{ + int err; + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + struct mlx4_cmd_mailbox *mailbox = NULL; + + /* Make sure that at this point we have single-threaded access only */ + + if (mmr->enabled != MLX4_MPT_EN_HW) + return -EINVAL; + + err = mlx4_HW2SW_MPT(dev, NULL, key); + + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d).", err); + mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n"); + return err; + } + + mmr->enabled = MLX4_MPT_EN_SW; + + if (!mlx4_is_mfunc(dev)) { + **mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + key, NULL); + } else { + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR_OR_NULL(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, key, + 0, MLX4_CMD_QUERY_MPT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + if (err) + goto free_mailbox; + + *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf; + } + + if (!(*mpt_entry) || !(**mpt_entry)) { + err = -ENOMEM; + goto free_mailbox; + } + + return 0; + +free_mailbox: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt); + +int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry **mpt_entry) +{ + int err; + + if (!mlx4_is_mfunc(dev)) { + /* Make sure any changes to this entry are flushed */ + wmb(); + + *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW; + + /* Make sure the new status is written */ + wmb(); + + err = mlx4_SYNC_TPT(dev); + } else { + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + + err = mlx4_SW2HW_MPT(dev, mailbox, key); + } + + mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK; + if (!err) + mmr->enabled = MLX4_MPT_EN_HW; + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt); + +void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev, + struct mlx4_mpt_entry **mpt_entry) +{ + if (mlx4_is_mfunc(dev)) { + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + mlx4_free_cmd_mailbox(dev, mailbox); + } +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt); + +int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry, + u32 pdn) +{ + u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags); + /* The wrapper function will put the slave's id here */ + if (mlx4_is_mfunc(dev)) + pd_flags &= ~MLX4_MPT_PD_VF_MASK; + mpt_entry->pd_flags = cpu_to_be32((pd_flags & ~MLX4_MPT_PD_MASK) | + (pdn & MLX4_MPT_PD_MASK) + | MLX4_MPT_PD_FLAG_EN_INV); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd); + +int mlx4_mr_hw_change_access(struct mlx4_dev *dev, + struct mlx4_mpt_entry *mpt_entry, + u32 access) +{ + u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) | + (access & MLX4_PERM_MASK); + + mpt_entry->flags = cpu_to_be32(flags); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access); + static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr) @@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) } EXPORT_SYMBOL_GPL(mlx4_mr_free); +void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + mlx4_mtt_cleanup(dev, &mr->mtt); +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup); + +int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, + u64 iova, u64 size, int npages, + int page_shift, struct mlx4_mpt_entry *mpt_entry) +{ + int err; + + mpt_entry->start = cpu_to_be64(mr->iova); + mpt_entry->length = cpu_to_be64(mr->size); + mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); + + err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); + if (err) + return err; + + if (mr->mtt.order < 0) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); + mpt_entry->mtt_addr = 0; + } else { + mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev, + &mr->mtt)); + if (mr->mtt.page_shift == 0) + mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order); + } + mr->enabled = MLX4_MPT_EN_SW; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write); + int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 0efc1368e5a8..1089367fed22 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; - if (mpt->com.from_state != RES_MPT_HW) { + if (mpt->com.from_state == RES_MPT_MAPPED) { + /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do + * that, the VF must read the MPT. But since the MPT entry memory is not + * in the VF's virtual memory space, it must use QUERY_MPT to obtain the + * entry contents. To guarantee that the MPT cannot be changed, the driver + * must perform HW2SW_MPT before this query and return the MPT entry to HW + * ownership fofollowing the change. The change here allows the VF to + * perform QUERY_MPT also when the entry is in SW ownership. + */ + struct mlx4_mpt_entry *mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + mpt->key, NULL); + + if (NULL == mpt_entry || NULL == outbox->buf) { + err = -EINVAL; + goto out; + } + + memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry)); + + err = 0; + } else if (mpt->com.from_state == RES_MPT_HW) { + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + } else { err = -EBUSY; goto out; } - err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, id, RES_MPT); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 35b51e7af886..bac002167ace 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -262,6 +262,7 @@ enum { MLX4_PERM_REMOTE_WRITE = 1 << 13, MLX4_PERM_ATOMIC = 1 << 14, MLX4_PERM_BIND_MW = 1 << 15, + MLX4_PERM_MASK = 0xFC00 }; enum { @@ -1243,4 +1244,19 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, int enable); +int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry ***mpt_entry); +int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry **mpt_entry); +int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry, + u32 pdn); +int mlx4_mr_hw_change_access(struct mlx4_dev *dev, + struct mlx4_mpt_entry *mpt_entry, + u32 access); +void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev, + struct mlx4_mpt_entry **mpt_entry); +void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, + u64 iova, u64 size, int npages, + int page_shift, struct mlx4_mpt_entry *mpt_entry); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From a3b255717fed1cad0dd4ed5be77114d32ef22a6d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 06:52:18 -0400 Subject: sunrpc: remove __rcu annotation from struct gss_cl_ctx->gc_gss_ctx Commit 5b22216e11f7 (nfs: __rcu annotations) added a __rcu annotation to the gc_gss_ctx field. I see no rationale for adding that though, as that field does not seem to be managed via RCU at all. Cc: Arnd Bergmann Cc: Paul McKenney Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth_gss.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index cbc6875fb9cf..36eebc451b41 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx __rcu *gc_gss_ctx; + struct gss_ctx *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; struct xdr_netobj gc_acceptor; u32 gc_win; -- cgit v1.2.3 From ec25422c669d38f4e8a83da7f77950094349de48 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 06:52:22 -0400 Subject: sunrpc: remove "ec" argument from encrypt_v2 operation It's always 0. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 4 ++-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 9 ++------- net/sunrpc/auth_gss/gss_krb5_wrap.c | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 5af2931cf58d..df02a4188487 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -81,7 +81,7 @@ struct gss_krb5_enctype { struct xdr_netobj *in, struct xdr_netobj *out); /* complete key generation */ u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, + struct xdr_buf *buf, struct page **pages); /* v2 encryption function */ u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u32 *headskip, @@ -310,7 +310,7 @@ gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, + struct xdr_buf *buf, struct page **pages); u32 diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 0f43e894bc0a..f5ed9f6ece06 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -641,7 +641,7 @@ out: u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, struct page **pages) + struct xdr_buf *buf, struct page **pages) { u32 err; struct xdr_netobj hmac; @@ -684,13 +684,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, ecptr = buf->tail[0].iov_base; } - memset(ecptr, 'X', ec); - buf->tail[0].iov_len += ec; - buf->len += ec; - /* copy plaintext gss token header after filler (if any) */ - memcpy(ecptr + ec, buf->head[0].iov_base + offset, - GSS_KRB5_TOK_HDR_LEN); + memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN); buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; buf->len += GSS_KRB5_TOK_HDR_LEN; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 88cd24aacddc..4b614c604fe0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -483,7 +483,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, *be64ptr = cpu_to_be64(kctx->seq_send64++); spin_unlock(&krb5_seq_lock); - err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, 0, pages); + err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages); if (err) return err; -- cgit v1.2.3 From e7029206ff43f6cf7d6fcb741adb126f47200516 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 17 Jul 2014 20:42:15 -0400 Subject: nfs: check wait_on_bit_lock err in page_group_lock Return errors from wait_on_bit_lock from nfs_page_group_lock. Add a bool argument @wait to nfs_page_group_lock. If true, loop over wait_on_bit_lock until it returns cleanly. If false, return the error from wait_on_bit_lock. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 29 +++++++++++++++++++++++------ fs/nfs/write.c | 6 ++++-- include/linux/nfs_page.h | 2 +- 3 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e76a40e298f2..9425118e91d7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -147,17 +147,25 @@ static int nfs_wait_bit_uninterruptible(void *word) * @req - request in group that is to be locked * * this lock must be held if modifying the page group list + * + * returns result from wait_on_bit_lock: 0 on success, < 0 on error */ -void -nfs_page_group_lock(struct nfs_page *req) +int +nfs_page_group_lock(struct nfs_page *req, bool wait) { struct nfs_page *head = req->wb_head; + int ret; WARN_ON_ONCE(head != head->wb_head); - wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + do { + ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, nfs_wait_bit_uninterruptible, TASK_UNINTERRUPTIBLE); + } while (wait && ret != 0); + + WARN_ON_ONCE(ret > 0); + return ret; } /* @@ -218,7 +226,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) { bool ret; - nfs_page_group_lock(req); + nfs_page_group_lock(req, true); ret = nfs_page_group_sync_on_bit_locked(req, bit); nfs_page_group_unlock(req); @@ -858,8 +866,13 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *subreq; unsigned int bytes_left = 0; unsigned int offset, pgbase; + int ret; - nfs_page_group_lock(req); + ret = nfs_page_group_lock(req, false); + if (ret < 0) { + desc->pg_error = ret; + return 0; + } subreq = req; bytes_left = subreq->wb_bytes; @@ -881,7 +894,11 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_recoalesce) return 0; /* retry add_request for this subreq */ - nfs_page_group_lock(req); + ret = nfs_page_group_lock(req, false); + if (ret < 0) { + desc->pg_error = ret; + return 0; + } continue; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d357728ed8ba..8d1ed2b9c16c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -216,7 +216,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) unsigned int pos = 0; unsigned int len = nfs_page_length(req->wb_page); - nfs_page_group_lock(req); + nfs_page_group_lock(req, true); do { tmp = nfs_page_group_search_locked(req->wb_head, pos); @@ -456,7 +456,9 @@ try_again: } /* lock each request in the page group */ - nfs_page_group_lock(head); + ret = nfs_page_group_lock(head, false); + if (ret < 0) + return ERR_PTR(ret); subreq = head; do { /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 4b48548e700e..291924ca9517 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -122,7 +122,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern void nfs_page_group_lock(struct nfs_page *); +extern int nfs_page_group_lock(struct nfs_page *, bool); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3 From b412ddf0661e11485876a202c48868143e3a01cf Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 17 Jul 2014 20:42:16 -0400 Subject: nfs: fix comment and add warn_on for PG_INODE_REF Fix the comment in nfs_page.h for PG_INODE_REF to reflect that it's no longer set only on head requests. Also add a WARN_ON_ONCE in nfs_inode_remove_request as PG_INODE_REF should always be set. Suggested-by: Peng Tao Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 ++ include/linux/nfs_page.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8d1ed2b9c16c..e6bc5b51f325 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -707,6 +707,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) nfs_release_request(req); + else + WARN_ON_ONCE(1); } static void diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 291924ca9517..6ad2bbcad405 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -26,7 +26,7 @@ enum { PG_MAPPED, /* page private set for buffered io */ PG_CLEAN, /* write succeeded */ PG_COMMIT_TO_DS, /* used by pnfs layouts */ - PG_INODE_REF, /* extra ref held by inode (head req only) */ + PG_INODE_REF, /* extra ref held by inode when in writeback */ PG_HEADLOCK, /* page group lock of wb_head */ PG_TEARDOWN, /* page group sync for destroy */ PG_UNLOCKPAGE, /* page group sync bit in read path */ -- cgit v1.2.3 From bd95608053b7f7813351b0defc0e3e7ef8cf2803 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 14 Jul 2014 11:28:20 +1000 Subject: sunrpc/auth: allow lockless (rcu) lookup of credential cache. The new flag RPCAUTH_LOOKUP_RCU to credential lookup avoids locking, does not take a reference on the returned credential, and returns -ECHILD if a simple lookup was not possible. The returned value can only be used within an rcu_read_lock protected region. The main user of this is the new rpc_lookup_cred_nonblock() which returns a pointer to the current credential which is only rcu-safe (no ref-count held), and might return -ECHILD if allocation was required. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 ++ net/sunrpc/auth.c | 17 +++++++++++++++-- net/sunrpc/auth_generic.c | 6 ++++++ net/sunrpc/auth_null.c | 2 ++ 4 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index c683b9a06913..8e030075fe79 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -103,6 +103,7 @@ struct rpc_auth_create_args { /* Flags for rpcauth_lookupcred() */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ +#define RPCAUTH_LOOKUP_RCU 0x02 /* lock-less lookup */ /* * Client authentication ops @@ -154,6 +155,7 @@ void rpc_destroy_generic_auth(void); void rpc_destroy_authunix(void); struct rpc_cred * rpc_lookup_cred(void); +struct rpc_cred * rpc_lookup_cred_nonblock(void); struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 360decdddc78..24fcbd23ae6c 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -557,6 +557,12 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; + if (flags & RPCAUTH_LOOKUP_RCU) { + if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) && + !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags)) + cred = entry; + break; + } spin_lock(&cache->lock); if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { spin_unlock(&cache->lock); @@ -571,6 +577,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, if (cred != NULL) goto found; + if (flags & RPCAUTH_LOOKUP_RCU) + return ERR_PTR(-ECHILD); + new = auth->au_ops->crcreate(auth, acred, flags); if (IS_ERR(new)) { cred = new; @@ -621,10 +630,14 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) memset(&acred, 0, sizeof(acred)); acred.uid = cred->fsuid; acred.gid = cred->fsgid; - acred.group_info = get_group_info(((struct cred *)cred)->group_info); + if (flags & RPCAUTH_LOOKUP_RCU) + acred.group_info = rcu_dereference(cred->group_info); + else + acred.group_info = get_group_info(((struct cred *)cred)->group_info); ret = auth->au_ops->lookup_cred(auth, &acred, flags); - put_group_info(acred.group_info); + if (!(flags & RPCAUTH_LOOKUP_RCU)) + put_group_info(acred.group_info); return ret; } EXPORT_SYMBOL_GPL(rpcauth_lookupcred); diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index ed04869b2d4f..6f6b829c9e8e 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -38,6 +38,12 @@ struct rpc_cred *rpc_lookup_cred(void) } EXPORT_SYMBOL_GPL(rpc_lookup_cred); +struct rpc_cred *rpc_lookup_cred_nonblock(void) +{ + return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU); +} +EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock); + /* * Public call interface for looking up machine creds. */ diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index f0ebe07978a2..712c123e04e9 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -35,6 +35,8 @@ nul_destroy(struct rpc_auth *auth) static struct rpc_cred * nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { + if (flags & RPCAUTH_LOOKUP_RCU) + return &null_cred; return get_rpccred(&null_cred); } -- cgit v1.2.3 From 912a108da767ae75cc929d2854e698aff527ec5d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 14 Jul 2014 11:28:20 +1000 Subject: NFS: teach nfs_neg_need_reval to understand LOOKUP_RCU This requires nfs_check_verifier to take an rcu_walk flag, and requires an rcu version of nfs_revalidate_inode which returns -ECHILD rather than making an RPC call. With this, nfs_lookup_revalidate can call nfs_neg_need_reval in RCU-walk mode. We can also move the LOOKUP_RCU check past the nfs_check_verifier() call in nfs_lookup_revalidate. If RCU_WALK prevents nfs_check_verifier or nfs_neg_need_reval from doing a full check, they return a status indicating that a revalidation is required. As this revalidation will not be possible in RCU_WALK mode, -ECHILD will ultimately be returned, which is the desired result. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 59 +++++++++++++++++++++++++++++++++++--------------- fs/nfs/inode.c | 9 ++++++++ include/linux/nfs_fs.h | 1 + 3 files changed, 52 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8a3c36984fc4..dcd4fe5831d6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); * A check for whether or not the parent directory has changed. * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. + * If rcu_walk prevents us from performing a full check, return 0. */ -static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, + int rcu_walk) { + int ret; + if (IS_ROOT(dentry)) return 1; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) @@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ - if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + if (rcu_walk) + ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir); + else + ret = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (ret < 0) return 0; if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; @@ -1054,6 +1062,9 @@ out_force: * * If parent mtime has changed, we revalidate, else we wait for a * period corresponding to the parent's attribute cache timeout value. + * + * If LOOKUP_RCU prevents us from performing a full check, return 1 + * suggesting a reval is needed. */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, @@ -1064,7 +1075,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; - return !nfs_check_verifier(dir, dentry); + return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); } /* @@ -1101,11 +1112,11 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) inode = dentry->d_inode; if (!inode) { - if (flags & LOOKUP_RCU) - return -ECHILD; - - if (nfs_neg_need_reval(dir, dentry, flags)) + if (nfs_neg_need_reval(dir, dentry, flags)) { + if (flags & LOOKUP_RCU) + return -ECHILD; goto out_bad; + } goto out_valid_noent; } @@ -1120,16 +1131,21 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) goto out_set_verifier; - if (flags & LOOKUP_RCU) - return -ECHILD; - /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { + if (!nfs_is_exclusive_create(dir, flags) && + nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { + + if (flags & LOOKUP_RCU) + return -ECHILD; + if (nfs_lookup_verify_inode(inode, flags)) goto out_zap_parent; goto out_valid; } + if (flags & LOOKUP_RCU) + return -ECHILD; + if (NFS_STALE(inode)) goto out_bad; @@ -1566,14 +1582,23 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent; struct inode *dir; - if (flags & LOOKUP_RCU) - return -ECHILD; - - parent = dget_parent(dentry); - dir = parent->d_inode; + if (flags & LOOKUP_RCU) { + parent = rcu_dereference(dentry); + dir = ACCESS_ONCE(parent->d_inode); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(dentry); + dir = parent->d_inode; + } if (!nfs_neg_need_reval(dir, dentry, flags)) ret = 1; - dput(parent); + else if (flags & LOOKUP_RCU) + ret = -ECHILD; + if (!(flags & LOOKUP_RCU)) + dput(parent); + else if (parent != rcu_dereference(dentry)) + return -ECHILD; goto out; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9927913c97c2..147fd17e7920 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); +int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_I(inode)->cache_validity & + (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) + && !nfs_attribute_cache_expired(inode)) + return NFS_STALE(inode) ? -ESTALE : 0; + return -ECHILD; +} + static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index e30f6059ecd6..60cd9e377926 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -352,6 +352,7 @@ extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_attribute_cache_expired(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); +extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); -- cgit v1.2.3 From f682a398b2e24ae0a775ddf37cced83b897198ee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 14 Jul 2014 11:28:20 +1000 Subject: NFS: allow lockless access to access_cache The access cache is used during RCU-walk path lookups, so it is best to avoid locking if possible as taking a lock kills concurrency. The rbtree is not rcu-safe and cannot easily be made so. Instead we simply check the last (i.e. most recent) entry on the LRU list. If this doesn't match, then we return -ECHILD and retry in lock/refcount mode. This requires freeing the nfs_access_entry struct with rcu, and requires using rcu access primatives when adding entries to the lru, and when examining the last entry. Calling put_rpccred before kfree_rcu looks a bit odd, but as put_rpccred already provides rcu protection, we know that the cred will not actually be freed until the next grace period, so any concurrent access will be safe. This patch provides about 5% performance improvement on a stat-heavy synthetic work load with 4 threads on a 2-core CPU. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 43 +++++++++++++++++++++++++++++++++++++++++-- include/linux/nfs_fs.h | 1 + 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2bfbde0f7176..1b5f38f48dab 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2079,7 +2079,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_rpccred(entry->cred); - kfree(entry); + kfree_rcu(entry, rcu_head); smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); smp_mb__after_atomic(); @@ -2257,6 +2257,38 @@ out_zap: return -ENOENT; } +static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + /* Only check the most recently returned cache entry, + * but do it without locking. + */ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache; + int err = -ECHILD; + struct list_head *lh; + + rcu_read_lock(); + if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) + goto out; + lh = rcu_dereference(nfsi->access_cache_entry_lru.prev); + cache = list_entry(lh, struct nfs_access_entry, lru); + if (lh == &nfsi->access_cache_entry_lru || + cred != cache->cred) + cache = NULL; + if (cache == NULL) + goto out; + if (!nfs_have_delegated_attributes(inode) && + !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) + goto out; + res->jiffies = cache->jiffies; + res->cred = cache->cred; + res->mask = cache->mask; + err = 0; +out: + rcu_read_unlock(); + return err; +} + static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) { struct nfs_inode *nfsi = NFS_I(inode); @@ -2300,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) cache->cred = get_rpccred(set->cred); cache->mask = set->mask; + /* The above field assignments must be visible + * before this item appears on the lru. We cannot easily + * use rcu_assign_pointer, so just force the memory barrier. + */ + smp_wmb(); nfs_access_add_rbtree(inode, cache); /* Update accounting */ @@ -2339,7 +2376,9 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) trace_nfs_access_enter(inode); - status = nfs_access_get_cached(inode, cred, &cache); + status = nfs_access_get_cached_rcu(inode, cred, &cache); + if (status != 0) + status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) goto out_cached; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 60cd9e377926..5180a7ededec 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -52,6 +52,7 @@ struct nfs_access_entry { unsigned long jiffies; struct rpc_cred * cred; int mask; + struct rcu_head rcu_head; }; struct nfs_lockowner { -- cgit v1.2.3 From 961e3beae3b29ae9463631415342244cdaf1cd47 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 10 Jun 2014 10:25:00 +0200 Subject: drm/tegra: Make job submission 64-bit safe Job submission currently relies on the fact that struct drm_tegra_reloc and struct host1x_reloc are the same size and uses a simple call to the copy_from_user() function to copy them to kernel space. This causes the handle to be stored in the buffer object field, which then needs a cast to a 32 bit integer to resolve it to a proper buffer object pointer and store it back in the buffer object field. On 64-bit architectures that will no longer work, since pointers are 64 bits wide whereas handles will remain 32 bits. This causes the sizes of both structures to because different and copying will no longer work. Fix this by adding a new function, host1x_reloc_get_user(), that copies the structures field by field. While at it, use substructures for the command and target buffers in struct host1x_reloc for better readability. Also use unsized types to make it more obvious that this isn't part of userspace ABI. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 62 ++++++++++++++++++++++++++++++++------------- drivers/gpu/host1x/job.c | 22 ++++++++-------- include/linux/host1x.h | 15 ++++++----- 3 files changed, 64 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 5cba5e736130..59736bb810cd 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -132,6 +132,45 @@ host1x_bo_lookup(struct drm_device *drm, struct drm_file *file, u32 handle) return &bo->base; } +static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, + struct drm_tegra_reloc __user *src, + struct drm_device *drm, + struct drm_file *file) +{ + u32 cmdbuf, target; + int err; + + err = get_user(cmdbuf, &src->cmdbuf.handle); + if (err < 0) + return err; + + err = get_user(dest->cmdbuf.offset, &src->cmdbuf.offset); + if (err < 0) + return err; + + err = get_user(target, &src->target.handle); + if (err < 0) + return err; + + err = get_user(dest->target.offset, &src->cmdbuf.offset); + if (err < 0) + return err; + + err = get_user(dest->shift, &src->shift); + if (err < 0) + return err; + + dest->cmdbuf.bo = host1x_bo_lookup(drm, file, cmdbuf); + if (!dest->cmdbuf.bo) + return -ENOENT; + + dest->target.bo = host1x_bo_lookup(drm, file, target); + if (!dest->target.bo) + return -ENOENT; + + return 0; +} + int tegra_drm_submit(struct tegra_drm_context *context, struct drm_tegra_submit *args, struct drm_device *drm, struct drm_file *file) @@ -184,26 +223,13 @@ int tegra_drm_submit(struct tegra_drm_context *context, cmdbufs++; } - if (copy_from_user(job->relocarray, relocs, - sizeof(*relocs) * num_relocs)) { - err = -EFAULT; - goto fail; - } - + /* copy and resolve relocations from submit */ while (num_relocs--) { - struct host1x_reloc *reloc = &job->relocarray[num_relocs]; - struct host1x_bo *cmdbuf, *target; - - cmdbuf = host1x_bo_lookup(drm, file, (u32)reloc->cmdbuf); - target = host1x_bo_lookup(drm, file, (u32)reloc->target); - - reloc->cmdbuf = cmdbuf; - reloc->target = target; - - if (!reloc->target || !reloc->cmdbuf) { - err = -ENOENT; + err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs], + &relocs[num_relocs], drm, + file); + if (err < 0) goto fail; - } } if (copy_from_user(job->waitchk, waitchks, diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 112f27e51bc7..63bd63f3c7df 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -185,16 +185,16 @@ static unsigned int pin_job(struct host1x_job *job) struct sg_table *sgt; dma_addr_t phys_addr; - reloc->target = host1x_bo_get(reloc->target); - if (!reloc->target) + reloc->target.bo = host1x_bo_get(reloc->target.bo); + if (!reloc->target.bo) goto unpin; - phys_addr = host1x_bo_pin(reloc->target, &sgt); + phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); if (!phys_addr) goto unpin; job->addr_phys[job->num_unpins] = phys_addr; - job->unpins[job->num_unpins].bo = reloc->target; + job->unpins[job->num_unpins].bo = reloc->target.bo; job->unpins[job->num_unpins].sgt = sgt; job->num_unpins++; } @@ -235,21 +235,21 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocarray[i]; u32 reloc_addr = (job->reloc_addr_phys[i] + - reloc->target_offset) >> reloc->shift; + reloc->target.offset) >> reloc->shift; u32 *target; /* skip all other gathers */ - if (cmdbuf != reloc->cmdbuf) + if (cmdbuf != reloc->cmdbuf.bo) continue; - if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) { + if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) { if (cmdbuf_page_addr) host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr); cmdbuf_page_addr = host1x_bo_kmap(cmdbuf, - reloc->cmdbuf_offset >> PAGE_SHIFT); - last_page = reloc->cmdbuf_offset >> PAGE_SHIFT; + reloc->cmdbuf.offset >> PAGE_SHIFT); + last_page = reloc->cmdbuf.offset >> PAGE_SHIFT; if (unlikely(!cmdbuf_page_addr)) { pr_err("Could not map cmdbuf for relocation\n"); @@ -257,7 +257,7 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) } } - target = cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK); + target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK); *target = reloc_addr; } @@ -272,7 +272,7 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, { offset *= sizeof(u32); - if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset) + if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset) return false; return true; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index d2b52999e771..bb9840fd1e18 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -164,12 +164,15 @@ int host1x_job_submit(struct host1x_job *job); */ struct host1x_reloc { - struct host1x_bo *cmdbuf; - u32 cmdbuf_offset; - struct host1x_bo *target; - u32 target_offset; - u32 shift; - u32 pad; + struct { + struct host1x_bo *bo; + unsigned long offset; + } cmdbuf; + struct { + struct host1x_bo *bo; + unsigned long offset; + } target; + unsigned long shift; }; struct host1x_job { -- cgit v1.2.3 From 31c1e5a1350ae8d1bc2018f5de8264266d9773e1 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 1 Aug 2014 12:20:10 +0200 Subject: dmaengine: Remove the context argument to the prep_dma_cyclic operation The argument is always set to NULL and never used. Remove it. Signed-off-by: Laurent Pinchart Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 2 +- drivers/dma/at_hdmac.c | 3 +-- drivers/dma/bcm2835-dma.c | 2 +- drivers/dma/dma-jz4740.c | 2 +- drivers/dma/edma.c | 2 +- drivers/dma/ep93xx_dma.c | 4 +--- drivers/dma/fsl-edma.c | 2 +- drivers/dma/imx-dma.c | 2 +- drivers/dma/imx-sdma.c | 2 +- drivers/dma/mmp_pdma.c | 2 +- drivers/dma/mmp_tdma.c | 2 +- drivers/dma/mxs-dma.c | 2 +- drivers/dma/omap-dma.c | 3 +-- drivers/dma/pl330.c | 2 +- drivers/dma/s3c24xx-dma.c | 3 +-- drivers/dma/sa11x0-dma.c | 2 +- drivers/dma/sh/shdma-base.c | 2 +- drivers/dma/sirf-dma.c | 2 +- drivers/dma/ste_dma40.c | 3 +-- drivers/dma/tegra20-apb-dma.c | 2 +- include/linux/dmaengine.h | 4 ++-- 21 files changed, 22 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 8114731a1c62..48ec81fe1eac 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1653,7 +1653,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index c13a3bb0f594..d20ab1b73a3a 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -893,12 +893,11 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, * @period_len: number of bytes for each period * @direction: transfer direction, to or from device * @flags: tx descriptor status flags - * @context: transfer context (ignored) */ static struct dma_async_tx_descriptor * atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma_slave *atslave = chan->private; diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index a03602164e3e..68007974961a 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -335,7 +335,7 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); enum dma_slave_buswidth dev_width; diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c index bfbce6b07902..6a9d89c93b1f 100644 --- a/drivers/dma/dma-jz4740.c +++ b/drivers/dma/dma-jz4740.c @@ -433,7 +433,7 @@ static struct dma_async_tx_descriptor *jz4740_dma_prep_slave_sg( static struct dma_async_tx_descriptor *jz4740_dma_prep_dma_cyclic( struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); struct jz4740_dma_desc *desc; diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index a13f37f719ed..d566650abf62 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -598,7 +598,7 @@ struct dma_async_tx_descriptor *edma_prep_dma_memcpy( static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long tx_flags, void *context) + unsigned long tx_flags) { struct edma_chan *echan = to_edma_chan(chan); struct device *dev = chan->device->dev; diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index cb4bf682a708..7650470196c4 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1092,7 +1092,6 @@ fail: * @period_len: length of a single period * @dir: direction of the operation * @flags: tx descriptor status flags - * @context: operation context (ignored) * * Prepares a descriptor for cyclic DMA operation. This means that once the * descriptor is submitted, we will be submitting in a @period_len sized @@ -1105,8 +1104,7 @@ fail: static struct dma_async_tx_descriptor * ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction dir, unsigned long flags, - void *context) + enum dma_transfer_direction dir, unsigned long flags) { struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); struct ep93xx_dma_desc *desc, *first; diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index 24ab3d371954..3c5711d5fe97 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -517,7 +517,7 @@ err: static struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); struct fsl_edma_desc *fsl_desc; diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 286660a12cc6..9d2c9e7374dc 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -866,7 +866,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg( static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct imxdma_channel *imxdmac = to_imxdma_chan(chan); struct imxdma_engine *imxdma = imxdmac->imxdma; diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index de584e605db5..f7626e37d0b8 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1125,7 +1125,7 @@ err_out: static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct sdma_channel *sdmac = to_sdma_chan(chan); struct sdma_engine *sdma = sdmac->sdma; diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index a7b186d536b3..a1a4db5721b8 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -601,7 +601,7 @@ static struct dma_async_tx_descriptor * mmp_pdma_prep_dma_cyclic(struct dma_chan *dchan, dma_addr_t buf_addr, size_t len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mmp_pdma_chan *chan; struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new; diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index 724f7f4c9720..6ad30e2c5038 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -389,7 +389,7 @@ struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac) static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); struct mmp_tdma_desc *desc; diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index dc1dba78e529..5ea61201dbf0 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -589,7 +589,7 @@ err_out: static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index b19f04f4390b..4cf7d9a950d7 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -853,8 +853,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, - size_t period_len, enum dma_transfer_direction dir, unsigned long flags, - void *context) + size_t period_len, enum dma_transfer_direction dir, unsigned long flags) { struct omap_dmadev *od = to_omap_dma_dev(chan->device); struct omap_chan *c = to_omap_dma_chan(chan); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index a55d75498098..d5149aacd2fe 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2362,7 +2362,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len) static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct dma_pl330_desc *desc = NULL, *first = NULL; struct dma_pl330_chan *pch = to_pchan(chan); diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c index 012520c9fd79..7416572d1e40 100644 --- a/drivers/dma/s3c24xx-dma.c +++ b/drivers/dma/s3c24xx-dma.c @@ -889,8 +889,7 @@ static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy( static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, - enum dma_transfer_direction direction, unsigned long flags, - void *context) + enum dma_transfer_direction direction, unsigned long flags) { struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); struct s3c24xx_dma_engine *s3cdma = s3cchan->host; diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c index 5ebdfbc1051e..4b0ef043729a 100644 --- a/drivers/dma/sa11x0-dma.c +++ b/drivers/dma/sa11x0-dma.c @@ -612,7 +612,7 @@ static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg( static struct dma_async_tx_descriptor *sa11x0_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, - enum dma_transfer_direction dir, unsigned long flags, void *context) + enum dma_transfer_direction dir, unsigned long flags) { struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); struct sa11x0_dma_desc *txd; diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index e427a03a0e8b..42d497416196 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -668,7 +668,7 @@ static struct dma_async_tx_descriptor *shdma_prep_slave_sg( static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct shdma_chan *schan = to_shdma_chan(chan); struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index 03f7820fa333..aac03ab10c54 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -580,7 +580,7 @@ err_dir: static struct dma_async_tx_descriptor * sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction, unsigned long flags, void *context) + enum dma_transfer_direction direction, unsigned long flags) { struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); struct sirfsoc_dma_desc *sdesc = NULL; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index c7984459ede7..5fe59335e247 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2531,8 +2531,7 @@ d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, static struct dma_async_tx_descriptor * dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction, unsigned long flags, - void *context) + enum dma_transfer_direction direction, unsigned long flags) { unsigned int periods = buf_len / period_len; struct dma_async_tx_descriptor *txd; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 03ad64ecaaf0..16efa603ff65 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1055,7 +1055,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); struct tegra_dma_desc *dma_desc = NULL; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 4eb2f82aed1d..94ddccd706fc 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -669,7 +669,7 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context); + unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags); @@ -744,7 +744,7 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic( unsigned long flags) { return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len, - period_len, dir, flags, NULL); + period_len, dir, flags); } static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( -- cgit v1.2.3 From 0097875bd41528922fb3bb5f348c53f17e00e2fd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 31 Jul 2014 03:10:50 -0700 Subject: proc: Implement /proc/thread-self to point at the directory of the current thread /proc/thread-self is derived from /proc/self. /proc/thread-self points to the directory in proc containing information about the current thread. This funtionality has been missing for a long time, and is tricky to implement in userspace as gettid() is not exported by glibc. More importantly this allows fixing defects in /proc/mounts and /proc/net where in a threaded application today they wind up being empty files when only the initial pthread has exited, causing problems for other threads. Signed-off-by: "Eric W. Biederman" --- fs/proc/Makefile | 1 + fs/proc/base.c | 15 +++++--- fs/proc/inode.c | 7 +++- fs/proc/internal.h | 6 +++ fs/proc/root.c | 3 ++ fs/proc/thread_self.c | 85 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pid_namespace.h | 1 + 7 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 fs/proc/thread_self.c (limited to 'include/linux') diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 239493ec718e..7151ea428041 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -23,6 +23,7 @@ proc-y += version.o proc-y += softirqs.o proc-y += namespaces.o proc-y += self.o +proc-y += thread_self.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/base.c b/fs/proc/base.c index ed34e405c6b9..0131156ce7c9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2847,7 +2847,7 @@ retry: return iter; } -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) +#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file *file, struct dir_context *ctx) @@ -2859,14 +2859,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) if (pos >= PID_MAX_LIMIT + TGID_OFFSET) return 0; - if (pos == TGID_OFFSET - 1) { + if (pos == TGID_OFFSET - 2) { struct inode *inode = ns->proc_self->d_inode; if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) return 0; - iter.tgid = 0; - } else { - iter.tgid = pos - TGID_OFFSET; + ctx->pos = pos = pos + 1; + } + if (pos == TGID_OFFSET - 1) { + struct inode *inode = ns->proc_thread_self->d_inode; + if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) + return 0; + ctx->pos = pos = pos + 1; } + iter.tgid = pos - TGID_OFFSET; iter.task = NULL; for (iter = next_tgid(ns, iter); iter.task; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 0adbc02d60e3..333080d7a671 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) int proc_fill_super(struct super_block *s) { struct inode *root_inode; + int ret; s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; @@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s) return -ENOMEM; } - return proc_setup_self(s); + ret = proc_setup_self(s); + if (ret) { + return ret; + } + return proc_setup_thread_self(s); } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3ab6d14e71c5..ee04619173b2 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -233,6 +233,12 @@ static inline int proc_net_init(void) { return 0; } */ extern int proc_setup_self(struct super_block *); +/* + * proc_thread_self.c + */ +extern int proc_setup_thread_self(struct super_block *); +extern void proc_thread_self_init(void); + /* * proc_sysctl.c */ diff --git a/fs/proc/root.c b/fs/proc/root.c index 5dbadecb234d..48f1c03bc7ed 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb) ns = (struct pid_namespace *)sb->s_fs_info; if (ns->proc_self) dput(ns->proc_self); + if (ns->proc_thread_self) + dput(ns->proc_thread_self); kill_anon_super(sb); put_pid_ns(ns); } @@ -170,6 +172,7 @@ void __init proc_root_init(void) return; proc_self_init(); + proc_thread_self_init(); proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c new file mode 100644 index 000000000000..59075b509df3 --- /dev/null +++ b/fs/proc/thread_self.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include "internal.h" + +/* + * /proc/thread_self: + */ +static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + pid_t pid = task_pid_nr_ns(current, ns); + char tmp[PROC_NUMBUF + 6 + PROC_NUMBUF]; + if (!pid) + return -ENOENT; + sprintf(tmp, "%d/task/%d", tgid, pid); + return readlink_copy(buffer, buflen, tmp); +} + +static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + pid_t pid = task_pid_nr_ns(current, ns); + char *name = ERR_PTR(-ENOENT); + if (pid) { + name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); + if (!name) + name = ERR_PTR(-ENOMEM); + else + sprintf(name, "%d/task/%d", tgid, pid); + } + nd_set_link(nd, name); + return NULL; +} + +static const struct inode_operations proc_thread_self_inode_operations = { + .readlink = proc_thread_self_readlink, + .follow_link = proc_thread_self_follow_link, + .put_link = kfree_put_link, +}; + +static unsigned thread_self_inum; + +int proc_setup_thread_self(struct super_block *s) +{ + struct inode *root_inode = s->s_root->d_inode; + struct pid_namespace *ns = s->s_fs_info; + struct dentry *thread_self; + + mutex_lock(&root_inode->i_mutex); + thread_self = d_alloc_name(s->s_root, "thread-self"); + if (thread_self) { + struct inode *inode = new_inode_pseudo(s); + if (inode) { + inode->i_ino = thread_self_inum; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_op = &proc_thread_self_inode_operations; + d_add(thread_self, inode); + } else { + dput(thread_self); + thread_self = ERR_PTR(-ENOMEM); + } + } else { + thread_self = ERR_PTR(-ENOMEM); + } + mutex_unlock(&root_inode->i_mutex); + if (IS_ERR(thread_self)) { + pr_err("proc_fill_super: can't allocate /proc/thread_self\n"); + return PTR_ERR(thread_self); + } + ns->proc_thread_self = thread_self; + return 0; +} + +void __init proc_thread_self_init(void) +{ + proc_alloc_inum(&thread_self_inum); +} diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7246ef3d4455..1997ffc295a7 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -33,6 +33,7 @@ struct pid_namespace { #ifdef CONFIG_PROC_FS struct vfsmount *proc_mnt; struct dentry *proc_self; + struct dentry *proc_thread_self; #endif #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; -- cgit v1.2.3 From 1b69be5e8afc634f39ad695a6ab6aad0cf0975c7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 10 Jun 2014 11:41:57 +1000 Subject: drivers/vfio: EEH support for VFIO PCI device The patch adds new IOCTL commands for sPAPR VFIO container device to support EEH functionality for PCI devices, which have been passed through from host to somebody else via VFIO. Signed-off-by: Gavin Shan Acked-by: Alexander Graf Acked-by: Alex Williamson Signed-off-by: Benjamin Herrenschmidt --- Documentation/vfio.txt | 87 +++++++++++++++++++++++++++++++++++-- drivers/vfio/Makefile | 1 + drivers/vfio/pci/vfio_pci.c | 18 ++++++-- drivers/vfio/vfio_iommu_spapr_tce.c | 17 +++++++- drivers/vfio/vfio_spapr_eeh.c | 87 +++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 23 ++++++++++ include/uapi/linux/vfio.h | 34 +++++++++++++++ 7 files changed, 259 insertions(+), 8 deletions(-) create mode 100644 drivers/vfio/vfio_spapr_eeh.c (limited to 'include/linux') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index b9ca02370d46..96978eced341 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides an excellent performance which has limitations such as inability to do locked pages accounting in real time. -So 3 additional ioctls have been added: +4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O +subtree that can be treated as a unit for the purposes of partitioning and +error recovery. A PE may be a single or multi-function IOA (IO Adapter), a +function of a multi-function IOA, or multiple IOAs (possibly including switch +and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors +and recover from them via EEH RTAS services, which works on the basis of +additional ioctl commands. + +So 4 additional ioctls have been added: VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start of the DMA window on the PCI bus. @@ -316,9 +324,12 @@ So 3 additional ioctls have been added: VFIO_IOMMU_DISABLE - disables the container. + VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery. The code flow from the example above should be slightly changed: + struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 }; + ..... /* Add the group to the container */ ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); @@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed: dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; /* Check here is .iova/.size are within DMA window from spapr_iommu_info */ - ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); - ..... + + /* Get a file descriptor for the device */ + device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); + + .... + + /* Gratuitous device reset and go... */ + ioctl(device, VFIO_DEVICE_RESET); + + /* Make sure EEH is supported */ + ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH); + + /* Enable the EEH functionality on the device */ + pe_op.op = VFIO_EEH_PE_ENABLE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* You're suggested to create additional data struct to represent + * PE, and put child devices belonging to same IOMMU group to the + * PE instance for later reference. + */ + + /* Check the PE's state and make sure it's in functional state */ + pe_op.op = VFIO_EEH_PE_GET_STATE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Save device state using pci_save_state(). + * EEH should be enabled on the specified device. + */ + + .... + + /* When 0xFF's returned from reading PCI config space or IO BARs + * of the PCI device. Check the PE's state to see if that has been + * frozen. + */ + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Waiting for pending PCI transactions to be completed and don't + * produce any more PCI traffic from/to the affected PE until + * recovery is finished. + */ + + /* Enable IO for the affected PE and collect logs. Usually, the + * standard part of PCI config space, AER registers are dumped + * as logs for further analysis. + */ + pe_op.op = VFIO_EEH_PE_UNFREEZE_IO; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* + * Issue PE reset: hot or fundamental reset. Usually, hot reset + * is enough. However, the firmware of some PCI adapters would + * require fundamental reset. + */ + pe_op.op = VFIO_EEH_PE_RESET_HOT; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Configure the PCI bridges for the affected PE */ + pe_op.op = VFIO_EEH_PE_CONFIGURE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Restored state we saved at initialization time. pci_restore_state() + * is good enough as an example. + */ + + /* Hopefully, error is recovered successfully. Now, you can resume to + * start PCI traffic to/from the affected PE. + */ + + .... ------------------------------------------------------------------------------- diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 72bfabc8629e..50e30bc75e85 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o +obj-$(CONFIG_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 010e0f8b8e4f..e2ee80f36e3e 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data) { struct vfio_pci_device *vdev = device_data; - if (atomic_dec_and_test(&vdev->refcnt)) + if (atomic_dec_and_test(&vdev->refcnt)) { + vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); + } module_put(THIS_MODULE); } @@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data) static int vfio_pci_open(void *device_data) { struct vfio_pci_device *vdev = device_data; + int ret; if (!try_module_get(THIS_MODULE)) return -ENODEV; if (atomic_inc_return(&vdev->refcnt) == 1) { - int ret = vfio_pci_enable(vdev); + ret = vfio_pci_enable(vdev); + if (ret) + goto error; + + ret = vfio_spapr_pci_eeh_open(vdev->pdev); if (ret) { - module_put(THIS_MODULE); - return ret; + vfio_pci_disable(vdev); + goto error; } } return 0; +error: + module_put(THIS_MODULE); + return ret; } static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index a84788ba662c..730b4ef3e0cc 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data, switch (cmd) { case VFIO_CHECK_EXTENSION: - return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; + switch (arg) { + case VFIO_SPAPR_TCE_IOMMU: + ret = 1; + break; + default: + ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg); + break; + } + + return (ret < 0) ? 0 : ret; case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; @@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data, tce_iommu_disable(container); mutex_unlock(&container->lock); return 0; + case VFIO_EEH_PE_OP: + if (!container->tbl || !container->tbl->it_group) + return -ENODEV; + + return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group, + cmd, arg); } return -ENOTTY; diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c new file mode 100644 index 000000000000..f834b4ce1431 --- /dev/null +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -0,0 +1,87 @@ +/* + * EEH functionality support for VFIO devices. The feature is only + * available on sPAPR compatible platforms. + * + * Copyright Gavin Shan, IBM Corporation 2014. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +/* We might build address mapping here for "fast" path later */ +int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +{ + return eeh_dev_open(pdev); +} + +void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) +{ + eeh_dev_release(pdev); +} + +long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, unsigned long arg) +{ + struct eeh_pe *pe; + struct vfio_eeh_pe_op op; + unsigned long minsz; + long ret = -EINVAL; + + switch (cmd) { + case VFIO_CHECK_EXTENSION: + if (arg == VFIO_EEH) + ret = eeh_enabled() ? 1 : 0; + else + ret = 0; + break; + case VFIO_EEH_PE_OP: + pe = eeh_iommu_group_to_pe(group); + if (!pe) + return -ENODEV; + + minsz = offsetofend(struct vfio_eeh_pe_op, op); + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + if (op.argsz < minsz || op.flags) + return -EINVAL; + + switch (op.op) { + case VFIO_EEH_PE_DISABLE: + ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE); + break; + case VFIO_EEH_PE_ENABLE: + ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE); + break; + case VFIO_EEH_PE_UNFREEZE_IO: + ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO); + break; + case VFIO_EEH_PE_UNFREEZE_DMA: + ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA); + break; + case VFIO_EEH_PE_GET_STATE: + ret = eeh_pe_get_state(pe); + break; + case VFIO_EEH_PE_RESET_DEACTIVATE: + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + break; + case VFIO_EEH_PE_RESET_HOT: + ret = eeh_pe_reset(pe, EEH_RESET_HOT); + break; + case VFIO_EEH_PE_RESET_FUNDAMENTAL: + ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL); + break; + case VFIO_EEH_PE_CONFIGURE: + ret = eeh_pe_configure(pe); + break; + default: + ret = -EINVAL; + } + } + + return ret; +} diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8ec980b5e3af..25a0fbd4b998 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +#ifdef CONFIG_EEH +extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); +extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, + unsigned long arg); +#else +static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +{ + return 0; +} + +static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) +{ +} + +static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} +#endif /* CONFIG_EEH */ #endif /* VFIO_H */ diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index cb9023d4f063..6612974c64bf 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -30,6 +30,9 @@ */ #define VFIO_DMA_CC_IOMMU 4 +/* Check if EEH is supported */ +#define VFIO_EEH 5 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info { #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) +/* + * EEH PE operation struct provides ways to: + * - enable/disable EEH functionality; + * - unfreeze IO/DMA for frozen PE; + * - read PE state; + * - reset PE; + * - configure PE. + */ +struct vfio_eeh_pe_op { + __u32 argsz; + __u32 flags; + __u32 op; +}; + +#define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ +#define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */ +#define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */ +#define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */ +#define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */ +#define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */ +#define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */ +#define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */ +#define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ +#define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */ +#define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */ +#define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ +#define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ +#define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ + +#define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From 8ba918d488caded2c4368b0b922eb905fe3bb101 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:10 +1000 Subject: KVM: irqchip: Provide and use accessors for irq routing table This provides accessor functions for the KVM interrupt mappings, in order to reduce the amount of code that accesses the fields of the kvm_irq_routing_table struct, and restrict that code to one file, virt/kvm/irqchip.c. The new functions are kvm_irq_map_gsi(), which maps from a global interrupt number to a set of IRQ routing entries, and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to a global interrupt number. This also moves the update of kvm_irq_routing_table::chip[][] into irqchip.c, out of the various kvm_set_routing_entry implementations. That means that none of the kvm_set_routing_entry implementations need the kvm_irq_routing_table argument anymore, so this removes it. This does not change any locking or data lifetime rules. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/mpic.c | 4 +--- arch/s390/kvm/interrupt.c | 3 +-- include/linux/kvm_host.h | 8 ++++++-- virt/kvm/eventfd.c | 10 ++++++---- virt/kvm/irq_comm.c | 20 +++++++++----------- virt/kvm/irqchip.c | 42 ++++++++++++++++++++++++++++++++++-------- 6 files changed, 57 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index b68d0dc9479a..39b3a8f816f2 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1826,8 +1826,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -1839,7 +1838,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin; if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 92528a0bdda6..f4c819bfc193 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1556,8 +1556,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, return ret; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int ret; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5065b953e6e8..4956149e962a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -752,6 +752,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi); +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin); + int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); @@ -942,8 +947,7 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index bae593a545c5..15fa9488b2d0 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + int i, n_entries; + + n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); irqfd->irq_entry.type = 0; - if (irqfd->gsi >= irq_rt->nr_rt_entries) - goto out; - hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { + e = entries; + for (i = 0; i < n_entries; ++i, ++e) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) irqfd->irq_entry = *e; } - out: write_seqcount_end(&irqfd->irq_entry_sc); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a228ee82bad2..175844593243 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, */ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) { + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; @@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) { - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; - break; - } + if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + e = &entries[0]; + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin + delta; if (e->irqchip.pin >= max_pin) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b43c275775cd..f4648dd94888 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,13 +31,37 @@ #include #include "irq.h" +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi) +{ + struct kvm_kernel_irq_routing_entry *e; + int n = 0; + + if (gsi < irq_rt->nr_rt_entries) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + entries[n] = *e; + ++n; + } + } + + return n; +} + +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin) +{ + return irq_rt->chip[irqchip][pin]; +} + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0, idx; + struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; + i = kvm_irq_map_gsi(irq_set, irq_rt, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, e->gsi = ue->gsi; e->type = ue->type; - r = kvm_set_routing_entry(rt, e, ue); + r = kvm_set_routing_entry(e, ue); if (r) goto out; + if (e->type == KVM_IRQ_ROUTING_IRQCHIP) + rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; -- cgit v1.2.3 From 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:11 +1000 Subject: KVM: Move all accesses to kvm::irq_routing into irqchip.c Now that struct _irqfd does not keep a reference to storage pointed to by the irq_routing field of struct kvm, we can move the statement that updates it out from under the irqfds.lock and put it in kvm_set_irq_routing() instead. That means we then have to take a srcu_read_lock on kvm->irq_srcu around the irqfd_update call in kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer ensures that that the routing can't change. Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin() to take a struct kvm * argument instead of the pointer to the routing table, this allows us to to move all references to kvm->irq_routing into irqchip.c. That in turn allows us to move the definition of the kvm_irq_routing_table struct into irqchip.c as well. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 35 +++++++---------------------------- virt/kvm/eventfd.c | 22 +++++++++------------- virt/kvm/irq_comm.c | 6 ++---- virt/kvm/irqchip.c | 39 +++++++++++++++++++++++++-------------- 4 files changed, 43 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4956149e962a..ddd33e1aeee1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -325,24 +325,7 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - struct kvm_kernel_irq_routing_entry *rt_entries; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. - */ - struct hlist_head map[0]; -}; - -#else - -struct kvm_irq_routing_table {}; - -#endif +struct kvm_irq_routing_table; #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 @@ -401,8 +384,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP /* - * Update side is protected by irq_lock and, - * if configured, irqfds.lock. + * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; @@ -752,10 +734,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi); -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin); +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi); +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); @@ -967,7 +948,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); -void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); +void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { @@ -989,10 +970,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) static inline void kvm_irqfd_release(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_IRQCHIP -static inline void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +static inline void kvm_irq_routing_update(struct kvm *kvm) { - rcu_assign_pointer(kvm->irq_routing, irq_rt); } #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 15fa9488b2d0..f0075ffb0c35 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, - struct kvm_irq_routing_table *irq_rt) +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; int i, n_entries; - n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); + n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); @@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct kvm_irq_routing_table *irq_rt; struct _irqfd *irqfd, *tmp; struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; unsigned int events; + int idx; irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) @@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) goto fail; } - irq_rt = rcu_dereference_protected(kvm->irq_routing, - lockdep_is_held(&kvm->irqfds.lock)); - irqfd_update(kvm, irqfd, irq_rt); + idx = srcu_read_lock(&kvm->irq_srcu); + irqfd_update(kvm, irqfd); + srcu_read_unlock(&kvm->irq_srcu, idx); list_add_tail(&irqfd->list, &kvm->irqfds.items); @@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm) } /* - * Change irq_routing and irqfd. + * Take note of a change in irq routing. * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ -void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +void kvm_irq_routing_update(struct kvm *kvm) { struct _irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); - rcu_assign_pointer(kvm->irq_routing, irq_rt); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) - irqfd_update(kvm, irqfd, irq_rt); + irqfd_update(kvm, irqfd); spin_unlock_irq(&kvm->irqfds.lock); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 175844593243..963b8995a9e8 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; - struct kvm_irq_routing_table *irq_rt; int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * which is limited to 1:1 GSI mapping. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; if (likely(e->type == KVM_IRQ_ROUTING_MSI)) ret = kvm_set_msi_inatomic(e, kvm); @@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, int idx, gsi; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index f4648dd94888..04faac50cef5 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,12 +31,26 @@ #include #include "irq.h" -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi) +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + struct kvm_kernel_irq_routing_entry *rt_entries; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; +}; + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) { + struct kvm_irq_routing_table *irq_rt; struct kvm_kernel_irq_routing_entry *e; int n = 0; + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); if (gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; @@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, return n; } -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin) +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; + + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); return irq_rt->chip[irqchip][pin]; } bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i, idx; - struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * writes to the unused one. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - i = kvm_irq_map_gsi(irq_set, irq_rt, irq); + i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(&kvm->irq_lock); old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); + rcu_assign_pointer(kvm->irq_routing, new); + kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); synchronize_srcu_expedited(&kvm->irq_srcu); -- cgit v1.2.3 From 297e21053a52f060944e9f0de4c64fad9bcd72fc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:13 +1000 Subject: KVM: Give IRQFD its own separate enabling Kconfig option Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING. So that we can have the IRQFD code compiled in without having the IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING, and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING also select HAVE_KVM_IRQFD. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/ia64/kvm/Kconfig | 1 + arch/powerpc/kvm/Kconfig | 1 + arch/s390/kvm/Kconfig | 1 + arch/x86/kvm/Kconfig | 1 + include/linux/kvm_host.h | 8 ++++---- virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 6 +++--- virt/kvm/kvm_main.c | 2 +- 8 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 990b86420cc6..3d50ea955c4c 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -25,6 +25,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select KVM_APIC_ARCHITECTURE select KVM_MMIO diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8f104a6879f0..d4741dba91af 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -158,6 +158,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 10d529ac9821..646db9c467d1 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -26,6 +26,7 @@ config KVM select KVM_ASYNC_PF select KVM_ASYNC_PF_SYNC select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting paravirtualized guest machines using the SIE diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 287e4c85fff9..f9d16ff56c6b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,6 +27,7 @@ config KVM select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ddd33e1aeee1..8593d2e61cbf 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -437,7 +437,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -932,20 +932,20 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); - #else static inline void kvm_free_irq_routing(struct kvm *kvm) {} #endif +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); + #ifdef CONFIG_HAVE_KVM_EVENTFD void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); -#ifdef CONFIG_HAVE_KVM_IRQCHIP +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); void kvm_irq_routing_update(struct kvm *); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 13f2d19793e3..fc0c5e603eb4 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQFD + bool + config HAVE_KVM_IRQ_ROUTING bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 99957df69cf2..f5f61548f60d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -39,7 +39,7 @@ #include "irq.h" #include "iodev.h" -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -450,7 +450,7 @@ out: void kvm_eventfd_init(struct kvm *kvm) { -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * shutdown any irqfd's that match fd+gsi */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1b95cc926cfc..a69a623938b8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2337,7 +2337,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_CHECK_EXTENSION_VM: -- cgit v1.2.3 From 114840c3d29b9cbd867faa69595a2aee6f6b54a2 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 1 Jun 2014 11:53:50 +0300 Subject: mlx4_core: Add support for secure-host and SMP firewall Secure-host is the general term for the capability of a device to protect itself and the subnet from malicious host software. This is achieved by: 1. Not allowing un-trusted entities to access device configuration registers, directly (through pci_cr or pci_conf) and indirectly (through MADs). 2. Hiding M_Key from untrusted entities. 3. Preventing the modification of GUID0 by un-trusted entities 4. Not allowing drivers on untrusted hosts to receive nor to transmit packets over QP0 (SMP Firewall). The secure-host capability depends on firmware handling all QP0 packets, and not passing these packets up to the driver. Any information required by the driver for proper operation (e.g., SM lid) is passed via events generated by the firmware while processing QP0 MADs. Driver support mainly requires using the MAD_DEMUX FW command at startup, where the feature is enabled/disabled through a procedure described in the Mellanox HCA tools package. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz [ Fix error path in mlx4_setup_hca to go to err_mcg_table_free. - Roland ] Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++ drivers/net/ethernet/mellanox/mlx4/fw.c | 91 ++++++++++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/main.c | 5 ++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/cmd.h | 7 +++ include/linux/mlx4/device.h | 1 + 6 files changed, 113 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5d940a26055c..65a4a0f88ea0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1310,6 +1310,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_MAD_IFC_wrapper }, + { + .opcode = MLX4_CMD_MAD_DEMUX, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 688e1eabab29..494753e44ae3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [7] = "FSM (MAC anti-spoofing) support", [8] = "Dynamic QP updates support", [9] = "Device managed flow steering IPoIB support", - [10] = "TCP/IP offloads/flow-steering for VXLAN support" + [10] = "TCP/IP offloads/flow-steering for VXLAN support", + [11] = "MAD DEMUX (Secure-Host) support" }; int i; @@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e +#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(dev_cap->max_counters, outbox, QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); + MLX4_GET(field32, outbox, + QUERY_DEV_CAP_MAD_DEMUX_OFFSET); + if (field32 & (1 << 0)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; @@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work) out: mlx4_free_cmd_mailbox(dev, mailbox); } + +static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox) +{ +#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10 +#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20 +#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40 +#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70 + + u32 set_attr_mask, getresp_attr_mask; + u32 trap_attr_mask, traprepress_attr_mask; + + MLX4_GET(set_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n", + set_attr_mask); + + MLX4_GET(getresp_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n", + getresp_attr_mask); + + MLX4_GET(trap_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n", + trap_attr_mask); + + MLX4_GET(traprepress_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n", + traprepress_attr_mask); + + if (set_attr_mask && getresp_attr_mask && trap_attr_mask && + traprepress_attr_mask) + return 1; + + return 0; +} + +int mlx4_config_mad_demux(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + int secure_host_active; + int err; + + /* Check if mad_demux is supported */ + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX"); + return -ENOMEM; + } + + /* Query mad_demux to find out which MADs are handled by internal sma */ + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n", + err); + goto out; + } + + secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox); + + /* Config mad_demux to handle all MADs returned by the query above */ + err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err); + goto out; + } + + if (secure_host_active) + mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n"); +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 82ab427290c3..f2c8e8ba23fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1831,6 +1831,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) mlx4_err(dev, "Failed to initialize multicast group table, aborting\n"); goto err_mr_table_free; } + err = mlx4_config_mad_demux(dev); + if (err) { + mlx4_err(dev, "Failed in config_mad_demux, aborting\n"); + goto err_mcg_table_free; + } } err = mlx4_init_eq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1d8af7336807..310b30b4682c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1311,5 +1311,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); +int mlx4_config_mad_demux(struct mlx4_dev *dev); #endif /* MLX4_H */ diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index c8450366c130..379c02648ab3 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -116,6 +116,7 @@ enum { /* special QP and management commands */ MLX4_CMD_CONF_SPECIAL_QP = 0x23, MLX4_CMD_MAD_IFC = 0x24, + MLX4_CMD_MAD_DEMUX = 0x203, /* multicast commands */ MLX4_CMD_READ_MCG = 0x25, @@ -185,6 +186,12 @@ enum { MLX4_SET_PORT_VXLAN = 0xB }; +enum { + MLX4_CMD_MAD_DEMUX_CONFIG = 0, + MLX4_CMD_MAD_DEMUX_QUERY_STATE = 1, + MLX4_CMD_MAD_DEMUX_QUERY_RESTR = 2, /* Query mad demux restrictions */ +}; + enum { MLX4_CMD_WRAPPED, MLX4_CMD_NATIVE diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 35b51e7af886..cee9561e8ef6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -172,6 +172,7 @@ enum { MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8, MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9, MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 10, + MLX4_DEV_CAP_FLAG2_MAD_DEMUX = 1LL << 11, }; enum { -- cgit v1.2.3 From df5601f9c3d831b4c478b004a1ed90a18643adbe Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 7 Oct 2013 15:37:19 +0200 Subject: tracehook_signal_handler: Remove sig, info, ka and regs These parameters are nowhere used, so we can remove them. Signed-off-by: Richard Weinberger --- include/linux/tracehook.h | 8 +------- kernel/signal.c | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 6f8ab7da27c4..84d497297c5f 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -133,10 +133,6 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) /** * tracehook_signal_handler - signal handler setup is complete - * @sig: number of signal being delivered - * @info: siginfo_t of signal being delivered - * @ka: sigaction setting that chose the handler - * @regs: user register state * @stepping: nonzero if debugger single-step or block-step in use * * Called by the arch code after a signal handler has been set up. @@ -146,9 +142,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) * Called without locks, shortly before returning to user mode * (or handling more signals). */ -static inline void tracehook_signal_handler(int sig, siginfo_t *info, - const struct k_sigaction *ka, - struct pt_regs *regs, int stepping) +static inline void tracehook_signal_handler(int stepping) { if (stepping) ptrace_notify(SIGTRAP); diff --git a/kernel/signal.c b/kernel/signal.c index a4077e90f19f..c4d47661cc86 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2379,7 +2379,7 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&blocked, sig); set_current_blocked(&blocked); - tracehook_signal_handler(sig, info, ka, regs, stepping); + tracehook_signal_handler(stepping); } void signal_setup_done(int failed, struct ksignal *ksig, int stepping) -- cgit v1.2.3 From 10b1c7ac8bfed429cf3dcb0225482c8dc1485d8e Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 13 Jul 2014 13:36:04 +0200 Subject: Clean up signal_delivered() - Pass a ksignal struct to it - Remove unused regs parameter - Make it private as it's nowhere outside of kernel/signal.c is used Signed-off-by: Richard Weinberger --- include/linux/signal.h | 1 - kernel/signal.c | 21 ++++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index c9e65360c49a..b005cc3dc1dc 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -282,7 +282,6 @@ struct ksignal { extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); -extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); diff --git a/kernel/signal.c b/kernel/signal.c index c4d47661cc86..0d75cf875d44 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2353,19 +2353,15 @@ relock: /** * signal_delivered - - * @sig: number of signal being delivered - * @info: siginfo_t of signal being delivered - * @ka: sigaction setting that chose the handler - * @regs: user register state + * @ksig: kernel signal struct * @stepping: nonzero if debugger single-step or block-step in use * * This function should be called when a signal has successfully been - * delivered. It updates the blocked signals accordingly (@ka->sa.sa_mask + * delivered. It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask * is always blocked, and the signal itself is blocked unless %SA_NODEFER - * is set in @ka->sa.sa_flags. Tracing is notified. + * is set in @ksig->ka.sa.sa_flags. Tracing is notified. */ -void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, - struct pt_regs *regs, int stepping) +static void signal_delivered(struct ksignal *ksig, int stepping) { sigset_t blocked; @@ -2375,9 +2371,9 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, simply clear the restore sigmask flag. */ clear_restore_sigmask(); - sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&blocked, sig); + sigorsets(&blocked, ¤t->blocked, &ksig->ka.sa.sa_mask); + if (!(ksig->ka.sa.sa_flags & SA_NODEFER)) + sigaddset(&blocked, ksig->sig); set_current_blocked(&blocked); tracehook_signal_handler(stepping); } @@ -2387,8 +2383,7 @@ void signal_setup_done(int failed, struct ksignal *ksig, int stepping) if (failed) force_sigsegv(ksig->sig, current); else - signal_delivered(ksig->sig, &ksig->info, &ksig->ka, - signal_pt_regs(), stepping); + signal_delivered(ksig, stepping); } /* -- cgit v1.2.3 From 828b1f65d23cf8a68795739f6dd08fc8abd9ee64 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 7 Oct 2013 15:26:57 +0200 Subject: Rip out get_signal_to_deliver() Now we can turn get_signal() to the main function. Signed-off-by: Richard Weinberger --- include/linux/signal.h | 14 +------------- kernel/signal.c | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index b005cc3dc1dc..750196fcc0a5 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -280,7 +280,7 @@ struct ksignal { int sig; }; -extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); +extern int get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); @@ -300,18 +300,6 @@ static inline void disallow_signal(int sig) kernel_sigaction(sig, SIG_IGN); } -/* - * Eventually that'll replace get_signal_to_deliver(); macro for now, - * to avoid nastiness with include order. - */ -#define get_signal(ksig) \ -({ \ - struct ksignal *p = (ksig); \ - p->sig = get_signal_to_deliver(&p->info, &p->ka, \ - signal_pt_regs(), NULL);\ - p->sig > 0; \ -}) - extern struct kmem_cache *sighand_cachep; int unhandled_signal(struct task_struct *tsk, int sig); diff --git a/kernel/signal.c b/kernel/signal.c index 0d75cf875d44..5c6020040388 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2166,8 +2166,7 @@ static int ptrace_signal(int signr, siginfo_t *info) return signr; } -int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, - struct pt_regs *regs, void *cookie) +int get_signal(struct ksignal *ksig) { struct sighand_struct *sighand = current->sighand; struct signal_struct *signal = current->signal; @@ -2237,13 +2236,13 @@ relock: goto relock; } - signr = dequeue_signal(current, ¤t->blocked, info); + signr = dequeue_signal(current, ¤t->blocked, &ksig->info); if (!signr) break; /* will return 0 */ if (unlikely(current->ptrace) && signr != SIGKILL) { - signr = ptrace_signal(signr, info); + signr = ptrace_signal(signr, &ksig->info); if (!signr) continue; } @@ -2251,13 +2250,13 @@ relock: ka = &sighand->action[signr-1]; /* Trace actually delivered signals. */ - trace_signal_deliver(signr, info, ka); + trace_signal_deliver(signr, &ksig->info, ka); if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { /* Run the handler. */ - *return_ka = *ka; + ksig->ka = *ka; if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; @@ -2307,7 +2306,7 @@ relock: spin_lock_irq(&sighand->siglock); } - if (likely(do_signal_stop(info->si_signo))) { + if (likely(do_signal_stop(ksig->info.si_signo))) { /* It released the siglock. */ goto relock; } @@ -2328,7 +2327,7 @@ relock: if (sig_kernel_coredump(signr)) { if (print_fatal_signals) - print_fatal_signal(info->si_signo); + print_fatal_signal(ksig->info.si_signo); proc_coredump_connector(current); /* * If it was able to dump core, this kills all @@ -2338,17 +2337,19 @@ relock: * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ - do_coredump(info); + do_coredump(&ksig->info); } /* * Death signals, no core dump. */ - do_group_exit(info->si_signo); + do_group_exit(ksig->info.si_signo); /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); - return signr; + + ksig->sig = signr; + return ksig->sig > 0; } /** -- cgit v1.2.3 From 72f15c03977acc8f06080e6c8a91d93bfc655a65 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 5 Mar 2014 15:15:22 +0100 Subject: sas_ss_flags: Remove nested ternary if ...to make it readable. Signed-off-by: Richard Weinberger --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0376b054a0d0..795ea2bc3d4f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2360,8 +2360,10 @@ static inline int on_sig_stack(unsigned long sp) static inline int sas_ss_flags(unsigned long sp) { - return (current->sas_ss_size == 0 ? SS_DISABLE - : on_sig_stack(sp) ? SS_ONSTACK : 0); + if (!current->sas_ss_size) + return SS_DISABLE; + + return on_sig_stack(sp) ? SS_ONSTACK : 0; } static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) -- cgit v1.2.3 From c77dcacb397519b6ade8f08201a4a90a7f4f751e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 Aug 2014 14:24:45 +0200 Subject: KVM: Move more code under CONFIG_HAVE_KVM_IRQFD Commits e4d57e1ee1ab (KVM: Move irq notifier implementation into eventfd.c, 2014-06-30) included the irq notifier code unconditionally in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before. Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_IRQ_ROUTING to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be under CONFIG_HAVE_KVM_IRQCHIP. Together, this broke compilation without CONFIG_KVM_XICS. Fix by adding or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 + include/trace/events/kvm.h | 8 +-- virt/kvm/eventfd.c | 122 ++++++++++++++++++++++----------------------- virt/kvm/kvm_main.c | 2 + 4 files changed, 69 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8593d2e61cbf..a4c33b34fe3f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -388,6 +388,8 @@ struct kvm { */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD struct hlist_head irq_ack_notifier_list; #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 131a0bda7aec..908925ace776 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, __entry->errno < 0 ? -__entry->errno : __entry->reason) ); -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), @@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq, TP_printk("gsi %u level %d source %d", __entry->gsi, __entry->level, __entry->irq_source_id) ); -#endif +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ #if defined(__KVM_HAVE_IOAPIC) #define kvm_deliver_mode \ @@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq, #endif /* defined(__KVM_HAVE_IOAPIC) */ -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), @@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq, #endif ); -#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f5f61548f60d..3c5981c87c3f 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -445,6 +445,67 @@ out: kfree(irqfd); return ret; } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} #endif void @@ -867,64 +928,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } - -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a69a623938b8..33712fb26eb1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -465,6 +465,8 @@ static struct kvm *kvm_create_vm(unsigned long type) #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif -- cgit v1.2.3 From 372ba8cb46b271a7662b92cbefedee56725f6bd0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 14:19:21 +0100 Subject: cpuidle: menu: Lookup CPU runqueues less The menu governer makes separate lookups of the CPU runqueue to get load and number of IO waiters but it can be done with a single lookup. Signed-off-by: Mel Gorman Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 17 +++++++---------- include/linux/sched.h | 3 +-- kernel/sched/core.c | 7 +++++++ kernel/sched/proc.c | 7 ------- 4 files changed, 15 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index f55d8260ec43..27702742b319 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -134,12 +134,9 @@ struct menu_device { #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) -static int get_loadavg(void) +static inline int get_loadavg(unsigned long load) { - unsigned long this = this_cpu_load(); - - - return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10; + return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10; } static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) @@ -175,13 +172,13 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters * to be, the higher this multiplier, and thus the higher * the barrier to go to an expensive C state. */ -static inline int performance_multiplier(unsigned long nr_iowaiters) +static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load) { int mult = 1; /* for higher loadavg, we are more reluctant */ - mult += 2 * get_loadavg(); + mult += 2 * get_loadavg(load); /* for IO wait tasks (per cpu!) we add 5x each */ mult += 10 * nr_iowaiters; @@ -296,7 +293,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; unsigned int interactivity_req; - unsigned long nr_iowaiters; + unsigned long nr_iowaiters, cpu_load; if (data->needs_update) { menu_update(drv, dev); @@ -312,7 +309,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length()); - nr_iowaiters = nr_iowait_cpu(smp_processor_id()); + get_iowait_load(&nr_iowaiters, &cpu_load); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); /* @@ -331,7 +328,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * duration / latency ratio. Adjust the latency limit if * necessary. */ - interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters); + interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load); if (latency_req > interactivity_req) latency_req = interactivity_req; diff --git a/include/linux/sched.h b/include/linux/sched.h index 306f4f0c987a..641bd954bb5d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -168,8 +168,7 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); -extern unsigned long this_cpu_load(void); - +extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494fe..863ef1d19563 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2385,6 +2385,13 @@ unsigned long nr_iowait_cpu(int cpu) return atomic_read(&this->nr_iowait); } +void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) +{ + struct rq *this = this_rq(); + *nr_waiters = atomic_read(&this->nr_iowait); + *load = this->cpu_load[0]; +} + #ifdef CONFIG_SMP /* diff --git a/kernel/sched/proc.c b/kernel/sched/proc.c index 16f5a30f9c88..8ecd552fe4f2 100644 --- a/kernel/sched/proc.c +++ b/kernel/sched/proc.c @@ -8,13 +8,6 @@ #include "sched.h" -unsigned long this_cpu_load(void) -{ - struct rq *this = this_rq(); - return this->cpu_load[0]; -} - - /* * Global load-average calculations * -- cgit v1.2.3 From 8ba8fa917093510cdcb4ec8ff8b9603e1b525658 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Aug 2014 16:03:26 -0700 Subject: fsnotify: rename event handling functions Rename fsnotify_add_notify_event() to fsnotify_add_event() since the "notify" part is duplicit. Rename fsnotify_remove_notify_event() and fsnotify_peek_notify_event() to fsnotify_remove_first_event() and fsnotify_peek_first_event() respectively since "notify" part is duplicit and they really look at the first event in the queue. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jan Kara Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/fanotify/fanotify.c | 2 +- fs/notify/fanotify/fanotify_user.c | 2 +- fs/notify/inotify/inotify_fsnotify.c | 2 +- fs/notify/inotify/inotify_user.c | 4 ++-- fs/notify/notification.c | 19 ++++++++++--------- include/linux/fsnotify_backend.h | 12 ++++++------ 6 files changed, 21 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index ee9cb3795c2b..fdeb36b70c65 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -210,7 +210,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, return -ENOMEM; fsn_event = &event->fse; - ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); + ret = fsnotify_add_event(group, fsn_event, fanotify_merge); if (ret) { /* Permission events shouldn't be merged */ BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3fdc8a3e1134..fbf2210823ab 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, /* held the notification_mutex the whole time, so this is the * same event we peeked above */ - return fsnotify_remove_notify_event(group); + return fsnotify_remove_first_event(group); } static int create_fd(struct fsnotify_group *group, diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 43ab1e1a07a2..0f88bc0b4e6c 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group, if (len) strcpy(event->name, file_name); - ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge); + ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index cc423a30a0c8..daf76652fe58 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, if (fsnotify_notify_queue_is_empty(group)) return NULL; - event = fsnotify_peek_notify_event(group); + event = fsnotify_peek_first_event(group); pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, /* held the notification_mutex the whole time, so this is the * same event we peeked above */ - fsnotify_remove_notify_event(group); + fsnotify_remove_first_event(group); return event; } diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 1e58402171a5..1d394220acbe 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -83,10 +83,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group, * added to the queue, 1 if the event was merged with some other queued event, * 2 if the queue of events has overflown. */ -int fsnotify_add_notify_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct list_head *, - struct fsnotify_event *)) +int fsnotify_add_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct list_head *, + struct fsnotify_event *)) { int ret = 0; struct list_head *list = &group->notification_list; @@ -128,7 +128,7 @@ queue: * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event */ -struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) +struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group) { struct fsnotify_event *event; @@ -140,7 +140,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group struct fsnotify_event, list); /* * We need to init list head for the case of overflow event so that - * check in fsnotify_add_notify_events() works + * check in fsnotify_add_event() works */ list_del_init(&event->list); group->q_len--; @@ -149,9 +149,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group } /* - * This will not remove the event, that must be done with fsnotify_remove_notify_event() + * This will not remove the event, that must be done with + * fsnotify_remove_first_event() */ -struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) +struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group) { BUG_ON(!mutex_is_locked(&group->notification_mutex)); @@ -169,7 +170,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group) mutex_lock(&group->notification_mutex); while (!fsnotify_notify_queue_is_empty(group)) { - event = fsnotify_remove_notify_event(group); + event = fsnotify_remove_first_event(group); fsnotify_destroy_event(group, event); } mutex_unlock(&group->notification_mutex); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index fc7718c6bd3e..a6e899943363 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -322,16 +322,16 @@ extern int fsnotify_fasync(int fd, struct file *file, int on); extern void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event); /* attach the event to the group notification queue */ -extern int fsnotify_add_notify_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct list_head *, - struct fsnotify_event *)); +extern int fsnotify_add_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct list_head *, + struct fsnotify_event *)); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); +extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); /* return AND dequeue the first event on the notification queue */ -extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); +extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group); /* functions used to manipulate the marks attached to inodes */ -- cgit v1.2.3 From 5838d4442bd5971687b72221736222637e03140d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Aug 2014 16:03:28 -0700 Subject: fanotify: fix double free of pending permission events Commit 85816794240b ("fanotify: Fix use after free for permission events") introduced a double free issue for permission events which are pending in group's notification queue while group is being destroyed. These events are freed from fanotify_handle_event() but they are not removed from groups notification queue and thus they get freed again from fsnotify_flush_notify(). Fix the problem by removing permission events from notification queue before freeing them if we skip processing access response. Also expand comments in fanotify_release() to explain group shutdown in detail. Fixes: 85816794240b9659e66e4d9b0df7c6e814e5f603 Signed-off-by: Jan Kara Reported-by: Douglas Leeder Tested-by: Douglas Leeder Reported-by: Heinrich Schuchard Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/fanotify/fanotify.c | 9 ++++++++- fs/notify/fanotify/fanotify_user.c | 12 ++++++++++++ fs/notify/notification.c | 18 +++++++++++++++++- include/linux/fsnotify_backend.h | 2 ++ 4 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index fdeb36b70c65..30d3addfad75 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group, wait_event(group->fanotify_data.access_waitq, event->response || atomic_read(&group->fanotify_data.bypass_perm)); - if (!event->response) /* bypass_perm set */ + if (!event->response) { /* bypass_perm set */ + /* + * Event was canceled because group is being destroyed. Remove + * it from group's event list because we are responsible for + * freeing the permission event. + */ + fsnotify_remove_event(group, &event->fae.fse); return 0; + } /* userspace responded, convert to something usable */ switch (event->response) { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index fbf2210823ab..b13992a41bd9 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_perm_event_info *event, *next; + /* + * There may be still new events arriving in the notification queue + * but since userspace cannot use fanotify fd anymore, no event can + * enter or leave access_list by now. + */ spin_lock(&group->fanotify_data.access_lock); atomic_inc(&group->fanotify_data.bypass_perm); @@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file) } spin_unlock(&group->fanotify_data.access_lock); + /* + * Since bypass_perm is set, newly queued events will not wait for + * access response. Wake up the already sleeping ones now. + * synchronize_srcu() in fsnotify_destroy_group() will wait for all + * processes sleeping in fanotify_handle_event() waiting for access + * response and thus also for all permission events to be freed. + */ wake_up(&group->fanotify_data.access_waitq); #endif diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 1d394220acbe..a95d8e037aeb 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, /* Overflow events are per-group and we don't want to free them */ if (!event || event->mask == FS_Q_OVERFLOW) return; - + /* If the event is still queued, we have a problem... */ + WARN_ON(!list_empty(&event->list)); group->ops->free_event(event); } @@ -124,6 +125,21 @@ queue: return ret; } +/* + * Remove @event from group's notification queue. It is the responsibility of + * the caller to destroy the event. + */ +void fsnotify_remove_event(struct fsnotify_group *group, + struct fsnotify_event *event) +{ + mutex_lock(&group->notification_mutex); + if (!list_empty(&event->list)) { + list_del_init(&event->list); + group->q_len--; + } + mutex_unlock(&group->notification_mutex); +} + /* * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index a6e899943363..ca060d7c4fa6 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -326,6 +326,8 @@ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); +/* Remove passed event from groups notification queue */ +extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -- cgit v1.2.3 From e19318116048d5fbdb8d230d6d37625834b503cd Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 6 Aug 2014 16:04:59 -0700 Subject: mm/page_alloc.c: add __meminit to alloc_pages_exact_nid() alloc_pages_exact_nid() is only called by __meminit alloc_page_cgroup() Signed-off-by: Fabian Frederick Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- mm/page_alloc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6eb1fb37de9a..5e7219dc0fae 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); /* This is different from alloc_pages_exact_node !!! */ -void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); +void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ef44ad736ca1..fd4322cc096d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2962,7 +2962,7 @@ EXPORT_SYMBOL(alloc_pages_exact); * Note this is not alloc_pages_exact_node() which allocates on a specific node, * but is not exact. */ -void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) { unsigned order = get_order(size); struct page *p = alloc_pages_node(nid, gfp_mask, order); -- cgit v1.2.3 From 2cfb3665e864755400dc57b6ceee2ebb6b382910 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 6 Aug 2014 16:05:03 -0700 Subject: include/linux/memblock.h: add __init to memblock_set_bottom_up() memblock_set_bottom_up() is only called by __init cmdline_parse_movable_node() and __init numa_init(). Signed-off-by: Fabian Frederick Reviewed-by: Tang Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b660e05b63d4..e8cc45307f8f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); /* * Set the allocation direction to bottom-up or top-down. */ -static inline void memblock_set_bottom_up(bool enable) +static inline void __init memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } @@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void) return memblock.bottom_up; } #else -static inline void memblock_set_bottom_up(bool enable) {} +static inline void __init memblock_set_bottom_up(bool enable) {} static inline bool memblock_bottom_up(void) { return false; } #endif -- cgit v1.2.3 From 4f7c6b49c45a398d72763d1f0e64ddff8b3653c7 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 6 Aug 2014 16:05:13 -0700 Subject: mem-hotplug: introduce MMOP_OFFLINE to replace the hard coding -1 In store_mem_state(), we have: ... 334 else if (!strncmp(buf, "offline", min_t(int, count, 7))) 335 online_type = -1; ... 355 case -1: 356 ret = device_offline(&mem->dev); 357 break; ... Here, "offline" is hard coded as -1. This patch does the following renaming: ONLINE_KEEP -> MMOP_ONLINE_KEEP ONLINE_KERNEL -> MMOP_ONLINE_KERNEL ONLINE_MOVABLE -> MMOP_ONLINE_MOVABLE and introduces MMOP_OFFLINE = -1 to avoid hard coding. Signed-off-by: Tang Chen Cc: Hu Tao Cc: Greg Kroah-Hartman Cc: Lai Jiangshan Cc: Yasuaki Ishimatsu Cc: Gu Zheng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 18 +++++++++--------- include/linux/memory_hotplug.h | 9 +++++---- mm/memory_hotplug.c | 9 ++++++--- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c6707dfb5284..7c60ed27e711 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev) * attribute and need to set the online_type. */ if (mem->online_type < 0) - mem->online_type = ONLINE_KEEP; + mem->online_type = MMOP_ONLINE_KEEP; ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); @@ -316,22 +316,22 @@ store_mem_state(struct device *dev, return ret; if (sysfs_streq(buf, "online_kernel")) - online_type = ONLINE_KERNEL; + online_type = MMOP_ONLINE_KERNEL; else if (sysfs_streq(buf, "online_movable")) - online_type = ONLINE_MOVABLE; + online_type = MMOP_ONLINE_MOVABLE; else if (sysfs_streq(buf, "online")) - online_type = ONLINE_KEEP; + online_type = MMOP_ONLINE_KEEP; else if (sysfs_streq(buf, "offline")) - online_type = -1; + online_type = MMOP_OFFLINE; else { ret = -EINVAL; goto err; } switch (online_type) { - case ONLINE_KERNEL: - case ONLINE_MOVABLE: - case ONLINE_KEEP: + case MMOP_ONLINE_KERNEL: + case MMOP_ONLINE_MOVABLE: + case MMOP_ONLINE_KEEP: /* * mem->online_type is not protected so there can be a * race here. However, when racing online, the first @@ -342,7 +342,7 @@ store_mem_state(struct device *dev, mem->online_type = online_type; ret = device_online(&mem->dev); break; - case -1: + case MMOP_OFFLINE: ret = device_offline(&mem->dev); break; default: diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 010d125bffbf..79dd9eca054f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -26,11 +26,12 @@ enum { MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, }; -/* Types for control the zone type of onlined memory */ +/* Types for control the zone type of onlined and offlined memory */ enum { - ONLINE_KEEP, - ONLINE_KERNEL, - ONLINE_MOVABLE, + MMOP_OFFLINE = -1, + MMOP_ONLINE_KEEP, + MMOP_ONLINE_KERNEL, + MMOP_ONLINE_MOVABLE, }; /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3557e8c9e8de..a3797d3fd8a4 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ zone = page_zone(pfn_to_page(pfn)); ret = -EINVAL; - if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) && + if ((zone_idx(zone) > ZONE_NORMAL || + online_type == MMOP_ONLINE_MOVABLE) && !can_online_high_movable(zone)) goto out; - if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) { + if (online_type == MMOP_ONLINE_KERNEL && + zone_idx(zone) == ZONE_MOVABLE) { if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) goto out; } - if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) { + if (online_type == MMOP_ONLINE_MOVABLE && + zone_idx(zone) == ZONE_MOVABLE - 1) { if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) goto out; } -- cgit v1.2.3 From a254129e8686bff7a340b58f35241b04927e81c0 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:25 -0700 Subject: CMA: generalize CMA reserved area management functionality Currently, there are two users on CMA functionality, one is the DMA subsystem and the other is the KVM on powerpc. They have their own code to manage CMA reserved area even if they looks really similar. From my guess, it is caused by some needs on bitmap management. KVM side wants to maintain bitmap not for 1 page, but for more size. Eventually it use bitmap where one bit represents 64 pages. When I implement CMA related patches, I should change those two places to apply my change and it seem to be painful to me. I want to change this situation and reduce future code management overhead through this patch. This change could also help developer who want to use CMA in their new feature development, since they can use CMA easily without copying & pasting this reserved area management code. In previous patches, we have prepared some features to generalize CMA reserved area management and now it's time to do it. This patch moves core functions to mm/cma.c and change DMA APIs to use these functions. There is no functional change in DMA APIs. Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Acked-by: Zhang Yanfei Acked-by: Minchan Kim Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Cc: Paolo Bonzini Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/dma-mapping.c | 1 + drivers/base/Kconfig | 10 -- drivers/base/dma-contiguous.c | 280 +--------------------------------- include/linux/cma.h | 27 ++++ include/linux/dma-contiguous.h | 11 +- mm/Kconfig | 11 ++ mm/Makefile | 1 + mm/cma.c | 333 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 383 insertions(+), 291 deletions(-) create mode 100644 include/linux/cma.h create mode 100644 mm/cma.c (limited to 'include/linux') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1f88db06b133..7a996aaa061e 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 88500fed3c7a..4e7f0ff83ae7 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -289,16 +289,6 @@ config CMA_ALIGNMENT If unsure, leave the default value "8". -config CMA_AREAS - int "Maximum count of the CMA device-private areas" - default 7 - help - CMA allows to create CMA areas for particular devices. This parameter - sets the maximum number of such device private CMA areas in the - system. - - If unsure, leave the default value "7". - endif endmenu diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index ad8a85bf852f..0411c1c57005 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -24,25 +24,9 @@ #include #include -#include -#include -#include #include -#include -#include -#include #include -#include - -struct cma { - unsigned long base_pfn; - unsigned long count; - unsigned long *bitmap; - unsigned int order_per_bit; /* Order of pages represented by one bit */ - struct mutex lock; -}; - -struct cma *dma_contiguous_default_area; +#include #ifdef CONFIG_CMA_SIZE_MBYTES #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES @@ -50,6 +34,8 @@ struct cma *dma_contiguous_default_area; #define CMA_SIZE_MBYTES 0 #endif +struct cma *dma_contiguous_default_area; + /* * Default global CMA area size can be defined in kernel's .config. * This is useful mainly for distro maintainers to create a kernel @@ -156,169 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit) } } -static DEFINE_MUTEX(cma_mutex); - -static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) -{ - return (1UL << (align_order >> cma->order_per_bit)) - 1; -} - -static unsigned long cma_bitmap_maxno(struct cma *cma) -{ - return cma->count >> cma->order_per_bit; -} - -static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, - unsigned long pages) -{ - return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; -} - -static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) -{ - unsigned long bitmap_no, bitmap_count; - - bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; - bitmap_count = cma_bitmap_pages_to_bits(cma, count); - - mutex_lock(&cma->lock); - bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); - mutex_unlock(&cma->lock); -} - -static int __init cma_activate_area(struct cma *cma) -{ - int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long); - unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; - unsigned i = cma->count >> pageblock_order; - struct zone *zone; - - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - - if (!cma->bitmap) - return -ENOMEM; - - WARN_ON_ONCE(!pfn_valid(pfn)); - zone = page_zone(pfn_to_page(pfn)); - - do { - unsigned j; - base_pfn = pfn; - for (j = pageblock_nr_pages; j; --j, pfn++) { - WARN_ON_ONCE(!pfn_valid(pfn)); - /* - * alloc_contig_range requires the pfn range - * specified to be in the same zone. Make this - * simple by forcing the entire CMA resv range - * to be in the same zone. - */ - if (page_zone(pfn_to_page(pfn)) != zone) - goto err; - } - init_cma_reserved_pageblock(pfn_to_page(base_pfn)); - } while (--i); - - mutex_init(&cma->lock); - return 0; - -err: - kfree(cma->bitmap); - return -EINVAL; -} - -static struct cma cma_areas[MAX_CMA_AREAS]; -static unsigned cma_area_count; - -static int __init cma_init_reserved_areas(void) -{ - int i; - - for (i = 0; i < cma_area_count; i++) { - int ret = cma_activate_area(&cma_areas[i]); - if (ret) - return ret; - } - - return 0; -} -core_initcall(cma_init_reserved_areas); - -static int __init __dma_contiguous_reserve_area(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, - phys_addr_t alignment, unsigned int order_per_bit, - struct cma **res_cma, bool fixed) -{ - struct cma *cma = &cma_areas[cma_area_count]; - int ret = 0; - - pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", - __func__, (unsigned long)size, (unsigned long)base, - (unsigned long)limit, (unsigned long)alignment); - - if (cma_area_count == ARRAY_SIZE(cma_areas)) { - pr_err("Not enough slots for CMA reserved regions!\n"); - return -ENOSPC; - } - - if (!size) - return -EINVAL; - - if (alignment && !is_power_of_2(alignment)) - return -EINVAL; - - /* - * Sanitise input arguments. - * Pages both ends in CMA area could be merged into adjacent unmovable - * migratetype page by page allocator's buddy algorithm. In the case, - * you couldn't get a contiguous memory, which is not what we want. - */ - alignment = max(alignment, - (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); - base = ALIGN(base, alignment); - size = ALIGN(size, alignment); - limit &= ~(alignment - 1); - - /* size should be aligned with order_per_bit */ - if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) - return -EINVAL; - - /* Reserve memory */ - if (base && fixed) { - if (memblock_is_region_reserved(base, size) || - memblock_reserve(base, size) < 0) { - ret = -EBUSY; - goto err; - } - } else { - phys_addr_t addr = memblock_alloc_range(size, alignment, base, - limit); - if (!addr) { - ret = -ENOMEM; - goto err; - } else { - base = addr; - } - } - - /* - * Each reserved area must be initialised later, when more kernel - * subsystems (like slab allocator) are available. - */ - cma->base_pfn = PFN_DOWN(base); - cma->count = size >> PAGE_SHIFT; - cma->order_per_bit = order_per_bit; - *res_cma = cma; - cma_area_count++; - - pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, - (unsigned long)base); - return 0; - -err: - pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); - return ret; -} - /** * dma_contiguous_reserve_area() - reserve custom contiguous area * @size: Size of the reserved area (in bytes), @@ -342,77 +165,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, { int ret; - ret = __dma_contiguous_reserve_area(size, base, limit, 0, 0, - res_cma, fixed); + ret = cma_declare_contiguous(size, base, limit, 0, 0, res_cma, fixed); if (ret) return ret; /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(PFN_PHYS((*res_cma)->base_pfn), - (*res_cma)->count << PAGE_SHIFT); + dma_contiguous_early_fixup(cma_get_base(*res_cma), + cma_get_size(*res_cma)); return 0; } -static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, - unsigned int align) -{ - unsigned long mask, pfn, start = 0; - unsigned long bitmap_maxno, bitmap_no, bitmap_count; - struct page *page = NULL; - int ret; - - if (!cma || !cma->count) - return NULL; - - pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, - count, align); - - if (!count) - return NULL; - - mask = cma_bitmap_aligned_mask(cma, align); - bitmap_maxno = cma_bitmap_maxno(cma); - bitmap_count = cma_bitmap_pages_to_bits(cma, count); - - for (;;) { - mutex_lock(&cma->lock); - bitmap_no = bitmap_find_next_zero_area(cma->bitmap, - bitmap_maxno, start, bitmap_count, mask); - if (bitmap_no >= bitmap_maxno) { - mutex_unlock(&cma->lock); - break; - } - bitmap_set(cma->bitmap, bitmap_no, bitmap_count); - /* - * It's safe to drop the lock here. We've marked this region for - * our exclusive use. If the migration fails we will take the - * lock again and unmark it. - */ - mutex_unlock(&cma->lock); - - pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); - mutex_lock(&cma_mutex); - ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); - mutex_unlock(&cma_mutex); - if (ret == 0) { - page = pfn_to_page(pfn); - break; - } else if (ret != -EBUSY) { - cma_clear_bitmap(cma, pfn, count); - break; - } - cma_clear_bitmap(cma, pfn, count); - pr_debug("%s(): memory range at %p is busy, retrying\n", - __func__, pfn_to_page(pfn)); - /* try again with a bit different memory target */ - start = bitmap_no + mask + 1; - } - - pr_debug("%s(): returned %p\n", __func__, page); - return page; -} - /** * dma_alloc_from_contiguous() - allocate pages from contiguous area * @dev: Pointer to device for which the allocation is performed. @@ -427,35 +190,10 @@ static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, struct page *dma_alloc_from_contiguous(struct device *dev, int count, unsigned int align) { - struct cma *cma = dev_get_cma_area(dev); - if (align > CONFIG_CMA_ALIGNMENT) align = CONFIG_CMA_ALIGNMENT; - return __dma_alloc_from_contiguous(cma, count, align); -} - -static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages, - int count) -{ - unsigned long pfn; - - if (!cma || !pages) - return false; - - pr_debug("%s(page %p)\n", __func__, (void *)pages); - - pfn = page_to_pfn(pages); - - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) - return false; - - VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); - - free_contig_range(pfn, count); - cma_clear_bitmap(cma, pfn, count); - - return true; + return cma_alloc(dev_get_cma_area(dev), count, align); } /** @@ -471,7 +209,5 @@ static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages, bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count) { - struct cma *cma = dev_get_cma_area(dev); - - return __dma_release_from_contiguous(cma, pages, count); + return cma_release(dev_get_cma_area(dev), pages, count); } diff --git a/include/linux/cma.h b/include/linux/cma.h new file mode 100644 index 000000000000..f6f7809acb98 --- /dev/null +++ b/include/linux/cma.h @@ -0,0 +1,27 @@ +#ifndef __CMA_H__ +#define __CMA_H__ + +/* + * There is always at least global CMA area and a few optional + * areas configured in kernel .config. + */ +#ifdef CONFIG_CMA_AREAS +#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) + +#else +#define MAX_CMA_AREAS (0) + +#endif + +struct cma; + +extern phys_addr_t cma_get_base(struct cma *cma); +extern unsigned long cma_get_size(struct cma *cma); + +extern int __init cma_declare_contiguous(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + struct cma **res_cma, bool fixed); +extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); +extern bool cma_release(struct cma *cma, struct page *pages, int count); +#endif diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 772eab5d524a..569bbd039896 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -53,18 +53,13 @@ #ifdef __KERNEL__ +#include + struct cma; struct page; -struct device; #ifdef CONFIG_DMA_CMA -/* - * There is always at least global CMA area and a few optional device - * private areas configured in kernel .config. - */ -#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) - extern struct cma *dma_contiguous_default_area; static inline struct cma *dev_get_cma_area(struct device *dev) @@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, #else -#define MAX_CMA_AREAS (0) - static inline struct cma *dev_get_cma_area(struct device *dev) { return NULL; diff --git a/mm/Kconfig b/mm/Kconfig index 3e9977a9d657..f4899ec39cf4 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -508,6 +508,17 @@ config CMA_DEBUG processing calls such as dma_alloc_from_contiguous(). This option does not affect warning and error messages. +config CMA_AREAS + int "Maximum count of the CMA areas" + depends on CMA + default 7 + help + CMA allows to create CMA areas for particular purpose, mainly, + used as device private area. This parameter sets the maximum + number of CMA area in the system. + + If unsure, leave the default value "7". + config ZBUD tristate default n diff --git a/mm/Makefile b/mm/Makefile index 4064f3ec145e..8338473c329a 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -62,3 +62,4 @@ obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZBUD) += zbud.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o +obj-$(CONFIG_CMA) += cma.o diff --git a/mm/cma.c b/mm/cma.c new file mode 100644 index 000000000000..656004216953 --- /dev/null +++ b/mm/cma.c @@ -0,0 +1,333 @@ +/* + * Contiguous Memory Allocator + * + * Copyright (c) 2010-2011 by Samsung Electronics. + * Copyright IBM Corporation, 2013 + * Copyright LG Electronics Inc., 2014 + * Written by: + * Marek Szyprowski + * Michal Nazarewicz + * Aneesh Kumar K.V + * Joonsoo Kim + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + */ + +#define pr_fmt(fmt) "cma: " fmt + +#ifdef CONFIG_CMA_DEBUG +#ifndef DEBUG +# define DEBUG +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +struct cma { + unsigned long base_pfn; + unsigned long count; + unsigned long *bitmap; + unsigned int order_per_bit; /* Order of pages represented by one bit */ + struct mutex lock; +}; + +static struct cma cma_areas[MAX_CMA_AREAS]; +static unsigned cma_area_count; +static DEFINE_MUTEX(cma_mutex); + +phys_addr_t cma_get_base(struct cma *cma) +{ + return PFN_PHYS(cma->base_pfn); +} + +unsigned long cma_get_size(struct cma *cma) +{ + return cma->count << PAGE_SHIFT; +} + +static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) +{ + return (1UL << (align_order >> cma->order_per_bit)) - 1; +} + +static unsigned long cma_bitmap_maxno(struct cma *cma) +{ + return cma->count >> cma->order_per_bit; +} + +static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, + unsigned long pages) +{ + return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; +} + +static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) +{ + unsigned long bitmap_no, bitmap_count; + + bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; + bitmap_count = cma_bitmap_pages_to_bits(cma, count); + + mutex_lock(&cma->lock); + bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); + mutex_unlock(&cma->lock); +} + +static int __init cma_activate_area(struct cma *cma) +{ + int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long); + unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; + unsigned i = cma->count >> pageblock_order; + struct zone *zone; + + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + + if (!cma->bitmap) + return -ENOMEM; + + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + + do { + unsigned j; + + base_pfn = pfn; + for (j = pageblock_nr_pages; j; --j, pfn++) { + WARN_ON_ONCE(!pfn_valid(pfn)); + /* + * alloc_contig_range requires the pfn range + * specified to be in the same zone. Make this + * simple by forcing the entire CMA resv range + * to be in the same zone. + */ + if (page_zone(pfn_to_page(pfn)) != zone) + goto err; + } + init_cma_reserved_pageblock(pfn_to_page(base_pfn)); + } while (--i); + + mutex_init(&cma->lock); + return 0; + +err: + kfree(cma->bitmap); + return -EINVAL; +} + +static int __init cma_init_reserved_areas(void) +{ + int i; + + for (i = 0; i < cma_area_count; i++) { + int ret = cma_activate_area(&cma_areas[i]); + + if (ret) + return ret; + } + + return 0; +} +core_initcall(cma_init_reserved_areas); + +/** + * cma_declare_contiguous() - reserve custom contiguous area + * @size: Size of the reserved area (in bytes), + * @base: Base address of the reserved area optional, use 0 for any + * @limit: End address of the reserved memory (optional, 0 for any). + * @alignment: Alignment for the CMA area, should be power of 2 or zero + * @order_per_bit: Order of pages represented by one bit on bitmap. + * @res_cma: Pointer to store the created cma region. + * @fixed: hint about where to place the reserved area + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. This function allows to create custom reserved areas. + * + * If @fixed is true, reserve contiguous area at exactly @base. If false, + * reserve in range from @base to @limit. + */ +int __init cma_declare_contiguous(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + struct cma **res_cma, bool fixed) +{ + struct cma *cma = &cma_areas[cma_area_count]; + int ret = 0; + + pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", + __func__, (unsigned long)size, (unsigned long)base, + (unsigned long)limit, (unsigned long)alignment); + + if (cma_area_count == ARRAY_SIZE(cma_areas)) { + pr_err("Not enough slots for CMA reserved regions!\n"); + return -ENOSPC; + } + + if (!size) + return -EINVAL; + + if (alignment && !is_power_of_2(alignment)) + return -EINVAL; + + /* + * Sanitise input arguments. + * Pages both ends in CMA area could be merged into adjacent unmovable + * migratetype page by page allocator's buddy algorithm. In the case, + * you couldn't get a contiguous memory, which is not what we want. + */ + alignment = max(alignment, + (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); + base = ALIGN(base, alignment); + size = ALIGN(size, alignment); + limit &= ~(alignment - 1); + + /* size should be aligned with order_per_bit */ + if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) + return -EINVAL; + + /* Reserve memory */ + if (base && fixed) { + if (memblock_is_region_reserved(base, size) || + memblock_reserve(base, size) < 0) { + ret = -EBUSY; + goto err; + } + } else { + phys_addr_t addr = memblock_alloc_range(size, alignment, base, + limit); + if (!addr) { + ret = -ENOMEM; + goto err; + } else { + base = addr; + } + } + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + cma->base_pfn = PFN_DOWN(base); + cma->count = size >> PAGE_SHIFT; + cma->order_per_bit = order_per_bit; + *res_cma = cma; + cma_area_count++; + + pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, + (unsigned long)base); + return 0; + +err: + pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); + return ret; +} + +/** + * cma_alloc() - allocate pages from contiguous area + * @cma: Contiguous memory region for which the allocation is performed. + * @count: Requested number of pages. + * @align: Requested alignment of pages (in PAGE_SIZE order). + * + * This function allocates part of contiguous memory on specific + * contiguous memory area. + */ +struct page *cma_alloc(struct cma *cma, int count, unsigned int align) +{ + unsigned long mask, pfn, start = 0; + unsigned long bitmap_maxno, bitmap_no, bitmap_count; + struct page *page = NULL; + int ret; + + if (!cma || !cma->count) + return NULL; + + pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, + count, align); + + if (!count) + return NULL; + + mask = cma_bitmap_aligned_mask(cma, align); + bitmap_maxno = cma_bitmap_maxno(cma); + bitmap_count = cma_bitmap_pages_to_bits(cma, count); + + for (;;) { + mutex_lock(&cma->lock); + bitmap_no = bitmap_find_next_zero_area(cma->bitmap, + bitmap_maxno, start, bitmap_count, mask); + if (bitmap_no >= bitmap_maxno) { + mutex_unlock(&cma->lock); + break; + } + bitmap_set(cma->bitmap, bitmap_no, bitmap_count); + /* + * It's safe to drop the lock here. We've marked this region for + * our exclusive use. If the migration fails we will take the + * lock again and unmark it. + */ + mutex_unlock(&cma->lock); + + pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); + mutex_lock(&cma_mutex); + ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); + mutex_unlock(&cma_mutex); + if (ret == 0) { + page = pfn_to_page(pfn); + break; + } else if (ret != -EBUSY) { + cma_clear_bitmap(cma, pfn, count); + break; + } + cma_clear_bitmap(cma, pfn, count); + pr_debug("%s(): memory range at %p is busy, retrying\n", + __func__, pfn_to_page(pfn)); + /* try again with a bit different memory target */ + start = bitmap_no + mask + 1; + } + + pr_debug("%s(): returned %p\n", __func__, page); + return page; +} + +/** + * cma_release() - release allocated pages + * @cma: Contiguous memory region for which the allocation is performed. + * @pages: Allocated pages. + * @count: Number of allocated pages. + * + * This function releases memory allocated by alloc_cma(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. + */ +bool cma_release(struct cma *cma, struct page *pages, int count) +{ + unsigned long pfn; + + if (!cma || !pages) + return false; + + pr_debug("%s(page %p)\n", __func__, (void *)pages); + + pfn = page_to_pfn(pages); + + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) + return false; + + VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); + + free_contig_range(pfn, count); + cma_clear_bitmap(cma, pfn, count); + + return true; +} -- cgit v1.2.3 From c1f733aaaf30a0068a3126d5aa9d5b4c25ba4c0c Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:32 -0700 Subject: mm, CMA: change cma_declare_contiguous() to obey coding convention Conventionally, we put output param to the end of param list and put the 'base' ahead of 'size', but cma_declare_contiguous() doesn't look like that, so change it. Additionally, move down cma_areas reference code to the position where it is really needed. Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Cc: Minchan Kim Cc: Paolo Bonzini Cc: Zhang Yanfei Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++-- drivers/base/dma-contiguous.c | 2 +- include/linux/cma.h | 2 +- mm/cma.c | 13 +++++++------ 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 3960e0bceaf2..6cf498a9bc98 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -185,8 +185,8 @@ void __init kvm_cma_reserve(void) align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); - cma_declare_contiguous(selected_size, 0, 0, align_size, - KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, &kvm_cma, false); + cma_declare_contiguous(0, selected_size, 0, align_size, + KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } } diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 0411c1c57005..6606abdf880c 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -165,7 +165,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, { int ret; - ret = cma_declare_contiguous(size, base, limit, 0, 0, res_cma, fixed); + ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma); if (ret) return ret; diff --git a/include/linux/cma.h b/include/linux/cma.h index f6f7809acb98..371b93042520 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -21,7 +21,7 @@ extern unsigned long cma_get_size(struct cma *cma); extern int __init cma_declare_contiguous(phys_addr_t size, phys_addr_t base, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - struct cma **res_cma, bool fixed); + bool fixed, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); extern bool cma_release(struct cma *cma, struct page *pages, int count); #endif diff --git a/mm/cma.c b/mm/cma.c index 103a6663b7c7..488e50810ed1 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -141,13 +141,13 @@ core_initcall(cma_init_reserved_areas); /** * cma_declare_contiguous() - reserve custom contiguous area - * @size: Size of the reserved area (in bytes), * @base: Base address of the reserved area optional, use 0 for any + * @size: Size of the reserved area (in bytes), * @limit: End address of the reserved memory (optional, 0 for any). * @alignment: Alignment for the CMA area, should be power of 2 or zero * @order_per_bit: Order of pages represented by one bit on bitmap. - * @res_cma: Pointer to store the created cma region. * @fixed: hint about where to place the reserved area + * @res_cma: Pointer to store the created cma region. * * This function reserves memory from early allocator. It should be * called by arch specific code once the early allocator (memblock or bootmem) @@ -157,12 +157,12 @@ core_initcall(cma_init_reserved_areas); * If @fixed is true, reserve contiguous area at exactly @base. If false, * reserve in range from @base to @limit. */ -int __init cma_declare_contiguous(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, +int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - struct cma **res_cma, bool fixed) + bool fixed, struct cma **res_cma) { - struct cma *cma = &cma_areas[cma_area_count]; + struct cma *cma; int ret = 0; pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", @@ -218,6 +218,7 @@ int __init cma_declare_contiguous(phys_addr_t size, * Each reserved area must be initialised later, when more kernel * subsystems (like slab allocator) are available. */ + cma = &cma_areas[cma_area_count]; cma->base_pfn = PFN_DOWN(base); cma->count = size >> PAGE_SHIFT; cma->order_per_bit = order_per_bit; -- cgit v1.2.3 From 2f3e442ccceb85c51c7dffd3799bfd84de213874 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 6 Aug 2014 16:05:40 -0700 Subject: mm: page-flags: clean up the page flag test, set, clear macros - PAGEFLAG_FALSE only defines TEST, make it define SET and CLEAR as well, analogous to PAGEFLAG. - Define TESTSETFLAG_FALSE, analogous to TESTSETFLAG. - Define TESTSCFLAG_FALSE, analogous to TESTSCFLAG - Make PG_mlocked accessors the same on both MMU and !MMU setups Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8304959ad336..e1f5fcd79792 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page) \ #define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) -#define PAGEFLAG_FALSE(uname) \ -static inline int Page##uname(const struct page *page) \ - { return 0; } - #define TESTSCFLAG(uname, lname) \ TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) +#define TESTPAGEFLAG_FALSE(uname) \ +static inline int Page##uname(const struct page *page) { return 0; } + #define SETPAGEFLAG_NOOP(uname) \ static inline void SetPage##uname(struct page *page) { } @@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname) \ static inline void __ClearPage##uname(struct page *page) { } +#define TESTSETFLAG_FALSE(uname) \ +static inline int TestSetPage##uname(struct page *page) { return 0; } + #define TESTCLEARFLAG_FALSE(uname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define __TESTCLEARFLAG_FALSE(uname) \ static inline int __TestClearPage##uname(struct page *page) { return 0; } +#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \ + SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname) + +#define TESTSCFLAG_FALSE(uname) \ + TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) + struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) @@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem) PAGEFLAG(SwapCache, swapcache) #else PAGEFLAG_FALSE(SwapCache) - SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache) #endif PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) @@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked) #else -PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) - TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked) + TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED -- cgit v1.2.3 From 1a4dc5bc7cb5659a8004d105afeb0571126f8f56 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Aug 2014 16:06:08 -0700 Subject: mem-hotplug: improve zone_movable_is_highmem logic In original code, zone_movable_is_highmem() assumes ZONE_MOVABLE not highmem if CONFIG_HAVE_MEMBLOCK_NODE_MAP is not set. In online_pages, it extracts pages from the previous zone before ZONE_MOVABLE. Which is logically inconsistent: If HAVE_MEMBLOCK_NODE_MAP is turned off but HIGHMEM is on, zone_movable_is_highmem() makes movable zone not highmem, but online_pages() extracts pages from ZONE_HIGHMEM. This inconsistency doesn't cause real problem currently, because all architectures support online_pages also have HAVE_MEMBLOCK_NODE_MAP. However, fixing it makes code clear, and also helps futher coding. Signed-off-by: Wang Nan Cc: Zhang Zhen Cc: Mel Gorman Cc: Jiang Liu Cc: Li Zefan Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6cbd1b6c3d20..559e659288fc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -872,6 +872,8 @@ static inline int zone_movable_is_highmem(void) { #if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) return movable_zone == ZONE_HIGHMEM; +#elif defined(CONFIG_HIGHMEM) + return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM; #else return 0; #endif -- cgit v1.2.3 From ef6b571fb8920d5006349a6e29ac47c4817e9691 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Aug 2014 16:06:30 -0700 Subject: include/linux/mmdebug.h: add VM_WARN_ONCE() It was missing... Cc: Konstantin Khlebnikov Cc: Dave Hansen Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index edd82a105220..2f348d02f640 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason, } while (0) #define VM_WARN_ON(cond) WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) +#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif #ifdef CONFIG_DEBUG_VIRTUAL -- cgit v1.2.3 From eb39d618f9e80f81cfc5788cf1b252d141c2f0c3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 6 Aug 2014 16:06:43 -0700 Subject: mm: replace init_page_accessed by __SetPageReferenced Do we really need an exported alias for __SetPageReferenced()? Its callers better know what they're doing, in which case the page would not be already marked referenced. Kill init_page_accessed(), just __SetPageReferenced() inline. Signed-off-by: Hugh Dickins Acked-by: Mel Gorman Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Michal Hocko Cc: Dave Hansen Cc: Prabhakar Lad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 - mm/filemap.c | 4 ++-- mm/shmem.c | 2 +- mm/swap.c | 14 +++----------- 4 files changed, 6 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4bdbee80eede..1eb64043c076 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); -extern void init_page_accessed(struct page *page); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); diff --git a/mm/filemap.c b/mm/filemap.c index 65d44fd88c78..7e85c8147e1b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1091,9 +1091,9 @@ no_page: if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK))) fgp_flags |= FGP_LOCK; - /* Init accessed so avoit atomic mark_page_accessed later */ + /* Init accessed so avoid atomic mark_page_accessed later */ if (fgp_flags & FGP_ACCESSED) - init_page_accessed(page); + __SetPageReferenced(page); err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask); if (unlikely(err)) { diff --git a/mm/shmem.c b/mm/shmem.c index 57fd82a5af7a..fe15d96c3166 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1166,7 +1166,7 @@ repeat: __SetPageSwapBacked(page); __set_page_locked(page); if (sgp == SGP_WRITE) - init_page_accessed(page); + __SetPageReferenced(page); error = mem_cgroup_charge_file(page, current->mm, gfp & GFP_RECLAIM_MASK); diff --git a/mm/swap.c b/mm/swap.c index 9e8e3472248b..d8eb4d09ffa2 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -589,6 +589,9 @@ static void __lru_cache_activate_page(struct page *page) * inactive,unreferenced -> inactive,referenced * inactive,referenced -> active,unreferenced * active,unreferenced -> active,referenced + * + * When a newly allocated page is not yet visible, so safe for non-atomic ops, + * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). */ void mark_page_accessed(struct page *page) { @@ -614,17 +617,6 @@ void mark_page_accessed(struct page *page) } EXPORT_SYMBOL(mark_page_accessed); -/* - * Used to mark_page_accessed(page) that is not visible yet and when it is - * still safe to use non-atomic ops - */ -void init_page_accessed(struct page *page) -{ - if (!PageReferenced(page)) - __SetPageReferenced(page); -} -EXPORT_SYMBOL(init_page_accessed); - static void __lru_cache_add(struct page *page) { struct pagevec *pvec = &get_cpu_var(lru_add_pvec); -- cgit v1.2.3 From ed4d4902ebdd7ca8b5a51daaf6bebf4b172895cc Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 6 Aug 2014 16:06:54 -0700 Subject: mm, hugetlb: remove hugetlb_zero and hugetlb_infinity They are unnecessary: "zero" can be used in place of "hugetlb_zero" and passing extra2 == NULL is equivalent to infinity. Signed-off-by: David Rientjes Cc: Joonsoo Kim Reviewed-by: Naoya Horiguchi Reviewed-by: Luiz Capitulino Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 1 - kernel/sysctl.c | 9 +++------ mm/hugetlb.c | 1 - 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a23c096b3080..6e6d338641fe 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -87,7 +87,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); #endif extern unsigned long hugepages_treat_as_movable; -extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 75b22e22a72c..75875a741b5e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = hugetlb_sysctl_handler, - .extra1 = (void *)&hugetlb_zero, - .extra2 = (void *)&hugetlb_infinity, + .extra1 = &zero, }, #ifdef CONFIG_NUMA { @@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, - .extra1 = (void *)&hugetlb_zero, - .extra2 = (void *)&hugetlb_infinity, + .extra1 = &zero, }, #endif { @@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = hugetlb_overcommit_handler, - .extra1 = (void *)&hugetlb_zero, - .extra2 = (void *)&hugetlb_infinity, + .extra1 = &zero, }, #endif { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7a0fcb33973e..d9ad93b55585 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -35,7 +35,6 @@ #include #include "internal.h" -const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; unsigned long hugepages_treat_as_movable; int hugetlb_max_hstate __read_mostly; -- cgit v1.2.3 From 21bda264f4243f61dfcc485174055f12ad0530b4 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 6 Aug 2014 16:06:56 -0700 Subject: mm: make copy_pte_range static again Commit 71e3aac0724f ("thp: transparent hugepage core") adds copy_pte_range prototype to huge_mm.h. I'm not sure why (or if) this function have been used outside of memory.c, but it currently isn't. This patch makes copy_pte_range() static again. Signed-off-by: Jerome Marchand Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 4 ---- mm/memory.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b826239bdce0..63579cb8d3dc 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); #endif /* CONFIG_DEBUG_VM */ extern unsigned long transparent_hugepage_flags; -extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pmd_t *dst_pmd, pmd_t *src_pmd, - struct vm_area_struct *vma, - unsigned long addr, unsigned long end); extern int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { diff --git a/mm/memory.c b/mm/memory.c index 06ff0720d75a..01d0289f30a7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -884,7 +884,7 @@ out_set_pte: return 0; } -int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { -- cgit v1.2.3 From f6f8ed47353597dcb895eb4a15a28af657392e72 Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Wed, 6 Aug 2014 16:06:58 -0700 Subject: mm/vmalloc.c: clean up map_vm_area third argument Currently map_vm_area() takes (struct page *** pages) as third argument, and after mapping, it moves (*pages) to point to (*pages + nr_mappped_pages). It looks like this kind of increment is useless to its caller these days. The callers don't care about the increments and actually they're trying to avoid this by passing another copy to map_vm_area(). The caller can always guarantee all the pages can be mapped into vm_area as specified in first argument and the caller only cares about whether map_vm_area() fails or not. This patch cleans up the pointer movement in map_vm_area() and updates its callers accordingly. Signed-off-by: WANG Chao Cc: Zhang Yanfei Acked-by: Greg Kroah-Hartman Cc: Minchan Kim Cc: Nitin Gupta Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/kernel/module.c | 2 +- drivers/lguest/core.c | 7 ++----- drivers/staging/android/binder.c | 4 +--- include/linux/vmalloc.h | 2 +- mm/vmalloc.c | 14 +++++--------- mm/zsmalloc.c | 2 +- 6 files changed, 11 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index 4918d91bc3a6..d19b13e3a59f 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -58,7 +58,7 @@ void *module_alloc(unsigned long size) area->nr_pages = npages; area->pages = pages; - if (map_vm_area(area, prot_rwx, &pages)) { + if (map_vm_area(area, prot_rwx, pages)) { vunmap(area->addr); goto error; } diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 0bf1e4edf04d..6590558d1d31 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock); static __init int map_switcher(void) { int i, err; - struct page **pagep; /* * Map the Switcher in to high memory. @@ -110,11 +109,9 @@ static __init int map_switcher(void) * This code actually sets up the pages we've allocated to appear at * switcher_addr. map_vm_area() takes the vma we allocated above, the * kind of pages we're mapping (kernel pages), and a pointer to our - * array of struct pages. It increments that pointer, but we don't - * care. + * array of struct pages. */ - pagep = lg_switcher_pages; - err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); + err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages); if (err) { printk("lguest: map_vm_area failed: %i\n", err); goto free_vma; diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 02b0379ae550..4f34dc0095b5 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { int ret; - struct page **page_array_ptr; page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; @@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, } tmp_area.addr = page_addr; tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */; - page_array_ptr = page; - ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr); + ret = map_vm_area(&tmp_area, PAGE_KERNEL, page); if (ret) { pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", proc->pid, page_addr); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4b8a89189a29..b87696fdf06a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, - struct page ***pages); + struct page **pages); #ifdef CONFIG_MMU extern int map_kernel_range_noflush(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9ec4173f48a8..2b0aa5486092 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) } EXPORT_SYMBOL_GPL(unmap_kernel_range); -int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) +int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) { unsigned long addr = (unsigned long)area->addr; unsigned long end = addr + get_vm_area_size(area); int err; - err = vmap_page_range(addr, end, prot, *pages); - if (err > 0) { - *pages += err; - err = 0; - } + err = vmap_page_range(addr, end, prot, pages); - return err; + return err > 0 ? 0 : err; } EXPORT_SYMBOL_GPL(map_vm_area); @@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count, if (!area) return NULL; - if (map_vm_area(area, prot, &pages)) { + if (map_vm_area(area, prot, pages)) { vunmap(area->addr); return NULL; } @@ -1606,7 +1602,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, cond_resched(); } - if (map_vm_area(area, prot, &pages)) + if (map_vm_area(area, prot, pages)) goto fail; return area->addr; diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index fe78189624cf..bb62a4adc328 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -690,7 +690,7 @@ static inline void __zs_cpu_down(struct mapping_area *area) static inline void *__zs_map_object(struct mapping_area *area, struct page *pages[2], int off, int size) { - BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages)); + BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages)); area->vm_addr = area->vm->addr; return area->vm_addr + off; } -- cgit v1.2.3 From 3484b2de9499df23c4604a513b36f96326ae81ad Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:14 -0700 Subject: mm: rearrange zone fields into read-only, page alloc, statistics and page reclaim lines The arrangement of struct zone has changed over time and now it has reached the point where there is some inappropriate sharing going on. On x86-64 for example o The zone->node field is shared with the zone lock and zone->node is accessed frequently from the page allocator due to the fair zone allocation policy. o span_seqlock is almost never used by shares a line with free_area o Some zone statistics share a cache line with the LRU lock so reclaim-intensive and allocator-intensive workloads can bounce the cache line on a stat update This patch rearranges struct zone to put read-only and read-mostly fields together and then splits the page allocator intensive fields, the zone statistics and the page reclaim intensive fields into their own cache lines. Note that the type of lowmem_reserve changes due to the watermark calculations being signed and avoiding a signed/unsigned conversion there. On the test configuration I used the overall size of struct zone shrunk by one cache line. On smaller machines, this is not likely to be noticable. However, on a 4-node NUMA machine running tiobench the system CPU overhead is reduced by this patch. 3.16.0-rc3 3.16.0-rc3 vanillarearrange-v5r9 User 746.94 759.78 System 65336.22 58350.98 Elapsed 27553.52 27282.02 Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 211 +++++++++++++++++++++++++------------------------ mm/page_alloc.c | 7 +- mm/vmstat.c | 4 +- 3 files changed, 113 insertions(+), 109 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 559e659288fc..ed0876bb902c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -324,18 +324,11 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H struct zone { - /* Fields commonly accessed by the page allocator */ + /* Read-mostly fields */ /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; - /* - * When free pages are below this point, additional steps are taken - * when reading the number of free pages to avoid per-cpu counter - * drift allowing watermarks to be breached - */ - unsigned long percpu_drift_mark; - /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several @@ -344,41 +337,26 @@ struct zone { * on the higher zones). This array is recalculated at runtime if the * sysctl_lowmem_reserve_ratio sysctl changes. */ - unsigned long lowmem_reserve[MAX_NR_ZONES]; - - /* - * This is a per-zone reserve of pages that should not be - * considered dirtyable memory. - */ - unsigned long dirty_balance_reserve; + long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA int node; +#endif + /* - * zone reclaim becomes active if more unmapped pages exist. + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on + * this zone's LRU. Maintained by the pageout code. */ - unsigned long min_unmapped_pages; - unsigned long min_slab_pages; -#endif + unsigned int inactive_ratio; + + struct pglist_data *zone_pgdat; struct per_cpu_pageset __percpu *pageset; + /* - * free areas of different sizes + * This is a per-zone reserve of pages that should not be + * considered dirtyable memory. */ - spinlock_t lock; -#if defined CONFIG_COMPACTION || defined CONFIG_CMA - /* Set to true when the PG_migrate_skip bits should be cleared */ - bool compact_blockskip_flush; - - /* pfn where compaction free scanner should start */ - unsigned long compact_cached_free_pfn; - /* pfn where async and sync compaction migration scanner should start */ - unsigned long compact_cached_migrate_pfn[2]; -#endif -#ifdef CONFIG_MEMORY_HOTPLUG - /* see spanned/present_pages for more description */ - seqlock_t span_seqlock; -#endif - struct free_area free_area[MAX_ORDER]; + unsigned long dirty_balance_reserve; #ifndef CONFIG_SPARSEMEM /* @@ -388,74 +366,14 @@ struct zone { unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ -#ifdef CONFIG_COMPACTION - /* - * On compaction failure, 1<> PAGE_SHIFT */ unsigned long zone_start_pfn; @@ -500,9 +418,11 @@ struct zone { * adjust_managed_page_count() should be used instead of directly * touching zone->managed_pages and totalram_pages. */ + unsigned long managed_pages; unsigned long spanned_pages; unsigned long present_pages; - unsigned long managed_pages; + + const char *name; /* * Number of MIGRATE_RESEVE page block. To maintain for just @@ -510,10 +430,95 @@ struct zone { */ int nr_migrate_reserve_block; +#ifdef CONFIG_MEMORY_HOTPLUG + /* see spanned/present_pages for more description */ + seqlock_t span_seqlock; +#endif + /* - * rarely used fields: + * wait_table -- the array holding the hash table + * wait_table_hash_nr_entries -- the size of the hash table array + * wait_table_bits -- wait_table_size == (1 << wait_table_bits) + * + * The purpose of all these is to keep track of the people + * waiting for a page to become available and make them + * runnable again when possible. The trouble is that this + * consumes a lot of space, especially when so few things + * wait on pages at a given time. So instead of using + * per-page waitqueues, we use a waitqueue hash table. + * + * The bucket discipline is to sleep on the same queue when + * colliding and wake all in that wait queue when removing. + * When something wakes, it must check to be sure its page is + * truly available, a la thundering herd. The cost of a + * collision is great, but given the expected load of the + * table, they should be so rare as to be outweighed by the + * benefits from the saved space. + * + * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the + * primary users of these fields, and in mm/page_alloc.c + * free_area_init_core() performs the initialization of them. */ - const char *name; + wait_queue_head_t *wait_table; + unsigned long wait_table_hash_nr_entries; + unsigned long wait_table_bits; + + ZONE_PADDING(_pad1_) + + /* Write-intensive fields used from the page allocator */ + spinlock_t lock; + + /* free areas of different sizes */ + struct free_area free_area[MAX_ORDER]; + + /* zone flags, see below */ + unsigned long flags; + + ZONE_PADDING(_pad2_) + + /* Write-intensive fields used by page reclaim */ + + /* Fields commonly accessed by the page reclaim scanner */ + spinlock_t lru_lock; + unsigned long pages_scanned; /* since last reclaim */ + struct lruvec lruvec; + + /* Evictions & activations on the inactive file list */ + atomic_long_t inactive_age; + + /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + /* pfn where compaction free scanner should start */ + unsigned long compact_cached_free_pfn; + /* pfn where async and sync compaction migration scanner should start */ + unsigned long compact_cached_migrate_pfn[2]; +#endif + +#ifdef CONFIG_COMPACTION + /* + * On compaction failure, 1<lowmem_reserve[classzone_idx]; int o; long free_cma = 0; @@ -1723,7 +1722,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order, free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); #endif - if (free_pages - free_cma <= min + lowmem_reserve) + if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) return false; for (o = 0; o < order; o++) { /* At the next order, this order's pages become unavailable */ @@ -3254,7 +3253,7 @@ void show_free_areas(unsigned int filter) ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) - printk(" %lu", zone->lowmem_reserve[i]); + printk(" %ld", zone->lowmem_reserve[i]); printk("\n"); } @@ -5575,7 +5574,7 @@ static void calculate_totalreserve_pages(void) for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; - unsigned long max = 0; + long max = 0; /* Find valid and maximum lowmem_reserve in the zone */ for (j = i; j < MAX_NR_ZONES; j++) { diff --git a/mm/vmstat.c b/mm/vmstat.c index b37bd49bfd55..8267f77d1875 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1077,10 +1077,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, zone_page_state(zone, i)); seq_printf(m, - "\n protection: (%lu", + "\n protection: (%ld", zone->lowmem_reserve[0]); for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) - seq_printf(m, ", %lu", zone->lowmem_reserve[i]); + seq_printf(m, ", %ld", zone->lowmem_reserve[i]); seq_printf(m, ")" "\n pagesets"); -- cgit v1.2.3 From 0d5d823ab4e608ec7b52ac4410de4cb74bbe0edd Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:16 -0700 Subject: mm: move zone->pages_scanned into a vmstat counter zone->pages_scanned is a write-intensive cache line during page reclaim and it's also updated during page free. Move the counter into vmstat to take advantage of the per-cpu updates and do not update it in the free paths unless necessary. On a small UMA machine running tiobench the difference is marginal. On a 4-node machine the overhead is more noticable. Note that automatic NUMA balancing was disabled for this test as otherwise the system CPU overhead is unpredictable. 3.16.0-rc3 3.16.0-rc3 3.16.0-rc3 vanillarearrange-v5 vmstat-v5 User 746.94 759.78 774.56 System 65336.22 58350.98 32847.27 Elapsed 27553.52 27282.02 27415.04 Note that the overhead reduction will vary depending on where exactly pages are allocated and freed. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- mm/page_alloc.c | 12 +++++++++--- mm/vmscan.c | 7 ++++--- mm/vmstat.c | 3 ++- 4 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ed0876bb902c..0bd77f730b38 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -143,6 +143,7 @@ enum zone_stat_item { NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ + NR_PAGES_SCANNED, /* pages scanned since last reclaim */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ @@ -480,7 +481,6 @@ struct zone { /* Fields commonly accessed by the page reclaim scanner */ spinlock_t lru_lock; - unsigned long pages_scanned; /* since last reclaim */ struct lruvec lruvec; /* Evictions & activations on the inactive file list */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b7381d11f021..daa016063793 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, int migratetype = 0; int batch_free = 0; int to_free = count; + unsigned long nr_scanned; spin_lock(&zone->lock); - zone->pages_scanned = 0; + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); + if (nr_scanned) + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); while (to_free) { struct page *page; @@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone, unsigned int order, int migratetype) { + unsigned long nr_scanned; spin_lock(&zone->lock); - zone->pages_scanned = 0; + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); + if (nr_scanned) + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); __free_one_page(page, pfn, zone, order, migratetype); if (unlikely(!is_migrate_isolate(migratetype))) @@ -3248,7 +3254,7 @@ void show_free_areas(unsigned int filter) K(zone_page_state(zone, NR_BOUNCE)), K(zone_page_state(zone, NR_FREE_CMA_PAGES)), K(zone_page_state(zone, NR_WRITEBACK_TEMP)), - zone->pages_scanned, + K(zone_page_state(zone, NR_PAGES_SCANNED)), (!zone_reclaimable(zone) ? "yes" : "no") ); printk("lowmem_reserve[]:"); diff --git a/mm/vmscan.c b/mm/vmscan.c index 5fec1ba9951f..9c8222b499b4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -174,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone) bool zone_reclaimable(struct zone *zone) { - return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; + return zone_page_state(zone, NR_PAGES_SCANNED) < + zone_reclaimable_pages(zone) * 6; } static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) @@ -1508,7 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); if (global_reclaim(sc)) { - zone->pages_scanned += nr_scanned; + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); if (current_is_kswapd()) __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); else @@ -1698,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan, nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) - zone->pages_scanned += nr_scanned; + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); reclaim_stat->recent_scanned[file] += nr_taken; diff --git a/mm/vmstat.c b/mm/vmstat.c index 8267f77d1875..e574e883fa70 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -763,6 +763,7 @@ const char * const vmstat_text[] = { "nr_shmem", "nr_dirtied", "nr_written", + "nr_pages_scanned", #ifdef CONFIG_NUMA "numa_hit", @@ -1067,7 +1068,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), - zone->pages_scanned, + zone_page_state(zone, NR_PAGES_SCANNED), zone->spanned_pages, zone->present_pages, zone->managed_pages); -- cgit v1.2.3 From 4ffeaf3560a52b4a69cc7909873d08c0ef5909d4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:22 -0700 Subject: mm: page_alloc: reduce cost of the fair zone allocation policy The fair zone allocation policy round-robins allocations between zones within a node to avoid age inversion problems during reclaim. If the first allocation fails, the batch counts are reset and a second attempt made before entering the slow path. One assumption made with this scheme is that batches expire at roughly the same time and the resets each time are justified. This assumption does not hold when zones reach their low watermark as the batches will be consumed at uneven rates. Allocation failure due to watermark depletion result in additional zonelist scans for the reset and another watermark check before hitting the slowpath. On UMA, the benefit is negligible -- around 0.25%. On 4-socket NUMA machine it's variable due to the variability of measuring overhead with the vmstat changes. The system CPU overhead comparison looks like 3.16.0-rc3 3.16.0-rc3 3.16.0-rc3 vanilla vmstat-v5 lowercost-v5 User 746.94 774.56 802.00 System 65336.22 32847.27 40852.33 Elapsed 27553.52 27415.04 27368.46 However it is worth noting that the overall benchmark still completed faster and intuitively it makes sense to take as few passes as possible through the zonelists. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +++ mm/page_alloc.c | 101 ++++++++++++++++++++++++++----------------------- 2 files changed, 59 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0bd77f730b38..318df7051850 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -534,6 +534,7 @@ typedef enum { ZONE_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ + ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) @@ -571,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone) return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); } +static inline int zone_is_fair_depleted(const struct zone *zone) +{ + return test_bit(ZONE_FAIR_DEPLETED, &zone->flags); +} + static inline int zone_is_oom_locked(const struct zone *zone) { return test_bit(ZONE_OOM_LOCKED, &zone->flags); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e5e8f762532..fb9908148474 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1612,6 +1612,9 @@ again: } __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); + if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 && + !zone_is_fair_depleted(zone)) + zone_set_flag(zone, ZONE_FAIR_DEPLETED); __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone, gfp_flags); @@ -1923,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) #endif /* CONFIG_NUMA */ +static void reset_alloc_batches(struct zone *preferred_zone) +{ + struct zone *zone = preferred_zone->zone_pgdat->node_zones; + + do { + mod_zone_page_state(zone, NR_ALLOC_BATCH, + high_wmark_pages(zone) - low_wmark_pages(zone) - + atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); + zone_clear_flag(zone, ZONE_FAIR_DEPLETED); + } while (zone++ != preferred_zone); +} + /* * get_page_from_freelist goes through the zonelist trying to allocate * a page. @@ -1940,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, int did_zlc_setup = 0; /* just call zlc_setup() one time */ bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && (gfp_mask & __GFP_WRITE); + int nr_fair_skipped = 0; + bool zonelist_rescan; zonelist_scan: + zonelist_rescan = false; + /* * Scan zonelist, looking for a zone with enough free. * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. @@ -1966,8 +1985,10 @@ zonelist_scan: if (alloc_flags & ALLOC_FAIR) { if (!zone_local(preferred_zone, zone)) break; - if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) + if (zone_is_fair_depleted(zone)) { + nr_fair_skipped++; continue; + } } /* * When allocating a page cache page for writing, we @@ -2073,13 +2094,7 @@ this_zone_full: zlc_mark_zone_full(zonelist, z); } - if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { - /* Disable zlc cache for second zonelist scan */ - zlc_active = 0; - goto zonelist_scan; - } - - if (page) + if (page) { /* * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was * necessary to allocate the page. The expectation is @@ -2088,8 +2103,37 @@ this_zone_full: * for !PFMEMALLOC purposes. */ page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); + return page; + } - return page; + /* + * The first pass makes sure allocations are spread fairly within the + * local node. However, the local node might have free pages left + * after the fairness batches are exhausted, and remote zones haven't + * even been considered yet. Try once more without fairness, and + * include remote zones now, before entering the slowpath and waking + * kswapd: prefer spilling to a remote zone over swapping locally. + */ + if (alloc_flags & ALLOC_FAIR) { + alloc_flags &= ~ALLOC_FAIR; + if (nr_fair_skipped) { + zonelist_rescan = true; + reset_alloc_batches(preferred_zone); + } + if (nr_online_nodes > 1) + zonelist_rescan = true; + } + + if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) { + /* Disable zlc cache for second zonelist scan */ + zlc_active = 0; + zonelist_rescan = true; + } + + if (zonelist_rescan) + goto zonelist_scan; + + return NULL; } /* @@ -2410,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, return page; } -static void reset_alloc_batches(struct zonelist *zonelist, - enum zone_type high_zoneidx, - struct zone *preferred_zone) -{ - struct zoneref *z; - struct zone *zone; - - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { - /* - * Only reset the batches of zones that were actually - * considered in the fairness pass, we don't want to - * trash fairness information for zones that are not - * actually part of this zonelist's round-robin cycle. - */ - if (!zone_local(preferred_zone, zone)) - continue; - mod_zone_page_state(zone, NR_ALLOC_BATCH, - high_wmark_pages(zone) - low_wmark_pages(zone) - - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); - } -} - static void wake_all_kswapds(unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, @@ -2767,28 +2789,11 @@ retry_cpuset: if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) alloc_flags |= ALLOC_CMA; #endif -retry: /* First allocation attempt */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, alloc_flags, preferred_zone, classzone_idx, migratetype); if (unlikely(!page)) { - /* - * The first pass makes sure allocations are spread - * fairly within the local node. However, the local - * node might have free pages left after the fairness - * batches are exhausted, and remote zones haven't - * even been considered yet. Try once more without - * fairness, and include remote zones now, before - * entering the slowpath and waking kswapd: prefer - * spilling to a remote zone over swapping locally. - */ - if (alloc_flags & ALLOC_FAIR) { - reset_alloc_batches(zonelist, high_zoneidx, - preferred_zone); - alloc_flags &= ~ALLOC_FAIR; - goto retry; - } /* * Runtime PM, block IO and its error handling path * can deadlock because I/O on the device might not -- cgit v1.2.3 From 9a95f3cf7b33d66fa64727cff8cd2f2a9d09f335 Mon Sep 17 00:00:00 2001 From: Paul Cassella Date: Wed, 6 Aug 2014 16:07:24 -0700 Subject: mm: describe mmap_sem rules for __lock_page_or_retry() and callers Add a comment describing the circumstances in which __lock_page_or_retry() will or will not release the mmap_sem when returning 0. Add comments to lock_page_or_retry()'s callers (filemap_fault(), do_swap_page()) noting the impact on VM_FAULT_RETRY returns. Add comments on up the call tree, particularly replacing the false "We return with mmap_sem still held" comments. Signed-off-by: Paul Cassella Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 3 ++- include/linux/pagemap.h | 3 +++ mm/filemap.c | 23 +++++++++++++++++++++++ mm/gup.c | 18 +++++++++++++++--- mm/memory.c | 34 +++++++++++++++++++++++++++++++--- mm/mlock.c | 9 ++++++++- 6 files changed, 82 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1dbade870f90..a24194681513 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1218,7 +1218,8 @@ good_area: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo - * the fault: + * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if + * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. */ fault = handle_mm_fault(mm, vma, address, flags); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1474ae18c88..3df8c7db7a4e 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page) /* * lock_page_or_retry - Lock the page, unless this would block and the * caller indicated that it can handle a retry. + * + * Return value and mmap_sem implications depend on flags; see + * __lock_page_or_retry(). */ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) diff --git a/mm/filemap.c b/mm/filemap.c index 7e85c8147e1b..af19a6b079f5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page) } EXPORT_SYMBOL_GPL(__lock_page_killable); +/* + * Return values: + * 1 - page is locked; mmap_sem is still held. + * 0 - page is not locked. + * mmap_sem has been released (up_read()), unless flags had both + * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in + * which case mmap_sem is still held. + * + * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 + * with the page locked and the mmap_sem unperturbed. + */ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { @@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, * The goto's are kind of ugly, but this streamlines the normal case of having * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. + * + * vma->vm_mm->mmap_sem must be held on entry. + * + * If our return value has VM_FAULT_RETRY set, it's because + * lock_page_or_retry() returned 0. + * The mmap_sem has usually been released in this case. + * See __lock_page_or_retry() for the exception. + * + * If our return value does not have VM_FAULT_RETRY set, the mmap_sem + * has not been released. + * + * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. */ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { diff --git a/mm/gup.c b/mm/gup.c index cc5a9e7adea7..91d044b1600d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -258,6 +258,11 @@ unmap: return ret; } +/* + * mmap_sem must be held on entry. If @nonblocking != NULL and + * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released. + * If it is, *@nonblocking will be set to 0 and -EBUSY returned. + */ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, unsigned long address, unsigned int *flags, int *nonblocking) { @@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * with a put_page() call when it is finished with. vmas will only * remain valid while mmap_sem is held. * - * Must be called with mmap_sem held for read or write. + * Must be called with mmap_sem held. It may be released. See below. * * __get_user_pages walks a process's page tables and takes a reference to * each struct page that each user address corresponds to at a given @@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * * If @nonblocking != NULL, __get_user_pages will not wait for disk IO * or mmap_sem contention, and if waiting is needed to pin all pages, - * *@nonblocking will be set to 0. + * *@nonblocking will be set to 0. Further, if @gup_flags does not + * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in + * this case. + * + * A caller using such a combination of @nonblocking and @gup_flags + * must therefore hold the mmap_sem for reading only, and recognize + * when it's been released. Otherwise, it must be held for either + * reading or writing and will not be released. * * In most cases, get_user_pages or get_user_pages_fast should be used * instead of __get_user_pages. __get_user_pages should be used only if @@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages); * such architectures, gup() will not be enough to make a subsequent access * succeed. * - * This should be called with the mm_sem held for read. + * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault(). */ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, unsigned long address, unsigned int fault_flags) diff --git a/mm/memory.c b/mm/memory.c index 7e131325bdf8..4d0a543f3bb3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range); /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * + * We return with the mmap_sem locked or unlocked in the same cases + * as does filemap_fault(). */ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, @@ -2688,6 +2691,11 @@ oom: return VM_FAULT_OOM; } +/* + * The mmap_sem must have been held on entry, and may have been + * released depending on flags and vma->vm_ops->fault() return value. + * See filemap_fault() and __lock_page_retry(). + */ static int __do_fault(struct vm_area_struct *vma, unsigned long address, pgoff_t pgoff, unsigned int flags, struct page **page) { @@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } +/* + * We enter with non-exclusive mmap_sem (to exclude vma changes, + * but allow concurrent faults). + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). + */ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags, pte_t orig_pte) @@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, * * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, @@ -3172,7 +3188,10 @@ out: * * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, @@ -3232,6 +3251,9 @@ unlock: /* * By the time we get here, we already hold the mm semaphore + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, return handle_pte_fault(mm, vma, address, pte, pmd, flags); } +/* + * By the time we get here, we already hold the mm semaphore + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). + */ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { diff --git a/mm/mlock.c b/mm/mlock.c index b1eb53634005..ce84cb0b83ef 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -210,12 +210,19 @@ out: * @vma: target vma * @start: start address * @end: end address + * @nonblocking: * * This takes care of making the pages present too. * * return 0 on success, negative error code on error. * - * vma->vm_mm->mmap_sem must be held for at least read. + * vma->vm_mm->mmap_sem must be held. + * + * If @nonblocking is NULL, it may be held for read or write and will + * be unperturbed. + * + * If @nonblocking is non-NULL, it must held for read only and may be + * released. If it's released, *@nonblocking will be set to 0. */ long __mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *nonblocking) -- cgit v1.2.3 From 6326440077a48d2c3b2993f3b3f2d969f09b6917 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Aug 2014 16:07:36 -0700 Subject: memory-hotplug: add zone_for_memory() for selecting zone for new memory This series of patches fixes a problem when adding memory in bad manner. For example: for a x86_64 machine booted with "mem=400M" and with 2GiB memory installed, following commands cause problem: # echo 0x40000000 > /sys/devices/system/memory/probe [ 28.613895] init_memory_mapping: [mem 0x40000000-0x47ffffff] # echo 0x48000000 > /sys/devices/system/memory/probe [ 28.693675] init_memory_mapping: [mem 0x48000000-0x4fffffff] # echo online_movable > /sys/devices/system/memory/memory9/state # echo 0x50000000 > /sys/devices/system/memory/probe [ 29.084090] init_memory_mapping: [mem 0x50000000-0x57ffffff] # echo 0x58000000 > /sys/devices/system/memory/probe [ 29.151880] init_memory_mapping: [mem 0x58000000-0x5fffffff] # echo online_movable > /sys/devices/system/memory/memory11/state # echo online> /sys/devices/system/memory/memory8/state # echo online> /sys/devices/system/memory/memory10/state # echo offline> /sys/devices/system/memory/memory9/state [ 30.558819] Offlined Pages 32768 # free total used free shared buffers cached Mem: 780588 18014398509432020 830552 0 0 51180 -/+ buffers/cache: 18014398509380840 881732 Swap: 0 0 0 This is because the above commands probe higher memory after online a section with online_movable, which causes ZONE_HIGHMEM (or ZONE_NORMAL for systems without ZONE_HIGHMEM) overlaps ZONE_MOVABLE. After the second online_movable, the problem can be observed from zoneinfo: # cat /proc/zoneinfo ... Node 0, zone Movable pages free 65491 min 250 low 312 high 375 scanned 0 spanned 18446744073709518848 present 65536 managed 65536 ... This series of patches solve the problem by checking ZONE_MOVABLE when choosing zone for new memory. If new memory is inside or higher than ZONE_MOVABLE, makes it go there instead. After applying this series of patches, following are free and zoneinfo result (after offlining memory9): bash-4.2# free total used free shared buffers cached Mem: 780956 80112 700844 0 0 51180 -/+ buffers/cache: 28932 752024 Swap: 0 0 0 bash-4.2# cat /proc/zoneinfo Node 0, zone DMA pages free 3389 min 14 low 17 high 21 scanned 0 spanned 4095 present 3998 managed 3977 nr_free_pages 3389 ... start_pfn: 1 inactive_ratio: 1 Node 0, zone DMA32 pages free 73724 min 341 low 426 high 511 scanned 0 spanned 98304 present 98304 managed 92958 nr_free_pages 73724 ... start_pfn: 4096 inactive_ratio: 1 Node 0, zone Normal pages free 32630 min 120 low 150 high 180 scanned 0 spanned 32768 present 32768 managed 32768 nr_free_pages 32630 ... start_pfn: 262144 inactive_ratio: 1 Node 0, zone Movable pages free 65476 min 241 low 301 high 361 scanned 0 spanned 98304 present 65536 managed 65536 nr_free_pages 65476 ... start_pfn: 294912 inactive_ratio: 1 This patch (of 7): Introduce zone_for_memory() in arch independent code for arch_add_memory() use. Many arch_add_memory() function simply selects ZONE_HIGHMEM or ZONE_NORMAL and add new memory into it. However, with the existance of ZONE_MOVABLE, the selection method should be carefully considered: if new, higher memory is added after ZONE_MOVABLE is setup, the default zone and ZONE_MOVABLE may overlap each other. should_add_memory_movable() checks the status of ZONE_MOVABLE. If it has already contain memory, compare the address of new memory and movable memory. If new memory is higher than movable, it should be added into ZONE_MOVABLE instead of default zone. Signed-off-by: Wang Nan Cc: Zhang Yanfei Cc: Dave Hansen Cc: Ingo Molnar Cc: Yinghai Lu Cc: "Mel Gorman" Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 79dd9eca054f..d9524c49d767 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -259,6 +259,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); +extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default); extern int arch_add_memory(int nid, u64 start, u64 size); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a3797d3fd8a4..2ff8c2325e96 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1159,6 +1159,34 @@ static int check_hotplug_memory_range(u64 start, u64 size) return 0; } +/* + * If movable zone has already been setup, newly added memory should be check. + * If its address is higher than movable zone, it should be added as movable. + * Without this check, movable zone may overlap with other zone. + */ +static int should_add_memory_movable(int nid, u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + pg_data_t *pgdat = NODE_DATA(nid); + struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE; + + if (zone_is_empty(movable_zone)) + return 0; + + if (movable_zone->zone_start_pfn <= start_pfn) + return 1; + + return 0; +} + +int zone_for_memory(int nid, u64 start, u64 size, int zone_default) +{ + if (should_add_memory_movable(nid, start, size)) + return ZONE_MOVABLE; + + return zone_default; +} + /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ int __ref add_memory(int nid, u64 start, u64 size) { -- cgit v1.2.3 From 8d060bf490930f305c4efc45724e861a268f4d2f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 6 Aug 2014 16:07:50 -0700 Subject: mm, oom: ensure memoryless node zonelist always includes zones With memoryless node support being worked on, it's possible that for optimizations that a node may not have a non-NULL zonelist. When CONFIG_NUMA is enabled and node 0 is memoryless, this means the zonelist for first_online_node may become NULL. The oom killer requires a zonelist that includes all memory zones for the sysrq trigger and pagefault out of memory handler. Ensure that a non-NULL zonelist is always passed to the oom killer. [akpm@linux-foundation.org: fix non-numa build] Signed-off-by: David Rientjes Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/sysrq.c | 2 +- include/linux/nodemask.h | 11 ++++++++++- mm/oom_kill.c | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 454b65898e2c..42bad18c66c9 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(struct work_struct *ignored) { - out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL, + out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL, 0, NULL, true); } diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 58b9a02c38d2..83a6aeda899d 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -430,7 +430,15 @@ static inline int num_node_state(enum node_states state) for_each_node_mask((__node), node_states[__state]) #define first_online_node first_node(node_states[N_ONLINE]) -#define next_online_node(nid) next_node((nid), node_states[N_ONLINE]) +#define first_memory_node first_node(node_states[N_MEMORY]) +static inline int next_online_node(int nid) +{ + return next_node(nid, node_states[N_ONLINE]); +} +static inline int next_memory_node(int nid) +{ + return next_node(nid, node_states[N_MEMORY]); +} extern int nr_node_ids; extern int nr_online_nodes; @@ -471,6 +479,7 @@ static inline int num_node_state(enum node_states state) for ( (node) = 0; (node) == 0; (node) = 1) #define first_online_node 0 +#define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) #define nr_node_ids 1 #define nr_online_nodes 1 diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3291e82d4352..b0a1e1ff0353 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -694,7 +694,7 @@ void pagefault_out_of_memory(void) if (mem_cgroup_oom_synchronize(true)) return; - zonelist = node_zonelist(first_online_node, GFP_KERNEL); + zonelist = node_zonelist(first_memory_node, GFP_KERNEL); if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { out_of_memory(NULL, 0, 0, NULL, false); clear_zonelist_oom(zonelist, GFP_KERNEL); -- cgit v1.2.3 From e972a070e2d3296cd2e2cc2fd0561ce89a1d5ebf Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 6 Aug 2014 16:07:52 -0700 Subject: mm, oom: rename zonelist locking functions try_set_zonelist_oom() and clear_zonelist_oom() are not named properly to imply that they require locking semantics to avoid out_of_memory() being reordered. zone_scan_lock is required for both functions to ensure that there is proper locking synchronization. Rename try_set_zonelist_oom() to oom_zonelist_trylock() and rename clear_zonelist_oom() to oom_zonelist_unlock() to imply there is proper locking semantics. At the same time, convert oom_zonelist_trylock() to return bool instead of int since only success and failure are tested. Signed-off-by: David Rientjes Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 4 ++-- mm/oom_kill.c | 30 +++++++++++++----------------- mm/page_alloc.c | 6 +++--- 3 files changed, 18 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 4cd62677feb9..647395a1a550 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -55,8 +55,8 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, struct mem_cgroup *memcg, nodemask_t *nodemask, const char *message); -extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); -extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); +extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags); +extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, int order, const nodemask_t *nodemask); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b0a1e1ff0353..d33aca1552ad 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -559,28 +559,25 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); * if a parallel OOM killing is already taking place that includes a zone in * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. */ -int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) +bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask) { struct zoneref *z; struct zone *zone; - int ret = 1; + bool ret = true; spin_lock(&zone_scan_lock); - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) if (zone_is_oom_locked(zone)) { - ret = 0; + ret = false; goto out; } - } - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { - /* - * Lock each zone in the zonelist under zone_scan_lock so a - * parallel invocation of try_set_zonelist_oom() doesn't succeed - * when it shouldn't. - */ + /* + * Lock each zone in the zonelist under zone_scan_lock so a parallel + * call to oom_zonelist_trylock() doesn't succeed when it shouldn't. + */ + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) zone_set_flag(zone, ZONE_OOM_LOCKED); - } out: spin_unlock(&zone_scan_lock); @@ -592,15 +589,14 @@ out: * allocation attempts with zonelists containing them may now recall the OOM * killer, if necessary. */ -void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) +void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask) { struct zoneref *z; struct zone *zone; spin_lock(&zone_scan_lock); - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) zone_clear_flag(zone, ZONE_OOM_LOCKED); - } spin_unlock(&zone_scan_lock); } @@ -695,8 +691,8 @@ void pagefault_out_of_memory(void) return; zonelist = node_zonelist(first_memory_node, GFP_KERNEL); - if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { + if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) { out_of_memory(NULL, 0, 0, NULL, false); - clear_zonelist_oom(zonelist, GFP_KERNEL); + oom_zonelist_unlock(zonelist, GFP_KERNEL); } } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fb9908148474..578236089ec1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2246,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, { struct page *page; - /* Acquire the OOM killer lock for the zones in zonelist */ - if (!try_set_zonelist_oom(zonelist, gfp_mask)) { + /* Acquire the per-zone oom lock for each zone */ + if (!oom_zonelist_trylock(zonelist, gfp_mask)) { schedule_timeout_uninterruptible(1); return NULL; } @@ -2285,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, out_of_memory(zonelist, gfp_mask, order, nodemask, false); out: - clear_zonelist_oom(zonelist, gfp_mask); + oom_zonelist_unlock(zonelist, gfp_mask); return page; } -- cgit v1.2.3 From 1d352bfd41e8219cdf9bebe79677700bdc38b540 Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Wed, 6 Aug 2014 16:08:18 -0700 Subject: mm: BUG when __kmap_atomic_idx equals KM_TYPE_NR __kmap_atomic_idx is per_cpu variable. Each CPU can use KM_TYPE_NR entries from FIXMAP i.e. from 0 to KM_TYPE_NR - 1. Allowing __kmap_atomic_idx to over- shoot to KM_TYPE_NR can mess up with next CPU's 0th entry which is a bug. Hence BUG_ON if __kmap_atomic_idx >= KM_TYPE_NR. Fix the off-by-on in this test. Signed-off-by: Chintan Pandya Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7fb31da45d03..9286a46b7d69 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -93,7 +93,7 @@ static inline int kmap_atomic_idx_push(void) #ifdef CONFIG_DEBUG_HIGHMEM WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx > KM_TYPE_NR); + BUG_ON(idx >= KM_TYPE_NR); #endif return idx; } -- cgit v1.2.3 From b972216e27d1c853eced33f8638926636c606341 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Aug 2014 16:08:20 -0700 Subject: mmu_notifier: add call_srcu and sync function for listener to delay call and sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When kernel device drivers or subsystems want to bind their lifespan to t= he lifespan of the mm_struct, they usually use one of the following methods: 1. Manually calling a function in the interested kernel module. The funct= ion call needs to be placed in mmput. This method was rejected by several ker= nel maintainers. 2. Registering to the mmu notifier release mechanism. The problem with the latter approach is that the mmu_notifier_release cal= lback is called from__mmu_notifier_release (called from exit_mmap). That functi= on iterates over the list of mmu notifiers and don't expect the release call= back function to remove itself from the list. Therefore, the callback function= in the kernel module can't release the mmu_notifier_object, which is actuall= y the kernel module's object itself. As a result, the destruction of the kernel module's object must to be done in a delayed fashion. This patch adds support for this delayed callback, by adding a new mmu_notifier_call_srcu function that receives a function ptr and calls th= at function with call_srcu. In that function, the kernel module releases its object. To use mmu_notifier_call_srcu, the calling module needs to call b= efore that a new function called mmu_notifier_unregister_no_release that as its= name implies, unregisters a notifier without calling its notifier release call= back. This patch also adds a function that will call barrier_srcu so those kern= el modules can sync with mmu_notifier. Signed-off-by: Peter Zijlstra Signed-off-by: Jérôme Glisse Signed-off-by: Oded Gabbay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 6 ++++++ mm/mmu_notifier.c | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index deca87452528..27288692241e 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -170,6 +170,8 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm); extern void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm); +extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, + struct mm_struct *mm); extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, @@ -288,6 +290,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) set_pte_at(___mm, ___address, __ptep, ___pte); \ }) +extern void mmu_notifier_call_srcu(struct rcu_head *rcu, + void (*func)(struct rcu_head *rcu)); +extern void mmu_notifier_synchronize(void); + #else /* CONFIG_MMU_NOTIFIER */ static inline void mmu_notifier_release(struct mm_struct *mm) diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 41cefdf0aadd..950813b1eb36 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -22,6 +22,25 @@ /* global SRCU for all MMs */ static struct srcu_struct srcu; +/* + * This function allows mmu_notifier::release callback to delay a call to + * a function that will free appropriate resources. The function must be + * quick and must not block. + */ +void mmu_notifier_call_srcu(struct rcu_head *rcu, + void (*func)(struct rcu_head *rcu)) +{ + call_srcu(&srcu, rcu, func); +} +EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu); + +void mmu_notifier_synchronize(void) +{ + /* Wait for any running method to finish. */ + srcu_barrier(&srcu); +} +EXPORT_SYMBOL_GPL(mmu_notifier_synchronize); + /* * This function can't run concurrently against mmu_notifier_register * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap @@ -53,7 +72,6 @@ void __mmu_notifier_release(struct mm_struct *mm) */ if (mn->ops->release) mn->ops->release(mn, mm); - srcu_read_unlock(&srcu, id); spin_lock(&mm->mmu_notifier_mm->lock); while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { @@ -69,6 +87,7 @@ void __mmu_notifier_release(struct mm_struct *mm) hlist_del_init_rcu(&mn->hlist); } spin_unlock(&mm->mmu_notifier_mm->lock); + srcu_read_unlock(&srcu, id); /* * synchronize_srcu here prevents mmu_notifier_release from returning to @@ -325,6 +344,25 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmu_notifier_unregister); +/* + * Same as mmu_notifier_unregister but no callback and no srcu synchronization. + */ +void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier_mm->lock); + /* + * Can not use list_del_rcu() since __mmu_notifier_release + * can delete it before we hold the lock. + */ + hlist_del_init_rcu(&mn->hlist); + spin_unlock(&mm->mmu_notifier_mm->lock); + + BUG_ON(atomic_read(&mm->mm_count) <= 0); + mmdrop(mm); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release); + static int __init mmu_notifier_init(void) { return init_srcu_struct(&srcu); -- cgit v1.2.3 From 99eef8e9369abe009006b4fa7f6ca5086c09cf46 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 6 Aug 2014 16:08:33 -0700 Subject: mm/zbud: change zbud_alloc size type to size_t Change the type of the zbud_alloc() size param from unsigned int to size_t. Technically, this should not make any difference, as the zbud implementation already restricts the size to well within either type's limits; but as zsmalloc (and kmalloc) use size_t, and zpool will use size_t, this brings the size parameter type in line with zsmalloc/zpool. Signed-off-by: Dan Streetman Acked-by: Seth Jennings Tested-by: Seth Jennings Cc: Weijie Yang Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zbud.h | 2 +- mm/zbud.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zbud.h b/include/linux/zbud.h index 13af0d450bf6..f9d41a6e361f 100644 --- a/include/linux/zbud.h +++ b/include/linux/zbud.h @@ -11,7 +11,7 @@ struct zbud_ops { struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops); void zbud_destroy_pool(struct zbud_pool *pool); -int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, +int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, unsigned long *handle); void zbud_free(struct zbud_pool *pool, unsigned long handle); int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries); diff --git a/mm/zbud.c b/mm/zbud.c index 01df13a7e2e1..d01226117b8d 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -122,7 +122,7 @@ enum buddy { }; /* Converts an allocation size in bytes to size in zbud chunks */ -static int size_to_chunks(int size) +static int size_to_chunks(size_t size) { return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; } @@ -247,7 +247,7 @@ void zbud_destroy_pool(struct zbud_pool *pool) * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate * a new page. */ -int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, +int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, unsigned long *handle) { int chunks, i, freechunks; -- cgit v1.2.3 From af8d417a04564bca0348e7e3c749ab12a3e837ad Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 6 Aug 2014 16:08:36 -0700 Subject: mm/zpool: implement common zpool api to zbud/zsmalloc Add zpool api. zpool provides an interface for memory storage, typically of compressed memory. Users can select what backend to use; currently the only implementations are zbud, a low density implementation with up to two compressed pages per storage page, and zsmalloc, a higher density implementation with multiple compressed pages per storage page. Signed-off-by: Dan Streetman Tested-by: Seth Jennings Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 106 +++++++++++++++ mm/Kconfig | 41 +++--- mm/Makefile | 1 + mm/zpool.c | 364 ++++++++++++++++++++++++++++++++++++++++++++++++++ mm/zsmalloc.c | 1 - 5 files changed, 495 insertions(+), 18 deletions(-) create mode 100644 include/linux/zpool.h create mode 100644 mm/zpool.c (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h new file mode 100644 index 000000000000..f14bd75f08b3 --- /dev/null +++ b/include/linux/zpool.h @@ -0,0 +1,106 @@ +/* + * zpool memory storage api + * + * Copyright (C) 2014 Dan Streetman + * + * This is a common frontend for the zbud and zsmalloc memory + * storage pool implementations. Typically, this is used to + * store compressed memory. + */ + +#ifndef _ZPOOL_H_ +#define _ZPOOL_H_ + +struct zpool; + +struct zpool_ops { + int (*evict)(struct zpool *pool, unsigned long handle); +}; + +/* + * Control how a handle is mapped. It will be ignored if the + * implementation does not support it. Its use is optional. + * Note that this does not refer to memory protection, it + * refers to how the memory will be copied in/out if copying + * is necessary during mapping; read-write is the safest as + * it copies the existing memory in on map, and copies the + * changed memory back out on unmap. Write-only does not copy + * in the memory and should only be used for initialization. + * If in doubt, use ZPOOL_MM_DEFAULT which is read-write. + */ +enum zpool_mapmode { + ZPOOL_MM_RW, /* normal read-write mapping */ + ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */ + ZPOOL_MM_WO, /* write-only (no copy-in at map time) */ + + ZPOOL_MM_DEFAULT = ZPOOL_MM_RW +}; + +struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops); + +char *zpool_get_type(struct zpool *pool); + +void zpool_destroy_pool(struct zpool *pool); + +int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, + unsigned long *handle); + +void zpool_free(struct zpool *pool, unsigned long handle); + +int zpool_shrink(struct zpool *pool, unsigned int pages, + unsigned int *reclaimed); + +void *zpool_map_handle(struct zpool *pool, unsigned long handle, + enum zpool_mapmode mm); + +void zpool_unmap_handle(struct zpool *pool, unsigned long handle); + +u64 zpool_get_total_size(struct zpool *pool); + + +/** + * struct zpool_driver - driver implementation for zpool + * @type: name of the driver. + * @list: entry in the list of zpool drivers. + * @create: create a new pool. + * @destroy: destroy a pool. + * @malloc: allocate mem from a pool. + * @free: free mem from a pool. + * @shrink: shrink the pool. + * @map: map a handle. + * @unmap: unmap a handle. + * @total_size: get total size of a pool. + * + * This is created by a zpool implementation and registered + * with zpool. + */ +struct zpool_driver { + char *type; + struct module *owner; + atomic_t refcount; + struct list_head list; + + void *(*create)(gfp_t gfp, struct zpool_ops *ops); + void (*destroy)(void *pool); + + int (*malloc)(void *pool, size_t size, gfp_t gfp, + unsigned long *handle); + void (*free)(void *pool, unsigned long handle); + + int (*shrink)(void *pool, unsigned int pages, + unsigned int *reclaimed); + + void *(*map)(void *pool, unsigned long handle, + enum zpool_mapmode mm); + void (*unmap)(void *pool, unsigned long handle); + + u64 (*total_size)(void *pool); +}; + +void zpool_register_driver(struct zpool_driver *driver); + +int zpool_unregister_driver(struct zpool_driver *driver); + +int zpool_evict(void *pool, unsigned long handle); + +#endif diff --git a/mm/Kconfig b/mm/Kconfig index f4899ec39cf4..12179b8c3b89 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -519,15 +519,17 @@ config CMA_AREAS If unsure, leave the default value "7". -config ZBUD - tristate - default n +config MEM_SOFT_DIRTY + bool "Track memory changes" + depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS + select PROC_PAGE_MONITOR help - A special purpose allocator for storing compressed pages. - It is designed to store up to two compressed pages per physical - page. While this design limits storage density, it has simple and - deterministic reclaim properties that make it preferable to a higher - density approach when reclaim will be used. + This option enables memory changes tracking by introducing a + soft-dirty bit on pte-s. This bit it set when someone writes + into a page just as regular dirty bit, but unlike the latter + it can be cleared by hands. + + See Documentation/vm/soft-dirty.txt for more details. config ZSWAP bool "Compressed cache for swap pages (EXPERIMENTAL)" @@ -549,17 +551,22 @@ config ZSWAP they have not be fully explored on the large set of potential configurations and workloads that exist. -config MEM_SOFT_DIRTY - bool "Track memory changes" - depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS - select PROC_PAGE_MONITOR +config ZPOOL + tristate "Common API for compressed memory storage" + default n help - This option enables memory changes tracking by introducing a - soft-dirty bit on pte-s. This bit it set when someone writes - into a page just as regular dirty bit, but unlike the latter - it can be cleared by hands. + Compressed memory storage API. This allows using either zbud or + zsmalloc. - See Documentation/vm/soft-dirty.txt for more details. +config ZBUD + tristate "Low density storage for compressed pages" + default n + help + A special purpose allocator for storing compressed pages. + It is designed to store up to two compressed pages per physical + page. While this design limits storage density, it has simple and + deterministic reclaim properties that make it preferable to a higher + density approach when reclaim will be used. config ZSMALLOC tristate "Memory allocator for compressed pages" diff --git a/mm/Makefile b/mm/Makefile index 8338473c329a..632ae77e6070 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o +obj-$(CONFIG_ZPOOL) += zpool.o obj-$(CONFIG_ZBUD) += zbud.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o diff --git a/mm/zpool.c b/mm/zpool.c new file mode 100644 index 000000000000..e40612a1df00 --- /dev/null +++ b/mm/zpool.c @@ -0,0 +1,364 @@ +/* + * zpool memory storage api + * + * Copyright (C) 2014 Dan Streetman + * + * This is a common frontend for memory storage pool implementations. + * Typically, this is used to store compressed memory. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +struct zpool { + char *type; + + struct zpool_driver *driver; + void *pool; + struct zpool_ops *ops; + + struct list_head list; +}; + +static LIST_HEAD(drivers_head); +static DEFINE_SPINLOCK(drivers_lock); + +static LIST_HEAD(pools_head); +static DEFINE_SPINLOCK(pools_lock); + +/** + * zpool_register_driver() - register a zpool implementation. + * @driver: driver to register + */ +void zpool_register_driver(struct zpool_driver *driver) +{ + spin_lock(&drivers_lock); + atomic_set(&driver->refcount, 0); + list_add(&driver->list, &drivers_head); + spin_unlock(&drivers_lock); +} +EXPORT_SYMBOL(zpool_register_driver); + +/** + * zpool_unregister_driver() - unregister a zpool implementation. + * @driver: driver to unregister. + * + * Module usage counting is used to prevent using a driver + * while/after unloading, so if this is called from module + * exit function, this should never fail; if called from + * other than the module exit function, and this returns + * failure, the driver is in use and must remain available. + */ +int zpool_unregister_driver(struct zpool_driver *driver) +{ + int ret = 0, refcount; + + spin_lock(&drivers_lock); + refcount = atomic_read(&driver->refcount); + WARN_ON(refcount < 0); + if (refcount > 0) + ret = -EBUSY; + else + list_del(&driver->list); + spin_unlock(&drivers_lock); + + return ret; +} +EXPORT_SYMBOL(zpool_unregister_driver); + +/** + * zpool_evict() - evict callback from a zpool implementation. + * @pool: pool to evict from. + * @handle: handle to evict. + * + * This can be used by zpool implementations to call the + * user's evict zpool_ops struct evict callback. + */ +int zpool_evict(void *pool, unsigned long handle) +{ + struct zpool *zpool; + + spin_lock(&pools_lock); + list_for_each_entry(zpool, &pools_head, list) { + if (zpool->pool == pool) { + spin_unlock(&pools_lock); + if (!zpool->ops || !zpool->ops->evict) + return -EINVAL; + return zpool->ops->evict(zpool, handle); + } + } + spin_unlock(&pools_lock); + + return -ENOENT; +} +EXPORT_SYMBOL(zpool_evict); + +static struct zpool_driver *zpool_get_driver(char *type) +{ + struct zpool_driver *driver; + + spin_lock(&drivers_lock); + list_for_each_entry(driver, &drivers_head, list) { + if (!strcmp(driver->type, type)) { + bool got = try_module_get(driver->owner); + + if (got) + atomic_inc(&driver->refcount); + spin_unlock(&drivers_lock); + return got ? driver : NULL; + } + } + + spin_unlock(&drivers_lock); + return NULL; +} + +static void zpool_put_driver(struct zpool_driver *driver) +{ + atomic_dec(&driver->refcount); + module_put(driver->owner); +} + +/** + * zpool_create_pool() - Create a new zpool + * @type The type of the zpool to create (e.g. zbud, zsmalloc) + * @gfp The GFP flags to use when allocating the pool. + * @ops The optional ops callback. + * + * This creates a new zpool of the specified type. The gfp flags will be + * used when allocating memory, if the implementation supports it. If the + * ops param is NULL, then the created zpool will not be shrinkable. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: New zpool on success, NULL on failure. + */ +struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops) +{ + struct zpool_driver *driver; + struct zpool *zpool; + + pr_info("creating pool type %s\n", type); + + driver = zpool_get_driver(type); + + if (!driver) { + request_module(type); + driver = zpool_get_driver(type); + } + + if (!driver) { + pr_err("no driver for type %s\n", type); + return NULL; + } + + zpool = kmalloc(sizeof(*zpool), gfp); + if (!zpool) { + pr_err("couldn't create zpool - out of memory\n"); + zpool_put_driver(driver); + return NULL; + } + + zpool->type = driver->type; + zpool->driver = driver; + zpool->pool = driver->create(gfp, ops); + zpool->ops = ops; + + if (!zpool->pool) { + pr_err("couldn't create %s pool\n", type); + zpool_put_driver(driver); + kfree(zpool); + return NULL; + } + + pr_info("created %s pool\n", type); + + spin_lock(&pools_lock); + list_add(&zpool->list, &pools_head); + spin_unlock(&pools_lock); + + return zpool; +} + +/** + * zpool_destroy_pool() - Destroy a zpool + * @pool The zpool to destroy. + * + * Implementations must guarantee this to be thread-safe, + * however only when destroying different pools. The same + * pool should only be destroyed once, and should not be used + * after it is destroyed. + * + * This destroys an existing zpool. The zpool should not be in use. + */ +void zpool_destroy_pool(struct zpool *zpool) +{ + pr_info("destroying pool type %s\n", zpool->type); + + spin_lock(&pools_lock); + list_del(&zpool->list); + spin_unlock(&pools_lock); + zpool->driver->destroy(zpool->pool); + zpool_put_driver(zpool->driver); + kfree(zpool); +} + +/** + * zpool_get_type() - Get the type of the zpool + * @pool The zpool to check + * + * This returns the type of the pool. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: The type of zpool. + */ +char *zpool_get_type(struct zpool *zpool) +{ + return zpool->type; +} + +/** + * zpool_malloc() - Allocate memory + * @pool The zpool to allocate from. + * @size The amount of memory to allocate. + * @gfp The GFP flags to use when allocating memory. + * @handle Pointer to the handle to set + * + * This allocates the requested amount of memory from the pool. + * The gfp flags will be used when allocating memory, if the + * implementation supports it. The provided @handle will be + * set to the allocated object handle. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: 0 on success, negative value on error. + */ +int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp, + unsigned long *handle) +{ + return zpool->driver->malloc(zpool->pool, size, gfp, handle); +} + +/** + * zpool_free() - Free previously allocated memory + * @pool The zpool that allocated the memory. + * @handle The handle to the memory to free. + * + * This frees previously allocated memory. This does not guarantee + * that the pool will actually free memory, only that the memory + * in the pool will become available for use by the pool. + * + * Implementations must guarantee this to be thread-safe, + * however only when freeing different handles. The same + * handle should only be freed once, and should not be used + * after freeing. + */ +void zpool_free(struct zpool *zpool, unsigned long handle) +{ + zpool->driver->free(zpool->pool, handle); +} + +/** + * zpool_shrink() - Shrink the pool size + * @pool The zpool to shrink. + * @pages The number of pages to shrink the pool. + * @reclaimed The number of pages successfully evicted. + * + * This attempts to shrink the actual memory size of the pool + * by evicting currently used handle(s). If the pool was + * created with no zpool_ops, or the evict call fails for any + * of the handles, this will fail. If non-NULL, the @reclaimed + * parameter will be set to the number of pages reclaimed, + * which may be more than the number of pages requested. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: 0 on success, negative value on error/failure. + */ +int zpool_shrink(struct zpool *zpool, unsigned int pages, + unsigned int *reclaimed) +{ + return zpool->driver->shrink(zpool->pool, pages, reclaimed); +} + +/** + * zpool_map_handle() - Map a previously allocated handle into memory + * @pool The zpool that the handle was allocated from + * @handle The handle to map + * @mm How the memory should be mapped + * + * This maps a previously allocated handle into memory. The @mm + * param indicates to the implementation how the memory will be + * used, i.e. read-only, write-only, read-write. If the + * implementation does not support it, the memory will be treated + * as read-write. + * + * This may hold locks, disable interrupts, and/or preemption, + * and the zpool_unmap_handle() must be called to undo those + * actions. The code that uses the mapped handle should complete + * its operatons on the mapped handle memory quickly and unmap + * as soon as possible. As the implementation may use per-cpu + * data, multiple handles should not be mapped concurrently on + * any cpu. + * + * Returns: A pointer to the handle's mapped memory area. + */ +void *zpool_map_handle(struct zpool *zpool, unsigned long handle, + enum zpool_mapmode mapmode) +{ + return zpool->driver->map(zpool->pool, handle, mapmode); +} + +/** + * zpool_unmap_handle() - Unmap a previously mapped handle + * @pool The zpool that the handle was allocated from + * @handle The handle to unmap + * + * This unmaps a previously mapped handle. Any locks or other + * actions that the implementation took in zpool_map_handle() + * will be undone here. The memory area returned from + * zpool_map_handle() should no longer be used after this. + */ +void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) +{ + zpool->driver->unmap(zpool->pool, handle); +} + +/** + * zpool_get_total_size() - The total size of the pool + * @pool The zpool to check + * + * This returns the total size in bytes of the pool. + * + * Returns: Total size of the zpool in bytes. + */ +u64 zpool_get_total_size(struct zpool *zpool) +{ + return zpool->driver->total_size(zpool->pool); +} + +static int __init init_zpool(void) +{ + pr_info("loaded\n"); + return 0; +} + +static void __exit exit_zpool(void) +{ + pr_info("unloaded\n"); +} + +module_init(init_zpool); +module_exit(exit_zpool); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Streetman "); +MODULE_DESCRIPTION("Common API for compressed memory storage"); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index bb62a4adc328..6a1827d3d231 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -240,7 +240,6 @@ struct mapping_area { enum zs_mapmode vm_mm; /* mapping mode */ }; - /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); -- cgit v1.2.3 From 68be302963230fa76600cd598935a830ac95dca2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 6 Aug 2014 16:08:45 -0700 Subject: fs.h, drivers/hwmon/asus_atk0110.c: fix DEFINE_SIMPLE_ATTRIBUTE semicolon definition and use The DEFINE_SIMPLE_ATTRIBUTE macro should not end in a ; Fix the one use in the kernel tree that did not have a semicolon. Signed-off-by: Joe Perches Acked-by: Guenter Roeck Acked-by: Luca Tettamanti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/asus_atk0110.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index ae208f612198..cccef87963e0 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm, atk_debugfs_gitm_get, NULL, - "0x%08llx\n") + "0x%08llx\n"); static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 2daccaf4b547..1ab6c6913040 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2688,7 +2688,7 @@ static const struct file_operations __fops = { \ .read = simple_attr_read, \ .write = simple_attr_write, \ .llseek = generic_file_llseek, \ -}; +} static inline __printf(1, 2) void __simple_attr_check_format(const char *fmt, ...) -- cgit v1.2.3 From 90a856436ddafbe0c6f8c18d7fc21aed3784e227 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Wed, 6 Aug 2014 16:08:47 -0700 Subject: include/linux/byteorder/generic.h: minor comment fix Signed-off-by: Geoff Levand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/byteorder/generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 0846e6b931ce..89f67c1c3160 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -2,7 +2,7 @@ #define _LINUX_BYTEORDER_GENERIC_H /* - * linux/byteorder_generic.h + * linux/byteorder/generic.h * Generic Byte-reordering support * * The "... p" macros, like le64_to_cpup, can be used with pointers -- cgit v1.2.3 From 42a9dc0b3d0f749375c767c7d5cab56e89160576 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Aug 2014 16:09:01 -0700 Subject: printk: rename DEFAULT_MESSAGE_LOGLEVEL Commit a8fe19ebfbfd ("kernel/printk: use symbolic defines for console loglevels") makes consistent use of symbolic values for printk() log levels. The naming scheme used is different from the one used for DEFAULT_MESSAGE_LOGLEVEL though. Change that symbol name to be MESSAGE_LOGLEVEL_DEFAULT for consistency. And because the value of that symbol comes from a similarly-named config option, rename CONFIG_DEFAULT_MESSAGE_LOGLEVEL as well. Signed-off-by: Alex Elder Cc: Andi Kleen Cc: Borislav Petkov Cc: Jan Kara Cc: John Stultz Cc: Petr Mladek Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 2 +- kernel/printk/printk.c | 2 +- lib/Kconfig.debug | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 319ff7e53efb..0990997a5304 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer) } /* printk's without a loglevel use this.. */ -#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL +#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ec3bfb0b1f62..770ed4821ba9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -56,7 +56,7 @@ int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ - DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ + MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ }; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cfe7df8f62cc..cb45f59685e6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -15,7 +15,7 @@ config PRINTK_TIME The behavior is also controlled by the kernel command line parameter printk.time=1. See Documentation/kernel-parameters.txt -config DEFAULT_MESSAGE_LOGLEVEL +config MESSAGE_LOGLEVEL_DEFAULT int "Default message log level (1-7)" range 1 7 default "4" -- cgit v1.2.3 From bc18dd335a161f9229ed3aaab88ce0706cbd9867 Mon Sep 17 00:00:00 2001 From: Ken Helias Date: Wed, 6 Aug 2014 16:09:14 -0700 Subject: list: make hlist_add_after() argument names match hlist_add_after_rcu() The argument names for hlist_add_after() are poorly chosen because they look the same as the ones for hlist_add_before() but have to be used differently. hlist_add_after_rcu() has made a better choice. Signed-off-by: Ken Helias Cc: "Paul E. McKenney" Cc: Christoph Hellwig Cc: Hugh Dickins Cc: Jeff Kirsher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index ef9594171062..624ec7f48293 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n, *(n->pprev) = n; } -static inline void hlist_add_after(struct hlist_node *n, - struct hlist_node *next) +static inline void hlist_add_after(struct hlist_node *prev, + struct hlist_node *n) { - next->next = n->next; - n->next = next; - next->pprev = &n->next; + n->next = prev->next; + prev->next = n; + n->pprev = &prev->next; - if(next->next) - next->next->pprev = &next->next; + if (n->next) + n->next->pprev = &n->next; } /* after that we'll appear to be on some hlist and hlist_del will work */ -- cgit v1.2.3 From 1d023284c31a4e40a94d5bbcb7dbb7a35ee0bcbc Mon Sep 17 00:00:00 2001 From: Ken Helias Date: Wed, 6 Aug 2014 16:09:16 -0700 Subject: list: fix order of arguments for hlist_add_after(_rcu) All other add functions for lists have the new item as first argument and the position where it is added as second argument. This was changed for no good reason in this function and makes using it unnecessary confusing. The name was changed to hlist_add_behind() to cause unconverted code to generate a compile error instead of using the wrong parameter order. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Ken Helias Cc: "Paul E. McKenney" Acked-by: Jeff Kirsher [intel driver bits] Cc: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/RCU/whatisRCU.txt | 2 +- drivers/gpu/drm/drm_hashtab.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- drivers/staging/lustre/lustre/libcfs/hash.c | 4 ++-- fs/namespace.c | 2 +- fs/notify/inode_mark.c | 2 +- fs/notify/vfsmount_mark.c | 2 +- include/linux/list.h | 4 ++-- include/linux/rculist.h | 8 ++++---- net/batman-adv/fragmentation.c | 2 +- net/bridge/br_multicast.c | 2 +- net/ipv4/fib_trie.c | 2 +- net/ipv6/addrlabel.c | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- 15 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 49b8551a3b68..e48c57f1943b 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -818,7 +818,7 @@ RCU pointer/list update: list_add_tail_rcu list_del_rcu list_replace_rcu - hlist_add_after_rcu + hlist_add_behind_rcu hlist_add_before_rcu hlist_add_head_rcu hlist_del_rcu diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c index 7e4bae760e27..c3b80fd65d62 100644 --- a/drivers/gpu/drm/drm_hashtab.c +++ b/drivers/gpu/drm/drm_hashtab.c @@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) parent = &entry->head; } if (parent) { - hlist_add_after_rcu(parent, &item->head); + hlist_add_behind_rcu(&item->head, parent); } else { hlist_add_head_rcu(&item->head, h_list); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 681a9e81ff51..e8ba7470700a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1948,7 +1948,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi, /* add filter to the list */ if (parent) - hlist_add_after(&parent->fdir_node, &input->fdir_node); + hlist_add_behind(&input->fdir_node, &parent->fdir_node); else hlist_add_head(&input->fdir_node, &pf->fdir_filter_list); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 94a1c07efeb0..e4100b5737b6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, /* add filter to the list */ if (parent) - hlist_add_after(&parent->fdir_node, &input->fdir_node); + hlist_add_behind(&input->fdir_node, &parent->fdir_node); else hlist_add_head(&input->fdir_node, &adapter->fdir_filter_list); diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c index 5dde79418297..8ef1deb59d4a 100644 --- a/drivers/staging/lustre/lustre/libcfs/hash.c +++ b/drivers/staging/lustre/lustre/libcfs/hash.c @@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, cfs_hash_dhead_t, dh_head); if (dh->dh_tail != NULL) /* not empty */ - hlist_add_after(dh->dh_tail, hnode); + hlist_add_behind(hnode, dh->dh_tail); else /* empty list */ hlist_add_head(hnode, &dh->dh_head); dh->dh_tail = hnode; @@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, cfs_hash_dhead_dep_t, dd_head); if (dh->dd_tail != NULL) /* not empty */ - hlist_add_after(dh->dd_tail, hnode); + hlist_add_behind(hnode, dh->dd_tail); else /* empty list */ hlist_add_head(hnode, &dh->dd_head); dh->dd_tail = hnode; diff --git a/fs/namespace.c b/fs/namespace.c index 182bc41cd887..2a1447c946e7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) list_splice(&head, n->list.prev); if (shadows) - hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); + hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); else hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mnt->mnt_mountpoint)); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 74825be65b7b..9ce062218de9 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); + hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list); out: fsnotify_recalc_inode_mask_locked(inode); spin_unlock(&inode->i_lock); diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 68ca5a8704b5..ac851e8376b1 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); + hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list); out: fsnotify_recalc_vfsmount_mask_locked(mnt); spin_unlock(&mnt->mnt_root->d_lock); diff --git a/include/linux/list.h b/include/linux/list.h index 624ec7f48293..cbbb96fcead9 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -654,8 +654,8 @@ static inline void hlist_add_before(struct hlist_node *n, *(n->pprev) = n; } -static inline void hlist_add_after(struct hlist_node *prev, - struct hlist_node *n) +static inline void hlist_add_behind(struct hlist_node *n, + struct hlist_node *prev) { n->next = prev->next; prev->next = n; diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8183b46fbaa2..372ad5e0dcb8 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, } /** - * hlist_add_after_rcu - * @prev: the existing element to add the new element after. + * hlist_add_behind_rcu * @n: the new element to add to the hash list. + * @prev: the existing element to add the new element after. * * Description: * Adds the specified element to the specified hlist @@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. */ -static inline void hlist_add_after_rcu(struct hlist_node *prev, - struct hlist_node *n) +static inline void hlist_add_behind_rcu(struct hlist_node *n, + struct hlist_node *prev) { n->next = prev->next; n->pprev = &prev->next; diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 022d18ab27a6..52c43f904220 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, /* Reached the end of the list, so insert after 'frag_entry_last'. */ if (likely(frag_entry_last)) { - hlist_add_after(&frag_entry_last->list, &frag_entry_new->list); + hlist_add_behind(&frag_entry_last->list, &frag_entry_new->list); chain->size += skb->len - hdr_size; chain->timestamp = jiffies; ret = true; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b4845f4b2bb4..7751c92c8c57 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br, } if (slot) - hlist_add_after_rcu(slot, &port->rlist); + hlist_add_behind_rcu(&port->rlist, slot); else hlist_add_head_rcu(&port->rlist, &br->router_list); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5afeb5aa4c7c..e9cb2588e416 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) last = li; } if (last) - hlist_add_after_rcu(&last->hlist, &new->hlist); + hlist_add_behind_rcu(&new->hlist, &last->hlist); else hlist_add_before_rcu(&new->hlist, &li->hlist); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 731e1e1722d9..fd0dc47f471d 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) last = p; } if (last) - hlist_add_after_rcu(&last->list, &newp->list); + hlist_add_behind_rcu(&newp->list, &last->list); else hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); out: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0525d78ba328..beeed602aeb3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -389,7 +389,7 @@ redo: if (h != h0) continue; hlist_del(&pol->bydst); - hlist_add_after(entry0, &pol->bydst); + hlist_add_behind(&pol->bydst, entry0); } entry0 = &pol->bydst; } @@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) break; } if (newpos) - hlist_add_after(newpos, &policy->bydst); + hlist_add_behind(&policy->bydst, newpos); else hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); -- cgit v1.2.3 From 0f9859ca92c9182bcb8f18c55cae1a04627cbb59 Mon Sep 17 00:00:00 2001 From: Ken Helias Date: Wed, 6 Aug 2014 16:09:18 -0700 Subject: klist: use same naming scheme as hlist for klist_add_after() The name was modified from hlist_add_after() to hlist_add_behind() when adjusting the order of arguments to match the one with klist_add_after(). This is necessary to break old code when it would use it the wrong way. Make klist follow this naming scheme for consistency. Signed-off-by: Ken Helias Cc: "Paul E. McKenney" Cc: Christoph Hellwig Cc: Hugh Dickins Cc: Jeff Kirsher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/klist.h | 2 +- lib/klist.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index a370ce57cf1d..61e5b723ae73 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -44,7 +44,7 @@ struct klist_node { extern void klist_add_tail(struct klist_node *n, struct klist *k); extern void klist_add_head(struct klist_node *n, struct klist *k); -extern void klist_add_after(struct klist_node *n, struct klist_node *pos); +extern void klist_add_behind(struct klist_node *n, struct klist_node *pos); extern void klist_add_before(struct klist_node *n, struct klist_node *pos); extern void klist_del(struct klist_node *n); diff --git a/lib/klist.c b/lib/klist.c index 358a368a2947..89b485a2a58d 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k) EXPORT_SYMBOL_GPL(klist_add_tail); /** - * klist_add_after - Init a klist_node and add it after an existing node + * klist_add_behind - Init a klist_node and add it after an existing node * @n: node we're adding. * @pos: node to put @n after */ -void klist_add_after(struct klist_node *n, struct klist_node *pos) +void klist_add_behind(struct klist_node *n, struct klist_node *pos) { struct klist *k = knode_klist(pos); @@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos) list_add(&n->n_node, &pos->n_node); spin_unlock(&k->k_lock); } -EXPORT_SYMBOL_GPL(klist_add_after); +EXPORT_SYMBOL_GPL(klist_add_behind); /** * klist_add_before - Init a klist_node and add it before an existing node -- cgit v1.2.3 From 62e7ca5280fd8cbf523970757e13f0324ce0daa0 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Aug 2014 16:09:21 -0700 Subject: zlib: clean up some dead code Cleanup unused `if 0'-ed functions, which have been dead since 2006 (commits 87c2ce3b9305 ("lib/zlib*: cleanups") by Adrian Bunk and 4f3865fb57a0 ("zlib_inflate: Upgrade library code to a recent version") by Richard Purdie): - zlib_deflateSetDictionary - zlib_deflateParams - zlib_deflateCopy - zlib_inflateSync - zlib_syncsearch - zlib_inflateSetDictionary - zlib_inflatePrime Signed-off-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zlib.h | 118 ------------------------------------- lib/zlib_deflate/deflate.c | 143 --------------------------------------------- lib/zlib_inflate/inflate.c | 132 ----------------------------------------- 3 files changed, 393 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zlib.h b/include/linux/zlib.h index 9c5a6b4de0a3..197abb2a54c5 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm, method). msg is set to null if there is no error message. deflateInit2 does not perform any compression: this will be done by deflate(). */ - -#if 0 -extern int zlib_deflateSetDictionary (z_streamp strm, - const Byte *dictionary, - uInt dictLength); -#endif -/* - Initializes the compression dictionary from the given byte sequence - without producing any compressed output. This function must be called - immediately after deflateInit, deflateInit2 or deflateReset, before any - call of deflate. The compressor and decompressor must use exactly the same - dictionary (see inflateSetDictionary). - - The dictionary should consist of strings (byte sequences) that are likely - to be encountered later in the data to be compressed, with the most commonly - used strings preferably put towards the end of the dictionary. Using a - dictionary is most useful when the data to be compressed is short and can be - predicted with good accuracy; the data can then be compressed better than - with the default empty dictionary. - - Depending on the size of the compression data structures selected by - deflateInit or deflateInit2, a part of the dictionary may in effect be - discarded, for example if the dictionary is larger than the window size in - deflate or deflate2. Thus the strings most likely to be useful should be - put at the end of the dictionary, not at the front. - - Upon return of this function, strm->adler is set to the Adler32 value - of the dictionary; the decompressor may later use this value to determine - which dictionary has been used by the compressor. (The Adler32 value - applies to the whole dictionary even if only a subset of the dictionary is - actually used by the compressor.) - - deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a - parameter is invalid (such as NULL dictionary) or the stream state is - inconsistent (for example if deflate has already been called for this stream - or if the compression method is bsort). deflateSetDictionary does not - perform any compression: this will be done by deflate(). -*/ - -#if 0 -extern int zlib_deflateCopy (z_streamp dest, z_streamp source); -#endif - -/* - Sets the destination stream as a complete copy of the source stream. - - This function can be useful when several compression strategies will be - tried, for example when there are several ways of pre-processing the input - data with a filter. The streams that will be discarded should then be freed - by calling deflateEnd. Note that deflateCopy duplicates the internal - compression state which can be quite large, so this strategy is slow and - can consume lots of memory. - - deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_STREAM_ERROR if the source stream state was inconsistent - (such as zalloc being NULL). msg is left unchanged in both source and - destination. -*/ extern int zlib_deflateReset (z_streamp strm); /* @@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s) return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; } -#if 0 -extern int zlib_deflateParams (z_streamp strm, int level, int strategy); -#endif -/* - Dynamically update the compression level and compression strategy. The - interpretation of level and strategy is as in deflateInit2. This can be - used to switch between compression and straight copy of the input data, or - to switch to a different kind of input data requiring a different - strategy. If the compression level is changed, the input available so far - is compressed with the old level (and may be flushed); the new level will - take effect only at the next call of deflate(). - - Before the call of deflateParams, the stream state must be set as for - a call of deflate(), since the currently available input may have to - be compressed and flushed. In particular, strm->avail_out must be non-zero. - - deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source - stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR - if strm->avail_out was zero. -*/ - /* extern int inflateInit2 (z_streamp strm, int windowBits); @@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int windowBits); and avail_out are unchanged.) */ -extern int zlib_inflateSetDictionary (z_streamp strm, - const Byte *dictionary, - uInt dictLength); -/* - Initializes the decompression dictionary from the given uncompressed byte - sequence. This function must be called immediately after a call of inflate, - if that call returned Z_NEED_DICT. The dictionary chosen by the compressor - can be determined from the adler32 value returned by that call of inflate. - The compressor and decompressor must use exactly the same dictionary (see - deflateSetDictionary). For raw inflate, this function can be called - immediately after inflateInit2() or inflateReset() and before any call of - inflate() to set the dictionary. The application must insure that the - dictionary that was used for compression is provided. - - inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a - parameter is invalid (such as NULL dictionary) or the stream state is - inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the - expected one (incorrect adler32 value). inflateSetDictionary does not - perform any decompression: this will be done by subsequent calls of - inflate(). -*/ - -#if 0 -extern int zlib_inflateSync (z_streamp strm); -#endif -/* - Skips invalid compressed data until a full flush point (see above the - description of deflate with Z_FULL_FLUSH) can be found, or until all - available input is skipped. No output is provided. - - inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR - if no more input was provided, Z_DATA_ERROR if no flush point has been found, - or Z_STREAM_ERROR if the stream structure was inconsistent. In the success - case, the application may save the current current value of total_in which - indicates where valid compressed data was found. In the error case, the - application may repeatedly call inflateSync, providing more input each time, - until success or end of the input data. -*/ - extern int zlib_inflateReset (z_streamp strm); /* This function is equivalent to inflateEnd followed by inflateInit, diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index d63381e8e333..d20ef458f137 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -249,52 +249,6 @@ int zlib_deflateInit2( return zlib_deflateReset(strm); } -/* ========================================================================= */ -#if 0 -int zlib_deflateSetDictionary( - z_streamp strm, - const Byte *dictionary, - uInt dictLength -) -{ - deflate_state *s; - uInt length = dictLength; - uInt n; - IPos hash_head = 0; - - if (strm == NULL || strm->state == NULL || dictionary == NULL) - return Z_STREAM_ERROR; - - s = (deflate_state *) strm->state; - if (s->status != INIT_STATE) return Z_STREAM_ERROR; - - strm->adler = zlib_adler32(strm->adler, dictionary, dictLength); - - if (length < MIN_MATCH) return Z_OK; - if (length > MAX_DIST(s)) { - length = MAX_DIST(s); -#ifndef USE_DICT_HEAD - dictionary += dictLength - length; /* use the tail of the dictionary */ -#endif - } - memcpy((char *)s->window, dictionary, length); - s->strstart = length; - s->block_start = (long)length; - - /* Insert all strings in the hash table (except for the last two bytes). - * s->lookahead stays null, so s->ins_h will be recomputed at the next - * call of fill_window. - */ - s->ins_h = s->window[0]; - UPDATE_HASH(s, s->ins_h, s->window[1]); - for (n = 0; n <= length - MIN_MATCH; n++) { - INSERT_STRING(s, n, hash_head); - } - if (hash_head) hash_head = 0; /* to make compiler happy */ - return Z_OK; -} -#endif /* 0 */ - /* ========================================================================= */ int zlib_deflateReset( z_streamp strm @@ -326,45 +280,6 @@ int zlib_deflateReset( return Z_OK; } -/* ========================================================================= */ -#if 0 -int zlib_deflateParams( - z_streamp strm, - int level, - int strategy -) -{ - deflate_state *s; - compress_func func; - int err = Z_OK; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - s = (deflate_state *) strm->state; - - if (level == Z_DEFAULT_COMPRESSION) { - level = 6; - } - if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { - return Z_STREAM_ERROR; - } - func = configuration_table[s->level].func; - - if (func != configuration_table[level].func && strm->total_in != 0) { - /* Flush the last buffer: */ - err = zlib_deflate(strm, Z_PARTIAL_FLUSH); - } - if (s->level != level) { - s->level = level; - s->max_lazy_match = configuration_table[level].max_lazy; - s->good_match = configuration_table[level].good_length; - s->nice_match = configuration_table[level].nice_length; - s->max_chain_length = configuration_table[level].max_chain; - } - s->strategy = strategy; - return err; -} -#endif /* 0 */ - /* ========================================================================= * Put a short in the pending buffer. The 16-bit value is put in MSB order. * IN assertion: the stream state is correct and there is enough room in @@ -568,64 +483,6 @@ int zlib_deflateEnd( return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; } -/* ========================================================================= - * Copy the source state to the destination state. - */ -#if 0 -int zlib_deflateCopy ( - z_streamp dest, - z_streamp source -) -{ -#ifdef MAXSEG_64K - return Z_STREAM_ERROR; -#else - deflate_state *ds; - deflate_state *ss; - ush *overlay; - deflate_workspace *mem; - - - if (source == NULL || dest == NULL || source->state == NULL) { - return Z_STREAM_ERROR; - } - - ss = (deflate_state *) source->state; - - *dest = *source; - - mem = (deflate_workspace *) dest->workspace; - - ds = &(mem->deflate_memory); - - dest->state = (struct internal_state *) ds; - *ds = *ss; - ds->strm = dest; - - ds->window = (Byte *) mem->window_memory; - ds->prev = (Pos *) mem->prev_memory; - ds->head = (Pos *) mem->head_memory; - overlay = (ush *) mem->overlay_memory; - ds->pending_buf = (uch *) overlay; - - memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); - memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); - memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); - - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; - - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; - ds->bl_desc.dyn_tree = ds->bl_tree; - - return Z_OK; -#endif -} -#endif /* 0 */ - /* =========================================================================== * Read a new buffer from the current input stream, update the adler32 * and total number of bytes read. All deflate() input goes through diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c index f5ce87b0800e..58a733b10387 100644 --- a/lib/zlib_inflate/inflate.c +++ b/lib/zlib_inflate/inflate.c @@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm) return Z_OK; } -#if 0 -int zlib_inflatePrime(z_streamp strm, int bits, int value) -{ - struct inflate_state *state; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; - value &= (1L << bits) - 1; - state->hold += value << state->bits; - state->bits += bits; - return Z_OK; -} -#endif - int zlib_inflateInit2(z_streamp strm, int windowBits) { struct inflate_state *state; @@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm) return Z_OK; } -#if 0 -int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary, - uInt dictLength) -{ - struct inflate_state *state; - unsigned long id; - - /* check state */ - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (state->wrap != 0 && state->mode != DICT) - return Z_STREAM_ERROR; - - /* check for correct dictionary id */ - if (state->mode == DICT) { - id = zlib_adler32(0L, NULL, 0); - id = zlib_adler32(id, dictionary, dictLength); - if (id != state->check) - return Z_DATA_ERROR; - } - - /* copy dictionary to window */ - zlib_updatewindow(strm, strm->avail_out); - - if (dictLength > state->wsize) { - memcpy(state->window, dictionary + dictLength - state->wsize, - state->wsize); - state->whave = state->wsize; - } - else { - memcpy(state->window + state->wsize - dictLength, dictionary, - dictLength); - state->whave = dictLength; - } - state->havedict = 1; - return Z_OK; -} -#endif - -#if 0 -/* - Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found - or when out of input. When called, *have is the number of pattern bytes - found in order so far, in 0..3. On return *have is updated to the new - state. If on return *have equals four, then the pattern was found and the - return value is how many bytes were read including the last byte of the - pattern. If *have is less than four, then the pattern has not been found - yet and the return value is len. In the latter case, zlib_syncsearch() can be - called again with more data and the *have state. *have is initialized to - zero for the first call. - */ -static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf, - unsigned len) -{ - unsigned got; - unsigned next; - - got = *have; - next = 0; - while (next < len && got < 4) { - if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) - got++; - else if (buf[next]) - got = 0; - else - got = 4 - got; - next++; - } - *have = got; - return next; -} -#endif - -#if 0 -int zlib_inflateSync(z_streamp strm) -{ - unsigned len; /* number of bytes to look at or looked at */ - unsigned long in, out; /* temporary to save total_in and total_out */ - unsigned char buf[4]; /* to restore bit buffer to byte string */ - struct inflate_state *state; - - /* check parameters */ - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; - - /* if first time, start search in bit buffer */ - if (state->mode != SYNC) { - state->mode = SYNC; - state->hold <<= state->bits & 7; - state->bits -= state->bits & 7; - len = 0; - while (state->bits >= 8) { - buf[len++] = (unsigned char)(state->hold); - state->hold >>= 8; - state->bits -= 8; - } - state->have = 0; - zlib_syncsearch(&(state->have), buf, len); - } - - /* search available input */ - len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in); - strm->avail_in -= len; - strm->next_in += len; - strm->total_in += len; - - /* return no joy or set up to restart inflate() on a new block */ - if (state->have != 4) return Z_DATA_ERROR; - in = strm->total_in; out = strm->total_out; - zlib_inflateReset(strm); - strm->total_in = in; strm->total_out = out; - state->mode = TYPE; - return Z_OK; -} -#endif - /* * This subroutine adds the data at next_in/avail_in to the output history * without performing any output. The output buffer must be "caught up"; -- cgit v1.2.3 From b01250856b25f4417c51aa33afc451fbf7da1484 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Wed, 6 Aug 2014 16:09:23 -0700 Subject: lib: add lib/glob.c This is a helper function from drivers/ata/libata_core.c, where it is used to blacklist particular device models. It's being moved to lib/ so other drivers may use it for the same purpose. This implementation in non-recursive, so is safe for the kernel stack. [akpm@linux-foundation.org: fix sparse warning] Signed-off-by: George Spelvin Cc: Randy Dunlap Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/glob.h | 9 ++++ lib/Kconfig | 19 ++++++++ lib/Makefile | 2 + lib/glob.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 153 insertions(+) create mode 100644 include/linux/glob.h create mode 100644 lib/glob.c (limited to 'include/linux') diff --git a/include/linux/glob.h b/include/linux/glob.h new file mode 100644 index 000000000000..861d8347d08e --- /dev/null +++ b/include/linux/glob.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_GLOB_H +#define _LINUX_GLOB_H + +#include /* For bool */ +#include /* For __pure */ + +bool __pure glob_match(char const *pat, char const *str); + +#endif /* _LINUX_GLOB_H */ diff --git a/lib/Kconfig b/lib/Kconfig index a8a775730c09..41bfeec72e40 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -396,6 +396,25 @@ config CPU_RMAP config DQL bool +config GLOB + bool +# This actually supports modular compilation, but the module overhead +# is ridiculous for the amount of code involved. Until an out-of-tree +# driver asks for it, we'll just link it directly it into the kernel +# when required. Since we're ignoring out-of-tree users, there's also +# no need bother prompting for a manual decision: +# prompt "glob_match() function" + help + This option provides a glob_match function for performing + simple text pattern matching. It originated in the ATA code + to blacklist particular drive models, but other device drivers + may need similar functionality. + + All drivers in the Linux kernel tree that require this function + should automatically select this option. Say N unless you + are compiling an out-of tree driver which tells you that it + depends on this. + # # Netlink attribute parsing support is select'ed if needed # diff --git a/lib/Makefile b/lib/Makefile index 8427df95dade..d6b4bc496408 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -137,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_DQL) += dynamic_queue_limits.o +obj-$(CONFIG_GLOB) += glob.o + obj-$(CONFIG_MPILIB) += mpi/ obj-$(CONFIG_SIGNATURE) += digsig.o diff --git a/lib/glob.c b/lib/glob.c new file mode 100644 index 000000000000..0ba3ea86b546 --- /dev/null +++ b/lib/glob.c @@ -0,0 +1,123 @@ +#include +#include + +/* + * The only reason this code can be compiled as a module is because the + * ATA code that depends on it can be as well. In practice, they're + * both usually compiled in and the module overhead goes away. + */ +MODULE_DESCRIPTION("glob(7) matching"); +MODULE_LICENSE("Dual MIT/GPL"); + +/** + * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0) + * @pat: Shell-style pattern to match, e.g. "*.[ch]". + * @str: String to match. The pattern must match the entire string. + * + * Perform shell-style glob matching, returning true (1) if the match + * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0). + * + * Pattern metacharacters are ?, *, [ and \. + * (And, inside character classes, !, - and ].) + * + * This is small and simple implementation intended for device blacklists + * where a string is matched against a number of patterns. Thus, it + * does not preprocess the patterns. It is non-recursive, and run-time + * is at most quadratic: strlen(@str)*strlen(@pat). + * + * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa"); + * it takes 6 passes over the pattern before matching the string. + * + * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT + * treat / or leading . specially; it isn't actually used for pathnames. + * + * Note that according to glob(7) (and unlike bash), character classes + * are complemented by a leading !; this does not support the regex-style + * [^a-z] syntax. + * + * An opening bracket without a matching close is matched literally. + */ +bool __pure glob_match(char const *pat, char const *str) +{ + /* + * Backtrack to previous * on mismatch and retry starting one + * character later in the string. Because * matches all characters + * (no exception for /), it can be easily proved that there's + * never a need to backtrack multiple levels. + */ + char const *back_pat = NULL, *back_str = back_str; + + /* + * Loop over each token (character or class) in pat, matching + * it against the remaining unmatched tail of str. Return false + * on mismatch, or true after matching the trailing nul bytes. + */ + for (;;) { + unsigned char c = *str++; + unsigned char d = *pat++; + + switch (d) { + case '?': /* Wildcard: anything but nul */ + if (c == '\0') + return false; + break; + case '*': /* Any-length wildcard */ + if (*pat == '\0') /* Optimize trailing * case */ + return true; + back_pat = pat; + back_str = --str; /* Allow zero-length match */ + break; + case '[': { /* Character class */ + bool match = false, inverted = (*pat == '!'); + char const *class = pat + inverted; + unsigned char a = *class++; + + /* + * Iterate over each span in the character class. + * A span is either a single character a, or a + * range a-b. The first span may begin with ']'. + */ + do { + unsigned char b = a; + + if (a == '\0') /* Malformed */ + goto literal; + + if (class[0] == '-' && class[1] != ']') { + b = class[1]; + + if (b == '\0') + goto literal; + + class += 2; + /* Any special action if a > b? */ + } + match |= (a <= c && c <= b); + } while ((a = *class++) != ']'); + + if (match == inverted) + goto backtrack; + pat = class; + } + break; + case '\\': + d = *pat++; + /*FALLTHROUGH*/ + default: /* Literal character */ +literal: + if (c == d) { + if (d == '\0') + return true; + break; + } +backtrack: + if (c == '\0' || !back_pat) + return false; /* No point continuing */ + /* Try again from last *, one character later in str. */ + pat = back_pat; + str = ++back_str; + break; + } + } +} +EXPORT_SYMBOL(glob_match); -- cgit v1.2.3 From 087face5265026d4fe664bdb580f4904bd10cfbf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 6 Aug 2014 16:09:36 -0700 Subject: kernel.h: remove deprecated pack_hex_byte It's been nearly 3 years now since commit 55036ba76b2d ("lib: rename pack_hex_byte() to hex_byte_pack()") so it's time to remove this deprecated and unused static inline. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a9e2268ecccb..3dc22abbc68a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -493,11 +493,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte) return buf; } -static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) -{ - return hex_byte_pack(buf, byte); -} - extern int hex_to_bin(char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); -- cgit v1.2.3 From 0679cc483669d08153d158273455398a389ee9ca Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:49 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_empty unsigned Many functions in lib/bitmap.c start with an expression such as lim = bits/BITS_PER_LONG. Since bits has type (signed) int, and since gcc cannot know that it is in fact non-negative, it generates worse code than it could. These patches, mostly consisting of changing various parameters to unsigned, gives a slight overall code reduction: add/remove: 1/1 grow/shrink: 8/16 up/down: 251/-414 (-163) function old new delta tick_device_uses_broadcast 335 425 +90 __irq_alloc_descs 498 554 +56 __bitmap_andnot 73 115 +42 __bitmap_and 70 101 +31 bitmap_weight - 11 +11 copy_hugetlb_page_range 752 762 +10 follow_hugetlb_page 846 854 +8 hugetlb_init 1415 1417 +2 hugetlb_nrpages_setup 130 131 +1 hugetlb_add_hstate 377 376 -1 bitmap_allocate_region 82 80 -2 select_task_rq_fair 2202 2191 -11 hweight_long 66 55 -11 __reg_op 230 219 -11 dm_stats_message 2849 2833 -16 bitmap_parselist 92 74 -18 __bitmap_weight 115 97 -18 __bitmap_subset 153 129 -24 __bitmap_full 128 104 -24 __bitmap_empty 120 96 -24 bitmap_set 179 149 -30 bitmap_clear 185 155 -30 __bitmap_equal 136 105 -31 __bitmap_intersects 148 108 -40 __bitmap_complement 109 67 -42 tick_device_setup_broadcast_func.isra 81 - -81 [The increases in __bitmap_and{,not} are due to bug fixes 17/18,18/18. No idea why bitmap_weight suddenly appears.] While 163 bytes treewide is insignificant, I believe the bitmap functions are often called with locks held, so saving even a few cycles might be worth it. While making these changes, I found a few other things that might be worth including. 16,17,18 are actual bug fixes. The rest shouldn't change the behaviour of any of the functions, provided no-one passed negative nbits values. If something should come up, it should be fairly bisectable. A few issues I thought about, but didn't know what to do with: * Many of the functions misbehave if nbits is compile-time 0; the out-of-line functions generally handle 0 correctly. bitmap_fill() is particularly bad, whether the 0 is known at compile time or not. It would probably be nice to add detection of at least compile-time 0 and handle that appropriately. * I didn't change __bitmap_shift_{left,right} to use unsigned because I want to fully understand why the algorithm works before making that change. However, AFAICT, they behave correctly for all (positive) shift amounts. This is not the case for the small_const_nbits versions. If for example nbits = n = BITS_PER_LONG, the shift operators turn into no-ops (at least on x86), so one get *dst = *src, whereas one would expect to get *dst=0. That difference in behaviour is somewhat annoying. This patch (of 18): The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 7ad634501e48..3d3fd6b2f157 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -88,7 +88,7 @@ * lib/bitmap.c provides these functions: */ -extern int __bitmap_empty(const unsigned long *bitmap, int bits); +extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_full(const unsigned long *bitmap, int bits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); @@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1, return __bitmap_subset(src1, src2, nbits); } -static inline int bitmap_empty(const unsigned long *src, int nbits) +static inline int bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index 06f7e4fe8d2d..378911001442 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -40,9 +40,9 @@ * for the best explanations of this ordering. */ -int __bitmap_empty(const unsigned long *bitmap, int bits) +int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap[k]) return 0; -- cgit v1.2.3 From 8397927c8045c58afc68ef839855eb5505259df3 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:51 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_full unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3d3fd6b2f157..bc7e520d3f78 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -89,7 +89,7 @@ */ extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); -extern int __bitmap_full(const unsigned long *bitmap, int bits); +extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, @@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, unsigned nbits) return __bitmap_empty(src, nbits); } -static inline int bitmap_full(const unsigned long *src, int nbits) +static inline int bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index 378911001442..9859f38660f9 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) } EXPORT_SYMBOL(__bitmap_empty); -int __bitmap_full(const unsigned long *bitmap, int bits) +int __bitmap_full(const unsigned long *bitmap, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (~bitmap[k]) return 0; -- cgit v1.2.3 From 5e068069319a9fb02fb14337c2cedeae5f16d812 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:53 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_equal unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index bc7e520d3f78..1e0f46c91125 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -91,7 +91,7 @@ extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits); extern void __bitmap_shift_right(unsigned long *dst, diff --git a/lib/bitmap.c b/lib/bitmap.c index 9859f38660f9..d6bb955e71cb 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, unsigned int bits) EXPORT_SYMBOL(__bitmap_full); int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] != bitmap2[k]) return 0; -- cgit v1.2.3 From 3d6684f4e6a46f3a8263f5681e093bccbb767a1c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:55 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_complement unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 6 +++--- lib/bitmap.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 1e0f46c91125..21fb52ffe444 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -93,7 +93,7 @@ extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, - int bits); + unsigned int nbits); extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, int shift, int bits); extern void __bitmap_shift_left(unsigned long *dst, @@ -222,7 +222,7 @@ static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, } static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, - int nbits) + unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); @@ -231,7 +231,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr } static inline int bitmap_equal(const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index d6bb955e71cb..0f2f845702eb 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -86,9 +86,9 @@ int __bitmap_equal(const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_equal); -void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) +void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) dst[k] = ~src[k]; -- cgit v1.2.3 From 65b4ee62c9cd10640f0054f47fd84c7920e8c118 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:57 -0700 Subject: lib: bitmap: remove unnecessary mask from bitmap_complement Since the extra bits are "don't care", there is no reason to mask the last word to the used bits when complementing. This shaves off yet a few bytes. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 21fb52ffe444..f42d72d5fe82 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -225,7 +225,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr unsigned int nbits) { if (small_const_nbits(nbits)) - *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); + *dst = ~(*src); else __bitmap_complement(dst, src, nbits); } diff --git a/lib/bitmap.c b/lib/bitmap.c index 0f2f845702eb..4387e3c092fd 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -93,7 +93,7 @@ void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned dst[k] = ~src[k]; if (bits % BITS_PER_LONG) - dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); + dst[k] = ~src[k]; } EXPORT_SYMBOL(__bitmap_complement); -- cgit v1.2.3 From 2f9305eb31097fdd3dc86daca65d8097d1fcf2ff Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:59 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_{and,or,xor,andnot} unsigned This change is only for consistency with the changes to the other bitmap_* functions; it doesn't change the size of the generated code: inside BITS_TO_LONGS there is a sizeof(long), which causes bits to be interpreted as unsigned anyway. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 16 ++++++++-------- lib/bitmap.c | 24 ++++++++++++------------ 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index f42d72d5fe82..7048782fe5b9 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -99,13 +99,13 @@ extern void __bitmap_shift_right(unsigned long *dst, extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, int shift, int bits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern int __bitmap_subset(const unsigned long *bitmap1, @@ -188,7 +188,7 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, } static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2) != 0; @@ -196,7 +196,7 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; @@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, } static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; @@ -214,7 +214,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, } static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2)) != 0; diff --git a/lib/bitmap.c b/lib/bitmap.c index 4387e3c092fd..03207373cc5a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -182,10 +182,10 @@ void __bitmap_shift_left(unsigned long *dst, EXPORT_SYMBOL(__bitmap_shift_left); int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); unsigned long result = 0; for (k = 0; k < nr; k++) @@ -195,10 +195,10 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_and); void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); for (k = 0; k < nr; k++) dst[k] = bitmap1[k] | bitmap2[k]; @@ -206,10 +206,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_or); void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); for (k = 0; k < nr; k++) dst[k] = bitmap1[k] ^ bitmap2[k]; @@ -217,10 +217,10 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_xor); int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); unsigned long result = 0; for (k = 0; k < nr; k++) -- cgit v1.2.3 From 6dfe9799c2a03d225316a3e959b0447f3f50303e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:01 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_intersects unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 7048782fe5b9..2f3f3a4d5996 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -107,7 +107,7 @@ extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern int __bitmap_weight(const unsigned long *bitmap, int bits); @@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1, } static inline int bitmap_intersects(const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; diff --git a/lib/bitmap.c b/lib/bitmap.c index 03207373cc5a..e85daa90b237 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -230,9 +230,9 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_andnot); int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) return 1; -- cgit v1.2.3 From 5be20213e855550de2b32fde6fc116f74bab86a6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:03 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_subset unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 2f3f3a4d5996..87e88f79def1 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -109,7 +109,7 @@ extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, int bits); extern void bitmap_set(unsigned long *map, int i, int len); @@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1, } static inline int bitmap_subset(const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index e85daa90b237..c9bff5379795 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -245,9 +245,9 @@ int __bitmap_intersects(const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_intersects); int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & ~bitmap2[k]) return 0; -- cgit v1.2.3 From 877d9f3b63ac2e5dbc51cbcdff156433f03b3a32 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:05 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_weight unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. I didn't change the return type, since that might change the semantics of some expression containing a call to bitmap_weight(). Certainly an int is capable of holding the result. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 87e88f79def1..64b0ebe9f9a8 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -110,7 +110,7 @@ extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); -extern int __bitmap_weight(const unsigned long *bitmap, int bits); +extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); extern void bitmap_set(unsigned long *map, int i, int len); extern void bitmap_clear(unsigned long *map, int start, int nr); @@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, unsigned int nbits) return __bitmap_full(src, nbits); } -static inline int bitmap_weight(const unsigned long *src, int nbits) +static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index c9bff5379795..f69435c23f9c 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -259,9 +259,10 @@ int __bitmap_subset(const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_subset); -int __bitmap_weight(const unsigned long *bitmap, int bits) +int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) { - int k, w = 0, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; + int w = 0; for (k = 0; k < lim; k++) w += hweight_long(bitmap[k]); -- cgit v1.2.3 From fb5ac54263ef3fcb5c469a61e0ab6b06e45e2307 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:07 -0700 Subject: lib: bitmap: make the start index of bitmap_set unsigned The compiler can generate slightly smaller and simpler code when it knows that "start" is non-negative. Also, use the names "start" and "len" for the two parameters in both header file and implementation, instead of the previous mix. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 64b0ebe9f9a8..ad2c67d3583e 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -112,7 +112,7 @@ extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); -extern void bitmap_set(unsigned long *map, int i, int len); +extern void bitmap_set(unsigned long *map, unsigned int start, int len); extern void bitmap_clear(unsigned long *map, int start, int nr); extern unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, diff --git a/lib/bitmap.c b/lib/bitmap.c index f69435c23f9c..2a3a92fc3355 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -274,21 +274,21 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) } EXPORT_SYMBOL(__bitmap_weight); -void bitmap_set(unsigned long *map, int start, int nr) +void bitmap_set(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned int size = start + len; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_set >= 0) { + while (len - bits_to_set >= 0) { *p |= mask_to_set; - nr -= bits_to_set; + len -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = ~0UL; p++; } - if (nr) { + if (len) { mask_to_set &= BITMAP_LAST_WORD_MASK(size); *p |= mask_to_set; } -- cgit v1.2.3 From 154f5e38f30f262025c8c2e825376f6eb51e8bcb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:10 -0700 Subject: lib: bitmap: make the start index of bitmap_clear unsigned The compiler can generate slightly smaller and simpler code when it knows that "start" is non-negative. Also, use the names "start" and "len" for the two parameters for consistency with bitmap_set. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index ad2c67d3583e..83c1c7d25073 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -113,7 +113,7 @@ extern int __bitmap_subset(const unsigned long *bitmap1, extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); extern void bitmap_set(unsigned long *map, unsigned int start, int len); -extern void bitmap_clear(unsigned long *map, int start, int nr); +extern void bitmap_clear(unsigned long *map, unsigned int start, int len); extern unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, unsigned long start, diff --git a/lib/bitmap.c b/lib/bitmap.c index 2a3a92fc3355..5d2540396300 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -295,21 +295,21 @@ void bitmap_set(unsigned long *map, unsigned int start, int len) } EXPORT_SYMBOL(bitmap_set); -void bitmap_clear(unsigned long *map, int start, int nr) +void bitmap_clear(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned int size = start + len; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_clear >= 0) { + while (len - bits_to_clear >= 0) { *p &= ~mask_to_clear; - nr -= bits_to_clear; + len -= bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = ~0UL; p++; } - if (nr) { + if (len) { mask_to_clear &= BITMAP_LAST_WORD_MASK(size); *p &= ~mask_to_clear; } -- cgit v1.2.3 From 9279d3286e10736766edcaf815ae10e00856e448 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:16 -0700 Subject: lib: bitmap: change parameter of bitmap_*_region to unsigned Changing the pos parameter of __reg_op to unsigned allows the compiler to generate slightly smaller and simpler code. Also update its callers bitmap_*_region to receive and pass unsigned int. The return types of bitmap_find_free_region and bitmap_allocate_region are still int to allow a negative error code to be returned. An int is certainly capable of representing any realistic return value. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 6 +++--- lib/bitmap.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 83c1c7d25073..210037833356 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, int bits); extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, int sz, int bits); -extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); -extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); -extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); +extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); +extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); +extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); diff --git a/lib/bitmap.c b/lib/bitmap.c index 2714df9f5cdb..c2f3807b3601 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1042,7 +1042,7 @@ enum { REG_OP_RELEASE, /* clear all bits in region */ }; -static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op) +static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op) { int nbits_reg; /* number of bits in region */ int index; /* index first long of region in bitmap */ @@ -1108,11 +1108,11 @@ done: * Return the bit offset in bitmap of the allocated region, * or -errno on failure. */ -int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) { - int pos, end; /* scans bitmap by regions of size order */ + unsigned int pos, end; /* scans bitmap by regions of size order */ - for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { + for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) { if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) continue; __reg_op(bitmap, pos, order, REG_OP_ALLOC); @@ -1133,7 +1133,7 @@ EXPORT_SYMBOL(bitmap_find_free_region); * * No return value. */ -void bitmap_release_region(unsigned long *bitmap, int pos, int order) +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) { __reg_op(bitmap, pos, order, REG_OP_RELEASE); } @@ -1150,7 +1150,7 @@ EXPORT_SYMBOL(bitmap_release_region); * Return 0 on success, or %-EBUSY if specified region wasn't * free (not all bits were zero). */ -int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) { if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) return -EBUSY; -- cgit v1.2.3 From c5341ec8904ebff50f365a2626da6ab525d63b9e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:20 -0700 Subject: lib: bitmap: add missing mask in bitmap_shift_right There is no guarantee that *src does not contain garbage bits outside the lower nbits, so we need to mask it before the shift-and-assign. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 210037833356..75df61d9ecfb 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, int n, int nbits) { if (small_const_nbits(nbits)) - *dst = *src >> n; + *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n; else __bitmap_shift_right(dst, src, n, nbits); } -- cgit v1.2.3 From 7e5f97d1927f41affa21aa5b321865ceab1994ce Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:22 -0700 Subject: lib: bitmap: add missing mask in bitmap_and Apparently, bitmap_and is supposed to return whether the new bitmap is empty. But it didn't take potential garbage bits in the last word into account. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 75df61d9ecfb..3399a9ecd991 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -191,7 +191,7 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) - return (*dst = *src1 & *src2) != 0; + return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } diff --git a/lib/bitmap.c b/lib/bitmap.c index faaf7206d4cf..ce2ec80bf431 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -185,11 +185,14 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; - unsigned int nr = BITS_TO_LONGS(bits); + unsigned int lim = bits/BITS_PER_LONG; unsigned long result = 0; - for (k = 0; k < nr; k++) + for (k = 0; k < lim; k++) result |= (dst[k] = bitmap1[k] & bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); return result != 0; } EXPORT_SYMBOL(__bitmap_and); -- cgit v1.2.3 From 74e765319084bd2940a9612ada961f0f7385936c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:24 -0700 Subject: lib: bitmap: add missing mask in bitmap_andnot Apparently, bitmap_andnot is supposed to return whether the new bitmap is empty. But it didn't take potential garbage bits in the last word into account. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3399a9ecd991..e1c8d080c427 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -217,7 +217,7 @@ static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) - return (*dst = *src1 & ~(*src2)) != 0; + return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } diff --git a/lib/bitmap.c b/lib/bitmap.c index ce2ec80bf431..1e031f2c9aba 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -223,11 +223,14 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; - unsigned int nr = BITS_TO_LONGS(bits); + unsigned int lim = bits/BITS_PER_LONG; unsigned long result = 0; - for (k = 0; k < nr; k++) + for (k = 0; k < lim; k++) result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); return result != 0; } EXPORT_SYMBOL(__bitmap_andnot); -- cgit v1.2.3 From b3ea074fd3c798bee861aa076dc2f873461ae26f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 4 Aug 2014 13:05:56 +0900 Subject: gpio: add missing includes in machine.h linux/types.h and linux/list.h should be included so the typed used in the header file are always properly declared. Reported-by: Stephen Rothwell Reported-by: Thierry Reding Signed-off-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/machine.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index b8ad87fab4ce..e2706140eaff 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -1,6 +1,9 @@ #ifndef __LINUX_GPIO_MACHINE_H #define __LINUX_GPIO_MACHINE_H +#include +#include + enum gpio_lookup_flags { GPIO_ACTIVE_HIGH = (0 << 0), GPIO_ACTIVE_LOW = (1 << 0), -- cgit v1.2.3 From ed44724b79d8e03a40665436019cf22baba80d30 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 Apr 2014 14:37:20 -0400 Subject: acct: switch to __kernel_write() Signed-off-by: Al Viro --- fs/internal.h | 1 - include/linux/fs.h | 1 + kernel/acct.c | 31 ++++++++++++------------------- 3 files changed, 13 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/internal.h b/fs/internal.h index 465742407466..9a2edba87c2b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -131,7 +131,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, /* * read_write.c */ -extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern int rw_verify_area(int, struct file *, const loff_t *, size_t); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index e11d60cc867b..4b7d57cf7863 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2335,6 +2335,7 @@ extern int do_pipe_flags(int *, int); extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); +extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ diff --git a/kernel/acct.c b/kernel/acct.c index 807ebc5d8333..8082d9875d6b 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -456,12 +456,16 @@ static void do_acct_process(struct bsd_acct_struct *acct, { struct pacct_struct *pacct = ¤t->signal->pacct; acct_t ac; - mm_segment_t fs; unsigned long flim; u64 elapsed, run_time; struct tty_struct *tty; const struct cred *orig_cred; + /* + * Accounting records are not subject to resource limits. + */ + flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; /* Perform file operations on behalf of whoever enabled accounting */ orig_cred = override_creds(file->f_cred); @@ -536,25 +540,14 @@ static void do_acct_process(struct bsd_acct_struct *acct, * Get freeze protection. If the fs is frozen, just skip the write * as we could deadlock the system otherwise. */ - if (!file_start_write_trylock(file)) - goto out; - /* - * Kernel segment override to datasegment and write it - * to the accounting file. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - /* - * Accounting records are not subject to resource limits. - */ - flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - file->f_op->write(file, (char *)&ac, - sizeof(acct_t), &file->f_pos); - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; - set_fs(fs); - file_end_write(file); + if (file_start_write_trylock(file)) { + /* it's been opened O_APPEND, so position is irrelevant */ + loff_t pos = 0; + __kernel_write(file, (char *)&ac, sizeof(acct_t), &pos); + file_end_write(file); + } out: + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; revert_creds(orig_cred); } -- cgit v1.2.3 From 215752fce31c80f3b3a1530bc7cddb3ba6a69b3a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Aug 2014 06:23:41 -0400 Subject: acct: get rid of acct_list Put these suckers on per-vfsmount and per-superblock lists instead. Note: right now it's still acct_lock for everything, but that's going to change. Signed-off-by: Al Viro --- fs/mount.h | 1 + fs/namespace.c | 2 +- fs/super.c | 2 +- include/linux/acct.h | 6 +-- include/linux/fs.h | 1 + kernel/acct.c | 135 +++++++++++++++++++++------------------------------ 6 files changed, 62 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/fs/mount.h b/fs/mount.h index d55297f2fa05..0a2d1458681f 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -56,6 +56,7 @@ struct mount { int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; + struct hlist_head mnt_pins; struct path mnt_ex_mountpoint; }; diff --git a/fs/namespace.c b/fs/namespace.c index 182bc41cd887..22e530addfaf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -956,7 +956,7 @@ put_again: mnt->mnt_pinned = 0; rcu_read_unlock(); unlock_mount_hash(); - acct_auto_close_mnt(&mnt->mnt); + acct_auto_close_mnt(&mnt->mnt_pins); goto put_again; } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { diff --git a/fs/super.c b/fs/super.c index d20d5b11dedf..52ed93eb63df 100644 --- a/fs/super.c +++ b/fs/super.c @@ -703,7 +703,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) #endif if (flags & MS_RDONLY) - acct_auto_close(sb); + acct_auto_close(&sb->s_pins); shrink_dcache_sb(sb); remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); diff --git a/include/linux/acct.h b/include/linux/acct.h index 4a5b7cb56079..65a4f889182e 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -24,14 +24,14 @@ struct super_block; struct pacct_struct; struct pid_namespace; extern int acct_parm[]; /* for sysctl */ -extern void acct_auto_close_mnt(struct vfsmount *m); -extern void acct_auto_close(struct super_block *sb); +extern void acct_auto_close(struct hlist_head *); +extern void acct_auto_close_mnt(struct hlist_head *); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); extern void acct_exit_ns(struct pid_namespace *); #else -#define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) +#define acct_auto_close_mnt(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) #define acct_exit_ns(ns) do { } while (0) diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b7d57cf7863..17f70872a4a5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1250,6 +1250,7 @@ struct super_block { /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; + struct hlist_head s_pins; /* * Keep the lru lists last in the structure so they always sit on their diff --git a/kernel/acct.c b/kernel/acct.c index 019f012a3c6f..21fbb3c27c2a 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -59,6 +59,7 @@ #include #include /* sector_div */ #include +#include <../fs/mount.h> /* will go away when we refactor */ /* * These constants control the amount of freespace that suspend and @@ -79,16 +80,16 @@ static void do_acct_process(struct bsd_acct_struct *acct); struct bsd_acct_struct { long count; + struct hlist_node s_list; + struct hlist_node m_list; struct mutex lock; int active; unsigned long needcheck; struct file *file; struct pid_namespace *ns; - struct list_head list; }; static DEFINE_SPINLOCK(acct_lock); -static LIST_HEAD(acct_list); /* * Check the amount of free space and suspend/resume accordingly. @@ -133,25 +134,33 @@ static void acct_put(struct bsd_acct_struct *p) spin_unlock(&acct_lock); } -static struct bsd_acct_struct *acct_get(struct bsd_acct_struct **p) +static struct bsd_acct_struct *__acct_get(struct bsd_acct_struct *res) +{ + res->count++; + spin_unlock(&acct_lock); + mutex_lock(&res->lock); + if (!res->ns) { + mutex_unlock(&res->lock); + spin_lock(&acct_lock); + if (!--res->count) + kfree(res); + return NULL; + } + return res; +} + +static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; spin_lock(&acct_lock); again: - res = *p; - if (res) - res->count++; - spin_unlock(&acct_lock); - if (res) { - mutex_lock(&res->lock); - if (!res->ns) { - mutex_unlock(&res->lock); - spin_lock(&acct_lock); - if (!--res->count) - kfree(res); - goto again; - } + if (!ns->bacct) { + spin_unlock(&acct_lock); + return NULL; } + res = __acct_get(ns->bacct); + if (!res) + goto again; return res; } @@ -162,7 +171,8 @@ static void acct_kill(struct bsd_acct_struct *acct, struct file *file = acct->file; struct pid_namespace *ns = acct->ns; spin_lock(&acct_lock); - list_del(&acct->list); + hlist_del(&acct->m_list); + hlist_del(&acct->s_list); mnt_unpin(file->f_path.mnt); spin_unlock(&acct_lock); do_acct_process(acct); @@ -170,8 +180,10 @@ static void acct_kill(struct bsd_acct_struct *acct, spin_lock(&acct_lock); ns->bacct = new; if (new) { - mnt_pin(new->file->f_path.mnt); - list_add(&new->list, &acct_list); + struct vfsmount *m = new->file->f_path.mnt; + mnt_pin(m); + hlist_add_head(&new->s_list, &m->mnt_sb->s_pins); + hlist_add_head(&new->m_list, &real_mount(m)->mnt_pins); } acct->ns = NULL; mutex_unlock(&acct->lock); @@ -218,14 +230,15 @@ static int acct_on(struct filename *pathname) mutex_init(&acct->lock); mnt = file->f_path.mnt; - old = acct_get(&ns->bacct); + old = acct_get(ns); if (old) { acct_kill(old, acct); } else { spin_lock(&acct_lock); ns->bacct = acct; mnt_pin(mnt); - list_add(&acct->list, &acct_list); + hlist_add_head(&acct->s_list, &mnt->mnt_sb->s_pins); + hlist_add_head(&acct->m_list, &real_mount(mnt)->mnt_pins); spin_unlock(&acct_lock); } mntput(mnt); /* it's pinned, now give up active reference */ @@ -261,79 +274,41 @@ SYSCALL_DEFINE1(acct, const char __user *, name) mutex_unlock(&acct_on_mutex); putname(tmp); } else { - acct_kill(acct_get(&task_active_pid_ns(current)->bacct), NULL); + acct_kill(acct_get(task_active_pid_ns(current)), NULL); } return error; } -/** - * acct_auto_close - turn off a filesystem's accounting if it is on - * @m: vfsmount being shut down - * - * If the accounting is turned on for a file in the subtree pointed to - * to by m, turn accounting off. Done when m is about to die. - */ -void acct_auto_close_mnt(struct vfsmount *m) +void acct_auto_close_mnt(struct hlist_head *list) { - struct bsd_acct_struct *acct; - - spin_lock(&acct_lock); -restart: - list_for_each_entry(acct, &acct_list, list) - if (acct->file->f_path.mnt == m) { - acct->count++; - spin_unlock(&acct_lock); - mutex_lock(&acct->lock); - if (!acct->ns) { - mutex_unlock(&acct->lock); - spin_lock(&acct_lock); - if (!--acct->count) - kfree(acct); - goto restart; - } - acct_kill(acct, NULL); - spin_lock(&acct_lock); - goto restart; - } + while (1) { + spin_lock(&acct_lock); + if (!list->first) + break; + acct_kill(__acct_get(hlist_entry(list->first, + struct bsd_acct_struct, + m_list)), NULL); + } spin_unlock(&acct_lock); } -/** - * acct_auto_close - turn off a filesystem's accounting if it is on - * @sb: super block for the filesystem - * - * If the accounting is turned on for a file in the filesystem pointed - * to by sb, turn accounting off. - */ -void acct_auto_close(struct super_block *sb) +void acct_auto_close(struct hlist_head *list) { - struct bsd_acct_struct *acct; - - spin_lock(&acct_lock); -restart: - list_for_each_entry(acct, &acct_list, list) - if (acct->file->f_path.dentry->d_sb == sb) { - acct->count++; - spin_unlock(&acct_lock); - mutex_lock(&acct->lock); - if (!acct->ns) { - mutex_unlock(&acct->lock); - spin_lock(&acct_lock); - if (!--acct->count) - kfree(acct); - goto restart; - } - acct_kill(acct, NULL); - spin_lock(&acct_lock); - goto restart; - } + while (1) { + spin_lock(&acct_lock); + if (!list->first) + break; + acct_kill(__acct_get(hlist_entry(list->first, + struct bsd_acct_struct, + s_list)), NULL); + } spin_unlock(&acct_lock); } void acct_exit_ns(struct pid_namespace *ns) { - acct_kill(acct_get(&ns->bacct), NULL); + acct_kill(acct_get(ns), NULL); } /* @@ -602,7 +577,7 @@ void acct_collect(long exitcode, int group_dead) static void slow_acct_process(struct pid_namespace *ns) { for ( ; ns; ns = ns->parent) { - struct bsd_acct_struct *acct = acct_get(&ns->bacct); + struct bsd_acct_struct *acct = acct_get(ns); if (acct) { do_acct_process(acct); mutex_unlock(&acct->lock); -- cgit v1.2.3 From efb170c22867cdc6f770de441bdefecec6712199 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Aug 2014 08:39:04 -0400 Subject: take fs_pin stuff to fs/* Add a new field to fs_pin - kill(pin). That's what umount and r/o remount will be calling for all pins attached to vfsmount and superblock resp. Called after bumping the refcount, so it won't go away under us. Dropping the refcount is responsibility of the instance. All generic stuff moved to fs/fs_pin.c; the next step will rip all the knowledge of kernel/acct.c from fs/super.c and fs/namespace.c. After that - death to mnt_pin(); it was intended to be usable as generic mechanism for code that wants to attach objects to vfsmount, so that they would not make the sucker busy and would get killed on umount. Never got it right; it remained acct.c-specific all along. Now it's very close to being killable. Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/fs_pin.c | 77 ++++++++++++++++++++++++++++++ include/linux/acct.h | 6 +-- include/linux/fs_pin.h | 17 +++++++ kernel/acct.c | 127 +++++++++++++------------------------------------ 5 files changed, 129 insertions(+), 100 deletions(-) create mode 100644 fs/fs_pin.c create mode 100644 include/linux/fs_pin.h (limited to 'include/linux') diff --git a/fs/Makefile b/fs/Makefile index 4030cbfbc9af..90c88529892b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o \ - stack.o fs_struct.o statfs.o + stack.o fs_struct.o statfs.o fs_pin.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/fs_pin.c b/fs/fs_pin.c new file mode 100644 index 000000000000..f3ce0b874a44 --- /dev/null +++ b/fs/fs_pin.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include "mount.h" + +static void pin_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct fs_pin, rcu)); +} + +static DEFINE_SPINLOCK(pin_lock); + +void pin_put(struct fs_pin *p) +{ + if (atomic_long_dec_and_test(&p->count)) + call_rcu(&p->rcu, pin_free_rcu); +} + +void pin_remove(struct fs_pin *pin) +{ + spin_lock(&pin_lock); + hlist_del(&pin->m_list); + hlist_del(&pin->s_list); + spin_unlock(&pin_lock); +} + +void pin_insert(struct fs_pin *pin, struct vfsmount *m) +{ + spin_lock(&pin_lock); + hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); + hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); + spin_unlock(&pin_lock); +} + +void acct_auto_close_mnt(struct hlist_head *list) +{ + while (1) { + struct hlist_node *p; + struct fs_pin *pin; + rcu_read_lock(); + p = ACCESS_ONCE(list->first); + if (!p) { + rcu_read_unlock(); + break; + } + pin = hlist_entry(p, struct fs_pin, m_list); + if (!atomic_long_inc_not_zero(&pin->count)) { + rcu_read_unlock(); + cpu_relax(); + continue; + } + rcu_read_unlock(); + pin->kill(pin); + } +} + +void acct_auto_close(struct hlist_head *list) +{ + while (1) { + struct hlist_node *p; + struct fs_pin *pin; + rcu_read_lock(); + p = ACCESS_ONCE(list->first); + if (!p) { + rcu_read_unlock(); + break; + } + pin = hlist_entry(p, struct fs_pin, s_list); + if (!atomic_long_inc_not_zero(&pin->count)) { + rcu_read_unlock(); + cpu_relax(); + continue; + } + rcu_read_unlock(); + pin->kill(pin); + } +} diff --git a/include/linux/acct.h b/include/linux/acct.h index 65a4f889182e..137837929dbe 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -24,18 +24,16 @@ struct super_block; struct pacct_struct; struct pid_namespace; extern int acct_parm[]; /* for sysctl */ -extern void acct_auto_close(struct hlist_head *); -extern void acct_auto_close_mnt(struct hlist_head *); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); extern void acct_exit_ns(struct pid_namespace *); #else -#define acct_auto_close(x) do { } while (0) -#define acct_auto_close_mnt(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) #define acct_exit_ns(ns) do { } while (0) #endif +extern void acct_auto_close(struct hlist_head *); +extern void acct_auto_close_mnt(struct hlist_head *); /* * ACCT_VERSION numbers as yet defined: diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h new file mode 100644 index 000000000000..f66525e72ccf --- /dev/null +++ b/include/linux/fs_pin.h @@ -0,0 +1,17 @@ +#include + +struct fs_pin { + atomic_long_t count; + union { + struct { + struct hlist_node s_list; + struct hlist_node m_list; + }; + struct rcu_head rcu; + }; + void (*kill)(struct fs_pin *); +}; + +void pin_put(struct fs_pin *); +void pin_remove(struct fs_pin *); +void pin_insert(struct fs_pin *, struct vfsmount *); diff --git a/kernel/acct.c b/kernel/acct.c index afeaaa6f49bf..a7993a6cb604 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -59,7 +59,7 @@ #include #include /* sector_div */ #include -#include <../fs/mount.h> /* will go away when we refactor */ +#include /* * These constants control the amount of freespace that suspend and @@ -78,17 +78,6 @@ int acct_parm[3] = {4, 2, 30}; */ static void do_acct_process(struct bsd_acct_struct *acct); -struct fs_pin { - atomic_long_t count; - union { - struct { - struct hlist_node s_list; - struct hlist_node m_list; - }; - struct rcu_head rcu; - }; -}; - struct bsd_acct_struct { struct fs_pin pin; struct mutex lock; @@ -100,13 +89,6 @@ struct bsd_acct_struct { struct completion done; }; -static void pin_free_rcu(struct rcu_head *head) -{ - kfree(container_of(head, struct fs_pin, rcu)); -} - -static DEFINE_SPINLOCK(acct_lock); - /* * Check the amount of free space and suspend/resume accordingly. */ @@ -142,29 +124,6 @@ out: return acct->active; } -static void pin_put(struct fs_pin *p) -{ - if (atomic_long_dec_and_test(&p->count)) - call_rcu(&p->rcu, pin_free_rcu); -} - -static struct bsd_acct_struct *__acct_get(struct bsd_acct_struct *res) -{ - if (!atomic_long_inc_not_zero(&res->pin.count)) { - rcu_read_unlock(); - cpu_relax(); - return NULL; - } - rcu_read_unlock(); - mutex_lock(&res->lock); - if (!res->ns) { - mutex_unlock(&res->lock); - pin_put(&res->pin); - return NULL; - } - return res; -} - static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; @@ -176,9 +135,18 @@ again: rcu_read_unlock(); return NULL; } - res = __acct_get(res); - if (!res) + if (!atomic_long_inc_not_zero(&res->pin.count)) { + rcu_read_unlock(); + cpu_relax(); goto again; + } + rcu_read_unlock(); + mutex_lock(&res->lock); + if (!res->ns) { + mutex_unlock(&res->lock); + pin_put(&res->pin); + goto again; + } return res; } @@ -203,19 +171,8 @@ static void acct_kill(struct bsd_acct_struct *acct, init_completion(&acct->done); schedule_work(&acct->work); wait_for_completion(&acct->done); - spin_lock(&acct_lock); - hlist_del(&acct->pin.m_list); - hlist_del(&acct->pin.s_list); - spin_unlock(&acct_lock); + pin_remove(&acct->pin); ns->bacct = new; - if (new) { - struct vfsmount *m = new->file->f_path.mnt; - spin_lock(&acct_lock); - hlist_add_head(&new->pin.s_list, &m->mnt_sb->s_pins); - hlist_add_head(&new->pin.m_list, &real_mount(m)->mnt_pins); - spin_unlock(&acct_lock); - mutex_unlock(&new->lock); - } acct->ns = NULL; atomic_long_dec(&acct->pin.count); mutex_unlock(&acct->lock); @@ -223,6 +180,19 @@ static void acct_kill(struct bsd_acct_struct *acct, } } +static void acct_pin_kill(struct fs_pin *pin) +{ + struct bsd_acct_struct *acct; + acct = container_of(pin, struct bsd_acct_struct, pin); + mutex_lock(&acct->lock); + if (!acct->ns) { + mutex_unlock(&acct->lock); + pin_put(pin); + acct = NULL; + } + acct_kill(acct, NULL); +} + static int acct_on(struct filename *pathname) { struct file *file; @@ -254,25 +224,22 @@ static int acct_on(struct filename *pathname) } atomic_long_set(&acct->pin.count, 1); + acct->pin.kill = acct_pin_kill; acct->file = file; acct->needcheck = jiffies; acct->ns = ns; mutex_init(&acct->lock); mnt = file->f_path.mnt; mnt_pin(mnt); + mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ + pin_insert(&acct->pin, mnt); old = acct_get(ns); - mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ - if (old) { + if (old) acct_kill(old, acct); - } else { + else ns->bacct = acct; - spin_lock(&acct_lock); - hlist_add_head(&acct->pin.s_list, &mnt->mnt_sb->s_pins); - hlist_add_head(&acct->pin.m_list, &real_mount(mnt)->mnt_pins); - spin_unlock(&acct_lock); - mutex_unlock(&acct->lock); - } + mutex_unlock(&acct->lock); mntput(mnt); /* it's pinned, now give up active reference */ return 0; } @@ -312,36 +279,6 @@ SYSCALL_DEFINE1(acct, const char __user *, name) return error; } -void acct_auto_close_mnt(struct hlist_head *list) -{ - rcu_read_lock(); - while (1) { - struct hlist_node *p = ACCESS_ONCE(list->first); - if (!p) - break; - acct_kill(__acct_get(hlist_entry(p, - struct bsd_acct_struct, - pin.m_list)), NULL); - rcu_read_lock(); - } - rcu_read_unlock(); -} - -void acct_auto_close(struct hlist_head *list) -{ - rcu_read_lock(); - while (1) { - struct hlist_node *p = ACCESS_ONCE(list->first); - if (!p) - break; - acct_kill(__acct_get(hlist_entry(p, - struct bsd_acct_struct, - pin.s_list)), NULL); - rcu_read_lock(); - } - rcu_read_unlock(); -} - void acct_exit_ns(struct pid_namespace *ns) { acct_kill(acct_get(ns), NULL); -- cgit v1.2.3 From 8fa1f1c2bd86007beb4a4845e6087ac4a704dc80 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 21 May 2014 18:22:52 -0400 Subject: make fs/{namespace,super}.c forget about acct.h These externs belong in fs/internal.h. Rename (they are not acct-specific anymore) and move them over there. Signed-off-by: Al Viro --- fs/fs_pin.c | 9 +++++---- fs/internal.h | 6 ++++++ fs/namespace.c | 3 +-- fs/super.c | 3 +-- include/linux/acct.h | 2 -- 5 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_pin.c b/fs/fs_pin.c index f3ce0b874a44..9368236ca100 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -1,6 +1,7 @@ #include #include #include +#include "internal.h" #include "mount.h" static void pin_free_rcu(struct rcu_head *head) @@ -32,13 +33,13 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) spin_unlock(&pin_lock); } -void acct_auto_close_mnt(struct hlist_head *list) +void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; struct fs_pin *pin; rcu_read_lock(); - p = ACCESS_ONCE(list->first); + p = ACCESS_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; @@ -54,13 +55,13 @@ void acct_auto_close_mnt(struct hlist_head *list) } } -void acct_auto_close(struct hlist_head *list) +void sb_pin_kill(struct super_block *sb) { while (1) { struct hlist_node *p; struct fs_pin *pin; rcu_read_lock(); - p = ACCESS_ONCE(list->first); + p = ACCESS_ONCE(sb->s_pins.first); if (!p) { rcu_read_unlock(); break; diff --git a/fs/internal.h b/fs/internal.h index 9a2edba87c2b..e325b4f9c799 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -143,3 +143,9 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, * pipe.c */ extern const struct file_operations pipefifo_fops; + +/* + * fs_pin.c + */ +extern void sb_pin_kill(struct super_block *sb); +extern void mnt_pin_kill(struct mount *m); diff --git a/fs/namespace.c b/fs/namespace.c index 22e530addfaf..0e4ce51c5277 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -16,7 +16,6 @@ #include #include #include -#include /* acct_auto_close_mnt */ #include /* init_rootfs */ #include /* get_fs_root et.al. */ #include /* fsnotify_vfsmount_delete */ @@ -956,7 +955,7 @@ put_again: mnt->mnt_pinned = 0; rcu_read_unlock(); unlock_mount_hash(); - acct_auto_close_mnt(&mnt->mnt_pins); + mnt_pin_kill(mnt); goto put_again; } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { diff --git a/fs/super.c b/fs/super.c index a369f8964dc1..a371ce6aa919 100644 --- a/fs/super.c +++ b/fs/super.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -707,7 +706,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (remount_ro) { if (sb->s_pins.first) { up_write(&sb->s_umount); - acct_auto_close(&sb->s_pins); + sb_pin_kill(sb); down_write(&sb->s_umount); if (!sb->s_root) return 0; diff --git a/include/linux/acct.h b/include/linux/acct.h index 137837929dbe..dccc2d4fe7de 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -32,8 +32,6 @@ extern void acct_exit_ns(struct pid_namespace *); #define acct_process() do { } while (0) #define acct_exit_ns(ns) do { } while (0) #endif -extern void acct_auto_close(struct hlist_head *); -extern void acct_auto_close_mnt(struct hlist_head *); /* * ACCT_VERSION numbers as yet defined: -- cgit v1.2.3 From 3064c3563ba4c23e2c7a47254ec056ed9ba0098a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Aug 2014 09:12:31 -0400 Subject: death to mnt_pinned Rather than playing silly buggers with vfsmount refcounts, just have acct_on() ask fs/namespace.c for internal clone of file->f_path.mnt and replace it with said clone. Then attach the pin to original vfsmount. Voila - the clone will be alive until the file gets closed, making sure that underlying superblock remains active, etc., and we can drop the original vfsmount, so that it's not kept busy. If the file lives until the final mntput of the original vfsmount, we'll notice that there's an fs_pin (one in bsd_acct_struct that holds that file) and mnt_pin_kill() will take it out. Since ->kill() is synchronous, we won't proceed past that point until these files are closed (and private clones of our vfsmount are gone), so we get the same ordering warranties we used to get. mnt_pin()/mnt_unpin()/->mnt_pinned is gone now, and good riddance - it never became usable outside of kernel/acct.c (and racy wrt umount even there). Signed-off-by: Al Viro --- fs/mount.h | 1 - fs/namespace.c | 35 +++++++++-------------------------- include/linux/mount.h | 4 ++-- kernel/acct.c | 24 +++++++++++++++++++----- 4 files changed, 30 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/mount.h b/fs/mount.h index 0a2d1458681f..6740a6215529 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -55,7 +55,6 @@ struct mount { int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ - int mnt_pinned; struct hlist_head mnt_pins; struct path mnt_ex_mountpoint; }; diff --git a/fs/namespace.c b/fs/namespace.c index 0e4ce51c5277..65af9d0e0d67 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -937,7 +937,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, static void mntput_no_expire(struct mount *mnt) { -put_again: rcu_read_lock(); mnt_add_count(mnt, -1); if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ @@ -950,14 +949,6 @@ put_again: unlock_mount_hash(); return; } - if (unlikely(mnt->mnt_pinned)) { - mnt_add_count(mnt, mnt->mnt_pinned + 1); - mnt->mnt_pinned = 0; - rcu_read_unlock(); - unlock_mount_hash(); - mnt_pin_kill(mnt); - goto put_again; - } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { rcu_read_unlock(); unlock_mount_hash(); @@ -980,6 +971,8 @@ put_again: * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); + if (unlikely(mnt->mnt_pins.first)) + mnt_pin_kill(mnt); fsnotify_vfsmount_delete(&mnt->mnt); dput(mnt->mnt.mnt_root); deactivate_super(mnt->mnt.mnt_sb); @@ -1007,25 +1000,15 @@ struct vfsmount *mntget(struct vfsmount *mnt) } EXPORT_SYMBOL(mntget); -void mnt_pin(struct vfsmount *mnt) +struct vfsmount *mnt_clone_internal(struct path *path) { - lock_mount_hash(); - real_mount(mnt)->mnt_pinned++; - unlock_mount_hash(); -} -EXPORT_SYMBOL(mnt_pin); - -void mnt_unpin(struct vfsmount *m) -{ - struct mount *mnt = real_mount(m); - lock_mount_hash(); - if (mnt->mnt_pinned) { - mnt_add_count(mnt, 1); - mnt->mnt_pinned--; - } - unlock_mount_hash(); + struct mount *p; + p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); + if (IS_ERR(p)) + return ERR_CAST(p); + p->mnt.mnt_flags |= MNT_INTERNAL; + return &p->mnt; } -EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) { diff --git a/include/linux/mount.h b/include/linux/mount.h index 839bac270904..864b120c1345 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -62,6 +62,7 @@ struct vfsmount { }; struct file; /* forward dec */ +struct path; extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); @@ -70,8 +71,7 @@ extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); -extern void mnt_pin(struct vfsmount *mnt); -extern void mnt_unpin(struct vfsmount *mnt); +extern struct vfsmount *mnt_clone_internal(struct path *path); extern int __mnt_is_readonly(struct vfsmount *mnt); struct file_system_type; diff --git a/kernel/acct.c b/kernel/acct.c index a7993a6cb604..2e6cf818021d 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -154,7 +154,6 @@ static void close_work(struct work_struct *work) { struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); struct file *file = acct->file; - mnt_unpin(file->f_path.mnt); if (file->f_op->flush) file->f_op->flush(file, NULL); __fput_sync(file); @@ -196,9 +195,10 @@ static void acct_pin_kill(struct fs_pin *pin) static int acct_on(struct filename *pathname) { struct file *file; - struct vfsmount *mnt; + struct vfsmount *mnt, *internal; struct pid_namespace *ns = task_active_pid_ns(current); struct bsd_acct_struct *acct, *old; + int err; acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); if (!acct) @@ -222,6 +222,21 @@ static int acct_on(struct filename *pathname) filp_close(file, NULL); return -EIO; } + internal = mnt_clone_internal(&file->f_path); + if (IS_ERR(internal)) { + kfree(acct); + filp_close(file, NULL); + return PTR_ERR(internal); + } + err = mnt_want_write(internal); + if (err) { + mntput(internal); + kfree(acct); + filp_close(file, NULL); + return err; + } + mnt = file->f_path.mnt; + file->f_path.mnt = internal; atomic_long_set(&acct->pin.count, 1); acct->pin.kill = acct_pin_kill; @@ -229,8 +244,6 @@ static int acct_on(struct filename *pathname) acct->needcheck = jiffies; acct->ns = ns; mutex_init(&acct->lock); - mnt = file->f_path.mnt; - mnt_pin(mnt); mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ pin_insert(&acct->pin, mnt); @@ -240,7 +253,8 @@ static int acct_on(struct filename *pathname) else ns->bacct = acct; mutex_unlock(&acct->lock); - mntput(mnt); /* it's pinned, now give up active reference */ + mnt_drop_write(mnt); + mntput(mnt); return 0; } -- cgit v1.2.3 From 1a0a397e41cb1bf70cfe45fd0eeff08c7c501ec0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 14 Feb 2014 17:35:37 -0500 Subject: dcache: d_obtain_alias callers don't all want DISCONNECTED There are a few d_obtain_alias callers that are using it to get the root of a filesystem which may already have an alias somewhere else. This is not the same as the filehandle-lookup case, and none of them actually need DCACHE_DISCONNECTED set. It isn't really a serious problem, but it would really be clearer if we reserved DCACHE_DISCONNECTED for those cases where it's actually needed. In the btrfs case this was causing a spurious printk from nfsd/nfsfh.c:fh_verify when it found an unexpected DCACHE_DISCONNECTED dentry. Josef worked around this by unsetting DCACHE_DISCONNECTED manually in 3a0dfa6a12e "Btrfs: unset DCACHE_DISCONNECTED when mounting default subvol", and this replaces that workaround. Cc: Josef Bacik Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/btrfs/super.c | 9 +------ fs/ceph/super.c | 2 +- fs/dcache.c | 69 +++++++++++++++++++++++++++++++++++--------------- fs/nfs/getroot.c | 2 +- fs/nilfs2/super.c | 2 +- include/linux/dcache.h | 1 + 6 files changed, 54 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8e16bca69c56..67b48b9a03e0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -851,7 +851,6 @@ static struct dentry *get_default_root(struct super_block *sb, struct btrfs_path *path; struct btrfs_key location; struct inode *inode; - struct dentry *dentry; u64 dir_id; int new = 0; @@ -922,13 +921,7 @@ setup_root: return dget(sb->s_root); } - dentry = d_obtain_alias(inode); - if (!IS_ERR(dentry)) { - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&dentry->d_lock); - } - return dentry; + return d_obtain_root(inode); } static int btrfs_fill_super(struct super_block *sb, diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 06150fd745ac..f6e12377335c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -755,7 +755,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, goto out; } } else { - root = d_obtain_alias(inode); + root = d_obtain_root(inode); } ceph_init_dentry(root); dout("open_root_inode success, root dentry is %p\n", root); diff --git a/fs/dcache.c b/fs/dcache.c index 3ed095363997..63d556c0e698 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1781,25 +1781,7 @@ struct dentry *d_find_any_alias(struct inode *inode) } EXPORT_SYMBOL(d_find_any_alias); -/** - * d_obtain_alias - find or allocate a dentry for a given inode - * @inode: inode to allocate the dentry for - * - * Obtain a dentry for an inode resulting from NFS filehandle conversion or - * similar open by handle operations. The returned dentry may be anonymous, - * or may have a full name (if the inode was already in the cache). - * - * When called on a directory inode, we must ensure that the inode only ever - * has one dentry. If a dentry is found, that is returned instead of - * allocating a new one. - * - * On successful return, the reference to the inode has been transferred - * to the dentry. In case of an error the reference on the inode is released. - * To make it easier to use in export operations a %NULL or IS_ERR inode may - * be passed in and will be the error will be propagate to the return value, - * with a %NULL @inode replaced by ERR_PTR(-ESTALE). - */ -struct dentry *d_obtain_alias(struct inode *inode) +struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) { static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; @@ -1830,7 +1812,10 @@ struct dentry *d_obtain_alias(struct inode *inode) } /* attach a disconnected dentry */ - add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; + add_flags = d_flags_for_inode(inode); + + if (disconnected) + add_flags |= DCACHE_DISCONNECTED; spin_lock(&tmp->d_lock); tmp->d_inode = inode; @@ -1851,8 +1836,52 @@ struct dentry *d_obtain_alias(struct inode *inode) iput(inode); return res; } + +/** + * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode + * @inode: inode to allocate the dentry for + * + * Obtain a dentry for an inode resulting from NFS filehandle conversion or + * similar open by handle operations. The returned dentry may be anonymous, + * or may have a full name (if the inode was already in the cache). + * + * When called on a directory inode, we must ensure that the inode only ever + * has one dentry. If a dentry is found, that is returned instead of + * allocating a new one. + * + * On successful return, the reference to the inode has been transferred + * to the dentry. In case of an error the reference on the inode is released. + * To make it easier to use in export operations a %NULL or IS_ERR inode may + * be passed in and the error will be propagated to the return value, + * with a %NULL @inode replaced by ERR_PTR(-ESTALE). + */ +struct dentry *d_obtain_alias(struct inode *inode) +{ + return __d_obtain_alias(inode, 1); +} EXPORT_SYMBOL(d_obtain_alias); +/** + * d_obtain_root - find or allocate a dentry for a given inode + * @inode: inode to allocate the dentry for + * + * Obtain an IS_ROOT dentry for the root of a filesystem. + * + * We must ensure that directory inodes only ever have one dentry. If a + * dentry is found, that is returned instead of allocating a new one. + * + * On successful return, the reference to the inode has been transferred + * to the dentry. In case of an error the reference on the inode is + * released. A %NULL or IS_ERR inode may be passed in and will be the + * error will be propagate to the return value, with a %NULL @inode + * replaced by ERR_PTR(-ESTALE). + */ +struct dentry *d_obtain_root(struct inode *inode) +{ + return __d_obtain_alias(inode, 0); +} +EXPORT_SYMBOL(d_obtain_root); + /** * d_add_ci - lookup or allocate new dentry with case-exact name * @inode: the inode case-insensitive lookup has found diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b94f80420a58..880618a8b048 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - ret = d_obtain_alias(inode); + ret = d_obtain_root(inode); if (IS_ERR(ret)) { dprintk("nfs_get_root: get root dentry failed\n"); goto out; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8c532b2ca3ab..ac914994dfed 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -942,7 +942,7 @@ static int nilfs_get_root_dentry(struct super_block *sb, iput(inode); } } else { - dentry = d_obtain_alias(inode); + dentry = d_obtain_root(inode); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto failed_dentry; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3c7ec327ebd2..e4ae2ad48d07 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -249,6 +249,7 @@ extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); +extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); -- cgit v1.2.3 From c7f3888ad7f0932a87fb76e6e4edff2a90cc7920 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jun 2014 20:34:33 -0400 Subject: switch iov_iter_get_pages() to passing maximal number of pages ... instead of maximal size. Signed-off-by: Al Viro --- fs/direct-io.c | 2 +- fs/fuse/file.c | 4 ++-- include/linux/uio.h | 2 +- mm/iov_iter.c | 17 ++++++++--------- 4 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/direct-io.c b/fs/direct-io.c index 17e39b047de5..c3116404ab49 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) { ssize_t ret; - ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE, + ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES, &sdio->from); if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 40ac2628ddcf..912061ac4baf 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1303,10 +1303,10 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; size_t start; - unsigned n = req->max_pages - req->num_pages; ssize_t ret = iov_iter_get_pages(ii, &req->pages[req->num_pages], - n * PAGE_SIZE, &start); + req->max_pages - req->num_pages, + &start); if (ret < 0) return ret; diff --git a/include/linux/uio.h b/include/linux/uio.h index 09a7cffc224e..48d64e6ab292 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -84,7 +84,7 @@ unsigned long iov_iter_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, - size_t maxsize, size_t *start); + unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 7b5dbd1517b5..ab88dc0ea1d3 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -310,7 +310,7 @@ void iov_iter_init(struct iov_iter *i, int direction, EXPORT_SYMBOL(iov_iter_init); static ssize_t get_pages_iovec(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page **pages, unsigned maxpages, size_t *start) { size_t offset = i->iov_offset; @@ -323,10 +323,10 @@ static ssize_t get_pages_iovec(struct iov_iter *i, len = iov->iov_len - offset; if (len > i->count) len = i->count; - if (len > maxsize) - len = maxsize; addr = (unsigned long)iov->iov_base + offset; len += *start = addr & (PAGE_SIZE - 1); + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; addr &= ~(PAGE_SIZE - 1); n = (len + PAGE_SIZE - 1) / PAGE_SIZE; res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); @@ -588,15 +588,14 @@ static unsigned long alignment_bvec(const struct iov_iter *i) } static ssize_t get_pages_bvec(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page **pages, unsigned maxpages, size_t *start) { const struct bio_vec *bvec = i->bvec; size_t len = bvec->bv_len - i->iov_offset; if (len > i->count) len = i->count; - if (len > maxsize) - len = maxsize; + /* can't be more than PAGE_SIZE */ *start = bvec->bv_offset + i->iov_offset; get_page(*pages = bvec->bv_page); @@ -712,13 +711,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) EXPORT_SYMBOL(iov_iter_alignment); ssize_t iov_iter_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page **pages, unsigned maxpages, size_t *start) { if (i->type & ITER_BVEC) - return get_pages_bvec(i, pages, maxsize, start); + return get_pages_bvec(i, pages, maxpages, start); else - return get_pages_iovec(i, pages, maxsize, start); + return get_pages_iovec(i, pages, maxpages, start); } EXPORT_SYMBOL(iov_iter_get_pages); -- cgit v1.2.3 From 3c49b52b155d0f723792377e1a4480a0e7ca0ba2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 25 Jul 2014 16:05:29 -0400 Subject: tracing: Do not do anything special with tracepoint_string when tracing is disabled When CONFIG_TRACING is not enabled, there's no reason to save the trace strings either by the linker or as a static variable that can be referenced later. Simply pass back the string that is given to tracepoint_string(). Had to move the define to include/linux/tracepoint.h so that it is still visible when CONFIG_TRACING is not set. Link: http://lkml.kernel.org/p/1406318733-26754-2-git-send-email-nicolas.pitre@linaro.org Suggested-by: Nicolas Pitre Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 34 ---------------------------------- include/linux/tracepoint.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index cff3106ffe2c..c9f619a2070f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -574,40 +574,6 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -/** - * tracepoint_string - register constant persistent string to trace system - * @str - a constant persistent string that will be referenced in tracepoints - * - * If constant strings are being used in tracepoints, it is faster and - * more efficient to just save the pointer to the string and reference - * that with a printf "%s" instead of saving the string in the ring buffer - * and wasting space and time. - * - * The problem with the above approach is that userspace tools that read - * the binary output of the trace buffers do not have access to the string. - * Instead they just show the address of the string which is not very - * useful to users. - * - * With tracepoint_string(), the string will be registered to the tracing - * system and exported to userspace via the debugfs/tracing/printk_formats - * file that maps the string address to the string text. This way userspace - * tools that read the binary buffers have a way to map the pointers to - * the ASCII strings they represent. - * - * The @str used must be a constant string and persistent as it would not - * make sense to show a string that no longer exists. But it is still fine - * to be used with modules, because when modules are unloaded, if they - * had tracepoints, the ring buffers are cleared too. As long as the string - * does not change during the life of the module, it is fine to use - * tracepoint_string() within a module. - */ -#define tracepoint_string(str) \ - ({ \ - static const char *___tp_str __tracepoint_string = str; \ - ___tp_str; \ - }) -#define __tracepoint_string __attribute__((section("__tracepoint_str"))) - #ifdef CONFIG_PERF_EVENTS struct perf_event; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2e2a5f7717e5..b1293f15f592 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -249,6 +249,50 @@ extern void syscall_unregfunc(void); #endif /* CONFIG_TRACEPOINTS */ +#ifdef CONFIG_TRACING +/** + * tracepoint_string - register constant persistent string to trace system + * @str - a constant persistent string that will be referenced in tracepoints + * + * If constant strings are being used in tracepoints, it is faster and + * more efficient to just save the pointer to the string and reference + * that with a printf "%s" instead of saving the string in the ring buffer + * and wasting space and time. + * + * The problem with the above approach is that userspace tools that read + * the binary output of the trace buffers do not have access to the string. + * Instead they just show the address of the string which is not very + * useful to users. + * + * With tracepoint_string(), the string will be registered to the tracing + * system and exported to userspace via the debugfs/tracing/printk_formats + * file that maps the string address to the string text. This way userspace + * tools that read the binary buffers have a way to map the pointers to + * the ASCII strings they represent. + * + * The @str used must be a constant string and persistent as it would not + * make sense to show a string that no longer exists. But it is still fine + * to be used with modules, because when modules are unloaded, if they + * had tracepoints, the ring buffers are cleared too. As long as the string + * does not change during the life of the module, it is fine to use + * tracepoint_string() within a module. + */ +#define tracepoint_string(str) \ + ({ \ + static const char *___tp_str __tracepoint_string = str; \ + ___tp_str; \ + }) +#define __tracepoint_string __attribute__((section("__tracepoint_str"))) +#else +/* + * tracepoint_string() is used to save the string address for userspace + * tracing tools. When tracing isn't configured, there's no need to save + * anything. + */ +# define tracepoint_string(str) str +# define __tracepoint_string +#endif + /* * The need for the DECLARE_TRACE_NOARGS() is to handle the prototype * (void). "void" is a special value in a function prototype and can -- cgit v1.2.3 From f8ec894945e7d205ce62be59e55e72c4304e4739 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Fri, 25 Jul 2014 17:16:42 -0700 Subject: Input: MT - make slot cleanup callable outside mt_sync_frame() Some semi-mt drivers use the slots in a manual way, but may still want to call parts of the frame synchronization logic. This patch makes input_mt_drop_unused callable from those drivers. Signed-off-by: Henrik Rydberg Reviewed-by: Benson Leung Signed-off-by: Dmitry Torokhov --- drivers/input/input-mt.c | 38 +++++++++++++++++++++++++++----------- include/linux/input/mt.h | 1 + 2 files changed, 28 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index d398f1321f14..c30204f2fa30 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -236,6 +236,31 @@ void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count) } EXPORT_SYMBOL(input_mt_report_pointer_emulation); +/** + * input_mt_drop_unused() - Inactivate slots not seen in this frame + * @dev: input device with allocated MT slots + * + * Lift all slots not seen since the last call to this function. + */ +void input_mt_drop_unused(struct input_dev *dev) +{ + struct input_mt *mt = dev->mt; + int i; + + if (!mt) + return; + + for (i = 0; i < mt->num_slots; i++) { + if (!input_mt_is_used(mt, &mt->slots[i])) { + input_mt_slot(dev, i); + input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1); + } + } + + mt->frame++; +} +EXPORT_SYMBOL(input_mt_drop_unused); + /** * input_mt_sync_frame() - synchronize mt frame * @dev: input device with allocated MT slots @@ -247,27 +272,18 @@ EXPORT_SYMBOL(input_mt_report_pointer_emulation); void input_mt_sync_frame(struct input_dev *dev) { struct input_mt *mt = dev->mt; - struct input_mt_slot *s; bool use_count = false; if (!mt) return; - if (mt->flags & INPUT_MT_DROP_UNUSED) { - for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { - if (input_mt_is_used(mt, s)) - continue; - input_mt_slot(dev, s - mt->slots); - input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1); - } - } + if (mt->flags & INPUT_MT_DROP_UNUSED) + input_mt_drop_unused(dev); if ((mt->flags & INPUT_MT_POINTER) && !(mt->flags & INPUT_MT_SEMI_MT)) use_count = true; input_mt_report_pointer_emulation(dev, use_count); - - mt->frame++; } EXPORT_SYMBOL(input_mt_sync_frame); diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 1b1dfa80d9ff..f583ff639776 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -105,6 +105,7 @@ void input_mt_report_slot_state(struct input_dev *dev, void input_mt_report_finger_count(struct input_dev *dev, int count); void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count); +void input_mt_drop_unused(struct input_dev *dev); void input_mt_sync_frame(struct input_dev *dev); -- cgit v1.2.3 From 92d18a6851fb6295466657ad1cf7fe88c2054ffa Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 8 Aug 2014 10:36:20 -0600 Subject: drivers/vfio: Fix EEH build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VFIO related components could be built as dynamic modules. Unfortunately, CONFIG_EEH can't be configured to "m". The patch fixes the build errors when configuring VFIO related components as dynamic modules as follows: CC [M] drivers/vfio/vfio_iommu_spapr_tce.o In file included from drivers/vfio/vfio.c:33:0: include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \ inside parameter list [enabled by default] : WRAP arch/powerpc/boot/zImage.pseries WRAP arch/powerpc/boot/zImage.maple WRAP arch/powerpc/boot/zImage.pmac WRAP arch/powerpc/boot/zImage.epapr MODPOST 1818 modules ERROR: ".vfio_spapr_iommu_eeh_ioctl" [drivers/vfio/vfio_iommu_spapr_tce.ko]\ undefined! ERROR: ".vfio_spapr_pci_eeh_open" [drivers/vfio/pci/vfio-pci.ko] undefined! ERROR: ".vfio_spapr_pci_eeh_release" [drivers/vfio/pci/vfio-pci.ko] undefined! Reported-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 6 ++++++ drivers/vfio/Makefile | 2 +- drivers/vfio/vfio_spapr_eeh.c | 3 +++ include/linux/vfio.h | 1 + 4 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index af7b204b9215..d8c57636b9ce 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE depends on VFIO && SPAPR_TCE_IOMMU default n +config VFIO_SPAPR_EEH + tristate + depends on EEH && VFIO_IOMMU_SPAPR_TCE + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) + select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) select ANON_INODES help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 50e30bc75e85..0b035b12600a 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o +obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index f834b4ce1431..949f98e997af 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -18,11 +18,13 @@ int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { return eeh_dev_open(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) { eeh_dev_release(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release); long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg) @@ -85,3 +87,4 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return ret; } +EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 25a0fbd4b998..224128a96b7f 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -98,6 +98,7 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +struct pci_dev; #ifdef CONFIG_EEH extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); -- cgit v1.2.3 From 9b936c960f22954bfb89f2fefd8f96916bb42908 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 8 Aug 2014 10:39:16 -0600 Subject: drivers/vfio: Enable VFIO if EEH is not supported The existing vfio_pci_open() fails upon error returned from vfio_spapr_pci_eeh_open(), which breaks POWER7's P5IOC2 PHB support which this patch brings back. The patch fixes the issue by dropping the return value of vfio_spapr_pci_eeh_open(). Signed-off-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 6 +----- drivers/vfio/vfio_spapr_eeh.c | 4 ++-- include/linux/vfio.h | 5 ++--- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1651c0769b72..f7825332a325 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -200,11 +200,7 @@ static int vfio_pci_open(void *device_data) if (ret) goto error; - ret = vfio_spapr_pci_eeh_open(vdev->pdev); - if (ret) { - vfio_pci_disable(vdev); - goto error; - } + vfio_spapr_pci_eeh_open(vdev->pdev); } vdev->refcnt++; error: diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 4779cace8036..86dfceb9201f 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -19,9 +19,9 @@ #define DRIVER_DESC "VFIO IOMMU SPAPR EEH" /* We might build address mapping here for "fast" path later */ -int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return eeh_dev_open(pdev); + eeh_dev_open(pdev); } EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 224128a96b7f..d3204115f15d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -100,15 +100,14 @@ extern long vfio_external_check_extension(struct vfio_group *group, struct pci_dev; #ifdef CONFIG_EEH -extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg); #else -static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return 0; } static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) -- cgit v1.2.3 From 00501b531c4723972aa11d6d4ebcf8d6552007c8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 8 Aug 2014 14:19:20 -0700 Subject: mm: memcontrol: rewrite charge API These patches rework memcg charge lifetime to integrate more naturally with the lifetime of user pages. This drastically simplifies the code and reduces charging and uncharging overhead. The most expensive part of charging and uncharging is the page_cgroup bit spinlock, which is removed entirely after this series. Here are the top-10 profile entries of a stress test that reads a 128G sparse file on a freshly booted box, without even a dedicated cgroup (i.e. executing in the root memcg). Before: 15.36% cat [kernel.kallsyms] [k] copy_user_generic_string 13.31% cat [kernel.kallsyms] [k] memset 11.48% cat [kernel.kallsyms] [k] do_mpage_readpage 4.23% cat [kernel.kallsyms] [k] get_page_from_freelist 2.38% cat [kernel.kallsyms] [k] put_page 2.32% cat [kernel.kallsyms] [k] __mem_cgroup_commit_charge 2.18% kswapd0 [kernel.kallsyms] [k] __mem_cgroup_uncharge_common 1.92% kswapd0 [kernel.kallsyms] [k] shrink_page_list 1.86% cat [kernel.kallsyms] [k] __radix_tree_lookup 1.62% cat [kernel.kallsyms] [k] __pagevec_lru_add_fn After: 15.67% cat [kernel.kallsyms] [k] copy_user_generic_string 13.48% cat [kernel.kallsyms] [k] memset 11.42% cat [kernel.kallsyms] [k] do_mpage_readpage 3.98% cat [kernel.kallsyms] [k] get_page_from_freelist 2.46% cat [kernel.kallsyms] [k] put_page 2.13% kswapd0 [kernel.kallsyms] [k] shrink_page_list 1.88% cat [kernel.kallsyms] [k] __radix_tree_lookup 1.67% cat [kernel.kallsyms] [k] __pagevec_lru_add_fn 1.39% kswapd0 [kernel.kallsyms] [k] free_pcppages_bulk 1.30% cat [kernel.kallsyms] [k] kfree As you can see, the memcg footprint has shrunk quite a bit. text data bss dec hex filename 37970 9892 400 48262 bc86 mm/memcontrol.o.old 35239 9892 400 45531 b1db mm/memcontrol.o This patch (of 4): The memcg charge API charges pages before they are rmapped - i.e. have an actual "type" - and so every callsite needs its own set of charge and uncharge functions to know what type is being operated on. Worse, uncharge has to happen from a context that is still type-specific, rather than at the end of the page's lifetime with exclusive access, and so requires a lot of synchronization. Rewrite the charge API to provide a generic set of try_charge(), commit_charge() and cancel_charge() transaction operations, much like what's currently done for swap-in: mem_cgroup_try_charge() attempts to reserve a charge, reclaiming pages from the memcg if necessary. mem_cgroup_commit_charge() commits the page to the charge once it has a valid page->mapping and PageAnon() reliably tells the type. mem_cgroup_cancel_charge() aborts the transaction. This reduces the charge API and enables subsequent patches to drastically simplify uncharging. As pages need to be committed after rmap is established but before they are added to the LRU, page_add_new_anon_rmap() must stop doing LRU additions again. Revive lru_cache_add_active_or_unevictable(). [hughd@google.com: fix shmem_unuse] [hughd@google.com: Add comments on the private use of -EAGAIN] Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Cc: Vladimir Davydov Signed-off-by: Hugh Dickins Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memcg_test.txt | 32 +-- include/linux/memcontrol.h | 53 ++--- include/linux/swap.h | 3 + kernel/events/uprobes.c | 15 +- mm/filemap.c | 21 +- mm/huge_memory.c | 57 +++-- mm/memcontrol.c | 407 ++++++++++++++--------------------- mm/memory.c | 41 ++-- mm/rmap.c | 19 -- mm/shmem.c | 37 ++-- mm/swap.c | 34 +++ mm/swapfile.c | 14 +- 12 files changed, 338 insertions(+), 395 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt index 80ac454704b8..bcf750d3cecd 100644 --- a/Documentation/cgroups/memcg_test.txt +++ b/Documentation/cgroups/memcg_test.txt @@ -24,24 +24,7 @@ Please note that implementation details can be changed. a page/swp_entry may be charged (usage += PAGE_SIZE) at - mem_cgroup_charge_anon() - Called at new page fault and Copy-On-Write. - - mem_cgroup_try_charge_swapin() - Called at do_swap_page() (page fault on swap entry) and swapoff. - Followed by charge-commit-cancel protocol. (With swap accounting) - At commit, a charge recorded in swap_cgroup is removed. - - mem_cgroup_charge_file() - Called at add_to_page_cache() - - mem_cgroup_cache_charge_swapin() - Called at shmem's swapin. - - mem_cgroup_prepare_migration() - Called before migration. "extra" charge is done and followed by - charge-commit-cancel protocol. - At commit, charge against oldpage or newpage will be committed. + mem_cgroup_try_charge() 2. Uncharge a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by @@ -69,19 +52,14 @@ Please note that implementation details can be changed. to new page is committed. At failure, charge to old page is committed. 3. charge-commit-cancel - In some case, we can't know this "charge" is valid or not at charging - (because of races). - To handle such case, there are charge-commit-cancel functions. - mem_cgroup_try_charge_XXX - mem_cgroup_commit_charge_XXX - mem_cgroup_cancel_charge_XXX - these are used in swap-in and migration. + Memcg pages are charged in two steps: + mem_cgroup_try_charge() + mem_cgroup_commit_charge() or mem_cgroup_cancel_charge() At try_charge(), there are no flags to say "this page is charged". at this point, usage += PAGE_SIZE. - At commit(), the function checks the page should be charged or not - and set flags or avoid charging.(usage -= PAGE_SIZE) + At commit(), the page is associated with the memcg. At cancel(), simply usage -= PAGE_SIZE. diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index eb65d29516ca..1a9a096858e0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -54,28 +54,11 @@ struct mem_cgroup_reclaim_cookie { }; #ifdef CONFIG_MEMCG -/* - * All "charge" functions with gfp_mask should use GFP_KERNEL or - * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't - * alloc memory but reclaims memory from all available zones. So, "where I want - * memory from" bits of gfp_mask has no meaning. So any bits of that field is - * available but adding a rule is better. charge functions' gfp_mask should - * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous - * codes. - * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) - */ - -extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); -/* for swap handling */ -extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, gfp_t mask, struct mem_cgroup **memcgp); -extern void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *memcg); -extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg); - -extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); +int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcgp); +void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, + bool lrucare); +void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); @@ -233,30 +216,22 @@ void mem_cgroup_print_bad_page(struct page *page); #else /* CONFIG_MEMCG */ struct mem_cgroup; -static inline int mem_cgroup_charge_anon(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - -static inline int mem_cgroup_charge_file(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - -static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp) +static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, + struct mem_cgroup **memcgp) { + *memcgp = NULL; return 0; } -static inline void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *memcg) +static inline void mem_cgroup_commit_charge(struct page *page, + struct mem_cgroup *memcg, + bool lrucare) { } -static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) +static inline void mem_cgroup_cancel_charge(struct page *page, + struct mem_cgroup *memcg) { } diff --git a/include/linux/swap.h b/include/linux/swap.h index 1eb64043c076..46a649e4e8cd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -320,6 +320,9 @@ extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); +extern void lru_cache_add_active_or_unevictable(struct page *page, + struct vm_area_struct *vma); + /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6f3254e8c137..1d0af8a2c646 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, /* For mmu_notifiers */ const unsigned long mmun_start = addr; const unsigned long mmun_end = addr + PAGE_SIZE; + struct mem_cgroup *memcg; + + err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg); + if (err) + return err; /* For try_to_free_swap() and munlock_vma_page() below */ lock_page(page); @@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); + mem_cgroup_commit_charge(kpage, memcg, false); + lru_cache_add_active_or_unevictable(kpage, vma); if (!PageAnon(page)) { dec_mm_counter(mm, MM_FILEPAGES); @@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, err = 0; unlock: + mem_cgroup_cancel_charge(kpage, memcg); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); unlock_page(page); return err; @@ -315,18 +323,11 @@ retry: if (!new_page) goto put_old; - if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) - goto put_new; - __SetPageUptodate(new_page); copy_highpage(new_page, old_page); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); ret = __replace_page(vma, vaddr, old_page, new_page); - if (ret) - mem_cgroup_uncharge_page(new_page); - -put_new: page_cache_release(new_page); put_old: put_page(old_page); diff --git a/mm/filemap.c b/mm/filemap.c index af19a6b079f5..349a40e35545 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -31,6 +31,7 @@ #include #include #include /* for BUG_ON(!in_atomic()) only */ +#include #include #include #include @@ -548,19 +549,24 @@ static int __add_to_page_cache_locked(struct page *page, pgoff_t offset, gfp_t gfp_mask, void **shadowp) { + int huge = PageHuge(page); + struct mem_cgroup *memcg; int error; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapBacked(page), page); - error = mem_cgroup_charge_file(page, current->mm, - gfp_mask & GFP_RECLAIM_MASK); - if (error) - return error; + if (!huge) { + error = mem_cgroup_try_charge(page, current->mm, + gfp_mask, &memcg); + if (error) + return error; + } error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); if (error) { - mem_cgroup_uncharge_cache_page(page); + if (!huge) + mem_cgroup_cancel_charge(page, memcg); return error; } @@ -575,13 +581,16 @@ static int __add_to_page_cache_locked(struct page *page, goto err_insert; __inc_zone_page_state(page, NR_FILE_PAGES); spin_unlock_irq(&mapping->tree_lock); + if (!huge) + mem_cgroup_commit_charge(page, memcg, false); trace_mm_filemap_add_to_page_cache(page); return 0; err_insert: page->mapping = NULL; /* Leave page->index set: truncation relies upon it */ spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); + if (!huge) + mem_cgroup_cancel_charge(page, memcg); page_cache_release(page); return error; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3630d577e987..d9a21d06b862 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, unsigned long haddr, pmd_t *pmd, struct page *page) { + struct mem_cgroup *memcg; pgtable_t pgtable; spinlock_t *ptl; VM_BUG_ON_PAGE(!PageCompound(page), page); + + if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg)) + return VM_FAULT_OOM; + pgtable = pte_alloc_one(mm, haddr); - if (unlikely(!pgtable)) + if (unlikely(!pgtable)) { + mem_cgroup_cancel_charge(page, memcg); return VM_FAULT_OOM; + } clear_huge_page(page, haddr, HPAGE_PMD_NR); /* @@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_none(*pmd))) { spin_unlock(ptl); - mem_cgroup_uncharge_page(page); + mem_cgroup_cancel_charge(page, memcg); put_page(page); pte_free(mm, pgtable); } else { @@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, entry = mk_huge_pmd(page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); @@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) { - put_page(page); - count_vm_event(THP_FAULT_FALLBACK); - return VM_FAULT_FALLBACK; - } if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { - mem_cgroup_uncharge_page(page); put_page(page); count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, struct page *page, unsigned long haddr) { + struct mem_cgroup *memcg; spinlock_t *ptl; pgtable_t pgtable; pmd_t _pmd; @@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, __GFP_OTHER_NODE, vma, address, page_to_nid(page)); if (unlikely(!pages[i] || - mem_cgroup_charge_anon(pages[i], mm, - GFP_KERNEL))) { + mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL, + &memcg))) { if (pages[i]) put_page(pages[i]); - mem_cgroup_uncharge_start(); while (--i >= 0) { - mem_cgroup_uncharge_page(pages[i]); + memcg = (void *)page_private(pages[i]); + set_page_private(pages[i], 0); + mem_cgroup_cancel_charge(pages[i], memcg); put_page(pages[i]); } - mem_cgroup_uncharge_end(); kfree(pages); ret |= VM_FAULT_OOM; goto out; } + set_page_private(pages[i], (unsigned long)memcg); } for (i = 0; i < HPAGE_PMD_NR; i++) { @@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, pte_t *pte, entry; entry = mk_pte(pages[i], vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); + memcg = (void *)page_private(pages[i]); + set_page_private(pages[i], 0); page_add_new_anon_rmap(pages[i], vma, haddr); + mem_cgroup_commit_charge(pages[i], memcg, false); + lru_cache_add_active_or_unevictable(pages[i], vma); pte = pte_offset_map(&_pmd, haddr); VM_BUG_ON(!pte_none(*pte)); set_pte_at(mm, haddr, pte, entry); @@ -1065,12 +1074,12 @@ out: out_free_pages: spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); - mem_cgroup_uncharge_start(); for (i = 0; i < HPAGE_PMD_NR; i++) { - mem_cgroup_uncharge_page(pages[i]); + memcg = (void *)page_private(pages[i]); + set_page_private(pages[i], 0); + mem_cgroup_cancel_charge(pages[i], memcg); put_page(pages[i]); } - mem_cgroup_uncharge_end(); kfree(pages); goto out; } @@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; int ret = 0; struct page *page = NULL, *new_page; + struct mem_cgroup *memcg; unsigned long haddr; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -1132,7 +1142,8 @@ alloc: goto out; } - if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) { + if (unlikely(mem_cgroup_try_charge(new_page, mm, + GFP_TRANSHUGE, &memcg))) { put_page(new_page); if (page) { split_huge_page(page); @@ -1161,7 +1172,7 @@ alloc: put_user_huge_page(page); if (unlikely(!pmd_same(*pmd, orig_pmd))) { spin_unlock(ptl); - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); put_page(new_page); goto out_mn; } else { @@ -1170,6 +1181,8 @@ alloc: entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); pmdp_clear_flush(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); set_pmd_at(mm, haddr, pmd, entry); update_mmu_cache_pmd(vma, address, pmd); if (!page) { @@ -2413,6 +2426,7 @@ static void collapse_huge_page(struct mm_struct *mm, spinlock_t *pmd_ptl, *pte_ptl; int isolated; unsigned long hstart, hend; + struct mem_cgroup *memcg; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -2423,7 +2437,8 @@ static void collapse_huge_page(struct mm_struct *mm, if (!new_page) return; - if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) + if (unlikely(mem_cgroup_try_charge(new_page, mm, + GFP_TRANSHUGE, &memcg))) return; /* @@ -2510,6 +2525,8 @@ static void collapse_huge_page(struct mm_struct *mm, spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); page_add_new_anon_rmap(new_page, vma, address); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache_pmd(vma, address, pmd); @@ -2523,7 +2540,7 @@ out_up_write: return; out: - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); goto out_up_write; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 90dc501eaf3f..1cbe1e54ff5f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2551,17 +2551,8 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, return NOTIFY_OK; } -/** - * mem_cgroup_try_charge - try charging a memcg - * @memcg: memcg to charge - * @nr_pages: number of pages to charge - * - * Returns 0 if @memcg was charged successfully, -EINTR if the charge - * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed. - */ -static int mem_cgroup_try_charge(struct mem_cgroup *memcg, - gfp_t gfp_mask, - unsigned int nr_pages) +static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, + unsigned int nr_pages) { unsigned int batch = max(CHARGE_BATCH, nr_pages); int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; @@ -2660,41 +2651,7 @@ done: return ret; } -/** - * mem_cgroup_try_charge_mm - try charging a mm - * @mm: mm_struct to charge - * @nr_pages: number of pages to charge - * @oom: trigger OOM if reclaim fails - * - * Returns the charged mem_cgroup associated with the given mm_struct or - * NULL the charge failed. - */ -static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, - gfp_t gfp_mask, - unsigned int nr_pages) - -{ - struct mem_cgroup *memcg; - int ret; - - memcg = get_mem_cgroup_from_mm(mm); - ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages); - css_put(&memcg->css); - if (ret == -EINTR) - memcg = root_mem_cgroup; - else if (ret) - memcg = NULL; - - return memcg; -} - -/* - * Somemtimes we have to undo a charge we got by try_charge(). - * This function is for that and do uncharge, put css's refcnt. - * gotten by try_charge(). - */ -static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, - unsigned int nr_pages) +static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) { unsigned long bytes = nr_pages * PAGE_SIZE; @@ -2760,17 +2717,13 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return memcg; } -static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, - struct page *page, - unsigned int nr_pages, - enum charge_type ctype, - bool lrucare) +static void commit_charge(struct page *page, struct mem_cgroup *memcg, + unsigned int nr_pages, bool anon, bool lrucare) { struct page_cgroup *pc = lookup_page_cgroup(page); struct zone *uninitialized_var(zone); struct lruvec *lruvec; bool was_on_lru = false; - bool anon; lock_page_cgroup(pc); VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); @@ -2807,11 +2760,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, spin_unlock_irq(&zone->lru_lock); } - if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON) - anon = true; - else - anon = false; - mem_cgroup_charge_statistics(memcg, page, anon, nr_pages); unlock_page_cgroup(pc); @@ -2882,21 +2830,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) if (ret) return ret; - ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT); + ret = try_charge(memcg, gfp, size >> PAGE_SHIFT); if (ret == -EINTR) { /* - * mem_cgroup_try_charge() chosed to bypass to root due to - * OOM kill or fatal signal. Since our only options are to - * either fail the allocation or charge it to this cgroup, do - * it as a temporary condition. But we can't fail. From a - * kmem/slab perspective, the cache has already been selected, - * by mem_cgroup_kmem_get_cache(), so it is too late to change + * try_charge() chose to bypass to root due to OOM kill or + * fatal signal. Since our only options are to either fail + * the allocation or charge it to this cgroup, do it as a + * temporary condition. But we can't fail. From a kmem/slab + * perspective, the cache has already been selected, by + * mem_cgroup_kmem_get_cache(), so it is too late to change * our minds. * * This condition will only trigger if the task entered - * memcg_charge_kmem in a sane state, but was OOM-killed during - * mem_cgroup_try_charge() above. Tasks that were already - * dying when the allocation triggers should have been already + * memcg_charge_kmem in a sane state, but was OOM-killed + * during try_charge() above. Tasks that were already dying + * when the allocation triggers should have been already * directed to the root cgroup in memcontrol.h */ res_counter_charge_nofail(&memcg->res, size, &fail_res); @@ -3618,164 +3566,6 @@ out: return ret; } -int mem_cgroup_charge_anon(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - unsigned int nr_pages = 1; - struct mem_cgroup *memcg; - - if (mem_cgroup_disabled()) - return 0; - - VM_BUG_ON_PAGE(page_mapped(page), page); - VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); - VM_BUG_ON(!mm); - - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - } - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages); - if (!memcg) - return -ENOMEM; - __mem_cgroup_commit_charge(memcg, page, nr_pages, - MEM_CGROUP_CHARGE_TYPE_ANON, false); - return 0; -} - -/* - * While swap-in, try_charge -> commit or cancel, the page is locked. - * And when try_charge() successfully returns, one refcnt to memcg without - * struct page_cgroup is acquired. This refcnt will be consumed by - * "commit()" or removed by "cancel()" - */ -static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, - gfp_t mask, - struct mem_cgroup **memcgp) -{ - struct mem_cgroup *memcg = NULL; - struct page_cgroup *pc; - int ret; - - pc = lookup_page_cgroup(page); - /* - * Every swap fault against a single page tries to charge the - * page, bail as early as possible. shmem_unuse() encounters - * already charged pages, too. The USED bit is protected by - * the page lock, which serializes swap cache removal, which - * in turn serializes uncharging. - */ - if (PageCgroupUsed(pc)) - goto out; - if (do_swap_account) - memcg = try_get_mem_cgroup_from_page(page); - if (!memcg) - memcg = get_mem_cgroup_from_mm(mm); - ret = mem_cgroup_try_charge(memcg, mask, 1); - css_put(&memcg->css); - if (ret == -EINTR) - memcg = root_mem_cgroup; - else if (ret) - return ret; -out: - *memcgp = memcg; - return 0; -} - -int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, - gfp_t gfp_mask, struct mem_cgroup **memcgp) -{ - if (mem_cgroup_disabled()) { - *memcgp = NULL; - return 0; - } - /* - * A racing thread's fault, or swapoff, may have already - * updated the pte, and even removed page from swap cache: in - * those cases unuse_pte()'s pte_same() test will fail; but - * there's also a KSM case which does need to charge the page. - */ - if (!PageSwapCache(page)) { - struct mem_cgroup *memcg; - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1); - if (!memcg) - return -ENOMEM; - *memcgp = memcg; - return 0; - } - return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); -} - -void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) -{ - if (mem_cgroup_disabled()) - return; - if (!memcg) - return; - __mem_cgroup_cancel_charge(memcg, 1); -} - -static void -__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, - enum charge_type ctype) -{ - if (mem_cgroup_disabled()) - return; - if (!memcg) - return; - - __mem_cgroup_commit_charge(memcg, page, 1, ctype, true); - /* - * Now swap is on-memory. This means this page may be - * counted both as mem and swap....double count. - * Fix it by uncharging from memsw. Basically, this SwapCache is stable - * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page() - * may call delete_from_swap_cache() before reach here. - */ - if (do_swap_account && PageSwapCache(page)) { - swp_entry_t ent = {.val = page_private(page)}; - mem_cgroup_uncharge_swap(ent); - } -} - -void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *memcg) -{ - __mem_cgroup_commit_charge_swapin(page, memcg, - MEM_CGROUP_CHARGE_TYPE_ANON); -} - -int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) -{ - enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; - struct mem_cgroup *memcg; - int ret; - - if (mem_cgroup_disabled()) - return 0; - if (PageCompound(page)) - return 0; - - if (PageSwapCache(page)) { /* shmem */ - ret = __mem_cgroup_try_charge_swapin(mm, page, - gfp_mask, &memcg); - if (ret) - return ret; - __mem_cgroup_commit_charge_swapin(page, memcg, type); - return 0; - } - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1); - if (!memcg) - return -ENOMEM; - __mem_cgroup_commit_charge(memcg, page, 1, type, false); - return 0; -} - static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages, const enum charge_type ctype) @@ -4122,7 +3912,6 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, struct mem_cgroup *memcg = NULL; unsigned int nr_pages = 1; struct page_cgroup *pc; - enum charge_type ctype; *memcgp = NULL; @@ -4184,16 +3973,12 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, * page. In the case new page is migrated but not remapped, new page's * mapcount will be finally 0 and we call uncharge in end_migration(). */ - if (PageAnon(page)) - ctype = MEM_CGROUP_CHARGE_TYPE_ANON; - else - ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; /* * The page is committed to the memcg, but it's not actually * charged to the res_counter since we plan on replacing the * old one and only one page is going to be left afterwards. */ - __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false); + commit_charge(newpage, memcg, nr_pages, PageAnon(page), false); } /* remove redundant charge if migration failed*/ @@ -4252,7 +4037,6 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, { struct mem_cgroup *memcg = NULL; struct page_cgroup *pc; - enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; if (mem_cgroup_disabled()) return; @@ -4278,7 +4062,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, * the newpage may be on LRU(or pagevec for LRU) already. We lock * LRU while we overwrite pc->mem_cgroup. */ - __mem_cgroup_commit_charge(memcg, newpage, 1, type, true); + commit_charge(newpage, memcg, 1, false, true); } #ifdef CONFIG_DEBUG_VM @@ -6319,20 +6103,19 @@ static int mem_cgroup_do_precharge(unsigned long count) int ret; /* Try a single bulk charge without reclaim first */ - ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); + ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); if (!ret) { mc.precharge += count; return ret; } if (ret == -EINTR) { - __mem_cgroup_cancel_charge(root_mem_cgroup, count); + cancel_charge(root_mem_cgroup, count); return ret; } /* Try charges one by one with reclaim */ while (count--) { - ret = mem_cgroup_try_charge(mc.to, - GFP_KERNEL & ~__GFP_NORETRY, 1); + ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1); /* * In case of failure, any residual charges against * mc.to will be dropped by mem_cgroup_clear_mc() @@ -6340,7 +6123,7 @@ static int mem_cgroup_do_precharge(unsigned long count) * bypassed to root right away or they'll be lost. */ if (ret == -EINTR) - __mem_cgroup_cancel_charge(root_mem_cgroup, 1); + cancel_charge(root_mem_cgroup, 1); if (ret) return ret; mc.precharge++; @@ -6609,7 +6392,7 @@ static void __mem_cgroup_clear_mc(void) /* we must uncharge all the leftover precharges from mc.to */ if (mc.precharge) { - __mem_cgroup_cancel_charge(mc.to, mc.precharge); + cancel_charge(mc.to, mc.precharge); mc.precharge = 0; } /* @@ -6617,7 +6400,7 @@ static void __mem_cgroup_clear_mc(void) * we must uncharge here. */ if (mc.moved_charge) { - __mem_cgroup_cancel_charge(mc.from, mc.moved_charge); + cancel_charge(mc.from, mc.moved_charge); mc.moved_charge = 0; } /* we must fixup refcnts and charges */ @@ -6946,6 +6729,150 @@ static void __init enable_swap_cgroup(void) } #endif +/** + * mem_cgroup_try_charge - try charging a page + * @page: page to charge + * @mm: mm context of the victim + * @gfp_mask: reclaim mode + * @memcgp: charged memcg return + * + * Try to charge @page to the memcg that @mm belongs to, reclaiming + * pages according to @gfp_mask if necessary. + * + * Returns 0 on success, with *@memcgp pointing to the charged memcg. + * Otherwise, an error code is returned. + * + * After page->mapping has been set up, the caller must finalize the + * charge with mem_cgroup_commit_charge(). Or abort the transaction + * with mem_cgroup_cancel_charge() in case page instantiation fails. + */ +int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcgp) +{ + struct mem_cgroup *memcg = NULL; + unsigned int nr_pages = 1; + int ret = 0; + + if (mem_cgroup_disabled()) + goto out; + + if (PageSwapCache(page)) { + struct page_cgroup *pc = lookup_page_cgroup(page); + /* + * Every swap fault against a single page tries to charge the + * page, bail as early as possible. shmem_unuse() encounters + * already charged pages, too. The USED bit is protected by + * the page lock, which serializes swap cache removal, which + * in turn serializes uncharging. + */ + if (PageCgroupUsed(pc)) + goto out; + } + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + if (do_swap_account && PageSwapCache(page)) + memcg = try_get_mem_cgroup_from_page(page); + if (!memcg) + memcg = get_mem_cgroup_from_mm(mm); + + ret = try_charge(memcg, gfp_mask, nr_pages); + + css_put(&memcg->css); + + if (ret == -EINTR) { + memcg = root_mem_cgroup; + ret = 0; + } +out: + *memcgp = memcg; + return ret; +} + +/** + * mem_cgroup_commit_charge - commit a page charge + * @page: page to charge + * @memcg: memcg to charge the page to + * @lrucare: page might be on LRU already + * + * Finalize a charge transaction started by mem_cgroup_try_charge(), + * after page->mapping has been set up. This must happen atomically + * as part of the page instantiation, i.e. under the page table lock + * for anonymous pages, under the page lock for page and swap cache. + * + * In addition, the page must not be on the LRU during the commit, to + * prevent racing with task migration. If it might be, use @lrucare. + * + * Use mem_cgroup_cancel_charge() to cancel the transaction instead. + */ +void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, + bool lrucare) +{ + unsigned int nr_pages = 1; + + VM_BUG_ON_PAGE(!page->mapping, page); + VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page); + + if (mem_cgroup_disabled()) + return; + /* + * Swap faults will attempt to charge the same page multiple + * times. But reuse_swap_page() might have removed the page + * from swapcache already, so we can't check PageSwapCache(). + */ + if (!memcg) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare); + + if (do_swap_account && PageSwapCache(page)) { + swp_entry_t entry = { .val = page_private(page) }; + /* + * The swap entry might not get freed for a long time, + * let's not wait for it. The page already received a + * memory+swap charge, drop the swap entry duplicate. + */ + mem_cgroup_uncharge_swap(entry); + } +} + +/** + * mem_cgroup_cancel_charge - cancel a page charge + * @page: page to charge + * @memcg: memcg to charge the page to + * + * Cancel a charge transaction started by mem_cgroup_try_charge(). + */ +void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) +{ + unsigned int nr_pages = 1; + + if (mem_cgroup_disabled()) + return; + /* + * Swap faults will attempt to charge the same page multiple + * times. But reuse_swap_page() might have removed the page + * from swapcache already, so we can't check PageSwapCache(). + */ + if (!memcg) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + cancel_charge(memcg, nr_pages); +} + /* * subsys_initcall() for memory controller. * diff --git a/mm/memory.c b/mm/memory.c index 5c55270729f7..6d7648773dc4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2049,6 +2049,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *dirty_page = NULL; unsigned long mmun_start = 0; /* For mmu_notifiers */ unsigned long mmun_end = 0; /* For mmu_notifiers */ + struct mem_cgroup *memcg; old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) { @@ -2204,7 +2205,7 @@ gotten: } __SetPageUptodate(new_page); - if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) + if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) goto oom_free_new; mmun_start = address & PAGE_MASK; @@ -2234,6 +2235,8 @@ gotten: */ ptep_clear_flush(vma, address, page_table); page_add_new_anon_rmap(new_page, vma, address); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); /* * We call the notify macro here because, when using secondary * mmu page tables (such as kvm shadow page tables), we want the @@ -2271,7 +2274,7 @@ gotten: new_page = old_page; ret |= VM_FAULT_WRITE; } else - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); if (new_page) page_cache_release(new_page); @@ -2410,10 +2413,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, { spinlock_t *ptl; struct page *page, *swapcache; + struct mem_cgroup *memcg; swp_entry_t entry; pte_t pte; int locked; - struct mem_cgroup *ptr; int exclusive = 0; int ret = 0; @@ -2489,7 +2492,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_page; } - if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { + if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) { ret = VM_FAULT_OOM; goto out_page; } @@ -2514,10 +2517,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * while the page is counted on swap but not yet in mapcount i.e. * before page_add_anon_rmap() and swap_free(); try_to_free_swap() * must be called after the swap_free(), or it will never succeed. - * Because delete_from_swap_page() may be called by reuse_swap_page(), - * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry - * in page->private. In this case, a record in swap_cgroup is silently - * discarded at swap_free(). */ inc_mm_counter_fast(mm, MM_ANONPAGES); @@ -2533,12 +2532,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (pte_swp_soft_dirty(orig_pte)) pte = pte_mksoft_dirty(pte); set_pte_at(mm, address, page_table, pte); - if (page == swapcache) + if (page == swapcache) { do_page_add_anon_rmap(page, vma, address, exclusive); - else /* ksm created a completely new copy */ + mem_cgroup_commit_charge(page, memcg, true); + } else { /* ksm created a completely new copy */ page_add_new_anon_rmap(page, vma, address); - /* It's better to call commit-charge after rmap is established */ - mem_cgroup_commit_charge_swapin(page, ptr); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); + } swap_free(entry); if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) @@ -2571,7 +2572,7 @@ unlock: out: return ret; out_nomap: - mem_cgroup_cancel_charge_swapin(ptr); + mem_cgroup_cancel_charge(page, memcg); pte_unmap_unlock(page_table, ptl); out_page: unlock_page(page); @@ -2627,6 +2628,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags) { + struct mem_cgroup *memcg; struct page *page; spinlock_t *ptl; pte_t entry; @@ -2660,7 +2662,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, */ __SetPageUptodate(page); - if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL)) + if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) goto oom_free_page; entry = mk_pte(page, vma->vm_page_prot); @@ -2673,6 +2675,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter_fast(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); setpte: set_pte_at(mm, address, page_table, entry); @@ -2682,7 +2686,7 @@ unlock: pte_unmap_unlock(page_table, ptl); return 0; release: - mem_cgroup_uncharge_page(page); + mem_cgroup_cancel_charge(page, memcg); page_cache_release(page); goto unlock; oom_free_page: @@ -2919,6 +2923,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, pgoff_t pgoff, unsigned int flags, pte_t orig_pte) { struct page *fault_page, *new_page; + struct mem_cgroup *memcg; spinlock_t *ptl; pte_t *pte; int ret; @@ -2930,7 +2935,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (!new_page) return VM_FAULT_OOM; - if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) { + if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) { page_cache_release(new_page); return VM_FAULT_OOM; } @@ -2950,12 +2955,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto uncharge_out; } do_set_pte(vma, address, new_page, pte, true, true); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); pte_unmap_unlock(pte, ptl); unlock_page(fault_page); page_cache_release(fault_page); return ret; uncharge_out: - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); page_cache_release(new_page); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 22a4a7699cdb..f56b5ed78128 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1032,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page, __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, hpage_nr_pages(page)); __page_set_anon_rmap(page, vma, address, 1); - - VM_BUG_ON_PAGE(PageLRU(page), page); - if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) { - SetPageActive(page); - lru_cache_add(page); - return; - } - - if (!TestSetPageMlocked(page)) { - /* - * We use the irq-unsafe __mod_zone_page_stat because this - * counter is not modified from interrupt context, and the pte - * lock is held(spinlock), which implies preemption disabled. - */ - __mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGMLOCKED); - } - add_page_to_unevictable_list(page); } /** diff --git a/mm/shmem.c b/mm/shmem.c index 302d1cf7ad07..1f1a8085538b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -621,7 +621,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, radswap = swp_to_radix_entry(swap); index = radix_tree_locate_item(&mapping->page_tree, radswap); if (index == -1) - return 0; + return -EAGAIN; /* tell shmem_unuse we found nothing */ /* * Move _head_ to start search for next from here. @@ -680,7 +680,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, spin_unlock(&info->lock); swap_free(swap); } - error = 1; /* not an error, but entry was found */ } return error; } @@ -692,7 +691,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page) { struct list_head *this, *next; struct shmem_inode_info *info; - int found = 0; + struct mem_cgroup *memcg; int error = 0; /* @@ -707,26 +706,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page) * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. */ - error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL); + error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg); if (error) goto out; /* No radix_tree_preload: swap entry keeps a place for page in tree */ + error = -EAGAIN; mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(this, next, &shmem_swaplist) { info = list_entry(this, struct shmem_inode_info, swaplist); if (info->swapped) - found = shmem_unuse_inode(info, swap, &page); + error = shmem_unuse_inode(info, swap, &page); else list_del_init(&info->swaplist); cond_resched(); - if (found) + if (error != -EAGAIN) break; + /* found nothing in this: move on to search the next */ } mutex_unlock(&shmem_swaplist_mutex); - if (found < 0) - error = found; + if (error) { + if (error != -ENOMEM) + error = 0; + mem_cgroup_cancel_charge(page, memcg); + } else + mem_cgroup_commit_charge(page, memcg, true); out: unlock_page(page); page_cache_release(page); @@ -1030,6 +1035,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info; struct shmem_sb_info *sbinfo; + struct mem_cgroup *memcg; struct page *page; swp_entry_t swap; int error; @@ -1108,8 +1114,7 @@ repeat: goto failed; } - error = mem_cgroup_charge_file(page, current->mm, - gfp & GFP_RECLAIM_MASK); + error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg); if (!error) { error = shmem_add_to_page_cache(page, mapping, index, swp_to_radix_entry(swap)); @@ -1125,12 +1130,16 @@ repeat: * Reset swap.val? No, leave it so "failed" goes back to * "repeat": reading a hole and writing should succeed. */ - if (error) + if (error) { + mem_cgroup_cancel_charge(page, memcg); delete_from_swap_cache(page); + } } if (error) goto failed; + mem_cgroup_commit_charge(page, memcg, true); + spin_lock(&info->lock); info->swapped--; shmem_recalc_inode(inode); @@ -1168,8 +1177,7 @@ repeat: if (sgp == SGP_WRITE) __SetPageReferenced(page); - error = mem_cgroup_charge_file(page, current->mm, - gfp & GFP_RECLAIM_MASK); + error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg); if (error) goto decused; error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); @@ -1179,9 +1187,10 @@ repeat: radix_tree_preload_end(); } if (error) { - mem_cgroup_uncharge_cache_page(page); + mem_cgroup_cancel_charge(page, memcg); goto decused; } + mem_cgroup_commit_charge(page, memcg, false); lru_cache_add_anon(page); spin_lock(&info->lock); diff --git a/mm/swap.c b/mm/swap.c index c789d01c9ec3..3baca701bb78 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -687,6 +687,40 @@ void add_page_to_unevictable_list(struct page *page) spin_unlock_irq(&zone->lru_lock); } +/** + * lru_cache_add_active_or_unevictable + * @page: the page to be added to LRU + * @vma: vma in which page is mapped for determining reclaimability + * + * Place @page on the active or unevictable LRU list, depending on its + * evictability. Note that if the page is not evictable, it goes + * directly back onto it's zone's unevictable list, it does NOT use a + * per cpu pagevec. + */ +void lru_cache_add_active_or_unevictable(struct page *page, + struct vm_area_struct *vma) +{ + VM_BUG_ON_PAGE(PageLRU(page), page); + + if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) { + SetPageActive(page); + lru_cache_add(page); + return; + } + + if (!TestSetPageMlocked(page)) { + /* + * We use the irq-unsafe __mod_zone_page_stat because this + * counter is not modified from interrupt context, and the pte + * lock is held(spinlock), which implies preemption disabled. + */ + __mod_zone_page_state(page_zone(page), NR_MLOCK, + hpage_nr_pages(page)); + count_vm_event(UNEVICTABLE_PGMLOCKED); + } + add_page_to_unevictable_list(page); +} + /* * If the page can not be invalidated, it is moved to the * inactive list to speed up its reclaim. It is moved to the diff --git a/mm/swapfile.c b/mm/swapfile.c index 4c524f7bd0bf..0883b4912ff7 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1106,15 +1106,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, if (unlikely(!page)) return -ENOMEM; - if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, - GFP_KERNEL, &memcg)) { + if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) { ret = -ENOMEM; goto out_nolock; } pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) { - mem_cgroup_cancel_charge_swapin(memcg); + mem_cgroup_cancel_charge(page, memcg); ret = 0; goto out; } @@ -1124,11 +1123,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, get_page(page); set_pte_at(vma->vm_mm, addr, pte, pte_mkold(mk_pte(page, vma->vm_page_prot))); - if (page == swapcache) + if (page == swapcache) { page_add_anon_rmap(page, vma, addr); - else /* ksm created a completely new copy */ + mem_cgroup_commit_charge(page, memcg, true); + } else { /* ksm created a completely new copy */ page_add_new_anon_rmap(page, vma, addr); - mem_cgroup_commit_charge_swapin(page, memcg); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); + } swap_free(entry); /* * Move the page to the active list so it is not -- cgit v1.2.3 From 0a31bc97c80c3fa87b32c091d9a930ac19cd0c40 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 8 Aug 2014 14:19:22 -0700 Subject: mm: memcontrol: rewrite uncharge API The memcg uncharging code that is involved towards the end of a page's lifetime - truncation, reclaim, swapout, migration - is impressively complicated and fragile. Because anonymous and file pages were always charged before they had their page->mapping established, uncharges had to happen when the page type could still be known from the context; as in unmap for anonymous, page cache removal for file and shmem pages, and swap cache truncation for swap pages. However, these operations happen well before the page is actually freed, and so a lot of synchronization is necessary: - Charging, uncharging, page migration, and charge migration all need to take a per-page bit spinlock as they could race with uncharging. - Swap cache truncation happens during both swap-in and swap-out, and possibly repeatedly before the page is actually freed. This means that the memcg swapout code is called from many contexts that make no sense and it has to figure out the direction from page state to make sure memory and memory+swap are always correctly charged. - On page migration, the old page might be unmapped but then reused, so memcg code has to prevent untimely uncharging in that case. Because this code - which should be a simple charge transfer - is so special-cased, it is not reusable for replace_page_cache(). But now that charged pages always have a page->mapping, introduce mem_cgroup_uncharge(), which is called after the final put_page(), when we know for sure that nobody is looking at the page anymore. For page migration, introduce mem_cgroup_migrate(), which is called after the migration is successful and the new page is fully rmapped. Because the old page is no longer uncharged after migration, prevent double charges by decoupling the page's memcg association (PCG_USED and pc->mem_cgroup) from the page holding an actual charge. The new bits PCG_MEM and PCG_MEMSW represent the respective charges and are transferred to the new page during migration. mem_cgroup_migrate() is suitable for replace_page_cache() as well, which gets rid of mem_cgroup_replace_page_cache(). However, care needs to be taken because both the source and the target page can already be charged and on the LRU when fuse is splicing: grab the page lock on the charge moving side to prevent changing pc->mem_cgroup of a page under migration. Also, the lruvecs of both pages change as we uncharge the old and charge the new during migration, and putback may race with us, so grab the lru lock and isolate the pages iff on LRU to prevent races and ensure the pages are on the right lruvec afterward. Swap accounting is massively simplified: because the page is no longer uncharged as early as swap cache deletion, a new mem_cgroup_swapout() can transfer the page's memory+swap charge (PCG_MEMSW) to the swap entry before the final put_page() in page reclaim. Finally, page_cgroup changes are now protected by whatever protection the page itself offers: anonymous pages are charged under the page table lock, whereas page cache insertions, swapin, and migration hold the page lock. Uncharging happens under full exclusion with no outstanding references. Charging and uncharging also ensure that the page is off-LRU, which serializes against charge migration. Remove the very costly page_cgroup lock and set pc->flags non-atomically. [mhocko@suse.cz: mem_cgroup_charge_statistics needs preempt_disable] [vdavydov@parallels.com: fix flags definition] Signed-off-by: Johannes Weiner Cc: Hugh Dickins Cc: Tejun Heo Cc: Vladimir Davydov Tested-by: Jet Chen Acked-by: Michal Hocko Tested-by: Felipe Balbi Signed-off-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memcg_test.txt | 128 +----- include/linux/memcontrol.h | 49 +-- include/linux/page_cgroup.h | 43 +- include/linux/swap.h | 12 +- mm/filemap.c | 4 +- mm/memcontrol.c | 828 ++++++++++++++--------------------- mm/memory.c | 2 - mm/migrate.c | 38 +- mm/rmap.c | 1 - mm/shmem.c | 8 +- mm/swap.c | 6 + mm/swap_state.c | 8 +- mm/swapfile.c | 7 +- mm/truncate.c | 9 - mm/vmscan.c | 12 +- mm/zswap.c | 2 +- 16 files changed, 389 insertions(+), 768 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt index bcf750d3cecd..8870b0212150 100644 --- a/Documentation/cgroups/memcg_test.txt +++ b/Documentation/cgroups/memcg_test.txt @@ -29,28 +29,13 @@ Please note that implementation details can be changed. 2. Uncharge a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by - mem_cgroup_uncharge_page() - Called when an anonymous page is fully unmapped. I.e., mapcount goes - to 0. If the page is SwapCache, uncharge is delayed until - mem_cgroup_uncharge_swapcache(). - - mem_cgroup_uncharge_cache_page() - Called when a page-cache is deleted from radix-tree. If the page is - SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache(). - - mem_cgroup_uncharge_swapcache() - Called when SwapCache is removed from radix-tree. The charge itself - is moved to swap_cgroup. (If mem+swap controller is disabled, no - charge to swap occurs.) + mem_cgroup_uncharge() + Called when a page's refcount goes down to 0. mem_cgroup_uncharge_swap() Called when swp_entry's refcnt goes down to 0. A charge against swap disappears. - mem_cgroup_end_migration(old, new) - At success of migration old is uncharged (if necessary), a charge - to new page is committed. At failure, charge to old page is committed. - 3. charge-commit-cancel Memcg pages are charged in two steps: mem_cgroup_try_charge() @@ -69,18 +54,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. Anonymous page is newly allocated at - page fault into MAP_ANONYMOUS mapping. - Copy-On-Write. - It is charged right after it's allocated before doing any page table - related operations. Of course, it's uncharged when another page is used - for the fault address. - - At freeing anonymous page (by exit() or munmap()), zap_pte() is called - and pages for ptes are freed one by one.(see mm/memory.c). Uncharges - are done at page_remove_rmap() when page_mapcount() goes down to 0. - - Another page freeing is by page-reclaim (vmscan.c) and anonymous - pages are swapped out. In this case, the page is marked as - PageSwapCache(). uncharge() routine doesn't uncharge the page marked - as SwapCache(). It's delayed until __delete_from_swap_cache(). 4.1 Swap-in. At swap-in, the page is taken from swap-cache. There are 2 cases. @@ -89,41 +62,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. (b) If the SwapCache has been mapped by processes, it has been charged already. - This swap-in is one of the most complicated work. In do_swap_page(), - following events occur when pte is unchanged. - - (1) the page (SwapCache) is looked up. - (2) lock_page() - (3) try_charge_swapin() - (4) reuse_swap_page() (may call delete_swap_cache()) - (5) commit_charge_swapin() - (6) swap_free(). - - Considering following situation for example. - - (A) The page has not been charged before (2) and reuse_swap_page() - doesn't call delete_from_swap_cache(). - (B) The page has not been charged before (2) and reuse_swap_page() - calls delete_from_swap_cache(). - (C) The page has been charged before (2) and reuse_swap_page() doesn't - call delete_from_swap_cache(). - (D) The page has been charged before (2) and reuse_swap_page() calls - delete_from_swap_cache(). - - memory.usage/memsw.usage changes to this page/swp_entry will be - Case (A) (B) (C) (D) - Event - Before (2) 0/ 1 0/ 1 1/ 1 1/ 1 - =========================================== - (3) +1/+1 +1/+1 +1/+1 +1/+1 - (4) - 0/ 0 - -1/ 0 - (5) 0/-1 0/ 0 -1/-1 0/ 0 - (6) - 0/-1 - 0/-1 - =========================================== - Result 1/ 1 1/ 1 1/ 1 1/ 1 - - In any cases, charges to this page should be 1/ 1. - 4.2 Swap-out. At swap-out, typical state transition is below. @@ -136,28 +74,20 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. swp_entry's refcnt -= 1. - At (b), the page is marked as SwapCache and not uncharged. - At (d), the page is removed from SwapCache and a charge in page_cgroup - is moved to swap_cgroup. - Finally, at task exit, (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. - Here, a charge in swap_cgroup disappears. 5. Page Cache Page Cache is charged at - add_to_page_cache_locked(). - uncharged at - - __remove_from_page_cache(). - The logic is very clear. (About migration, see below) Note: __remove_from_page_cache() is called by remove_from_page_cache() and __remove_mapping(). 6. Shmem(tmpfs) Page Cache - Memcg's charge/uncharge have special handlers of shmem. The best way - to understand shmem's page state transition is to read mm/shmem.c. + The best way to understand shmem's page state transition is to read + mm/shmem.c. But brief explanation of the behavior of memcg around shmem will be helpful to understand the logic. @@ -170,56 +100,10 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. It's charged when... - A new page is added to shmem's radix-tree. - A swp page is read. (move a charge from swap_cgroup to page_cgroup) - It's uncharged when - - A page is removed from radix-tree and not SwapCache. - - When SwapCache is removed, a charge is moved to swap_cgroup. - - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup - disappears. 7. Page Migration - One of the most complicated functions is page-migration-handler. - Memcg has 2 routines. Assume that we are migrating a page's contents - from OLDPAGE to NEWPAGE. - - Usual migration logic is.. - (a) remove the page from LRU. - (b) allocate NEWPAGE (migration target) - (c) lock by lock_page(). - (d) unmap all mappings. - (e-1) If necessary, replace entry in radix-tree. - (e-2) move contents of a page. - (f) map all mappings again. - (g) pushback the page to LRU. - (-) OLDPAGE will be freed. - - Before (g), memcg should complete all necessary charge/uncharge to - NEWPAGE/OLDPAGE. - - The point is.... - - If OLDPAGE is anonymous, all charges will be dropped at (d) because - try_to_unmap() drops all mapcount and the page will not be - SwapCache. - - - If OLDPAGE is SwapCache, charges will be kept at (g) because - __delete_from_swap_cache() isn't called at (e-1) - - - If OLDPAGE is page-cache, charges will be kept at (g) because - remove_from_swap_cache() isn't called at (e-1) - - memcg provides following hooks. - - - mem_cgroup_prepare_migration(OLDPAGE) - Called after (b) to account a charge (usage += PAGE_SIZE) against - memcg which OLDPAGE belongs to. - - - mem_cgroup_end_migration(OLDPAGE, NEWPAGE) - Called after (f) before (g). - If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already - charged, a charge by prepare_migration() is automatically canceled. - If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE. - - But zap_pte() (by exit or munmap) can be called while migration, - we have to check if OLDPAGE/NEWPAGE is a valid page after commit(). + + mem_cgroup_migrate() 8. LRU Each memcg has its own private LRU. Now, its handling is under global diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1a9a096858e0..806b8fa15c5f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -60,15 +60,17 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, bool lrucare); void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); -struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); -struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); +void mem_cgroup_uncharge(struct page *page); + +/* Batched uncharging */ +void mem_cgroup_uncharge_start(void); +void mem_cgroup_uncharge_end(void); -/* For coalescing uncharge for reducing memcg' overhead*/ -extern void mem_cgroup_uncharge_start(void); -extern void mem_cgroup_uncharge_end(void); +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, + bool lrucare); -extern void mem_cgroup_uncharge_page(struct page *page); -extern void mem_cgroup_uncharge_cache_page(struct page *page); +struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); +struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, struct mem_cgroup *memcg); @@ -96,12 +98,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg) extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); -extern void -mem_cgroup_prepare_migration(struct page *page, struct page *newpage, - struct mem_cgroup **memcgp); -extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, - struct page *oldpage, struct page *newpage, bool migration_ok); - struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *, struct mem_cgroup_reclaim_cookie *); @@ -116,8 +112,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); -extern void mem_cgroup_replace_page_cache(struct page *oldpage, - struct page *newpage); static inline void mem_cgroup_oom_enable(void) { @@ -235,19 +229,21 @@ static inline void mem_cgroup_cancel_charge(struct page *page, { } -static inline void mem_cgroup_uncharge_start(void) +static inline void mem_cgroup_uncharge(struct page *page) { } -static inline void mem_cgroup_uncharge_end(void) +static inline void mem_cgroup_uncharge_start(void) { } -static inline void mem_cgroup_uncharge_page(struct page *page) +static inline void mem_cgroup_uncharge_end(void) { } -static inline void mem_cgroup_uncharge_cache_page(struct page *page) +static inline void mem_cgroup_migrate(struct page *oldpage, + struct page *newpage, + bool lrucare) { } @@ -286,17 +282,6 @@ static inline struct cgroup_subsys_state return NULL; } -static inline void -mem_cgroup_prepare_migration(struct page *page, struct page *newpage, - struct mem_cgroup **memcgp) -{ -} - -static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg, - struct page *oldpage, struct page *newpage, bool migration_ok) -{ -} - static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -392,10 +377,6 @@ static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void mem_cgroup_replace_page_cache(struct page *oldpage, - struct page *newpage) -{ -} #endif /* CONFIG_MEMCG */ #if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM) diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 777a524716db..9bfb8e68a595 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -3,9 +3,9 @@ enum { /* flags for mem_cgroup */ - PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ - PCG_USED, /* this object is in use. */ - PCG_MIGRATION, /* under page migration */ + PCG_USED = 0x01, /* This page is charged to a memcg */ + PCG_MEM = 0x02, /* This page holds a memory charge */ + PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */ __NR_PCG_FLAGS, }; @@ -44,42 +44,9 @@ static inline void __init page_cgroup_init(void) struct page_cgroup *lookup_page_cgroup(struct page *page); struct page *lookup_cgroup_page(struct page_cgroup *pc); -#define TESTPCGFLAG(uname, lname) \ -static inline int PageCgroup##uname(struct page_cgroup *pc) \ - { return test_bit(PCG_##lname, &pc->flags); } - -#define SETPCGFLAG(uname, lname) \ -static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ - { set_bit(PCG_##lname, &pc->flags); } - -#define CLEARPCGFLAG(uname, lname) \ -static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ - { clear_bit(PCG_##lname, &pc->flags); } - -#define TESTCLEARPCGFLAG(uname, lname) \ -static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ - { return test_and_clear_bit(PCG_##lname, &pc->flags); } - -TESTPCGFLAG(Used, USED) -CLEARPCGFLAG(Used, USED) -SETPCGFLAG(Used, USED) - -SETPCGFLAG(Migration, MIGRATION) -CLEARPCGFLAG(Migration, MIGRATION) -TESTPCGFLAG(Migration, MIGRATION) - -static inline void lock_page_cgroup(struct page_cgroup *pc) -{ - /* - * Don't take this lock in IRQ context. - * This lock is for pc->mem_cgroup, USED, MIGRATION - */ - bit_spin_lock(PCG_LOCK, &pc->flags); -} - -static inline void unlock_page_cgroup(struct page_cgroup *pc) +static inline int PageCgroupUsed(struct page_cgroup *pc) { - bit_spin_unlock(PCG_LOCK, &pc->flags); + return !!(pc->flags & PCG_USED); } #else /* CONFIG_MEMCG */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 46a649e4e8cd..1b72060f093a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -381,9 +381,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) } #endif #ifdef CONFIG_MEMCG_SWAP -extern void mem_cgroup_uncharge_swap(swp_entry_t ent); +extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); +extern void mem_cgroup_uncharge_swap(swp_entry_t entry); #else -static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) +static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +{ +} +static inline void mem_cgroup_uncharge_swap(swp_entry_t entry) { } #endif @@ -443,7 +447,7 @@ extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); -extern void swapcache_free(swp_entry_t, struct page *page); +extern void swapcache_free(swp_entry_t); extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); @@ -507,7 +511,7 @@ static inline void swap_free(swp_entry_t swp) { } -static inline void swapcache_free(swp_entry_t swp, struct page *page) +static inline void swapcache_free(swp_entry_t swp) { } diff --git a/mm/filemap.c b/mm/filemap.c index 349a40e35545..f501b56ec2c6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -234,7 +234,6 @@ void delete_from_page_cache(struct page *page) spin_lock_irq(&mapping->tree_lock); __delete_from_page_cache(page, NULL); spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); if (freepage) freepage(page); @@ -490,8 +489,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); - /* mem_cgroup codes must not be called under tree_lock */ - mem_cgroup_replace_page_cache(old, new); + mem_cgroup_migrate(old, new, true); radix_tree_preload_end(); if (freepage) freepage(old); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1cbe1e54ff5f..9106f1b12f56 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, struct mem_cgroup_tree_per_zone *mctz) { - spin_lock(&mctz->lock); + unsigned long flags; + + spin_lock_irqsave(&mctz->lock, flags); __mem_cgroup_remove_exceeded(mz, mctz); - spin_unlock(&mctz->lock); + spin_unlock_irqrestore(&mctz->lock, flags); } @@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) * mem is over its softlimit. */ if (excess || mz->on_tree) { - spin_lock(&mctz->lock); + unsigned long flags; + + spin_lock_irqsave(&mctz->lock, flags); /* if on-tree, remove it */ if (mz->on_tree) __mem_cgroup_remove_exceeded(mz, mctz); @@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) * If excess is 0, no tree ops. */ __mem_cgroup_insert_exceeded(mz, mctz, excess); - spin_unlock(&mctz->lock); + spin_unlock_irqrestore(&mctz->lock, flags); } } } @@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) { struct mem_cgroup_per_zone *mz; - spin_lock(&mctz->lock); + spin_lock_irq(&mctz->lock); mz = __mem_cgroup_largest_soft_limit_node(mctz); - spin_unlock(&mctz->lock); + spin_unlock_irq(&mctz->lock); return mz; } @@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg, return val; } -static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, - bool charge) -{ - int val = (charge) ? 1 : -1; - this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); -} - static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx) { @@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, struct page *page, - bool anon, int nr_pages) + int nr_pages) { /* * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is * counted as CACHE even if it's on ANON LRU. */ - if (anon) + if (PageAnon(page)) __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_pages); else @@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, */ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) { - preempt_disable(); /* threshold event is triggered in finer grain than soft limit */ if (unlikely(mem_cgroup_event_ratelimit(memcg, MEM_CGROUP_TARGET_THRESH))) { @@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) do_numainfo = mem_cgroup_event_ratelimit(memcg, MEM_CGROUP_TARGET_NUMAINFO); #endif - preempt_enable(); - mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) mem_cgroup_update_tree(memcg, page); @@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) if (unlikely(do_numainfo)) atomic_inc(&memcg->numainfo_events); #endif - } else - preempt_enable(); + } } struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) @@ -1347,20 +1340,6 @@ out: return lruvec; } -/* - * Following LRU functions are allowed to be used without PCG_LOCK. - * Operations are called by routine of global LRU independently from memcg. - * What we have to take care of here is validness of pc->mem_cgroup. - * - * Changes to pc->mem_cgroup happens when - * 1. charge - * 2. moving account - * In typical case, "charge" is done before add-to-lru. Exception is SwapCache. - * It is added to LRU before charge. - * If PCG_USED bit is not set, page_cgroup is not added to this private LRU. - * When moving account, the page is not on LRU. It's isolated. - */ - /** * mem_cgroup_page_lruvec - return lruvec for adding an lru page * @page: the page @@ -2261,22 +2240,14 @@ cleanup: * * Notes: Race condition * - * We usually use lock_page_cgroup() for accessing page_cgroup member but - * it tends to be costly. But considering some conditions, we doesn't need - * to do so _always_. - * - * Considering "charge", lock_page_cgroup() is not required because all - * file-stat operations happen after a page is attached to radix-tree. There - * are no race with "charge". + * Charging occurs during page instantiation, while the page is + * unmapped and locked in page migration, or while the page table is + * locked in THP migration. No race is possible. * - * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup - * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even - * if there are race with "uncharge". Statistics itself is properly handled - * by flags. + * Uncharge happens to pages with zero references, no race possible. * - * Considering "move", this is an only case we see a race. To make the race - * small, we check memcg->moving_account and detect there are possibility - * of race or not. If there is, we take a lock. + * Charge moving between groups is protected by checking mm->moving + * account and taking the move_lock in the slowpath. */ void __mem_cgroup_begin_update_page_stat(struct page *page, @@ -2689,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) return mem_cgroup_from_id(id); } +/* + * try_get_mem_cgroup_from_page - look up page's memcg association + * @page: the page + * + * Look up, get a css reference, and return the memcg that owns @page. + * + * The page must be locked to prevent racing with swap-in and page + * cache charges. If coming from an unlocked page table, the caller + * must ensure the page is on the LRU or this can race with charging. + */ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) { struct mem_cgroup *memcg = NULL; @@ -2699,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) VM_BUG_ON_PAGE(!PageLocked(page), page); pc = lookup_page_cgroup(page); - lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { memcg = pc->mem_cgroup; if (memcg && !css_tryget_online(&memcg->css)) @@ -2713,19 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) memcg = NULL; rcu_read_unlock(); } - unlock_page_cgroup(pc); return memcg; } +static void lock_page_lru(struct page *page, int *isolated) +{ + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + if (PageLRU(page)) { + struct lruvec *lruvec; + + lruvec = mem_cgroup_page_lruvec(page, zone); + ClearPageLRU(page); + del_page_from_lru_list(page, lruvec, page_lru(page)); + *isolated = 1; + } else + *isolated = 0; +} + +static void unlock_page_lru(struct page *page, int isolated) +{ + struct zone *zone = page_zone(page); + + if (isolated) { + struct lruvec *lruvec; + + lruvec = mem_cgroup_page_lruvec(page, zone); + VM_BUG_ON_PAGE(PageLRU(page), page); + SetPageLRU(page); + add_page_to_lru_list(page, lruvec, page_lru(page)); + } + spin_unlock_irq(&zone->lru_lock); +} + static void commit_charge(struct page *page, struct mem_cgroup *memcg, - unsigned int nr_pages, bool anon, bool lrucare) + unsigned int nr_pages, bool lrucare) { struct page_cgroup *pc = lookup_page_cgroup(page); - struct zone *uninitialized_var(zone); - struct lruvec *lruvec; - bool was_on_lru = false; + int isolated; - lock_page_cgroup(pc); VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); /* * we don't need page_cgroup_lock about tail pages, becase they are not @@ -2736,39 +2743,38 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg, * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page * may already be on some other mem_cgroup's LRU. Take care of it. */ - if (lrucare) { - zone = page_zone(page); - spin_lock_irq(&zone->lru_lock); - if (PageLRU(page)) { - lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); - ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, page_lru(page)); - was_on_lru = true; - } - } + if (lrucare) + lock_page_lru(page, &isolated); + /* + * Nobody should be changing or seriously looking at + * pc->mem_cgroup and pc->flags at this point: + * + * - the page is uncharged + * + * - the page is off-LRU + * + * - an anonymous fault has exclusive page access, except for + * a locked page table + * + * - a page cache insertion, a swapin fault, or a migration + * have the page locked + */ pc->mem_cgroup = memcg; - SetPageCgroupUsed(pc); - - if (lrucare) { - if (was_on_lru) { - lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); - VM_BUG_ON_PAGE(PageLRU(page), page); - SetPageLRU(page); - add_page_to_lru_list(page, lruvec, page_lru(page)); - } - spin_unlock_irq(&zone->lru_lock); - } + pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0); - mem_cgroup_charge_statistics(memcg, page, anon, nr_pages); - unlock_page_cgroup(pc); + if (lrucare) + unlock_page_lru(page, isolated); + local_irq_disable(); + mem_cgroup_charge_statistics(memcg, page, nr_pages); /* * "charge_statistics" updated event counter. Then, check it. * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. * if they exceeds softlimit. */ memcg_check_events(memcg, page); + local_irq_enable(); } static DEFINE_MUTEX(set_limit_mutex); @@ -3395,7 +3401,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION) /* * Because tail pages are not marked as "used", set it. We're under * zone->lru_lock, 'splitting on pmd' and compound_lock. @@ -3416,7 +3421,7 @@ void mem_cgroup_split_huge_fixup(struct page *head) for (i = 1; i < HPAGE_PMD_NR; i++) { pc = head_pc + i; pc->mem_cgroup = memcg; - pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; + pc->flags = head_pc->flags; } __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], HPAGE_PMD_NR); @@ -3446,7 +3451,6 @@ static int mem_cgroup_move_account(struct page *page, { unsigned long flags; int ret; - bool anon = PageAnon(page); VM_BUG_ON(from == to); VM_BUG_ON_PAGE(PageLRU(page), page); @@ -3460,15 +3464,21 @@ static int mem_cgroup_move_account(struct page *page, if (nr_pages > 1 && !PageTransHuge(page)) goto out; - lock_page_cgroup(pc); + /* + * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup + * of its source page while we change it: page migration takes + * both pages off the LRU, but page cache replacement doesn't. + */ + if (!trylock_page(page)) + goto out; ret = -EINVAL; if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) - goto unlock; + goto out_unlock; move_lock_mem_cgroup(from, &flags); - if (!anon && page_mapped(page)) { + if (!PageAnon(page) && page_mapped(page)) { __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], nr_pages); __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], @@ -3482,20 +3492,25 @@ static int mem_cgroup_move_account(struct page *page, nr_pages); } - mem_cgroup_charge_statistics(from, page, anon, -nr_pages); + /* + * It is safe to change pc->mem_cgroup here because the page + * is referenced, charged, and isolated - we can't race with + * uncharging, charging, migration, or LRU putback. + */ /* caller should have done css_get */ pc->mem_cgroup = to; - mem_cgroup_charge_statistics(to, page, anon, nr_pages); move_unlock_mem_cgroup(from, &flags); ret = 0; -unlock: - unlock_page_cgroup(pc); - /* - * check events - */ + + local_irq_disable(); + mem_cgroup_charge_statistics(to, page, nr_pages); memcg_check_events(to, page); + mem_cgroup_charge_statistics(from, page, -nr_pages); memcg_check_events(from, page); + local_irq_enable(); +out_unlock: + unlock_page(page); out: return ret; } @@ -3566,193 +3581,6 @@ out: return ret; } -static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, - unsigned int nr_pages, - const enum charge_type ctype) -{ - struct memcg_batch_info *batch = NULL; - bool uncharge_memsw = true; - - /* If swapout, usage of swap doesn't decrease */ - if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) - uncharge_memsw = false; - - batch = ¤t->memcg_batch; - /* - * In usual, we do css_get() when we remember memcg pointer. - * But in this case, we keep res->usage until end of a series of - * uncharges. Then, it's ok to ignore memcg's refcnt. - */ - if (!batch->memcg) - batch->memcg = memcg; - /* - * do_batch > 0 when unmapping pages or inode invalidate/truncate. - * In those cases, all pages freed continuously can be expected to be in - * the same cgroup and we have chance to coalesce uncharges. - * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) - * because we want to do uncharge as soon as possible. - */ - - if (!batch->do_batch || test_thread_flag(TIF_MEMDIE)) - goto direct_uncharge; - - if (nr_pages > 1) - goto direct_uncharge; - - /* - * In typical case, batch->memcg == mem. This means we can - * merge a series of uncharges to an uncharge of res_counter. - * If not, we uncharge res_counter ony by one. - */ - if (batch->memcg != memcg) - goto direct_uncharge; - /* remember freed charge and uncharge it later */ - batch->nr_pages++; - if (uncharge_memsw) - batch->memsw_nr_pages++; - return; -direct_uncharge: - res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); - if (uncharge_memsw) - res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); - if (unlikely(batch->memcg != memcg)) - memcg_oom_recover(memcg); -} - -/* - * uncharge if !page_mapped(page) - */ -static struct mem_cgroup * -__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, - bool end_migration) -{ - struct mem_cgroup *memcg = NULL; - unsigned int nr_pages = 1; - struct page_cgroup *pc; - bool anon; - - if (mem_cgroup_disabled()) - return NULL; - - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - } - /* - * Check if our page_cgroup is valid - */ - pc = lookup_page_cgroup(page); - if (unlikely(!PageCgroupUsed(pc))) - return NULL; - - lock_page_cgroup(pc); - - memcg = pc->mem_cgroup; - - if (!PageCgroupUsed(pc)) - goto unlock_out; - - anon = PageAnon(page); - - switch (ctype) { - case MEM_CGROUP_CHARGE_TYPE_ANON: - /* - * Generally PageAnon tells if it's the anon statistics to be - * updated; but sometimes e.g. mem_cgroup_uncharge_page() is - * used before page reached the stage of being marked PageAnon. - */ - anon = true; - /* fallthrough */ - case MEM_CGROUP_CHARGE_TYPE_DROP: - /* See mem_cgroup_prepare_migration() */ - if (page_mapped(page)) - goto unlock_out; - /* - * Pages under migration may not be uncharged. But - * end_migration() /must/ be the one uncharging the - * unused post-migration page and so it has to call - * here with the migration bit still set. See the - * res_counter handling below. - */ - if (!end_migration && PageCgroupMigration(pc)) - goto unlock_out; - break; - case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: - if (!PageAnon(page)) { /* Shared memory */ - if (page->mapping && !page_is_file_cache(page)) - goto unlock_out; - } else if (page_mapped(page)) /* Anon */ - goto unlock_out; - break; - default: - break; - } - - mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages); - - ClearPageCgroupUsed(pc); - /* - * pc->mem_cgroup is not cleared here. It will be accessed when it's - * freed from LRU. This is safe because uncharged page is expected not - * to be reused (freed soon). Exception is SwapCache, it's handled by - * special functions. - */ - - unlock_page_cgroup(pc); - /* - * even after unlock, we have memcg->res.usage here and this memcg - * will never be freed, so it's safe to call css_get(). - */ - memcg_check_events(memcg, page); - if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { - mem_cgroup_swap_statistics(memcg, true); - css_get(&memcg->css); - } - /* - * Migration does not charge the res_counter for the - * replacement page, so leave it alone when phasing out the - * page that is unused after the migration. - */ - if (!end_migration) - mem_cgroup_do_uncharge(memcg, nr_pages, ctype); - - return memcg; - -unlock_out: - unlock_page_cgroup(pc); - return NULL; -} - -void mem_cgroup_uncharge_page(struct page *page) -{ - /* early check. */ - if (page_mapped(page)) - return; - VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); - /* - * If the page is in swap cache, uncharge should be deferred - * to the swap path, which also properly accounts swap usage - * and handles memcg lifetime. - * - * Note that this check is not stable and reclaim may add the - * page to swap cache at any time after this. However, if the - * page is not in swap cache by the time page->mapcount hits - * 0, there won't be any page table references to the swap - * slot, and reclaim will free it and not actually write the - * page to disk. - */ - if (PageSwapCache(page)) - return; - __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false); -} - -void mem_cgroup_uncharge_cache_page(struct page *page) -{ - VM_BUG_ON_PAGE(page_mapped(page), page); - VM_BUG_ON_PAGE(page->mapping, page); - __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); -} - /* * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate. * In that cases, pages are freed continuously and we can expect pages @@ -3763,6 +3591,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page) void mem_cgroup_uncharge_start(void) { + unsigned long flags; + + local_irq_save(flags); current->memcg_batch.do_batch++; /* We can do nest. */ if (current->memcg_batch.do_batch == 1) { @@ -3770,21 +3601,18 @@ void mem_cgroup_uncharge_start(void) current->memcg_batch.nr_pages = 0; current->memcg_batch.memsw_nr_pages = 0; } + local_irq_restore(flags); } void mem_cgroup_uncharge_end(void) { struct memcg_batch_info *batch = ¤t->memcg_batch; + unsigned long flags; - if (!batch->do_batch) - return; - - batch->do_batch--; - if (batch->do_batch) /* If stacked, do nothing. */ - return; - - if (!batch->memcg) - return; + local_irq_save(flags); + VM_BUG_ON(!batch->do_batch); + if (--batch->do_batch) /* If stacked, do nothing */ + goto out; /* * This "batch->memcg" is valid without any css_get/put etc... * bacause we hide charges behind us. @@ -3796,61 +3624,16 @@ void mem_cgroup_uncharge_end(void) res_counter_uncharge(&batch->memcg->memsw, batch->memsw_nr_pages * PAGE_SIZE); memcg_oom_recover(batch->memcg); - /* forget this pointer (for sanity check) */ - batch->memcg = NULL; -} - -#ifdef CONFIG_SWAP -/* - * called after __delete_from_swap_cache() and drop "page" account. - * memcg information is recorded to swap_cgroup of "ent" - */ -void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) -{ - struct mem_cgroup *memcg; - int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT; - - if (!swapout) /* this was a swap cache but the swap is unused ! */ - ctype = MEM_CGROUP_CHARGE_TYPE_DROP; - - memcg = __mem_cgroup_uncharge_common(page, ctype, false); - - /* - * record memcg information, if swapout && memcg != NULL, - * css_get() was called in uncharge(). - */ - if (do_swap_account && swapout && memcg) - swap_cgroup_record(ent, mem_cgroup_id(memcg)); +out: + local_irq_restore(flags); } -#endif #ifdef CONFIG_MEMCG_SWAP -/* - * called from swap_entry_free(). remove record in swap_cgroup and - * uncharge "memsw" account. - */ -void mem_cgroup_uncharge_swap(swp_entry_t ent) +static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, + bool charge) { - struct mem_cgroup *memcg; - unsigned short id; - - if (!do_swap_account) - return; - - id = swap_cgroup_record(ent, 0); - rcu_read_lock(); - memcg = mem_cgroup_lookup(id); - if (memcg) { - /* - * We uncharge this because swap is freed. This memcg can - * be obsolete one. We avoid calling css_tryget_online(). - */ - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); - mem_cgroup_swap_statistics(memcg, false); - css_put(&memcg->css); - } - rcu_read_unlock(); + int val = (charge) ? 1 : -1; + this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); } /** @@ -3902,169 +3685,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, } #endif -/* - * Before starting migration, account PAGE_SIZE to mem_cgroup that the old - * page belongs to. - */ -void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, - struct mem_cgroup **memcgp) -{ - struct mem_cgroup *memcg = NULL; - unsigned int nr_pages = 1; - struct page_cgroup *pc; - - *memcgp = NULL; - - if (mem_cgroup_disabled()) - return; - - if (PageTransHuge(page)) - nr_pages <<= compound_order(page); - - pc = lookup_page_cgroup(page); - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - memcg = pc->mem_cgroup; - css_get(&memcg->css); - /* - * At migrating an anonymous page, its mapcount goes down - * to 0 and uncharge() will be called. But, even if it's fully - * unmapped, migration may fail and this page has to be - * charged again. We set MIGRATION flag here and delay uncharge - * until end_migration() is called - * - * Corner Case Thinking - * A) - * When the old page was mapped as Anon and it's unmap-and-freed - * while migration was ongoing. - * If unmap finds the old page, uncharge() of it will be delayed - * until end_migration(). If unmap finds a new page, it's - * uncharged when it make mapcount to be 1->0. If unmap code - * finds swap_migration_entry, the new page will not be mapped - * and end_migration() will find it(mapcount==0). - * - * B) - * When the old page was mapped but migraion fails, the kernel - * remaps it. A charge for it is kept by MIGRATION flag even - * if mapcount goes down to 0. We can do remap successfully - * without charging it again. - * - * C) - * The "old" page is under lock_page() until the end of - * migration, so, the old page itself will not be swapped-out. - * If the new page is swapped out before end_migraton, our - * hook to usual swap-out path will catch the event. - */ - if (PageAnon(page)) - SetPageCgroupMigration(pc); - } - unlock_page_cgroup(pc); - /* - * If the page is not charged at this point, - * we return here. - */ - if (!memcg) - return; - - *memcgp = memcg; - /* - * We charge new page before it's used/mapped. So, even if unlock_page() - * is called before end_migration, we can catch all events on this new - * page. In the case new page is migrated but not remapped, new page's - * mapcount will be finally 0 and we call uncharge in end_migration(). - */ - /* - * The page is committed to the memcg, but it's not actually - * charged to the res_counter since we plan on replacing the - * old one and only one page is going to be left afterwards. - */ - commit_charge(newpage, memcg, nr_pages, PageAnon(page), false); -} - -/* remove redundant charge if migration failed*/ -void mem_cgroup_end_migration(struct mem_cgroup *memcg, - struct page *oldpage, struct page *newpage, bool migration_ok) -{ - struct page *used, *unused; - struct page_cgroup *pc; - bool anon; - - if (!memcg) - return; - - if (!migration_ok) { - used = oldpage; - unused = newpage; - } else { - used = newpage; - unused = oldpage; - } - anon = PageAnon(used); - __mem_cgroup_uncharge_common(unused, - anon ? MEM_CGROUP_CHARGE_TYPE_ANON - : MEM_CGROUP_CHARGE_TYPE_CACHE, - true); - css_put(&memcg->css); - /* - * We disallowed uncharge of pages under migration because mapcount - * of the page goes down to zero, temporarly. - * Clear the flag and check the page should be charged. - */ - pc = lookup_page_cgroup(oldpage); - lock_page_cgroup(pc); - ClearPageCgroupMigration(pc); - unlock_page_cgroup(pc); - - /* - * If a page is a file cache, radix-tree replacement is very atomic - * and we can skip this check. When it was an Anon page, its mapcount - * goes down to 0. But because we added MIGRATION flage, it's not - * uncharged yet. There are several case but page->mapcount check - * and USED bit check in mem_cgroup_uncharge_page() will do enough - * check. (see prepare_charge() also) - */ - if (anon) - mem_cgroup_uncharge_page(used); -} - -/* - * At replace page cache, newpage is not under any memcg but it's on - * LRU. So, this function doesn't touch res_counter but handles LRU - * in correct way. Both pages are locked so we cannot race with uncharge. - */ -void mem_cgroup_replace_page_cache(struct page *oldpage, - struct page *newpage) -{ - struct mem_cgroup *memcg = NULL; - struct page_cgroup *pc; - - if (mem_cgroup_disabled()) - return; - - pc = lookup_page_cgroup(oldpage); - /* fix accounting on old pages */ - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - memcg = pc->mem_cgroup; - mem_cgroup_charge_statistics(memcg, oldpage, false, -1); - ClearPageCgroupUsed(pc); - } - unlock_page_cgroup(pc); - - /* - * When called from shmem_replace_page(), in some cases the - * oldpage has already been charged, and in some cases not. - */ - if (!memcg) - return; - /* - * Even if newpage->mapping was NULL before starting replacement, - * the newpage may be on LRU(or pagevec for LRU) already. We lock - * LRU while we overwrite pc->mem_cgroup. - */ - commit_charge(newpage, memcg, 1, false, true); -} - #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -4263,7 +3883,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_mask, &nr_scanned); nr_reclaimed += reclaimed; *total_scanned += nr_scanned; - spin_lock(&mctz->lock); + spin_lock_irq(&mctz->lock); /* * If we failed to reclaim anything from this memory cgroup @@ -4303,7 +3923,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, */ /* If excess == 0, no tree ops */ __mem_cgroup_insert_exceeded(mz, mctz, excess); - spin_unlock(&mctz->lock); + spin_unlock_irq(&mctz->lock); css_put(&mz->memcg->css); loop++; /* @@ -6265,9 +5885,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, if (page) { pc = lookup_page_cgroup(page); /* - * Do only loose check w/o page_cgroup lock. - * mem_cgroup_move_account() checks the pc is valid or not under - * the lock. + * Do only loose check w/o serialization. + * mem_cgroup_move_account() checks the pc is valid or + * not under LRU exclusion. */ if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) { ret = MC_TARGET_PAGE; @@ -6729,6 +6349,67 @@ static void __init enable_swap_cgroup(void) } #endif +#ifdef CONFIG_MEMCG_SWAP +/** + * mem_cgroup_swapout - transfer a memsw charge to swap + * @page: page whose memsw charge to transfer + * @entry: swap entry to move the charge to + * + * Transfer the memsw charge of @page to @entry. + */ +void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +{ + struct page_cgroup *pc; + unsigned short oldid; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + if (!do_swap_account) + return; + + pc = lookup_page_cgroup(page); + + /* Readahead page, never charged */ + if (!PageCgroupUsed(pc)) + return; + + VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page); + + oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup)); + VM_BUG_ON_PAGE(oldid, page); + + pc->flags &= ~PCG_MEMSW; + css_get(&pc->mem_cgroup->css); + mem_cgroup_swap_statistics(pc->mem_cgroup, true); +} + +/** + * mem_cgroup_uncharge_swap - uncharge a swap entry + * @entry: swap entry to uncharge + * + * Drop the memsw charge associated with @entry. + */ +void mem_cgroup_uncharge_swap(swp_entry_t entry) +{ + struct mem_cgroup *memcg; + unsigned short id; + + if (!do_swap_account) + return; + + id = swap_cgroup_record(entry, 0); + rcu_read_lock(); + memcg = mem_cgroup_lookup(id); + if (memcg) { + res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + mem_cgroup_swap_statistics(memcg, false); + css_put(&memcg->css); + } + rcu_read_unlock(); +} +#endif + /** * mem_cgroup_try_charge - try charging a page * @page: page to charge @@ -6831,7 +6512,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, VM_BUG_ON_PAGE(!PageTransHuge(page), page); } - commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare); + commit_charge(page, memcg, nr_pages, lrucare); if (do_swap_account && PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; @@ -6873,6 +6554,139 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) cancel_charge(memcg, nr_pages); } +/** + * mem_cgroup_uncharge - uncharge a page + * @page: page to uncharge + * + * Uncharge a page previously charged with mem_cgroup_try_charge() and + * mem_cgroup_commit_charge(). + */ +void mem_cgroup_uncharge(struct page *page) +{ + struct memcg_batch_info *batch; + unsigned int nr_pages = 1; + struct mem_cgroup *memcg; + struct page_cgroup *pc; + unsigned long pc_flags; + unsigned long flags; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + if (mem_cgroup_disabled()) + return; + + pc = lookup_page_cgroup(page); + + /* Every final put_page() ends up here */ + if (!PageCgroupUsed(pc)) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + /* + * Nobody should be changing or seriously looking at + * pc->mem_cgroup and pc->flags at this point, we have fully + * exclusive access to the page. + */ + memcg = pc->mem_cgroup; + pc_flags = pc->flags; + pc->flags = 0; + + local_irq_save(flags); + + if (nr_pages > 1) + goto direct; + if (unlikely(test_thread_flag(TIF_MEMDIE))) + goto direct; + batch = ¤t->memcg_batch; + if (!batch->do_batch) + goto direct; + if (batch->memcg && batch->memcg != memcg) + goto direct; + if (!batch->memcg) + batch->memcg = memcg; + if (pc_flags & PCG_MEM) + batch->nr_pages++; + if (pc_flags & PCG_MEMSW) + batch->memsw_nr_pages++; + goto out; +direct: + if (pc_flags & PCG_MEM) + res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); + if (pc_flags & PCG_MEMSW) + res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); + memcg_oom_recover(memcg); +out: + mem_cgroup_charge_statistics(memcg, page, -nr_pages); + memcg_check_events(memcg, page); + + local_irq_restore(flags); +} + +/** + * mem_cgroup_migrate - migrate a charge to another page + * @oldpage: currently charged page + * @newpage: page to transfer the charge to + * @lrucare: both pages might be on the LRU already + * + * Migrate the charge from @oldpage to @newpage. + * + * Both pages must be locked, @newpage->mapping must be set up. + */ +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, + bool lrucare) +{ + unsigned int nr_pages = 1; + struct page_cgroup *pc; + int isolated; + + VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); + VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage); + VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage); + VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); + + if (mem_cgroup_disabled()) + return; + + /* Page cache replacement: new page already charged? */ + pc = lookup_page_cgroup(newpage); + if (PageCgroupUsed(pc)) + return; + + /* Re-entrant migration: old page already uncharged? */ + pc = lookup_page_cgroup(oldpage); + if (!PageCgroupUsed(pc)) + return; + + VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage); + VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage); + + if (PageTransHuge(oldpage)) { + nr_pages <<= compound_order(oldpage); + VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage); + } + + if (lrucare) + lock_page_lru(oldpage, &isolated); + + pc->flags = 0; + + if (lrucare) + unlock_page_lru(oldpage, isolated); + + local_irq_disable(); + mem_cgroup_charge_statistics(pc->mem_cgroup, oldpage, -nr_pages); + memcg_check_events(pc->mem_cgroup, oldpage); + local_irq_enable(); + + commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare); +} + /* * subsys_initcall() for memory controller. * diff --git a/mm/memory.c b/mm/memory.c index 6d7648773dc4..2a899e4e82ba 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb, details = NULL; BUG_ON(addr >= end); - mem_cgroup_uncharge_start(); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { @@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb, next = zap_pud_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); - mem_cgroup_uncharge_end(); } diff --git a/mm/migrate.c b/mm/migrate.c index be6dbf995c0c..f78ec9bd454d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -780,6 +780,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (rc != MIGRATEPAGE_SUCCESS) { newpage->mapping = NULL; } else { + mem_cgroup_migrate(page, newpage, false); if (remap_swapcache) remove_migration_ptes(page, newpage); page->mapping = NULL; @@ -795,7 +796,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage, { int rc = -EAGAIN; int remap_swapcache = 1; - struct mem_cgroup *mem; struct anon_vma *anon_vma = NULL; if (!trylock_page(page)) { @@ -821,9 +821,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage, lock_page(page); } - /* charge against new page */ - mem_cgroup_prepare_migration(page, newpage, &mem); - if (PageWriteback(page)) { /* * Only in the case of a full synchronous migration is it @@ -833,10 +830,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage, */ if (mode != MIGRATE_SYNC) { rc = -EBUSY; - goto uncharge; + goto out_unlock; } if (!force) - goto uncharge; + goto out_unlock; wait_on_page_writeback(page); } /* @@ -872,7 +869,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, */ remap_swapcache = 0; } else { - goto uncharge; + goto out_unlock; } } @@ -885,7 +882,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * the page migration right away (proteced by page lock). */ rc = balloon_page_migrate(newpage, page, mode); - goto uncharge; + goto out_unlock; } /* @@ -904,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, VM_BUG_ON_PAGE(PageAnon(page), page); if (page_has_private(page)) { try_to_free_buffers(page); - goto uncharge; + goto out_unlock; } goto skip_unmap; } @@ -923,10 +920,7 @@ skip_unmap: if (anon_vma) put_anon_vma(anon_vma); -uncharge: - mem_cgroup_end_migration(mem, page, newpage, - (rc == MIGRATEPAGE_SUCCESS || - rc == MIGRATEPAGE_BALLOON_SUCCESS)); +out_unlock: unlock_page(page); out: return rc; @@ -1786,7 +1780,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, pg_data_t *pgdat = NODE_DATA(node); int isolated = 0; struct page *new_page = NULL; - struct mem_cgroup *memcg = NULL; int page_lru = page_is_file_cache(page); unsigned long mmun_start = address & HPAGE_PMD_MASK; unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; @@ -1852,15 +1845,6 @@ fail_putback: goto out_unlock; } - /* - * Traditional migration needs to prepare the memcg charge - * transaction early to prevent the old page from being - * uncharged when installing migration entries. Here we can - * save the potential rollback and start the charge transfer - * only when migration is already known to end successfully. - */ - mem_cgroup_prepare_migration(page, new_page, &memcg); - orig_entry = *pmd; entry = mk_pmd(new_page, vma->vm_page_prot); entry = pmd_mkhuge(entry); @@ -1888,14 +1872,10 @@ fail_putback: goto fail_putback; } + mem_cgroup_migrate(page, new_page, false); + page_remove_rmap(page); - /* - * Finish the charge transaction under the page table lock to - * prevent split_huge_page() from dividing up the charge - * before it's fully transferred to the new page. - */ - mem_cgroup_end_migration(memcg, page, new_page, true); spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); diff --git a/mm/rmap.c b/mm/rmap.c index f56b5ed78128..3e8491c504f8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1089,7 +1089,6 @@ void page_remove_rmap(struct page *page) if (unlikely(PageHuge(page))) goto out; if (anon) { - mem_cgroup_uncharge_page(page); if (PageTransHuge(page)) __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); diff --git a/mm/shmem.c b/mm/shmem.c index 1f1a8085538b..6dc80d298f9d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -419,7 +419,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, pvec.pages, indices); if (!pvec.nr) break; - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -447,7 +446,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); cond_resched(); index++; } @@ -495,7 +493,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, index = start; continue; } - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -531,7 +528,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); index++; } @@ -835,7 +831,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) } mutex_unlock(&shmem_swaplist_mutex); - swapcache_free(swap, NULL); + swapcache_free(swap); redirty: set_page_dirty(page); if (wbc->for_reclaim) @@ -1008,7 +1004,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, */ oldpage = newpage; } else { - mem_cgroup_replace_page_cache(oldpage, newpage); + mem_cgroup_migrate(oldpage, newpage, false); lru_cache_add_anon(newpage); *pagep = newpage; } diff --git a/mm/swap.c b/mm/swap.c index 3baca701bb78..00523fffa5ed 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page) del_page_from_lru_list(page, lruvec, page_off_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); } + mem_cgroup_uncharge(page); } static void __put_single_page(struct page *page) @@ -907,6 +908,8 @@ void release_pages(struct page **pages, int nr, bool cold) struct lruvec *lruvec; unsigned long uninitialized_var(flags); + mem_cgroup_uncharge_start(); + for (i = 0; i < nr; i++) { struct page *page = pages[i]; @@ -938,6 +941,7 @@ void release_pages(struct page **pages, int nr, bool cold) __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); } + mem_cgroup_uncharge(page); /* Clear Active bit in case of parallel mark_page_accessed */ __ClearPageActive(page); @@ -947,6 +951,8 @@ void release_pages(struct page **pages, int nr, bool cold) if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); + mem_cgroup_uncharge_end(); + free_hot_cold_page_list(&pages_to_free, cold); } EXPORT_SYMBOL(release_pages); diff --git a/mm/swap_state.c b/mm/swap_state.c index 2972eee184a4..e160151da6b8 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -176,7 +176,7 @@ int add_to_swap(struct page *page, struct list_head *list) if (unlikely(PageTransHuge(page))) if (unlikely(split_huge_page_to_list(page, list))) { - swapcache_free(entry, NULL); + swapcache_free(entry); return 0; } @@ -202,7 +202,7 @@ int add_to_swap(struct page *page, struct list_head *list) * add_to_swap_cache() doesn't return -EEXIST, so we can safely * clear SWAP_HAS_CACHE flag. */ - swapcache_free(entry, NULL); + swapcache_free(entry); return 0; } } @@ -225,7 +225,7 @@ void delete_from_swap_cache(struct page *page) __delete_from_swap_cache(page); spin_unlock_irq(&address_space->tree_lock); - swapcache_free(entry, page); + swapcache_free(entry); page_cache_release(page); } @@ -386,7 +386,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * add_to_swap_cache() doesn't return -EEXIST, so we can safely * clear SWAP_HAS_CACHE flag. */ - swapcache_free(entry, NULL); + swapcache_free(entry); } while (err != -ENOMEM); if (new_page) diff --git a/mm/swapfile.c b/mm/swapfile.c index 0883b4912ff7..8798b2e0ac59 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry) /* * Called after dropping swapcache to decrease refcnt to swap entries. */ -void swapcache_free(swp_entry_t entry, struct page *page) +void swapcache_free(swp_entry_t entry) { struct swap_info_struct *p; - unsigned char count; p = swap_info_get(entry); if (p) { - count = swap_entry_free(p, entry, SWAP_HAS_CACHE); - if (page) - mem_cgroup_uncharge_swapcache(page, entry, count != 0); + swap_entry_free(p, entry, SWAP_HAS_CACHE); spin_unlock(&p->lock); } } diff --git a/mm/truncate.c b/mm/truncate.c index eda247307164..96d167372d89 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping, while (index < end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); cond_resched(); index++; } @@ -369,7 +367,6 @@ void truncate_inode_pages_range(struct address_space *mapping, pagevec_release(&pvec); break; } - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -394,7 +391,6 @@ void truncate_inode_pages_range(struct address_space *mapping, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); index++; } cleancache_invalidate_inode(mapping); @@ -493,7 +489,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, indices)) { - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -522,7 +517,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); cond_resched(); index++; } @@ -553,7 +547,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); __delete_from_page_cache(page, NULL); spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); @@ -602,7 +595,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, indices)) { - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -655,7 +647,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); cond_resched(); index++; } diff --git a/mm/vmscan.c b/mm/vmscan.c index d2f65c856350..7068e838d22b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -577,9 +577,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; + mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page); spin_unlock_irq(&mapping->tree_lock); - swapcache_free(swap, page); + swapcache_free(swap); } else { void (*freepage)(struct page *); void *shadow = NULL; @@ -600,7 +601,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, shadow = workingset_eviction(mapping, page); __delete_from_page_cache(page, shadow); spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); if (freepage != NULL) freepage(page); @@ -1103,6 +1103,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ __clear_page_locked(page); free_it: + mem_cgroup_uncharge(page); nr_reclaimed++; /* @@ -1132,12 +1133,13 @@ keep: list_add(&page->lru, &ret_pages); VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); } + mem_cgroup_uncharge_end(); free_hot_cold_page_list(&free_pages, true); list_splice(&ret_pages, page_list); count_vm_events(PGACTIVATE, pgactivate); - mem_cgroup_uncharge_end(); + *ret_nr_dirty += nr_dirty; *ret_nr_congested += nr_congested; *ret_nr_unqueued_dirty += nr_unqueued_dirty; @@ -1435,6 +1437,8 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) __ClearPageActive(page); del_page_from_lru_list(page, lruvec, lru); + mem_cgroup_uncharge(page); + if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); (*get_compound_page_dtor(page))(page); @@ -1656,6 +1660,8 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, __ClearPageActive(page); del_page_from_lru_list(page, lruvec, lru); + mem_cgroup_uncharge(page); + if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); (*get_compound_page_dtor(page))(page); diff --git a/mm/zswap.c b/mm/zswap.c index 032c21eeab2b..9da56af24df5 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -507,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry, * add_to_swap_cache() doesn't return -EEXIST, so we can safely * clear SWAP_HAS_CACHE flag. */ - swapcache_free(entry, NULL); + swapcache_free(entry); } while (err != -ENOMEM); if (new_page) -- cgit v1.2.3 From 747db954cab64c6b7a95b121b517165f34751898 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 8 Aug 2014 14:19:24 -0700 Subject: mm: memcontrol: use page lists for uncharge batching Pages are now uncharged at release time, and all sources of batched uncharges operate on lists of pages. Directly use those lists, and get rid of the per-task batching state. This also batches statistics accounting, in addition to the res counter charges, to reduce IRQ-disabling and re-enabling. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Hugh Dickins Cc: Tejun Heo Cc: Vladimir Davydov Cc: Naoya Horiguchi Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 +-- include/linux/sched.h | 6 -- kernel/fork.c | 4 - mm/memcontrol.c | 206 ++++++++++++++++++++++++--------------------- mm/swap.c | 6 +- mm/vmscan.c | 12 ++- 6 files changed, 117 insertions(+), 129 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 806b8fa15c5f..e0752d204d9e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -59,12 +59,8 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, bool lrucare); void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); - void mem_cgroup_uncharge(struct page *page); - -/* Batched uncharging */ -void mem_cgroup_uncharge_start(void); -void mem_cgroup_uncharge_end(void); +void mem_cgroup_uncharge_list(struct list_head *page_list); void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, bool lrucare); @@ -233,11 +229,7 @@ static inline void mem_cgroup_uncharge(struct page *page) { } -static inline void mem_cgroup_uncharge_start(void) -{ -} - -static inline void mem_cgroup_uncharge_end(void) +static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index 7c19d552dc3f..4fcf82a4d243 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1628,12 +1628,6 @@ struct task_struct { unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ - struct memcg_batch_info { - int do_batch; /* incremented when batch uncharge started */ - struct mem_cgroup *memcg; /* target memcg of uncharge */ - unsigned long nr_pages; /* uncharged usage */ - unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ - } memcg_batch; unsigned int memcg_kmem_skip_account; struct memcg_oom_info { struct mem_cgroup *memcg; diff --git a/kernel/fork.c b/kernel/fork.c index fbd3497b221f..f6f5086c9e7d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1346,10 +1346,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif -#ifdef CONFIG_MEMCG - p->memcg_batch.do_batch = 0; - p->memcg_batch.memcg = NULL; -#endif #ifdef CONFIG_BCACHE p->sequential_io = 0; p->sequential_io_avg = 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9106f1b12f56..a6e2be0241af 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3581,53 +3581,6 @@ out: return ret; } -/* - * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate. - * In that cases, pages are freed continuously and we can expect pages - * are in the same memcg. All these calls itself limits the number of - * pages freed at once, then uncharge_start/end() is called properly. - * This may be called prural(2) times in a context, - */ - -void mem_cgroup_uncharge_start(void) -{ - unsigned long flags; - - local_irq_save(flags); - current->memcg_batch.do_batch++; - /* We can do nest. */ - if (current->memcg_batch.do_batch == 1) { - current->memcg_batch.memcg = NULL; - current->memcg_batch.nr_pages = 0; - current->memcg_batch.memsw_nr_pages = 0; - } - local_irq_restore(flags); -} - -void mem_cgroup_uncharge_end(void) -{ - struct memcg_batch_info *batch = ¤t->memcg_batch; - unsigned long flags; - - local_irq_save(flags); - VM_BUG_ON(!batch->do_batch); - if (--batch->do_batch) /* If stacked, do nothing */ - goto out; - /* - * This "batch->memcg" is valid without any css_get/put etc... - * bacause we hide charges behind us. - */ - if (batch->nr_pages) - res_counter_uncharge(&batch->memcg->res, - batch->nr_pages * PAGE_SIZE); - if (batch->memsw_nr_pages) - res_counter_uncharge(&batch->memcg->memsw, - batch->memsw_nr_pages * PAGE_SIZE); - memcg_oom_recover(batch->memcg); -out: - local_irq_restore(flags); -} - #ifdef CONFIG_MEMCG_SWAP static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, bool charge) @@ -6554,6 +6507,98 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) cancel_charge(memcg, nr_pages); } +static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, + unsigned long nr_mem, unsigned long nr_memsw, + unsigned long nr_anon, unsigned long nr_file, + unsigned long nr_huge, struct page *dummy_page) +{ + unsigned long flags; + + if (nr_mem) + res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE); + if (nr_memsw) + res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE); + + memcg_oom_recover(memcg); + + local_irq_save(flags); + __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); + __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); + __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); + __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); + __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file); + memcg_check_events(memcg, dummy_page); + local_irq_restore(flags); +} + +static void uncharge_list(struct list_head *page_list) +{ + struct mem_cgroup *memcg = NULL; + unsigned long nr_memsw = 0; + unsigned long nr_anon = 0; + unsigned long nr_file = 0; + unsigned long nr_huge = 0; + unsigned long pgpgout = 0; + unsigned long nr_mem = 0; + struct list_head *next; + struct page *page; + + next = page_list->next; + do { + unsigned int nr_pages = 1; + struct page_cgroup *pc; + + page = list_entry(next, struct page, lru); + next = page->lru.next; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + pc = lookup_page_cgroup(page); + if (!PageCgroupUsed(pc)) + continue; + + /* + * Nobody should be changing or seriously looking at + * pc->mem_cgroup and pc->flags at this point, we have + * fully exclusive access to the page. + */ + + if (memcg != pc->mem_cgroup) { + if (memcg) { + uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw, + nr_anon, nr_file, nr_huge, page); + pgpgout = nr_mem = nr_memsw = 0; + nr_anon = nr_file = nr_huge = 0; + } + memcg = pc->mem_cgroup; + } + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + nr_huge += nr_pages; + } + + if (PageAnon(page)) + nr_anon += nr_pages; + else + nr_file += nr_pages; + + if (pc->flags & PCG_MEM) + nr_mem += nr_pages; + if (pc->flags & PCG_MEMSW) + nr_memsw += nr_pages; + pc->flags = 0; + + pgpgout++; + } while (next != page_list); + + if (memcg) + uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw, + nr_anon, nr_file, nr_huge, page); +} + /** * mem_cgroup_uncharge - uncharge a page * @page: page to uncharge @@ -6563,67 +6608,34 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) */ void mem_cgroup_uncharge(struct page *page) { - struct memcg_batch_info *batch; - unsigned int nr_pages = 1; - struct mem_cgroup *memcg; struct page_cgroup *pc; - unsigned long pc_flags; - unsigned long flags; - - VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page), page); if (mem_cgroup_disabled()) return; + /* Don't touch page->lru of any random page, pre-check: */ pc = lookup_page_cgroup(page); - - /* Every final put_page() ends up here */ if (!PageCgroupUsed(pc)) return; - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - } - /* - * Nobody should be changing or seriously looking at - * pc->mem_cgroup and pc->flags at this point, we have fully - * exclusive access to the page. - */ - memcg = pc->mem_cgroup; - pc_flags = pc->flags; - pc->flags = 0; - - local_irq_save(flags); + INIT_LIST_HEAD(&page->lru); + uncharge_list(&page->lru); +} - if (nr_pages > 1) - goto direct; - if (unlikely(test_thread_flag(TIF_MEMDIE))) - goto direct; - batch = ¤t->memcg_batch; - if (!batch->do_batch) - goto direct; - if (batch->memcg && batch->memcg != memcg) - goto direct; - if (!batch->memcg) - batch->memcg = memcg; - if (pc_flags & PCG_MEM) - batch->nr_pages++; - if (pc_flags & PCG_MEMSW) - batch->memsw_nr_pages++; - goto out; -direct: - if (pc_flags & PCG_MEM) - res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); - if (pc_flags & PCG_MEMSW) - res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); - memcg_oom_recover(memcg); -out: - mem_cgroup_charge_statistics(memcg, page, -nr_pages); - memcg_check_events(memcg, page); +/** + * mem_cgroup_uncharge_list - uncharge a list of page + * @page_list: list of pages to uncharge + * + * Uncharge a list of pages previously charged with + * mem_cgroup_try_charge() and mem_cgroup_commit_charge(). + */ +void mem_cgroup_uncharge_list(struct list_head *page_list) +{ + if (mem_cgroup_disabled()) + return; - local_irq_restore(flags); + if (!list_empty(page_list)) + uncharge_list(page_list); } /** diff --git a/mm/swap.c b/mm/swap.c index 00523fffa5ed..6b2dc3897cd5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -908,8 +908,6 @@ void release_pages(struct page **pages, int nr, bool cold) struct lruvec *lruvec; unsigned long uninitialized_var(flags); - mem_cgroup_uncharge_start(); - for (i = 0; i < nr; i++) { struct page *page = pages[i]; @@ -941,7 +939,6 @@ void release_pages(struct page **pages, int nr, bool cold) __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); } - mem_cgroup_uncharge(page); /* Clear Active bit in case of parallel mark_page_accessed */ __ClearPageActive(page); @@ -951,8 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold) if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); - mem_cgroup_uncharge_end(); - + mem_cgroup_uncharge_list(&pages_to_free); free_hot_cold_page_list(&pages_to_free, cold); } EXPORT_SYMBOL(release_pages); diff --git a/mm/vmscan.c b/mm/vmscan.c index 7068e838d22b..2836b5373b2e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -822,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, cond_resched(); - mem_cgroup_uncharge_start(); while (!list_empty(page_list)) { struct address_space *mapping; struct page *page; @@ -1103,7 +1102,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ __clear_page_locked(page); free_it: - mem_cgroup_uncharge(page); nr_reclaimed++; /* @@ -1133,8 +1131,8 @@ keep: list_add(&page->lru, &ret_pages); VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); } - mem_cgroup_uncharge_end(); + mem_cgroup_uncharge_list(&free_pages); free_hot_cold_page_list(&free_pages, true); list_splice(&ret_pages, page_list); @@ -1437,10 +1435,9 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) __ClearPageActive(page); del_page_from_lru_list(page, lruvec, lru); - mem_cgroup_uncharge(page); - if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); + mem_cgroup_uncharge(page); (*get_compound_page_dtor(page))(page); spin_lock_irq(&zone->lru_lock); } else @@ -1548,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, spin_unlock_irq(&zone->lru_lock); + mem_cgroup_uncharge_list(&page_list); free_hot_cold_page_list(&page_list, true); /* @@ -1660,10 +1658,9 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, __ClearPageActive(page); del_page_from_lru_list(page, lruvec, lru); - mem_cgroup_uncharge(page); - if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); + mem_cgroup_uncharge(page); (*get_compound_page_dtor(page))(page); spin_lock_irq(&zone->lru_lock); } else @@ -1771,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan, __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); + mem_cgroup_uncharge_list(&l_hold); free_hot_cold_page_list(&l_hold, true); } -- cgit v1.2.3 From 434584fe68155f884b19f32b3befec8972c5d563 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 8 Aug 2014 14:19:26 -0700 Subject: page-cgroup: trivial cleanup Add forward declarations for struct pglist_data, mem_cgroup. Remove __init, __meminit from function prototypes and inline functions. Remove redundant inclusion of bit_spinlock.h. Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 9bfb8e68a595..b8f8c9e36a3e 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -12,8 +12,10 @@ enum { #ifndef __GENERATING_BOUNDS_H #include +struct pglist_data; + #ifdef CONFIG_MEMCG -#include +struct mem_cgroup; /* * Page Cgroup can be considered as an extended mem_map. @@ -27,16 +29,16 @@ struct page_cgroup { struct mem_cgroup *mem_cgroup; }; -void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); +extern void pgdat_page_cgroup_init(struct pglist_data *pgdat); #ifdef CONFIG_SPARSEMEM -static inline void __init page_cgroup_init_flatmem(void) +static inline void page_cgroup_init_flatmem(void) { } -extern void __init page_cgroup_init(void); +extern void page_cgroup_init(void); #else -void __init page_cgroup_init_flatmem(void); -static inline void __init page_cgroup_init(void) +extern void page_cgroup_init_flatmem(void); +static inline void page_cgroup_init(void) { } #endif @@ -48,11 +50,10 @@ static inline int PageCgroupUsed(struct page_cgroup *pc) { return !!(pc->flags & PCG_USED); } - -#else /* CONFIG_MEMCG */ +#else /* !CONFIG_MEMCG */ struct page_cgroup; -static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) +static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat) { } @@ -65,10 +66,9 @@ static inline void page_cgroup_init(void) { } -static inline void __init page_cgroup_init_flatmem(void) +static inline void page_cgroup_init_flatmem(void) { } - #endif /* CONFIG_MEMCG */ #include -- cgit v1.2.3 From 9a3f4d85d58cb4e02e226f9be946d54c33eb715b Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 8 Aug 2014 14:19:28 -0700 Subject: page-cgroup: get rid of NR_PCG_FLAGS It's not used anywhere today, so let's remove it. Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 6 ------ kernel/bounds.c | 2 -- 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index b8f8c9e36a3e..9d9f540658f5 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -6,12 +6,8 @@ enum { PCG_USED = 0x01, /* This page is charged to a memcg */ PCG_MEM = 0x02, /* This page holds a memory charge */ PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */ - __NR_PCG_FLAGS, }; -#ifndef __GENERATING_BOUNDS_H -#include - struct pglist_data; #ifdef CONFIG_MEMCG @@ -107,6 +103,4 @@ static inline void swap_cgroup_swapoff(int type) #endif /* CONFIG_MEMCG_SWAP */ -#endif /* !__GENERATING_BOUNDS_H */ - #endif /* __LINUX_PAGE_CGROUP_H */ diff --git a/kernel/bounds.c b/kernel/bounds.c index 9fd4246b04b8..e1d1d1952bfa 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -18,7 +17,6 @@ void foo(void) /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); - DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS); #ifdef CONFIG_SMP DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); #endif -- cgit v1.2.3 From 3cbb01871e22709fdd39478eca831de317df332f Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 8 Aug 2014 14:19:31 -0700 Subject: memcg: remove lookup_cgroup_page() prototype Commit 6b208e3f6e35 ("mm: memcg: remove unused node/section info from pc->flags") deleted lookup_cgroup_page() but left a prototype for it. Kill the vestigial prototype. Signed-off-by: Greg Thelen Cc: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 9d9f540658f5..5c831f1eca79 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -40,7 +40,6 @@ static inline void page_cgroup_init(void) #endif struct page_cgroup *lookup_page_cgroup(struct page *page); -struct page *lookup_cgroup_page(struct page_cgroup *pc); static inline int PageCgroupUsed(struct page_cgroup *pc) { -- cgit v1.2.3 From ccf94f1b4a8560ffdc221840535bae5e5a91a53c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 8 Aug 2014 14:21:22 -0700 Subject: proc: constify seq_operations proc_uid_seq_operations, proc_gid_seq_operations and proc_projid_seq_operations are only called in proc_id_map_open with seq_open as const struct seq_operations so we can constify the 3 structures and update proc_id_map_open prototype. text data bss dec hex filename 6817 404 1984 9205 23f5 kernel/user_namespace.o-before 6913 308 1984 9205 23f5 kernel/user_namespace.o-after Signed-off-by: Fabian Frederick Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- include/linux/user_namespace.h | 6 +++--- kernel/user_namespace.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 2d696b0c93bf..79df9ff71afd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2449,7 +2449,7 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) #ifdef CONFIG_USER_NS static int proc_id_map_open(struct inode *inode, struct file *file, - struct seq_operations *seq_ops) + const struct seq_operations *seq_ops) { struct user_namespace *ns = NULL; struct task_struct *task; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4836ba3c1cd8..e95372654f09 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -57,9 +57,9 @@ static inline void put_user_ns(struct user_namespace *ns) } struct seq_operations; -extern struct seq_operations proc_uid_seq_operations; -extern struct seq_operations proc_gid_seq_operations; -extern struct seq_operations proc_projid_seq_operations; +extern const struct seq_operations proc_uid_seq_operations; +extern const struct seq_operations proc_gid_seq_operations; +extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index fcc02560fd6b..aa312b0dc3ec 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -526,21 +526,21 @@ static void m_stop(struct seq_file *seq, void *v) return; } -struct seq_operations proc_uid_seq_operations = { +const struct seq_operations proc_uid_seq_operations = { .start = uid_m_start, .stop = m_stop, .next = m_next, .show = uid_m_show, }; -struct seq_operations proc_gid_seq_operations = { +const struct seq_operations proc_gid_seq_operations = { .start = gid_m_start, .stop = m_stop, .next = m_next, .show = gid_m_show, }; -struct seq_operations proc_projid_seq_operations = { +const struct seq_operations proc_projid_seq_operations = { .start = projid_m_start, .stop = m_stop, .next = m_next, -- cgit v1.2.3 From 41f727fde1fe40efeb4fef6fdce74ff794be5aeb Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 8 Aug 2014 14:21:56 -0700 Subject: fork/exec: cleanup mm initialization mm initialization on fork/exec is spread all over the place, which makes the code look inconsistent. We have mm_init(), which is supposed to init/nullify mm's internals, but it doesn't init all the fields it should: - on fork ->mmap,mm_rb,vmacache_seqnum,map_count,mm_cpumask,locked_vm are zeroed in dup_mmap(); - on fork ->pmd_huge_pte is zeroed in dup_mm(), immediately before calling mm_init(); - ->cpu_vm_mask_var ptr is initialized by mm_init_cpumask(), which is called before mm_init() on both fork and exec; - ->context is initialized by init_new_context(), which is called after mm_init() on both fork and exec; Let's consolidate all the initializations in mm_init() to make the code look cleaner. Signed-off-by: Vladimir Davydov Cc: Oleg Nesterov Cc: David Rientjes Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 4 ---- include/linux/mm_types.h | 1 + kernel/fork.c | 47 ++++++++++++++++++++--------------------------- 3 files changed, 21 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index ab1f1200ce5d..a2b42a98c743 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm) if (!mm) goto err; - err = init_new_context(current, mm); - if (err) - goto err; - err = __bprm_mm_init(bprm); if (err) goto err; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 796deac19fcf..6e0b286649f1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -461,6 +461,7 @@ static inline void mm_init_cpumask(struct mm_struct *mm) #ifdef CONFIG_CPUMASK_OFFSTACK mm->cpu_vm_mask_var = &mm->cpumask_allocation; #endif + cpumask_clear(mm->cpu_vm_mask_var); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ diff --git a/kernel/fork.c b/kernel/fork.c index f6f5086c9e7d..418b52a9ec6a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -374,12 +374,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); - mm->locked_vm = 0; - mm->mmap = NULL; - mm->vmacache_seqnum = 0; - mm->map_count = 0; - cpumask_clear(mm_cpumask(mm)); - mm->mm_rb = RB_ROOT; rb_link = &mm->mm_rb.rb_node; rb_parent = NULL; pprev = &mm->mmap; @@ -538,17 +532,27 @@ static void mm_init_aio(struct mm_struct *mm) static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) { + mm->mmap = NULL; + mm->mm_rb = RB_ROOT; + mm->vmacache_seqnum = 0; atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); mm->core_state = NULL; atomic_long_set(&mm->nr_ptes, 0); + mm->map_count = 0; + mm->locked_vm = 0; memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); + mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + mmu_notifier_mm_init(mm); clear_tlb_flush_pending(mm); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS + mm->pmd_huge_pte = NULL; +#endif if (current->mm) { mm->flags = current->mm->flags & MMF_INIT_MASK; @@ -558,11 +562,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm->def_flags = 0; } - if (likely(!mm_alloc_pgd(mm))) { - mmu_notifier_mm_init(mm); - return mm; - } + if (mm_alloc_pgd(mm)) + goto fail_nopgd; + + if (init_new_context(p, mm)) + goto fail_nocontext; + return mm; + +fail_nocontext: + mm_free_pgd(mm); +fail_nopgd: free_mm(mm); return NULL; } @@ -596,7 +606,6 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - mm_init_cpumask(mm); return mm_init(mm, current); } @@ -828,17 +837,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); - mm_init_cpumask(mm); -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS - mm->pmd_huge_pte = NULL; -#endif if (!mm_init(mm, tsk)) goto fail_nomem; - if (init_new_context(tsk, mm)) - goto fail_nocontext; - dup_mm_exe_file(oldmm, mm); err = dup_mmap(mm, oldmm); @@ -860,15 +862,6 @@ free_pt: fail_nomem: return NULL; - -fail_nocontext: - /* - * If init_new_context() failed, we cannot use mmput() to free the mm - * because it calls destroy_context() - */ - mm_free_pgd(mm); - free_mm(mm); - return NULL; } static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) -- cgit v1.2.3 From 33144e8429bd7fceacbb869a7f5061db42e13fe6 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 8 Aug 2014 14:22:03 -0700 Subject: kernel/fork.c: make mm_init_owner static It's only used in fork.c:mm_init(). Signed-off-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 ----- kernel/fork.c | 14 +++++++------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4fcf82a4d243..b21e9218c0fd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2961,15 +2961,10 @@ static inline void inc_syscw(struct task_struct *tsk) #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); -extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); #else static inline void mm_update_next_owner(struct mm_struct *mm) { } - -static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) -{ -} #endif /* CONFIG_MEMCG */ static inline unsigned long task_rlimit(const struct task_struct *tsk, diff --git a/kernel/fork.c b/kernel/fork.c index aff84f84b0d3..86da59e165ad 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -535,6 +535,13 @@ static void mm_init_aio(struct mm_struct *mm) #endif } +static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) +{ +#ifdef CONFIG_MEMCG + mm->owner = p; +#endif +} + static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) { mm->mmap = NULL; @@ -1139,13 +1146,6 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } -#ifdef CONFIG_MEMCG -void mm_init_owner(struct mm_struct *mm, struct task_struct *p) -{ - mm->owner = p; -} -#endif /* CONFIG_MEMCG */ - /* * Initialize POSIX timer handling for a single task. */ -- cgit v1.2.3 From 4aff1ce7add1c432fe5ea3ae0231155f33e5ef38 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Fri, 8 Aug 2014 14:22:09 -0700 Subject: rapidio: add new RapidIO DMA interface routines Add RapidIO DMA interface routines that directly use reference to the mport device object and/or target device destination ID as parameters. This allows to perform RapidIO DMA transfer requests by modules that do not have an access to the RapidIO device list. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Andre van Herk Cc: Stef van Os Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 66 +++++++++++++++++++++++++++++++++++-------------- include/linux/rio_drv.h | 5 ++++ 2 files changed, 52 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index a54ba0494dd3..d7b87c64b7cd 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1509,30 +1509,39 @@ EXPORT_SYMBOL_GPL(rio_route_clr_table); static bool rio_chan_filter(struct dma_chan *chan, void *arg) { - struct rio_dev *rdev = arg; + struct rio_mport *mport = arg; /* Check that DMA device belongs to the right MPORT */ - return (rdev->net->hport == - container_of(chan->device, struct rio_mport, dma)); + return mport == container_of(chan->device, struct rio_mport, dma); } /** - * rio_request_dma - request RapidIO capable DMA channel that supports - * specified target RapidIO device. - * @rdev: RIO device control structure + * rio_request_mport_dma - request RapidIO capable DMA channel associated + * with specified local RapidIO mport device. + * @mport: RIO mport to perform DMA data transfers * * Returns pointer to allocated DMA channel or NULL if failed. */ -struct dma_chan *rio_request_dma(struct rio_dev *rdev) +struct dma_chan *rio_request_mport_dma(struct rio_mport *mport) { dma_cap_mask_t mask; - struct dma_chan *dchan; dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - dchan = dma_request_channel(mask, rio_chan_filter, rdev); + return dma_request_channel(mask, rio_chan_filter, mport); +} +EXPORT_SYMBOL_GPL(rio_request_mport_dma); - return dchan; +/** + * rio_request_dma - request RapidIO capable DMA channel that supports + * specified target RapidIO device. + * @rdev: RIO device associated with DMA transfer + * + * Returns pointer to allocated DMA channel or NULL if failed. + */ +struct dma_chan *rio_request_dma(struct rio_dev *rdev) +{ + return rio_request_mport_dma(rdev->net->hport); } EXPORT_SYMBOL_GPL(rio_request_dma); @@ -1547,10 +1556,10 @@ void rio_release_dma(struct dma_chan *dchan) EXPORT_SYMBOL_GPL(rio_release_dma); /** - * rio_dma_prep_slave_sg - RapidIO specific wrapper + * rio_dma_prep_xfer - RapidIO specific wrapper * for device_prep_slave_sg callback defined by DMAENGINE. - * @rdev: RIO device control structure * @dchan: DMA channel to configure + * @destid: target RapidIO device destination ID * @data: RIO specific data descriptor * @direction: DMA data transfer direction (TO or FROM the device) * @flags: dmaengine defined flags @@ -1560,11 +1569,10 @@ EXPORT_SYMBOL_GPL(rio_release_dma); * target RIO device. * Returns pointer to DMA transaction descriptor or NULL if failed. */ -struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, - struct dma_chan *dchan, struct rio_dma_data *data, +struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan, + u16 destid, struct rio_dma_data *data, enum dma_transfer_direction direction, unsigned long flags) { - struct dma_async_tx_descriptor *txd = NULL; struct rio_dma_ext rio_ext; if (dchan->device->device_prep_slave_sg == NULL) { @@ -1572,15 +1580,35 @@ struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, return NULL; } - rio_ext.destid = rdev->destid; + rio_ext.destid = destid; rio_ext.rio_addr_u = data->rio_addr_u; rio_ext.rio_addr = data->rio_addr; rio_ext.wr_type = data->wr_type; - txd = dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len, - direction, flags, &rio_ext); + return dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len, + direction, flags, &rio_ext); +} +EXPORT_SYMBOL_GPL(rio_dma_prep_xfer); - return txd; +/** + * rio_dma_prep_slave_sg - RapidIO specific wrapper + * for device_prep_slave_sg callback defined by DMAENGINE. + * @rdev: RIO device control structure + * @dchan: DMA channel to configure + * @data: RIO specific data descriptor + * @direction: DMA data transfer direction (TO or FROM the device) + * @flags: dmaengine defined flags + * + * Initializes RapidIO capable DMA channel for the specified data transfer. + * Uses DMA channel private extension to pass information related to remote + * target RIO device. + * Returns pointer to DMA transaction descriptor or NULL if failed. + */ +struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, + struct dma_chan *dchan, struct rio_dma_data *data, + enum dma_transfer_direction direction, unsigned long flags) +{ + return rio_dma_prep_xfer(dchan, rdev->destid, data, direction, flags); } EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg); diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 5059994fe297..9fc2f213e74f 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -384,11 +384,16 @@ void rio_dev_put(struct rio_dev *); #ifdef CONFIG_RAPIDIO_DMA_ENGINE extern struct dma_chan *rio_request_dma(struct rio_dev *rdev); +extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport); extern void rio_release_dma(struct dma_chan *dchan); extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg( struct rio_dev *rdev, struct dma_chan *dchan, struct rio_dma_data *data, enum dma_transfer_direction direction, unsigned long flags); +extern struct dma_async_tx_descriptor *rio_dma_prep_xfer( + struct dma_chan *dchan, u16 destid, + struct rio_dma_data *data, + enum dma_transfer_direction direction, unsigned long flags); #endif /** -- cgit v1.2.3 From e5eea0981a3840f3f39f43d2d00461c4c24018e7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 8 Aug 2014 14:22:16 -0700 Subject: sysctl: remove typedef ctl_table Remove the final user, and the typedef itself. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 2 +- include/linux/sysctl.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 71290463a1d3..f92d5dd578a4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -632,7 +632,7 @@ out: return ret; } -static int scan(struct ctl_table_header *head, ctl_table *table, +static int scan(struct ctl_table_header *head, struct ctl_table *table, unsigned long *pos, struct file *file, struct dir_context *ctx) { diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 14a8ff2de11e..b7361f831226 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -34,8 +34,6 @@ struct ctl_table_root; struct ctl_table_header; struct ctl_dir; -typedef struct ctl_table ctl_table; - typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.3 From 69361eef9056b0babb507798c2135ad1572f0ef7 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Fri, 8 Aug 2014 14:22:31 -0700 Subject: panic: add TAINT_SOFTLOCKUP This taint flag will be set if the system has ever entered a softlockup state. Similar to TAINT_WARN it is useful to know whether or not the system has been in a softlockup state when debugging. [akpm@linux-foundation.org: apply the taint before calling panic()] Signed-off-by: Josh Hunt Cc: Jason Baron Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/oops-tracing.txt | 2 ++ Documentation/sysctl/kernel.txt | 1 + include/linux/kernel.h | 1 + kernel/panic.c | 1 + kernel/watchdog.c | 1 + 5 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index e3155995ddd8..beefb9f82902 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt @@ -268,6 +268,8 @@ characters, each representing a particular tainted value. 14: 'E' if an unsigned module has been loaded in a kernel supporting module signature. + 15: 'L' if a soft lockup has previously occurred on the system. + The primary reason for the 'Tainted: ' string is to tell kernel debuggers if this is a clean kernel or if anything unusual has occurred. Tainting is permanent: even if an offending module is diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index c14374e71775..f79eb9666379 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -826,6 +826,7 @@ can be ORed together: 4096 - An out-of-tree module has been loaded. 8192 - An unsigned module has been loaded in a kernel supporting module signature. +16384 - A soft lockup has previously occurred on the system. ============================================================== diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3dc22abbc68a..31ae66f34235 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -470,6 +470,7 @@ extern enum system_states { #define TAINT_FIRMWARE_WORKAROUND 11 #define TAINT_OOT_MODULE 12 #define TAINT_UNSIGNED_MODULE 13 +#define TAINT_SOFTLOCKUP 14 extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] diff --git a/kernel/panic.c b/kernel/panic.c index 62e16cef9cc2..d09dc5c32c67 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -224,6 +224,7 @@ static const struct tnt tnts[] = { { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' }, { TAINT_OOT_MODULE, 'O', ' ' }, { TAINT_UNSIGNED_MODULE, 'E', ' ' }, + { TAINT_SOFTLOCKUP, 'L', ' ' }, }; /** diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 51b29e9d2ba6..a8d6914030fe 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -368,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) smp_mb__after_atomic(); } + add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); __this_cpu_write(soft_watchdog_warn, true); -- cgit v1.2.3 From d97b07c54f34e88352ebe676beb798c8f59ac588 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 8 Aug 2014 14:23:14 -0700 Subject: initramfs: support initramfs that is bigger than 2GiB Now with 64bit bzImage and kexec tools, we support ramdisk that size is bigger than 2g, as we could put it above 4G. Found compressed initramfs image could not be decompressed properly. It turns out that image length is int during decompress detection, and it will become < 0 when length is more than 2G. Furthermore, during decompressing len as int is used for inbuf count, that has problem too. Change len to long, that should be ok as on 32 bit platform long is 32bits. Tested with following compressed initramfs image as root with kexec. gzip, bzip2, xz, lzma, lzop, lz4. run time for populate_rootfs(): size name Nehalem-EX Westmere-EX Ivybridge-EX 9034400256 root_img : 26s 24s 30s 3561095057 root_img.lz4 : 28s 27s 27s 3459554629 root_img.lzo : 29s 29s 28s 3219399480 root_img.gz : 64s 62s 49s 2251594592 root_img.xz : 262s 260s 183s 2226366598 root_img.lzma: 386s 376s 277s 2901482513 root_img.bz2 : 635s 599s Signed-off-by: Yinghai Lu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Rashika Kheria Cc: Josh Triplett Cc: Kyungsik Lee Cc: P J P Cc: Al Viro Cc: Tetsuo Handa Cc: "Daniel M. Weeks" Cc: Alexandre Courbot Cc: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- crypto/zlib.c | 8 ++++---- fs/isofs/compress.c | 4 ++-- fs/jffs2/compr_zlib.c | 7 ++++--- include/linux/decompress/bunzip2.h | 8 ++++---- include/linux/decompress/generic.h | 10 +++++----- include/linux/decompress/inflate.h | 8 ++++---- include/linux/decompress/unlz4.h | 8 ++++---- include/linux/decompress/unlzma.h | 8 ++++---- include/linux/decompress/unlzo.h | 8 ++++---- include/linux/decompress/unxz.h | 8 ++++---- include/linux/zlib.h | 4 ++-- init/do_mounts_rd.c | 10 +++++----- init/initramfs.c | 22 +++++++++++----------- lib/decompress.c | 2 +- lib/decompress_bunzip2.c | 26 +++++++++++++------------- lib/decompress_inflate.c | 12 ++++++------ lib/decompress_unlz4.c | 18 +++++++++--------- lib/decompress_unlzma.c | 28 ++++++++++++++-------------- lib/decompress_unlzo.c | 12 ++++++------ lib/decompress_unxz.c | 10 +++++----- 20 files changed, 111 insertions(+), 110 deletions(-) (limited to 'include/linux') diff --git a/crypto/zlib.c b/crypto/zlib.c index 06b62e5cdcc7..c9ee681d57fd 100644 --- a/crypto/zlib.c +++ b/crypto/zlib.c @@ -168,7 +168,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm, } ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", + pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", stream->avail_in, stream->avail_out, req->avail_in - stream->avail_in, ret); req->next_in = stream->next_in; @@ -198,7 +198,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm, } ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", + pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", stream->avail_in, stream->avail_out, req->avail_in - stream->avail_in, ret); req->next_in = stream->next_in; @@ -283,7 +283,7 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm, } ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", + pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", stream->avail_in, stream->avail_out, req->avail_in - stream->avail_in, ret); req->next_in = stream->next_in; @@ -331,7 +331,7 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm, } ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", + pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", stream->avail_in, stream->avail_out, req->avail_in - stream->avail_in, ret); req->next_in = stream->next_in; diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 592e5115a561..f311bf084015 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -158,8 +158,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, "zisofs: zisofs_inflate returned" " %d, inode = %lu," " page idx = %d, bh idx = %d," - " avail_in = %d," - " avail_out = %d\n", + " avail_in = %ld," + " avail_out = %ld\n", zerr, inode->i_ino, curpage, curbh, stream.avail_in, stream.avail_out); diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c index 0b9a1e44e833..5698dae5d92d 100644 --- a/fs/jffs2/compr_zlib.c +++ b/fs/jffs2/compr_zlib.c @@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in, while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) { def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE); - def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out); - jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n", + def_strm.avail_in = min_t(unsigned long, + (*sourcelen-def_strm.total_in), def_strm.avail_out); + jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n", def_strm.avail_in, def_strm.avail_out); ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH); - jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n", + jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n", def_strm.avail_in, def_strm.avail_out, def_strm.total_in, def_strm.total_out); if (ret != Z_OK) { diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h index 115272137a9c..4d683df898e6 100644 --- a/include/linux/decompress/bunzip2.h +++ b/include/linux/decompress/bunzip2.h @@ -1,10 +1,10 @@ #ifndef DECOMPRESS_BUNZIP2_H #define DECOMPRESS_BUNZIP2_H -int bunzip2(unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int bunzip2(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *pos, + long *pos, void(*error)(char *x)); #endif diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h index 0c7111a55a1a..1fcfd64b5076 100644 --- a/include/linux/decompress/generic.h +++ b/include/linux/decompress/generic.h @@ -1,11 +1,11 @@ #ifndef DECOMPRESS_GENERIC_H #define DECOMPRESS_GENERIC_H -typedef int (*decompress_fn) (unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +typedef int (*decompress_fn) (unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *outbuf, - int *posp, + long *posp, void(*error)(char *x)); /* inbuf - input buffer @@ -33,7 +33,7 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len, /* Utility routine to detect the decompression method */ -decompress_fn decompress_method(const unsigned char *inbuf, int len, +decompress_fn decompress_method(const unsigned char *inbuf, long len, const char **name); #endif diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h index 1d0aedef9822..e4f411fdbd24 100644 --- a/include/linux/decompress/inflate.h +++ b/include/linux/decompress/inflate.h @@ -1,10 +1,10 @@ #ifndef LINUX_DECOMPRESS_INFLATE_H #define LINUX_DECOMPRESS_INFLATE_H -int gunzip(unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int gunzip(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *pos, + long *pos, void(*error_fn)(char *x)); #endif diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h index d5b68bf3ec92..3273c2f36496 100644 --- a/include/linux/decompress/unlz4.h +++ b/include/linux/decompress/unlz4.h @@ -1,10 +1,10 @@ #ifndef DECOMPRESS_UNLZ4_H #define DECOMPRESS_UNLZ4_H -int unlz4(unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int unlz4(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *pos, + long *pos, void(*error)(char *x)); #endif diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h index 7796538f1bf4..8a891a193840 100644 --- a/include/linux/decompress/unlzma.h +++ b/include/linux/decompress/unlzma.h @@ -1,11 +1,11 @@ #ifndef DECOMPRESS_UNLZMA_H #define DECOMPRESS_UNLZMA_H -int unlzma(unsigned char *, int, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int unlzma(unsigned char *, long, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ); diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h index 987229752519..af18f95d6570 100644 --- a/include/linux/decompress/unlzo.h +++ b/include/linux/decompress/unlzo.h @@ -1,10 +1,10 @@ #ifndef DECOMPRESS_UNLZO_H #define DECOMPRESS_UNLZO_H -int unlzo(unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int unlzo(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *pos, + long *pos, void(*error)(char *x)); #endif diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h index 41728fc6c8a1..f764e2a7201e 100644 --- a/include/linux/decompress/unxz.h +++ b/include/linux/decompress/unxz.h @@ -10,10 +10,10 @@ #ifndef DECOMPRESS_UNXZ_H #define DECOMPRESS_UNXZ_H -int unxz(unsigned char *in, int in_size, - int (*fill)(void *dest, unsigned int size), - int (*flush)(void *src, unsigned int size), - unsigned char *out, int *in_used, +int unxz(unsigned char *in, long in_size, + long (*fill)(void *dest, unsigned long size), + long (*flush)(void *src, unsigned long size), + unsigned char *out, long *in_used, void (*error)(char *x)); #endif diff --git a/include/linux/zlib.h b/include/linux/zlib.h index 197abb2a54c5..92dbbd3f6c75 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -83,11 +83,11 @@ struct internal_state; typedef struct z_stream_s { const Byte *next_in; /* next input byte */ - uInt avail_in; /* number of bytes available at next_in */ + uLong avail_in; /* number of bytes available at next_in */ uLong total_in; /* total nb of input bytes read so far */ Byte *next_out; /* next output byte should be put there */ - uInt avail_out; /* remaining free space at next_out */ + uLong avail_out; /* remaining free space at next_out */ uLong total_out; /* total nb of bytes output so far */ char *msg; /* last error message, NULL if no error */ diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a8227022e3a0..e5d059e8aa11 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -311,9 +311,9 @@ static int exit_code; static int decompress_error; static int crd_infd, crd_outfd; -static int __init compr_fill(void *buf, unsigned int len) +static long __init compr_fill(void *buf, unsigned long len) { - int r = sys_read(crd_infd, buf, len); + long r = sys_read(crd_infd, buf, len); if (r < 0) printk(KERN_ERR "RAMDISK: error while reading compressed data"); else if (r == 0) @@ -321,13 +321,13 @@ static int __init compr_fill(void *buf, unsigned int len) return r; } -static int __init compr_flush(void *window, unsigned int outcnt) +static long __init compr_flush(void *window, unsigned long outcnt) { - int written = sys_write(crd_outfd, window, outcnt); + long written = sys_write(crd_outfd, window, outcnt); if (written != outcnt) { if (decompress_error == 0) printk(KERN_ERR - "RAMDISK: incomplete write (%d != %d)\n", + "RAMDISK: incomplete write (%ld != %ld)\n", written, outcnt); decompress_error = 1; return -1; diff --git a/init/initramfs.c b/init/initramfs.c index 4f276b6a167b..a7566031242e 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -197,7 +197,7 @@ static __initdata enum state { } state, next_state; static __initdata char *victim; -static __initdata unsigned count; +static unsigned long count __initdata; static __initdata loff_t this_header, next_header; static inline void __init eat(unsigned n) @@ -209,7 +209,7 @@ static inline void __init eat(unsigned n) static __initdata char *vcollected; static __initdata char *collected; -static __initdata int remains; +static long remains __initdata; static __initdata char *collect; static void __init read_into(char *buf, unsigned size, enum state next) @@ -236,7 +236,7 @@ static int __init do_start(void) static int __init do_collect(void) { - unsigned n = remains; + unsigned long n = remains; if (count < n) n = count; memcpy(collect, victim, n); @@ -407,7 +407,7 @@ static __initdata int (*actions[])(void) = { [Reset] = do_reset, }; -static int __init write_buffer(char *buf, unsigned len) +static long __init write_buffer(char *buf, unsigned long len) { count = len; victim = buf; @@ -417,11 +417,11 @@ static int __init write_buffer(char *buf, unsigned len) return len - count; } -static int __init flush_buffer(void *bufv, unsigned len) +static long __init flush_buffer(void *bufv, unsigned long len) { char *buf = (char *) bufv; - int written; - int origLen = len; + long written; + long origLen = len; if (message) return -1; while ((written = write_buffer(buf, len)) < len && !message) { @@ -440,13 +440,13 @@ static int __init flush_buffer(void *bufv, unsigned len) return origLen; } -static unsigned my_inptr; /* index of next byte to be processed in inbuf */ +static unsigned long my_inptr; /* index of next byte to be processed in inbuf */ #include -static char * __init unpack_to_rootfs(char *buf, unsigned len) +static char * __init unpack_to_rootfs(char *buf, unsigned long len) { - int written, res; + long written; decompress_fn decompress; const char *compress_name; static __initdata char msg_buf[64]; @@ -480,7 +480,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len) decompress = decompress_method(buf, len, &compress_name); pr_debug("Detected %s compressed data\n", compress_name); if (decompress) { - res = decompress(buf, len, NULL, flush_buffer, NULL, + int res = decompress(buf, len, NULL, flush_buffer, NULL, &my_inptr, error); if (res) error("decompressor failed"); diff --git a/lib/decompress.c b/lib/decompress.c index 86069d74c062..37f3c786348f 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -54,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = { { {0, 0}, NULL, NULL } }; -decompress_fn __init decompress_method(const unsigned char *inbuf, int len, +decompress_fn __init decompress_method(const unsigned char *inbuf, long len, const char **name) { const struct compress_format *cf; diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 31c5f7675fbf..8290e0bef7ea 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -92,8 +92,8 @@ struct bunzip_data { /* State for interrupting output loop */ int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; /* I/O tracking data (file handles, buffers, positions, etc.) */ - int (*fill)(void*, unsigned int); - int inbufCount, inbufPos /*, outbufPos*/; + long (*fill)(void*, unsigned long); + long inbufCount, inbufPos /*, outbufPos*/; unsigned char *inbuf /*,*outbuf*/; unsigned int inbufBitCount, inbufBits; /* The CRC values stored in the block header and calculated from the @@ -617,7 +617,7 @@ decode_next_byte: goto decode_next_byte; } -static int INIT nofill(void *buf, unsigned int len) +static long INIT nofill(void *buf, unsigned long len) { return -1; } @@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len) /* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are ignored, and data is read from file handle into temporary buffer. */ -static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, - int (*fill)(void*, unsigned int)) +static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len, + long (*fill)(void*, unsigned long)) { struct bunzip_data *bd; unsigned int i, j, c; @@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, /* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, not end of file.) */ -STATIC int INIT bunzip2(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT bunzip2(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *outbuf, - int *pos, + long *pos, void(*error)(char *x)) { struct bunzip_data *bd; @@ -743,11 +743,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *outbuf, - int *pos, + long *pos, void(*error)(char *x)) { return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 0edfd742a154..d4c7891635ec 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -27,17 +27,17 @@ #define GZIP_IOBUF_SIZE (16*1024) -static int INIT nofill(void *buffer, unsigned int len) +static long INIT nofill(void *buffer, unsigned long len) { return -1; } /* Included from initramfs et al code */ -STATIC int INIT gunzip(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT gunzip(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *out_buf, - int *pos, + long *pos, void(*error)(char *x)) { u8 *zbuf; struct z_stream_s *strm; @@ -142,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, /* Write any data generated */ if (flush && strm->next_out > out_buf) { - int l = strm->next_out - out_buf; + long l = strm->next_out - out_buf; if (l != flush(out_buf, l)) { rc = -1; error("write error"); diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 3ad7f3954dfd..40f66ebe57b7 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -31,10 +31,10 @@ #define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20) #define ARCHIVE_MAGICNUMBER 0x184C2102 -STATIC inline int INIT unlz4(u8 *input, int in_len, - int (*fill) (void *, unsigned int), - int (*flush) (void *, unsigned int), - u8 *output, int *posp, +STATIC inline int INIT unlz4(u8 *input, long in_len, + long (*fill)(void *, unsigned long), + long (*flush)(void *, unsigned long), + u8 *output, long *posp, void (*error) (char *x)) { int ret = -1; @@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, u8 *inp; u8 *inp_start; u8 *outp; - int size = in_len; + long size = in_len; #ifdef PREBOOT size_t out_len = get_unaligned_le32(input + in_len); #endif @@ -196,11 +196,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 32adb73a9038..0be83af62b88 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size) #define LZMA_IOBUF_SIZE 0x10000 struct rc { - int (*fill)(void*, unsigned int); + long (*fill)(void*, unsigned long); uint8_t *ptr; uint8_t *buffer; uint8_t *buffer_end; - int buffer_size; + long buffer_size; uint32_t code; uint32_t range; uint32_t bound; @@ -82,7 +82,7 @@ struct rc { #define RC_MODEL_TOTAL_BITS 11 -static int INIT nofill(void *buffer, unsigned int len) +static long INIT nofill(void *buffer, unsigned long len) { return -1; } @@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc) /* Called once */ static inline void INIT rc_init(struct rc *rc, - int (*fill)(void*, unsigned int), - char *buffer, int buffer_size) + long (*fill)(void*, unsigned long), + char *buffer, long buffer_size) { if (fill) rc->fill = fill; @@ -280,7 +280,7 @@ struct writer { size_t buffer_pos; int bufsize; size_t global_pos; - int(*flush)(void*, unsigned int); + long (*flush)(void*, unsigned long); struct lzma_header *header; }; @@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc, -STATIC inline int INIT unlzma(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC inline int INIT unlzma(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { @@ -667,11 +667,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 960183d4258f..b94a31bdd87d 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = { #define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4) #define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4) -STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len) +STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len) { int l; u8 *parse = input; @@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len) return 1; } -STATIC inline int INIT unlzo(u8 *input, int in_len, - int (*fill) (void *, unsigned int), - int (*flush) (void *, unsigned int), - u8 *output, int *posp, +STATIC int INIT unlzo(u8 *input, long in_len, + long (*fill)(void *, unsigned long), + long (*flush)(void *, unsigned long), + u8 *output, long *posp, void (*error) (char *x)) { u8 r = 0; - int skip = 0; + long skip = 0; u32 src_len, dst_len; size_t tmp; u8 *in_buf, *in_buf_save, *out_buf; diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 9f34eb56854d..b07a78340e9d 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size) * both input and output buffers are available as a single chunk, i.e. when * fill() and flush() won't be used. */ -STATIC int INIT unxz(unsigned char *in, int in_size, - int (*fill)(void *dest, unsigned int size), - int (*flush)(void *src, unsigned int size), - unsigned char *out, int *in_used, +STATIC int INIT unxz(unsigned char *in, long in_size, + long (*fill)(void *dest, unsigned long size), + long (*flush)(void *src, unsigned long size), + unsigned char *out, long *in_used, void (*error)(char *x)) { struct xz_buf b; @@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size, * returned by xz_dec_run(), but probably * it's not too bad. */ - if (flush(b.out, b.out_pos) != (int)b.out_pos) + if (flush(b.out, b.out_pos) != (long)b.out_pos) ret = XZ_BUF_ERROR; b.out_pos = 0; -- cgit v1.2.3 From ab602f799159393143d567e5c04b936fec79d6bd Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Fri, 8 Aug 2014 14:23:19 -0700 Subject: shm: make exit_shm work proportional to task activity This is small set of patches our team has had kicking around for a few versions internally that fixes tasks getting hung on shm_exit when there are many threads hammering it at once. Anton wrote a simple test to cause the issue: http://ozlabs.org/~anton/junkcode/bust_shm_exit.c Before applying this patchset, this test code will cause either hanging tracebacks or pthread out of memory errors. After this patchset, it will still produce output like: root@somehost:~# ./bust_shm_exit 1024 160 ... INFO: rcu_sched detected stalls on CPUs/tasks: {} (detected by 116, t=2111 jiffies, g=241, c=240, q=7113) INFO: Stall ended before state dump start ... But the task will continue to run along happily, so we consider this an improvement over hanging, even if it's a bit noisy. This patch (of 3): exit_shm obtains the ipc_ns shm rwsem for write and holds it while it walks every shared memory segment in the namespace. Thus the amount of work is related to the number of shm segments in the namespace not the number of segments that might need to be cleaned. In addition, this occurs after the task has been notified the thread has exited, so the number of tasks waiting for the ns shm rwsem can grow without bound until memory is exausted. Add a list to the task struct of all shmids allocated by this task. Init the list head in copy_process. Use the ns->rwsem for locking. Add segments after id is added, remove before removing from id. On unshare of NEW_IPCNS orphan any ids as if the task had exited, similar to handling of semaphore undo. I chose a define for the init sequence since its a simple list init, otherwise it would require a function call to avoid include loops between the semaphore code and the task struct. Converting the list_del to list_del_init for the unshare cases would remove the exit followed by init, but I left it blow up if not inited. Signed-off-by: Milton Miller Signed-off-by: Jack Miller Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ include/linux/shm.h | 16 +++++++++++++++- ipc/shm.c | 22 +++++++++++----------- kernel/fork.c | 6 ++++++ 4 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b21e9218c0fd..db2f6474e95e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -33,6 +33,7 @@ struct sched_param { #include #include +#include #include #include #include @@ -1385,6 +1386,7 @@ struct task_struct { #ifdef CONFIG_SYSVIPC /* ipc stuff */ struct sysv_sem sysvsem; + struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK /* hung task detection */ diff --git a/include/linux/shm.h b/include/linux/shm.h index 57d77709fbe2..fd206387048a 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SHM_H_ #define _LINUX_SHM_H_ +#include #include #include #include @@ -20,6 +21,7 @@ struct shmid_kernel /* private to the kernel */ /* The task created the shm object. NULL if the task is dead. */ struct task_struct *shm_creator; + struct list_head shm_clist; /* list by creator */ }; /* shm_mode upper byte flags */ @@ -44,11 +46,20 @@ struct shmid_kernel /* private to the kernel */ #define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) #ifdef CONFIG_SYSVIPC +struct sysv_shm { + struct list_head shm_clist; +}; + long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); extern int is_file_shm_hugepages(struct file *file); -extern void exit_shm(struct task_struct *task); +void exit_shm(struct task_struct *task); +#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #else +struct sysv_shm { + /* empty */ +}; + static inline long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba) @@ -62,6 +73,9 @@ static inline int is_file_shm_hugepages(struct file *file) static inline void exit_shm(struct task_struct *task) { } +static inline void shm_init_task(struct task_struct *task) +{ +} #endif #endif /* _LINUX_SHM_H_ */ diff --git a/ipc/shm.c b/ipc/shm.c index 89fc354156cb..1fc3a61b443b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -178,6 +178,7 @@ static void shm_rcu_free(struct rcu_head *head) static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) { + list_del(&s->shm_clist); ipc_rmid(&shm_ids(ns), &s->shm_perm); } @@ -268,14 +269,10 @@ static void shm_close(struct vm_area_struct *vma) } /* Called with ns->shm_ids(ns).rwsem locked */ -static int shm_try_destroy_current(int id, void *p, void *data) +static void shm_mark_orphan(struct shmid_kernel *shp, struct ipc_namespace *ns) { - struct ipc_namespace *ns = data; - struct kern_ipc_perm *ipcp = p; - struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); - - if (shp->shm_creator != current) - return 0; + if (WARN_ON(shp->shm_creator != current)) /* Remove me when it works */ + return; /* * Mark it as orphaned to destroy the segment when @@ -289,13 +286,12 @@ static int shm_try_destroy_current(int id, void *p, void *data) * is not set, it shouldn't be deleted here. */ if (!ns->shm_rmid_forced) - return 0; + return; if (shm_may_destroy(ns, shp)) { shm_lock_by_ptr(shp); shm_destroy(ns, shp); } - return 0; } /* Called with ns->shm_ids(ns).rwsem locked */ @@ -333,14 +329,17 @@ void shm_destroy_orphaned(struct ipc_namespace *ns) void exit_shm(struct task_struct *task) { struct ipc_namespace *ns = task->nsproxy->ipc_ns; + struct shmid_kernel *shp, *n; if (shm_ids(ns).in_use == 0) return; /* Destroy all already created segments, but not mapped yet */ down_write(&shm_ids(ns).rwsem); - if (shm_ids(ns).in_use) - idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); + list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) + shm_mark_orphan(shp, ns); + /* remove the list head from any segments still attached */ + list_del(&task->sysvshm.shm_clist); up_write(&shm_ids(ns).rwsem); } @@ -561,6 +560,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); /* * shmid gets reported as "inode#" in /proc/pid/maps. diff --git a/kernel/fork.c b/kernel/fork.c index 86da59e165ad..fa9124322cd4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1362,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (retval) goto bad_fork_cleanup_policy; /* copy all the process information */ + shm_init_task(p); retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; @@ -1913,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) */ exit_sem(current); } + if (unshare_flags & CLONE_NEWIPC) { + /* Orphan segments in old ns (see sem above). */ + exit_shm(current); + shm_init_task(current); + } if (new_nsproxy) switch_task_namespaces(current, new_nsproxy); -- cgit v1.2.3 From 2f137d66fb65ef41df6e558f23d481f07394a424 Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Fri, 8 Aug 2014 14:23:23 -0700 Subject: shm: remove unneeded extern for function A small cleanup while changing adjacent code. Extern is not needed for functions and only one declaration had it so remove it from the odd line. Signed-off-by: Milton Miller Signed-off-by: Jack Miller Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shm.h b/include/linux/shm.h index fd206387048a..6fb801686ad6 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -52,7 +52,7 @@ struct sysv_shm { long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); -extern int is_file_shm_hugepages(struct file *file); +int is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #else -- cgit v1.2.3 From 308c09f17da4adc53935115dbeb5bce4f067d8f9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 8 Aug 2014 14:23:25 -0700 Subject: lib/scatterlist: make ARCH_HAS_SG_CHAIN an actual Kconfig Rather than have architectures #define ARCH_HAS_SG_CHAIN in an architecture specific scatterlist.h, make it a proper Kconfig option and use that instead. At same time, remove the header files are are now mostly useless and just include asm-generic/scatterlist.h. [sfr@canb.auug.org.au: powerpc files now need asm/dma.h] Signed-off-by: Laura Abbott Acked-by: Thomas Gleixner [x86] Acked-by: Benjamin Herrenschmidt [powerpc] Acked-by: Heiko Carstens Cc: Russell King Cc: Tony Luck Cc: Fenghua Yu Cc: Paul Mackerras Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Martin Schwidefsky Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm/include/asm/scatterlist.h | 12 ------------ arch/arm64/Kconfig | 1 + arch/ia64/Kconfig | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/ia64/include/asm/scatterlist.h | 7 ------- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/scatterlist.h | 17 ----------------- arch/powerpc/mm/dma-noncoherent.c | 1 + arch/powerpc/platforms/44x/warp.c | 1 + arch/powerpc/platforms/52xx/efika.c | 1 + arch/powerpc/platforms/amigaone/setup.c | 1 + arch/s390/Kconfig | 1 + arch/s390/include/asm/Kbuild | 1 + arch/s390/include/asm/scatterlist.h | 3 --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/sparc/include/asm/scatterlist.h | 8 -------- arch/um/include/asm/Kbuild | 1 + arch/x86/Kconfig | 1 + arch/x86/include/asm/Kbuild | 3 ++- arch/x86/include/asm/scatterlist.h | 8 -------- include/linux/scatterlist.h | 2 +- include/scsi/scsi.h | 2 +- lib/Kconfig | 7 +++++++ lib/scatterlist.c | 4 ++-- 28 files changed, 30 insertions(+), 60 deletions(-) delete mode 100644 arch/arm/include/asm/scatterlist.h delete mode 100644 arch/ia64/include/asm/scatterlist.h delete mode 100644 arch/powerpc/include/asm/scatterlist.h delete mode 100644 arch/s390/include/asm/scatterlist.h delete mode 100644 arch/sparc/include/asm/scatterlist.h delete mode 100644 arch/x86/include/asm/scatterlist.h (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d31c500653a2..8e9dbcbcf5af 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -83,6 +83,7 @@ config ARM . config ARM_HAS_SG_CHAIN + select ARCH_HAS_SG_CHAIN bool config NEED_SG_DMA_LENGTH diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index f5a357601983..70cd84eb7fda 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h +generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += sembuf.h diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h deleted file mode 100644 index cefdb8f898a1..000000000000 --- a/arch/arm/include/asm/scatterlist.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASMARM_SCATTERLIST_H -#define _ASMARM_SCATTERLIST_H - -#ifdef CONFIG_ARM_HAS_SG_CHAIN -#define ARCH_HAS_SG_CHAIN -#endif - -#include -#include -#include - -#endif /* _ASMARM_SCATTERLIST_H */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b0f9c9db9590..fd4e81a4e1ce 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 44a6915ab13d..c84c88bbbbd7 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -28,6 +28,7 @@ config IA64 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING + select ARCH_HAS_SG_CHAIN select VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 0da4aa2602ae..e8317d2d6c8d 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -5,5 +5,6 @@ generic-y += hash.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += preempt.h +generic-y += scatterlist.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h deleted file mode 100644 index 08fd93bff1db..000000000000 --- a/arch/ia64/include/asm/scatterlist.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _ASM_IA64_SCATTERLIST_H -#define _ASM_IA64_SCATTERLIST_H - -#include -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_IA64_SCATTERLIST_H */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 80b94b0add1f..4bc7b62fb4b6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -111,6 +111,7 @@ config PPC select HAVE_DMA_API_DEBUG select HAVE_OPROFILE select HAVE_DEBUG_KMEMLEAK + select ARCH_HAS_SG_CHAIN select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_PERF_EVENTS diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 3fb1bc432f4f..7f23f162ce9c 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -4,5 +4,6 @@ generic-y += hash.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h +generic-y += scatterlist.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h deleted file mode 100644 index de1f620bd5c9..000000000000 --- a/arch/powerpc/include/asm/scatterlist.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _ASM_POWERPC_SCATTERLIST_H -#define _ASM_POWERPC_SCATTERLIST_H -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include - -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_POWERPC_SCATTERLIST_H */ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 7b6c10750179..d85e86aac7fb 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -33,6 +33,7 @@ #include #include +#include #include "mmu_decl.h" diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 534574a97ec9..3a104284b338 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -25,6 +25,7 @@ #include #include #include +#include static __initdata struct of_device_id warp_of_bus[] = { diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 6e19b0ad5d26..3feffde9128d 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 03aabc0e16ac..2fe12046279e 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -24,6 +24,7 @@ #include #include #include +#include extern void __flush_disable_L1(void); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8ca60f8d5683..05c78bb5f570 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -145,6 +145,7 @@ config S390 select TTY select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS + select ARCH_HAS_SG_CHAIN config SCHED_OMIT_FRAME_POINTER def_bool y diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 57892a8a9055..b3fea0722ff1 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -4,4 +4,5 @@ generic-y += clkdev.h generic-y += hash.h generic-y += mcs_spinlock.h generic-y += preempt.h +generic-y += scatterlist.h generic-y += trace_clock.h diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h deleted file mode 100644 index 6d45ef6c12a7..000000000000 --- a/arch/s390/include/asm/scatterlist.h +++ /dev/null @@ -1,3 +0,0 @@ -#include - -#define ARCH_HAS_SG_CHAIN diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 4692c90936f1..a537816613f9 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -42,6 +42,7 @@ config SPARC select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND + select ARCH_HAS_SG_CHAIN config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index a45821818003..cdd1b447bb6c 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h generic-y += module.h generic-y += mutex.h generic-y += preempt.h +generic-y += scatterlist.h generic-y += serial.h generic-y += trace_clock.h generic-y += types.h diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h deleted file mode 100644 index 92bb638313f8..000000000000 --- a/arch/sparc/include/asm/scatterlist.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _SPARC_SCATTERLIST_H -#define _SPARC_SCATTERLIST_H - -#include - -#define ARCH_HAS_SG_CHAIN - -#endif /* !(_SPARC_SCATTERLIST_H) */ diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index a5e4b6068213..7bd64aa2e94a 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -21,6 +21,7 @@ generic-y += param.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h +generic-y += scatterlist.h generic-y += sections.h generic-y += switch_to.h generic-y += topology.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bf2405053af5..c915cc6e40be 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -96,6 +96,7 @@ config X86 select IRQ_FORCED_THREADING select HAVE_BPF_JIT if X86_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select ARCH_HAS_SG_CHAIN select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 3ca9762e1649..3bf000fab0ae 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,6 +5,7 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h -generic-y += early_ioremap.h generic-y += cputime.h +generic-y += early_ioremap.h generic-y += mcs_spinlock.h +generic-y += scatterlist.h diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h deleted file mode 100644 index 4240878b9d76..000000000000 --- a/arch/x86/include/asm/scatterlist.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_SCATTERLIST_H -#define _ASM_X86_SCATTERLIST_H - -#include - -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index f4ec8bbcb372..ed8f9e70df9b 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { -#ifndef ARCH_HAS_SG_CHAIN +#ifndef CONFIG_ARCH_HAS_SG_CHAIN BUG(); #endif diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index e6df23cae7be..261e708010da 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -31,7 +31,7 @@ enum scsi_timeouts { * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. */ -#ifdef ARCH_HAS_SG_CHAIN +#ifdef CONFIG_ARCH_HAS_SG_CHAIN #define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 #else #define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS diff --git a/lib/Kconfig b/lib/Kconfig index df872659ddd3..a5ce0c7f6c30 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -508,4 +508,11 @@ config UCS2_STRING source "lib/fonts/Kconfig" +# +# sg chaining option +# + +config ARCH_HAS_SG_CHAIN + def_bool n + endmenu diff --git a/lib/scatterlist.c b/lib/scatterlist.c index b4415fceb7e7..9cdf62f8accd 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents); **/ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) { -#ifndef ARCH_HAS_SG_CHAIN +#ifndef CONFIG_ARCH_HAS_SG_CHAIN struct scatterlist *ret = &sgl[nents - 1]; #else struct scatterlist *sg, *ret = NULL; @@ -255,7 +255,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (nents == 0) return -EINVAL; -#ifndef ARCH_HAS_SG_CHAIN +#ifndef CONFIG_ARCH_HAS_SG_CHAIN if (WARN_ON_ONCE(nents > max_ents)) return -EINVAL; #endif -- cgit v1.2.3 From a6c19dfe39941a5d3f4d072121c0a4841e7e26fd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 8 Aug 2014 14:23:40 -0700 Subject: arm64,ia64,ppc,s390,sh,tile,um,x86,mm: remove default gate area The core mm code will provide a default gate area based on FIXADDR_USER_START and FIXADDR_USER_END if !defined(__HAVE_ARCH_GATE_AREA) && defined(AT_SYSINFO_EHDR). This default is only useful for ia64. arm64, ppc, s390, sh, tile, 64-bit UML, and x86_32 have their own code just to disable it. arm, 32-bit UML, and x86_64 have gate areas, but they have their own implementations. This gets rid of the default and moves the code into ia64. This should save some code on architectures without a gate area: it's now possible to inline the gate_area functions in the default case. Signed-off-by: Andy Lutomirski Acked-by: Nathan Lynch Acked-by: H. Peter Anvin Acked-by: Benjamin Herrenschmidt [in principle] Acked-by: Richard Weinberger [for um] Acked-by: Will Deacon [for arm64] Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Chris Metcalf Cc: Jeff Dike Cc: Richard Weinberger Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Nathan Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/page.h | 3 --- arch/arm64/kernel/vdso.c | 19 ------------------- arch/ia64/include/asm/page.h | 2 ++ arch/ia64/mm/init.c | 31 +++++++++++++++++++++++++++++++ arch/powerpc/include/asm/page.h | 3 --- arch/powerpc/kernel/vdso.c | 16 ---------------- arch/s390/include/asm/page.h | 2 -- arch/s390/kernel/vdso.c | 15 --------------- arch/sh/include/asm/page.h | 5 ----- arch/sh/kernel/vsyscall/vsyscall.c | 15 --------------- arch/tile/include/asm/page.h | 6 ------ arch/tile/kernel/vdso.c | 15 --------------- arch/um/include/asm/page.h | 5 +++++ arch/x86/include/asm/page.h | 1 - arch/x86/include/asm/page_64.h | 2 ++ arch/x86/um/asm/elf.h | 1 - arch/x86/um/mem_64.c | 15 --------------- arch/x86/vdso/vdso32-setup.c | 19 +------------------ include/linux/mm.h | 17 ++++++++++++----- mm/memory.c | 38 -------------------------------------- mm/nommu.c | 5 ----- 21 files changed, 53 insertions(+), 182 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 7a3f462133b0..22b16232bd60 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -28,9 +28,6 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ -#define __HAVE_ARCH_GATE_AREA 1 - /* * The idmap and swapper page tables need some space reserved in the kernel * image. Both require pgd, pud (4 levels only) and pmd tables to (section) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index a81a446a5786..32aeea083d93 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -194,25 +194,6 @@ up_fail: return PTR_ERR(ret); } -/* - * We define AT_SYSINFO_EHDR, so we need these function stubs to keep - * Linux happy. - */ -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - /* * Update the vDSO data page to keep in sync with kernel timekeeping. */ diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h index f1e1b2e3cdb3..1f1bf144fe62 100644 --- a/arch/ia64/include/asm/page.h +++ b/arch/ia64/include/asm/page.h @@ -231,4 +231,6 @@ get_order (unsigned long size) #define PERCPU_ADDR (-PERCPU_PAGE_SIZE) #define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) +#define __HAVE_ARCH_GATE_AREA 1 + #endif /* _ASM_IA64_PAGE_H */ diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 892d43e32f3b..6b3345758d3e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -278,6 +278,37 @@ setup_gate (void) ia64_patch_gate(); } +static struct vm_area_struct gate_vma; + +static int __init gate_vma_init(void) +{ + gate_vma.vm_mm = NULL; + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; + gate_vma.vm_page_prot = __P101; + + return 0; +} +__initcall(gate_vma_init); + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return &gate_vma; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) + return 1; + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return in_gate_area_no_mm(addr); +} + void ia64_mmu_init(void *my_cpu_data) { unsigned long pta, impl_va_bits; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 32e4e212b9c1..26fe1ae15212 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -48,9 +48,6 @@ extern unsigned int HPAGE_SHIFT; #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) #endif -/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ -#define __HAVE_ARCH_GATE_AREA 1 - /* * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we * assign PAGE_MASK to a larger type it gets extended the way we want diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ce74c335a6a4..f174351842cf 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -840,19 +840,3 @@ static int __init vdso_init(void) return 0; } arch_initcall(vdso_init); - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 114258eeaacd..7b2ac6e44166 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -162,6 +162,4 @@ static inline int devmem_is_allowed(unsigned long pfn) #include #include -#define __HAVE_ARCH_GATE_AREA 1 - #endif /* _S390_PAGE_H */ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 613649096783..0bbb7e027c5a 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -316,18 +316,3 @@ static int __init vdso_init(void) return 0; } early_initcall(vdso_init); - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 15d970328f71..fe20d14ae051 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -186,11 +186,6 @@ typedef struct page *pgtable_t; #include #include -/* vDSO support */ -#ifdef CONFIG_VSYSCALL -#define __HAVE_ARCH_GATE_AREA -#endif - /* * Some drivers need to perform DMA into kmalloc'ed buffers * and so we have to increase the kmalloc minalign for this. diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 5ca579720a09..ea2aa1393b87 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -92,18 +92,3 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long address) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long address) -{ - return 0; -} diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 672768008618..a213a8d84a95 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -38,12 +38,6 @@ #define PAGE_MASK (~(PAGE_SIZE - 1)) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) -/* - * We do define AT_SYSINFO_EHDR to support vDSO, - * but don't use the gate mechanism. - */ -#define __HAVE_ARCH_GATE_AREA 1 - /* * If the Kconfig doesn't specify, set a maximum zone order that * is enough so that we can create huge pages from small pages given diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c index 1533af24106e..5bc51d7dfdcb 100644 --- a/arch/tile/kernel/vdso.c +++ b/arch/tile/kernel/vdso.c @@ -121,21 +121,6 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long address) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long address) -{ - return 0; -} - int setup_vdso_pages(void) { struct page **pagelist; diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 5ff53d9185f7..71c5d132062a 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -119,4 +119,9 @@ extern unsigned long uml_physmem; #include #endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_32 +#define __HAVE_ARCH_GATE_AREA 1 +#endif + #endif /* __UM_PAGE_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 775873d3be55..802dde30c928 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -70,7 +70,6 @@ extern bool __virt_addr_valid(unsigned long kaddr); #include #include -#define __HAVE_ARCH_GATE_AREA 1 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 0f1ddee6a0ce..f408caf73430 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -39,4 +39,6 @@ void copy_page(void *to, void *from); #endif /* !__ASSEMBLY__ */ +#define __HAVE_ARCH_GATE_AREA 1 + #endif /* _ASM_X86_PAGE_64_H */ diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 0feee2fd5077..25a1022dd793 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -216,6 +216,5 @@ extern long elf_aux_hwcap; #define ELF_HWCAP (elf_aux_hwcap) #define SET_PERSONALITY(ex) do ; while(0) -#define __HAVE_ARCH_GATE_AREA 1 #endif diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c index c6492e75797b..f8fecaddcc0d 100644 --- a/arch/x86/um/mem_64.c +++ b/arch/x86/um/mem_64.c @@ -9,18 +9,3 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index e4f7781ee162..e904c270573b 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -115,23 +115,6 @@ static __init int ia32_binfmt_init(void) return 0; } __initcall(ia32_binfmt_init); -#endif - -#else /* CONFIG_X86_32 */ - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} +#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_X86_64 */ diff --git a/include/linux/mm.h b/include/linux/mm.h index e03dd29145a0..8981cc882ed2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2014,13 +2014,20 @@ static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ #endif +#ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); -#ifdef __HAVE_ARCH_GATE_AREA -int in_gate_area_no_mm(unsigned long addr); -int in_gate_area(struct mm_struct *mm, unsigned long addr); +extern int in_gate_area_no_mm(unsigned long addr); +extern int in_gate_area(struct mm_struct *mm, unsigned long addr); #else -int in_gate_area_no_mm(unsigned long addr); -#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) +static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} +static inline int in_gate_area_no_mm(unsigned long addr) { return 0; } +static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} #endif /* __HAVE_ARCH_GATE_AREA */ #ifdef CONFIG_SYSCTL diff --git a/mm/memory.c b/mm/memory.c index 2a899e4e82ba..ab3537bcfed2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3430,44 +3430,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) } #endif /* __PAGETABLE_PMD_FOLDED */ -#if !defined(__HAVE_ARCH_GATE_AREA) - -#if defined(AT_SYSINFO_EHDR) -static struct vm_area_struct gate_vma; - -static int __init gate_vma_init(void) -{ - gate_vma.vm_mm = NULL; - gate_vma.vm_start = FIXADDR_USER_START; - gate_vma.vm_end = FIXADDR_USER_END; - gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; - gate_vma.vm_page_prot = __P101; - - return 0; -} -__initcall(gate_vma_init); -#endif - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ -#ifdef AT_SYSINFO_EHDR - return &gate_vma; -#else - return NULL; -#endif -} - -int in_gate_area_no_mm(unsigned long addr) -{ -#ifdef AT_SYSINFO_EHDR - if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) - return 1; -#endif - return 0; -} - -#endif /* __HAVE_ARCH_GATE_AREA */ - static int __follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, spinlock_t **ptlp) { diff --git a/mm/nommu.c b/mm/nommu.c index 4a852f6c5709..a881d9673c6b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1981,11 +1981,6 @@ error: return -ENOMEM; } -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { BUG(); -- cgit v1.2.3 From 4bb5f5d9395bc112d93a134d8f5b05611eddc9c0 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:25 -0700 Subject: mm: allow drivers to prevent new writable mappings This patch (of 6): The i_mmap_writable field counts existing writable mappings of an address_space. To allow drivers to prevent new writable mappings, make this counter signed and prevent new writable mappings if it is negative. This is modelled after i_writecount and DENYWRITE. This will be required by the shmem-sealing infrastructure to prevent any new writable mappings after the WRITE seal has been set. In case there exists a writable mapping, this operation will fail with EBUSY. Note that we rely on the fact that iff you already own a writable mapping, you can increase the counter without using the helpers. This is the same that we do for i_writecount. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 1 + include/linux/fs.h | 29 +++++++++++++++++++++++++++-- kernel/fork.c | 2 +- mm/mmap.c | 30 ++++++++++++++++++++++++------ mm/swap_state.c | 1 + 5 files changed, 54 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 5938f3928944..26753ba7b6d6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; + atomic_set(&mapping->i_mmap_writable, 0); mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; mapping->backing_dev_info = &default_backing_dev_info; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ab6c6913040..f0890e4a7c25 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -387,7 +387,7 @@ struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ - unsigned int i_mmap_writable;/* count VM_SHARED mappings */ + atomic_t i_mmap_writable;/* count VM_SHARED mappings */ struct rb_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ @@ -470,10 +470,35 @@ static inline int mapping_mapped(struct address_space *mapping) * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. + * + * If i_mmap_writable is negative, no new writable mappings are allowed. You + * can only deny writable mappings, if none exists right now. */ static inline int mapping_writably_mapped(struct address_space *mapping) { - return mapping->i_mmap_writable != 0; + return atomic_read(&mapping->i_mmap_writable) > 0; +} + +static inline int mapping_map_writable(struct address_space *mapping) +{ + return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? + 0 : -EPERM; +} + +static inline void mapping_unmap_writable(struct address_space *mapping) +{ + atomic_dec(&mapping->i_mmap_writable); +} + +static inline int mapping_deny_writable(struct address_space *mapping) +{ + return atomic_dec_unless_positive(&mapping->i_mmap_writable) ? + 0 : -EBUSY; +} + +static inline void mapping_allow_writable(struct address_space *mapping) +{ + atomic_inc(&mapping->i_mmap_writable); } /* diff --git a/kernel/fork.c b/kernel/fork.c index fa9124322cd4..1380d8ace334 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -429,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) atomic_dec(&inode->i_writecount); mutex_lock(&mapping->i_mmap_mutex); if (tmp->vm_flags & VM_SHARED) - mapping->i_mmap_writable++; + atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ if (unlikely(tmp->vm_flags & VM_NONLINEAR)) diff --git a/mm/mmap.c b/mm/mmap.c index 64c9d736155c..c1f2ea4a0b99 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -221,7 +221,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, if (vma->vm_flags & VM_DENYWRITE) atomic_inc(&file_inode(file)->i_writecount); if (vma->vm_flags & VM_SHARED) - mapping->i_mmap_writable--; + mapping_unmap_writable(mapping); flush_dcache_mmap_lock(mapping); if (unlikely(vma->vm_flags & VM_NONLINEAR)) @@ -622,7 +622,7 @@ static void __vma_link_file(struct vm_area_struct *vma) if (vma->vm_flags & VM_DENYWRITE) atomic_dec(&file_inode(file)->i_writecount); if (vma->vm_flags & VM_SHARED) - mapping->i_mmap_writable++; + atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); if (unlikely(vma->vm_flags & VM_NONLINEAR)) @@ -1577,6 +1577,17 @@ munmap_back: if (error) goto free_vma; } + if (vm_flags & VM_SHARED) { + error = mapping_map_writable(file->f_mapping); + if (error) + goto allow_write_and_free_vma; + } + + /* ->mmap() can change vma->vm_file, but must guarantee that + * vma_link() below can deny write-access if VM_DENYWRITE is set + * and map writably if VM_SHARED is set. This usually means the + * new file must not have been exposed to user-space, yet. + */ vma->vm_file = get_file(file); error = file->f_op->mmap(file, vma); if (error) @@ -1616,8 +1627,12 @@ munmap_back: vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ - if (vm_flags & VM_DENYWRITE) - allow_write_access(file); + if (file) { + if (vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); + } file = vma->vm_file; out: perf_event_mmap(vma); @@ -1646,14 +1661,17 @@ out: return addr; unmap_and_free_vma: - if (vm_flags & VM_DENYWRITE) - allow_write_access(file); vma->vm_file = NULL; fput(file); /* Undo any partial mapping done by a device driver. */ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); charged = 0; + if (vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); +allow_write_and_free_vma: + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); free_vma: kmem_cache_free(vm_area_cachep, vma); unacct_error: diff --git a/mm/swap_state.c b/mm/swap_state.c index e160151da6b8..3e0ec83d000c 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -39,6 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = { struct address_space swapper_spaces[MAX_SWAPFILES] = { [0 ... MAX_SWAPFILES - 1] = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), + .i_mmap_writable = ATOMIC_INIT(0), .a_ops = &swap_aops, .backing_dev_info = &swap_backing_dev_info, } -- cgit v1.2.3 From 40e041a2c858b3caefc757e26cb85bfceae5062b Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:27 -0700 Subject: shm: add sealing API If two processes share a common memory region, they usually want some guarantees to allow safe access. This often includes: - one side cannot overwrite data while the other reads it - one side cannot shrink the buffer while the other accesses it - one side cannot grow the buffer beyond previously set boundaries If there is a trust-relationship between both parties, there is no need for policy enforcement. However, if there's no trust relationship (eg., for general-purpose IPC) sharing memory-regions is highly fragile and often not possible without local copies. Look at the following two use-cases: 1) A graphics client wants to share its rendering-buffer with a graphics-server. The memory-region is allocated by the client for read/write access and a second FD is passed to the server. While scanning out from the memory region, the server has no guarantee that the client doesn't shrink the buffer at any time, requiring rather cumbersome SIGBUS handling. 2) A process wants to perform an RPC on another process. To avoid huge bandwidth consumption, zero-copy is preferred. After a message is assembled in-memory and a FD is passed to the remote side, both sides want to be sure that neither modifies this shared copy, anymore. The source may have put sensible data into the message without a separate copy and the target may want to parse the message inline, to avoid a local copy. While SIGBUS handling, POSIX mandatory locking and MAP_DENYWRITE provide ways to achieve most of this, the first one is unproportionally ugly to use in libraries and the latter two are broken/racy or even disabled due to denial of service attacks. This patch introduces the concept of SEALING. If you seal a file, a specific set of operations is blocked on that file forever. Unlike locks, seals can only be set, never removed. Hence, once you verified a specific set of seals is set, you're guaranteed that no-one can perform the blocked operations on this file, anymore. An initial set of SEALS is introduced by this patch: - SHRINK: If SEAL_SHRINK is set, the file in question cannot be reduced in size. This affects ftruncate() and open(O_TRUNC). - GROW: If SEAL_GROW is set, the file in question cannot be increased in size. This affects ftruncate(), fallocate() and write(). - WRITE: If SEAL_WRITE is set, no write operations (besides resizing) are possible. This affects fallocate(PUNCH_HOLE), mmap() and write(). - SEAL: If SEAL_SEAL is set, no further seals can be added to a file. This basically prevents the F_ADD_SEAL operation on a file and can be set to prevent others from adding further seals that you don't want. The described use-cases can easily use these seals to provide safe use without any trust-relationship: 1) The graphics server can verify that a passed file-descriptor has SEAL_SHRINK set. This allows safe scanout, while the client is allowed to increase buffer size for window-resizing on-the-fly. Concurrent writes are explicitly allowed. 2) For general-purpose IPC, both processes can verify that SEAL_SHRINK, SEAL_GROW and SEAL_WRITE are set. This guarantees that neither process can modify the data while the other side parses it. Furthermore, it guarantees that even with writable FDs passed to the peer, it cannot increase the size to hit memory-limits of the source process (in case the file-storage is accounted to the source). The new API is an extension to fcntl(), adding two new commands: F_GET_SEALS: Return a bitset describing the seals on the file. This can be called on any FD if the underlying file supports sealing. F_ADD_SEALS: Change the seals of a given file. This requires WRITE access to the file and F_SEAL_SEAL may not already be set. Furthermore, the underlying file must support sealing and there may not be any existing shared mapping of that file. Otherwise, EBADF/EPERM is returned. The given seals are _added_ to the existing set of seals on the file. You cannot remove seals again. The fcntl() handler is currently specific to shmem and disabled on all files. A file needs to explicitly support sealing for this interface to work. A separate syscall is added in a follow-up, which creates files that support sealing. There is no intention to support this on other file-systems. Semantics are unclear for non-volatile files and we lack any use-case right now. Therefore, the implementation is specific to shmem. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fcntl.c | 5 ++ include/linux/shmem_fs.h | 17 ++++++ include/uapi/linux/fcntl.h | 15 +++++ mm/shmem.c | 143 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+) (limited to 'include/linux') diff --git a/fs/fcntl.c b/fs/fcntl.c index 72c82f69b01b..22d1c3df61ac 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_GETPIPE_SZ: err = pipe_fcntl(filp, cmd, arg); break; + case F_ADD_SEALS: + case F_GET_SEALS: + err = shmem_fcntl(filp, cmd, arg); + break; default: break; } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 4d1771c2d29f..50777b5b1e4c 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -1,6 +1,7 @@ #ifndef __SHMEM_FS_H #define __SHMEM_FS_H +#include #include #include #include @@ -11,6 +12,7 @@ struct shmem_inode_info { spinlock_t lock; + unsigned int seals; /* shmem seals */ unsigned long flags; unsigned long alloced; /* data pages alloced to file */ union { @@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page( mapping_gfp_mask(mapping)); } +#ifdef CONFIG_TMPFS + +extern int shmem_add_seals(struct file *file, unsigned int seals); +extern int shmem_get_seals(struct file *file); +extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg); + +#else + +static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a) +{ + return -EINVAL; +} + +#endif + #endif diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 074b886c6be0..beed138bd359 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -27,6 +27,21 @@ #define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) #define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) +/* + * Set/Get seals + */ +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +/* + * Types of seals + */ +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* (1U << 31) is reserved for signed error codes */ + /* * Types of directory notifications that may be requested. */ diff --git a/mm/shmem.c b/mm/shmem.c index 6dc80d298f9d..8b43bb7a4efe 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,6 +66,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include @@ -547,6 +548,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); static int shmem_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; + struct shmem_inode_info *info = SHMEM_I(inode); int error; error = inode_change_ok(inode, attr); @@ -557,6 +559,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; + /* protected by i_mutex */ + if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) || + (newsize > oldsize && (info->seals & F_SEAL_GROW))) + return -EPERM; + if (newsize != oldsize) { error = shmem_reacct_size(SHMEM_I(inode)->flags, oldsize, newsize); @@ -1412,6 +1419,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); + info->seals = F_SEAL_SEAL; info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->swaplist); simple_xattrs_init(&info->xattrs); @@ -1470,7 +1478,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; + struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t index = pos >> PAGE_CACHE_SHIFT; + + /* i_mutex is held by caller */ + if (unlikely(info->seals)) { + if (info->seals & F_SEAL_WRITE) + return -EPERM; + if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) + return -EPERM; + } + return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); } @@ -1808,11 +1826,125 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) return offset; } +static int shmem_wait_for_pins(struct address_space *mapping) +{ + return 0; +} + +#define F_ALL_SEALS (F_SEAL_SEAL | \ + F_SEAL_SHRINK | \ + F_SEAL_GROW | \ + F_SEAL_WRITE) + +int shmem_add_seals(struct file *file, unsigned int seals) +{ + struct inode *inode = file_inode(file); + struct shmem_inode_info *info = SHMEM_I(inode); + int error; + + /* + * SEALING + * Sealing allows multiple parties to share a shmem-file but restrict + * access to a specific subset of file operations. Seals can only be + * added, but never removed. This way, mutually untrusted parties can + * share common memory regions with a well-defined policy. A malicious + * peer can thus never perform unwanted operations on a shared object. + * + * Seals are only supported on special shmem-files and always affect + * the whole underlying inode. Once a seal is set, it may prevent some + * kinds of access to the file. Currently, the following seals are + * defined: + * SEAL_SEAL: Prevent further seals from being set on this file + * SEAL_SHRINK: Prevent the file from shrinking + * SEAL_GROW: Prevent the file from growing + * SEAL_WRITE: Prevent write access to the file + * + * As we don't require any trust relationship between two parties, we + * must prevent seals from being removed. Therefore, sealing a file + * only adds a given set of seals to the file, it never touches + * existing seals. Furthermore, the "setting seals"-operation can be + * sealed itself, which basically prevents any further seal from being + * added. + * + * Semantics of sealing are only defined on volatile files. Only + * anonymous shmem files support sealing. More importantly, seals are + * never written to disk. Therefore, there's no plan to support it on + * other file types. + */ + + if (file->f_op != &shmem_file_operations) + return -EINVAL; + if (!(file->f_mode & FMODE_WRITE)) + return -EPERM; + if (seals & ~(unsigned int)F_ALL_SEALS) + return -EINVAL; + + mutex_lock(&inode->i_mutex); + + if (info->seals & F_SEAL_SEAL) { + error = -EPERM; + goto unlock; + } + + if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) { + error = mapping_deny_writable(file->f_mapping); + if (error) + goto unlock; + + error = shmem_wait_for_pins(file->f_mapping); + if (error) { + mapping_allow_writable(file->f_mapping); + goto unlock; + } + } + + info->seals |= seals; + error = 0; + +unlock: + mutex_unlock(&inode->i_mutex); + return error; +} +EXPORT_SYMBOL_GPL(shmem_add_seals); + +int shmem_get_seals(struct file *file) +{ + if (file->f_op != &shmem_file_operations) + return -EINVAL; + + return SHMEM_I(file_inode(file))->seals; +} +EXPORT_SYMBOL_GPL(shmem_get_seals); + +long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long error; + + switch (cmd) { + case F_ADD_SEALS: + /* disallow upper 32bit */ + if (arg > UINT_MAX) + return -EINVAL; + + error = shmem_add_seals(file, arg); + break; + case F_GET_SEALS: + error = shmem_get_seals(file); + break; + default: + error = -EINVAL; + break; + } + + return error; +} + static long shmem_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_falloc shmem_falloc; pgoff_t start, index, end; int error; @@ -1828,6 +1960,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); + /* protected by i_mutex */ + if (info->seals & F_SEAL_WRITE) { + error = -EPERM; + goto out; + } + shmem_falloc.waitq = &shmem_falloc_waitq; shmem_falloc.start = unmap_start >> PAGE_SHIFT; shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; @@ -1854,6 +1992,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, if (error) goto out; + if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { + error = -EPERM; + goto out; + } + start = offset >> PAGE_CACHE_SHIFT; end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* Try to avoid a swapstorm if len is impossible to satisfy */ -- cgit v1.2.3 From 9183df25fe7b194563db3fec6dc3202a5855839c Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:29 -0700 Subject: shm: add memfd_create() syscall memfd_create() is similar to mmap(MAP_ANON), but returns a file-descriptor that you can pass to mmap(). It can support sealing and avoids any connection to user-visible mount-points. Thus, it's not subject to quotas on mounted file-systems, but can be used like malloc()'ed memory, but with a file-descriptor to it. memfd_create() returns the raw shmem file, so calls like ftruncate() can be used to modify the underlying inode. Also calls like fstat() will return proper information and mark the file as regular file. If you want sealing, you can specify MFD_ALLOW_SEALING. Otherwise, sealing is not supported (like on all other regular files). Compared to O_TMPFILE, it does not require a tmpfs mount-point and is not subject to a filesystem size limit. It is still properly accounted to memcg limits, though, and to the same overcommit or no-overcommit accounting as all user memory. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 1 + include/uapi/linux/memfd.h | 8 +++++ kernel/sys_ni.c | 1 + mm/shmem.c | 73 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 85 insertions(+) create mode 100644 include/uapi/linux/memfd.h (limited to 'include/linux') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d1b4a119d4a5..028b78168d85 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -362,3 +362,4 @@ 353 i386 renameat2 sys_renameat2 354 i386 seccomp sys_seccomp 355 i386 getrandom sys_getrandom +356 i386 memfd_create sys_memfd_create diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 252c804bb1aa..ca2b9aa78c81 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -325,6 +325,7 @@ 316 common renameat2 sys_renameat2 317 common seccomp sys_seccomp 318 common getrandom sys_getrandom +319 common memfd_create sys_memfd_create # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 701daff5d899..15a069425cbf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -802,6 +802,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags, asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); +asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h new file mode 100644 index 000000000000..534e364bda92 --- /dev/null +++ b/include/uapi/linux/memfd.h @@ -0,0 +1,8 @@ +#ifndef _UAPI_LINUX_MEMFD_H +#define _UAPI_LINUX_MEMFD_H + +/* flags for memfd_create(2) (unsigned int) */ +#define MFD_CLOEXEC 0x0001U +#define MFD_ALLOW_SEALING 0x0002U + +#endif /* _UAPI_LINUX_MEMFD_H */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 2904a2105914..1f79e3714533 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -197,6 +197,7 @@ cond_syscall(compat_sys_timerfd_settime); cond_syscall(compat_sys_timerfd_gettime); cond_syscall(sys_eventfd); cond_syscall(sys_eventfd2); +cond_syscall(sys_memfd_create); /* performance counters: */ cond_syscall(sys_perf_event_open); diff --git a/mm/shmem.c b/mm/shmem.c index 8b43bb7a4efe..4a5498795a2b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,7 +66,9 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include +#include #include #include @@ -2732,6 +2734,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) shmem_show_mpol(seq, sbinfo->mpol); return 0; } + +#define MFD_NAME_PREFIX "memfd:" +#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) +#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) + +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING) + +SYSCALL_DEFINE2(memfd_create, + const char __user *, uname, + unsigned int, flags) +{ + struct shmem_inode_info *info; + struct file *file; + int fd, error; + char *name; + long len; + + if (flags & ~(unsigned int)MFD_ALL_FLAGS) + return -EINVAL; + + /* length includes terminating zero */ + len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); + if (len <= 0) + return -EFAULT; + if (len > MFD_NAME_MAX_LEN + 1) + return -EINVAL; + + name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY); + if (!name) + return -ENOMEM; + + strcpy(name, MFD_NAME_PREFIX); + if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) { + error = -EFAULT; + goto err_name; + } + + /* terminating-zero may have changed after strnlen_user() returned */ + if (name[len + MFD_NAME_PREFIX_LEN - 1]) { + error = -EFAULT; + goto err_name; + } + + fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); + if (fd < 0) { + error = fd; + goto err_name; + } + + file = shmem_file_setup(name, 0, VM_NORESERVE); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto err_fd; + } + info = SHMEM_I(file_inode(file)); + file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; + file->f_flags |= O_RDWR | O_LARGEFILE; + if (flags & MFD_ALLOW_SEALING) + info->seals &= ~F_SEAL_SEAL; + + fd_install(fd, file); + kfree(name); + return fd; + +err_fd: + put_unused_fd(fd); +err_name: + kfree(name); + return error; +} + #endif /* CONFIG_TMPFS */ static void shmem_put_super(struct super_block *sb) -- cgit v1.2.3 From 7d3e2bca22feb1f4a624009ff6c15e6f724cb4e7 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:43 -0700 Subject: kexec: rename unusebale_pages to unusable_pages Let's use the more common "unusable". This patch was originally written and posted by Boris. I am including it in this patch series. Signed-off-by: Borislav Petkov Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 +- kernel/kexec.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index a75641930049..d9bb0a57d208 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -100,7 +100,7 @@ struct kimage { struct list_head control_pages; struct list_head dest_pages; - struct list_head unuseable_pages; + struct list_head unusable_pages; /* Address of next control page to allocate for crash kernels. */ unsigned long control_page; diff --git a/kernel/kexec.c b/kernel/kexec.c index 4b8f0c925884..c7cc2a00181c 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -154,7 +154,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, INIT_LIST_HEAD(&image->dest_pages); /* Initialize the list of unusable pages */ - INIT_LIST_HEAD(&image->unuseable_pages); + INIT_LIST_HEAD(&image->unusable_pages); /* Read in the segments */ image->nr_segments = nr_segments; @@ -609,7 +609,7 @@ static void kimage_free_extra_pages(struct kimage *image) kimage_free_page_list(&image->dest_pages); /* Walk through and free any unusable pages I have cached */ - kimage_free_page_list(&image->unuseable_pages); + kimage_free_page_list(&image->unusable_pages); } static void kimage_terminate(struct kimage *image) @@ -732,7 +732,7 @@ static struct page *kimage_alloc_page(struct kimage *image, /* If the page cannot be used file it away */ if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { - list_add(&page->lru, &image->unuseable_pages); + list_add(&page->lru, &image->unusable_pages); continue; } addr = page_to_pfn(page) << PAGE_SHIFT; -- cgit v1.2.3 From 8c86e70acead629aacb4afcd818add66bf6844d9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:50 -0700 Subject: resource: provide new functions to walk through resources I have added two more functions to walk through resources. Currently walk_system_ram_range() deals with pfn and /proc/iomem can contain partial pages. By dealing in pfn, callback function loses the info that last page of a memory range is a partial page and not the full page. So I implemented walk_system_ram_res() which returns u64 values to callback functions and now it properly return start and end address. walk_system_ram_range() uses find_next_system_ram() to find the next ram resource. This in turn only travels through siblings of top level child and does not travers through all the nodes of the resoruce tree. I also need another function where I can walk through all the resources, for example figure out where "GART" aperture is. Figure out where ACPI memory is. So I wrote another function walk_iomem_res() which walks through all /proc/iomem resources and returns matches as asked by caller. Caller can specify "name" of resource, start and end and flags. Got rid of find_next_system_ram_res() and instead implemented more generic find_next_iomem_res() which can be used to traverse top level children only based on an argument. Signed-off-by: Vivek Goyal Cc: Yinghai Lu Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 6 +++ kernel/resource.c | 101 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 98 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5e3a906cc089..142ec544167c 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -237,6 +237,12 @@ extern int iomem_is_exclusive(u64 addr); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); +extern int +walk_system_ram_res(u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)); +extern int +walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) diff --git a/kernel/resource.c b/kernel/resource.c index 3c2237ac32db..da14b8d09296 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock); static struct resource *bootmem_resource_free; static DEFINE_SPINLOCK(bootmem_resource_lock); -static void *r_next(struct seq_file *m, void *v, loff_t *pos) +static struct resource *next_resource(struct resource *p, bool sibling_only) { - struct resource *p = v; - (*pos)++; + /* Caller wants to traverse through siblings only */ + if (sibling_only) + return p->sibling; + if (p->child) return p->child; while (!p->sibling && p->parent) @@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) return p->sibling; } +static void *r_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct resource *p = v; + (*pos)++; + return (void *)next_resource(p, false); +} + #ifdef CONFIG_PROC_FS enum { MAX_IORES_LEVEL = 5 }; @@ -322,16 +331,19 @@ int release_resource(struct resource *old) EXPORT_SYMBOL(release_resource); -#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* - * Finds the lowest memory reosurce exists within [res->start.res->end) + * Finds the lowest iomem reosurce exists with-in [res->start.res->end) * the caller must specify res->start, res->end, res->flags and "name". * If found, returns 0, res is overwritten, if not found, returns -1. + * This walks through whole tree and not just first level children + * until and unless first_level_children_only is true. */ -static int find_next_system_ram(struct resource *res, char *name) +static int find_next_iomem_res(struct resource *res, char *name, + bool first_level_children_only) { resource_size_t start, end; struct resource *p; + bool sibling_only = false; BUG_ON(!res); @@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name) BUG_ON(start >= end); read_lock(&resource_lock); - for (p = iomem_resource.child; p ; p = p->sibling) { - /* system ram is just marked as IORESOURCE_MEM */ + + if (first_level_children_only) { + p = iomem_resource.child; + sibling_only = true; + } else + p = &iomem_resource; + + while ((p = next_resource(p, sibling_only))) { if (p->flags != res->flags) continue; if (name && strcmp(p->name, name)) @@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name) if ((p->end >= start) && (p->start < end)) break; } + read_unlock(&resource_lock); if (!p) return -1; @@ -364,6 +383,70 @@ static int find_next_system_ram(struct resource *res, char *name) return 0; } +/* + * Walks through iomem resources and calls func() with matching resource + * ranges. This walks through whole tree and not just first level children. + * All the memory ranges which overlap start,end and also match flags and + * name are valid candidates. + * + * @name: name of resource + * @flags: resource flags + * @start: start addr + * @end: end addr + */ +int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, + void *arg, int (*func)(u64, u64, void *)) +{ + struct resource res; + u64 orig_end; + int ret = -1; + + res.start = start; + res.end = end; + res.flags = flags; + orig_end = res.end; + while ((res.start < res.end) && + (!find_next_iomem_res(&res, name, false))) { + ret = (*func)(res.start, res.end, arg); + if (ret) + break; + res.start = res.end + 1; + res.end = orig_end; + } + return ret; +} + +/* + * This function calls callback against all memory range of "System RAM" + * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. + * Now, this function is only for "System RAM". This function deals with + * full ranges and not pfn. If resources are not pfn aligned, dealing + * with pfn can truncate ranges. + */ +int walk_system_ram_res(u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)) +{ + struct resource res; + u64 orig_end; + int ret = -1; + + res.start = start; + res.end = end; + res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + orig_end = res.end; + while ((res.start < res.end) && + (!find_next_iomem_res(&res, "System RAM", true))) { + ret = (*func)(res.start, res.end, arg); + if (ret) + break; + res.start = res.end + 1; + res.end = orig_end; + } + return ret; +} + +#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) + /* * This function calls callback against all memory range of "System RAM" * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. @@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (find_next_system_ram(&res, "System RAM") >= 0)) { + (find_next_iomem_res(&res, "System RAM", true) >= 0)) { pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = (res.end + 1) >> PAGE_SHIFT; if (end_pfn > pfn) -- cgit v1.2.3 From 815d5704a337a662bf960757edbff7a0680d40fd Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:52 -0700 Subject: kexec: make kexec_segment user buffer pointer a union So far kexec_segment->buf was always a user space pointer as user space passed the array of kexec_segment structures and kernel copied it. But with new system call, list of kexec segments will be prepared by kernel and kexec_segment->buf will point to a kernel memory. So while I was adding code where I made assumption that ->buf is pointing to kernel memory, sparse started giving warning. Make ->buf a union. And where a user space pointer is expected, access it using ->buf and where a kernel space pointer is expected, access it using ->kbuf. That takes care of sparse warnings. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d9bb0a57d208..66d56ac0f64c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -69,7 +69,18 @@ typedef unsigned long kimage_entry_t; #define IND_SOURCE 0x8 struct kexec_segment { - void __user *buf; + /* + * This pointer can point to user memory if kexec_load() system + * call is used or will point to kernel memory if + * kexec_file_load() system call is used. + * + * Use ->buf when expecting to deal with user memory and use ->kbuf + * when expecting to deal with kernel memory. + */ + union { + void __user *buf; + void *kbuf; + }; size_t bufsz; unsigned long mem; size_t memsz; -- cgit v1.2.3 From f0895685c7fd8c938c91a9d8a6f7c11f22df58d2 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:55 -0700 Subject: kexec: new syscall kexec_file_load() declaration This is the new syscall kexec_file_load() declaration/interface. I have reserved the syscall number only for x86_64 so far. Other architectures (including i386) can reserve syscall number when they enable the support for this new syscall. Signed-off-by: Vivek Goyal Cc: Michael Kerrisk Cc: Borislav Petkov Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 4 ++++ kernel/kexec.c | 7 +++++++ kernel/sys_ni.c | 1 + 4 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ca2b9aa78c81..35dd922727b9 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -326,6 +326,7 @@ 317 common seccomp sys_seccomp 318 common getrandom sys_getrandom 319 common memfd_create sys_memfd_create +320 common kexec_file_load sys_kexec_file_load # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 15a069425cbf..0f86d85a9ce4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -317,6 +317,10 @@ asmlinkage long sys_restart_syscall(void); asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments, unsigned long flags); +asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, + const char __user *cmdline_ptr, + unsigned long flags); asmlinkage long sys_exit(int error_code); asmlinkage long sys_exit_group(int error_code); diff --git a/kernel/kexec.c b/kernel/kexec.c index bfdda316697d..ec4386c1b94f 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1058,6 +1058,13 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, } #endif +SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, + unsigned long, cmdline_len, const char __user *, cmdline_ptr, + unsigned long, flags) +{ + return -ENOSYS; +} + void crash_kexec(struct pt_regs *regs) { /* Take the kexec_mutex here to prevent sys_kexec_load diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1f79e3714533..391d4ddb6f4b 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -25,6 +25,7 @@ cond_syscall(sys_swapon); cond_syscall(sys_swapoff); cond_syscall(sys_kexec_load); cond_syscall(compat_sys_kexec_load); +cond_syscall(sys_kexec_file_load); cond_syscall(sys_init_module); cond_syscall(sys_finit_module); cond_syscall(sys_delete_module); -- cgit v1.2.3 From cb1052581e2bddd6096544f3f944f4e7fdad4c7f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:57 -0700 Subject: kexec: implementation of new syscall kexec_file_load Previous patch provided the interface definition and this patch prvides implementation of new syscall. Previously segment list was prepared in user space. Now user space just passes kernel fd, initrd fd and command line and kernel will create a segment list internally. This patch contains generic part of the code. Actual segment preparation and loading is done by arch and image specific loader. Which comes in next patch. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_64.c | 45 ++++ include/linux/kexec.h | 53 ++++ include/uapi/linux/kexec.h | 11 + kernel/kexec.c | 483 ++++++++++++++++++++++++++++++++++++- 4 files changed, 587 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 679cef0791cd..c8875b5545e1 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -22,6 +22,10 @@ #include #include +static struct kexec_file_ops *kexec_file_loaders[] = { + NULL, +}; + static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.pud); @@ -283,3 +287,44 @@ void arch_crash_save_vmcoreinfo(void) (unsigned long)&_text - __START_KERNEL); } +/* arch-dependent functionality related to kexec file-based syscall */ + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image); +} diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 66d56ac0f64c..8e80901e466f 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -121,13 +121,57 @@ struct kimage { #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 unsigned int preserve_context : 1; + /* If set, we are using file mode kexec syscall */ + unsigned int file_mode:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; #endif + + /* Additional fields for file based kexec syscall */ + void *kernel_buf; + unsigned long kernel_buf_len; + + void *initrd_buf; + unsigned long initrd_buf_len; + + char *cmdline_buf; + unsigned long cmdline_buf_len; + + /* File operations provided by image loader */ + struct kexec_file_ops *fops; + + /* Image loader handling the kernel can store a pointer here */ + void *image_loader_data; }; +/* + * Keeps track of buffer parameters as provided by caller for requesting + * memory placement of buffer. + */ +struct kexec_buf { + struct kimage *image; + char *buffer; + unsigned long bufsz; + unsigned long memsz; + unsigned long buf_align; + unsigned long buf_min; + unsigned long buf_max; + bool top_down; /* allocate from top of memory hole */ +}; +typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size); +typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len); +typedef int (kexec_cleanup_t)(struct kimage *image); + +struct kexec_file_ops { + kexec_probe_t *probe; + kexec_load_t *load; + kexec_cleanup_t *cleanup; +}; /* kexec interface functions */ extern void machine_kexec(struct kimage *image); @@ -138,6 +182,11 @@ extern asmlinkage long sys_kexec_load(unsigned long entry, struct kexec_segment __user *segments, unsigned long flags); extern int kernel_kexec(void); +extern int kexec_add_buffer(struct kimage *image, char *buffer, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, bool top_down, + unsigned long *load_addr); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); extern void crash_kexec(struct pt_regs *); @@ -188,6 +237,10 @@ extern int kexec_load_disabled; #define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) #endif +/* List of defined/legal kexec file flags */ +#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ + KEXEC_FILE_NO_INITRAMFS) + #define VMCOREINFO_BYTES (4096) #define VMCOREINFO_NOTE_NAME "VMCOREINFO" #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index d6629d49a243..6925f5b42f89 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -13,6 +13,17 @@ #define KEXEC_PRESERVE_CONTEXT 0x00000002 #define KEXEC_ARCH_MASK 0xffff0000 +/* + * Kexec file load interface flags. + * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image. + * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image. + * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd + * fd field. + */ +#define KEXEC_FILE_UNLOAD 0x00000001 +#define KEXEC_FILE_ON_CRASH 0x00000002 +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. */ diff --git a/kernel/kexec.c b/kernel/kexec.c index ec4386c1b94f..9b46219254dd 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -6,6 +6,8 @@ * Version 2. See the file COPYING for more details. */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -327,6 +329,221 @@ out_free_image: return ret; } +static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len) +{ + struct fd f = fdget(fd); + int ret; + struct kstat stat; + loff_t pos; + ssize_t bytes = 0; + + if (!f.file) + return -EBADF; + + ret = vfs_getattr(&f.file->f_path, &stat); + if (ret) + goto out; + + if (stat.size > INT_MAX) { + ret = -EFBIG; + goto out; + } + + /* Don't hand 0 to vmalloc, it whines. */ + if (stat.size == 0) { + ret = -EINVAL; + goto out; + } + + *buf = vmalloc(stat.size); + if (!*buf) { + ret = -ENOMEM; + goto out; + } + + pos = 0; + while (pos < stat.size) { + bytes = kernel_read(f.file, pos, (char *)(*buf) + pos, + stat.size - pos); + if (bytes < 0) { + vfree(*buf); + ret = bytes; + goto out; + } + + if (bytes == 0) + break; + pos += bytes; + } + + if (pos != stat.size) { + ret = -EBADF; + vfree(*buf); + goto out; + } + + *buf_len = pos; +out: + fdput(f); + return ret; +} + +/* Architectures can provide this probe function */ +int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + return -ENOEXEC; +} + +void * __weak arch_kexec_kernel_image_load(struct kimage *image) +{ + return ERR_PTR(-ENOEXEC); +} + +void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) +{ +} + +/* + * Free up memory used by kernel, initrd, and comand line. This is temporary + * memory allocation which is not needed any more after these buffers have + * been loaded into separate segments and have been copied elsewhere. + */ +static void kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->kernel_buf); + image->kernel_buf = NULL; + + vfree(image->initrd_buf); + image->initrd_buf = NULL; + + kfree(image->cmdline_buf); + image->cmdline_buf = NULL; + + /* See if architecture has anything to cleanup post load */ + arch_kimage_file_post_load_cleanup(image); +} + +/* + * In file mode list of segments is prepared by kernel. Copy relevant + * data from user space, do error checking, prepare segment list + */ +static int +kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, + const char __user *cmdline_ptr, + unsigned long cmdline_len, unsigned flags) +{ + int ret = 0; + void *ldata; + + ret = copy_file_from_fd(kernel_fd, &image->kernel_buf, + &image->kernel_buf_len); + if (ret) + return ret; + + /* Call arch image probe handlers */ + ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, + image->kernel_buf_len); + + if (ret) + goto out; + + /* It is possible that there no initramfs is being loaded */ + if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { + ret = copy_file_from_fd(initrd_fd, &image->initrd_buf, + &image->initrd_buf_len); + if (ret) + goto out; + } + + if (cmdline_len) { + image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL); + if (!image->cmdline_buf) { + ret = -ENOMEM; + goto out; + } + + ret = copy_from_user(image->cmdline_buf, cmdline_ptr, + cmdline_len); + if (ret) { + ret = -EFAULT; + goto out; + } + + image->cmdline_buf_len = cmdline_len; + + /* command line should be a string with last byte null */ + if (image->cmdline_buf[cmdline_len - 1] != '\0') { + ret = -EINVAL; + goto out; + } + } + + /* Call arch image load handlers */ + ldata = arch_kexec_kernel_image_load(image); + + if (IS_ERR(ldata)) { + ret = PTR_ERR(ldata); + goto out; + } + + image->image_loader_data = ldata; +out: + /* In case of error, free up all allocated memory in this function */ + if (ret) + kimage_file_post_load_cleanup(image); + return ret; +} + +static int +kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, + int initrd_fd, const char __user *cmdline_ptr, + unsigned long cmdline_len, unsigned long flags) +{ + int ret; + struct kimage *image; + + image = do_kimage_alloc_init(); + if (!image) + return -ENOMEM; + + image->file_mode = 1; + + ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, + cmdline_ptr, cmdline_len, flags); + if (ret) + goto out_free_image; + + ret = sanity_check_segment_list(image); + if (ret) + goto out_free_post_load_bufs; + + ret = -ENOMEM; + image->control_code_page = kimage_alloc_control_pages(image, + get_order(KEXEC_CONTROL_PAGE_SIZE)); + if (!image->control_code_page) { + pr_err("Could not allocate control_code_buffer\n"); + goto out_free_post_load_bufs; + } + + image->swap_page = kimage_alloc_control_pages(image, 0); + if (!image->swap_page) { + pr_err(KERN_ERR "Could not allocate swap buffer\n"); + goto out_free_control_pages; + } + + *rimage = image; + return 0; +out_free_control_pages: + kimage_free_page_list(&image->control_pages); +out_free_post_load_bufs: + kimage_file_post_load_cleanup(image); + kfree(image->image_loader_data); +out_free_image: + kfree(image); + return ret; +} + static int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end) @@ -644,6 +861,16 @@ static void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); + + kfree(image->image_loader_data); + + /* + * Free up any temporary buffers allocated. This might hit if + * error occurred much later after buffer allocation. + */ + if (image->file_mode) + kimage_file_post_load_cleanup(image); + kfree(image); } @@ -772,10 +999,14 @@ static int kimage_load_normal_segment(struct kimage *image, unsigned long maddr; size_t ubytes, mbytes; int result; - unsigned char __user *buf; + unsigned char __user *buf = NULL; + unsigned char *kbuf = NULL; result = 0; - buf = segment->buf; + if (image->file_mode) + kbuf = segment->kbuf; + else + buf = segment->buf; ubytes = segment->bufsz; mbytes = segment->memsz; maddr = segment->mem; @@ -807,7 +1038,11 @@ static int kimage_load_normal_segment(struct kimage *image, PAGE_SIZE - (maddr & ~PAGE_MASK)); uchunk = min(ubytes, mchunk); - result = copy_from_user(ptr, buf, uchunk); + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); kunmap(page); if (result) { result = -EFAULT; @@ -815,7 +1050,10 @@ static int kimage_load_normal_segment(struct kimage *image, } ubytes -= uchunk; maddr += mchunk; - buf += mchunk; + if (image->file_mode) + kbuf += mchunk; + else + buf += mchunk; mbytes -= mchunk; } out: @@ -1062,7 +1300,72 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) { - return -ENOSYS; + int ret = 0, i; + struct kimage **dest_image, *image; + + /* We only trust the superuser with rebooting the system. */ + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + return -EPERM; + + /* Make sure we have a legal set of flags */ + if (flags != (flags & KEXEC_FILE_FLAGS)) + return -EINVAL; + + image = NULL; + + if (!mutex_trylock(&kexec_mutex)) + return -EBUSY; + + dest_image = &kexec_image; + if (flags & KEXEC_FILE_ON_CRASH) + dest_image = &kexec_crash_image; + + if (flags & KEXEC_FILE_UNLOAD) + goto exchange; + + /* + * In case of crash, new kernel gets loaded in reserved region. It is + * same memory where old crash kernel might be loaded. Free any + * current crash dump kernel before we corrupt it. + */ + if (flags & KEXEC_FILE_ON_CRASH) + kimage_free(xchg(&kexec_crash_image, NULL)); + + ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr, + cmdline_len, flags); + if (ret) + goto out; + + ret = machine_kexec_prepare(image); + if (ret) + goto out; + + for (i = 0; i < image->nr_segments; i++) { + struct kexec_segment *ksegment; + + ksegment = &image->segment[i]; + pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", + i, ksegment->buf, ksegment->bufsz, ksegment->mem, + ksegment->memsz); + + ret = kimage_load_segment(image, &image->segment[i]); + if (ret) + goto out; + } + + kimage_terminate(image); + + /* + * Free up any temporary buffers allocated which are not needed + * after image has been loaded + */ + kimage_file_post_load_cleanup(image); +exchange: + image = xchg(dest_image, image); +out: + mutex_unlock(&kexec_mutex); + kimage_free(image); + return ret; } void crash_kexec(struct pt_regs *regs) @@ -1620,6 +1923,176 @@ static int __init crash_save_vmcoreinfo_init(void) subsys_initcall(crash_save_vmcoreinfo_init); +static int __kexec_add_segment(struct kimage *image, char *buf, + unsigned long bufsz, unsigned long mem, + unsigned long memsz) +{ + struct kexec_segment *ksegment; + + ksegment = &image->segment[image->nr_segments]; + ksegment->kbuf = buf; + ksegment->bufsz = bufsz; + ksegment->mem = mem; + ksegment->memsz = memsz; + image->nr_segments++; + + return 0; +} + +static int locate_mem_hole_top_down(unsigned long start, unsigned long end, + struct kexec_buf *kbuf) +{ + struct kimage *image = kbuf->image; + unsigned long temp_start, temp_end; + + temp_end = min(end, kbuf->buf_max); + temp_start = temp_end - kbuf->memsz; + + do { + /* align down start */ + temp_start = temp_start & (~(kbuf->buf_align - 1)); + + if (temp_start < start || temp_start < kbuf->buf_min) + return 0; + + temp_end = temp_start + kbuf->memsz - 1; + + /* + * Make sure this does not conflict with any of existing + * segments + */ + if (kimage_is_destination_range(image, temp_start, temp_end)) { + temp_start = temp_start - PAGE_SIZE; + continue; + } + + /* We found a suitable memory range */ + break; + } while (1); + + /* If we are here, we found a suitable memory range */ + __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start, + kbuf->memsz); + + /* Success, stop navigating through remaining System RAM ranges */ + return 1; +} + +static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, + struct kexec_buf *kbuf) +{ + struct kimage *image = kbuf->image; + unsigned long temp_start, temp_end; + + temp_start = max(start, kbuf->buf_min); + + do { + temp_start = ALIGN(temp_start, kbuf->buf_align); + temp_end = temp_start + kbuf->memsz - 1; + + if (temp_end > end || temp_end > kbuf->buf_max) + return 0; + /* + * Make sure this does not conflict with any of existing + * segments + */ + if (kimage_is_destination_range(image, temp_start, temp_end)) { + temp_start = temp_start + PAGE_SIZE; + continue; + } + + /* We found a suitable memory range */ + break; + } while (1); + + /* If we are here, we found a suitable memory range */ + __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start, + kbuf->memsz); + + /* Success, stop navigating through remaining System RAM ranges */ + return 1; +} + +static int locate_mem_hole_callback(u64 start, u64 end, void *arg) +{ + struct kexec_buf *kbuf = (struct kexec_buf *)arg; + unsigned long sz = end - start + 1; + + /* Returning 0 will take to next memory range */ + if (sz < kbuf->memsz) + return 0; + + if (end < kbuf->buf_min || start > kbuf->buf_max) + return 0; + + /* + * Allocate memory top down with-in ram range. Otherwise bottom up + * allocation. + */ + if (kbuf->top_down) + return locate_mem_hole_top_down(start, end, kbuf); + return locate_mem_hole_bottom_up(start, end, kbuf); +} + +/* + * Helper function for placing a buffer in a kexec segment. This assumes + * that kexec_mutex is held. + */ +int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, + unsigned long memsz, unsigned long buf_align, + unsigned long buf_min, unsigned long buf_max, + bool top_down, unsigned long *load_addr) +{ + + struct kexec_segment *ksegment; + struct kexec_buf buf, *kbuf; + int ret; + + /* Currently adding segment this way is allowed only in file mode */ + if (!image->file_mode) + return -EINVAL; + + if (image->nr_segments >= KEXEC_SEGMENT_MAX) + return -EINVAL; + + /* + * Make sure we are not trying to add buffer after allocating + * control pages. All segments need to be placed first before + * any control pages are allocated. As control page allocation + * logic goes through list of segments to make sure there are + * no destination overlaps. + */ + if (!list_empty(&image->control_pages)) { + WARN_ON(1); + return -EINVAL; + } + + memset(&buf, 0, sizeof(struct kexec_buf)); + kbuf = &buf; + kbuf->image = image; + kbuf->buffer = buffer; + kbuf->bufsz = bufsz; + + kbuf->memsz = ALIGN(memsz, PAGE_SIZE); + kbuf->buf_align = max(buf_align, PAGE_SIZE); + kbuf->buf_min = buf_min; + kbuf->buf_max = buf_max; + kbuf->top_down = top_down; + + /* Walk the RAM ranges and allocate a suitable range for the buffer */ + ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback); + if (ret != 1) { + /* A suitable memory range could not be found for buffer */ + return -EADDRNOTAVAIL; + } + + /* Found a suitable memory range */ + ksegment = &image->segment[image->nr_segments - 1]; + *load_addr = ksegment->mem; + return 0; +} + + /* * Move into place and start executing a preloaded standalone * executable. If nothing was preloaded return an error. -- cgit v1.2.3 From 12db5562e0352986a265841638482b84f3a6899b Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:04 -0700 Subject: kexec: load and relocate purgatory at kernel load time Load purgatory code in RAM and relocate it based on the location. Relocation code has been inspired by module relocation code and purgatory relocation code in kexec-tools. Also compute the checksums of loaded kexec segments and store them in purgatory. Arch independent code provides this functionality so that arch dependent bootloaders can make use of it. Helper functions are provided to get/set symbol values in purgatory which are used by bootloaders later to set things like stack and entry point of second kernel etc. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 2 + arch/ia64/Kconfig | 2 + arch/m68k/Kconfig | 2 + arch/mips/Kconfig | 2 + arch/powerpc/Kconfig | 2 + arch/s390/Kconfig | 2 + arch/sh/Kconfig | 2 + arch/tile/Kconfig | 2 + arch/x86/Kconfig | 2 + arch/x86/kernel/machine_kexec_64.c | 142 ++++++++++ include/linux/kexec.h | 33 +++ kernel/kexec.c | 544 ++++++++++++++++++++++++++++++++++++- 12 files changed, 736 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8e9dbcbcf5af..cacc8d5355b3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2065,6 +2065,8 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index c84c88bbbbd7..64aefb76bd69 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -549,6 +549,8 @@ source "drivers/sn/Kconfig" config KEXEC bool "kexec system call" depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 87b7c7581b1d..3ff8c9a25335 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -91,6 +91,8 @@ config MMU_SUN3 config KEXEC bool "kexec system call" depends on M68KCLASSIC + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 900c7e5333b6..df51e78a72cc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2396,6 +2396,8 @@ source "kernel/Kconfig.preempt" config KEXEC bool "Kexec system call" + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4bc7b62fb4b6..a577609f8ed6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -399,6 +399,8 @@ config PPC64_SUPPORTS_MEMORY_FAILURE config KEXEC bool "kexec system call" depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 05c78bb5f570..ab39ceb89ecf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -48,6 +48,8 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC config KEXEC def_bool y + select CRYPTO + select CRYPTO_SHA256 config AUDIT_ARCH def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index aa2df3eaeb29..453fa5c09550 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -595,6 +595,8 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on SUPERH32 && MMU + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 7fcd492adbfc..a3ffe2dd4832 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -191,6 +191,8 @@ source "kernel/Kconfig.hz" config KEXEC bool "kexec system call" + select CRYPTO + select CRYPTO_SHA256 ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 98fe3df6df82..9558b9fcafbf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1583,6 +1583,8 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" select BUILD_BIN2C + select CRYPTO + select CRYPTO_SHA256 ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c8875b5545e1..88404c440727 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -6,6 +6,8 @@ * Version 2. See the file COPYING for more details. */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -328,3 +330,143 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return image->fops->cleanup(image); } + +/* + * Apply purgatory relocations. + * + * ehdr: Pointer to elf headers + * sechdrs: Pointer to section headers. + * relsec: section index of SHT_RELA section. + * + * TODO: Some of the code belongs to generic code. Move that in kexec.c. + */ +int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, + Elf64_Shdr *sechdrs, unsigned int relsec) +{ + unsigned int i; + Elf64_Rela *rel; + Elf64_Sym *sym; + void *location; + Elf64_Shdr *section, *symtabsec; + unsigned long address, sec_base, value; + const char *strtab, *name, *shstrtab; + + /* + * ->sh_offset has been modified to keep the pointer to section + * contents in memory + */ + rel = (void *)sechdrs[relsec].sh_offset; + + /* Section to which relocations apply */ + section = &sechdrs[sechdrs[relsec].sh_info]; + + pr_debug("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + + /* Associated symbol table */ + symtabsec = &sechdrs[sechdrs[relsec].sh_link]; + + /* String table */ + if (symtabsec->sh_link >= ehdr->e_shnum) { + /* Invalid strtab section number */ + pr_err("Invalid string table section index %d\n", + symtabsec->sh_link); + return -ENOEXEC; + } + + strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset; + + /* section header string table */ + shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset; + + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + + /* + * rel[i].r_offset contains byte offset from beginning + * of section to the storage unit affected. + * + * This is location to update (->sh_offset). This is temporary + * buffer where section is currently loaded. This will finally + * be loaded to a different address later, pointed to by + * ->sh_addr. kexec takes care of moving it + * (kexec_load_segment()). + */ + location = (void *)(section->sh_offset + rel[i].r_offset); + + /* Final address of the location */ + address = section->sh_addr + rel[i].r_offset; + + /* + * rel[i].r_info contains information about symbol table index + * w.r.t which relocation must be made and type of relocation + * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get + * these respectively. + */ + sym = (Elf64_Sym *)symtabsec->sh_offset + + ELF64_R_SYM(rel[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n", + name, sym->st_info, sym->st_shndx, sym->st_value, + sym->st_size); + + if (sym->st_shndx == SHN_UNDEF) { + pr_err("Undefined symbol: %s\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = sechdrs[sym->st_shndx].sh_addr; + + value = sym->st_value; + value += sec_base; + value += rel[i].r_addend; + + switch (ELF64_R_TYPE(rel[i].r_info)) { + case R_X86_64_NONE: + break; + case R_X86_64_64: + *(u64 *)location = value; + break; + case R_X86_64_32: + *(u32 *)location = value; + if (value != *(u32 *)location) + goto overflow; + break; + case R_X86_64_32S: + *(s32 *)location = value; + if ((s64)value != *(s32 *)location) + goto overflow; + break; + case R_X86_64_PC32: + value -= (u64)address; + *(u32 *)location = value; + break; + default: + pr_err("Unknown rela relocation: %llu\n", + ELF64_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; + +overflow: + pr_err("Overflow in relocation type %d value 0x%lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), value); + return -ENOEXEC; +} diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 8e80901e466f..84f09e9eca26 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -10,6 +10,7 @@ #include #include #include +#include #include /* Verify architecture specific macros are defined */ @@ -95,6 +96,27 @@ struct compat_kexec_segment { }; #endif +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +struct purgatory_info { + /* Pointer to elf header of read only purgatory */ + Elf_Ehdr *ehdr; + + /* Pointer to purgatory sechdrs which are modifiable */ + Elf_Shdr *sechdrs; + /* + * Temporary buffer location where purgatory is loaded and relocated + * This memory can be freed post image load + */ + void *purgatory_buf; + + /* Address where purgatory is finally loaded and is executed from */ + unsigned long purgatory_load_addr; +}; + struct kimage { kimage_entry_t head; kimage_entry_t *entry; @@ -143,6 +165,9 @@ struct kimage { /* Image loader handling the kernel can store a pointer here */ void *image_loader_data; + + /* Information for loading purgatory */ + struct purgatory_info purgatory_info; }; /* @@ -189,6 +214,14 @@ extern int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long *load_addr); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); +extern int kexec_load_purgatory(struct kimage *image, unsigned long min, + unsigned long max, int top_down, + unsigned long *load_addr); +extern int kexec_purgatory_get_set_symbol(struct kimage *image, + const char *name, void *buf, + unsigned int size, bool get_value); +extern void *kexec_purgatory_get_symbol_addr(struct kimage *image, + const char *name); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); diff --git a/kernel/kexec.c b/kernel/kexec.c index 9b46219254dd..669e331aa9ec 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -42,6 +42,9 @@ #include #include +#include +#include + /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t __percpu *crash_notes; @@ -54,6 +57,15 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; +/* + * Declare these symbols weak so that if architecture provides a purgatory, + * these will be overridden. + */ +char __weak kexec_purgatory[0]; +size_t __weak kexec_purgatory_size = 0; + +static int kexec_calculate_store_digests(struct kimage *image); + /* Location of the reserved area for the crash kernel */ struct resource crashk_res = { .name = "Crash kernel", @@ -404,6 +416,24 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) { } +/* Apply relocations of type RELA */ +int __weak +arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + unsigned int relsec) +{ + pr_err("RELA relocation unsupported.\n"); + return -ENOEXEC; +} + +/* Apply relocations of type REL */ +int __weak +arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + unsigned int relsec) +{ + pr_err("REL relocation unsupported.\n"); + return -ENOEXEC; +} + /* * Free up memory used by kernel, initrd, and comand line. This is temporary * memory allocation which is not needed any more after these buffers have @@ -411,6 +441,8 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) */ static void kimage_file_post_load_cleanup(struct kimage *image) { + struct purgatory_info *pi = &image->purgatory_info; + vfree(image->kernel_buf); image->kernel_buf = NULL; @@ -420,6 +452,12 @@ static void kimage_file_post_load_cleanup(struct kimage *image) kfree(image->cmdline_buf); image->cmdline_buf = NULL; + vfree(pi->purgatory_buf); + pi->purgatory_buf = NULL; + + vfree(pi->sechdrs); + pi->sechdrs = NULL; + /* See if architecture has anything to cleanup post load */ arch_kimage_file_post_load_cleanup(image); } @@ -1105,7 +1143,7 @@ static int kimage_load_crash_segment(struct kimage *image, } ubytes -= uchunk; maddr += mchunk; - buf += mchunk; + buf += mchunk; mbytes -= mchunk; } out: @@ -1340,6 +1378,10 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, if (ret) goto out; + ret = kexec_calculate_store_digests(image); + if (ret) + goto out; + for (i = 0; i < image->nr_segments; i++) { struct kexec_segment *ksegment; @@ -2092,6 +2134,506 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, return 0; } +/* Calculate and store the digest of segments */ +static int kexec_calculate_store_digests(struct kimage *image) +{ + struct crypto_shash *tfm; + struct shash_desc *desc; + int ret = 0, i, j, zero_buf_sz, sha_region_sz; + size_t desc_size, nullsz; + char *digest; + void *zero_buf; + struct kexec_sha_region *sha_regions; + struct purgatory_info *pi = &image->purgatory_info; + + zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT); + zero_buf_sz = PAGE_SIZE; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + ret = PTR_ERR(tfm); + goto out; + } + + desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); + desc = kzalloc(desc_size, GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto out_free_tfm; + } + + sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); + sha_regions = vzalloc(sha_region_sz); + if (!sha_regions) + goto out_free_desc; + + desc->tfm = tfm; + desc->flags = 0; + + ret = crypto_shash_init(desc); + if (ret < 0) + goto out_free_sha_regions; + + digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); + if (!digest) { + ret = -ENOMEM; + goto out_free_sha_regions; + } + + for (j = i = 0; i < image->nr_segments; i++) { + struct kexec_segment *ksegment; + + ksegment = &image->segment[i]; + /* + * Skip purgatory as it will be modified once we put digest + * info in purgatory. + */ + if (ksegment->kbuf == pi->purgatory_buf) + continue; + + ret = crypto_shash_update(desc, ksegment->kbuf, + ksegment->bufsz); + if (ret) + break; + + /* + * Assume rest of the buffer is filled with zero and + * update digest accordingly. + */ + nullsz = ksegment->memsz - ksegment->bufsz; + while (nullsz) { + unsigned long bytes = nullsz; + + if (bytes > zero_buf_sz) + bytes = zero_buf_sz; + ret = crypto_shash_update(desc, zero_buf, bytes); + if (ret) + break; + nullsz -= bytes; + } + + if (ret) + break; + + sha_regions[j].start = ksegment->mem; + sha_regions[j].len = ksegment->memsz; + j++; + } + + if (!ret) { + ret = crypto_shash_final(desc, digest); + if (ret) + goto out_free_digest; + ret = kexec_purgatory_get_set_symbol(image, "sha_regions", + sha_regions, sha_region_sz, 0); + if (ret) + goto out_free_digest; + + ret = kexec_purgatory_get_set_symbol(image, "sha256_digest", + digest, SHA256_DIGEST_SIZE, 0); + if (ret) + goto out_free_digest; + } + +out_free_digest: + kfree(digest); +out_free_sha_regions: + vfree(sha_regions); +out_free_desc: + kfree(desc); +out_free_tfm: + kfree(tfm); +out: + return ret; +} + +/* Actually load purgatory. Lot of code taken from kexec-tools */ +static int __kexec_load_purgatory(struct kimage *image, unsigned long min, + unsigned long max, int top_down) +{ + struct purgatory_info *pi = &image->purgatory_info; + unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad; + unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset; + unsigned char *buf_addr, *src; + int i, ret = 0, entry_sidx = -1; + const Elf_Shdr *sechdrs_c; + Elf_Shdr *sechdrs = NULL; + void *purgatory_buf = NULL; + + /* + * sechdrs_c points to section headers in purgatory and are read + * only. No modifications allowed. + */ + sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff; + + /* + * We can not modify sechdrs_c[] and its fields. It is read only. + * Copy it over to a local copy where one can store some temporary + * data and free it at the end. We need to modify ->sh_addr and + * ->sh_offset fields to keep track of permanent and temporary + * locations of sections. + */ + sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr)); + if (!sechdrs) + return -ENOMEM; + + memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr)); + + /* + * We seem to have multiple copies of sections. First copy is which + * is embedded in kernel in read only section. Some of these sections + * will be copied to a temporary buffer and relocated. And these + * sections will finally be copied to their final destination at + * segment load time. + * + * Use ->sh_offset to reflect section address in memory. It will + * point to original read only copy if section is not allocatable. + * Otherwise it will point to temporary copy which will be relocated. + * + * Use ->sh_addr to contain final address of the section where it + * will go during execution time. + */ + for (i = 0; i < pi->ehdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_NOBITS) + continue; + + sechdrs[i].sh_offset = (unsigned long)pi->ehdr + + sechdrs[i].sh_offset; + } + + /* + * Identify entry point section and make entry relative to section + * start. + */ + entry = pi->ehdr->e_entry; + for (i = 0; i < pi->ehdr->e_shnum; i++) { + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + + if (!(sechdrs[i].sh_flags & SHF_EXECINSTR)) + continue; + + /* Make entry section relative */ + if (sechdrs[i].sh_addr <= pi->ehdr->e_entry && + ((sechdrs[i].sh_addr + sechdrs[i].sh_size) > + pi->ehdr->e_entry)) { + entry_sidx = i; + entry -= sechdrs[i].sh_addr; + break; + } + } + + /* Determine how much memory is needed to load relocatable object. */ + buf_align = 1; + bss_align = 1; + buf_sz = 0; + bss_sz = 0; + + for (i = 0; i < pi->ehdr->e_shnum; i++) { + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + + align = sechdrs[i].sh_addralign; + if (sechdrs[i].sh_type != SHT_NOBITS) { + if (buf_align < align) + buf_align = align; + buf_sz = ALIGN(buf_sz, align); + buf_sz += sechdrs[i].sh_size; + } else { + /* bss section */ + if (bss_align < align) + bss_align = align; + bss_sz = ALIGN(bss_sz, align); + bss_sz += sechdrs[i].sh_size; + } + } + + /* Determine the bss padding required to align bss properly */ + bss_pad = 0; + if (buf_sz & (bss_align - 1)) + bss_pad = bss_align - (buf_sz & (bss_align - 1)); + + memsz = buf_sz + bss_pad + bss_sz; + + /* Allocate buffer for purgatory */ + purgatory_buf = vzalloc(buf_sz); + if (!purgatory_buf) { + ret = -ENOMEM; + goto out; + } + + if (buf_align < bss_align) + buf_align = bss_align; + + /* Add buffer to segment list */ + ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz, + buf_align, min, max, top_down, + &pi->purgatory_load_addr); + if (ret) + goto out; + + /* Load SHF_ALLOC sections */ + buf_addr = purgatory_buf; + load_addr = curr_load_addr = pi->purgatory_load_addr; + bss_addr = load_addr + buf_sz + bss_pad; + + for (i = 0; i < pi->ehdr->e_shnum; i++) { + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + + align = sechdrs[i].sh_addralign; + if (sechdrs[i].sh_type != SHT_NOBITS) { + curr_load_addr = ALIGN(curr_load_addr, align); + offset = curr_load_addr - load_addr; + /* We already modifed ->sh_offset to keep src addr */ + src = (char *) sechdrs[i].sh_offset; + memcpy(buf_addr + offset, src, sechdrs[i].sh_size); + + /* Store load address and source address of section */ + sechdrs[i].sh_addr = curr_load_addr; + + /* + * This section got copied to temporary buffer. Update + * ->sh_offset accordingly. + */ + sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset); + + /* Advance to the next address */ + curr_load_addr += sechdrs[i].sh_size; + } else { + bss_addr = ALIGN(bss_addr, align); + sechdrs[i].sh_addr = bss_addr; + bss_addr += sechdrs[i].sh_size; + } + } + + /* Update entry point based on load address of text section */ + if (entry_sidx >= 0) + entry += sechdrs[entry_sidx].sh_addr; + + /* Make kernel jump to purgatory after shutdown */ + image->start = entry; + + /* Used later to get/set symbol values */ + pi->sechdrs = sechdrs; + + /* + * Used later to identify which section is purgatory and skip it + * from checksumming. + */ + pi->purgatory_buf = purgatory_buf; + return ret; +out: + vfree(sechdrs); + vfree(purgatory_buf); + return ret; +} + +static int kexec_apply_relocations(struct kimage *image) +{ + int i, ret; + struct purgatory_info *pi = &image->purgatory_info; + Elf_Shdr *sechdrs = pi->sechdrs; + + /* Apply relocations */ + for (i = 0; i < pi->ehdr->e_shnum; i++) { + Elf_Shdr *section, *symtab; + + if (sechdrs[i].sh_type != SHT_RELA && + sechdrs[i].sh_type != SHT_REL) + continue; + + /* + * For section of type SHT_RELA/SHT_REL, + * ->sh_link contains section header index of associated + * symbol table. And ->sh_info contains section header + * index of section to which relocations apply. + */ + if (sechdrs[i].sh_info >= pi->ehdr->e_shnum || + sechdrs[i].sh_link >= pi->ehdr->e_shnum) + return -ENOEXEC; + + section = &sechdrs[sechdrs[i].sh_info]; + symtab = &sechdrs[sechdrs[i].sh_link]; + + if (!(section->sh_flags & SHF_ALLOC)) + continue; + + /* + * symtab->sh_link contain section header index of associated + * string table. + */ + if (symtab->sh_link >= pi->ehdr->e_shnum) + /* Invalid section number? */ + continue; + + /* + * Respective archicture needs to provide support for applying + * relocations of type SHT_RELA/SHT_REL. + */ + if (sechdrs[i].sh_type == SHT_RELA) + ret = arch_kexec_apply_relocations_add(pi->ehdr, + sechdrs, i); + else if (sechdrs[i].sh_type == SHT_REL) + ret = arch_kexec_apply_relocations(pi->ehdr, + sechdrs, i); + if (ret) + return ret; + } + + return 0; +} + +/* Load relocatable purgatory object and relocate it appropriately */ +int kexec_load_purgatory(struct kimage *image, unsigned long min, + unsigned long max, int top_down, + unsigned long *load_addr) +{ + struct purgatory_info *pi = &image->purgatory_info; + int ret; + + if (kexec_purgatory_size <= 0) + return -EINVAL; + + if (kexec_purgatory_size < sizeof(Elf_Ehdr)) + return -ENOEXEC; + + pi->ehdr = (Elf_Ehdr *)kexec_purgatory; + + if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0 + || pi->ehdr->e_type != ET_REL + || !elf_check_arch(pi->ehdr) + || pi->ehdr->e_shentsize != sizeof(Elf_Shdr)) + return -ENOEXEC; + + if (pi->ehdr->e_shoff >= kexec_purgatory_size + || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) > + kexec_purgatory_size - pi->ehdr->e_shoff)) + return -ENOEXEC; + + ret = __kexec_load_purgatory(image, min, max, top_down); + if (ret) + return ret; + + ret = kexec_apply_relocations(image); + if (ret) + goto out; + + *load_addr = pi->purgatory_load_addr; + return 0; +out: + vfree(pi->sechdrs); + vfree(pi->purgatory_buf); + return ret; +} + +static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, + const char *name) +{ + Elf_Sym *syms; + Elf_Shdr *sechdrs; + Elf_Ehdr *ehdr; + int i, k; + const char *strtab; + + if (!pi->sechdrs || !pi->ehdr) + return NULL; + + sechdrs = pi->sechdrs; + ehdr = pi->ehdr; + + for (i = 0; i < ehdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + + if (sechdrs[i].sh_link >= ehdr->e_shnum) + /* Invalid strtab section number */ + continue; + strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset; + syms = (Elf_Sym *)sechdrs[i].sh_offset; + + /* Go through symbols for a match */ + for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { + if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) + continue; + + if (strcmp(strtab + syms[k].st_name, name) != 0) + continue; + + if (syms[k].st_shndx == SHN_UNDEF || + syms[k].st_shndx >= ehdr->e_shnum) { + pr_debug("Symbol: %s has bad section index %d.\n", + name, syms[k].st_shndx); + return NULL; + } + + /* Found the symbol we are looking for */ + return &syms[k]; + } + } + + return NULL; +} + +void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name) +{ + struct purgatory_info *pi = &image->purgatory_info; + Elf_Sym *sym; + Elf_Shdr *sechdr; + + sym = kexec_purgatory_find_symbol(pi, name); + if (!sym) + return ERR_PTR(-EINVAL); + + sechdr = &pi->sechdrs[sym->st_shndx]; + + /* + * Returns the address where symbol will finally be loaded after + * kexec_load_segment() + */ + return (void *)(sechdr->sh_addr + sym->st_value); +} + +/* + * Get or set value of a symbol. If "get_value" is true, symbol value is + * returned in buf otherwise symbol value is set based on value in buf. + */ +int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, + void *buf, unsigned int size, bool get_value) +{ + Elf_Sym *sym; + Elf_Shdr *sechdrs; + struct purgatory_info *pi = &image->purgatory_info; + char *sym_buf; + + sym = kexec_purgatory_find_symbol(pi, name); + if (!sym) + return -EINVAL; + + if (sym->st_size != size) { + pr_err("symbol %s size mismatch: expected %lu actual %u\n", + name, (unsigned long)sym->st_size, size); + return -EINVAL; + } + + sechdrs = pi->sechdrs; + + if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { + pr_err("symbol %s is in a bss section. Cannot %s\n", name, + get_value ? "get" : "set"); + return -EINVAL; + } + + sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset + + sym->st_value; + + if (get_value) + memcpy((void *)buf, sym_buf, size); + else + memcpy((void *)sym_buf, buf, size); + + return 0; +} /* * Move into place and start executing a preloaded standalone -- cgit v1.2.3 From 27f48d3e633be23656a097baa3be336e04a82d84 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:06 -0700 Subject: kexec-bzImage64: support for loading bzImage using 64bit entry This is loader specific code which can load bzImage and set it up for 64bit entry. This does not take care of 32bit entry or real mode entry. 32bit mode entry can be implemented if somebody needs it. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kexec-bzimage64.h | 6 + arch/x86/include/asm/kexec.h | 21 ++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/kexec-bzimage64.c | 375 +++++++++++++++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 5 +- include/linux/kexec.h | 2 +- kernel/kexec.c | 11 +- 7 files changed, 415 insertions(+), 6 deletions(-) create mode 100644 arch/x86/include/asm/kexec-bzimage64.h create mode 100644 arch/x86/kernel/kexec-bzimage64.c (limited to 'include/linux') diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h new file mode 100644 index 000000000000..d1b5d194e31d --- /dev/null +++ b/arch/x86/include/asm/kexec-bzimage64.h @@ -0,0 +1,6 @@ +#ifndef _ASM_KEXEC_BZIMAGE64_H +#define _ASM_KEXEC_BZIMAGE64_H + +extern struct kexec_file_ops kexec_bzImage64_ops; + +#endif /* _ASM_KEXE_BZIMAGE64_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 17483a492f18..0dfccced4edf 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -23,6 +23,7 @@ #include #include +#include /* * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. @@ -161,6 +162,26 @@ struct kimage_arch { pmd_t *pmd; pte_t *pte; }; + +struct kexec_entry64_regs { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t rbp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; +}; #endif typedef void crash_vmclear_fn(void); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bde3993624f1..b5ea75c4a4b4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -118,4 +118,5 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-y += vsmp_64.o + obj-$(CONFIG_KEXEC) += kexec-bzimage64.o endif diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c new file mode 100644 index 000000000000..bcedd100192f --- /dev/null +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -0,0 +1,375 @@ +/* + * Kexec bzImage loader + * + * Copyright (C) 2014 Red Hat Inc. + * Authors: + * Vivek Goyal + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define pr_fmt(fmt) "kexec-bzImage64: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Defines lowest physical address for various segments. Not sure where + * exactly these limits came from. Current bzimage64 loader in kexec-tools + * uses these so I am retaining it. It can be changed over time as we gain + * more insight. + */ +#define MIN_PURGATORY_ADDR 0x3000 +#define MIN_BOOTPARAM_ADDR 0x3000 +#define MIN_KERNEL_LOAD_ADDR 0x100000 +#define MIN_INITRD_LOAD_ADDR 0x1000000 + +/* + * This is a place holder for all boot loader specific data structure which + * gets allocated in one call but gets freed much later during cleanup + * time. Right now there is only one field but it can grow as need be. + */ +struct bzimage64_data { + /* + * Temporary buffer to hold bootparams buffer. This should be + * freed once the bootparam segment has been loaded. + */ + void *bootparams_buf; +}; + +static int setup_initrd(struct boot_params *params, + unsigned long initrd_load_addr, unsigned long initrd_len) +{ + params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL; + params->hdr.ramdisk_size = initrd_len & 0xffffffffUL; + + params->ext_ramdisk_image = initrd_load_addr >> 32; + params->ext_ramdisk_size = initrd_len >> 32; + + return 0; +} + +static int setup_cmdline(struct boot_params *params, + unsigned long bootparams_load_addr, + unsigned long cmdline_offset, char *cmdline, + unsigned long cmdline_len) +{ + char *cmdline_ptr = ((char *)params) + cmdline_offset; + unsigned long cmdline_ptr_phys; + uint32_t cmdline_low_32, cmdline_ext_32; + + memcpy(cmdline_ptr, cmdline, cmdline_len); + cmdline_ptr[cmdline_len - 1] = '\0'; + + cmdline_ptr_phys = bootparams_load_addr + cmdline_offset; + cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL; + cmdline_ext_32 = cmdline_ptr_phys >> 32; + + params->hdr.cmd_line_ptr = cmdline_low_32; + if (cmdline_ext_32) + params->ext_cmd_line_ptr = cmdline_ext_32; + + return 0; +} + +static int setup_memory_map_entries(struct boot_params *params) +{ + unsigned int nr_e820_entries; + + nr_e820_entries = e820_saved.nr_map; + + /* TODO: Pass entries more than E820MAX in bootparams setup data */ + if (nr_e820_entries > E820MAX) + nr_e820_entries = E820MAX; + + params->e820_entries = nr_e820_entries; + memcpy(¶ms->e820_map, &e820_saved.map, + nr_e820_entries * sizeof(struct e820entry)); + + return 0; +} + +static int setup_boot_parameters(struct boot_params *params) +{ + unsigned int nr_e820_entries; + unsigned long long mem_k, start, end; + int i; + + /* Get subarch from existing bootparams */ + params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; + + /* Copying screen_info will do? */ + memcpy(¶ms->screen_info, &boot_params.screen_info, + sizeof(struct screen_info)); + + /* Fill in memsize later */ + params->screen_info.ext_mem_k = 0; + params->alt_mem_k = 0; + + /* Default APM info */ + memset(¶ms->apm_bios_info, 0, sizeof(params->apm_bios_info)); + + /* Default drive info */ + memset(¶ms->hd0_info, 0, sizeof(params->hd0_info)); + memset(¶ms->hd1_info, 0, sizeof(params->hd1_info)); + + /* Default sysdesc table */ + params->sys_desc_table.length = 0; + + setup_memory_map_entries(params); + nr_e820_entries = params->e820_entries; + + for (i = 0; i < nr_e820_entries; i++) { + if (params->e820_map[i].type != E820_RAM) + continue; + start = params->e820_map[i].addr; + end = params->e820_map[i].addr + params->e820_map[i].size - 1; + + if ((start <= 0x100000) && end > 0x100000) { + mem_k = (end >> 10) - (0x100000 >> 10); + params->screen_info.ext_mem_k = mem_k; + params->alt_mem_k = mem_k; + if (mem_k > 0xfc00) + params->screen_info.ext_mem_k = 0xfc00; /* 64M*/ + if (mem_k > 0xffffffff) + params->alt_mem_k = 0xffffffff; + } + } + + /* Setup EDD info */ + memcpy(params->eddbuf, boot_params.eddbuf, + EDDMAXNR * sizeof(struct edd_info)); + params->eddbuf_entries = boot_params.eddbuf_entries; + + memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer, + EDD_MBR_SIG_MAX * sizeof(unsigned int)); + + return 0; +} + +int bzImage64_probe(const char *buf, unsigned long len) +{ + int ret = -ENOEXEC; + struct setup_header *header; + + /* kernel should be atleast two sectors long */ + if (len < 2 * 512) { + pr_err("File is too short to be a bzImage\n"); + return ret; + } + + header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr)); + if (memcmp((char *)&header->header, "HdrS", 4) != 0) { + pr_err("Not a bzImage\n"); + return ret; + } + + if (header->boot_flag != 0xAA55) { + pr_err("No x86 boot sector present\n"); + return ret; + } + + if (header->version < 0x020C) { + pr_err("Must be at least protocol version 2.12\n"); + return ret; + } + + if (!(header->loadflags & LOADED_HIGH)) { + pr_err("zImage not a bzImage\n"); + return ret; + } + + if (!(header->xloadflags & XLF_KERNEL_64)) { + pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n"); + return ret; + } + + if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) { + pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n"); + return ret; + } + + /* I've got a bzImage */ + pr_debug("It's a relocatable bzImage64\n"); + ret = 0; + + return ret; +} + +void *bzImage64_load(struct kimage *image, char *kernel, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + + struct setup_header *header; + int setup_sects, kern16_size, ret = 0; + unsigned long setup_header_size, params_cmdline_sz; + struct boot_params *params; + unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr; + unsigned long purgatory_load_addr; + unsigned long kernel_bufsz, kernel_memsz, kernel_align; + char *kernel_buf; + struct bzimage64_data *ldata; + struct kexec_entry64_regs regs64; + void *stack; + unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr); + + header = (struct setup_header *)(kernel + setup_hdr_offset); + setup_sects = header->setup_sects; + if (setup_sects == 0) + setup_sects = 4; + + kern16_size = (setup_sects + 1) * 512; + if (kernel_len < kern16_size) { + pr_err("bzImage truncated\n"); + return ERR_PTR(-ENOEXEC); + } + + if (cmdline_len > header->cmdline_size) { + pr_err("Kernel command line too long\n"); + return ERR_PTR(-EINVAL); + } + + /* + * Load purgatory. For 64bit entry point, purgatory code can be + * anywhere. + */ + ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1, + &purgatory_load_addr); + if (ret) { + pr_err("Loading purgatory failed\n"); + return ERR_PTR(ret); + } + + pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + + /* Load Bootparams and cmdline */ + params_cmdline_sz = sizeof(struct boot_params) + cmdline_len; + params = kzalloc(params_cmdline_sz, GFP_KERNEL); + if (!params) + return ERR_PTR(-ENOMEM); + + /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ + setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; + + /* Is there a limit on setup header size? */ + memcpy(¶ms->hdr, (kernel + setup_hdr_offset), setup_header_size); + + ret = kexec_add_buffer(image, (char *)params, params_cmdline_sz, + params_cmdline_sz, 16, MIN_BOOTPARAM_ADDR, + ULONG_MAX, 1, &bootparam_load_addr); + if (ret) + goto out_free_params; + pr_debug("Loaded boot_param and command line at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + bootparam_load_addr, params_cmdline_sz, params_cmdline_sz); + + /* Load kernel */ + kernel_buf = kernel + kern16_size; + kernel_bufsz = kernel_len - kern16_size; + kernel_memsz = PAGE_ALIGN(header->init_size); + kernel_align = header->kernel_alignment; + + ret = kexec_add_buffer(image, kernel_buf, + kernel_bufsz, kernel_memsz, kernel_align, + MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1, + &kernel_load_addr); + if (ret) + goto out_free_params; + + pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kernel_load_addr, kernel_memsz, kernel_memsz); + + /* Load initrd high */ + if (initrd) { + ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len, + PAGE_SIZE, MIN_INITRD_LOAD_ADDR, + ULONG_MAX, 1, &initrd_load_addr); + if (ret) + goto out_free_params; + + pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + initrd_load_addr, initrd_len, initrd_len); + + setup_initrd(params, initrd_load_addr, initrd_len); + } + + setup_cmdline(params, bootparam_load_addr, sizeof(struct boot_params), + cmdline, cmdline_len); + + /* bootloader info. Do we need a separate ID for kexec kernel loader? */ + params->hdr.type_of_loader = 0x0D << 4; + params->hdr.loadflags = 0; + + /* Setup purgatory regs for entry */ + ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", ®s64, + sizeof(regs64), 1); + if (ret) + goto out_free_params; + + regs64.rbx = 0; /* Bootstrap Processor */ + regs64.rsi = bootparam_load_addr; + regs64.rip = kernel_load_addr + 0x200; + stack = kexec_purgatory_get_symbol_addr(image, "stack_end"); + if (IS_ERR(stack)) { + pr_err("Could not find address of symbol stack_end\n"); + ret = -EINVAL; + goto out_free_params; + } + + regs64.rsp = (unsigned long)stack; + ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", ®s64, + sizeof(regs64), 0); + if (ret) + goto out_free_params; + + setup_boot_parameters(params); + + /* Allocate loader specific data */ + ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL); + if (!ldata) { + ret = -ENOMEM; + goto out_free_params; + } + + /* + * Store pointer to params so that it could be freed after loading + * params segment has been loaded and contents have been copied + * somewhere else. + */ + ldata->bootparams_buf = params; + return ldata; + +out_free_params: + kfree(params); + return ERR_PTR(ret); +} + +/* This cleanup function is called after various segments have been loaded */ +int bzImage64_cleanup(void *loader_data) +{ + struct bzimage64_data *ldata = loader_data; + + if (!ldata) + return 0; + + kfree(ldata->bootparams_buf); + ldata->bootparams_buf = NULL; + + return 0; +} + +struct kexec_file_ops kexec_bzImage64_ops = { + .probe = bzImage64_probe, + .load = bzImage64_load, + .cleanup = bzImage64_cleanup, +}; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 88404c440727..18d0f9e0b6da 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -23,9 +23,10 @@ #include #include #include +#include static struct kexec_file_ops *kexec_file_loaders[] = { - NULL, + &kexec_bzImage64_ops, }; static void free_transition_pgtable(struct kimage *image) @@ -328,7 +329,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) if (!image->fops || !image->fops->cleanup) return 0; - return image->fops->cleanup(image); + return image->fops->cleanup(image->image_loader_data); } /* diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 84f09e9eca26..9481703b0e7a 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -190,7 +190,7 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len); -typedef int (kexec_cleanup_t)(struct kimage *image); +typedef int (kexec_cleanup_t)(void *loader_data); struct kexec_file_ops { kexec_probe_t *probe; diff --git a/kernel/kexec.c b/kernel/kexec.c index 669e331aa9ec..0926f2a3ed03 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -460,6 +460,14 @@ static void kimage_file_post_load_cleanup(struct kimage *image) /* See if architecture has anything to cleanup post load */ arch_kimage_file_post_load_cleanup(image); + + /* + * Above call should have called into bootloader to free up + * any data stored in kimage->image_loader_data. It should + * be ok now to free it up. + */ + kfree(image->image_loader_data); + image->image_loader_data = NULL; } /* @@ -576,7 +584,6 @@ out_free_control_pages: kimage_free_page_list(&image->control_pages); out_free_post_load_bufs: kimage_file_post_load_cleanup(image); - kfree(image->image_loader_data); out_free_image: kfree(image); return ret; @@ -900,8 +907,6 @@ static void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); - kfree(image->image_loader_data); - /* * Free up any temporary buffers allocated. This might hit if * error occurred much later after buffer allocation. -- cgit v1.2.3 From 6a2c20e7d8900ed273dc34a9af9bf02fc478e427 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:11 -0700 Subject: kexec: support kexec/kdump on EFI systems This patch does two things. It passes EFI run time mappings to second kernel in bootparams efi_info. Second kernel parse this info and create new mappings in second kernel. That means mappings in first and second kernel will be same. This paves the way to enable EFI in kexec kernel. This patch also prepares and passes EFI setup data through bootparams. This contains bunch of information about various tables and their addresses. These information gathering and passing has been written along the lines of what current kexec-tools is doing to make kexec work with UEFI. [akpm@linux-foundation.org: s/get_efi/efi_get/g, per Matt] Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Cc: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/kexec-bzimage64.c | 146 ++++++++++++++++++++++++++++++++++--- drivers/firmware/efi/runtime-map.c | 21 ++++++ include/linux/efi.h | 19 +++++ 3 files changed, 174 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index a8e646458a10..623e6c58081f 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include +#include #define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */ @@ -90,7 +92,7 @@ static int setup_cmdline(struct kimage *image, struct boot_params *params, return 0; } -static int setup_memory_map_entries(struct boot_params *params) +static int setup_e820_entries(struct boot_params *params) { unsigned int nr_e820_entries; @@ -107,8 +109,93 @@ static int setup_memory_map_entries(struct boot_params *params) return 0; } -static int setup_boot_parameters(struct kimage *image, - struct boot_params *params) +#ifdef CONFIG_EFI +static int setup_efi_info_memmap(struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_map_offset, + unsigned int efi_map_sz) +{ + void *efi_map = (void *)params + efi_map_offset; + unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset; + struct efi_info *ei = ¶ms->efi_info; + + if (!efi_map_sz) + return 0; + + efi_runtime_map_copy(efi_map, efi_map_sz); + + ei->efi_memmap = efi_map_phys_addr & 0xffffffff; + ei->efi_memmap_hi = efi_map_phys_addr >> 32; + ei->efi_memmap_size = efi_map_sz; + + return 0; +} + +static int +prepare_add_efi_setup_data(struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_setup_data_offset) +{ + unsigned long setup_data_phys; + struct setup_data *sd = (void *)params + efi_setup_data_offset; + struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data); + + esd->fw_vendor = efi.fw_vendor; + esd->runtime = efi.runtime; + esd->tables = efi.config_table; + esd->smbios = efi.smbios; + + sd->type = SETUP_EFI; + sd->len = sizeof(struct efi_setup_data); + + /* Add setup data */ + setup_data_phys = params_load_addr + efi_setup_data_offset; + sd->next = params->hdr.setup_data; + params->hdr.setup_data = setup_data_phys; + + return 0; +} + +static int +setup_efi_state(struct boot_params *params, unsigned long params_load_addr, + unsigned int efi_map_offset, unsigned int efi_map_sz, + unsigned int efi_setup_data_offset) +{ + struct efi_info *current_ei = &boot_params.efi_info; + struct efi_info *ei = ¶ms->efi_info; + + if (!current_ei->efi_memmap_size) + return 0; + + /* + * If 1:1 mapping is not enabled, second kernel can not setup EFI + * and use EFI run time services. User space will have to pass + * acpi_rsdp= on kernel command line to make second kernel boot + * without efi. + */ + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + ei->efi_loader_signature = current_ei->efi_loader_signature; + ei->efi_systab = current_ei->efi_systab; + ei->efi_systab_hi = current_ei->efi_systab_hi; + + ei->efi_memdesc_version = current_ei->efi_memdesc_version; + ei->efi_memdesc_size = efi_get_runtime_map_desc_size(); + + setup_efi_info_memmap(params, params_load_addr, efi_map_offset, + efi_map_sz); + prepare_add_efi_setup_data(params, params_load_addr, + efi_setup_data_offset); + return 0; +} +#endif /* CONFIG_EFI */ + +static int +setup_boot_parameters(struct kimage *image, struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_map_offset, unsigned int efi_map_sz, + unsigned int efi_setup_data_offset) { unsigned int nr_e820_entries; unsigned long long mem_k, start, end; @@ -140,7 +227,7 @@ static int setup_boot_parameters(struct kimage *image, if (ret) return ret; } else - setup_memory_map_entries(params); + setup_e820_entries(params); nr_e820_entries = params->e820_entries; @@ -161,6 +248,12 @@ static int setup_boot_parameters(struct kimage *image, } } +#ifdef CONFIG_EFI + /* Setup EFI state */ + setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz, + efi_setup_data_offset); +#endif + /* Setup EDD info */ memcpy(params->eddbuf, boot_params.eddbuf, EDDMAXNR * sizeof(struct edd_info)); @@ -214,6 +307,15 @@ int bzImage64_probe(const char *buf, unsigned long len) return ret; } + /* + * Can't handle 32bit EFI as it does not allow loading kernel + * above 4G. This should be handled by 32bit bzImage loader + */ + if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) { + pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n"); + return ret; + } + /* I've got a bzImage */ pr_debug("It's a relocatable bzImage64\n"); ret = 0; @@ -229,7 +331,7 @@ void *bzImage64_load(struct kimage *image, char *kernel, struct setup_header *header; int setup_sects, kern16_size, ret = 0; - unsigned long setup_header_size, params_cmdline_sz; + unsigned long setup_header_size, params_cmdline_sz, params_misc_sz; struct boot_params *params; unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr; unsigned long purgatory_load_addr; @@ -239,6 +341,7 @@ void *bzImage64_load(struct kimage *image, char *kernel, struct kexec_entry64_regs regs64; void *stack; unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr); + unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset; header = (struct setup_header *)(kernel + setup_hdr_offset); setup_sects = header->setup_sects; @@ -285,12 +388,29 @@ void *bzImage64_load(struct kimage *image, char *kernel, pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); - /* Load Bootparams and cmdline */ + + /* + * Load Bootparams and cmdline and space for efi stuff. + * + * Allocate memory together for multiple data structures so + * that they all can go in single area/segment and we don't + * have to create separate segment for each. Keeps things + * little bit simple + */ + efi_map_sz = efi_get_runtime_map_size(); + efi_map_sz = ALIGN(efi_map_sz, 16); params_cmdline_sz = sizeof(struct boot_params) + cmdline_len + MAX_ELFCOREHDR_STR_LEN; - params = kzalloc(params_cmdline_sz, GFP_KERNEL); + params_cmdline_sz = ALIGN(params_cmdline_sz, 16); + params_misc_sz = params_cmdline_sz + efi_map_sz + + sizeof(struct setup_data) + + sizeof(struct efi_setup_data); + + params = kzalloc(params_misc_sz, GFP_KERNEL); if (!params) return ERR_PTR(-ENOMEM); + efi_map_offset = params_cmdline_sz; + efi_setup_data_offset = efi_map_offset + efi_map_sz; /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; @@ -298,13 +418,13 @@ void *bzImage64_load(struct kimage *image, char *kernel, /* Is there a limit on setup header size? */ memcpy(¶ms->hdr, (kernel + setup_hdr_offset), setup_header_size); - ret = kexec_add_buffer(image, (char *)params, params_cmdline_sz, - params_cmdline_sz, 16, MIN_BOOTPARAM_ADDR, + ret = kexec_add_buffer(image, (char *)params, params_misc_sz, + params_misc_sz, 16, MIN_BOOTPARAM_ADDR, ULONG_MAX, 1, &bootparam_load_addr); if (ret) goto out_free_params; - pr_debug("Loaded boot_param and command line at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - bootparam_load_addr, params_cmdline_sz, params_cmdline_sz); + pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + bootparam_load_addr, params_misc_sz, params_misc_sz); /* Load kernel */ kernel_buf = kernel + kern16_size; @@ -365,7 +485,9 @@ void *bzImage64_load(struct kimage *image, char *kernel, if (ret) goto out_free_params; - ret = setup_boot_parameters(image, params); + ret = setup_boot_parameters(image, params, bootparam_load_addr, + efi_map_offset, efi_map_sz, + efi_setup_data_offset); if (ret) goto out_free_params; diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 97cdd16a2169..018c29a26615 100644 --- a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -138,6 +138,27 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) return entry; } +int efi_get_runtime_map_size(void) +{ + return nr_efi_runtime_map * efi_memdesc_size; +} + +int efi_get_runtime_map_desc_size(void) +{ + return efi_memdesc_size; +} + +int efi_runtime_map_copy(void *buf, size_t bufsz) +{ + size_t sz = efi_get_runtime_map_size(); + + if (sz > bufsz) + sz = bufsz; + + memcpy(buf, efi_runtime_map, sz); + return 0; +} + void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) { efi_runtime_map = map; diff --git a/include/linux/efi.h b/include/linux/efi.h index efc681fd5895..45cb4ffdea62 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1156,6 +1156,9 @@ int efivars_sysfs_init(void); #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); void efi_runtime_map_setup(void *, int, u32); +int efi_get_runtime_map_size(void); +int efi_get_runtime_map_desc_size(void); +int efi_runtime_map_copy(void *buf, size_t bufsz); #else static inline int efi_runtime_map_init(struct kobject *kobj) { @@ -1164,6 +1167,22 @@ static inline int efi_runtime_map_init(struct kobject *kobj) static inline void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {} + +static inline int efi_get_runtime_map_size(void) +{ + return 0; +} + +static inline int efi_get_runtime_map_desc_size(void) +{ + return 0; +} + +static inline int efi_runtime_map_copy(void *buf, size_t bufsz) +{ + return 0; +} + #endif /* prototypes shared between arch specific and generic stub code */ -- cgit v1.2.3 From 8e7d838103feac320baf9e68d73f954840ac1eea Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:13 -0700 Subject: kexec: verify the signature of signed PE bzImage This is the final piece of the puzzle of verifying kernel image signature during kexec_file_load() syscall. This patch calls into PE file routines to verify signature of bzImage. If signature are valid, kexec_file_load() succeeds otherwise it fails. Two new config options have been introduced. First one is CONFIG_KEXEC_VERIFY_SIG. This option enforces that kernel has to be validly signed otherwise kernel load will fail. If this option is not set, no signature verification will be done. Only exception will be when secureboot is enabled. In that case signature verification should be automatically enforced when secureboot is enabled. But that will happen when secureboot patches are merged. Second config option is CONFIG_KEXEC_BZIMAGE_VERIFY_SIG. This option enables signature verification support on bzImage. If this option is not set and previous one is set, kernel image loading will fail because kernel does not have support to verify signature of bzImage. I tested these patches with both "pesign" and "sbsign" signed bzImages. I used signing_key.priv key and signing_key.x509 cert for signing as generated during kernel build process (if module signing is enabled). Used following method to sign bzImage. pesign ====== - Convert DER format cert to PEM format cert openssl x509 -in signing_key.x509 -inform DER -out signing_key.x509.PEM -outform PEM - Generate a .p12 file from existing cert and private key file openssl pkcs12 -export -out kernel-key.p12 -inkey signing_key.priv -in signing_key.x509.PEM - Import .p12 file into pesign db pk12util -i /tmp/kernel-key.p12 -d /etc/pki/pesign - Sign bzImage pesign -i /boot/vmlinuz-3.16.0-rc3+ -o /boot/vmlinuz-3.16.0-rc3+.signed.pesign -c "Glacier signing key - Magrathea" -s sbsign ====== sbsign --key signing_key.priv --cert signing_key.x509.PEM --output /boot/vmlinuz-3.16.0-rc3+.signed.sbsign /boot/vmlinuz-3.16.0-rc3+ Patch details: Well all the hard work is done in previous patches. Now bzImage loader has just call into that code and verify whether bzImage signature are valid or not. Also create two config options. First one is CONFIG_KEXEC_VERIFY_SIG. This option enforces that kernel has to be validly signed otherwise kernel load will fail. If this option is not set, no signature verification will be done. Only exception will be when secureboot is enabled. In that case signature verification should be automatically enforced when secureboot is enabled. But that will happen when secureboot patches are merged. Second config option is CONFIG_KEXEC_BZIMAGE_VERIFY_SIG. This option enables signature verification support on bzImage. If this option is not set and previous one is set, kernel image loading will fail because kernel does not have support to verify signature of bzImage. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Cc: Matt Fleming Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 22 ++++++++++++++++++++++ arch/x86/kernel/kexec-bzimage64.c | 21 +++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 11 +++++++++++ include/linux/kexec.h | 3 +++ kernel/kexec.c | 15 +++++++++++++++ 5 files changed, 72 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9558b9fcafbf..4aafd322e21e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1599,6 +1599,28 @@ config KEXEC interface is strongly in flux, so no good recommendation can be made. +config KEXEC_VERIFY_SIG + bool "Verify kernel signature during kexec_file_load() syscall" + depends on KEXEC + ---help--- + This option makes kernel signature verification mandatory for + kexec_file_load() syscall. If kernel is signature can not be + verified, kexec_file_load() will fail. + + This option enforces signature verification at generic level. + One needs to enable signature verification for type of kernel + image being loaded to make sure it works. For example, enable + bzImage signature verification option to be able to load and + verify signatures of bzImage. Otherwise kernel loading will fail. + +config KEXEC_BZIMAGE_VERIFY_SIG + bool "Enable bzImage signature verification support" + depends on KEXEC_VERIFY_SIG + depends on SIGNED_PE_FILE_VERIFICATION + select SYSTEM_TRUSTED_KEYRING + ---help--- + Enable bzImage signature verification support. + config CRASH_DUMP bool "kernel crash dumps" depends on X86_64 || (X86_32 && HIGHMEM) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 623e6c58081f..9642b9b33655 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -525,8 +527,27 @@ int bzImage64_cleanup(void *loader_data) return 0; } +#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG +int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) +{ + bool trusted; + int ret; + + ret = verify_pefile_signature(kernel, kernel_len, + system_trusted_keyring, &trusted); + if (ret < 0) + return ret; + if (!trusted) + return -EKEYREJECTED; + return 0; +} +#endif + struct kexec_file_ops kexec_bzImage64_ops = { .probe = bzImage64_probe, .load = bzImage64_load, .cleanup = bzImage64_cleanup, +#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG + .verify_sig = bzImage64_verify_sig, +#endif }; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9330434da777..8b04018e5d1f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -372,6 +372,17 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return image->fops->cleanup(image->image_loader_data); } +int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel, + unsigned long kernel_len) +{ + if (!image->fops || !image->fops->verify_sig) { + pr_debug("kernel loader does not support signature verification."); + return -EKEYREJECTED; + } + + return image->fops->verify_sig(kernel, kernel_len); +} + /* * Apply purgatory relocations. * diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 9481703b0e7a..4b2a0e11cc5b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -191,11 +191,14 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len); typedef int (kexec_cleanup_t)(void *loader_data); +typedef int (kexec_verify_sig_t)(const char *kernel_buf, + unsigned long kernel_len); struct kexec_file_ops { kexec_probe_t *probe; kexec_load_t *load; kexec_cleanup_t *cleanup; + kexec_verify_sig_t *verify_sig; }; /* kexec interface functions */ diff --git a/kernel/kexec.c b/kernel/kexec.c index f18c780f9716..0b49a0a58102 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -416,6 +416,12 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) { } +int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, + unsigned long buf_len) +{ + return -EKEYREJECTED; +} + /* Apply relocations of type RELA */ int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, @@ -494,6 +500,15 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, if (ret) goto out; +#ifdef CONFIG_KEXEC_VERIFY_SIG + ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf, + image->kernel_buf_len); + if (ret) { + pr_debug("kernel signature verification failed.\n"); + goto out; + } + pr_debug("kernel signature verification successful.\n"); +#endif /* It is possible that there no initramfs is being loaded */ if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { ret = copy_file_from_fd(initrd_fd, &image->initrd_buf, -- cgit v1.2.3 From fd3cbdc0d1b5254a2e8793df58c409b469899a3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 10 Aug 2014 08:53:39 +0200 Subject: jump_label: Fix small typos in the documentation Was reading through the documentation of this code and noticed a few typos, missing commas, etc. Cc: Jason Baron Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Borislav Petkov Cc: Andrew Morton Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Mel Gorman Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 784304b222b3..98f923b6a0ea 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -8,28 +8,28 @@ * Copyright (C) 2011-2012 Peter Zijlstra * * Jump labels provide an interface to generate dynamic branches using - * self-modifying code. Assuming toolchain and architecture support the result - * of a "if (static_key_false(&key))" statement is a unconditional branch (which + * self-modifying code. Assuming toolchain and architecture support, the result + * of a "if (static_key_false(&key))" statement is an unconditional branch (which * defaults to false - and the true block is placed out of line). * * However at runtime we can change the branch target using * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key - * object and for as long as there are references all branches referring to + * object, and for as long as there are references all branches referring to * that particular key will point to the (out of line) true block. * - * Since this relies on modifying code the static_key_slow_{inc,dec}() functions + * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions * must be considered absolute slow paths (machine wide synchronization etc.). - * OTOH, since the affected branches are unconditional their runtime overhead + * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * - * When the control is directly exposed to userspace it is prudent to delay the + * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * - * Lacking toolchain and or architecture support, it falls back to a simple + * Lacking toolchain and or architecture support, jump labels fall back to a simple * conditional branch. * * struct static_key my_key = STATIC_KEY_INIT_TRUE; @@ -43,8 +43,7 @@ * * Not initializing the key (static data is initialized to 0s anyway) is the * same as using STATIC_KEY_INIT_FALSE. - * -*/ + */ #include #include -- cgit v1.2.3 From 26375b5c8449927f740ce0e837e23f45c951fb80 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Thu, 7 Aug 2014 16:37:58 +0900 Subject: mmc: dw_mmc: Slot quirk "disable-wp" is deprecated. Slot quirks "disable-wp" is deprecated. Instead, use the host quirk "disable-wp". (Because the slot-node is removed in dt-file.) Signed-off-by: Jaehoon Chung Tested-by: Sachin Kamat Acked-by: Seungwon Jeon Reviewed-by: Doug Anderson Tested-by: Doug Anderson Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 11 +++++++++-- include/linux/mmc/dw_mmc.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 39cf54f479d9..8f216edbdf08 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -996,7 +996,8 @@ static int dw_mci_get_ro(struct mmc_host *mmc) int gpio_ro = mmc_gpio_get_ro(mmc); /* Use platform get_ro function, else try on board write protect */ - if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) + if ((slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) || + (slot->host->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT)) read_only = 0; else if (!IS_ERR_VALUE(gpio_ro)) read_only = gpio_ro; @@ -2014,8 +2015,11 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot) /* get quirks */ for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++) - if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) + if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) { + dev_warn(dev, "Slot quirk %s is deprecated\n", + of_slot_quirks[idx].quirk); quirks |= of_slot_quirks[idx].id; + } return quirks; } @@ -2279,6 +2283,9 @@ static struct dw_mci_of_quirks { { .quirk = "broken-cd", .id = DW_MCI_QUIRK_BROKEN_CARD_DETECTION, + }, { + .quirk = "disable-wp", + .id = DW_MCI_QUIRK_NO_WRITE_PROTECT, }, }; diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index babaea93bca6..29ce014ab421 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -213,6 +213,8 @@ struct dw_mci_dma_ops { #define DW_MCI_QUIRK_HIGHSPEED BIT(2) /* Unreliable card detection */ #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3) +/* No write protect */ +#define DW_MCI_QUIRK_NO_WRITE_PROTECT BIT(4) /* Slot level quirks */ /* This slot has no write protect */ -- cgit v1.2.3 From 0d5501c1c828fb97d02af50aa9d2b1a5498b94e4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 8 Aug 2014 14:42:13 -0400 Subject: net: Always untag vlan-tagged traffic on input. Currently the functionality to untag traffic on input resides as part of the vlan module and is build only when VLAN support is enabled in the kernel. When VLAN is disabled, the function vlan_untag() turns into a stub and doesn't really untag the packets. This seems to create an interesting interaction between VMs supporting checksum offloading and some network drivers. There are some drivers that do not allow the user to change tx-vlan-offload feature of the driver. These drivers also seem to assume that any VLAN-tagged traffic they transmit will have the vlan information in the vlan_tci and not in the vlan header already in the skb. When transmitting skbs that already have tagged data with partial checksum set, the checksum doesn't appear to be updated correctly by the card thus resulting in a failure to establish TCP connections. The following is a packet trace taken on the receiver where a sender is a VM with a VLAN configued. The host VM is running on doest not have VLAN support and the outging interface on the host is tg3: 10:12:43.503055 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q (0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27243, offset 0, flags [DF], proto TCP (6), length 60) 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect -> 0x48d9), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val 4294837885 ecr 0,nop,wscale 7], length 0 10:12:44.505556 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q (0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27244, offset 0, flags [DF], proto TCP (6), length 60) 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect -> 0x44ee), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val 4294838888 ecr 0,nop,wscale 7], length 0 This connection finally times out. I've only access to the TG3 hardware in this configuration thus have only tested this with TG3 driver. There are a lot of other drivers that do not permit user changes to vlan acceleration features, and I don't know if they all suffere from a similar issue. The patch attempt to fix this another way. It moves the vlan header stipping code out of the vlan module and always builds it into the kernel network core. This way, even if vlan is not supported on a virtualizatoin host, the virtual machines running on top of such host will still work with VLANs enabled. CC: Patrick McHardy CC: Nithin Nayak Sujir CC: Michael Chan CC: Jiri Pirko Signed-off-by: Vladislav Yasevich Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 6 ------ include/linux/skbuff.h | 1 + net/8021q/vlan_core.c | 53 ------------------------------------------------- net/bridge/br_vlan.c | 2 +- net/core/dev.c | 2 +- net/core/skbuff.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 56 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 4967916fe4ac..d69f0577a319 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -187,7 +187,6 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) } extern bool vlan_do_receive(struct sk_buff **skb); -extern struct sk_buff *vlan_untag(struct sk_buff *skb); extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid); extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid); @@ -241,11 +240,6 @@ static inline bool vlan_do_receive(struct sk_buff **skb) return false; } -static inline struct sk_buff *vlan_untag(struct sk_buff *skb) -{ - return skb; -} - static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { return 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 11c270551d25..abde271c18ae 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2555,6 +2555,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); +struct sk_buff *skb_vlan_untag(struct sk_buff *skb); struct skb_checksum_ops { __wsum (*update)(const void *mem, int len, __wsum wsum); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 75d427763992..90cc2bdd4064 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -112,59 +112,6 @@ __be16 vlan_dev_vlan_proto(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_proto); -static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) -{ - if (skb_cow(skb, skb_headroom(skb)) < 0) { - kfree_skb(skb); - return NULL; - } - - memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); - skb->mac_header += VLAN_HLEN; - return skb; -} - -struct sk_buff *vlan_untag(struct sk_buff *skb) -{ - struct vlan_hdr *vhdr; - u16 vlan_tci; - - if (unlikely(vlan_tx_tag_present(skb))) { - /* vlan_tci is already set-up so leave this for another time */ - return skb; - } - - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(!skb)) - goto err_free; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) - goto err_free; - - vhdr = (struct vlan_hdr *) skb->data; - vlan_tci = ntohs(vhdr->h_vlan_TCI); - __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); - - skb_pull_rcsum(skb, VLAN_HLEN); - vlan_set_encap_proto(skb, vhdr); - - skb = vlan_reorder_header(skb); - if (unlikely(!skb)) - goto err_free; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb_reset_mac_len(skb); - - return skb; - -err_free: - kfree_skb(skb); - return NULL; -} -EXPORT_SYMBOL(vlan_untag); - - /* * vlan info and vid list */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index febb0f87fa37..e1bcd653899b 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -181,7 +181,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, */ if (unlikely(!vlan_tx_tag_present(skb) && skb->protocol == proto)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) return false; } diff --git a/net/core/dev.c b/net/core/dev.c index 1c15b189c52b..b65a5051361f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3602,7 +3602,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || skb->protocol == cpu_to_be16(ETH_P_8021AD)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto unlock; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 224506a6fa80..163b673f9e62 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -3973,3 +3974,55 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) return shinfo->gso_size; } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); + +static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) +{ + if (skb_cow(skb, skb_headroom(skb)) < 0) { + kfree_skb(skb); + return NULL; + } + + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + return skb; +} + +struct sk_buff *skb_vlan_untag(struct sk_buff *skb) +{ + struct vlan_hdr *vhdr; + u16 vlan_tci; + + if (unlikely(vlan_tx_tag_present(skb))) { + /* vlan_tci is already set-up so leave this for another time */ + return skb; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto err_free; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + goto err_free; + + vhdr = (struct vlan_hdr *)skb->data; + vlan_tci = ntohs(vhdr->h_vlan_TCI); + __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); + + skb_pull_rcsum(skb, VLAN_HLEN); + vlan_set_encap_proto(skb, vhdr); + + skb = skb_reorder_vlan_header(skb); + if (unlikely(!skb)) + goto err_free; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_reset_mac_len(skb); + + return skb; + +err_free: + kfree_skb(skb); + return NULL; +} +EXPORT_SYMBOL(skb_vlan_untag); -- cgit v1.2.3 From 14c4000a88afaaa2d0877cc86d42a74fde0f35e0 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Sat, 9 Aug 2014 11:15:30 +0530 Subject: printk: Add function to return log buffer address and size Platforms like IBM Power Systems supports service processor assisted dump. It provides interface to add memory region to be captured when system is crashed. During initialization/running we can add kernel memory region to be collected. Presently we don't have a way to get the log buffer base address and size. This patch adds support to return log buffer address and size. Signed-off-by: Vasant Hegde Signed-off-by: Benjamin Herrenschmidt Acked-by: Andrew Morton --- include/linux/printk.h | 3 +++ kernel/printk/printk.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 0990997a5304..d78125f73ac4 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -10,6 +10,9 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; +extern char *log_buf_addr_get(void); +extern u32 log_buf_len_get(void); + static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index de1a6bb6861d..e04c455a0e38 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -272,6 +272,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; +/* Return log buffer address */ +char *log_buf_addr_get(void) +{ + return log_buf; +} + +/* Return log buffer size */ +u32 log_buf_len_get(void) +{ + return log_buf_len; +} + /* human readable text of the record */ static char *log_text(const struct printk_log *msg) { -- cgit v1.2.3 From fadfe7be6e50de7f03913833b33c56cd8fb66bac Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 1 Aug 2014 14:33:02 +0200 Subject: perf: Add queued work to remove orphaned child events In cases when the owner task exits before the workload and the workload made some forks, all the events stay in until the last workload process exits. Thats' because each child event holds parent reference. We want to release all children events once the parent is gone, because at that time there's no process to read them anyway, so they're just eating resources. This removal races with process exit, which removes all events and fork, which clone events. To be clear of those two, adding work queue to remove orphaned child for context in case such event is detected. Using delayed work queue (with delay == 1), because we queue this work under perf scheduler callbacks. Normal work queue tries to wake up the queue process, which deadlocks on rq->lock in this place. Also preventing clones from abandoned parent event. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Mark Rutland Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Mark Rutland Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1406896382-18404-4-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +++ kernel/events/core.c | 87 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 707617a8c0f6..ef5b62bdb103 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,6 +52,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -507,6 +508,9 @@ struct perf_event_context { int nr_cgroups; /* cgroup evts */ int nr_branch_stack; /* branch_stack evt */ struct rcu_head rcu_head; + + struct delayed_work orphans_remove; + bool orphans_remove_sched; }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index bbb3ca22f07c..a25460559b4f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,8 @@ #include +static struct workqueue_struct *perf_wq; + struct remote_function_call { struct task_struct *p; int (*func)(void *info); @@ -1381,6 +1383,45 @@ out: perf_event__header_size(tmp); } +/* + * User event without the task. + */ +static bool is_orphaned_event(struct perf_event *event) +{ + return event && !is_kernel_event(event) && !event->owner; +} + +/* + * Event has a parent but parent's task finished and it's + * alive only because of children holding refference. + */ +static bool is_orphaned_child(struct perf_event *event) +{ + return is_orphaned_event(event->parent); +} + +static void orphans_remove_work(struct work_struct *work); + +static void schedule_orphans_remove(struct perf_event_context *ctx) +{ + if (!ctx->task || ctx->orphans_remove_sched || !perf_wq) + return; + + if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) { + get_ctx(ctx); + ctx->orphans_remove_sched = true; + } +} + +static int __init perf_workqueue_init(void) +{ + perf_wq = create_singlethread_workqueue("perf"); + WARN(!perf_wq, "failed to create perf workqueue\n"); + return perf_wq ? 0 : -1; +} + +core_initcall(perf_workqueue_init); + static inline int event_filter_match(struct perf_event *event) { @@ -1430,6 +1471,9 @@ event_sched_out(struct perf_event *event, if (event->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; + if (is_orphaned_child(event)) + schedule_orphans_remove(ctx); + perf_pmu_enable(event->pmu); } @@ -1732,6 +1776,9 @@ event_sched_in(struct perf_event *event, if (event->attr.exclusive) cpuctx->exclusive = 1; + if (is_orphaned_child(event)) + schedule_orphans_remove(ctx); + out: perf_pmu_enable(event->pmu); @@ -3074,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) INIT_LIST_HEAD(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); + INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work); } static struct perf_event_context * @@ -3405,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +/* + * Remove all orphanes events from the context. + */ +static void orphans_remove_work(struct work_struct *work) +{ + struct perf_event_context *ctx; + struct perf_event *event, *tmp; + + ctx = container_of(work, struct perf_event_context, + orphans_remove.work); + + mutex_lock(&ctx->mutex); + list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) { + struct perf_event *parent_event = event->parent; + + if (!is_orphaned_child(event)) + continue; + + perf_remove_from_context(event, true); + + mutex_lock(&parent_event->child_mutex); + list_del_init(&event->child_list); + mutex_unlock(&parent_event->child_mutex); + + free_event(event); + put_event(parent_event); + } + + raw_spin_lock_irq(&ctx->lock); + ctx->orphans_remove_sched = false; + raw_spin_unlock_irq(&ctx->lock); + mutex_unlock(&ctx->mutex); + + put_ctx(ctx); +} + u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; @@ -7709,7 +7793,8 @@ inherit_event(struct perf_event *parent_event, if (IS_ERR(child_event)) return child_event; - if (!atomic_long_inc_not_zero(&parent_event->refcount)) { + if (is_orphaned_event(parent_event) || + !atomic_long_inc_not_zero(&parent_event->refcount)) { free_event(child_event); return NULL; } -- cgit v1.2.3 From 770eee1fd38c70a009b321f5dbe64358f42511fd Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 11 Aug 2014 21:27:12 +0200 Subject: perf/x86: Fix data source encoding issues for load latency/precise store This patch fixes issues introuduce by Andi's previous patch 'Revamp PEBS' series. This patch fixes the following: - precise_store_data_hsw() encode the mem op type whenever we can - precise_store_data_hsw set the default data source correctly - 0 is not a valid init value for data source. Define PERF_MEM_NA as the default value This bug was actually introduced by commit 722e76e60f2775c21b087ff12c5e678cf0ebcaaf Author: Stephane Eranian Date: Thu May 15 17:56:44 2014 +0200 fix Haswell precise store data source encoding Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1407785233-32193-4-git-send-email-eranian@google.com Cc: Arnaldo Carvalho de Melo Cc: ak@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 11 +++++++---- include/linux/perf_event.h | 9 ++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index a9b60f32064f..67919ce0f76a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -113,9 +113,12 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status) union perf_mem_data_src dse; u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK; - dse.val = 0; - dse.mem_op = PERF_MEM_OP_NA; - dse.mem_lvl = PERF_MEM_LVL_NA; + dse.val = PERF_MEM_NA; + + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + dse.mem_op = PERF_MEM_OP_STORE; + else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW) + dse.mem_op = PERF_MEM_OP_LOAD; /* * L1 info only valid for following events: @@ -126,7 +129,7 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status) * MEM_UOPS_RETIRED.ALL_STORES */ if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0) - return dse.mem_lvl; + return dse.val; if (status & 1) dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ef5b62bdb103..f0a1036b1911 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -608,6 +608,13 @@ struct perf_sample_data { u64 txn; }; +/* default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA)) + static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) { @@ -620,7 +627,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->regs_user.regs = NULL; data->stack_user_size = 0; data->weight = 0; - data->data_src.val = 0; + data->data_src.val = PERF_MEM_NA; data->txn = 0; } -- cgit v1.2.3 From 2e39465abc4b7856a0ea6fcf4f6b4668bb5db877 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Aug 2014 12:07:15 +0200 Subject: locking: Remove deprecated smp_mb__() barriers Its been a while and there are no in-tree users left, so remove the deprecated barriers. Signed-off-by: Peter Zijlstra Cc: Chen, Gong Cc: Jacob Pan Cc: Joe Perches Cc: John Sullivan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Srinivas Pandruvada Cc: Theodore Ts'o Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 36 ------------------------------------ include/linux/bitops.h | 20 -------------------- kernel/sched/core.c | 16 ---------------- 3 files changed, 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index fef3a809e7cf..5b08a8540ecf 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -3,42 +3,6 @@ #define _LINUX_ATOMIC_H #include -/* - * Provide __deprecated wrappers for the new interface, avoid flag day changes. - * We need the ugly external functions to break header recursion hell. - */ -#ifndef smp_mb__before_atomic_inc -static inline void __deprecated smp_mb__before_atomic_inc(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_atomic_inc -static inline void __deprecated smp_mb__after_atomic_inc(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - -#ifndef smp_mb__before_atomic_dec -static inline void __deprecated smp_mb__before_atomic_dec(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_atomic_dec -static inline void __deprecated smp_mb__after_atomic_dec(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - /** * atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cbc5833fb221..be5fd38bd5a0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,26 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w); */ #include -/* - * Provide __deprecated wrappers for the new interface, avoid flag day changes. - * We need the ugly external functions to break header recursion hell. - */ -#ifndef smp_mb__before_clear_bit -static inline void __deprecated smp_mb__before_clear_bit(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_clear_bit -static inline void __deprecated smp_mb__after_clear_bit(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ (bit) < (size); \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1211575a2208..76c518c9b3a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -90,22 +90,6 @@ #define CREATE_TRACE_POINTS #include -#ifdef smp_mb__before_atomic -void __smp_mb__before_atomic(void) -{ - smp_mb__before_atomic(); -} -EXPORT_SYMBOL(__smp_mb__before_atomic); -#endif - -#ifdef smp_mb__after_atomic -void __smp_mb__after_atomic(void) -{ - smp_mb__after_atomic(); -} -EXPORT_SYMBOL(__smp_mb__after_atomic); -#endif - void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) { unsigned long delta; -- cgit v1.2.3 From 7608a43d8f2e02f8b532f8e11481d7ecf8b5d3f9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:54 -0700 Subject: locking/mutexes: Use MUTEX_SPIN_ON_OWNER when appropriate 4badad35 ("locking/mutex: Disable optimistic spinning on some architectures") added a ARCH_SUPPORTS_ATOMIC_RMW flag to disable the mutex optimistic feature on specific archs. Because CONFIG_MUTEX_SPIN_ON_OWNER only depended on DEBUG and SMP, it was ok to have the ->owner field conditional a bit flexible. However by adding a new variable to the matter, we can waste space with the unused field, ie: CONFIG_SMP && (!CONFIG_MUTEX_SPIN_ON_OWNER && !CONFIG_DEBUG_MUTEX). Signed-off-by: Davidlohr Bueso Acked-by: Jason Low Signed-off-by: Peter Zijlstra Cc: aswin@hp.com Cc: Davidlohr Bueso Cc: Heiko Carstens Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Tim Chen Link: http://lkml.kernel.org/r/1406752916-3341-5-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- kernel/locking/mutex.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 8d5535c58cc2..e4c29418f407 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -52,7 +52,7 @@ struct mutex { atomic_t count; spinlock_t wait_lock; struct list_head wait_list; -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) +#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 4115fbf83b12..5cda397607f2 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -16,7 +16,7 @@ #define mutex_remove_waiter(lock, waiter, ti) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#ifdef CONFIG_SMP +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER static inline void mutex_set_owner(struct mutex *lock) { lock->owner = current; -- cgit v1.2.3 From 214e0aed639ef40987bf6159fad303171a6de31e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:55 -0700 Subject: locking/Documentation: Move locking related docs into Documentation/locking/ Specifically: Documentation/locking/lockdep-design.txt Documentation/locking/lockstat.txt Documentation/locking/mutex-design.txt Documentation/locking/rt-mutex-design.txt Documentation/locking/rt-mutex.txt Documentation/locking/spinlocks.txt Documentation/locking/ww-mutex-design.txt Signed-off-by: Davidlohr Bueso Acked-by: Randy Dunlap Signed-off-by: Peter Zijlstra Cc: jason.low2@hp.com Cc: aswin@hp.com Cc: Alexei Starovoitov Cc: Al Viro Cc: Andrew Morton Cc: Chris Mason Cc: Dan Streetman Cc: David Airlie Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Jason Low Cc: Josef Bacik Cc: Kees Cook Cc: Linus Torvalds Cc: Lubomir Rintel Cc: Masanari Iida Cc: Paul E. McKenney Cc: Randy Dunlap Cc: Tim Chen Cc: Vineet Gupta Cc: fengguang.wu@intel.com Link: http://lkml.kernel.org/r/1406752916-3341-6-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- Documentation/00-INDEX | 2 + Documentation/DocBook/kernel-locking.tmpl | 2 +- Documentation/lockdep-design.txt | 286 ----------- Documentation/locking/lockdep-design.txt | 286 +++++++++++ Documentation/locking/lockstat.txt | 178 +++++++ Documentation/locking/mutex-design.txt | 157 ++++++ Documentation/locking/rt-mutex-design.txt | 781 ++++++++++++++++++++++++++++++ Documentation/locking/rt-mutex.txt | 79 +++ Documentation/locking/spinlocks.txt | 167 +++++++ Documentation/locking/ww-mutex-design.txt | 344 +++++++++++++ Documentation/lockstat.txt | 178 ------- Documentation/mutex-design.txt | 157 ------ Documentation/rt-mutex-design.txt | 781 ------------------------------ Documentation/rt-mutex.txt | 79 --- Documentation/spinlocks.txt | 167 ------- Documentation/ww-mutex-design.txt | 344 ------------- MAINTAINERS | 4 +- drivers/gpu/drm/drm_modeset_lock.c | 2 +- include/linux/lockdep.h | 2 +- include/linux/mutex.h | 2 +- include/linux/rwsem.h | 2 +- kernel/locking/mutex.c | 2 +- kernel/locking/rtmutex.c | 2 +- lib/Kconfig.debug | 4 +- 24 files changed, 2005 insertions(+), 2003 deletions(-) delete mode 100644 Documentation/lockdep-design.txt create mode 100644 Documentation/locking/lockdep-design.txt create mode 100644 Documentation/locking/lockstat.txt create mode 100644 Documentation/locking/mutex-design.txt create mode 100644 Documentation/locking/rt-mutex-design.txt create mode 100644 Documentation/locking/rt-mutex.txt create mode 100644 Documentation/locking/spinlocks.txt create mode 100644 Documentation/locking/ww-mutex-design.txt delete mode 100644 Documentation/lockstat.txt delete mode 100644 Documentation/mutex-design.txt delete mode 100644 Documentation/rt-mutex-design.txt delete mode 100644 Documentation/rt-mutex.txt delete mode 100644 Documentation/spinlocks.txt delete mode 100644 Documentation/ww-mutex-design.txt (limited to 'include/linux') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 27e67a98b7be..1750fcef1ab4 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -287,6 +287,8 @@ local_ops.txt - semantics and behavior of local atomic operations. lockdep-design.txt - documentation on the runtime locking correctness validator. +locking/ + - directory with info about kernel locking primitives lockstat.txt - info on collecting statistics on locks (and contention). lockup-watchdogs.txt diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index e584ee12a1e7..7c9cc4846cb6 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1972,7 +1972,7 @@ machines due to caching. - Documentation/spinlocks.txt: + Documentation/locking/spinlocks.txt: Linus Torvalds' spinlocking tutorial in the kernel sources. diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt deleted file mode 100644 index 5dbc99c04f6e..000000000000 --- a/Documentation/lockdep-design.txt +++ /dev/null @@ -1,286 +0,0 @@ -Runtime locking correctness validator -===================================== - -started by Ingo Molnar -additions by Arjan van de Ven - -Lock-class ----------- - -The basic object the validator operates upon is a 'class' of locks. - -A class of locks is a group of locks that are logically the same with -respect to locking rules, even if the locks may have multiple (possibly -tens of thousands of) instantiations. For example a lock in the inode -struct is one class, while each inode has its own instantiation of that -lock class. - -The validator tracks the 'state' of lock-classes, and it tracks -dependencies between different lock-classes. The validator maintains a -rolling proof that the state and the dependencies are correct. - -Unlike an lock instantiation, the lock-class itself never goes away: when -a lock-class is used for the first time after bootup it gets registered, -and all subsequent uses of that lock-class will be attached to this -lock-class. - -State ------ - -The validator tracks lock-class usage history into 4n + 1 separate state bits: - -- 'ever held in STATE context' -- 'ever held as readlock in STATE context' -- 'ever held with STATE enabled' -- 'ever held as readlock with STATE enabled' - -Where STATE can be either one of (kernel/lockdep_states.h) - - hardirq - - softirq - - reclaim_fs - -- 'ever used' [ == !unused ] - -When locking rules are violated, these state bits are presented in the -locking error messages, inside curlies. A contrived example: - - modprobe/2287 is trying to acquire lock: - (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 - - but task is already holding lock: - (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 - - -The bit position indicates STATE, STATE-read, for each of the states listed -above, and the character displayed in each indicates: - - '.' acquired while irqs disabled and not in irq context - '-' acquired in irq context - '+' acquired with irqs enabled - '?' acquired in irq context with irqs enabled. - -Unused mutexes cannot be part of the cause of an error. - - -Single-lock state rules: ------------------------- - -A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The -following states are exclusive, and only one of them is allowed to be -set for any lock-class: - - and - and - -The validator detects and reports lock usage that violate these -single-lock state rules. - -Multi-lock dependency rules: ----------------------------- - -The same lock-class must not be acquired twice, because this could lead -to lock recursion deadlocks. - -Furthermore, two locks may not be taken in different order: - - -> - -> - -because this could lead to lock inversion deadlocks. (The validator -finds such dependencies in arbitrary complexity, i.e. there can be any -other locking sequence between the acquire-lock operations, the -validator will still track all dependencies between locks.) - -Furthermore, the following usage based lock dependencies are not allowed -between any two lock-classes: - - -> - -> - -The first rule comes from the fact the a hardirq-safe lock could be -taken by a hardirq context, interrupting a hardirq-unsafe lock - and -thus could result in a lock inversion deadlock. Likewise, a softirq-safe -lock could be taken by an softirq context, interrupting a softirq-unsafe -lock. - -The above rules are enforced for any locking sequence that occurs in the -kernel: when acquiring a new lock, the validator checks whether there is -any rule violation between the new lock and any of the held locks. - -When a lock-class changes its state, the following aspects of the above -dependency rules are enforced: - -- if a new hardirq-safe lock is discovered, we check whether it - took any hardirq-unsafe lock in the past. - -- if a new softirq-safe lock is discovered, we check whether it took - any softirq-unsafe lock in the past. - -- if a new hardirq-unsafe lock is discovered, we check whether any - hardirq-safe lock took it in the past. - -- if a new softirq-unsafe lock is discovered, we check whether any - softirq-safe lock took it in the past. - -(Again, we do these checks too on the basis that an interrupt context -could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which -could lead to a lock inversion deadlock - even if that lock scenario did -not trigger in practice yet.) - -Exception: Nested data dependencies leading to nested locking -------------------------------------------------------------- - -There are a few cases where the Linux kernel acquires more than one -instance of the same lock-class. Such cases typically happen when there -is some sort of hierarchy within objects of the same type. In these -cases there is an inherent "natural" ordering between the two objects -(defined by the properties of the hierarchy), and the kernel grabs the -locks in this fixed order on each of the objects. - -An example of such an object hierarchy that results in "nested locking" -is that of a "whole disk" block-dev object and a "partition" block-dev -object; the partition is "part of" the whole device and as long as one -always takes the whole disk lock as a higher lock than the partition -lock, the lock ordering is fully correct. The validator does not -automatically detect this natural ordering, as the locking rule behind -the ordering is not static. - -In order to teach the validator about this correct usage model, new -versions of the various locking primitives were added that allow you to -specify a "nesting level". An example call, for the block device mutex, -looks like this: - -enum bdev_bd_mutex_lock_class -{ - BD_MUTEX_NORMAL, - BD_MUTEX_WHOLE, - BD_MUTEX_PARTITION -}; - - mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION); - -In this case the locking is done on a bdev object that is known to be a -partition. - -The validator treats a lock that is taken in such a nested fashion as a -separate (sub)class for the purposes of validation. - -Note: When changing code to use the _nested() primitives, be careful and -check really thoroughly that the hierarchy is correctly mapped; otherwise -you can get false positives or false negatives. - -Proof of 100% correctness: --------------------------- - -The validator achieves perfect, mathematical 'closure' (proof of locking -correctness) in the sense that for every simple, standalone single-task -locking sequence that occurred at least once during the lifetime of the -kernel, the validator proves it with a 100% certainty that no -combination and timing of these locking sequences can cause any class of -lock related deadlock. [*] - -I.e. complex multi-CPU and multi-task locking scenarios do not have to -occur in practice to prove a deadlock: only the simple 'component' -locking chains have to occur at least once (anytime, in any -task/context) for the validator to be able to prove correctness. (For -example, complex deadlocks that would normally need more than 3 CPUs and -a very unlikely constellation of tasks, irq-contexts and timings to -occur, can be detected on a plain, lightly loaded single-CPU system as -well!) - -This radically decreases the complexity of locking related QA of the -kernel: what has to be done during QA is to trigger as many "simple" -single-task locking dependencies in the kernel as possible, at least -once, to prove locking correctness - instead of having to trigger every -possible combination of locking interaction between CPUs, combined with -every possible hardirq and softirq nesting scenario (which is impossible -to do in practice). - -[*] assuming that the validator itself is 100% correct, and no other - part of the system corrupts the state of the validator in any way. - We also assume that all NMI/SMM paths [which could interrupt - even hardirq-disabled codepaths] are correct and do not interfere - with the validator. We also assume that the 64-bit 'chain hash' - value is unique for every lock-chain in the system. Also, lock - recursion must not be higher than 20. - -Performance: ------------- - -The above rules require _massive_ amounts of runtime checking. If we did -that for every lock taken and for every irqs-enable event, it would -render the system practically unusably slow. The complexity of checking -is O(N^2), so even with just a few hundred lock-classes we'd have to do -tens of thousands of checks for every event. - -This problem is solved by checking any given 'locking scenario' (unique -sequence of locks taken after each other) only once. A simple stack of -held locks is maintained, and a lightweight 64-bit hash value is -calculated, which hash is unique for every lock chain. The hash value, -when the chain is validated for the first time, is then put into a hash -table, which hash-table can be checked in a lockfree manner. If the -locking chain occurs again later on, the hash table tells us that we -dont have to validate the chain again. - -Troubleshooting: ----------------- - -The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes. -Exceeding this number will trigger the following lockdep warning: - - (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) - -By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical -desktop systems have less than 1,000 lock classes, so this warning -normally results from lock-class leakage or failure to properly -initialize locks. These two problems are illustrated below: - -1. Repeated module loading and unloading while running the validator - will result in lock-class leakage. The issue here is that each - load of the module will create a new set of lock classes for - that module's locks, but module unloading does not remove old - classes (see below discussion of reuse of lock classes for why). - Therefore, if that module is loaded and unloaded repeatedly, - the number of lock classes will eventually reach the maximum. - -2. Using structures such as arrays that have large numbers of - locks that are not explicitly initialized. For example, - a hash table with 8192 buckets where each bucket has its own - spinlock_t will consume 8192 lock classes -unless- each spinlock - is explicitly initialized at runtime, for example, using the - run-time spin_lock_init() as opposed to compile-time initializers - such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize - the per-bucket spinlocks would guarantee lock-class overflow. - In contrast, a loop that called spin_lock_init() on each lock - would place all 8192 locks into a single lock class. - - The moral of this story is that you should always explicitly - initialize your locks. - -One might argue that the validator should be modified to allow -lock classes to be reused. However, if you are tempted to make this -argument, first review the code and think through the changes that would -be required, keeping in mind that the lock classes to be removed are -likely to be linked into the lock-dependency graph. This turns out to -be harder to do than to say. - -Of course, if you do run out of lock classes, the next thing to do is -to find the offending lock classes. First, the following command gives -you the number of lock classes currently in use along with the maximum: - - grep "lock-classes" /proc/lockdep_stats - -This command produces the following output on a modest system: - - lock-classes: 748 [max: 8191] - -If the number allocated (748 above) increases continually over time, -then there is likely a leak. The following command can be used to -identify the leaking lock classes: - - grep "BD" /proc/lockdep - -Run the command and save the output, then compare against the output from -a later run of this command to identify the leakers. This same output -can also help you find situations where runtime lock initialization has -been omitted. diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt new file mode 100644 index 000000000000..5dbc99c04f6e --- /dev/null +++ b/Documentation/locking/lockdep-design.txt @@ -0,0 +1,286 @@ +Runtime locking correctness validator +===================================== + +started by Ingo Molnar +additions by Arjan van de Ven + +Lock-class +---------- + +The basic object the validator operates upon is a 'class' of locks. + +A class of locks is a group of locks that are logically the same with +respect to locking rules, even if the locks may have multiple (possibly +tens of thousands of) instantiations. For example a lock in the inode +struct is one class, while each inode has its own instantiation of that +lock class. + +The validator tracks the 'state' of lock-classes, and it tracks +dependencies between different lock-classes. The validator maintains a +rolling proof that the state and the dependencies are correct. + +Unlike an lock instantiation, the lock-class itself never goes away: when +a lock-class is used for the first time after bootup it gets registered, +and all subsequent uses of that lock-class will be attached to this +lock-class. + +State +----- + +The validator tracks lock-class usage history into 4n + 1 separate state bits: + +- 'ever held in STATE context' +- 'ever held as readlock in STATE context' +- 'ever held with STATE enabled' +- 'ever held as readlock with STATE enabled' + +Where STATE can be either one of (kernel/lockdep_states.h) + - hardirq + - softirq + - reclaim_fs + +- 'ever used' [ == !unused ] + +When locking rules are violated, these state bits are presented in the +locking error messages, inside curlies. A contrived example: + + modprobe/2287 is trying to acquire lock: + (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 + + but task is already holding lock: + (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 + + +The bit position indicates STATE, STATE-read, for each of the states listed +above, and the character displayed in each indicates: + + '.' acquired while irqs disabled and not in irq context + '-' acquired in irq context + '+' acquired with irqs enabled + '?' acquired in irq context with irqs enabled. + +Unused mutexes cannot be part of the cause of an error. + + +Single-lock state rules: +------------------------ + +A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The +following states are exclusive, and only one of them is allowed to be +set for any lock-class: + + and + and + +The validator detects and reports lock usage that violate these +single-lock state rules. + +Multi-lock dependency rules: +---------------------------- + +The same lock-class must not be acquired twice, because this could lead +to lock recursion deadlocks. + +Furthermore, two locks may not be taken in different order: + + -> + -> + +because this could lead to lock inversion deadlocks. (The validator +finds such dependencies in arbitrary complexity, i.e. there can be any +other locking sequence between the acquire-lock operations, the +validator will still track all dependencies between locks.) + +Furthermore, the following usage based lock dependencies are not allowed +between any two lock-classes: + + -> + -> + +The first rule comes from the fact the a hardirq-safe lock could be +taken by a hardirq context, interrupting a hardirq-unsafe lock - and +thus could result in a lock inversion deadlock. Likewise, a softirq-safe +lock could be taken by an softirq context, interrupting a softirq-unsafe +lock. + +The above rules are enforced for any locking sequence that occurs in the +kernel: when acquiring a new lock, the validator checks whether there is +any rule violation between the new lock and any of the held locks. + +When a lock-class changes its state, the following aspects of the above +dependency rules are enforced: + +- if a new hardirq-safe lock is discovered, we check whether it + took any hardirq-unsafe lock in the past. + +- if a new softirq-safe lock is discovered, we check whether it took + any softirq-unsafe lock in the past. + +- if a new hardirq-unsafe lock is discovered, we check whether any + hardirq-safe lock took it in the past. + +- if a new softirq-unsafe lock is discovered, we check whether any + softirq-safe lock took it in the past. + +(Again, we do these checks too on the basis that an interrupt context +could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which +could lead to a lock inversion deadlock - even if that lock scenario did +not trigger in practice yet.) + +Exception: Nested data dependencies leading to nested locking +------------------------------------------------------------- + +There are a few cases where the Linux kernel acquires more than one +instance of the same lock-class. Such cases typically happen when there +is some sort of hierarchy within objects of the same type. In these +cases there is an inherent "natural" ordering between the two objects +(defined by the properties of the hierarchy), and the kernel grabs the +locks in this fixed order on each of the objects. + +An example of such an object hierarchy that results in "nested locking" +is that of a "whole disk" block-dev object and a "partition" block-dev +object; the partition is "part of" the whole device and as long as one +always takes the whole disk lock as a higher lock than the partition +lock, the lock ordering is fully correct. The validator does not +automatically detect this natural ordering, as the locking rule behind +the ordering is not static. + +In order to teach the validator about this correct usage model, new +versions of the various locking primitives were added that allow you to +specify a "nesting level". An example call, for the block device mutex, +looks like this: + +enum bdev_bd_mutex_lock_class +{ + BD_MUTEX_NORMAL, + BD_MUTEX_WHOLE, + BD_MUTEX_PARTITION +}; + + mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION); + +In this case the locking is done on a bdev object that is known to be a +partition. + +The validator treats a lock that is taken in such a nested fashion as a +separate (sub)class for the purposes of validation. + +Note: When changing code to use the _nested() primitives, be careful and +check really thoroughly that the hierarchy is correctly mapped; otherwise +you can get false positives or false negatives. + +Proof of 100% correctness: +-------------------------- + +The validator achieves perfect, mathematical 'closure' (proof of locking +correctness) in the sense that for every simple, standalone single-task +locking sequence that occurred at least once during the lifetime of the +kernel, the validator proves it with a 100% certainty that no +combination and timing of these locking sequences can cause any class of +lock related deadlock. [*] + +I.e. complex multi-CPU and multi-task locking scenarios do not have to +occur in practice to prove a deadlock: only the simple 'component' +locking chains have to occur at least once (anytime, in any +task/context) for the validator to be able to prove correctness. (For +example, complex deadlocks that would normally need more than 3 CPUs and +a very unlikely constellation of tasks, irq-contexts and timings to +occur, can be detected on a plain, lightly loaded single-CPU system as +well!) + +This radically decreases the complexity of locking related QA of the +kernel: what has to be done during QA is to trigger as many "simple" +single-task locking dependencies in the kernel as possible, at least +once, to prove locking correctness - instead of having to trigger every +possible combination of locking interaction between CPUs, combined with +every possible hardirq and softirq nesting scenario (which is impossible +to do in practice). + +[*] assuming that the validator itself is 100% correct, and no other + part of the system corrupts the state of the validator in any way. + We also assume that all NMI/SMM paths [which could interrupt + even hardirq-disabled codepaths] are correct and do not interfere + with the validator. We also assume that the 64-bit 'chain hash' + value is unique for every lock-chain in the system. Also, lock + recursion must not be higher than 20. + +Performance: +------------ + +The above rules require _massive_ amounts of runtime checking. If we did +that for every lock taken and for every irqs-enable event, it would +render the system practically unusably slow. The complexity of checking +is O(N^2), so even with just a few hundred lock-classes we'd have to do +tens of thousands of checks for every event. + +This problem is solved by checking any given 'locking scenario' (unique +sequence of locks taken after each other) only once. A simple stack of +held locks is maintained, and a lightweight 64-bit hash value is +calculated, which hash is unique for every lock chain. The hash value, +when the chain is validated for the first time, is then put into a hash +table, which hash-table can be checked in a lockfree manner. If the +locking chain occurs again later on, the hash table tells us that we +dont have to validate the chain again. + +Troubleshooting: +---------------- + +The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes. +Exceeding this number will trigger the following lockdep warning: + + (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) + +By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical +desktop systems have less than 1,000 lock classes, so this warning +normally results from lock-class leakage or failure to properly +initialize locks. These two problems are illustrated below: + +1. Repeated module loading and unloading while running the validator + will result in lock-class leakage. The issue here is that each + load of the module will create a new set of lock classes for + that module's locks, but module unloading does not remove old + classes (see below discussion of reuse of lock classes for why). + Therefore, if that module is loaded and unloaded repeatedly, + the number of lock classes will eventually reach the maximum. + +2. Using structures such as arrays that have large numbers of + locks that are not explicitly initialized. For example, + a hash table with 8192 buckets where each bucket has its own + spinlock_t will consume 8192 lock classes -unless- each spinlock + is explicitly initialized at runtime, for example, using the + run-time spin_lock_init() as opposed to compile-time initializers + such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize + the per-bucket spinlocks would guarantee lock-class overflow. + In contrast, a loop that called spin_lock_init() on each lock + would place all 8192 locks into a single lock class. + + The moral of this story is that you should always explicitly + initialize your locks. + +One might argue that the validator should be modified to allow +lock classes to be reused. However, if you are tempted to make this +argument, first review the code and think through the changes that would +be required, keeping in mind that the lock classes to be removed are +likely to be linked into the lock-dependency graph. This turns out to +be harder to do than to say. + +Of course, if you do run out of lock classes, the next thing to do is +to find the offending lock classes. First, the following command gives +you the number of lock classes currently in use along with the maximum: + + grep "lock-classes" /proc/lockdep_stats + +This command produces the following output on a modest system: + + lock-classes: 748 [max: 8191] + +If the number allocated (748 above) increases continually over time, +then there is likely a leak. The following command can be used to +identify the leaking lock classes: + + grep "BD" /proc/lockdep + +Run the command and save the output, then compare against the output from +a later run of this command to identify the leakers. This same output +can also help you find situations where runtime lock initialization has +been omitted. diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt new file mode 100644 index 000000000000..7428773a1e69 --- /dev/null +++ b/Documentation/locking/lockstat.txt @@ -0,0 +1,178 @@ + +LOCK STATISTICS + +- WHAT + +As the name suggests, it provides statistics on locks. + +- WHY + +Because things like lock contention can severely impact performance. + +- HOW + +Lockdep already has hooks in the lock functions and maps lock instances to +lock classes. We build on that (see Documentation/lokcing/lockdep-design.txt). +The graph below shows the relation between the lock functions and the various +hooks therein. + + __acquire + | + lock _____ + | \ + | __contended + | | + | + | _______/ + |/ + | + __acquired + | + . + + . + | + __release + | + unlock + +lock, unlock - the regular lock functions +__* - the hooks +<> - states + +With these hooks we provide the following statistics: + + con-bounces - number of lock contention that involved x-cpu data + contentions - number of lock acquisitions that had to wait + wait time min - shortest (non-0) time we ever had to wait for a lock + max - longest time we ever had to wait for a lock + total - total time we spend waiting on this lock + avg - average time spent waiting on this lock + acq-bounces - number of lock acquisitions that involved x-cpu data + acquisitions - number of times we took the lock + hold time min - shortest (non-0) time we ever held the lock + max - longest time we ever held the lock + total - total time this lock was held + avg - average time this lock was held + +These numbers are gathered per lock class, per read/write state (when +applicable). + +It also tracks 4 contention points per class. A contention point is a call site +that had to wait on lock acquisition. + + - CONFIGURATION + +Lock statistics are enabled via CONFIG_LOCK_STAT. + + - USAGE + +Enable collection of statistics: + +# echo 1 >/proc/sys/kernel/lock_stat + +Disable collection of statistics: + +# echo 0 >/proc/sys/kernel/lock_stat + +Look at the current lock statistics: + +( line numbers not part of actual output, done for clarity in the explanation + below ) + +# less /proc/lock_stat + +01 lock_stat version 0.4 +02----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg +04----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +05 +06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99 +07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77 +08 --------------- +09 &mm->mmap_sem 1 [] khugepaged_scan_mm_slot+0x57/0x280 +19 &mm->mmap_sem 96 [] __do_page_fault+0x1d4/0x510 +11 &mm->mmap_sem 34 [] vm_mmap_pgoff+0x87/0xd0 +12 &mm->mmap_sem 17 [] vm_munmap+0x41/0x80 +13 --------------- +14 &mm->mmap_sem 1 [] dup_mmap+0x2a/0x3f0 +15 &mm->mmap_sem 60 [] SyS_mprotect+0xe9/0x250 +16 &mm->mmap_sem 41 [] __do_page_fault+0x1d4/0x510 +17 &mm->mmap_sem 68 [] vm_mmap_pgoff+0x87/0xd0 +18 +19............................................................................................................................................................................................................................. +20 +21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48 +22 --------------- +23 unix_table_lock 45 [] unix_create1+0x16e/0x1b0 +24 unix_table_lock 47 [] unix_release_sock+0x31/0x250 +25 unix_table_lock 15 [] unix_find_other+0x117/0x230 +26 unix_table_lock 5 [] unix_autobind+0x11f/0x1b0 +27 --------------- +28 unix_table_lock 39 [] unix_release_sock+0x31/0x250 +29 unix_table_lock 49 [] unix_create1+0x16e/0x1b0 +30 unix_table_lock 20 [] unix_find_other+0x117/0x230 +31 unix_table_lock 4 [] unix_autobind+0x11f/0x1b0 + + +This excerpt shows the first two lock class statistics. Line 01 shows the +output version - each time the format changes this will be updated. Line 02-04 +show the header with column descriptions. Lines 05-18 and 20-31 show the actual +statistics. These statistics come in two parts; the actual stats separated by a +short separator (line 08, 13) from the contention points. + +The first lock (05-18) is a read/write lock, and shows two lines above the +short separator. The contention points don't match the column descriptors, +they have two: contentions and [] symbol. The second set of contention +points are the points we're contending with. + +The integer part of the time values is in us. + +Dealing with nested locks, subclasses may appear: + +32........................................................................................................................................................................................................................... +33 +34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82 +35 --------- +36 &rq->lock 645 [] task_rq_lock+0x43/0x75 +37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a +38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a +39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb +40 --------- +41 &rq->lock 77 [] task_rq_lock+0x43/0x75 +42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a +43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 +44 &rq->lock 893 [] schedule+0x157/0x7b8 +45 +46........................................................................................................................................................................................................................... +47 +48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84 +49 ----------- +50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 +51 ----------- +52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 +53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 +54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 +55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a + +Line 48 shows statistics for the second subclass (/1) of &rq->lock class +(subclass starts from 0), since in this case, as line 50 suggests, +double_rq_lock actually acquires a nested lock of two spinlocks. + +View the top contending locks: + +# grep : /proc/lock_stat | head + clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71 + tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59 + &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59 + &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69 + &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93 + tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72 + tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89 + rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89 + &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27 + rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76 + +Clear the statistics: + +# echo 0 > /proc/lock_stat diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt new file mode 100644 index 000000000000..ee231ed09ec6 --- /dev/null +++ b/Documentation/locking/mutex-design.txt @@ -0,0 +1,157 @@ +Generic Mutex Subsystem + +started by Ingo Molnar +updated by Davidlohr Bueso + +What are mutexes? +----------------- + +In the Linux kernel, mutexes refer to a particular locking primitive +that enforces serialization on shared memory systems, and not only to +the generic term referring to 'mutual exclusion' found in academia +or similar theoretical text books. Mutexes are sleeping locks which +behave similarly to binary semaphores, and were introduced in 2006[1] +as an alternative to these. This new data structure provided a number +of advantages, including simpler interfaces, and at that time smaller +code (see Disadvantages). + +[1] http://lwn.net/Articles/164802/ + +Implementation +-------------- + +Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h +and implemented in kernel/locking/mutex.c. These locks use a three +state atomic counter (->count) to represent the different possible +transitions that can occur during the lifetime of a lock: + + 1: unlocked + 0: locked, no waiters + negative: locked, with potential waiters + +In its most basic form it also includes a wait-queue and a spinlock +that serializes access to it. CONFIG_SMP systems can also include +a pointer to the lock task owner (->owner) as well as a spinner MCS +lock (->osq), both described below in (ii). + +When acquiring a mutex, there are three possible paths that can be +taken, depending on the state of the lock: + +(i) fastpath: tries to atomically acquire the lock by decrementing the + counter. If it was already taken by another task it goes to the next + possible path. This logic is architecture specific. On x86-64, the + locking fastpath is 2 instructions: + + 0000000000000e10 : + e21: f0 ff 0b lock decl (%rbx) + e24: 79 08 jns e2e + + the unlocking fastpath is equally tight: + + 0000000000000bc0 : + bc8: f0 ff 07 lock incl (%rdi) + bcb: 7f 0a jg bd7 + + +(ii) midpath: aka optimistic spinning, tries to spin for acquisition + while the lock owner is running and there are no other tasks ready + to run that have higher priority (need_resched). The rationale is + that if the lock owner is running, it is likely to release the lock + soon. The mutex spinners are queued up using MCS lock so that only + one spinner can compete for the mutex. + + The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock + with the desirable properties of being fair and with each cpu trying + to acquire the lock spinning on a local variable. It avoids expensive + cacheline bouncing that common test-and-set spinlock implementations + incur. An MCS-like lock is specially tailored for optimistic spinning + for sleeping lock implementation. An important feature of the customized + MCS lock is that it has the extra property that spinners are able to exit + the MCS spinlock queue when they need to reschedule. This further helps + avoid situations where MCS spinners that need to reschedule would continue + waiting to spin on mutex owner, only to go directly to slowpath upon + obtaining the MCS lock. + + +(iii) slowpath: last resort, if the lock is still unable to be acquired, + the task is added to the wait-queue and sleeps until woken up by the + unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE. + +While formally kernel mutexes are sleepable locks, it is path (ii) that +makes them more practically a hybrid type. By simply not interrupting a +task and busy-waiting for a few cycles instead of immediately sleeping, +the performance of this lock has been seen to significantly improve a +number of workloads. Note that this technique is also used for rw-semaphores. + +Semantics +--------- + +The mutex subsystem checks and enforces the following rules: + + - Only one task can hold the mutex at a time. + - Only the owner can unlock the mutex. + - Multiple unlocks are not permitted. + - Recursive locking/unlocking is not permitted. + - A mutex must only be initialized via the API (see below). + - A task may not exit with a mutex held. + - Memory areas where held locks reside must not be freed. + - Held mutexes must not be reinitialized. + - Mutexes may not be used in hardware or software interrupt + contexts such as tasklets and timers. + +These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled. +In addition, the mutex debugging code also implements a number of other +features that make lock debugging easier and faster: + + - Uses symbolic names of mutexes, whenever they are printed + in debug output. + - Point-of-acquire tracking, symbolic lookup of function names, + list of all locks held in the system, printout of them. + - Owner tracking. + - Detects self-recursing locks and prints out all relevant info. + - Detects multi-task circular deadlocks and prints out all affected + locks and tasks (and only those tasks). + + +Interfaces +---------- +Statically define the mutex: + DEFINE_MUTEX(name); + +Dynamically initialize the mutex: + mutex_init(mutex); + +Acquire the mutex, uninterruptible: + void mutex_lock(struct mutex *lock); + void mutex_lock_nested(struct mutex *lock, unsigned int subclass); + int mutex_trylock(struct mutex *lock); + +Acquire the mutex, interruptible: + int mutex_lock_interruptible_nested(struct mutex *lock, + unsigned int subclass); + int mutex_lock_interruptible(struct mutex *lock); + +Acquire the mutex, interruptible, if dec to 0: + int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); + +Unlock the mutex: + void mutex_unlock(struct mutex *lock); + +Test if the mutex is taken: + int mutex_is_locked(struct mutex *lock); + +Disadvantages +------------- + +Unlike its original design and purpose, 'struct mutex' is larger than +most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice +as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the +'struct rw_semaphore' variant. Larger structure sizes mean more CPU +cache and memory footprint. + +When to use mutexes +------------------- + +Unless the strict semantics of mutexes are unsuitable and/or the critical +region prevents the lock from being shared, always prefer them to any other +locking primitive. diff --git a/Documentation/locking/rt-mutex-design.txt b/Documentation/locking/rt-mutex-design.txt new file mode 100644 index 000000000000..8666070d3189 --- /dev/null +++ b/Documentation/locking/rt-mutex-design.txt @@ -0,0 +1,781 @@ +# +# Copyright (c) 2006 Steven Rostedt +# Licensed under the GNU Free Documentation License, Version 1.2 +# + +RT-mutex implementation design +------------------------------ + +This document tries to describe the design of the rtmutex.c implementation. +It doesn't describe the reasons why rtmutex.c exists. For that please see +Documentation/rt-mutex.txt. Although this document does explain problems +that happen without this code, but that is in the concept to understand +what the code actually is doing. + +The goal of this document is to help others understand the priority +inheritance (PI) algorithm that is used, as well as reasons for the +decisions that were made to implement PI in the manner that was done. + + +Unbounded Priority Inversion +---------------------------- + +Priority inversion is when a lower priority process executes while a higher +priority process wants to run. This happens for several reasons, and +most of the time it can't be helped. Anytime a high priority process wants +to use a resource that a lower priority process has (a mutex for example), +the high priority process must wait until the lower priority process is done +with the resource. This is a priority inversion. What we want to prevent +is something called unbounded priority inversion. That is when the high +priority process is prevented from running by a lower priority process for +an undetermined amount of time. + +The classic example of unbounded priority inversion is where you have three +processes, let's call them processes A, B, and C, where A is the highest +priority process, C is the lowest, and B is in between. A tries to grab a lock +that C owns and must wait and lets C run to release the lock. But in the +meantime, B executes, and since B is of a higher priority than C, it preempts C, +but by doing so, it is in fact preempting A which is a higher priority process. +Now there's no way of knowing how long A will be sleeping waiting for C +to release the lock, because for all we know, B is a CPU hog and will +never give C a chance to release the lock. This is called unbounded priority +inversion. + +Here's a little ASCII art to show the problem. + + grab lock L1 (owned by C) + | +A ---+ + C preempted by B + | +C +----+ + +B +--------> + B now keeps A from running. + + +Priority Inheritance (PI) +------------------------- + +There are several ways to solve this issue, but other ways are out of scope +for this document. Here we only discuss PI. + +PI is where a process inherits the priority of another process if the other +process blocks on a lock owned by the current process. To make this easier +to understand, let's use the previous example, with processes A, B, and C again. + +This time, when A blocks on the lock owned by C, C would inherit the priority +of A. So now if B becomes runnable, it would not preempt C, since C now has +the high priority of A. As soon as C releases the lock, it loses its +inherited priority, and A then can continue with the resource that C had. + +Terminology +----------- + +Here I explain some terminology that is used in this document to help describe +the design that is used to implement PI. + +PI chain - The PI chain is an ordered series of locks and processes that cause + processes to inherit priorities from a previous process that is + blocked on one of its locks. This is described in more detail + later in this document. + +mutex - In this document, to differentiate from locks that implement + PI and spin locks that are used in the PI code, from now on + the PI locks will be called a mutex. + +lock - In this document from now on, I will use the term lock when + referring to spin locks that are used to protect parts of the PI + algorithm. These locks disable preemption for UP (when + CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from + entering critical sections simultaneously. + +spin lock - Same as lock above. + +waiter - A waiter is a struct that is stored on the stack of a blocked + process. Since the scope of the waiter is within the code for + a process being blocked on the mutex, it is fine to allocate + the waiter on the process's stack (local variable). This + structure holds a pointer to the task, as well as the mutex that + the task is blocked on. It also has the plist node structures to + place the task in the waiter_list of a mutex as well as the + pi_list of a mutex owner task (described below). + + waiter is sometimes used in reference to the task that is waiting + on a mutex. This is the same as waiter->task. + +waiters - A list of processes that are blocked on a mutex. + +top waiter - The highest priority process waiting on a specific mutex. + +top pi waiter - The highest priority process waiting on one of the mutexes + that a specific process owns. + +Note: task and process are used interchangeably in this document, mostly to + differentiate between two processes that are being described together. + + +PI chain +-------- + +The PI chain is a list of processes and mutexes that may cause priority +inheritance to take place. Multiple chains may converge, but a chain +would never diverge, since a process can't be blocked on more than one +mutex at a time. + +Example: + + Process: A, B, C, D, E + Mutexes: L1, L2, L3, L4 + + A owns: L1 + B blocked on L1 + B owns L2 + C blocked on L2 + C owns L3 + D blocked on L3 + D owns L4 + E blocked on L4 + +The chain would be: + + E->L4->D->L3->C->L2->B->L1->A + +To show where two chains merge, we could add another process F and +another mutex L5 where B owns L5 and F is blocked on mutex L5. + +The chain for F would be: + + F->L5->B->L1->A + +Since a process may own more than one mutex, but never be blocked on more than +one, the chains merge. + +Here we show both chains: + + E->L4->D->L3->C->L2-+ + | + +->B->L1->A + | + F->L5-+ + +For PI to work, the processes at the right end of these chains (or we may +also call it the Top of the chain) must be equal to or higher in priority +than the processes to the left or below in the chain. + +Also since a mutex may have more than one process blocked on it, we can +have multiple chains merge at mutexes. If we add another process G that is +blocked on mutex L2: + + G->L2->B->L1->A + +And once again, to show how this can grow I will show the merging chains +again. + + E->L4->D->L3->C-+ + +->L2-+ + | | + G-+ +->B->L1->A + | + F->L5-+ + + +Plist +----- + +Before I go further and talk about how the PI chain is stored through lists +on both mutexes and processes, I'll explain the plist. This is similar to +the struct list_head functionality that is already in the kernel. +The implementation of plist is out of scope for this document, but it is +very important to understand what it does. + +There are a few differences between plist and list, the most important one +being that plist is a priority sorted linked list. This means that the +priorities of the plist are sorted, such that it takes O(1) to retrieve the +highest priority item in the list. Obviously this is useful to store processes +based on their priorities. + +Another difference, which is important for implementation, is that, unlike +list, the head of the list is a different element than the nodes of a list. +So the head of the list is declared as struct plist_head and nodes that will +be added to the list are declared as struct plist_node. + + +Mutex Waiter List +----------------- + +Every mutex keeps track of all the waiters that are blocked on itself. The mutex +has a plist to store these waiters by priority. This list is protected by +a spin lock that is located in the struct of the mutex. This lock is called +wait_lock. Since the modification of the waiter list is never done in +interrupt context, the wait_lock can be taken without disabling interrupts. + + +Task PI List +------------ + +To keep track of the PI chains, each process has its own PI list. This is +a list of all top waiters of the mutexes that are owned by the process. +Note that this list only holds the top waiters and not all waiters that are +blocked on mutexes owned by the process. + +The top of the task's PI list is always the highest priority task that +is waiting on a mutex that is owned by the task. So if the task has +inherited a priority, it will always be the priority of the task that is +at the top of this list. + +This list is stored in the task structure of a process as a plist called +pi_list. This list is protected by a spin lock also in the task structure, +called pi_lock. This lock may also be taken in interrupt context, so when +locking the pi_lock, interrupts must be disabled. + + +Depth of the PI Chain +--------------------- + +The maximum depth of the PI chain is not dynamic, and could actually be +defined. But is very complex to figure it out, since it depends on all +the nesting of mutexes. Let's look at the example where we have 3 mutexes, +L1, L2, and L3, and four separate functions func1, func2, func3 and func4. +The following shows a locking order of L1->L2->L3, but may not actually +be directly nested that way. + +void func1(void) +{ + mutex_lock(L1); + + /* do anything */ + + mutex_unlock(L1); +} + +void func2(void) +{ + mutex_lock(L1); + mutex_lock(L2); + + /* do something */ + + mutex_unlock(L2); + mutex_unlock(L1); +} + +void func3(void) +{ + mutex_lock(L2); + mutex_lock(L3); + + /* do something else */ + + mutex_unlock(L3); + mutex_unlock(L2); +} + +void func4(void) +{ + mutex_lock(L3); + + /* do something again */ + + mutex_unlock(L3); +} + +Now we add 4 processes that run each of these functions separately. +Processes A, B, C, and D which run functions func1, func2, func3 and func4 +respectively, and such that D runs first and A last. With D being preempted +in func4 in the "do something again" area, we have a locking that follows: + +D owns L3 + C blocked on L3 + C owns L2 + B blocked on L2 + B owns L1 + A blocked on L1 + +And thus we have the chain A->L1->B->L2->C->L3->D. + +This gives us a PI depth of 4 (four processes), but looking at any of the +functions individually, it seems as though they only have at most a locking +depth of two. So, although the locking depth is defined at compile time, +it still is very difficult to find the possibilities of that depth. + +Now since mutexes can be defined by user-land applications, we don't want a DOS +type of application that nests large amounts of mutexes to create a large +PI chain, and have the code holding spin locks while looking at a large +amount of data. So to prevent this, the implementation not only implements +a maximum lock depth, but also only holds at most two different locks at a +time, as it walks the PI chain. More about this below. + + +Mutex owner and flags +--------------------- + +The mutex structure contains a pointer to the owner of the mutex. If the +mutex is not owned, this owner is set to NULL. Since all architectures +have the task structure on at least a four byte alignment (and if this is +not true, the rtmutex.c code will be broken!), this allows for the two +least significant bits to be used as flags. This part is also described +in Documentation/rt-mutex.txt, but will also be briefly described here. + +Bit 0 is used as the "Pending Owner" flag. This is described later. +Bit 1 is used as the "Has Waiters" flags. This is also described later + in more detail, but is set whenever there are waiters on a mutex. + + +cmpxchg Tricks +-------------- + +Some architectures implement an atomic cmpxchg (Compare and Exchange). This +is used (when applicable) to keep the fast path of grabbing and releasing +mutexes short. + +cmpxchg is basically the following function performed atomically: + +unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C) +{ + unsigned long T = *A; + if (*A == *B) { + *A = *C; + } + return T; +} +#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c) + +This is really nice to have, since it allows you to only update a variable +if the variable is what you expect it to be. You know if it succeeded if +the return value (the old value of A) is equal to B. + +The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If +the architecture does not support CMPXCHG, then this macro is simply set +to fail every time. But if CMPXCHG is supported, then this will +help out extremely to keep the fast path short. + +The use of rt_mutex_cmpxchg with the flags in the owner field help optimize +the system for architectures that support it. This will also be explained +later in this document. + + +Priority adjustments +-------------------- + +The implementation of the PI code in rtmutex.c has several places that a +process must adjust its priority. With the help of the pi_list of a +process this is rather easy to know what needs to be adjusted. + +The functions implementing the task adjustments are rt_mutex_adjust_prio, +__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock +to already be taken), rt_mutex_getprio, and rt_mutex_setprio. + +rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio. + +rt_mutex_getprio returns the priority that the task should have. Either the +task's own normal priority, or if a process of a higher priority is waiting on +a mutex owned by the task, then that higher priority should be returned. +Since the pi_list of a task holds an order by priority list of all the top +waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs +to compare the top pi waiter to its own normal priority, and return the higher +priority back. + +(Note: if looking at the code, you will notice that the lower number of + prio is returned. This is because the prio field in the task structure + is an inverse order of the actual priority. So a "prio" of 5 is + of higher priority than a "prio" of 10.) + +__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the +result does not equal the task's current priority, then rt_mutex_setprio +is called to adjust the priority of the task to the new priority. +Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the +actual change in priority. + +It is interesting to note that __rt_mutex_adjust_prio can either increase +or decrease the priority of the task. In the case that a higher priority +process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio +would increase/boost the task's priority. But if a higher priority task +were for some reason to leave the mutex (timeout or signal), this same function +would decrease/unboost the priority of the task. That is because the pi_list +always contains the highest priority task that is waiting on a mutex owned +by the task, so we only need to compare the priority of that top pi waiter +to the normal priority of the given task. + + +High level overview of the PI chain walk +---------------------------------------- + +The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain. + +The implementation has gone through several iterations, and has ended up +with what we believe is the best. It walks the PI chain by only grabbing +at most two locks at a time, and is very efficient. + +The rt_mutex_adjust_prio_chain can be used either to boost or lower process +priorities. + +rt_mutex_adjust_prio_chain is called with a task to be checked for PI +(de)boosting (the owner of a mutex that a process is blocking on), a flag to +check for deadlocking, the mutex that the task owns, and a pointer to a waiter +that is the process's waiter struct that is blocked on the mutex (although this +parameter may be NULL for deboosting). + +For this explanation, I will not mention deadlock detection. This explanation +will try to stay at a high level. + +When this function is called, there are no locks held. That also means +that the state of the owner and lock can change when entered into this function. + +Before this function is called, the task has already had rt_mutex_adjust_prio +performed on it. This means that the task is set to the priority that it +should be at, but the plist nodes of the task's waiter have not been updated +with the new priorities, and that this task may not be in the proper locations +in the pi_lists and wait_lists that the task is blocked on. This function +solves all that. + +A loop is entered, where task is the owner to be checked for PI changes that +was passed by parameter (for the first iteration). The pi_lock of this task is +taken to prevent any more changes to the pi_list of the task. This also +prevents new tasks from completing the blocking on a mutex that is owned by this +task. + +If the task is not blocked on a mutex then the loop is exited. We are at +the top of the PI chain. + +A check is now done to see if the original waiter (the process that is blocked +on the current mutex) is the top pi waiter of the task. That is, is this +waiter on the top of the task's pi_list. If it is not, it either means that +there is another process higher in priority that is blocked on one of the +mutexes that the task owns, or that the waiter has just woken up via a signal +or timeout and has left the PI chain. In either case, the loop is exited, since +we don't need to do any more changes to the priority of the current task, or any +task that owns a mutex that this current task is waiting on. A priority chain +walk is only needed when a new top pi waiter is made to a task. + +The next check sees if the task's waiter plist node has the priority equal to +the priority the task is set at. If they are equal, then we are done with +the loop. Remember that the function started with the priority of the +task adjusted, but the plist nodes that hold the task in other processes +pi_lists have not been adjusted. + +Next, we look at the mutex that the task is blocked on. The mutex's wait_lock +is taken. This is done by a spin_trylock, because the locking order of the +pi_lock and wait_lock goes in the opposite direction. If we fail to grab the +lock, the pi_lock is released, and we restart the loop. + +Now that we have both the pi_lock of the task as well as the wait_lock of +the mutex the task is blocked on, we update the task's waiter's plist node +that is located on the mutex's wait_list. + +Now we release the pi_lock of the task. + +Next the owner of the mutex has its pi_lock taken, so we can update the +task's entry in the owner's pi_list. If the task is the highest priority +process on the mutex's wait_list, then we remove the previous top waiter +from the owner's pi_list, and replace it with the task. + +Note: It is possible that the task was the current top waiter on the mutex, + in which case the task is not yet on the pi_list of the waiter. This + is OK, since plist_del does nothing if the plist node is not on any + list. + +If the task was not the top waiter of the mutex, but it was before we +did the priority updates, that means we are deboosting/lowering the +task. In this case, the task is removed from the pi_list of the owner, +and the new top waiter is added. + +Lastly, we unlock both the pi_lock of the task, as well as the mutex's +wait_lock, and continue the loop again. On the next iteration of the +loop, the previous owner of the mutex will be the task that will be +processed. + +Note: One might think that the owner of this mutex might have changed + since we just grab the mutex's wait_lock. And one could be right. + The important thing to remember is that the owner could not have + become the task that is being processed in the PI chain, since + we have taken that task's pi_lock at the beginning of the loop. + So as long as there is an owner of this mutex that is not the same + process as the tasked being worked on, we are OK. + + Looking closely at the code, one might be confused. The check for the + end of the PI chain is when the task isn't blocked on anything or the + task's waiter structure "task" element is NULL. This check is + protected only by the task's pi_lock. But the code to unlock the mutex + sets the task's waiter structure "task" element to NULL with only + the protection of the mutex's wait_lock, which was not taken yet. + Isn't this a race condition if the task becomes the new owner? + + The answer is No! The trick is the spin_trylock of the mutex's + wait_lock. If we fail that lock, we release the pi_lock of the + task and continue the loop, doing the end of PI chain check again. + + In the code to release the lock, the wait_lock of the mutex is held + the entire time, and it is not let go when we grab the pi_lock of the + new owner of the mutex. So if the switch of a new owner were to happen + after the check for end of the PI chain and the grabbing of the + wait_lock, the unlocking code would spin on the new owner's pi_lock + but never give up the wait_lock. So the PI chain loop is guaranteed to + fail the spin_trylock on the wait_lock, release the pi_lock, and + try again. + + If you don't quite understand the above, that's OK. You don't have to, + unless you really want to make a proof out of it ;) + + +Pending Owners and Lock stealing +-------------------------------- + +One of the flags in the owner field of the mutex structure is "Pending Owner". +What this means is that an owner was chosen by the process releasing the +mutex, but that owner has yet to wake up and actually take the mutex. + +Why is this important? Why can't we just give the mutex to another process +and be done with it? + +The PI code is to help with real-time processes, and to let the highest +priority process run as long as possible with little latencies and delays. +If a high priority process owns a mutex that a lower priority process is +blocked on, when the mutex is released it would be given to the lower priority +process. What if the higher priority process wants to take that mutex again. +The high priority process would fail to take that mutex that it just gave up +and it would need to boost the lower priority process to run with full +latency of that critical section (since the low priority process just entered +it). + +There's no reason a high priority process that gives up a mutex should be +penalized if it tries to take that mutex again. If the new owner of the +mutex has not woken up yet, there's no reason that the higher priority process +could not take that mutex away. + +To solve this, we introduced Pending Ownership and Lock Stealing. When a +new process is given a mutex that it was blocked on, it is only given +pending ownership. This means that it's the new owner, unless a higher +priority process comes in and tries to grab that mutex. If a higher priority +process does come along and wants that mutex, we let the higher priority +process "steal" the mutex from the pending owner (only if it is still pending) +and continue with the mutex. + + +Taking of a mutex (The walk through) +------------------------------------ + +OK, now let's take a look at the detailed walk through of what happens when +taking a mutex. + +The first thing that is tried is the fast taking of the mutex. This is +done when we have CMPXCHG enabled (otherwise the fast taking automatically +fails). Only when the owner field of the mutex is NULL can the lock be +taken with the CMPXCHG and nothing else needs to be done. + +If there is contention on the lock, whether it is owned or pending owner +we go about the slow path (rt_mutex_slowlock). + +The slow path function is where the task's waiter structure is created on +the stack. This is because the waiter structure is only needed for the +scope of this function. The waiter structure holds the nodes to store +the task on the wait_list of the mutex, and if need be, the pi_list of +the owner. + +The wait_lock of the mutex is taken since the slow path of unlocking the +mutex also takes this lock. + +We then call try_to_take_rt_mutex. This is where the architecture that +does not implement CMPXCHG would always grab the lock (if there's no +contention). + +try_to_take_rt_mutex is used every time the task tries to grab a mutex in the +slow path. The first thing that is done here is an atomic setting of +the "Has Waiters" flag of the mutex's owner field. Yes, this could really +be false, because if the mutex has no owner, there are no waiters and +the current task also won't have any waiters. But we don't have the lock +yet, so we assume we are going to be a waiter. The reason for this is to +play nice for those architectures that do have CMPXCHG. By setting this flag +now, the owner of the mutex can't release the mutex without going into the +slow unlock path, and it would then need to grab the wait_lock, which this +code currently holds. So setting the "Has Waiters" flag forces the owner +to synchronize with this code. + +Now that we know that we can't have any races with the owner releasing the +mutex, we check to see if we can take the ownership. This is done if the +mutex doesn't have a owner, or if we can steal the mutex from a pending +owner. Let's look at the situations we have here. + + 1) Has owner that is pending + ---------------------------- + + The mutex has a owner, but it hasn't woken up and the mutex flag + "Pending Owner" is set. The first check is to see if the owner isn't the + current task. This is because this function is also used for the pending + owner to grab the mutex. When a pending owner wakes up, it checks to see + if it can take the mutex, and this is done if the owner is already set to + itself. If so, we succeed and leave the function, clearing the "Pending + Owner" bit. + + If the pending owner is not current, we check to see if the current priority is + higher than the pending owner. If not, we fail the function and return. + + There's also something special about a pending owner. That is a pending owner + is never blocked on a mutex. So there is no PI chain to worry about. It also + means that if the mutex doesn't have any waiters, there's no accounting needed + to update the pending owner's pi_list, since we only worry about processes + blocked on the current mutex. + + If there are waiters on this mutex, and we just stole the ownership, we need + to take the top waiter, remove it from the pi_list of the pending owner, and + add it to the current pi_list. Note that at this moment, the pending owner + is no longer on the list of waiters. This is fine, since the pending owner + would add itself back when it realizes that it had the ownership stolen + from itself. When the pending owner tries to grab the mutex, it will fail + in try_to_take_rt_mutex if the owner field points to another process. + + 2) No owner + ----------- + + If there is no owner (or we successfully stole the lock), we set the owner + of the mutex to current, and set the flag of "Has Waiters" if the current + mutex actually has waiters, or we clear the flag if it doesn't. See, it was + OK that we set that flag early, since now it is cleared. + + 3) Failed to grab ownership + --------------------------- + + The most interesting case is when we fail to take ownership. This means that + there exists an owner, or there's a pending owner with equal or higher + priority than the current task. + +We'll continue on the failed case. + +If the mutex has a timeout, we set up a timer to go off to break us out +of this mutex if we failed to get it after a specified amount of time. + +Now we enter a loop that will continue to try to take ownership of the mutex, or +fail from a timeout or signal. + +Once again we try to take the mutex. This will usually fail the first time +in the loop, since it had just failed to get the mutex. But the second time +in the loop, this would likely succeed, since the task would likely be +the pending owner. + +If the mutex is TASK_INTERRUPTIBLE a check for signals and timeout is done +here. + +The waiter structure has a "task" field that points to the task that is blocked +on the mutex. This field can be NULL the first time it goes through the loop +or if the task is a pending owner and had its mutex stolen. If the "task" +field is NULL then we need to set up the accounting for it. + +Task blocks on mutex +-------------------- + +The accounting of a mutex and process is done with the waiter structure of +the process. The "task" field is set to the process, and the "lock" field +to the mutex. The plist nodes are initialized to the processes current +priority. + +Since the wait_lock was taken at the entry of the slow lock, we can safely +add the waiter to the wait_list. If the current process is the highest +priority process currently waiting on this mutex, then we remove the +previous top waiter process (if it exists) from the pi_list of the owner, +and add the current process to that list. Since the pi_list of the owner +has changed, we call rt_mutex_adjust_prio on the owner to see if the owner +should adjust its priority accordingly. + +If the owner is also blocked on a lock, and had its pi_list changed +(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead +and run rt_mutex_adjust_prio_chain on the owner, as described earlier. + +Now all locks are released, and if the current process is still blocked on a +mutex (waiter "task" field is not NULL), then we go to sleep (call schedule). + +Waking up in the loop +--------------------- + +The schedule can then wake up for a few reasons. + 1) we were given pending ownership of the mutex. + 2) we received a signal and was TASK_INTERRUPTIBLE + 3) we had a timeout and was TASK_INTERRUPTIBLE + +In any of these cases, we continue the loop and once again try to grab the +ownership of the mutex. If we succeed, we exit the loop, otherwise we continue +and on signal and timeout, will exit the loop, or if we had the mutex stolen +we just simply add ourselves back on the lists and go back to sleep. + +Note: For various reasons, because of timeout and signals, the steal mutex + algorithm needs to be careful. This is because the current process is + still on the wait_list. And because of dynamic changing of priorities, + especially on SCHED_OTHER tasks, the current process can be the + highest priority task on the wait_list. + +Failed to get mutex on Timeout or Signal +---------------------------------------- + +If a timeout or signal occurred, the waiter's "task" field would not be +NULL and the task needs to be taken off the wait_list of the mutex and perhaps +pi_list of the owner. If this process was a high priority process, then +the rt_mutex_adjust_prio_chain needs to be executed again on the owner, +but this time it will be lowering the priorities. + + +Unlocking the Mutex +------------------- + +The unlocking of a mutex also has a fast path for those architectures with +CMPXCHG. Since the taking of a mutex on contention always sets the +"Has Waiters" flag of the mutex's owner, we use this to know if we need to +take the slow path when unlocking the mutex. If the mutex doesn't have any +waiters, the owner field of the mutex would equal the current process and +the mutex can be unlocked by just replacing the owner field with NULL. + +If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available), +the slow unlock path is taken. + +The first thing done in the slow unlock path is to take the wait_lock of the +mutex. This synchronizes the locking and unlocking of the mutex. + +A check is made to see if the mutex has waiters or not. On architectures that +do not have CMPXCHG, this is the location that the owner of the mutex will +determine if a waiter needs to be awoken or not. On architectures that +do have CMPXCHG, that check is done in the fast path, but it is still needed +in the slow path too. If a waiter of a mutex woke up because of a signal +or timeout between the time the owner failed the fast path CMPXCHG check and +the grabbing of the wait_lock, the mutex may not have any waiters, thus the +owner still needs to make this check. If there are no waiters then the mutex +owner field is set to NULL, the wait_lock is released and nothing more is +needed. + +If there are waiters, then we need to wake one up and give that waiter +pending ownership. + +On the wake up code, the pi_lock of the current owner is taken. The top +waiter of the lock is found and removed from the wait_list of the mutex +as well as the pi_list of the current owner. The task field of the new +pending owner's waiter structure is set to NULL, and the owner field of the +mutex is set to the new owner with the "Pending Owner" bit set, as well +as the "Has Waiters" bit if there still are other processes blocked on the +mutex. + +The pi_lock of the previous owner is released, and the new pending owner's +pi_lock is taken. Remember that this is the trick to prevent the race +condition in rt_mutex_adjust_prio_chain from adding itself as a waiter +on the mutex. + +We now clear the "pi_blocked_on" field of the new pending owner, and if +the mutex still has waiters pending, we add the new top waiter to the pi_list +of the pending owner. + +Finally we unlock the pi_lock of the pending owner and wake it up. + + +Contact +------- + +For updates on this document, please email Steven Rostedt + + +Credits +------- + +Author: Steven Rostedt + +Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap + +Updates +------- + +This document was originally written for 2.6.17-rc3-mm1 diff --git a/Documentation/locking/rt-mutex.txt b/Documentation/locking/rt-mutex.txt new file mode 100644 index 000000000000..243393d882ee --- /dev/null +++ b/Documentation/locking/rt-mutex.txt @@ -0,0 +1,79 @@ +RT-mutex subsystem with PI support +---------------------------------- + +RT-mutexes with priority inheritance are used to support PI-futexes, +which enable pthread_mutex_t priority inheritance attributes +(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details +about PI-futexes.] + +This technology was developed in the -rt tree and streamlined for +pthread_mutex support. + +Basic principles: +----------------- + +RT-mutexes extend the semantics of simple mutexes by the priority +inheritance protocol. + +A low priority owner of a rt-mutex inherits the priority of a higher +priority waiter until the rt-mutex is released. If the temporarily +boosted owner blocks on a rt-mutex itself it propagates the priority +boosting to the owner of the other rt_mutex it gets blocked on. The +priority boosting is immediately removed once the rt_mutex has been +unlocked. + +This approach allows us to shorten the block of high-prio tasks on +mutexes which protect shared resources. Priority inheritance is not a +magic bullet for poorly designed applications, but it allows +well-designed applications to use userspace locks in critical parts of +an high priority thread, without losing determinism. + +The enqueueing of the waiters into the rtmutex waiter list is done in +priority order. For same priorities FIFO order is chosen. For each +rtmutex, only the top priority waiter is enqueued into the owner's +priority waiters list. This list too queues in priority order. Whenever +the top priority waiter of a task changes (for example it timed out or +got a signal), the priority of the owner task is readjusted. [The +priority enqueueing is handled by "plists", see include/linux/plist.h +for more details.] + +RT-mutexes are optimized for fastpath operations and have no internal +locking overhead when locking an uncontended mutex or unlocking a mutex +without waiters. The optimized fastpath operations require cmpxchg +support. [If that is not available then the rt-mutex internal spinlock +is used] + +The state of the rt-mutex is tracked via the owner field of the rt-mutex +structure: + +rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1 +are used to keep track of the "owner is pending" and "rtmutex has +waiters" state. + + owner bit1 bit0 + NULL 0 0 mutex is free (fast acquire possible) + NULL 0 1 invalid state + NULL 1 0 Transitional state* + NULL 1 1 invalid state + taskpointer 0 0 mutex is held (fast release possible) + taskpointer 0 1 task is pending owner + taskpointer 1 0 mutex is held and has waiters + taskpointer 1 1 task is pending owner and mutex has waiters + +Pending-ownership handling is a performance optimization: +pending-ownership is assigned to the first (highest priority) waiter of +the mutex, when the mutex is released. The thread is woken up and once +it starts executing it can acquire the mutex. Until the mutex is taken +by it (bit 0 is cleared) a competing higher priority thread can "steal" +the mutex which puts the woken up thread back on the waiters list. + +The pending-ownership optimization is especially important for the +uninterrupted workflow of high-prio tasks which repeatedly +takes/releases locks that have lower-prio waiters. Without this +optimization the higher-prio thread would ping-pong to the lower-prio +task [because at unlock time we always assign a new owner]. + +(*) The "mutex has waiters" bit gets set to take the lock. If the lock +doesn't already have an owner, this bit is quickly cleared if there are +no waiters. So this is a transitional state to synchronize with looking +at the owner field of the mutex and the mutex owner releasing the lock. diff --git a/Documentation/locking/spinlocks.txt b/Documentation/locking/spinlocks.txt new file mode 100644 index 000000000000..ff35e40bdf5b --- /dev/null +++ b/Documentation/locking/spinlocks.txt @@ -0,0 +1,167 @@ +Lesson 1: Spin locks + +The most basic primitive for locking is spinlock. + +static DEFINE_SPINLOCK(xxx_lock); + + unsigned long flags; + + spin_lock_irqsave(&xxx_lock, flags); + ... critical section here .. + spin_unlock_irqrestore(&xxx_lock, flags); + +The above is always safe. It will disable interrupts _locally_, but the +spinlock itself will guarantee the global lock, so it will guarantee that +there is only one thread-of-control within the region(s) protected by that +lock. This works well even under UP also, so the code does _not_ need to +worry about UP vs SMP issues: the spinlocks work correctly under both. + + NOTE! Implications of spin_locks for memory are further described in: + + Documentation/memory-barriers.txt + (5) LOCK operations. + (6) UNLOCK operations. + +The above is usually pretty simple (you usually need and want only one +spinlock for most things - using more than one spinlock can make things a +lot more complex and even slower and is usually worth it only for +sequences that you _know_ need to be split up: avoid it at all cost if you +aren't sure). + +This is really the only really hard part about spinlocks: once you start +using spinlocks they tend to expand to areas you might not have noticed +before, because you have to make sure the spinlocks correctly protect the +shared data structures _everywhere_ they are used. The spinlocks are most +easily added to places that are completely independent of other code (for +example, internal driver data structures that nobody else ever touches). + + NOTE! The spin-lock is safe only when you _also_ use the lock itself + to do locking across CPU's, which implies that EVERYTHING that + touches a shared variable has to agree about the spinlock they want + to use. + +---- + +Lesson 2: reader-writer spinlocks. + +If your data accesses have a very natural pattern where you usually tend +to mostly read from the shared variables, the reader-writer locks +(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple +readers to be in the same critical region at once, but if somebody wants +to change the variables it has to get an exclusive write lock. + + NOTE! reader-writer locks require more atomic memory operations than + simple spinlocks. Unless the reader critical section is long, you + are better off just using spinlocks. + +The routines look the same as above: + + rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock); + + unsigned long flags; + + read_lock_irqsave(&xxx_lock, flags); + .. critical section that only reads the info ... + read_unlock_irqrestore(&xxx_lock, flags); + + write_lock_irqsave(&xxx_lock, flags); + .. read and write exclusive access to the info ... + write_unlock_irqrestore(&xxx_lock, flags); + +The above kind of lock may be useful for complex data structures like +linked lists, especially searching for entries without changing the list +itself. The read lock allows many concurrent readers. Anything that +_changes_ the list will have to get the write lock. + + NOTE! RCU is better for list traversal, but requires careful + attention to design detail (see Documentation/RCU/listRCU.txt). + +Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_ +time need to do any changes (even if you don't do it every time), you have +to get the write-lock at the very beginning. + + NOTE! We are working hard to remove reader-writer spinlocks in most + cases, so please don't add a new one without consensus. (Instead, see + Documentation/RCU/rcu.txt for complete information.) + +---- + +Lesson 3: spinlocks revisited. + +The single spin-lock primitives above are by no means the only ones. They +are the most safe ones, and the ones that work under all circumstances, +but partly _because_ they are safe they are also fairly slow. They are slower +than they'd need to be, because they do have to disable interrupts +(which is just a single instruction on a x86, but it's an expensive one - +and on other architectures it can be worse). + +If you have a case where you have to protect a data structure across +several CPU's and you want to use spinlocks you can potentially use +cheaper versions of the spinlocks. IFF you know that the spinlocks are +never used in interrupt handlers, you can use the non-irq versions: + + spin_lock(&lock); + ... + spin_unlock(&lock); + +(and the equivalent read-write versions too, of course). The spinlock will +guarantee the same kind of exclusive access, and it will be much faster. +This is useful if you know that the data in question is only ever +manipulated from a "process context", ie no interrupts involved. + +The reasons you mustn't use these versions if you have interrupts that +play with the spinlock is that you can get deadlocks: + + spin_lock(&lock); + ... + <- interrupt comes in: + spin_lock(&lock); + +where an interrupt tries to lock an already locked variable. This is ok if +the other interrupt happens on another CPU, but it is _not_ ok if the +interrupt happens on the same CPU that already holds the lock, because the +lock will obviously never be released (because the interrupt is waiting +for the lock, and the lock-holder is interrupted by the interrupt and will +not continue until the interrupt has been processed). + +(This is also the reason why the irq-versions of the spinlocks only need +to disable the _local_ interrupts - it's ok to use spinlocks in interrupts +on other CPU's, because an interrupt on another CPU doesn't interrupt the +CPU that holds the lock, so the lock-holder can continue and eventually +releases the lock). + +Note that you can be clever with read-write locks and interrupts. For +example, if you know that the interrupt only ever gets a read-lock, then +you can use a non-irq version of read locks everywhere - because they +don't block on each other (and thus there is no dead-lock wrt interrupts. +But when you do the write-lock, you have to use the irq-safe version. + +For an example of being clever with rw-locks, see the "waitqueue_lock" +handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from +within an interrupt, they only read the queue in order to know whom to +wake up. So read-locks are safe (which is good: they are very common +indeed), while write-locks need to protect themselves against interrupts. + + Linus + +---- + +Reference information: + +For dynamic initialization, use spin_lock_init() or rwlock_init() as +appropriate: + + spinlock_t xxx_lock; + rwlock_t xxx_rw_lock; + + static int __init xxx_init(void) + { + spin_lock_init(&xxx_lock); + rwlock_init(&xxx_rw_lock); + ... + } + + module_init(xxx_init); + +For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or +__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate. diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt new file mode 100644 index 000000000000..8a112dc304c3 --- /dev/null +++ b/Documentation/locking/ww-mutex-design.txt @@ -0,0 +1,344 @@ +Wait/Wound Deadlock-Proof Mutex Design +====================================== + +Please read mutex-design.txt first, as it applies to wait/wound mutexes too. + +Motivation for WW-Mutexes +------------------------- + +GPU's do operations that commonly involve many buffers. Those buffers +can be shared across contexts/processes, exist in different memory +domains (for example VRAM vs system memory), and so on. And with +PRIME / dmabuf, they can even be shared across devices. So there are +a handful of situations where the driver needs to wait for buffers to +become ready. If you think about this in terms of waiting on a buffer +mutex for it to become available, this presents a problem because +there is no way to guarantee that buffers appear in a execbuf/batch in +the same order in all contexts. That is directly under control of +userspace, and a result of the sequence of GL calls that an application +makes. Which results in the potential for deadlock. The problem gets +more complex when you consider that the kernel may need to migrate the +buffer(s) into VRAM before the GPU operates on the buffer(s), which +may in turn require evicting some other buffers (and you don't want to +evict other buffers which are already queued up to the GPU), but for a +simplified understanding of the problem you can ignore this. + +The algorithm that the TTM graphics subsystem came up with for dealing with +this problem is quite simple. For each group of buffers (execbuf) that need +to be locked, the caller would be assigned a unique reservation id/ticket, +from a global counter. In case of deadlock while locking all the buffers +associated with a execbuf, the one with the lowest reservation ticket (i.e. +the oldest task) wins, and the one with the higher reservation id (i.e. the +younger task) unlocks all of the buffers that it has already locked, and then +tries again. + +In the RDBMS literature this deadlock handling approach is called wait/wound: +The older tasks waits until it can acquire the contended lock. The younger tasks +needs to back off and drop all the locks it is currently holding, i.e. the +younger task is wounded. + +Concepts +-------- + +Compared to normal mutexes two additional concepts/objects show up in the lock +interface for w/w mutexes: + +Acquire context: To ensure eventual forward progress it is important the a task +trying to acquire locks doesn't grab a new reservation id, but keeps the one it +acquired when starting the lock acquisition. This ticket is stored in the +acquire context. Furthermore the acquire context keeps track of debugging state +to catch w/w mutex interface abuse. + +W/w class: In contrast to normal mutexes the lock class needs to be explicit for +w/w mutexes, since it is required to initialize the acquire context. + +Furthermore there are three different class of w/w lock acquire functions: + +* Normal lock acquisition with a context, using ww_mutex_lock. + +* Slowpath lock acquisition on the contending lock, used by the wounded task + after having dropped all already acquired locks. These functions have the + _slow postfix. + + From a simple semantics point-of-view the _slow functions are not strictly + required, since simply calling the normal ww_mutex_lock functions on the + contending lock (after having dropped all other already acquired locks) will + work correctly. After all if no other ww mutex has been acquired yet there's + no deadlock potential and hence the ww_mutex_lock call will block and not + prematurely return -EDEADLK. The advantage of the _slow functions is in + interface safety: + - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow + has a void return type. Note that since ww mutex code needs loops/retries + anyway the __must_check doesn't result in spurious warnings, even though the + very first lock operation can never fail. + - When full debugging is enabled ww_mutex_lock_slow checks that all acquired + ww mutex have been released (preventing deadlocks) and makes sure that we + block on the contending lock (preventing spinning through the -EDEADLK + slowpath until the contended lock can be acquired). + +* Functions to only acquire a single w/w mutex, which results in the exact same + semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL + context. + + Again this is not strictly required. But often you only want to acquire a + single lock in which case it's pointless to set up an acquire context (and so + better to avoid grabbing a deadlock avoidance ticket). + +Of course, all the usual variants for handling wake-ups due to signals are also +provided. + +Usage +----- + +Three different ways to acquire locks within the same w/w class. Common +definitions for methods #1 and #2: + +static DEFINE_WW_CLASS(ww_class); + +struct obj { + struct ww_mutex lock; + /* obj data */ +}; + +struct obj_entry { + struct list_head head; + struct obj *obj; +}; + +Method 1, using a list in execbuf->buffers that's not allowed to be reordered. +This is useful if a list of required objects is already tracked somewhere. +Furthermore the lock helper can use propagate the -EALREADY return code back to +the caller as a signal that an object is twice on the list. This is useful if +the list is constructed from userspace input and the ABI requires userspace to +not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl). + +int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) +{ + struct obj *res_obj = NULL; + struct obj_entry *contended_entry = NULL; + struct obj_entry *entry; + + ww_acquire_init(ctx, &ww_class); + +retry: + list_for_each_entry (entry, list, head) { + if (entry->obj == res_obj) { + res_obj = NULL; + continue; + } + ret = ww_mutex_lock(&entry->obj->lock, ctx); + if (ret < 0) { + contended_entry = entry; + goto err; + } + } + + ww_acquire_done(ctx); + return 0; + +err: + list_for_each_entry_continue_reverse (entry, list, head) + ww_mutex_unlock(&entry->obj->lock); + + if (res_obj) + ww_mutex_unlock(&res_obj->lock); + + if (ret == -EDEADLK) { + /* we lost out in a seqno race, lock and retry.. */ + ww_mutex_lock_slow(&contended_entry->obj->lock, ctx); + res_obj = contended_entry->obj; + goto retry; + } + ww_acquire_fini(ctx); + + return ret; +} + +Method 2, using a list in execbuf->buffers that can be reordered. Same semantics +of duplicate entry detection using -EALREADY as method 1 above. But the +list-reordering allows for a bit more idiomatic code. + +int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) +{ + struct obj_entry *entry, *entry2; + + ww_acquire_init(ctx, &ww_class); + + list_for_each_entry (entry, list, head) { + ret = ww_mutex_lock(&entry->obj->lock, ctx); + if (ret < 0) { + entry2 = entry; + + list_for_each_entry_continue_reverse (entry2, list, head) + ww_mutex_unlock(&entry2->obj->lock); + + if (ret != -EDEADLK) { + ww_acquire_fini(ctx); + return ret; + } + + /* we lost out in a seqno race, lock and retry.. */ + ww_mutex_lock_slow(&entry->obj->lock, ctx); + + /* + * Move buf to head of the list, this will point + * buf->next to the first unlocked entry, + * restarting the for loop. + */ + list_del(&entry->head); + list_add(&entry->head, list); + } + } + + ww_acquire_done(ctx); + return 0; +} + +Unlocking works the same way for both methods #1 and #2: + +void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) +{ + struct obj_entry *entry; + + list_for_each_entry (entry, list, head) + ww_mutex_unlock(&entry->obj->lock); + + ww_acquire_fini(ctx); +} + +Method 3 is useful if the list of objects is constructed ad-hoc and not upfront, +e.g. when adjusting edges in a graph where each node has its own ww_mutex lock, +and edges can only be changed when holding the locks of all involved nodes. w/w +mutexes are a natural fit for such a case for two reasons: +- They can handle lock-acquisition in any order which allows us to start walking + a graph from a starting point and then iteratively discovering new edges and + locking down the nodes those edges connect to. +- Due to the -EALREADY return code signalling that a given objects is already + held there's no need for additional book-keeping to break cycles in the graph + or keep track off which looks are already held (when using more than one node + as a starting point). + +Note that this approach differs in two important ways from the above methods: +- Since the list of objects is dynamically constructed (and might very well be + different when retrying due to hitting the -EDEADLK wound condition) there's + no need to keep any object on a persistent list when it's not locked. We can + therefore move the list_head into the object itself. +- On the other hand the dynamic object list construction also means that the -EALREADY return + code can't be propagated. + +Note also that methods #1 and #2 and method #3 can be combined, e.g. to first lock a +list of starting nodes (passed in from userspace) using one of the above +methods. And then lock any additional objects affected by the operations using +method #3 below. The backoff/retry procedure will be a bit more involved, since +when the dynamic locking step hits -EDEADLK we also need to unlock all the +objects acquired with the fixed list. But the w/w mutex debug checks will catch +any interface misuse for these cases. + +Also, method 3 can't fail the lock acquisition step since it doesn't return +-EALREADY. Of course this would be different when using the _interruptible +variants, but that's outside of the scope of these examples here. + +struct obj { + struct ww_mutex ww_mutex; + struct list_head locked_list; +}; + +static DEFINE_WW_CLASS(ww_class); + +void __unlock_objs(struct list_head *list) +{ + struct obj *entry, *temp; + + list_for_each_entry_safe (entry, temp, list, locked_list) { + /* need to do that before unlocking, since only the current lock holder is + allowed to use object */ + list_del(&entry->locked_list); + ww_mutex_unlock(entry->ww_mutex) + } +} + +void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) +{ + struct obj *obj; + + ww_acquire_init(ctx, &ww_class); + +retry: + /* re-init loop start state */ + loop { + /* magic code which walks over a graph and decides which objects + * to lock */ + + ret = ww_mutex_lock(obj->ww_mutex, ctx); + if (ret == -EALREADY) { + /* we have that one already, get to the next object */ + continue; + } + if (ret == -EDEADLK) { + __unlock_objs(list); + + ww_mutex_lock_slow(obj, ctx); + list_add(&entry->locked_list, list); + goto retry; + } + + /* locked a new object, add it to the list */ + list_add_tail(&entry->locked_list, list); + } + + ww_acquire_done(ctx); + return 0; +} + +void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) +{ + __unlock_objs(list); + ww_acquire_fini(ctx); +} + +Method 4: Only lock one single objects. In that case deadlock detection and +prevention is obviously overkill, since with grabbing just one lock you can't +produce a deadlock within just one class. To simplify this case the w/w mutex +api can be used with a NULL context. + +Implementation Details +---------------------- + +Design: + ww_mutex currently encapsulates a struct mutex, this means no extra overhead for + normal mutex locks, which are far more common. As such there is only a small + increase in code size if wait/wound mutexes are not used. + + In general, not much contention is expected. The locks are typically used to + serialize access to resources for devices. The only way to make wakeups + smarter would be at the cost of adding a field to struct mutex_waiter. This + would add overhead to all cases where normal mutexes are used, and + ww_mutexes are generally less performance sensitive. + +Lockdep: + Special care has been taken to warn for as many cases of api abuse + as possible. Some common api abuses will be caught with + CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended. + + Some of the errors which will be warned about: + - Forgetting to call ww_acquire_fini or ww_acquire_init. + - Attempting to lock more mutexes after ww_acquire_done. + - Attempting to lock the wrong mutex after -EDEADLK and + unlocking all mutexes. + - Attempting to lock the right mutex after -EDEADLK, + before unlocking all mutexes. + + - Calling ww_mutex_lock_slow before -EDEADLK was returned. + + - Unlocking mutexes with the wrong unlock function. + - Calling one of the ww_acquire_* twice on the same context. + - Using a different ww_class for the mutex than for the ww_acquire_ctx. + - Normal lockdep errors that can result in deadlocks. + + Some of the lockdep errors that can result in deadlocks: + - Calling ww_acquire_init to initialize a second ww_acquire_ctx before + having called ww_acquire_fini on the first. + - 'normal' deadlocks that can occur. + +FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic +implemented. diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt deleted file mode 100644 index 72d010689751..000000000000 --- a/Documentation/lockstat.txt +++ /dev/null @@ -1,178 +0,0 @@ - -LOCK STATISTICS - -- WHAT - -As the name suggests, it provides statistics on locks. - -- WHY - -Because things like lock contention can severely impact performance. - -- HOW - -Lockdep already has hooks in the lock functions and maps lock instances to -lock classes. We build on that (see Documentation/lockdep-design.txt). -The graph below shows the relation between the lock functions and the various -hooks therein. - - __acquire - | - lock _____ - | \ - | __contended - | | - | - | _______/ - |/ - | - __acquired - | - . - - . - | - __release - | - unlock - -lock, unlock - the regular lock functions -__* - the hooks -<> - states - -With these hooks we provide the following statistics: - - con-bounces - number of lock contention that involved x-cpu data - contentions - number of lock acquisitions that had to wait - wait time min - shortest (non-0) time we ever had to wait for a lock - max - longest time we ever had to wait for a lock - total - total time we spend waiting on this lock - avg - average time spent waiting on this lock - acq-bounces - number of lock acquisitions that involved x-cpu data - acquisitions - number of times we took the lock - hold time min - shortest (non-0) time we ever held the lock - max - longest time we ever held the lock - total - total time this lock was held - avg - average time this lock was held - -These numbers are gathered per lock class, per read/write state (when -applicable). - -It also tracks 4 contention points per class. A contention point is a call site -that had to wait on lock acquisition. - - - CONFIGURATION - -Lock statistics are enabled via CONFIG_LOCK_STAT. - - - USAGE - -Enable collection of statistics: - -# echo 1 >/proc/sys/kernel/lock_stat - -Disable collection of statistics: - -# echo 0 >/proc/sys/kernel/lock_stat - -Look at the current lock statistics: - -( line numbers not part of actual output, done for clarity in the explanation - below ) - -# less /proc/lock_stat - -01 lock_stat version 0.4 -02----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg -04----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -05 -06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99 -07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77 -08 --------------- -09 &mm->mmap_sem 1 [] khugepaged_scan_mm_slot+0x57/0x280 -19 &mm->mmap_sem 96 [] __do_page_fault+0x1d4/0x510 -11 &mm->mmap_sem 34 [] vm_mmap_pgoff+0x87/0xd0 -12 &mm->mmap_sem 17 [] vm_munmap+0x41/0x80 -13 --------------- -14 &mm->mmap_sem 1 [] dup_mmap+0x2a/0x3f0 -15 &mm->mmap_sem 60 [] SyS_mprotect+0xe9/0x250 -16 &mm->mmap_sem 41 [] __do_page_fault+0x1d4/0x510 -17 &mm->mmap_sem 68 [] vm_mmap_pgoff+0x87/0xd0 -18 -19............................................................................................................................................................................................................................. -20 -21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48 -22 --------------- -23 unix_table_lock 45 [] unix_create1+0x16e/0x1b0 -24 unix_table_lock 47 [] unix_release_sock+0x31/0x250 -25 unix_table_lock 15 [] unix_find_other+0x117/0x230 -26 unix_table_lock 5 [] unix_autobind+0x11f/0x1b0 -27 --------------- -28 unix_table_lock 39 [] unix_release_sock+0x31/0x250 -29 unix_table_lock 49 [] unix_create1+0x16e/0x1b0 -30 unix_table_lock 20 [] unix_find_other+0x117/0x230 -31 unix_table_lock 4 [] unix_autobind+0x11f/0x1b0 - - -This excerpt shows the first two lock class statistics. Line 01 shows the -output version - each time the format changes this will be updated. Line 02-04 -show the header with column descriptions. Lines 05-18 and 20-31 show the actual -statistics. These statistics come in two parts; the actual stats separated by a -short separator (line 08, 13) from the contention points. - -The first lock (05-18) is a read/write lock, and shows two lines above the -short separator. The contention points don't match the column descriptors, -they have two: contentions and [] symbol. The second set of contention -points are the points we're contending with. - -The integer part of the time values is in us. - -Dealing with nested locks, subclasses may appear: - -32........................................................................................................................................................................................................................... -33 -34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82 -35 --------- -36 &rq->lock 645 [] task_rq_lock+0x43/0x75 -37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a -38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a -39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb -40 --------- -41 &rq->lock 77 [] task_rq_lock+0x43/0x75 -42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a -43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 -44 &rq->lock 893 [] schedule+0x157/0x7b8 -45 -46........................................................................................................................................................................................................................... -47 -48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84 -49 ----------- -50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 -51 ----------- -52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 -53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 -54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 -55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a - -Line 48 shows statistics for the second subclass (/1) of &rq->lock class -(subclass starts from 0), since in this case, as line 50 suggests, -double_rq_lock actually acquires a nested lock of two spinlocks. - -View the top contending locks: - -# grep : /proc/lock_stat | head - clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71 - tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59 - &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59 - &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69 - &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93 - tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72 - tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89 - rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89 - &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27 - rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76 - -Clear the statistics: - -# echo 0 > /proc/lock_stat diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt deleted file mode 100644 index ee231ed09ec6..000000000000 --- a/Documentation/mutex-design.txt +++ /dev/null @@ -1,157 +0,0 @@ -Generic Mutex Subsystem - -started by Ingo Molnar -updated by Davidlohr Bueso - -What are mutexes? ------------------ - -In the Linux kernel, mutexes refer to a particular locking primitive -that enforces serialization on shared memory systems, and not only to -the generic term referring to 'mutual exclusion' found in academia -or similar theoretical text books. Mutexes are sleeping locks which -behave similarly to binary semaphores, and were introduced in 2006[1] -as an alternative to these. This new data structure provided a number -of advantages, including simpler interfaces, and at that time smaller -code (see Disadvantages). - -[1] http://lwn.net/Articles/164802/ - -Implementation --------------- - -Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h -and implemented in kernel/locking/mutex.c. These locks use a three -state atomic counter (->count) to represent the different possible -transitions that can occur during the lifetime of a lock: - - 1: unlocked - 0: locked, no waiters - negative: locked, with potential waiters - -In its most basic form it also includes a wait-queue and a spinlock -that serializes access to it. CONFIG_SMP systems can also include -a pointer to the lock task owner (->owner) as well as a spinner MCS -lock (->osq), both described below in (ii). - -When acquiring a mutex, there are three possible paths that can be -taken, depending on the state of the lock: - -(i) fastpath: tries to atomically acquire the lock by decrementing the - counter. If it was already taken by another task it goes to the next - possible path. This logic is architecture specific. On x86-64, the - locking fastpath is 2 instructions: - - 0000000000000e10 : - e21: f0 ff 0b lock decl (%rbx) - e24: 79 08 jns e2e - - the unlocking fastpath is equally tight: - - 0000000000000bc0 : - bc8: f0 ff 07 lock incl (%rdi) - bcb: 7f 0a jg bd7 - - -(ii) midpath: aka optimistic spinning, tries to spin for acquisition - while the lock owner is running and there are no other tasks ready - to run that have higher priority (need_resched). The rationale is - that if the lock owner is running, it is likely to release the lock - soon. The mutex spinners are queued up using MCS lock so that only - one spinner can compete for the mutex. - - The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock - with the desirable properties of being fair and with each cpu trying - to acquire the lock spinning on a local variable. It avoids expensive - cacheline bouncing that common test-and-set spinlock implementations - incur. An MCS-like lock is specially tailored for optimistic spinning - for sleeping lock implementation. An important feature of the customized - MCS lock is that it has the extra property that spinners are able to exit - the MCS spinlock queue when they need to reschedule. This further helps - avoid situations where MCS spinners that need to reschedule would continue - waiting to spin on mutex owner, only to go directly to slowpath upon - obtaining the MCS lock. - - -(iii) slowpath: last resort, if the lock is still unable to be acquired, - the task is added to the wait-queue and sleeps until woken up by the - unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE. - -While formally kernel mutexes are sleepable locks, it is path (ii) that -makes them more practically a hybrid type. By simply not interrupting a -task and busy-waiting for a few cycles instead of immediately sleeping, -the performance of this lock has been seen to significantly improve a -number of workloads. Note that this technique is also used for rw-semaphores. - -Semantics ---------- - -The mutex subsystem checks and enforces the following rules: - - - Only one task can hold the mutex at a time. - - Only the owner can unlock the mutex. - - Multiple unlocks are not permitted. - - Recursive locking/unlocking is not permitted. - - A mutex must only be initialized via the API (see below). - - A task may not exit with a mutex held. - - Memory areas where held locks reside must not be freed. - - Held mutexes must not be reinitialized. - - Mutexes may not be used in hardware or software interrupt - contexts such as tasklets and timers. - -These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled. -In addition, the mutex debugging code also implements a number of other -features that make lock debugging easier and faster: - - - Uses symbolic names of mutexes, whenever they are printed - in debug output. - - Point-of-acquire tracking, symbolic lookup of function names, - list of all locks held in the system, printout of them. - - Owner tracking. - - Detects self-recursing locks and prints out all relevant info. - - Detects multi-task circular deadlocks and prints out all affected - locks and tasks (and only those tasks). - - -Interfaces ----------- -Statically define the mutex: - DEFINE_MUTEX(name); - -Dynamically initialize the mutex: - mutex_init(mutex); - -Acquire the mutex, uninterruptible: - void mutex_lock(struct mutex *lock); - void mutex_lock_nested(struct mutex *lock, unsigned int subclass); - int mutex_trylock(struct mutex *lock); - -Acquire the mutex, interruptible: - int mutex_lock_interruptible_nested(struct mutex *lock, - unsigned int subclass); - int mutex_lock_interruptible(struct mutex *lock); - -Acquire the mutex, interruptible, if dec to 0: - int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); - -Unlock the mutex: - void mutex_unlock(struct mutex *lock); - -Test if the mutex is taken: - int mutex_is_locked(struct mutex *lock); - -Disadvantages -------------- - -Unlike its original design and purpose, 'struct mutex' is larger than -most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice -as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the -'struct rw_semaphore' variant. Larger structure sizes mean more CPU -cache and memory footprint. - -When to use mutexes -------------------- - -Unless the strict semantics of mutexes are unsuitable and/or the critical -region prevents the lock from being shared, always prefer them to any other -locking primitive. diff --git a/Documentation/rt-mutex-design.txt b/Documentation/rt-mutex-design.txt deleted file mode 100644 index 8666070d3189..000000000000 --- a/Documentation/rt-mutex-design.txt +++ /dev/null @@ -1,781 +0,0 @@ -# -# Copyright (c) 2006 Steven Rostedt -# Licensed under the GNU Free Documentation License, Version 1.2 -# - -RT-mutex implementation design ------------------------------- - -This document tries to describe the design of the rtmutex.c implementation. -It doesn't describe the reasons why rtmutex.c exists. For that please see -Documentation/rt-mutex.txt. Although this document does explain problems -that happen without this code, but that is in the concept to understand -what the code actually is doing. - -The goal of this document is to help others understand the priority -inheritance (PI) algorithm that is used, as well as reasons for the -decisions that were made to implement PI in the manner that was done. - - -Unbounded Priority Inversion ----------------------------- - -Priority inversion is when a lower priority process executes while a higher -priority process wants to run. This happens for several reasons, and -most of the time it can't be helped. Anytime a high priority process wants -to use a resource that a lower priority process has (a mutex for example), -the high priority process must wait until the lower priority process is done -with the resource. This is a priority inversion. What we want to prevent -is something called unbounded priority inversion. That is when the high -priority process is prevented from running by a lower priority process for -an undetermined amount of time. - -The classic example of unbounded priority inversion is where you have three -processes, let's call them processes A, B, and C, where A is the highest -priority process, C is the lowest, and B is in between. A tries to grab a lock -that C owns and must wait and lets C run to release the lock. But in the -meantime, B executes, and since B is of a higher priority than C, it preempts C, -but by doing so, it is in fact preempting A which is a higher priority process. -Now there's no way of knowing how long A will be sleeping waiting for C -to release the lock, because for all we know, B is a CPU hog and will -never give C a chance to release the lock. This is called unbounded priority -inversion. - -Here's a little ASCII art to show the problem. - - grab lock L1 (owned by C) - | -A ---+ - C preempted by B - | -C +----+ - -B +--------> - B now keeps A from running. - - -Priority Inheritance (PI) -------------------------- - -There are several ways to solve this issue, but other ways are out of scope -for this document. Here we only discuss PI. - -PI is where a process inherits the priority of another process if the other -process blocks on a lock owned by the current process. To make this easier -to understand, let's use the previous example, with processes A, B, and C again. - -This time, when A blocks on the lock owned by C, C would inherit the priority -of A. So now if B becomes runnable, it would not preempt C, since C now has -the high priority of A. As soon as C releases the lock, it loses its -inherited priority, and A then can continue with the resource that C had. - -Terminology ------------ - -Here I explain some terminology that is used in this document to help describe -the design that is used to implement PI. - -PI chain - The PI chain is an ordered series of locks and processes that cause - processes to inherit priorities from a previous process that is - blocked on one of its locks. This is described in more detail - later in this document. - -mutex - In this document, to differentiate from locks that implement - PI and spin locks that are used in the PI code, from now on - the PI locks will be called a mutex. - -lock - In this document from now on, I will use the term lock when - referring to spin locks that are used to protect parts of the PI - algorithm. These locks disable preemption for UP (when - CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from - entering critical sections simultaneously. - -spin lock - Same as lock above. - -waiter - A waiter is a struct that is stored on the stack of a blocked - process. Since the scope of the waiter is within the code for - a process being blocked on the mutex, it is fine to allocate - the waiter on the process's stack (local variable). This - structure holds a pointer to the task, as well as the mutex that - the task is blocked on. It also has the plist node structures to - place the task in the waiter_list of a mutex as well as the - pi_list of a mutex owner task (described below). - - waiter is sometimes used in reference to the task that is waiting - on a mutex. This is the same as waiter->task. - -waiters - A list of processes that are blocked on a mutex. - -top waiter - The highest priority process waiting on a specific mutex. - -top pi waiter - The highest priority process waiting on one of the mutexes - that a specific process owns. - -Note: task and process are used interchangeably in this document, mostly to - differentiate between two processes that are being described together. - - -PI chain --------- - -The PI chain is a list of processes and mutexes that may cause priority -inheritance to take place. Multiple chains may converge, but a chain -would never diverge, since a process can't be blocked on more than one -mutex at a time. - -Example: - - Process: A, B, C, D, E - Mutexes: L1, L2, L3, L4 - - A owns: L1 - B blocked on L1 - B owns L2 - C blocked on L2 - C owns L3 - D blocked on L3 - D owns L4 - E blocked on L4 - -The chain would be: - - E->L4->D->L3->C->L2->B->L1->A - -To show where two chains merge, we could add another process F and -another mutex L5 where B owns L5 and F is blocked on mutex L5. - -The chain for F would be: - - F->L5->B->L1->A - -Since a process may own more than one mutex, but never be blocked on more than -one, the chains merge. - -Here we show both chains: - - E->L4->D->L3->C->L2-+ - | - +->B->L1->A - | - F->L5-+ - -For PI to work, the processes at the right end of these chains (or we may -also call it the Top of the chain) must be equal to or higher in priority -than the processes to the left or below in the chain. - -Also since a mutex may have more than one process blocked on it, we can -have multiple chains merge at mutexes. If we add another process G that is -blocked on mutex L2: - - G->L2->B->L1->A - -And once again, to show how this can grow I will show the merging chains -again. - - E->L4->D->L3->C-+ - +->L2-+ - | | - G-+ +->B->L1->A - | - F->L5-+ - - -Plist ------ - -Before I go further and talk about how the PI chain is stored through lists -on both mutexes and processes, I'll explain the plist. This is similar to -the struct list_head functionality that is already in the kernel. -The implementation of plist is out of scope for this document, but it is -very important to understand what it does. - -There are a few differences between plist and list, the most important one -being that plist is a priority sorted linked list. This means that the -priorities of the plist are sorted, such that it takes O(1) to retrieve the -highest priority item in the list. Obviously this is useful to store processes -based on their priorities. - -Another difference, which is important for implementation, is that, unlike -list, the head of the list is a different element than the nodes of a list. -So the head of the list is declared as struct plist_head and nodes that will -be added to the list are declared as struct plist_node. - - -Mutex Waiter List ------------------ - -Every mutex keeps track of all the waiters that are blocked on itself. The mutex -has a plist to store these waiters by priority. This list is protected by -a spin lock that is located in the struct of the mutex. This lock is called -wait_lock. Since the modification of the waiter list is never done in -interrupt context, the wait_lock can be taken without disabling interrupts. - - -Task PI List ------------- - -To keep track of the PI chains, each process has its own PI list. This is -a list of all top waiters of the mutexes that are owned by the process. -Note that this list only holds the top waiters and not all waiters that are -blocked on mutexes owned by the process. - -The top of the task's PI list is always the highest priority task that -is waiting on a mutex that is owned by the task. So if the task has -inherited a priority, it will always be the priority of the task that is -at the top of this list. - -This list is stored in the task structure of a process as a plist called -pi_list. This list is protected by a spin lock also in the task structure, -called pi_lock. This lock may also be taken in interrupt context, so when -locking the pi_lock, interrupts must be disabled. - - -Depth of the PI Chain ---------------------- - -The maximum depth of the PI chain is not dynamic, and could actually be -defined. But is very complex to figure it out, since it depends on all -the nesting of mutexes. Let's look at the example where we have 3 mutexes, -L1, L2, and L3, and four separate functions func1, func2, func3 and func4. -The following shows a locking order of L1->L2->L3, but may not actually -be directly nested that way. - -void func1(void) -{ - mutex_lock(L1); - - /* do anything */ - - mutex_unlock(L1); -} - -void func2(void) -{ - mutex_lock(L1); - mutex_lock(L2); - - /* do something */ - - mutex_unlock(L2); - mutex_unlock(L1); -} - -void func3(void) -{ - mutex_lock(L2); - mutex_lock(L3); - - /* do something else */ - - mutex_unlock(L3); - mutex_unlock(L2); -} - -void func4(void) -{ - mutex_lock(L3); - - /* do something again */ - - mutex_unlock(L3); -} - -Now we add 4 processes that run each of these functions separately. -Processes A, B, C, and D which run functions func1, func2, func3 and func4 -respectively, and such that D runs first and A last. With D being preempted -in func4 in the "do something again" area, we have a locking that follows: - -D owns L3 - C blocked on L3 - C owns L2 - B blocked on L2 - B owns L1 - A blocked on L1 - -And thus we have the chain A->L1->B->L2->C->L3->D. - -This gives us a PI depth of 4 (four processes), but looking at any of the -functions individually, it seems as though they only have at most a locking -depth of two. So, although the locking depth is defined at compile time, -it still is very difficult to find the possibilities of that depth. - -Now since mutexes can be defined by user-land applications, we don't want a DOS -type of application that nests large amounts of mutexes to create a large -PI chain, and have the code holding spin locks while looking at a large -amount of data. So to prevent this, the implementation not only implements -a maximum lock depth, but also only holds at most two different locks at a -time, as it walks the PI chain. More about this below. - - -Mutex owner and flags ---------------------- - -The mutex structure contains a pointer to the owner of the mutex. If the -mutex is not owned, this owner is set to NULL. Since all architectures -have the task structure on at least a four byte alignment (and if this is -not true, the rtmutex.c code will be broken!), this allows for the two -least significant bits to be used as flags. This part is also described -in Documentation/rt-mutex.txt, but will also be briefly described here. - -Bit 0 is used as the "Pending Owner" flag. This is described later. -Bit 1 is used as the "Has Waiters" flags. This is also described later - in more detail, but is set whenever there are waiters on a mutex. - - -cmpxchg Tricks --------------- - -Some architectures implement an atomic cmpxchg (Compare and Exchange). This -is used (when applicable) to keep the fast path of grabbing and releasing -mutexes short. - -cmpxchg is basically the following function performed atomically: - -unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C) -{ - unsigned long T = *A; - if (*A == *B) { - *A = *C; - } - return T; -} -#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c) - -This is really nice to have, since it allows you to only update a variable -if the variable is what you expect it to be. You know if it succeeded if -the return value (the old value of A) is equal to B. - -The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If -the architecture does not support CMPXCHG, then this macro is simply set -to fail every time. But if CMPXCHG is supported, then this will -help out extremely to keep the fast path short. - -The use of rt_mutex_cmpxchg with the flags in the owner field help optimize -the system for architectures that support it. This will also be explained -later in this document. - - -Priority adjustments --------------------- - -The implementation of the PI code in rtmutex.c has several places that a -process must adjust its priority. With the help of the pi_list of a -process this is rather easy to know what needs to be adjusted. - -The functions implementing the task adjustments are rt_mutex_adjust_prio, -__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock -to already be taken), rt_mutex_getprio, and rt_mutex_setprio. - -rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio. - -rt_mutex_getprio returns the priority that the task should have. Either the -task's own normal priority, or if a process of a higher priority is waiting on -a mutex owned by the task, then that higher priority should be returned. -Since the pi_list of a task holds an order by priority list of all the top -waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs -to compare the top pi waiter to its own normal priority, and return the higher -priority back. - -(Note: if looking at the code, you will notice that the lower number of - prio is returned. This is because the prio field in the task structure - is an inverse order of the actual priority. So a "prio" of 5 is - of higher priority than a "prio" of 10.) - -__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the -result does not equal the task's current priority, then rt_mutex_setprio -is called to adjust the priority of the task to the new priority. -Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the -actual change in priority. - -It is interesting to note that __rt_mutex_adjust_prio can either increase -or decrease the priority of the task. In the case that a higher priority -process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio -would increase/boost the task's priority. But if a higher priority task -were for some reason to leave the mutex (timeout or signal), this same function -would decrease/unboost the priority of the task. That is because the pi_list -always contains the highest priority task that is waiting on a mutex owned -by the task, so we only need to compare the priority of that top pi waiter -to the normal priority of the given task. - - -High level overview of the PI chain walk ----------------------------------------- - -The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain. - -The implementation has gone through several iterations, and has ended up -with what we believe is the best. It walks the PI chain by only grabbing -at most two locks at a time, and is very efficient. - -The rt_mutex_adjust_prio_chain can be used either to boost or lower process -priorities. - -rt_mutex_adjust_prio_chain is called with a task to be checked for PI -(de)boosting (the owner of a mutex that a process is blocking on), a flag to -check for deadlocking, the mutex that the task owns, and a pointer to a waiter -that is the process's waiter struct that is blocked on the mutex (although this -parameter may be NULL for deboosting). - -For this explanation, I will not mention deadlock detection. This explanation -will try to stay at a high level. - -When this function is called, there are no locks held. That also means -that the state of the owner and lock can change when entered into this function. - -Before this function is called, the task has already had rt_mutex_adjust_prio -performed on it. This means that the task is set to the priority that it -should be at, but the plist nodes of the task's waiter have not been updated -with the new priorities, and that this task may not be in the proper locations -in the pi_lists and wait_lists that the task is blocked on. This function -solves all that. - -A loop is entered, where task is the owner to be checked for PI changes that -was passed by parameter (for the first iteration). The pi_lock of this task is -taken to prevent any more changes to the pi_list of the task. This also -prevents new tasks from completing the blocking on a mutex that is owned by this -task. - -If the task is not blocked on a mutex then the loop is exited. We are at -the top of the PI chain. - -A check is now done to see if the original waiter (the process that is blocked -on the current mutex) is the top pi waiter of the task. That is, is this -waiter on the top of the task's pi_list. If it is not, it either means that -there is another process higher in priority that is blocked on one of the -mutexes that the task owns, or that the waiter has just woken up via a signal -or timeout and has left the PI chain. In either case, the loop is exited, since -we don't need to do any more changes to the priority of the current task, or any -task that owns a mutex that this current task is waiting on. A priority chain -walk is only needed when a new top pi waiter is made to a task. - -The next check sees if the task's waiter plist node has the priority equal to -the priority the task is set at. If they are equal, then we are done with -the loop. Remember that the function started with the priority of the -task adjusted, but the plist nodes that hold the task in other processes -pi_lists have not been adjusted. - -Next, we look at the mutex that the task is blocked on. The mutex's wait_lock -is taken. This is done by a spin_trylock, because the locking order of the -pi_lock and wait_lock goes in the opposite direction. If we fail to grab the -lock, the pi_lock is released, and we restart the loop. - -Now that we have both the pi_lock of the task as well as the wait_lock of -the mutex the task is blocked on, we update the task's waiter's plist node -that is located on the mutex's wait_list. - -Now we release the pi_lock of the task. - -Next the owner of the mutex has its pi_lock taken, so we can update the -task's entry in the owner's pi_list. If the task is the highest priority -process on the mutex's wait_list, then we remove the previous top waiter -from the owner's pi_list, and replace it with the task. - -Note: It is possible that the task was the current top waiter on the mutex, - in which case the task is not yet on the pi_list of the waiter. This - is OK, since plist_del does nothing if the plist node is not on any - list. - -If the task was not the top waiter of the mutex, but it was before we -did the priority updates, that means we are deboosting/lowering the -task. In this case, the task is removed from the pi_list of the owner, -and the new top waiter is added. - -Lastly, we unlock both the pi_lock of the task, as well as the mutex's -wait_lock, and continue the loop again. On the next iteration of the -loop, the previous owner of the mutex will be the task that will be -processed. - -Note: One might think that the owner of this mutex might have changed - since we just grab the mutex's wait_lock. And one could be right. - The important thing to remember is that the owner could not have - become the task that is being processed in the PI chain, since - we have taken that task's pi_lock at the beginning of the loop. - So as long as there is an owner of this mutex that is not the same - process as the tasked being worked on, we are OK. - - Looking closely at the code, one might be confused. The check for the - end of the PI chain is when the task isn't blocked on anything or the - task's waiter structure "task" element is NULL. This check is - protected only by the task's pi_lock. But the code to unlock the mutex - sets the task's waiter structure "task" element to NULL with only - the protection of the mutex's wait_lock, which was not taken yet. - Isn't this a race condition if the task becomes the new owner? - - The answer is No! The trick is the spin_trylock of the mutex's - wait_lock. If we fail that lock, we release the pi_lock of the - task and continue the loop, doing the end of PI chain check again. - - In the code to release the lock, the wait_lock of the mutex is held - the entire time, and it is not let go when we grab the pi_lock of the - new owner of the mutex. So if the switch of a new owner were to happen - after the check for end of the PI chain and the grabbing of the - wait_lock, the unlocking code would spin on the new owner's pi_lock - but never give up the wait_lock. So the PI chain loop is guaranteed to - fail the spin_trylock on the wait_lock, release the pi_lock, and - try again. - - If you don't quite understand the above, that's OK. You don't have to, - unless you really want to make a proof out of it ;) - - -Pending Owners and Lock stealing --------------------------------- - -One of the flags in the owner field of the mutex structure is "Pending Owner". -What this means is that an owner was chosen by the process releasing the -mutex, but that owner has yet to wake up and actually take the mutex. - -Why is this important? Why can't we just give the mutex to another process -and be done with it? - -The PI code is to help with real-time processes, and to let the highest -priority process run as long as possible with little latencies and delays. -If a high priority process owns a mutex that a lower priority process is -blocked on, when the mutex is released it would be given to the lower priority -process. What if the higher priority process wants to take that mutex again. -The high priority process would fail to take that mutex that it just gave up -and it would need to boost the lower priority process to run with full -latency of that critical section (since the low priority process just entered -it). - -There's no reason a high priority process that gives up a mutex should be -penalized if it tries to take that mutex again. If the new owner of the -mutex has not woken up yet, there's no reason that the higher priority process -could not take that mutex away. - -To solve this, we introduced Pending Ownership and Lock Stealing. When a -new process is given a mutex that it was blocked on, it is only given -pending ownership. This means that it's the new owner, unless a higher -priority process comes in and tries to grab that mutex. If a higher priority -process does come along and wants that mutex, we let the higher priority -process "steal" the mutex from the pending owner (only if it is still pending) -and continue with the mutex. - - -Taking of a mutex (The walk through) ------------------------------------- - -OK, now let's take a look at the detailed walk through of what happens when -taking a mutex. - -The first thing that is tried is the fast taking of the mutex. This is -done when we have CMPXCHG enabled (otherwise the fast taking automatically -fails). Only when the owner field of the mutex is NULL can the lock be -taken with the CMPXCHG and nothing else needs to be done. - -If there is contention on the lock, whether it is owned or pending owner -we go about the slow path (rt_mutex_slowlock). - -The slow path function is where the task's waiter structure is created on -the stack. This is because the waiter structure is only needed for the -scope of this function. The waiter structure holds the nodes to store -the task on the wait_list of the mutex, and if need be, the pi_list of -the owner. - -The wait_lock of the mutex is taken since the slow path of unlocking the -mutex also takes this lock. - -We then call try_to_take_rt_mutex. This is where the architecture that -does not implement CMPXCHG would always grab the lock (if there's no -contention). - -try_to_take_rt_mutex is used every time the task tries to grab a mutex in the -slow path. The first thing that is done here is an atomic setting of -the "Has Waiters" flag of the mutex's owner field. Yes, this could really -be false, because if the mutex has no owner, there are no waiters and -the current task also won't have any waiters. But we don't have the lock -yet, so we assume we are going to be a waiter. The reason for this is to -play nice for those architectures that do have CMPXCHG. By setting this flag -now, the owner of the mutex can't release the mutex without going into the -slow unlock path, and it would then need to grab the wait_lock, which this -code currently holds. So setting the "Has Waiters" flag forces the owner -to synchronize with this code. - -Now that we know that we can't have any races with the owner releasing the -mutex, we check to see if we can take the ownership. This is done if the -mutex doesn't have a owner, or if we can steal the mutex from a pending -owner. Let's look at the situations we have here. - - 1) Has owner that is pending - ---------------------------- - - The mutex has a owner, but it hasn't woken up and the mutex flag - "Pending Owner" is set. The first check is to see if the owner isn't the - current task. This is because this function is also used for the pending - owner to grab the mutex. When a pending owner wakes up, it checks to see - if it can take the mutex, and this is done if the owner is already set to - itself. If so, we succeed and leave the function, clearing the "Pending - Owner" bit. - - If the pending owner is not current, we check to see if the current priority is - higher than the pending owner. If not, we fail the function and return. - - There's also something special about a pending owner. That is a pending owner - is never blocked on a mutex. So there is no PI chain to worry about. It also - means that if the mutex doesn't have any waiters, there's no accounting needed - to update the pending owner's pi_list, since we only worry about processes - blocked on the current mutex. - - If there are waiters on this mutex, and we just stole the ownership, we need - to take the top waiter, remove it from the pi_list of the pending owner, and - add it to the current pi_list. Note that at this moment, the pending owner - is no longer on the list of waiters. This is fine, since the pending owner - would add itself back when it realizes that it had the ownership stolen - from itself. When the pending owner tries to grab the mutex, it will fail - in try_to_take_rt_mutex if the owner field points to another process. - - 2) No owner - ----------- - - If there is no owner (or we successfully stole the lock), we set the owner - of the mutex to current, and set the flag of "Has Waiters" if the current - mutex actually has waiters, or we clear the flag if it doesn't. See, it was - OK that we set that flag early, since now it is cleared. - - 3) Failed to grab ownership - --------------------------- - - The most interesting case is when we fail to take ownership. This means that - there exists an owner, or there's a pending owner with equal or higher - priority than the current task. - -We'll continue on the failed case. - -If the mutex has a timeout, we set up a timer to go off to break us out -of this mutex if we failed to get it after a specified amount of time. - -Now we enter a loop that will continue to try to take ownership of the mutex, or -fail from a timeout or signal. - -Once again we try to take the mutex. This will usually fail the first time -in the loop, since it had just failed to get the mutex. But the second time -in the loop, this would likely succeed, since the task would likely be -the pending owner. - -If the mutex is TASK_INTERRUPTIBLE a check for signals and timeout is done -here. - -The waiter structure has a "task" field that points to the task that is blocked -on the mutex. This field can be NULL the first time it goes through the loop -or if the task is a pending owner and had its mutex stolen. If the "task" -field is NULL then we need to set up the accounting for it. - -Task blocks on mutex --------------------- - -The accounting of a mutex and process is done with the waiter structure of -the process. The "task" field is set to the process, and the "lock" field -to the mutex. The plist nodes are initialized to the processes current -priority. - -Since the wait_lock was taken at the entry of the slow lock, we can safely -add the waiter to the wait_list. If the current process is the highest -priority process currently waiting on this mutex, then we remove the -previous top waiter process (if it exists) from the pi_list of the owner, -and add the current process to that list. Since the pi_list of the owner -has changed, we call rt_mutex_adjust_prio on the owner to see if the owner -should adjust its priority accordingly. - -If the owner is also blocked on a lock, and had its pi_list changed -(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead -and run rt_mutex_adjust_prio_chain on the owner, as described earlier. - -Now all locks are released, and if the current process is still blocked on a -mutex (waiter "task" field is not NULL), then we go to sleep (call schedule). - -Waking up in the loop ---------------------- - -The schedule can then wake up for a few reasons. - 1) we were given pending ownership of the mutex. - 2) we received a signal and was TASK_INTERRUPTIBLE - 3) we had a timeout and was TASK_INTERRUPTIBLE - -In any of these cases, we continue the loop and once again try to grab the -ownership of the mutex. If we succeed, we exit the loop, otherwise we continue -and on signal and timeout, will exit the loop, or if we had the mutex stolen -we just simply add ourselves back on the lists and go back to sleep. - -Note: For various reasons, because of timeout and signals, the steal mutex - algorithm needs to be careful. This is because the current process is - still on the wait_list. And because of dynamic changing of priorities, - especially on SCHED_OTHER tasks, the current process can be the - highest priority task on the wait_list. - -Failed to get mutex on Timeout or Signal ----------------------------------------- - -If a timeout or signal occurred, the waiter's "task" field would not be -NULL and the task needs to be taken off the wait_list of the mutex and perhaps -pi_list of the owner. If this process was a high priority process, then -the rt_mutex_adjust_prio_chain needs to be executed again on the owner, -but this time it will be lowering the priorities. - - -Unlocking the Mutex -------------------- - -The unlocking of a mutex also has a fast path for those architectures with -CMPXCHG. Since the taking of a mutex on contention always sets the -"Has Waiters" flag of the mutex's owner, we use this to know if we need to -take the slow path when unlocking the mutex. If the mutex doesn't have any -waiters, the owner field of the mutex would equal the current process and -the mutex can be unlocked by just replacing the owner field with NULL. - -If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available), -the slow unlock path is taken. - -The first thing done in the slow unlock path is to take the wait_lock of the -mutex. This synchronizes the locking and unlocking of the mutex. - -A check is made to see if the mutex has waiters or not. On architectures that -do not have CMPXCHG, this is the location that the owner of the mutex will -determine if a waiter needs to be awoken or not. On architectures that -do have CMPXCHG, that check is done in the fast path, but it is still needed -in the slow path too. If a waiter of a mutex woke up because of a signal -or timeout between the time the owner failed the fast path CMPXCHG check and -the grabbing of the wait_lock, the mutex may not have any waiters, thus the -owner still needs to make this check. If there are no waiters then the mutex -owner field is set to NULL, the wait_lock is released and nothing more is -needed. - -If there are waiters, then we need to wake one up and give that waiter -pending ownership. - -On the wake up code, the pi_lock of the current owner is taken. The top -waiter of the lock is found and removed from the wait_list of the mutex -as well as the pi_list of the current owner. The task field of the new -pending owner's waiter structure is set to NULL, and the owner field of the -mutex is set to the new owner with the "Pending Owner" bit set, as well -as the "Has Waiters" bit if there still are other processes blocked on the -mutex. - -The pi_lock of the previous owner is released, and the new pending owner's -pi_lock is taken. Remember that this is the trick to prevent the race -condition in rt_mutex_adjust_prio_chain from adding itself as a waiter -on the mutex. - -We now clear the "pi_blocked_on" field of the new pending owner, and if -the mutex still has waiters pending, we add the new top waiter to the pi_list -of the pending owner. - -Finally we unlock the pi_lock of the pending owner and wake it up. - - -Contact -------- - -For updates on this document, please email Steven Rostedt - - -Credits -------- - -Author: Steven Rostedt - -Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap - -Updates -------- - -This document was originally written for 2.6.17-rc3-mm1 diff --git a/Documentation/rt-mutex.txt b/Documentation/rt-mutex.txt deleted file mode 100644 index 243393d882ee..000000000000 --- a/Documentation/rt-mutex.txt +++ /dev/null @@ -1,79 +0,0 @@ -RT-mutex subsystem with PI support ----------------------------------- - -RT-mutexes with priority inheritance are used to support PI-futexes, -which enable pthread_mutex_t priority inheritance attributes -(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details -about PI-futexes.] - -This technology was developed in the -rt tree and streamlined for -pthread_mutex support. - -Basic principles: ------------------ - -RT-mutexes extend the semantics of simple mutexes by the priority -inheritance protocol. - -A low priority owner of a rt-mutex inherits the priority of a higher -priority waiter until the rt-mutex is released. If the temporarily -boosted owner blocks on a rt-mutex itself it propagates the priority -boosting to the owner of the other rt_mutex it gets blocked on. The -priority boosting is immediately removed once the rt_mutex has been -unlocked. - -This approach allows us to shorten the block of high-prio tasks on -mutexes which protect shared resources. Priority inheritance is not a -magic bullet for poorly designed applications, but it allows -well-designed applications to use userspace locks in critical parts of -an high priority thread, without losing determinism. - -The enqueueing of the waiters into the rtmutex waiter list is done in -priority order. For same priorities FIFO order is chosen. For each -rtmutex, only the top priority waiter is enqueued into the owner's -priority waiters list. This list too queues in priority order. Whenever -the top priority waiter of a task changes (for example it timed out or -got a signal), the priority of the owner task is readjusted. [The -priority enqueueing is handled by "plists", see include/linux/plist.h -for more details.] - -RT-mutexes are optimized for fastpath operations and have no internal -locking overhead when locking an uncontended mutex or unlocking a mutex -without waiters. The optimized fastpath operations require cmpxchg -support. [If that is not available then the rt-mutex internal spinlock -is used] - -The state of the rt-mutex is tracked via the owner field of the rt-mutex -structure: - -rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1 -are used to keep track of the "owner is pending" and "rtmutex has -waiters" state. - - owner bit1 bit0 - NULL 0 0 mutex is free (fast acquire possible) - NULL 0 1 invalid state - NULL 1 0 Transitional state* - NULL 1 1 invalid state - taskpointer 0 0 mutex is held (fast release possible) - taskpointer 0 1 task is pending owner - taskpointer 1 0 mutex is held and has waiters - taskpointer 1 1 task is pending owner and mutex has waiters - -Pending-ownership handling is a performance optimization: -pending-ownership is assigned to the first (highest priority) waiter of -the mutex, when the mutex is released. The thread is woken up and once -it starts executing it can acquire the mutex. Until the mutex is taken -by it (bit 0 is cleared) a competing higher priority thread can "steal" -the mutex which puts the woken up thread back on the waiters list. - -The pending-ownership optimization is especially important for the -uninterrupted workflow of high-prio tasks which repeatedly -takes/releases locks that have lower-prio waiters. Without this -optimization the higher-prio thread would ping-pong to the lower-prio -task [because at unlock time we always assign a new owner]. - -(*) The "mutex has waiters" bit gets set to take the lock. If the lock -doesn't already have an owner, this bit is quickly cleared if there are -no waiters. So this is a transitional state to synchronize with looking -at the owner field of the mutex and the mutex owner releasing the lock. diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt deleted file mode 100644 index 97eaf5727178..000000000000 --- a/Documentation/spinlocks.txt +++ /dev/null @@ -1,167 +0,0 @@ -Lesson 1: Spin locks - -The most basic primitive for locking is spinlock. - -static DEFINE_SPINLOCK(xxx_lock); - - unsigned long flags; - - spin_lock_irqsave(&xxx_lock, flags); - ... critical section here .. - spin_unlock_irqrestore(&xxx_lock, flags); - -The above is always safe. It will disable interrupts _locally_, but the -spinlock itself will guarantee the global lock, so it will guarantee that -there is only one thread-of-control within the region(s) protected by that -lock. This works well even under UP also, so the code does _not_ need to -worry about UP vs SMP issues: the spinlocks work correctly under both. - - NOTE! Implications of spin_locks for memory are further described in: - - Documentation/memory-barriers.txt - (5) LOCK operations. - (6) UNLOCK operations. - -The above is usually pretty simple (you usually need and want only one -spinlock for most things - using more than one spinlock can make things a -lot more complex and even slower and is usually worth it only for -sequences that you _know_ need to be split up: avoid it at all cost if you -aren't sure). - -This is really the only really hard part about spinlocks: once you start -using spinlocks they tend to expand to areas you might not have noticed -before, because you have to make sure the spinlocks correctly protect the -shared data structures _everywhere_ they are used. The spinlocks are most -easily added to places that are completely independent of other code (for -example, internal driver data structures that nobody else ever touches). - - NOTE! The spin-lock is safe only when you _also_ use the lock itself - to do locking across CPU's, which implies that EVERYTHING that - touches a shared variable has to agree about the spinlock they want - to use. - ----- - -Lesson 2: reader-writer spinlocks. - -If your data accesses have a very natural pattern where you usually tend -to mostly read from the shared variables, the reader-writer locks -(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple -readers to be in the same critical region at once, but if somebody wants -to change the variables it has to get an exclusive write lock. - - NOTE! reader-writer locks require more atomic memory operations than - simple spinlocks. Unless the reader critical section is long, you - are better off just using spinlocks. - -The routines look the same as above: - - rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock); - - unsigned long flags; - - read_lock_irqsave(&xxx_lock, flags); - .. critical section that only reads the info ... - read_unlock_irqrestore(&xxx_lock, flags); - - write_lock_irqsave(&xxx_lock, flags); - .. read and write exclusive access to the info ... - write_unlock_irqrestore(&xxx_lock, flags); - -The above kind of lock may be useful for complex data structures like -linked lists, especially searching for entries without changing the list -itself. The read lock allows many concurrent readers. Anything that -_changes_ the list will have to get the write lock. - - NOTE! RCU is better for list traversal, but requires careful - attention to design detail (see Documentation/RCU/listRCU.txt). - -Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_ -time need to do any changes (even if you don't do it every time), you have -to get the write-lock at the very beginning. - - NOTE! We are working hard to remove reader-writer spinlocks in most - cases, so please don't add a new one without consensus. (Instead, see - Documentation/RCU/rcu.txt for complete information.) - ----- - -Lesson 3: spinlocks revisited. - -The single spin-lock primitives above are by no means the only ones. They -are the most safe ones, and the ones that work under all circumstances, -but partly _because_ they are safe they are also fairly slow. They are slower -than they'd need to be, because they do have to disable interrupts -(which is just a single instruction on a x86, but it's an expensive one - -and on other architectures it can be worse). - -If you have a case where you have to protect a data structure across -several CPU's and you want to use spinlocks you can potentially use -cheaper versions of the spinlocks. IFF you know that the spinlocks are -never used in interrupt handlers, you can use the non-irq versions: - - spin_lock(&lock); - ... - spin_unlock(&lock); - -(and the equivalent read-write versions too, of course). The spinlock will -guarantee the same kind of exclusive access, and it will be much faster. -This is useful if you know that the data in question is only ever -manipulated from a "process context", ie no interrupts involved. - -The reasons you mustn't use these versions if you have interrupts that -play with the spinlock is that you can get deadlocks: - - spin_lock(&lock); - ... - <- interrupt comes in: - spin_lock(&lock); - -where an interrupt tries to lock an already locked variable. This is ok if -the other interrupt happens on another CPU, but it is _not_ ok if the -interrupt happens on the same CPU that already holds the lock, because the -lock will obviously never be released (because the interrupt is waiting -for the lock, and the lock-holder is interrupted by the interrupt and will -not continue until the interrupt has been processed). - -(This is also the reason why the irq-versions of the spinlocks only need -to disable the _local_ interrupts - it's ok to use spinlocks in interrupts -on other CPU's, because an interrupt on another CPU doesn't interrupt the -CPU that holds the lock, so the lock-holder can continue and eventually -releases the lock). - -Note that you can be clever with read-write locks and interrupts. For -example, if you know that the interrupt only ever gets a read-lock, then -you can use a non-irq version of read locks everywhere - because they -don't block on each other (and thus there is no dead-lock wrt interrupts. -But when you do the write-lock, you have to use the irq-safe version. - -For an example of being clever with rw-locks, see the "waitqueue_lock" -handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from -within an interrupt, they only read the queue in order to know whom to -wake up. So read-locks are safe (which is good: they are very common -indeed), while write-locks need to protect themselves against interrupts. - - Linus - ----- - -Reference information: - -For dynamic initialization, use spin_lock_init() or rwlock_init() as -appropriate: - - spinlock_t xxx_lock; - rwlock_t xxx_rw_lock; - - static int __init xxx_init(void) - { - spin_lock_init(&xxx_lock); - rwlock_init(&xxx_rw_lock); - ... - } - - module_init(xxx_init); - -For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or -__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate. diff --git a/Documentation/ww-mutex-design.txt b/Documentation/ww-mutex-design.txt deleted file mode 100644 index 8a112dc304c3..000000000000 --- a/Documentation/ww-mutex-design.txt +++ /dev/null @@ -1,344 +0,0 @@ -Wait/Wound Deadlock-Proof Mutex Design -====================================== - -Please read mutex-design.txt first, as it applies to wait/wound mutexes too. - -Motivation for WW-Mutexes -------------------------- - -GPU's do operations that commonly involve many buffers. Those buffers -can be shared across contexts/processes, exist in different memory -domains (for example VRAM vs system memory), and so on. And with -PRIME / dmabuf, they can even be shared across devices. So there are -a handful of situations where the driver needs to wait for buffers to -become ready. If you think about this in terms of waiting on a buffer -mutex for it to become available, this presents a problem because -there is no way to guarantee that buffers appear in a execbuf/batch in -the same order in all contexts. That is directly under control of -userspace, and a result of the sequence of GL calls that an application -makes. Which results in the potential for deadlock. The problem gets -more complex when you consider that the kernel may need to migrate the -buffer(s) into VRAM before the GPU operates on the buffer(s), which -may in turn require evicting some other buffers (and you don't want to -evict other buffers which are already queued up to the GPU), but for a -simplified understanding of the problem you can ignore this. - -The algorithm that the TTM graphics subsystem came up with for dealing with -this problem is quite simple. For each group of buffers (execbuf) that need -to be locked, the caller would be assigned a unique reservation id/ticket, -from a global counter. In case of deadlock while locking all the buffers -associated with a execbuf, the one with the lowest reservation ticket (i.e. -the oldest task) wins, and the one with the higher reservation id (i.e. the -younger task) unlocks all of the buffers that it has already locked, and then -tries again. - -In the RDBMS literature this deadlock handling approach is called wait/wound: -The older tasks waits until it can acquire the contended lock. The younger tasks -needs to back off and drop all the locks it is currently holding, i.e. the -younger task is wounded. - -Concepts --------- - -Compared to normal mutexes two additional concepts/objects show up in the lock -interface for w/w mutexes: - -Acquire context: To ensure eventual forward progress it is important the a task -trying to acquire locks doesn't grab a new reservation id, but keeps the one it -acquired when starting the lock acquisition. This ticket is stored in the -acquire context. Furthermore the acquire context keeps track of debugging state -to catch w/w mutex interface abuse. - -W/w class: In contrast to normal mutexes the lock class needs to be explicit for -w/w mutexes, since it is required to initialize the acquire context. - -Furthermore there are three different class of w/w lock acquire functions: - -* Normal lock acquisition with a context, using ww_mutex_lock. - -* Slowpath lock acquisition on the contending lock, used by the wounded task - after having dropped all already acquired locks. These functions have the - _slow postfix. - - From a simple semantics point-of-view the _slow functions are not strictly - required, since simply calling the normal ww_mutex_lock functions on the - contending lock (after having dropped all other already acquired locks) will - work correctly. After all if no other ww mutex has been acquired yet there's - no deadlock potential and hence the ww_mutex_lock call will block and not - prematurely return -EDEADLK. The advantage of the _slow functions is in - interface safety: - - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow - has a void return type. Note that since ww mutex code needs loops/retries - anyway the __must_check doesn't result in spurious warnings, even though the - very first lock operation can never fail. - - When full debugging is enabled ww_mutex_lock_slow checks that all acquired - ww mutex have been released (preventing deadlocks) and makes sure that we - block on the contending lock (preventing spinning through the -EDEADLK - slowpath until the contended lock can be acquired). - -* Functions to only acquire a single w/w mutex, which results in the exact same - semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL - context. - - Again this is not strictly required. But often you only want to acquire a - single lock in which case it's pointless to set up an acquire context (and so - better to avoid grabbing a deadlock avoidance ticket). - -Of course, all the usual variants for handling wake-ups due to signals are also -provided. - -Usage ------ - -Three different ways to acquire locks within the same w/w class. Common -definitions for methods #1 and #2: - -static DEFINE_WW_CLASS(ww_class); - -struct obj { - struct ww_mutex lock; - /* obj data */ -}; - -struct obj_entry { - struct list_head head; - struct obj *obj; -}; - -Method 1, using a list in execbuf->buffers that's not allowed to be reordered. -This is useful if a list of required objects is already tracked somewhere. -Furthermore the lock helper can use propagate the -EALREADY return code back to -the caller as a signal that an object is twice on the list. This is useful if -the list is constructed from userspace input and the ABI requires userspace to -not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl). - -int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) -{ - struct obj *res_obj = NULL; - struct obj_entry *contended_entry = NULL; - struct obj_entry *entry; - - ww_acquire_init(ctx, &ww_class); - -retry: - list_for_each_entry (entry, list, head) { - if (entry->obj == res_obj) { - res_obj = NULL; - continue; - } - ret = ww_mutex_lock(&entry->obj->lock, ctx); - if (ret < 0) { - contended_entry = entry; - goto err; - } - } - - ww_acquire_done(ctx); - return 0; - -err: - list_for_each_entry_continue_reverse (entry, list, head) - ww_mutex_unlock(&entry->obj->lock); - - if (res_obj) - ww_mutex_unlock(&res_obj->lock); - - if (ret == -EDEADLK) { - /* we lost out in a seqno race, lock and retry.. */ - ww_mutex_lock_slow(&contended_entry->obj->lock, ctx); - res_obj = contended_entry->obj; - goto retry; - } - ww_acquire_fini(ctx); - - return ret; -} - -Method 2, using a list in execbuf->buffers that can be reordered. Same semantics -of duplicate entry detection using -EALREADY as method 1 above. But the -list-reordering allows for a bit more idiomatic code. - -int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) -{ - struct obj_entry *entry, *entry2; - - ww_acquire_init(ctx, &ww_class); - - list_for_each_entry (entry, list, head) { - ret = ww_mutex_lock(&entry->obj->lock, ctx); - if (ret < 0) { - entry2 = entry; - - list_for_each_entry_continue_reverse (entry2, list, head) - ww_mutex_unlock(&entry2->obj->lock); - - if (ret != -EDEADLK) { - ww_acquire_fini(ctx); - return ret; - } - - /* we lost out in a seqno race, lock and retry.. */ - ww_mutex_lock_slow(&entry->obj->lock, ctx); - - /* - * Move buf to head of the list, this will point - * buf->next to the first unlocked entry, - * restarting the for loop. - */ - list_del(&entry->head); - list_add(&entry->head, list); - } - } - - ww_acquire_done(ctx); - return 0; -} - -Unlocking works the same way for both methods #1 and #2: - -void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) -{ - struct obj_entry *entry; - - list_for_each_entry (entry, list, head) - ww_mutex_unlock(&entry->obj->lock); - - ww_acquire_fini(ctx); -} - -Method 3 is useful if the list of objects is constructed ad-hoc and not upfront, -e.g. when adjusting edges in a graph where each node has its own ww_mutex lock, -and edges can only be changed when holding the locks of all involved nodes. w/w -mutexes are a natural fit for such a case for two reasons: -- They can handle lock-acquisition in any order which allows us to start walking - a graph from a starting point and then iteratively discovering new edges and - locking down the nodes those edges connect to. -- Due to the -EALREADY return code signalling that a given objects is already - held there's no need for additional book-keeping to break cycles in the graph - or keep track off which looks are already held (when using more than one node - as a starting point). - -Note that this approach differs in two important ways from the above methods: -- Since the list of objects is dynamically constructed (and might very well be - different when retrying due to hitting the -EDEADLK wound condition) there's - no need to keep any object on a persistent list when it's not locked. We can - therefore move the list_head into the object itself. -- On the other hand the dynamic object list construction also means that the -EALREADY return - code can't be propagated. - -Note also that methods #1 and #2 and method #3 can be combined, e.g. to first lock a -list of starting nodes (passed in from userspace) using one of the above -methods. And then lock any additional objects affected by the operations using -method #3 below. The backoff/retry procedure will be a bit more involved, since -when the dynamic locking step hits -EDEADLK we also need to unlock all the -objects acquired with the fixed list. But the w/w mutex debug checks will catch -any interface misuse for these cases. - -Also, method 3 can't fail the lock acquisition step since it doesn't return --EALREADY. Of course this would be different when using the _interruptible -variants, but that's outside of the scope of these examples here. - -struct obj { - struct ww_mutex ww_mutex; - struct list_head locked_list; -}; - -static DEFINE_WW_CLASS(ww_class); - -void __unlock_objs(struct list_head *list) -{ - struct obj *entry, *temp; - - list_for_each_entry_safe (entry, temp, list, locked_list) { - /* need to do that before unlocking, since only the current lock holder is - allowed to use object */ - list_del(&entry->locked_list); - ww_mutex_unlock(entry->ww_mutex) - } -} - -void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) -{ - struct obj *obj; - - ww_acquire_init(ctx, &ww_class); - -retry: - /* re-init loop start state */ - loop { - /* magic code which walks over a graph and decides which objects - * to lock */ - - ret = ww_mutex_lock(obj->ww_mutex, ctx); - if (ret == -EALREADY) { - /* we have that one already, get to the next object */ - continue; - } - if (ret == -EDEADLK) { - __unlock_objs(list); - - ww_mutex_lock_slow(obj, ctx); - list_add(&entry->locked_list, list); - goto retry; - } - - /* locked a new object, add it to the list */ - list_add_tail(&entry->locked_list, list); - } - - ww_acquire_done(ctx); - return 0; -} - -void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx) -{ - __unlock_objs(list); - ww_acquire_fini(ctx); -} - -Method 4: Only lock one single objects. In that case deadlock detection and -prevention is obviously overkill, since with grabbing just one lock you can't -produce a deadlock within just one class. To simplify this case the w/w mutex -api can be used with a NULL context. - -Implementation Details ----------------------- - -Design: - ww_mutex currently encapsulates a struct mutex, this means no extra overhead for - normal mutex locks, which are far more common. As such there is only a small - increase in code size if wait/wound mutexes are not used. - - In general, not much contention is expected. The locks are typically used to - serialize access to resources for devices. The only way to make wakeups - smarter would be at the cost of adding a field to struct mutex_waiter. This - would add overhead to all cases where normal mutexes are used, and - ww_mutexes are generally less performance sensitive. - -Lockdep: - Special care has been taken to warn for as many cases of api abuse - as possible. Some common api abuses will be caught with - CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended. - - Some of the errors which will be warned about: - - Forgetting to call ww_acquire_fini or ww_acquire_init. - - Attempting to lock more mutexes after ww_acquire_done. - - Attempting to lock the wrong mutex after -EDEADLK and - unlocking all mutexes. - - Attempting to lock the right mutex after -EDEADLK, - before unlocking all mutexes. - - - Calling ww_mutex_lock_slow before -EDEADLK was returned. - - - Unlocking mutexes with the wrong unlock function. - - Calling one of the ww_acquire_* twice on the same context. - - Using a different ww_class for the mutex than for the ww_acquire_ctx. - - Normal lockdep errors that can result in deadlocks. - - Some of the lockdep errors that can result in deadlocks: - - Calling ww_acquire_init to initialize a second ww_acquire_ctx before - having called ww_acquire_fini on the first. - - 'normal' deadlocks that can occur. - -FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic -implemented. diff --git a/MAINTAINERS b/MAINTAINERS index 1acc624ecfd7..aac481fcbf5c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5523,8 +5523,8 @@ M: Ingo Molnar L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking S: Maintained -F: Documentation/lockdep*.txt -F: Documentation/lockstat.txt +F: Documentation/locking/lockdep*.txt +F: Documentation/locking/lockstat.txt F: include/linux/lockdep.h F: kernel/locking/ diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 0dc57d5ecd10..3a02e5e3e9f3 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -35,7 +35,7 @@ * of extra utility/tracking out of our acquire-ctx. This is provided * by drm_modeset_lock / drm_modeset_acquire_ctx. * - * For basic principles of ww_mutex, see: Documentation/ww-mutex-design.txt + * For basic principles of ww_mutex, see: Documentation/locking/ww-mutex-design.txt * * The basic usage pattern is to: * diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 008388f920d7..f388481201cd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -4,7 +4,7 @@ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * - * see Documentation/lockdep-design.txt for more details. + * see Documentation/locking/lockdep-design.txt for more details. */ #ifndef __LINUX_LOCKDEP_H #define __LINUX_LOCKDEP_H diff --git a/include/linux/mutex.h b/include/linux/mutex.h index e4c29418f407..cc31498fc526 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -133,7 +133,7 @@ static inline int mutex_is_locked(struct mutex *lock) /* * See kernel/locking/mutex.c for detailed documentation of these APIs. - * Also see Documentation/mutex-design.txt. + * Also see Documentation/locking/mutex-design.txt. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 035d3c57fc8a..8f498cdde280 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -149,7 +149,7 @@ extern void downgrade_write(struct rw_semaphore *sem); * static then another method for expressing nested locking is * the explicit definition of lock class keys and the use of * lockdep_set_class() at lock initialization time. - * See Documentation/lockdep-design.txt for more details.) + * See Documentation/locking/lockdep-design.txt for more details.) */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 0d8b6ed93874..dadbf88c22c4 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -15,7 +15,7 @@ * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale * and Sven Dietrich. * - * Also see Documentation/mutex-design.txt. + * Also see Documentation/locking/mutex-design.txt. */ #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index a0ea2a141b3b..7c98873a3077 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -8,7 +8,7 @@ * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt * Copyright (C) 2006 Esben Nielsen * - * See Documentation/rt-mutex-design.txt for details. + * See Documentation/locking/rt-mutex-design.txt for details. */ #include #include diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 901096d31c66..9b94a063e26c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -924,7 +924,7 @@ config PROVE_LOCKING the proof of observed correctness is also maintained for an arbitrary combination of these separate locking variants. - For more details, see Documentation/lockdep-design.txt. + For more details, see Documentation/locking/lockdep-design.txt. config LOCKDEP bool @@ -945,7 +945,7 @@ config LOCK_STAT help This feature enables tracking lock contention points - For more details, see Documentation/lockstat.txt + For more details, see Documentation/locking/lockstat.txt This also enables lock events required by "perf lock", subcommand of perf. -- cgit v1.2.3 From 4999201a59ef555f9105d2bb2459ed895627f7aa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 8 Aug 2014 12:35:36 +0200 Subject: locking/spinlocks: Always evaluate the second argument of spin_lock_nested() Evaluating a macro argument only if certain configuration options have been selected is confusing and error-prone. Hence always evaluate the second argument of spin_lock_nested(). An intentional side effect of this patch is that it avoids that the following warning is reported for netif_addr_lock_nested() when building with CONFIG_DEBUG_LOCK_ALLOC=n and with W=1: include/linux/netdevice.h: In function 'netif_addr_lock_nested': include/linux/netdevice.h:2865:6: warning: variable 'subclass' set but not used [-Wunused-but-set-variable] int subclass = SINGLE_DEPTH_NESTING; ^ Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra Cc: David Rientjes Cc: David S. Miller Cc: Andrew Morton Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/53E4A7F8.1040700@acm.org Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3f2867ff0ced..262ba4ef9a8e 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -197,7 +197,13 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) _raw_spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) #else -# define raw_spin_lock_nested(lock, subclass) _raw_spin_lock(lock) +/* + * Always evaluate the 'subclass' argument to avoid that the compiler + * warns about set-but-not-used variables when building with + * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1. + */ +# define raw_spin_lock_nested(lock, subclass) \ + _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) #endif -- cgit v1.2.3 From f0bab73cb539fb803c4d419951e8d28aa4964f8f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2014 13:22:01 -0400 Subject: locking/lockdep: Restrict the use of recursive read_lock() with qrwlock Unlike the original unfair rwlock implementation, queued rwlock will grant lock according to the chronological sequence of the lock requests except when the lock requester is in the interrupt context. Consequently, recursive read_lock calls will now hang the process if there is a write_lock call somewhere in between the read_lock calls. This patch updates the lockdep implementation to look for recursive read_lock calls. A new read state (3) is used to mark those read_lock call that cannot be recursively called except in the interrupt context. The new read state does exhaust the 2 bits available in held_lock:read bit field. The addition of any new read state in the future may require a redesign of how all those bits are squeezed together in the held_lock structure. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra Cc: Maarten Lankhorst Cc: Rik van Riel Cc: Scott J Norton Cc: Fengguang Wu Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1407345722-61615-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 10 +++++++++- kernel/locking/lockdep.c | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f388481201cd..b5a84b62fb84 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -478,16 +478,24 @@ static inline void print_irqtrace_events(struct task_struct *curr) * on the per lock-class debug mode: */ +/* + * Read states in the 2-bit held_lock:read field: + * 0: Exclusive lock + * 1: Shareable lock, cannot be recursively called + * 2: Shareable lock, can be recursively called + * 3: Shareable lock, cannot be recursively called except in interrupt context + */ #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) +#define lock_acquire_shared_irecursive(l, s, t, n, i) lock_acquire(l, s, t, 3, 1, n, i) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define spin_release(l, n, i) lock_release(l, n, i) #define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) -#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_irecursive(l, s, t, NULL, i) #define rwlock_release(l, n, i) lock_release(l, n, i) #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 88d0d4420ad2..420ba685c4e5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3597,6 +3597,12 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, raw_local_irq_save(flags); check_flags(flags); + /* + * An interrupt recursive read in interrupt context can be considered + * to be the same as a recursive read from checking perspective. + */ + if ((read == 3) && in_interrupt()) + read = 2; current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, -- cgit v1.2.3 From 2c67568903d6ae1b8cfa343c649029180239418e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 8 Aug 2014 13:02:36 +0200 Subject: spi: Add missing kerneldoc bits These are all arguments or fields that got added without updating the kerneldoc comments. Signed-off-by: Thierry Reding Signed-off-by: Mark Brown --- drivers/spi/spi.c | 1 + include/linux/spi/spi.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index e6f076d5ffd5..f52f3e647ef8 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -843,6 +843,7 @@ out: /** * spi_finalize_current_transfer - report completion of a transfer + * @master: the master reporting completion * * Called by SPI drivers using the core transfer_one_message() * implementation to notify it that the current interrupt driven diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e713543336f1..46d188a9947c 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -253,6 +253,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * the device whose settings are being modified. * @transfer: adds a message to the controller's transfer queue. * @cleanup: frees controller-specific state + * @can_dma: determine whether this master supports DMA * @queued: whether this master is providing an internal message queue * @kworker: thread struct for message pump * @kworker_task: pointer to task for message pump kworker thread @@ -262,6 +263,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @cur_msg: the currently in-flight message * @cur_msg_prepared: spi_prepare_message was called for the currently * in-flight message + * @cur_msg_mapped: message has been mapped for DMA * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running @@ -299,6 +301,10 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). + * @dma_tx: DMA transmit channel + * @dma_rx: DMA receive channel + * @dummy_rx: dummy receive buffer for full-duplex devices + * @dummy_tx: dummy transmit buffer for full-duplex devices * * Each SPI master controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -632,6 +638,7 @@ struct spi_transfer { * addresses for each transfer buffer * @complete: called to report transaction completions * @context: the argument to complete() when it's called + * @frame_length: the total number of bytes in the message * @actual_length: the total number of bytes that were transferred in all * successful segments * @status: zero for success, else negative errno -- cgit v1.2.3 From 5300fdcb7b7e97d83033bc7196582705524d35ea Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 13 Aug 2014 16:38:29 +0200 Subject: rhashtable: RCU annotations for next pointers Properly annotate next pointers as access is RCU protected in the lookup path. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++-- lib/rhashtable.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 9cda293c867d..8c6048e77f29 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -21,7 +21,7 @@ #include struct rhash_head { - struct rhash_head *next; + struct rhash_head __rcu *next; }; #define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) @@ -97,7 +97,7 @@ u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t); void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head **pprev, gfp_t flags); + struct rhash_head __rcu **pprev, gfp_t flags); bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e6940cf16628..338dd7aa5e13 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert); * deletion when combined with walking or lookup. */ void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head **pprev, gfp_t flags) + struct rhash_head __rcu **pprev, gfp_t flags) { struct bucket_table *tbl = rht_dereference(ht->tbl, ht); -- cgit v1.2.3 From c91eee56dc4f8c3d9ae834bacb835596d47a709e Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 13 Aug 2014 16:38:30 +0200 Subject: rhashtable: unexport and make rht_obj() static No need to export rht_obj(), all inner to outer object translations occur internally. It was intended to be used with rht_for_each() which now primarily serves as the iterator for rhashtable_remove_pprev() to effectively flush and free the full table. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 1 - lib/rhashtable.c | 8 +------- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 8c6048e77f29..af967c4c7591 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -117,7 +117,6 @@ void rhashtable_destroy(const struct rhashtable *ht); #define rht_dereference_rcu(p, ht) \ rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) -/* Internal, use rht_obj() instead */ #define rht_entry(ptr, type, member) container_of(ptr, type, member) #define rht_entry_safe(ptr, type, member) \ ({ \ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 338dd7aa5e13..a2c78810ebc1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -38,16 +38,10 @@ int lockdep_rht_mutex_is_held(const struct rhashtable *ht) EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); #endif -/** - * rht_obj - cast hash head to outer object - * @ht: hash table - * @he: hashed node - */ -void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) +static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { return (void *) he - ht->p.head_offset; } -EXPORT_SYMBOL_GPL(rht_obj); static u32 __hashfn(const struct rhashtable *ht, const void *key, u32 len, u32 hsize) -- cgit v1.2.3 From 93f560811e80216e98f3fcec220aa0f8836b09af Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 13 Aug 2014 16:38:31 +0200 Subject: rhashtable: fix annotations for rht_for_each_entry_rcu() Call rcu_deference_raw() directly from within rht_for_each_entry_rcu() as list_for_each_entry_rcu() does. Fixes the following sparse warnings: net/netlink/af_netlink.c:2906:25: expected struct rhash_head const *__mptr net/netlink/af_netlink.c:2906:25: got struct rhash_head [noderef] * Fixes: e341694e3eb57fc ("netlink: Convert netlink_lookup() to use RCU protected hash table") Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index af967c4c7591..36826c0166c5 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -123,11 +123,6 @@ void rhashtable_destroy(const struct rhashtable *ht); typeof(ptr) __ptr = (ptr); \ __ptr ? rht_entry(__ptr, type, member) : NULL; \ }) -#define rht_entry_safe_rcu(ptr, type, member) \ -({ \ - typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ - __ptr ? container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member) : NULL; \ -}) #define rht_next_entry_safe(pos, ht, member) \ ({ \ @@ -204,9 +199,10 @@ void rhashtable_destroy(const struct rhashtable *ht); * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(pos, head, member) \ - for (pos = rht_entry_safe_rcu(head, typeof(*(pos)), member); \ + for (pos = rht_entry_safe(rcu_dereference_raw(head), \ + typeof(*(pos)), member); \ pos; \ - pos = rht_entry_safe_rcu((pos)->member.next, \ - typeof(*(pos)), member)) + pos = rht_entry_safe(rcu_dereference_raw((pos)->member.next), \ + typeof(*(pos)), member)) #endif /* _LINUX_RHASHTABLE_H */ -- cgit v1.2.3 From 8a58d1f1f373238cb0d6d7f8d3dd723aa164b8ac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2014 12:38:41 -0600 Subject: blk-mq: get rid of unused BLK_MQ_F_SHOULD_SORT flag We used to use this for determining whether to sort the dispatch list, but it's unused now. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index eb726b9c5762..a1e31f274fcd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -127,10 +127,9 @@ enum { BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */ BLK_MQ_F_SHOULD_MERGE = 1 << 0, - BLK_MQ_F_SHOULD_SORT = 1 << 1, - BLK_MQ_F_TAG_SHARED = 1 << 2, - BLK_MQ_F_SG_MERGE = 1 << 3, - BLK_MQ_F_SYSFS_UP = 1 << 4, + BLK_MQ_F_TAG_SHARED = 1 << 1, + BLK_MQ_F_SG_MERGE = 1 << 2, + BLK_MQ_F_SYSFS_UP = 1 << 3, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, -- cgit v1.2.3 From 515d9b2c03943ca904cd135e1b1d9ddd168c1b27 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 12 Aug 2014 18:22:27 +0200 Subject: ata: remove deprecated struct ahci_platform_data The last user of the deprecated struct ahci_platform_data has been cleaned up recently (SPEAr1340 got a proper PHY driver). Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/ahci_platform.c | 18 +----------------- drivers/ata/libahci_platform.c | 23 ----------------------- include/linux/ahci_platform.h | 13 ------------- 3 files changed, 1 insertion(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index f61ddb9146d6..06f1d59fa678 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -32,7 +32,6 @@ static const struct ata_port_info ahci_port_info = { static int ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ahci_host_priv *hpriv; int rc; @@ -44,29 +43,14 @@ static int ahci_probe(struct platform_device *pdev) if (rc) return rc; - /* - * Some platforms might need to prepare for mmio region access, - * which could be done in the following init call. So, the mmio - * region shouldn't be accessed before init (if provided) has - * returned successfully. - */ - if (pdata && pdata->init) { - rc = pdata->init(dev, hpriv->mmio); - if (rc) - goto disable_resources; - } - if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci")) hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ; rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info); if (rc) - goto pdata_exit; + goto disable_resources; return 0; -pdata_exit: - if (pdata && pdata->exit) - pdata->exit(dev); disable_resources: ahci_platform_disable_resources(hpriv); return rc; diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 5b92c290e6c6..c0510de8a4c9 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -502,13 +502,8 @@ EXPORT_SYMBOL_GPL(ahci_platform_init_host); static void ahci_host_stop(struct ata_host *host) { - struct device *dev = host->dev; - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ahci_host_priv *hpriv = host->private_data; - if (pdata && pdata->exit) - pdata->exit(dev); - ahci_platform_disable_resources(hpriv); } @@ -592,7 +587,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_resume_host); */ int ahci_platform_suspend(struct device *dev) { - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; int rc; @@ -601,19 +595,9 @@ int ahci_platform_suspend(struct device *dev) if (rc) return rc; - if (pdata && pdata->suspend) { - rc = pdata->suspend(dev); - if (rc) - goto resume_host; - } - ahci_platform_disable_resources(hpriv); return 0; - -resume_host: - ahci_platform_resume_host(dev); - return rc; } EXPORT_SYMBOL_GPL(ahci_platform_suspend); @@ -629,7 +613,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend); */ int ahci_platform_resume(struct device *dev) { - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; int rc; @@ -638,12 +621,6 @@ int ahci_platform_resume(struct device *dev) if (rc) return rc; - if (pdata && pdata->resume) { - rc = pdata->resume(dev); - if (rc) - goto disable_resources; - } - rc = ahci_platform_resume_host(dev); if (rc) goto disable_resources; diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 09a947e8bc87..642d6ae4030c 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -22,19 +22,6 @@ struct ata_port_info; struct ahci_host_priv; struct platform_device; -/* - * Note ahci_platform_data is deprecated, it is only kept around for use - * by the old da850 and spear13xx ahci code. - * New drivers should instead declare their own platform_driver struct, and - * use ahci_platform* functions in their own probe, suspend and resume methods. - */ -struct ahci_platform_data { - int (*init)(struct device *dev, void __iomem *addr); - void (*exit)(struct device *dev); - int (*suspend)(struct device *dev); - int (*resume)(struct device *dev); -}; - int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); -- cgit v1.2.3 From 005547e0828ce9064afebb1e6d56a18efd80e7a3 Mon Sep 17 00:00:00 2001 From: James Ban Date: Fri, 8 Aug 2014 14:27:04 +0900 Subject: regulator: da9211: support DA9213 This is a patch for supporting DA9213. Signed-off-by: James Ban Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 9 ++-- drivers/regulator/da9211-regulator.c | 95 ++++++++++++++++++++++++++++-------- drivers/regulator/da9211-regulator.h | 7 ++- include/linux/regulator/da9211.h | 7 ++- 4 files changed, 91 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 2dc8289e5dba..1344aa83b438 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -199,13 +199,14 @@ config REGULATOR_DA9210 interface. config REGULATOR_DA9211 - tristate "Dialog Semiconductor DA9211/DA9212 regulator" + tristate "Dialog Semiconductor DA9211/DA9212/DA9213/DA9214 regulator" depends on I2C select REGMAP_I2C help - Say y here to support for the Dialog Semiconductor DA9211/DA9212. - The DA9211/DA9212 is a multi-phase synchronous step down - converter 12A DC-DC Buck controlled through an I2C + Say y here to support for the Dialog Semiconductor DA9211/DA9212 + /DA9213/DA9214. + The DA9211/DA9212/DA9213/DA9214 is a multi-phase synchronous + step down converter 12A or 16A DC-DC Buck controlled through an I2C interface. config REGULATOR_DBX500_PRCMU diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index 1482adafa1ad..ccc2e362d751 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -1,5 +1,5 @@ /* - * da9211-regulator.c - Regulator device driver for DA9211 + * da9211-regulator.c - Regulator device driver for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. * * This library is free software; you can redistribute it and/or @@ -27,6 +27,10 @@ #include #include "da9211-regulator.h" +/* DEVICE IDs */ +#define DA9211_DEVICE_ID 0x22 +#define DA9213_DEVICE_ID 0x23 + #define DA9211_BUCK_MODE_SLEEP 1 #define DA9211_BUCK_MODE_SYNC 2 #define DA9211_BUCK_MODE_AUTO 3 @@ -42,6 +46,7 @@ struct da9211 { struct regulator_dev *rdev[DA9211_MAX_REGULATORS]; int num_regulator; int chip_irq; + int chip_id; }; static const struct regmap_range_cfg da9211_regmap_range[] = { @@ -52,14 +57,14 @@ static const struct regmap_range_cfg da9211_regmap_range[] = { .window_start = 0, .window_len = 256, .range_min = 0, - .range_max = 2*256, + .range_max = 5*128, }, }; static const struct regmap_config da9211_regmap_config = { .reg_bits = 8, .val_bits = 8, - .max_register = 2 * 256, + .max_register = 5 * 128, .ranges = da9211_regmap_range, .num_ranges = ARRAY_SIZE(da9211_regmap_range), }; @@ -69,11 +74,20 @@ static const struct regmap_config da9211_regmap_config = { #define DA9211_MAX_MV 1570 #define DA9211_STEP_MV 10 -/* Current limits for buck (uA) indices corresponds with register values */ +/* Current limits for DA9211 buck (uA) indices + * corresponds with register values + */ static const int da9211_current_limits[] = { 2000000, 2200000, 2400000, 2600000, 2800000, 3000000, 3200000, 3400000, 3600000, 3800000, 4000000, 4200000, 4400000, 4600000, 4800000, 5000000 }; +/* Current limits for DA9213 buck (uA) indices + * corresponds with register values + */ +static const int da9213_current_limits[] = { + 3000000, 3200000, 3400000, 3600000, 3800000, 4000000, 4200000, 4400000, + 4600000, 4800000, 5000000, 5200000, 5400000, 5600000, 5800000, 6000000 +}; static unsigned int da9211_buck_get_mode(struct regulator_dev *rdev) { @@ -129,12 +143,26 @@ static int da9211_set_current_limit(struct regulator_dev *rdev, int min, { int id = rdev_get_id(rdev); struct da9211 *chip = rdev_get_drvdata(rdev); - int i; + int i, max_size; + const int *current_limits; + + switch (chip->chip_id) { + case DA9211: + current_limits = da9211_current_limits; + max_size = ARRAY_SIZE(da9211_current_limits)-1; + break; + case DA9213: + current_limits = da9213_current_limits; + max_size = ARRAY_SIZE(da9213_current_limits)-1; + break; + default: + return -EINVAL; + } /* search for closest to maximum */ - for (i = ARRAY_SIZE(da9211_current_limits)-1; i >= 0; i--) { - if (min <= da9211_current_limits[i] && - max >= da9211_current_limits[i]) { + for (i = max_size; i >= 0; i--) { + if (min <= current_limits[i] && + max >= current_limits[i]) { return regmap_update_bits(chip->regmap, DA9211_REG_BUCK_ILIM, (0x0F << id*4), (i << id*4)); @@ -150,14 +178,28 @@ static int da9211_get_current_limit(struct regulator_dev *rdev) struct da9211 *chip = rdev_get_drvdata(rdev); unsigned int data; int ret; + const int *current_limits; + + switch (chip->chip_id) { + case DA9211: + current_limits = da9211_current_limits; + break; + case DA9213: + current_limits = da9213_current_limits; + break; + default: + return -EINVAL; + } ret = regmap_read(chip->regmap, DA9211_REG_BUCK_ILIM, &data); if (ret < 0) return ret; - /* select one of 16 values: 0000 (2000mA) to 1111 (5000mA) */ + /* select one of 16 values: 0000 (2000mA or 3000mA) + * to 1111 (5000mA or 6000mA). + */ data = (data >> id*4) & 0x0F; - return da9211_current_limits[data]; + return current_limits[data]; } static struct regulator_ops da9211_buck_ops = { @@ -264,10 +306,7 @@ static int da9211_regulator_init(struct da9211 *chip) } for (i = 0; i < chip->num_regulator; i++) { - if (chip->pdata) - config.init_data = - &(chip->pdata->init_data[i]); - + config.init_data = &(chip->pdata->init_data[i]); config.dev = chip->dev; config.driver_data = chip; config.regmap = chip->regmap; @@ -301,6 +340,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c, { struct da9211 *chip; int error, ret; + unsigned int data; chip = devm_kzalloc(&i2c->dev, sizeof(struct da9211), GFP_KERNEL); @@ -308,7 +348,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c, chip->regmap = devm_regmap_init_i2c(i2c, &da9211_regmap_config); if (IS_ERR(chip->regmap)) { error = PTR_ERR(chip->regmap); - dev_err(&i2c->dev, "Failed to allocate register map: %d\n", + dev_err(chip->dev, "Failed to allocate register map: %d\n", error); return error; } @@ -316,8 +356,22 @@ static int da9211_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, chip); chip->pdata = i2c->dev.platform_data; - if (!chip->pdata) { - dev_err(&i2c->dev, "No platform init data supplied\n"); + + ret = regmap_read(chip->regmap, DA9211_REG_DEVICE_ID, &data); + if (ret < 0) { + dev_err(chip->dev, "Failed to read DEVICE_ID reg: %d\n", ret); + return ret; + } + + switch (data) { + case DA9211_DEVICE_ID: + chip->chip_id = DA9211; + break; + case DA9213_DEVICE_ID: + chip->chip_id = DA9213; + break; + default: + dev_err(chip->dev, "Unsupported device id = 0x%x.\n", data); return -ENODEV; } @@ -340,13 +394,14 @@ static int da9211_i2c_probe(struct i2c_client *i2c, ret = da9211_regulator_init(chip); if (ret < 0) - dev_err(&i2c->dev, "Failed to initialize regulator: %d\n", ret); + dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); return ret; } static const struct i2c_device_id da9211_i2c_id[] = { - {"da9211", 0}, + {"da9211", DA9211}, + {"da9213", DA9213}, {}, }; @@ -364,5 +419,5 @@ static struct i2c_driver da9211_regulator_driver = { module_i2c_driver(da9211_regulator_driver); MODULE_AUTHOR("James Ban "); -MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211"); +MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211/DA9213"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/regulator/da9211-regulator.h b/drivers/regulator/da9211-regulator.h index 88b1769e8058..93fa9df2721c 100644 --- a/drivers/regulator/da9211-regulator.h +++ b/drivers/regulator/da9211-regulator.h @@ -1,5 +1,5 @@ /* - * da9211-regulator.h - Regulator definitions for DA9211 + * da9211-regulator.h - Regulator definitions for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. * * This library is free software; you can redistribute it and/or @@ -53,12 +53,15 @@ /* BUCK Phase Selection*/ #define DA9211_REG_CONFIG_E 0x147 +/* Device ID */ +#define DA9211_REG_DEVICE_ID 0x201 + /* * Registers bits */ /* DA9211_REG_PAGE_CON (addr=0x00) */ #define DA9211_REG_PAGE_SHIFT 1 -#define DA9211_REG_PAGE_MASK 0x02 +#define DA9211_REG_PAGE_MASK 0x06 /* On I2C registers 0x00 - 0xFF */ #define DA9211_REG_PAGE0 0 /* On I2C registers 0x100 - 0x1FF */ diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 0981ce0e72cc..658c3c33f4c0 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -1,5 +1,5 @@ /* - * da9211.h - Regulator device driver for DA9211 + * da9211.h - Regulator device driver for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. * * This library is free software; you can redistribute it and/or @@ -20,6 +20,11 @@ #define DA9211_MAX_REGULATORS 2 +enum da9211_chip_id { + DA9211, + DA9213, +}; + struct da9211_pdata { /* * Number of buck -- cgit v1.2.3 From 0e4f417857083f399769491f6e7773d111debd0f Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Tue, 15 Jul 2014 16:32:51 +0530 Subject: regulator: s2mpxxx: Move regulator min/step voltages in common place This is a cleanup patch and moves min/step voltages in a common samsung header file so that they can be used by other s2mpxxx PMIC drivers. Only few required macros are added currently and others can be added if needed. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Amit Daniel Kachhap Acked-by: Lee Jones Signed-off-by: Mark Brown --- drivers/regulator/s2mpa01.c | 32 ++++++++++++------------ drivers/regulator/s2mps11.c | 50 ++++++++++++++++++------------------- include/linux/mfd/samsung/core.h | 21 ++++++++++++++++ include/linux/mfd/samsung/s2mpa01.h | 12 --------- include/linux/mfd/samsung/s2mps11.h | 9 ------- include/linux/mfd/samsung/s2mps14.h | 10 -------- 6 files changed, 62 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/s2mpa01.c b/drivers/regulator/s2mpa01.c index ee83b4876420..962c5f192f7c 100644 --- a/drivers/regulator/s2mpa01.c +++ b/drivers/regulator/s2mpa01.c @@ -241,8 +241,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_LDO_MIN, \ - .uV_step = S2MPA01_LDO_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_50_MV, \ .n_voltages = S2MPA01_LDO_N_VOLTAGES, \ .vsel_reg = S2MPA01_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPA01_LDO_VSEL_MASK, \ @@ -255,8 +255,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_LDO_MIN, \ - .uV_step = S2MPA01_LDO_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPA01_LDO_N_VOLTAGES, \ .vsel_reg = S2MPA01_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPA01_LDO_VSEL_MASK, \ @@ -270,8 +270,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN1, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B1CTRL2 + (num - 1) * 2, \ @@ -286,8 +286,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN2, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B5CTRL2, \ @@ -302,8 +302,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN1, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B6CTRL2 + (num - 6) * 2, \ @@ -318,8 +318,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN2, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B8CTRL2, \ @@ -334,8 +334,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN4, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_1500_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B9CTRL2, \ @@ -350,8 +350,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN3, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_1000_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B10CTRL2, \ diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index b16c53a8272f..3dede776a837 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -255,14 +255,14 @@ static struct regulator_ops s2mps11_buck_ops = { .set_ramp_delay = s2mps11_set_ramp_delay, }; -#define regulator_desc_s2mps11_ldo1(num) { \ +#define regulator_desc_s2mps11_ldo1(num) { \ .name = "LDO"#num, \ .id = S2MPS11_LDO##num, \ .ops = &s2mps11_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_LDO_MIN, \ - .uV_step = S2MPS11_LDO_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_50_MV, \ .n_voltages = S2MPS11_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS11_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS11_LDO_VSEL_MASK, \ @@ -275,8 +275,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_LDO_MIN, \ - .uV_step = S2MPS11_LDO_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS11_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS11_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS11_LDO_VSEL_MASK, \ @@ -290,8 +290,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B1CTRL2 + (num - 1) * 2, \ @@ -306,8 +306,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B5CTRL2, \ @@ -322,8 +322,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B6CTRL2 + (num - 6) * 2, \ @@ -338,8 +338,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN3, \ - .uV_step = S2MPS11_BUCK_STEP3, \ + .min_uV = MIN_3000_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B9CTRL2, \ @@ -354,8 +354,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN2, \ - .uV_step = S2MPS11_BUCK_STEP2, \ + .min_uV = MIN_750_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B10CTRL2, \ @@ -516,8 +516,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_800MV, \ - .uV_step = S2MPS14_LDO_STEP_25MV, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -530,8 +530,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_1800MV, \ - .uV_step = S2MPS14_LDO_STEP_25MV, \ + .min_uV = MIN_1800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -544,8 +544,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_800MV, \ - .uV_step = S2MPS14_LDO_STEP_12_5MV, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -558,8 +558,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_BUCK1235_MIN_600MV, \ - .uV_step = S2MPS14_BUCK1235_STEP_6_25MV, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ .linear_min_sel = S2MPS14_BUCK1235_START_SEL, \ .ramp_delay = S2MPS14_BUCK_RAMP_DELAY, \ @@ -574,8 +574,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_BUCK4_MIN_1400MV, \ - .uV_step = S2MPS14_BUCK4_STEP_12_5MV, \ + .min_uV = MIN_1400_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ .linear_min_sel = S2MPS14_BUCK4_START_SEL, \ .ramp_delay = S2MPS14_BUCK_RAMP_DELAY, \ diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index b5f73de81aad..1825edacbda7 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -14,6 +14,27 @@ #ifndef __LINUX_MFD_SEC_CORE_H #define __LINUX_MFD_SEC_CORE_H +/* Macros to represent minimum voltages for LDO/BUCK */ +#define MIN_3000_MV 3000000 +#define MIN_2500_MV 2500000 +#define MIN_2000_MV 2000000 +#define MIN_1800_MV 1800000 +#define MIN_1500_MV 1500000 +#define MIN_1400_MV 1400000 +#define MIN_1000_MV 1000000 + +#define MIN_900_MV 900000 +#define MIN_850_MV 850000 +#define MIN_800_MV 800000 +#define MIN_750_MV 750000 +#define MIN_600_MV 600000 + +/* Macros to represent steps for LDO/BUCK */ +#define STEP_50_MV 50000 +#define STEP_25_MV 25000 +#define STEP_12_5_MV 12500 +#define STEP_6_25_MV 6250 + enum sec_device_type { S5M8751X, S5M8763X, diff --git a/include/linux/mfd/samsung/s2mpa01.h b/include/linux/mfd/samsung/s2mpa01.h index fbc63bc0d6a2..2766108bca2f 100644 --- a/include/linux/mfd/samsung/s2mpa01.h +++ b/include/linux/mfd/samsung/s2mpa01.h @@ -155,18 +155,6 @@ enum s2mpa01_regulators { S2MPA01_REGULATOR_MAX, }; -#define S2MPA01_BUCK_MIN1 600000 -#define S2MPA01_BUCK_MIN2 800000 -#define S2MPA01_BUCK_MIN3 1000000 -#define S2MPA01_BUCK_MIN4 1500000 -#define S2MPA01_LDO_MIN 800000 - -#define S2MPA01_BUCK_STEP1 6250 -#define S2MPA01_BUCK_STEP2 12500 - -#define S2MPA01_LDO_STEP1 50000 -#define S2MPA01_LDO_STEP2 25000 - #define S2MPA01_LDO_VSEL_MASK 0x3F #define S2MPA01_BUCK_VSEL_MASK 0xFF #define S2MPA01_ENABLE_MASK (0x03 << S2MPA01_ENABLE_SHIFT) diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index b3ddf98dec37..7981a9d77d3f 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -171,15 +171,6 @@ enum s2mps11_regulators { S2MPS11_REGULATOR_MAX, }; -#define S2MPS11_BUCK_MIN1 600000 -#define S2MPS11_BUCK_MIN2 750000 -#define S2MPS11_BUCK_MIN3 3000000 -#define S2MPS11_LDO_MIN 800000 -#define S2MPS11_BUCK_STEP1 6250 -#define S2MPS11_BUCK_STEP2 12500 -#define S2MPS11_BUCK_STEP3 25000 -#define S2MPS11_LDO_STEP1 50000 -#define S2MPS11_LDO_STEP2 25000 #define S2MPS11_LDO_VSEL_MASK 0x3F #define S2MPS11_BUCK_VSEL_MASK 0xFF #define S2MPS11_ENABLE_MASK (0x03 << S2MPS11_ENABLE_SHIFT) diff --git a/include/linux/mfd/samsung/s2mps14.h b/include/linux/mfd/samsung/s2mps14.h index 900cd7a04314..c92f4782afb5 100644 --- a/include/linux/mfd/samsung/s2mps14.h +++ b/include/linux/mfd/samsung/s2mps14.h @@ -123,10 +123,6 @@ enum s2mps14_regulators { }; /* Regulator constraints for BUCKx */ -#define S2MPS14_BUCK1235_MIN_600MV 600000 -#define S2MPS14_BUCK4_MIN_1400MV 1400000 -#define S2MPS14_BUCK1235_STEP_6_25MV 6250 -#define S2MPS14_BUCK4_STEP_12_5MV 12500 #define S2MPS14_BUCK1235_START_SEL 0x20 #define S2MPS14_BUCK4_START_SEL 0x40 /* @@ -136,12 +132,6 @@ enum s2mps14_regulators { */ #define S2MPS14_BUCK_RAMP_DELAY 12500 -/* Regulator constraints for different types of LDOx */ -#define S2MPS14_LDO_MIN_800MV 800000 -#define S2MPS14_LDO_MIN_1800MV 1800000 -#define S2MPS14_LDO_STEP_12_5MV 12500 -#define S2MPS14_LDO_STEP_25MV 25000 - #define S2MPS14_LDO_VSEL_MASK 0x3F #define S2MPS14_BUCK_VSEL_MASK 0xFF #define S2MPS14_ENABLE_MASK (0x03 << S2MPS14_ENABLE_SHIFT) -- cgit v1.2.3 From 272e2315fac3bfca0edfa3252b8a643c425602af Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Wed, 13 Aug 2014 19:33:38 +0800 Subject: regulator: core: add const qualifier to ops in struct regulator_desc struct regulator_ops *ops is a member in struct regulator_desc, which gets its value from individual regulator driver upon regulator_register() and is used by regulator core APIs. It's not allowed for regulator core to modify any of these callbacks in *ops. Add 'const' qualifier to enforce that. Signed-off-by: Guodong Xu Signed-off-by: Mark Brown --- drivers/regulator/core.c | 24 ++++++++++++------------ include/linux/regulator/driver.h | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..052e7f1f011d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -839,7 +839,7 @@ static void print_constraints(struct regulator_dev *rdev) static int machine_constraints_voltage(struct regulator_dev *rdev, struct regulation_constraints *constraints) { - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; int ret; /* do we need to apply the constraint voltage */ @@ -938,7 +938,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, static int machine_constraints_current(struct regulator_dev *rdev, struct regulation_constraints *constraints) { - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; int ret; if (!constraints->min_uA && !constraints->max_uA) @@ -982,7 +982,7 @@ static int set_machine_constraints(struct regulator_dev *rdev, const struct regulation_constraints *constraints) { int ret = 0; - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; if (constraints) rdev->constraints = kmemdup(constraints, sizeof(*constraints), @@ -2208,9 +2208,9 @@ EXPORT_SYMBOL_GPL(regulator_count_voltages); */ int regulator_list_voltage(struct regulator *regulator, unsigned selector) { - struct regulator_dev *rdev = regulator->rdev; - struct regulator_ops *ops = rdev->desc->ops; - int ret; + struct regulator_dev *rdev = regulator->rdev; + const struct regulator_ops *ops = rdev->desc->ops; + int ret; if (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1 && !selector) return rdev->desc->fixed_uV; @@ -2572,8 +2572,8 @@ EXPORT_SYMBOL_GPL(regulator_set_voltage); int regulator_set_voltage_time(struct regulator *regulator, int old_uV, int new_uV) { - struct regulator_dev *rdev = regulator->rdev; - struct regulator_ops *ops = rdev->desc->ops; + struct regulator_dev *rdev = regulator->rdev; + const struct regulator_ops *ops = rdev->desc->ops; int old_sel = -1; int new_sel = -1; int voltage; @@ -3336,9 +3336,9 @@ EXPORT_SYMBOL_GPL(regulator_mode_to_status); */ static int add_regulator_attributes(struct regulator_dev *rdev) { - struct device *dev = &rdev->dev; - struct regulator_ops *ops = rdev->desc->ops; - int status = 0; + struct device *dev = &rdev->dev; + const struct regulator_ops *ops = rdev->desc->ops; + int status = 0; /* some attributes need specific methods to be displayed */ if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || @@ -3905,7 +3905,7 @@ core_initcall(regulator_init); static int __init regulator_init_complete(void) { struct regulator_dev *rdev; - struct regulator_ops *ops; + const struct regulator_ops *ops; struct regulation_constraints *c; int enabled, ret; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bbe03a1924c0..4b628139a9cb 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -245,7 +245,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - struct regulator_ops *ops; + const struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 From 871f565055ed232e5751da18a331b73e8254adaf Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Wed, 13 Aug 2014 19:33:40 +0800 Subject: regulator: core: add guard delay between calling regulator_disable and _enable Some regulator require a minimum delay between its disable and next enable. This is to avoid damages when out-of-range frequent disable/enable of a single regulator can bring to the regulator chip. Add @off_on_delay to struct regulator_desc. Device drivers' can use this field to set this guard time. Add @last_off_jiffy to struct regulator_dev. When @off_on_delay is set by driver, regulator core can store its last off (disable) time into this field. Signed-off-by: Guodong Xu Signed-off-by: Mark Brown --- drivers/regulator/core.c | 31 +++++++++++++++++++++++++++++++ include/linux/regulator/driver.h | 6 ++++++ 2 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index dc0e9813b62d..1e976b6320a2 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1813,6 +1813,31 @@ static int _regulator_do_enable(struct regulator_dev *rdev) trace_regulator_enable(rdev_get_name(rdev)); + if (rdev->desc->off_on_delay) { + /* if needed, keep a distance of off_on_delay from last time + * this regulator was disabled. + */ + unsigned long start_jiffy = jiffies; + unsigned long intended, max_delay, remaining; + + max_delay = usecs_to_jiffies(rdev->desc->off_on_delay); + intended = rdev->last_off_jiffy + max_delay; + + if (time_before(start_jiffy, intended)) { + /* calc remaining jiffies to deal with one-time + * timer wrapping. + * in case of multiple timer wrapping, either it can be + * detected by out-of-range remaining, or it cannot be + * detected and we gets a panelty of + * _regulator_enable_delay(). + */ + remaining = intended - start_jiffy; + if (remaining <= max_delay) + _regulator_enable_delay( + jiffies_to_usecs(remaining)); + } + } + if (rdev->ena_pin) { ret = regulator_ena_gpio_ctrl(rdev, true); if (ret < 0) @@ -1925,6 +1950,12 @@ static int _regulator_do_disable(struct regulator_dev *rdev) return ret; } + /* cares about last_off_jiffy only if off_on_delay is required by + * device. + */ + if (rdev->desc->off_on_delay) + rdev->last_off_jiffy = jiffies; + trace_regulator_disable_complete(rdev_get_name(rdev)); return 0; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4b628139a9cb..efe058f8f746 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -238,6 +238,7 @@ enum regulator_type { * @bypass_val_off: Disabling value for control when using regmap set_bypass * * @enable_time: Time taken for initial enable of regulator (in uS). + * @off_on_delay: guard time (in uS), before re-enabling a regulator */ struct regulator_desc { const char *name; @@ -276,6 +277,8 @@ struct regulator_desc { unsigned int bypass_val_off; unsigned int enable_time; + + unsigned int off_on_delay; }; /** @@ -348,6 +351,9 @@ struct regulator_dev { struct regulator_enable_gpio *ena_pin; unsigned int ena_gpio_state:1; + + /* time when this regulator was disabled last time */ + unsigned long last_off_jiffy; }; struct regulator_dev * -- cgit v1.2.3 From 16466f4284154311f163a58b77379eb186274f87 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 14 Aug 2014 16:52:51 -0700 Subject: net: phy: bcm7xxx: remove 28nm wildcard entry A wildcard entry with the 32-bits OUI 0x600d8400 was added as part of the BCM7xxx internal PHY driver, but that entry might match other PHYs that are not covered by this driver, so let's just remove it. Fixes: b560a58c45c6 ("net: phy: add Broadcom BCM7xxx internal PHY driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 14 -------------- include/linux/brcmphy.h | 1 - 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 526b94cea569..2b40548c85d5 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -286,19 +286,6 @@ static struct phy_driver bcm7xxx_driver[] = { .suspend = bcm7xxx_suspend, .resume = bcm7xxx_28nm_config_init, .driver = { .owner = THIS_MODULE }, -}, { - .name = "Broadcom BCM7XXX 28nm", - .phy_id = PHY_ID_BCM7XXX_28, - .phy_id_mask = PHY_BCM_OUI_MASK, - .features = PHY_GBIT_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_28nm_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_28nm_config_init, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_BCM_OUI_4, .phy_id_mask = 0xffff0000, @@ -331,7 +318,6 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7366, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, - { PHY_ID_BCM7XXX_28, 0xfffffc00 }, { PHY_BCM_OUI_4, 0xffff0000 }, { PHY_BCM_OUI_5, 0xffffff00 }, { } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6f76277baf39..61219b9b3445 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -16,7 +16,6 @@ #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7445 0x600d8510 -#define PHY_ID_BCM7XXX_28 0x600d8400 #define PHY_BCM_OUI_MASK 0xfffffc00 #define PHY_BCM_OUI_1 0x00206000 -- cgit v1.2.3 From 983c684466e02b21f83c025ea539deee6c0aeac0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Aug 2014 13:03:10 -0400 Subject: SUNRPC: get rid of the request wait queue We're always _only_ waking up tasks from within the sp_threads list, so we know that they are enqueued and alive. The rq_wait waitqueue is just a distraction with extra atomic semantics. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 - net/sunrpc/svc.c | 2 -- net/sunrpc/svc_xprt.c | 32 +++++++++++++++++--------------- 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index cf61ecd148e0..21678464883a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -280,7 +280,6 @@ struct svc_rqst { bool rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ - wait_queue_head_t rq_wait; /* synchronization */ struct task_struct *rq_task; /* service thread */ }; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1db5007ddbce..ca8a7958f4e6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) goto out_enomem; - init_waitqueue_head(&rqstp->rq_wait); - serv->sv_nrthreads++; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 08e49d1e17b3..faaf2b46273b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -348,8 +348,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) cpu = get_cpu(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); - put_cpu(); - spin_lock_bh(&pool->sp_lock); if (!list_empty(&pool->sp_threads) && @@ -382,10 +380,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) printk(KERN_ERR "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", rqstp, rqstp->rq_xprt); - rqstp->rq_xprt = xprt; + /* Note the order of the following 3 lines: + * We want to assign xprt to rqstp->rq_xprt only _after_ + * we've woken up the process, so that we don't race with + * the lockless check in svc_get_next_xprt(). + */ svc_xprt_get(xprt); + wake_up_process(rqstp->rq_task); + rqstp->rq_xprt = xprt; pool->sp_stats.threads_woken++; - wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); @@ -394,6 +397,7 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) out_unlock: spin_unlock_bh(&pool->sp_lock); + put_cpu(); } /* @@ -509,7 +513,7 @@ void svc_wake_up(struct svc_serv *serv) svc_thread_dequeue(pool, rqstp); rqstp->rq_xprt = NULL; */ - wake_up(&rqstp->rq_wait); + wake_up_process(rqstp->rq_task); } else pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); @@ -628,7 +632,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; - DECLARE_WAITQUEUE(wait, current); long time_left; /* Normally we will wait up to 5 seconds for any required @@ -654,15 +657,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) xprt = ERR_PTR(-EAGAIN); goto out; } - /* No data pending. Go to sleep */ - svc_thread_enqueue(pool, rqstp); - /* * We have to be able to interrupt this wait * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + /* No data pending. Go to sleep */ + svc_thread_enqueue(pool, rqstp); + /* * checking kthread_should_stop() here allows us to avoid * locking and signalling when stopping kthreads that call @@ -676,14 +679,13 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) goto out; } - add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); time_left = schedule_timeout(timeout); + __set_current_state(TASK_RUNNING); try_to_freeze(); - remove_wait_queue(&rqstp->rq_wait, &wait); xprt = rqstp->rq_xprt; if (xprt != NULL) return xprt; @@ -786,10 +788,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) printk(KERN_ERR "svc_recv: service %p, transport not NULL!\n", rqstp); - if (waitqueue_active(&rqstp->rq_wait)) - printk(KERN_ERR - "svc_recv: service %p, wait queue active!\n", - rqstp); + + /* Make sure the task pointer is set! */ + if (WARN_ON_ONCE(!rqstp->rq_task)) + rqstp->rq_task = current_task; err = svc_alloc_arg(rqstp); if (err) -- cgit v1.2.3 From 716845ebeb505353d900320b4a74e8330520410d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 18 Aug 2014 10:34:08 +0800 Subject: regulator: core: Fix build error due to const qualifier for ops Drop const qualifier for ops of struct regulator_desc. Allow regulator drivers to update ops before registering regulator. Fix below build error: CC [M] drivers/regulator/mc13892-regulator.o drivers/regulator/mc13892-regulator.c: In function 'mc13892_regulator_probe': drivers/regulator/mc13892-regulator.c:586:3: error: assignment of member 'set_mode' in read-only object drivers/regulator/mc13892-regulator.c:588:3: error: assignment of member 'get_mode' in read-only object make[2]: *** [drivers/regulator/mc13892-regulator.o] Error 1 make[1]: *** [drivers/regulator] Error 2 make: *** [drivers] Error 2 Reported-by: Stephen Rothwell Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index efe058f8f746..3abda7554d82 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -246,7 +246,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - const struct regulator_ops *ops; + struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 From e5f81539f657af7e9f54ea37986fde8f92acef22 Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 30 Jul 2014 14:56:58 -0700 Subject: irqchip: gic: Replace hex numbers with defines. This is to cleanup some hex numbers used in the code and replace them with defines to make the code cleaner. Signed-off-by: Feng Kan Reviewed-by: Anup Patel Link: https://lkml.kernel.org/r/1406757419-18729-2-git-send-email-fkan@apm.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic-common.c | 15 +++++++++------ drivers/irqchip/irq-gic.c | 25 +++++++++++++------------ include/linux/irqchip/arm-gic.h | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index 60ac704d2090..61541ff24397 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -74,20 +74,22 @@ void __init gic_dist_config(void __iomem *base, int gic_irqs, * Set all global interrupts to be level triggered, active low. */ for (i = 32; i < gic_irqs; i += 16) - writel_relaxed(0, base + GIC_DIST_CONFIG + i / 4); + writel_relaxed(GICD_INT_ACTLOW_LVLTRIG, + base + GIC_DIST_CONFIG + i / 4); /* * Set priority on all global interrupts. */ for (i = 32; i < gic_irqs; i += 4) - writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i); + writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i); /* * Disable all interrupts. Leave the PPI and SGIs alone * as they are enabled by redistributor registers. */ for (i = 32; i < gic_irqs; i += 32) - writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i / 8); + writel_relaxed(GICD_INT_EN_CLR_X32, + base + GIC_DIST_ENABLE_CLEAR + i / 8); if (sync_access) sync_access(); @@ -101,14 +103,15 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)) * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. */ - writel_relaxed(0xffff0000, base + GIC_DIST_ENABLE_CLEAR); - writel_relaxed(0x0000ffff, base + GIC_DIST_ENABLE_SET); + writel_relaxed(GICD_INT_EN_CLR_PPI, base + GIC_DIST_ENABLE_CLEAR); + writel_relaxed(GICD_INT_EN_SET_SGI, base + GIC_DIST_ENABLE_SET); /* * Set priority on PPI and SGI interrupts */ for (i = 0; i < 32; i += 4) - writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); + writel_relaxed(GICD_INT_DEF_PRI_X4, + base + GIC_DIST_PRI + i * 4 / 4); if (sync_access) sync_access(); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 4b959e606fe8..35847453cecb 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -298,8 +298,8 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) status = readl_relaxed(gic_data_cpu_base(chip_data) + GIC_CPU_INTACK); raw_spin_unlock(&irq_controller_lock); - gic_irq = (status & 0x3ff); - if (gic_irq == 1023) + gic_irq = (status & GICC_IAR_INT_ID_MASK); + if (gic_irq == GICC_INT_SPURIOUS) goto out; cascade_irq = irq_find_mapping(chip_data->domain, gic_irq); @@ -360,7 +360,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) unsigned int gic_irqs = gic->gic_irqs; void __iomem *base = gic_data_dist_base(gic); - writel_relaxed(0, base + GIC_DIST_CTRL); + writel_relaxed(GICD_DISABLE, base + GIC_DIST_CTRL); /* * Set all global interrupts to this CPU only. @@ -373,7 +373,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) gic_dist_config(base, gic_irqs, NULL); - writel_relaxed(1, base + GIC_DIST_CTRL); + writel_relaxed(GICD_ENABLE, base + GIC_DIST_CTRL); } static void gic_cpu_init(struct gic_chip_data *gic) @@ -400,8 +400,8 @@ static void gic_cpu_init(struct gic_chip_data *gic) gic_cpu_config(dist_base, NULL); - writel_relaxed(0xf0, base + GIC_CPU_PRIMASK); - writel_relaxed(1, base + GIC_CPU_CTRL); + writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); + writel_relaxed(GICC_ENABLE, base + GIC_CPU_CTRL); } void gic_cpu_if_down(void) @@ -467,14 +467,14 @@ static void gic_dist_restore(unsigned int gic_nr) if (!dist_base) return; - writel_relaxed(0, dist_base + GIC_DIST_CTRL); + writel_relaxed(GICD_DISABLE, dist_base + GIC_DIST_CTRL); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++) writel_relaxed(gic_data[gic_nr].saved_spi_conf[i], dist_base + GIC_DIST_CONFIG + i * 4); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++) - writel_relaxed(0xa0a0a0a0, + writel_relaxed(GICD_INT_DEF_PRI_X4, dist_base + GIC_DIST_PRI + i * 4); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++) @@ -485,7 +485,7 @@ static void gic_dist_restore(unsigned int gic_nr) writel_relaxed(gic_data[gic_nr].saved_spi_enable[i], dist_base + GIC_DIST_ENABLE_SET + i * 4); - writel_relaxed(1, dist_base + GIC_DIST_CTRL); + writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL); } static void gic_cpu_save(unsigned int gic_nr) @@ -539,10 +539,11 @@ static void gic_cpu_restore(unsigned int gic_nr) writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4); for (i = 0; i < DIV_ROUND_UP(32, 4); i++) - writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4); + writel_relaxed(GICD_INT_DEF_PRI_X4, + dist_base + GIC_DIST_PRI + i * 4); - writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK); - writel_relaxed(1, cpu_base + GIC_CPU_CTRL); + writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK); + writel_relaxed(GICC_ENABLE, cpu_base + GIC_CPU_CTRL); } static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 45e2d8c15bd2..5cb9d41af5be 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -21,7 +21,10 @@ #define GIC_CPU_ACTIVEPRIO 0xd0 #define GIC_CPU_IDENT 0xfc +#define GICC_ENABLE 0x1 +#define GICC_INT_PRI_THRESHOLD 0xf0 #define GICC_IAR_INT_ID_MASK 0x3ff +#define GICC_INT_SPURIOUS 1023 #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 @@ -39,6 +42,18 @@ #define GIC_DIST_SGI_PENDING_CLEAR 0xf10 #define GIC_DIST_SGI_PENDING_SET 0xf20 +#define GICD_ENABLE 0x1 +#define GICD_DISABLE 0x0 +#define GICD_INT_ACTLOW_LVLTRIG 0x0 +#define GICD_INT_EN_CLR_X32 0xffffffff +#define GICD_INT_EN_SET_SGI 0x0000ffff +#define GICD_INT_EN_CLR_PPI 0xffff0000 +#define GICD_INT_DEF_PRI 0xa0 +#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ + (GICD_INT_DEF_PRI << 16) |\ + (GICD_INT_DEF_PRI << 8) |\ + GICD_INT_DEF_PRI) + #define GICH_HCR 0x0 #define GICH_VTR 0x4 #define GICH_VMCR 0x8 -- cgit v1.2.3 From 3228950621d92f0f212378f95a6998ef3a1be0bb Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 30 Jul 2014 14:56:59 -0700 Subject: irqchip: gic: Preserve gic V2 bypass bits in cpu ctrl register This change is made to preserve the GIC v2 bypass bits in the GIC_CPU_CTRL register (also known as the GICC_CTLR register in spec). This code will preserve all bits configured by the bootloader regarding v2 bypass group bits. In the X-Gene platform, the bypass functionality is not used and bypass bits should not be changed by the kernel gic code as it could lead to incorrect behavior. Signed-off-by: Feng Kan Reviewed-by: Vinayak Kale Reviewed-by: Anup Patel Link: https://lkml.kernel.org/r/1406757419-18729-3-git-send-email-fkan@apm.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic.c | 25 ++++++++++++++++++++++--- include/linux/irqchip/arm-gic.h | 1 + 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 35847453cecb..2500f6ba29e1 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -353,6 +353,21 @@ static u8 gic_get_cpumask(struct gic_chip_data *gic) return mask; } +static void gic_cpu_if_up(void) +{ + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); + u32 bypass = 0; + + /* + * Preserve bypass disable bits to be written back later + */ + bypass = readl(cpu_base + GIC_CPU_CTRL); + bypass &= GICC_DIS_BYPASS_MASK; + + writel_relaxed(bypass | GICC_ENABLE, cpu_base + GIC_CPU_CTRL); +} + + static void __init gic_dist_init(struct gic_chip_data *gic) { unsigned int i; @@ -401,13 +416,17 @@ static void gic_cpu_init(struct gic_chip_data *gic) gic_cpu_config(dist_base, NULL); writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); - writel_relaxed(GICC_ENABLE, base + GIC_CPU_CTRL); + gic_cpu_if_up(); } void gic_cpu_if_down(void) { void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); - writel_relaxed(0, cpu_base + GIC_CPU_CTRL); + u32 val = 0; + + val = readl(cpu_base + GIC_CPU_CTRL); + val &= ~GICC_ENABLE; + writel_relaxed(val, cpu_base + GIC_CPU_CTRL); } #ifdef CONFIG_CPU_PM @@ -543,7 +562,7 @@ static void gic_cpu_restore(unsigned int gic_nr) dist_base + GIC_DIST_PRI + i * 4); writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK); - writel_relaxed(GICC_ENABLE, cpu_base + GIC_CPU_CTRL); + gic_cpu_if_up(); } static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 5cb9d41af5be..13eed92c7d24 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -25,6 +25,7 @@ #define GICC_INT_PRI_THRESHOLD 0xf0 #define GICC_IAR_INT_ID_MASK 0x3ff #define GICC_INT_SPURIOUS 1023 +#define GICC_DIS_BYPASS_MASK 0x1e0 #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 -- cgit v1.2.3 From 366047515c6eab2ff886bc28d1c2b0ad041d040a Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Fri, 15 Aug 2014 13:38:59 +0800 Subject: i2c: rework kernel config I2C_ACPI Commit da3c6647(I2C/ACPI: Clean up I2C ACPI code and Add CONFIG_I2C_ACPI config) adds a new kernel config I2C_ACPI and make I2C core built in when the config is selected. This is wrong because distributions etc generally compile I2C as a module and the commit broken that. This patch is to rename I2C_ACPI to ACPI_I2C_OPREGION. New config only controls ACPI I2C operation region code and depends on I2C=y. Signed-off-by: Lan Tianyu Reviewed-by: Mika Westerberg [wsa: removed unrelated change for Kconfig] Signed-off-by: Wolfram Sang --- drivers/i2c/Kconfig | 15 ++++++--------- drivers/i2c/Makefile | 2 +- drivers/i2c/i2c-acpi.c | 2 ++ include/linux/i2c.h | 12 ++++++++---- 4 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index 3e3b680dc007..b51a402752c4 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -23,17 +23,14 @@ config I2C This I2C support can also be built as a module. If so, the module will be called i2c-core. -config I2C_ACPI - bool "I2C ACPI support" - select I2C - depends on ACPI +config ACPI_I2C_OPREGION + bool "ACPI I2C Operation region support" + depends on I2C=y && ACPI default y help - Say Y here if you want to enable ACPI I2C support. This includes support - for automatic enumeration of I2C slave devices and support for ACPI I2C - Operation Regions. Operation Regions allow firmware (BIOS) code to - access I2C slave devices, such as smart batteries through an I2C host - controller driver. + Say Y here if you want to enable ACPI I2C operation region support. + Operation Regions allow firmware (BIOS) code to access I2C slave devices, + such as smart batteries through an I2C host controller driver. if I2C diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index a1f590cbb435..e0228b228256 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -3,7 +3,7 @@ # i2ccore-y := i2c-core.o -i2ccore-$(CONFIG_I2C_ACPI) += i2c-acpi.o +i2ccore-$(CONFIG_ACPI) += i2c-acpi.o obj-$(CONFIG_I2C_BOARDINFO) += i2c-boardinfo.o obj-$(CONFIG_I2C) += i2ccore.o diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c index e8b61967334b..0dbc18c15c43 100644 --- a/drivers/i2c/i2c-acpi.c +++ b/drivers/i2c/i2c-acpi.c @@ -126,6 +126,7 @@ void acpi_i2c_register_devices(struct i2c_adapter *adap) dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); } +#ifdef CONFIG_ACPI_I2C_OPREGION static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, u8 cmd, u8 *data, u8 data_len) { @@ -360,3 +361,4 @@ void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) acpi_bus_detach_private_data(handle); } +#endif diff --git a/include/linux/i2c.h b/include/linux/i2c.h index ea507665896c..a95efeb53a8b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -577,16 +577,20 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node } #endif /* CONFIG_OF */ -#ifdef CONFIG_I2C_ACPI -int acpi_i2c_install_space_handler(struct i2c_adapter *adapter); -void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter); +#ifdef CONFIG_ACPI void acpi_i2c_register_devices(struct i2c_adapter *adap); #else static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { } +#endif /* CONFIG_ACPI */ + +#ifdef CONFIG_ACPI_I2C_OPREGION +int acpi_i2c_install_space_handler(struct i2c_adapter *adapter); +void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter); +#else static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) { } static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter) { return 0; } -#endif +#endif /* CONFIG_ACPI_I2C_OPREGION */ #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From 7b7d8982f0169d5ac67c6c2877449fb7f6968cac Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Jul 2014 14:31:53 -0700 Subject: mtd: fix linux/mtd/nand.h kernel-doc warning Fix kernel-doc warning in : Warning(..//include/linux/mtd/nand.h:795): No description found for parameter 'ecc' Signed-off-by: Randy Dunlap Cc: David Woodhouse Cc: Brian Norris Cc: linux-mtd@lists.infradead.org Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3083c53e0270..b7c11991cb09 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -766,6 +766,7 @@ struct nand_chip { * @options: stores various chip bit options * @id_len: The valid length of the @id. * @oobsize: OOB size + * @ecc: ECC correctability and step information from the datasheet. * @ecc.strength_ds: The ECC correctability from the datasheet, same as the * @ecc_strength_ds in nand_chip{}. * @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the -- cgit v1.2.3 From 31f754628cbb12c983600f22d9f0fed50dfe2134 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:22 -0700 Subject: mtd: use __packed shorthand Signed-off-by: Brian Norris --- drivers/mtd/mtdswap.c | 2 +- drivers/mtd/nand/sm_common.h | 2 +- include/linux/mtd/cfi.h | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 48cf6f98df44..fc8b3d16cce7 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -145,7 +145,7 @@ struct mtdswap_dev { struct mtdswap_oobdata { __le16 magic; __le32 count; -} __attribute__((packed)); +} __packed; #define MTDSWAP_MAGIC_CLEAN 0x2095 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) diff --git a/drivers/mtd/nand/sm_common.h b/drivers/mtd/nand/sm_common.h index 00f4a83359b2..d3e028e58b0f 100644 --- a/drivers/mtd/nand/sm_common.h +++ b/drivers/mtd/nand/sm_common.h @@ -18,7 +18,7 @@ struct sm_oob { uint8_t ecc2[3]; uint8_t lba_copy2[2]; uint8_t ecc1[3]; -} __attribute__((packed)); +} __packed; /* one sector is always 512 bytes, but it can consist of two nand pages */ diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 37ef6b194089..299d7d31fe53 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -153,7 +153,7 @@ struct cfi_ident { uint16_t MaxBufWriteSize; uint8_t NumEraseRegions; uint32_t EraseRegionInfo[0]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; /* Extended Query Structure for both PRI and ALT */ @@ -161,7 +161,7 @@ struct cfi_extquery { uint8_t pri[3]; uint8_t MajorVersion; uint8_t MinorVersion; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Intel/Sharp Extended Command Set (0x0001) */ @@ -180,7 +180,7 @@ struct cfi_pri_intelext { uint8_t FactProtRegSize; uint8_t UserProtRegSize; uint8_t extra[0]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_otpinfo { uint32_t ProtRegAddr; @@ -188,7 +188,7 @@ struct cfi_intelext_otpinfo { uint8_t FactProtRegSize; uint16_t UserGroups; uint8_t UserProtRegSize; -} __attribute__((packed)); +} __packed; struct cfi_intelext_blockinfo { uint16_t NumIdentBlocks; @@ -196,7 +196,7 @@ struct cfi_intelext_blockinfo { uint16_t MinBlockEraseCycles; uint8_t BitsPerCell; uint8_t BlockCap; -} __attribute__((packed)); +} __packed; struct cfi_intelext_regioninfo { uint16_t NumIdentPartitions; @@ -205,7 +205,7 @@ struct cfi_intelext_regioninfo { uint8_t NumOpAllowedSimEraMode; uint8_t NumBlockTypes; struct cfi_intelext_blockinfo BlockTypes[1]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_programming_regioninfo { uint8_t ProgRegShift; @@ -214,7 +214,7 @@ struct cfi_intelext_programming_regioninfo { uint8_t Reserved2; uint8_t ControlInvalid; uint8_t Reserved3; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for AMD/Fujitsu Extended Command Set (0x0002) */ @@ -233,7 +233,7 @@ struct cfi_pri_amdstd { uint8_t VppMin; uint8_t VppMax; uint8_t TopBottom; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Atmel chips (command set 0x0002) */ @@ -245,18 +245,18 @@ struct cfi_pri_atmel { uint8_t BottomBoot; uint8_t BurstMode; uint8_t PageMode; -} __attribute__((packed)); +} __packed; struct cfi_pri_query { uint8_t NumFields; uint32_t ProtField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; struct cfi_bri_query { uint8_t PageModeReadCap; uint8_t NumFields; uint32_t ConfField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; #define P_ID_NONE 0x0000 #define P_ID_INTEL_EXT 0x0001 -- cgit v1.2.3 From b25046b1e5e3f1423434da77ccc859f2f779d1ce Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Sun, 17 Aug 2014 10:29:42 +0200 Subject: mtd: nand: fix DocBook warnings on nand_sdr_timings doc Change the comment type (from /** to /*) to prevent DocBook from complaining about missing description for nand_sdr_timings fields. There is currently no need in documenting those fields because they are fully described in the ONFI specification (which is pointed out in the comment). Signed-off-by: Boris BREZILLON Reported-by: Randy Dunlap Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3083c53e0270..c300db3ae285 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -949,7 +949,7 @@ static inline int jedec_feature(struct nand_chip *chip) : 0; } -/** +/* * struct nand_sdr_timings - SDR NAND chip timings * * This struct defines the timing requirements of a SDR NAND chip. -- cgit v1.2.3 From 7132fe4f568721cbd5d9bce5a8a71556e9bc45b4 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Sun, 17 Aug 2014 09:24:26 -0700 Subject: Input: drv260x - add TI drv260x haptics driver Add the TI drv260x haptics/vibrator driver. This device uses the input force feedback to produce a wave form to driver an ERM or LRA actuator device. The initial driver supports the devices real time playback mode. But the device has additional wave patterns in ROM. This functionality will be added in future patchsets. Product data sheet is located here: http://www.ti.com/product/drv2605 Signed-off-by: Dan Murphy Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/ti,drv260x.txt | 51 ++ drivers/input/misc/Kconfig | 11 + drivers/input/misc/Makefile | 1 + drivers/input/misc/drv260x.c | 738 +++++++++++++++++++++ include/dt-bindings/input/ti-drv260x.h | 36 + include/linux/platform_data/drv260x-pdata.h | 28 + 6 files changed, 865 insertions(+) create mode 100644 Documentation/devicetree/bindings/input/ti,drv260x.txt create mode 100644 drivers/input/misc/drv260x.c create mode 100644 include/dt-bindings/input/ti-drv260x.h create mode 100644 include/linux/platform_data/drv260x-pdata.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/input/ti,drv260x.txt b/Documentation/devicetree/bindings/input/ti,drv260x.txt new file mode 100644 index 000000000000..a9c8519eb9de --- /dev/null +++ b/Documentation/devicetree/bindings/input/ti,drv260x.txt @@ -0,0 +1,51 @@ +Texas Instruments - drv260x Haptics driver family + +The drv260x family serial control bus communicates through I2C protocols + +Required properties: + - compatible - One of: + "ti,drv2604" - DRV2604 + "ti,drv2605" - DRV2605 + "ti,drv2605l" - DRV2605L + - reg - I2C slave address + - vbat-supply - Required supply regulator + - mode - Power up mode of the chip (defined in include/dt-bindings/input/ti-drv260x.h) + DRV260X_LRA_MODE - Linear Resonance Actuator mode (Piezoelectric) + DRV260X_LRA_NO_CAL_MODE - This is a LRA Mode but there is no calibration + sequence during init. And the device is configured for real + time playback mode (RTP mode). + DRV260X_ERM_MODE - Eccentric Rotating Mass mode (Rotary vibrator) + - library-sel - These are ROM based waveforms pre-programmed into the IC. + This should be set to set the library to use at power up. + (defined in include/dt-bindings/input/ti-drv260x.h) + DRV260X_LIB_EMPTY - Do not use a pre-programmed library + DRV260X_ERM_LIB_A - Pre-programmed Library + DRV260X_ERM_LIB_B - Pre-programmed Library + DRV260X_ERM_LIB_C - Pre-programmed Library + DRV260X_ERM_LIB_D - Pre-programmed Library + DRV260X_ERM_LIB_E - Pre-programmed Library + DRV260X_ERM_LIB_F - Pre-programmed Library + DRV260X_LIB_LRA - Pre-programmed LRA Library + +Optional properties: + - enable-gpio - gpio pin to enable/disable the device. + - vib-rated-mv - The rated voltage of the actuator in millivolts. + If this is not set then the value will be defaulted to + 3.2 v. + - vib-overdrive-mv - The overdrive voltage of the actuator in millivolts. + If this is not set then the value will be defaulted to + 3.2 v. +Example: + +drv2605l: drv2605l@5a { + compatible = "ti,drv2605l"; + reg = <0x5a>; + enable-gpio = <&gpio1 28 GPIO_ACTIVE_HIGH>; + mode = ; + library-sel = ; + vib-rated-mv = <3200>; + vib-overdriver-mv = <3200>; +}; + +For more product information please see the link below: +http://www.ti.com/product/drv2605 diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 2ff4425a893b..41d0ae62ba05 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -676,4 +676,15 @@ config INPUT_SOC_BUTTON_ARRAY To compile this driver as a module, choose M here: the module will be called soc_button_array. +config INPUT_DRV260X_HAPTICS + tristate "TI DRV260X haptics support" + depends on INPUT && I2C && GPIOLIB + select INPUT_FF_MEMLESS + select REGMAP_I2C + help + Say Y to enable support for the TI DRV260X haptics driver. + + To compile this driver as a module, choose M here: the + module will be called drv260x-haptics. + endif diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 4955ad322a01..cd4bc2d3474f 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_INPUT_COBALT_BTNS) += cobalt_btns.o obj-$(CONFIG_INPUT_DA9052_ONKEY) += da9052_onkey.o obj-$(CONFIG_INPUT_DA9055_ONKEY) += da9055_onkey.o obj-$(CONFIG_INPUT_DM355EVM) += dm355evm_keys.o +obj-$(CONFIG_INPUT_DRV260X_HAPTICS) += drv260x.o obj-$(CONFIG_INPUT_GP2A) += gp2ap002a00f.o obj-$(CONFIG_INPUT_GPIO_BEEPER) += gpio-beeper.o obj-$(CONFIG_INPUT_GPIO_TILT_POLLED) += gpio_tilt_polled.o diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c new file mode 100644 index 000000000000..e90e3b8189f7 --- /dev/null +++ b/drivers/input/misc/drv260x.c @@ -0,0 +1,738 @@ +/* + * DRV260X haptics driver family + * + * Author: Dan Murphy + * + * Copyright: (C) 2014 Texas Instruments, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define DRV260X_STATUS 0x0 +#define DRV260X_MODE 0x1 +#define DRV260X_RT_PB_IN 0x2 +#define DRV260X_LIB_SEL 0x3 +#define DRV260X_WV_SEQ_1 0x4 +#define DRV260X_WV_SEQ_2 0x5 +#define DRV260X_WV_SEQ_3 0x6 +#define DRV260X_WV_SEQ_4 0x7 +#define DRV260X_WV_SEQ_5 0x8 +#define DRV260X_WV_SEQ_6 0x9 +#define DRV260X_WV_SEQ_7 0xa +#define DRV260X_WV_SEQ_8 0xb +#define DRV260X_GO 0xc +#define DRV260X_OVERDRIVE_OFF 0xd +#define DRV260X_SUSTAIN_P_OFF 0xe +#define DRV260X_SUSTAIN_N_OFF 0xf +#define DRV260X_BRAKE_OFF 0x10 +#define DRV260X_A_TO_V_CTRL 0x11 +#define DRV260X_A_TO_V_MIN_INPUT 0x12 +#define DRV260X_A_TO_V_MAX_INPUT 0x13 +#define DRV260X_A_TO_V_MIN_OUT 0x14 +#define DRV260X_A_TO_V_MAX_OUT 0x15 +#define DRV260X_RATED_VOLT 0x16 +#define DRV260X_OD_CLAMP_VOLT 0x17 +#define DRV260X_CAL_COMP 0x18 +#define DRV260X_CAL_BACK_EMF 0x19 +#define DRV260X_FEEDBACK_CTRL 0x1a +#define DRV260X_CTRL1 0x1b +#define DRV260X_CTRL2 0x1c +#define DRV260X_CTRL3 0x1d +#define DRV260X_CTRL4 0x1e +#define DRV260X_CTRL5 0x1f +#define DRV260X_LRA_LOOP_PERIOD 0x20 +#define DRV260X_VBAT_MON 0x21 +#define DRV260X_LRA_RES_PERIOD 0x22 +#define DRV260X_MAX_REG 0x23 + +#define DRV260X_ALLOWED_R_BYTES 25 +#define DRV260X_ALLOWED_W_BYTES 2 +#define DRV260X_MAX_RW_RETRIES 5 +#define DRV260X_I2C_RETRY_DELAY 10 + +#define DRV260X_GO_BIT 0x01 + +/* Library Selection */ +#define DRV260X_LIB_SEL_MASK 0x07 +#define DRV260X_LIB_SEL_RAM 0x0 +#define DRV260X_LIB_SEL_OD 0x1 +#define DRV260X_LIB_SEL_40_60 0x2 +#define DRV260X_LIB_SEL_60_80 0x3 +#define DRV260X_LIB_SEL_100_140 0x4 +#define DRV260X_LIB_SEL_140_PLUS 0x5 + +#define DRV260X_LIB_SEL_HIZ_MASK 0x10 +#define DRV260X_LIB_SEL_HIZ_EN 0x01 +#define DRV260X_LIB_SEL_HIZ_DIS 0 + +/* Mode register */ +#define DRV260X_STANDBY (1 << 6) +#define DRV260X_STANDBY_MASK 0x40 +#define DRV260X_INTERNAL_TRIGGER 0x00 +#define DRV260X_EXT_TRIGGER_EDGE 0x01 +#define DRV260X_EXT_TRIGGER_LEVEL 0x02 +#define DRV260X_PWM_ANALOG_IN 0x03 +#define DRV260X_AUDIOHAPTIC 0x04 +#define DRV260X_RT_PLAYBACK 0x05 +#define DRV260X_DIAGNOSTICS 0x06 +#define DRV260X_AUTO_CAL 0x07 + +/* Audio to Haptics Control */ +#define DRV260X_AUDIO_HAPTICS_PEAK_10MS (0 << 2) +#define DRV260X_AUDIO_HAPTICS_PEAK_20MS (1 << 2) +#define DRV260X_AUDIO_HAPTICS_PEAK_30MS (2 << 2) +#define DRV260X_AUDIO_HAPTICS_PEAK_40MS (3 << 2) + +#define DRV260X_AUDIO_HAPTICS_FILTER_100HZ 0x00 +#define DRV260X_AUDIO_HAPTICS_FILTER_125HZ 0x01 +#define DRV260X_AUDIO_HAPTICS_FILTER_150HZ 0x02 +#define DRV260X_AUDIO_HAPTICS_FILTER_200HZ 0x03 + +/* Min/Max Input/Output Voltages */ +#define DRV260X_AUDIO_HAPTICS_MIN_IN_VOLT 0x19 +#define DRV260X_AUDIO_HAPTICS_MAX_IN_VOLT 0x64 +#define DRV260X_AUDIO_HAPTICS_MIN_OUT_VOLT 0x19 +#define DRV260X_AUDIO_HAPTICS_MAX_OUT_VOLT 0xFF + +/* Feedback register */ +#define DRV260X_FB_REG_ERM_MODE 0x7f +#define DRV260X_FB_REG_LRA_MODE (1 << 7) + +#define DRV260X_BRAKE_FACTOR_MASK 0x1f +#define DRV260X_BRAKE_FACTOR_2X (1 << 0) +#define DRV260X_BRAKE_FACTOR_3X (2 << 4) +#define DRV260X_BRAKE_FACTOR_4X (3 << 4) +#define DRV260X_BRAKE_FACTOR_6X (4 << 4) +#define DRV260X_BRAKE_FACTOR_8X (5 << 4) +#define DRV260X_BRAKE_FACTOR_16 (6 << 4) +#define DRV260X_BRAKE_FACTOR_DIS (7 << 4) + +#define DRV260X_LOOP_GAIN_LOW 0xf3 +#define DRV260X_LOOP_GAIN_MED (1 << 2) +#define DRV260X_LOOP_GAIN_HIGH (2 << 2) +#define DRV260X_LOOP_GAIN_VERY_HIGH (3 << 2) + +#define DRV260X_BEMF_GAIN_0 0xfc +#define DRV260X_BEMF_GAIN_1 (1 << 0) +#define DRV260X_BEMF_GAIN_2 (2 << 0) +#define DRV260X_BEMF_GAIN_3 (3 << 0) + +/* Control 1 register */ +#define DRV260X_AC_CPLE_EN (1 << 5) +#define DRV260X_STARTUP_BOOST (1 << 7) + +/* Control 2 register */ + +#define DRV260X_IDISS_TIME_45 0 +#define DRV260X_IDISS_TIME_75 (1 << 0) +#define DRV260X_IDISS_TIME_150 (1 << 1) +#define DRV260X_IDISS_TIME_225 0x03 + +#define DRV260X_BLANK_TIME_45 (0 << 2) +#define DRV260X_BLANK_TIME_75 (1 << 2) +#define DRV260X_BLANK_TIME_150 (2 << 2) +#define DRV260X_BLANK_TIME_225 (3 << 2) + +#define DRV260X_SAMP_TIME_150 (0 << 4) +#define DRV260X_SAMP_TIME_200 (1 << 4) +#define DRV260X_SAMP_TIME_250 (2 << 4) +#define DRV260X_SAMP_TIME_300 (3 << 4) + +#define DRV260X_BRAKE_STABILIZER (1 << 6) +#define DRV260X_UNIDIR_IN (0 << 7) +#define DRV260X_BIDIR_IN (1 << 7) + +/* Control 3 Register */ +#define DRV260X_LRA_OPEN_LOOP (1 << 0) +#define DRV260X_ANANLOG_IN (1 << 1) +#define DRV260X_LRA_DRV_MODE (1 << 2) +#define DRV260X_RTP_UNSIGNED_DATA (1 << 3) +#define DRV260X_SUPPLY_COMP_DIS (1 << 4) +#define DRV260X_ERM_OPEN_LOOP (1 << 5) +#define DRV260X_NG_THRESH_0 (0 << 6) +#define DRV260X_NG_THRESH_2 (1 << 6) +#define DRV260X_NG_THRESH_4 (2 << 6) +#define DRV260X_NG_THRESH_8 (3 << 6) + +/* Control 4 Register */ +#define DRV260X_AUTOCAL_TIME_150MS (0 << 4) +#define DRV260X_AUTOCAL_TIME_250MS (1 << 4) +#define DRV260X_AUTOCAL_TIME_500MS (2 << 4) +#define DRV260X_AUTOCAL_TIME_1000MS (3 << 4) + +/** + * struct drv260x_data - + * @input_dev - Pointer to the input device + * @client - Pointer to the I2C client + * @regmap - Register map of the device + * @work - Work item used to off load the enable/disable of the vibration + * @enable_gpio - Pointer to the gpio used for enable/disabling + * @regulator - Pointer to the regulator for the IC + * @magnitude - Magnitude of the vibration event + * @mode - The operating mode of the IC (LRA_NO_CAL, ERM or LRA) + * @library - The vibration library to be used + * @rated_voltage - The rated_voltage of the actuator + * @overdriver_voltage - The over drive voltage of the actuator +**/ +struct drv260x_data { + struct input_dev *input_dev; + struct i2c_client *client; + struct regmap *regmap; + struct work_struct work; + struct gpio_desc *enable_gpio; + struct regulator *regulator; + u32 magnitude; + u32 mode; + u32 library; + int rated_voltage; + int overdrive_voltage; +}; + +static struct reg_default drv260x_reg_defs[] = { + { DRV260X_STATUS, 0xe0 }, + { DRV260X_MODE, 0x40 }, + { DRV260X_RT_PB_IN, 0x00 }, + { DRV260X_LIB_SEL, 0x00 }, + { DRV260X_WV_SEQ_1, 0x01 }, + { DRV260X_WV_SEQ_2, 0x00 }, + { DRV260X_WV_SEQ_3, 0x00 }, + { DRV260X_WV_SEQ_4, 0x00 }, + { DRV260X_WV_SEQ_5, 0x00 }, + { DRV260X_WV_SEQ_6, 0x00 }, + { DRV260X_WV_SEQ_7, 0x00 }, + { DRV260X_WV_SEQ_8, 0x00 }, + { DRV260X_GO, 0x00 }, + { DRV260X_OVERDRIVE_OFF, 0x00 }, + { DRV260X_SUSTAIN_P_OFF, 0x00 }, + { DRV260X_SUSTAIN_N_OFF, 0x00 }, + { DRV260X_BRAKE_OFF, 0x00 }, + { DRV260X_A_TO_V_CTRL, 0x05 }, + { DRV260X_A_TO_V_MIN_INPUT, 0x19 }, + { DRV260X_A_TO_V_MAX_INPUT, 0xff }, + { DRV260X_A_TO_V_MIN_OUT, 0x19 }, + { DRV260X_A_TO_V_MAX_OUT, 0xff }, + { DRV260X_RATED_VOLT, 0x3e }, + { DRV260X_OD_CLAMP_VOLT, 0x8c }, + { DRV260X_CAL_COMP, 0x0c }, + { DRV260X_CAL_BACK_EMF, 0x6c }, + { DRV260X_FEEDBACK_CTRL, 0x36 }, + { DRV260X_CTRL1, 0x93 }, + { DRV260X_CTRL2, 0xfa }, + { DRV260X_CTRL3, 0xa0 }, + { DRV260X_CTRL4, 0x20 }, + { DRV260X_CTRL5, 0x80 }, + { DRV260X_LRA_LOOP_PERIOD, 0x33 }, + { DRV260X_VBAT_MON, 0x00 }, + { DRV260X_LRA_RES_PERIOD, 0x00 }, +}; + +#define DRV260X_DEF_RATED_VOLT 0x90 +#define DRV260X_DEF_OD_CLAMP_VOLT 0x90 + +/** + * Rated and Overdriver Voltages: + * Calculated using the formula r = v * 255 / 5.6 + * where r is what will be written to the register + * and v is the rated or overdriver voltage of the actuator + **/ +static int drv260x_calculate_voltage(unsigned int voltage) +{ + return (voltage * 255 / 5600); +} + +static void drv260x_worker(struct work_struct *work) +{ + struct drv260x_data *haptics = container_of(work, struct drv260x_data, work); + int error; + + gpiod_set_value(haptics->enable_gpio, 1); + /* Data sheet says to wait 250us before trying to communicate */ + udelay(250); + + error = regmap_write(haptics->regmap, + DRV260X_MODE, DRV260X_RT_PLAYBACK); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write set mode: %d\n", error); + } else { + error = regmap_write(haptics->regmap, + DRV260X_RT_PB_IN, haptics->magnitude); + if (error) + dev_err(&haptics->client->dev, + "Failed to set magnitude: %d\n", error); + } +} + +static int drv260x_haptics_play(struct input_dev *input, void *data, + struct ff_effect *effect) +{ + struct drv260x_data *haptics = input_get_drvdata(input); + + haptics->mode = DRV260X_LRA_NO_CAL_MODE; + + if (effect->u.rumble.strong_magnitude > 0) + haptics->magnitude = effect->u.rumble.strong_magnitude; + else if (effect->u.rumble.weak_magnitude > 0) + haptics->magnitude = effect->u.rumble.weak_magnitude; + else + haptics->magnitude = 0; + + schedule_work(&haptics->work); + + return 0; +} + +static void drv260x_close(struct input_dev *input) +{ + struct drv260x_data *haptics = input_get_drvdata(input); + int error; + + cancel_work_sync(&haptics->work); + + error = regmap_write(haptics->regmap, DRV260X_MODE, DRV260X_STANDBY); + if (error) + dev_err(&haptics->client->dev, + "Failed to enter standby mode: %d\n", error); + + gpiod_set_value(haptics->enable_gpio, 0); +} + +static const struct reg_default drv260x_lra_cal_regs[] = { + { DRV260X_MODE, DRV260X_AUTO_CAL }, + { DRV260X_CTRL3, DRV260X_NG_THRESH_2 }, + { DRV260X_FEEDBACK_CTRL, DRV260X_FB_REG_LRA_MODE | + DRV260X_BRAKE_FACTOR_4X | DRV260X_LOOP_GAIN_HIGH }, +}; + +static const struct reg_default drv260x_lra_init_regs[] = { + { DRV260X_MODE, DRV260X_RT_PLAYBACK }, + { DRV260X_A_TO_V_CTRL, DRV260X_AUDIO_HAPTICS_PEAK_20MS | + DRV260X_AUDIO_HAPTICS_FILTER_125HZ }, + { DRV260X_A_TO_V_MIN_INPUT, DRV260X_AUDIO_HAPTICS_MIN_IN_VOLT }, + { DRV260X_A_TO_V_MAX_INPUT, DRV260X_AUDIO_HAPTICS_MAX_IN_VOLT }, + { DRV260X_A_TO_V_MIN_OUT, DRV260X_AUDIO_HAPTICS_MIN_OUT_VOLT }, + { DRV260X_A_TO_V_MAX_OUT, DRV260X_AUDIO_HAPTICS_MAX_OUT_VOLT }, + { DRV260X_FEEDBACK_CTRL, DRV260X_FB_REG_LRA_MODE | + DRV260X_BRAKE_FACTOR_2X | DRV260X_LOOP_GAIN_MED | + DRV260X_BEMF_GAIN_3 }, + { DRV260X_CTRL1, DRV260X_STARTUP_BOOST }, + { DRV260X_CTRL2, DRV260X_SAMP_TIME_250 }, + { DRV260X_CTRL3, DRV260X_NG_THRESH_2 | DRV260X_ANANLOG_IN }, + { DRV260X_CTRL4, DRV260X_AUTOCAL_TIME_500MS }, +}; + +static const struct reg_default drv260x_erm_cal_regs[] = { + { DRV260X_MODE, DRV260X_AUTO_CAL }, + { DRV260X_A_TO_V_MIN_INPUT, DRV260X_AUDIO_HAPTICS_MIN_IN_VOLT }, + { DRV260X_A_TO_V_MAX_INPUT, DRV260X_AUDIO_HAPTICS_MAX_IN_VOLT }, + { DRV260X_A_TO_V_MIN_OUT, DRV260X_AUDIO_HAPTICS_MIN_OUT_VOLT }, + { DRV260X_A_TO_V_MAX_OUT, DRV260X_AUDIO_HAPTICS_MAX_OUT_VOLT }, + { DRV260X_FEEDBACK_CTRL, DRV260X_BRAKE_FACTOR_3X | + DRV260X_LOOP_GAIN_MED | DRV260X_BEMF_GAIN_2 }, + { DRV260X_CTRL1, DRV260X_STARTUP_BOOST }, + { DRV260X_CTRL2, DRV260X_SAMP_TIME_250 | DRV260X_BLANK_TIME_75 | + DRV260X_IDISS_TIME_75 }, + { DRV260X_CTRL3, DRV260X_NG_THRESH_2 | DRV260X_ERM_OPEN_LOOP }, + { DRV260X_CTRL4, DRV260X_AUTOCAL_TIME_500MS }, +}; + +static int drv260x_init(struct drv260x_data *haptics) +{ + int error; + unsigned int cal_buf; + + error = regmap_write(haptics->regmap, + DRV260X_RATED_VOLT, haptics->rated_voltage); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write DRV260X_RATED_VOLT register: %d\n", + error); + return error; + } + + error = regmap_write(haptics->regmap, + DRV260X_OD_CLAMP_VOLT, haptics->overdrive_voltage); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write DRV260X_OD_CLAMP_VOLT register: %d\n", + error); + return error; + } + + switch (haptics->mode) { + case DRV260X_LRA_MODE: + error = regmap_register_patch(haptics->regmap, + drv260x_lra_cal_regs, + ARRAY_SIZE(drv260x_lra_cal_regs)); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write LRA calibration registers: %d\n", + error); + return error; + } + + break; + + case DRV260X_ERM_MODE: + error = regmap_register_patch(haptics->regmap, + drv260x_erm_cal_regs, + ARRAY_SIZE(drv260x_erm_cal_regs)); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write ERM calibration registers: %d\n", + error); + return error; + } + + error = regmap_update_bits(haptics->regmap, DRV260X_LIB_SEL, + DRV260X_LIB_SEL_MASK, + haptics->library); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write DRV260X_LIB_SEL register: %d\n", + error); + return error; + } + + break; + + default: + error = regmap_register_patch(haptics->regmap, + drv260x_lra_init_regs, + ARRAY_SIZE(drv260x_lra_init_regs)); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write LRA init registers: %d\n", + error); + return error; + } + + error = regmap_update_bits(haptics->regmap, DRV260X_LIB_SEL, + DRV260X_LIB_SEL_MASK, + haptics->library); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write DRV260X_LIB_SEL register: %d\n", + error); + return error; + } + + /* No need to set GO bit here */ + return 0; + } + + error = regmap_write(haptics->regmap, DRV260X_GO, DRV260X_GO_BIT); + if (error) { + dev_err(&haptics->client->dev, + "Failed to write GO register: %d\n", + error); + return error; + } + + do { + error = regmap_read(haptics->regmap, DRV260X_GO, &cal_buf); + if (error) { + dev_err(&haptics->client->dev, + "Failed to read GO register: %d\n", + error); + return error; + } + } while (cal_buf == DRV260X_GO_BIT); + + return 0; +} + +static const struct regmap_config drv260x_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = DRV260X_MAX_REG, + .reg_defaults = drv260x_reg_defs, + .num_reg_defaults = ARRAY_SIZE(drv260x_reg_defs), + .cache_type = REGCACHE_NONE, +}; + +#ifdef CONFIG_OF +static int drv260x_parse_dt(struct device *dev, + struct drv260x_data *haptics) +{ + struct device_node *np = dev->of_node; + unsigned int voltage; + int error; + + error = of_property_read_u32(np, "mode", &haptics->mode); + if (error) { + dev_err(dev, "%s: No entry for mode\n", __func__); + return error; + } + + error = of_property_read_u32(np, "library-sel", &haptics->library); + if (error) { + dev_err(dev, "%s: No entry for library selection\n", + __func__); + return error; + } + + error = of_property_read_u32(np, "vib-rated-mv", &voltage); + if (!error) + haptics->rated_voltage = drv260x_calculate_voltage(voltage); + + + error = of_property_read_u32(np, "vib-overdrive-mv", &voltage); + if (!error) + haptics->overdrive_voltage = drv260x_calculate_voltage(voltage); + + return 0; +} +#else +static inline int drv260x_parse_dt(struct device *dev, + struct drv260x_data *haptics) +{ + dev_err(dev, "no platform data defined\n"); + + return -EINVAL; +} +#endif + +static int drv260x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + const struct drv260x_platform_data *pdata = dev_get_platdata(&client->dev); + struct drv260x_data *haptics; + int error; + + haptics = devm_kzalloc(&client->dev, sizeof(*haptics), GFP_KERNEL); + if (!haptics) + return -ENOMEM; + + haptics->rated_voltage = DRV260X_DEF_OD_CLAMP_VOLT; + haptics->rated_voltage = DRV260X_DEF_RATED_VOLT; + + if (pdata) { + haptics->mode = pdata->mode; + haptics->library = pdata->library_selection; + if (pdata->vib_overdrive_voltage) + haptics->overdrive_voltage = drv260x_calculate_voltage(pdata->vib_overdrive_voltage); + if (pdata->vib_rated_voltage) + haptics->rated_voltage = drv260x_calculate_voltage(pdata->vib_rated_voltage); + } else if (client->dev.of_node) { + error = drv260x_parse_dt(&client->dev, haptics); + if (error) + return error; + } else { + dev_err(&client->dev, "Platform data not set\n"); + return -ENODEV; + } + + + if (haptics->mode < DRV260X_LRA_MODE || + haptics->mode > DRV260X_ERM_MODE) { + dev_err(&client->dev, + "Vibrator mode is invalid: %i\n", + haptics->mode); + return -EINVAL; + } + + if (haptics->library < DRV260X_LIB_EMPTY || + haptics->library > DRV260X_ERM_LIB_F) { + dev_err(&client->dev, + "Library value is invalid: %i\n", haptics->library); + return -EINVAL; + } + + if (haptics->mode == DRV260X_LRA_MODE && + haptics->library != DRV260X_LIB_EMPTY && + haptics->library != DRV260X_LIB_LRA) { + dev_err(&client->dev, + "LRA Mode with ERM Library mismatch\n"); + return -EINVAL; + } + + haptics->regulator = devm_regulator_get(&client->dev, "vbat"); + if (IS_ERR(haptics->regulator)) { + error = PTR_ERR(haptics->regulator); + dev_err(&client->dev, + "unable to get regulator, error: %d\n", error); + return error; + } + + haptics->enable_gpio = devm_gpiod_get(&client->dev, "enable"); + if (IS_ERR(haptics->enable_gpio)) { + error = PTR_ERR(haptics->enable_gpio); + if (error != -ENOENT && error != -ENOSYS) + return error; + haptics->enable_gpio = NULL; + } else { + gpiod_direction_output(haptics->enable_gpio, 1); + } + + haptics->input_dev = devm_input_allocate_device(&client->dev); + if (!haptics->input_dev) { + dev_err(&client->dev, "Failed to allocate input device\n"); + return -ENOMEM; + } + + haptics->input_dev->name = "drv260x:haptics"; + haptics->input_dev->dev.parent = client->dev.parent; + haptics->input_dev->close = drv260x_close; + input_set_drvdata(haptics->input_dev, haptics); + input_set_capability(haptics->input_dev, EV_FF, FF_RUMBLE); + + error = input_ff_create_memless(haptics->input_dev, NULL, + drv260x_haptics_play); + if (error) { + dev_err(&client->dev, "input_ff_create() failed: %d\n", + error); + return error; + } + + INIT_WORK(&haptics->work, drv260x_worker); + + haptics->client = client; + i2c_set_clientdata(client, haptics); + + haptics->regmap = devm_regmap_init_i2c(client, &drv260x_regmap_config); + if (IS_ERR(haptics->regmap)) { + error = PTR_ERR(haptics->regmap); + dev_err(&client->dev, "Failed to allocate register map: %d\n", + error); + return error; + } + + error = drv260x_init(haptics); + if (error) { + dev_err(&client->dev, "Device init failed: %d\n", error); + return error; + } + + error = input_register_device(haptics->input_dev); + if (error) { + dev_err(&client->dev, "couldn't register input device: %d\n", + error); + return error; + } + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int drv260x_suspend(struct device *dev) +{ + struct drv260x_data *haptics = dev_get_drvdata(dev); + int ret = 0; + + mutex_lock(&haptics->input_dev->mutex); + + if (haptics->input_dev->users) { + ret = regmap_update_bits(haptics->regmap, + DRV260X_MODE, + DRV260X_STANDBY_MASK, + DRV260X_STANDBY); + if (ret) { + dev_err(dev, "Failed to set standby mode\n"); + goto out; + } + + gpiod_set_value(haptics->enable_gpio, 0); + + ret = regulator_disable(haptics->regulator); + if (ret) { + dev_err(dev, "Failed to disable regulator\n"); + regmap_update_bits(haptics->regmap, + DRV260X_MODE, + DRV260X_STANDBY_MASK, 0); + } + } +out: + mutex_unlock(&haptics->input_dev->mutex); + return ret; +} + +static int drv260x_resume(struct device *dev) +{ + struct drv260x_data *haptics = dev_get_drvdata(dev); + int ret = 0; + + mutex_lock(&haptics->input_dev->mutex); + + if (haptics->input_dev->users) { + ret = regulator_enable(haptics->regulator); + if (ret) { + dev_err(dev, "Failed to enable regulator\n"); + goto out; + } + + ret = regmap_update_bits(haptics->regmap, + DRV260X_MODE, + DRV260X_STANDBY_MASK, 0); + if (ret) { + dev_err(dev, "Failed to unset standby mode\n"); + regulator_disable(haptics->regulator); + goto out; + } + + gpiod_set_value(haptics->enable_gpio, 1); + } + +out: + mutex_unlock(&haptics->input_dev->mutex); + return ret; +} +#endif + +static SIMPLE_DEV_PM_OPS(drv260x_pm_ops, drv260x_suspend, drv260x_resume); + +static const struct i2c_device_id drv260x_id[] = { + { "drv2605l", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, drv260x_id); + +#ifdef CONFIG_OF +static const struct of_device_id drv260x_of_match[] = { + { .compatible = "ti,drv2604", }, + { .compatible = "ti,drv2604l", }, + { .compatible = "ti,drv2605", }, + { .compatible = "ti,drv2605l", }, + { } +}; +MODULE_DEVICE_TABLE(of, drv260x_of_match); +#endif + +static struct i2c_driver drv260x_driver = { + .probe = drv260x_probe, + .driver = { + .name = "drv260x-haptics", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(drv260x_of_match), + .pm = &drv260x_pm_ops, + }, + .id_table = drv260x_id, +}; +module_i2c_driver(drv260x_driver); + +MODULE_ALIAS("platform:drv260x-haptics"); +MODULE_DESCRIPTION("TI DRV260x haptics driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Murphy "); diff --git a/include/dt-bindings/input/ti-drv260x.h b/include/dt-bindings/input/ti-drv260x.h new file mode 100644 index 000000000000..2626e6d9f707 --- /dev/null +++ b/include/dt-bindings/input/ti-drv260x.h @@ -0,0 +1,36 @@ +/* + * DRV260X haptics driver family + * + * Author: Dan Murphy + * + * Copyright: (C) 2014 Texas Instruments, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _DT_BINDINGS_TI_DRV260X_H +#define _DT_BINDINGS_TI_DRV260X_H + +/* Calibration Types */ +#define DRV260X_LRA_MODE 0x00 +#define DRV260X_LRA_NO_CAL_MODE 0x01 +#define DRV260X_ERM_MODE 0x02 + +/* Library Selection */ +#define DRV260X_LIB_EMPTY 0x00 +#define DRV260X_ERM_LIB_A 0x01 +#define DRV260X_ERM_LIB_B 0x02 +#define DRV260X_ERM_LIB_C 0x03 +#define DRV260X_ERM_LIB_D 0x04 +#define DRV260X_ERM_LIB_E 0x05 +#define DRV260X_LIB_LRA 0x06 +#define DRV260X_ERM_LIB_F 0x07 + +#endif diff --git a/include/linux/platform_data/drv260x-pdata.h b/include/linux/platform_data/drv260x-pdata.h new file mode 100644 index 000000000000..0a03b0944411 --- /dev/null +++ b/include/linux/platform_data/drv260x-pdata.h @@ -0,0 +1,28 @@ +/* + * Platform data for DRV260X haptics driver family + * + * Author: Dan Murphy + * + * Copyright: (C) 2014 Texas Instruments, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _LINUX_DRV260X_PDATA_H +#define _LINUX_DRV260X_PDATA_H + +struct drv260x_platform_data { + u32 library_selection; + u32 mode; + u32 vib_rated_voltage; + u32 vib_overdrive_voltage; +}; + +#endif -- cgit v1.2.3 From 58b84f6a97f7f8811e0636836734809ff52cad43 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 19 Aug 2014 12:00:53 -0500 Subject: gpio: move GPIOD flags outside #ifdef The GPIOD flags are defined inside the #ifdef CONFIG_GPIOLIB switch, making the gpiolib stubs fail if these flags are used by a consumer. This is not correct: the stubs should compile fine without GPIOLIB. Reported-by: Ulf Hansson Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index b7ce0c64c6f3..c7e17de732f3 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -16,8 +16,6 @@ struct device; */ struct gpio_desc; -#ifdef CONFIG_GPIOLIB - #define GPIOD_FLAGS_BIT_DIR_SET BIT(0) #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) @@ -34,6 +32,8 @@ enum gpiod_flags { GPIOD_FLAGS_BIT_DIR_VAL, }; +#ifdef CONFIG_GPIOLIB + /* Acquire and dispose GPIOs */ struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id, -- cgit v1.2.3 From df11e506d330d9a0e5a701cd2c5fcb7d461b6060 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 21 Aug 2014 10:11:34 +0800 Subject: regulator: core: Add back the const qualifier for ops of struct regulator_desc Fix below build warning: CC [M] drivers/regulator/hi6421-regulator.o drivers/regulator/hi6421-regulator.c:356:2: warning: initialization discards 'const' qualifier from pointer target type [enabled by default] This is a revert of commit 716845ebeb50 ("regulator: core: Fix build error due to const qualifier for ops"). The build error was fixed by commit 39f5460d7f9c ("regulator: core: add const to regulator_ops and fix build error in mc13892"). Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 3abda7554d82..efe058f8f746 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -246,7 +246,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - struct regulator_ops *ops; + const struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 From e790d9ef6405633b007339d746b709aed43a928d Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 21 Aug 2014 18:08:05 +0200 Subject: KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. Advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/kvm/arm.c | 4 ++++ arch/mips/kvm/mips.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 2 ++ 7 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a99e0cdf8ba2..9f788ebac55b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index cd7114147ae7..2362df2a79f9 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1002,6 +1002,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c79284b58be..cbc432f4f0a6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -720,6 +720,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ce81eb2ab76a..a3c324ec4370 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -555,6 +555,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) /* Nothing todo */ } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd718c01cdf1..7d43dc7bb906 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7171,6 +7171,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) static_key_slow_dec(&kvm_no_apic_vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (type) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4c33b34fe3f..ebd723676633 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -624,6 +624,8 @@ void kvm_arch_exit(void); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 39b16035386f..5a0817ee996e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3124,6 +3124,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) if (vcpu->preempted) vcpu->preempted = false; + kvm_arch_sched_in(vcpu, cpu); + kvm_arch_vcpu_load(vcpu, cpu); } -- cgit v1.2.3 From c200b1aa6cb460ce8c3ecf6fdc690d3949c3cc5d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 20 Aug 2014 18:36:46 +0800 Subject: f2fs: fix incorrect calculation with total/free inode num Theoretically, our total inodes number is the same as total node number, but there are three node ids are reserved in f2fs, they are 0, 1 (node nid), and 2 (meta nid), and they should never be used by user, so our total/free inode number calculated in ->statfs is wrong. This patch indroduces F2FS_RESERVED_NODE_NUM and then fixes this issue by recalculating total/free inode number with the macro. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- fs/f2fs/super.c | 4 ++-- include/linux/f2fs_fs.h | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b4d964029fc7..044395c20ee9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1957,7 +1957,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->available_nids = nm_i->max_nid - 3; + nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; nm_i->fcnt = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ddb1e9d36363..0f61f5aa587c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -508,8 +508,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi); - buf->f_files = sbi->total_node_count; - buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); + buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + buf->f_ffree = buf->f_files - valid_inode_count(sbi); buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 6ff0b0b42d47..0ed77f32c9ac 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -24,6 +24,9 @@ #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ +/* 0, 1(node nid), 2(meta nid) are reserved node id */ +#define F2FS_RESERVED_NODE_NUM 3 + #define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) #define F2FS_META_INO(sbi) (sbi->meta_ino_num) -- cgit v1.2.3 From 8913dc0bb913ac3dc83ed5c10bac2f4e55431981 Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Thu, 21 Aug 2014 20:28:20 +0000 Subject: usb: gadget: document a usb_ep_dequeue() requirement Document the requirement that the request be dequeued and its completion routine called before usb_ep_dequeue() returns. Also fix some capitalization issues in the existing text. Signed-off-by: Paul Zimmerman Acked-by: Alan Stern Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index c3a61853cd13..c540557b564b 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -345,12 +345,13 @@ static inline int usb_ep_queue(struct usb_ep *ep, * @ep:the endpoint associated with the request * @req:the request being canceled * - * if the request is still active on the endpoint, it is dequeued and its + * If the request is still active on the endpoint, it is dequeued and its * completion routine is called (with status -ECONNRESET); else a negative - * error code is returned. + * error code is returned. This is guaranteed to happen before the call to + * usb_ep_dequeue() returns. * - * note that some hardware can't clear out write fifos (to unlink the request - * at the head of the queue) except as part of disconnecting from usb. such + * Note that some hardware can't clear out write fifos (to unlink the request + * at the head of the queue) except as part of disconnecting from usb. Such * restrictions prevent drivers from supporting configuration changes, * even to configuration zero (a "chapter 9" requirement). */ -- cgit v1.2.3 From 33b7f99cf003ca6c1d31c42b50e1100ad71aaec0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 15 Aug 2014 17:23:02 -0400 Subject: ftrace: Allow ftrace_ops to use the hashes from other ops Currently the top level debug file system function tracer shares its ftrace_ops with the function graph tracer. This was thought to be fine because the tracers are not used together, as one can only enable function or function_graph tracer in the current_tracer file. But that assumption proved to be incorrect. The function profiler can use the function graph tracer when function tracing is enabled. Since all function graph users uses the function tracing ftrace_ops this causes a conflict and when a user enables both function profiling as well as the function tracer it will crash ftrace and disable it. The quick solution so far is to move them as separate ftrace_ops like it was earlier. The problem though is to synchronize the functions that are traced because both function and function_graph tracer are limited by the selections made in the set_ftrace_filter and set_ftrace_notrace files. To handle this, a new structure is made called ftrace_ops_hash. This structure will now hold the filter_hash and notrace_hash, and the ftrace_ops will point to this structure. That will allow two ftrace_ops to share the same hashes. Since most ftrace_ops do not share the hashes, and to keep allocation simple, the ftrace_ops structure will include both a pointer to the ftrace_ops_hash called func_hash, as well as the structure itself, called local_hash. When the ops are registered, the func_hash pointer will be initialized to point to the local_hash within the ftrace_ops structure. Some of the ftrace internal ftrace_ops will be initialized statically. This will allow for the function and function_graph tracer to have separate ops but still share the same hash tables that determine what functions they trace. Cc: stable@vger.kernel.org # 3.16 (apply after 3.17-rc4 is out) Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 14 +++++-- kernel/trace/ftrace.c | 100 +++++++++++++++++++++++++------------------------ 2 files changed, 63 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6bb5e3f2a3b4..f0b0edbf55a9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -102,6 +102,15 @@ enum { FTRACE_OPS_FL_DELETED = 1 << 8, }; +#ifdef CONFIG_DYNAMIC_FTRACE +/* The hash used to know what functions callbacks trace */ +struct ftrace_ops_hash { + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; + struct mutex regex_lock; +}; +#endif + /* * Note, ftrace_ops can be referenced outside of RCU protection. * (Although, for perf, the control ops prevent that). If ftrace_ops is @@ -121,10 +130,9 @@ struct ftrace_ops { int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE int nr_trampolines; - struct ftrace_hash *notrace_hash; - struct ftrace_hash *filter_hash; + struct ftrace_ops_hash local_hash; + struct ftrace_ops_hash *func_hash; struct ftrace_hash *tramp_hash; - struct mutex regex_lock; unsigned long trampoline; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1654b12c891a..c92757adba79 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -65,15 +65,17 @@ #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL) #ifdef CONFIG_DYNAMIC_FTRACE -#define INIT_REGEX_LOCK(opsname) \ - .regex_lock = __MUTEX_INITIALIZER(opsname.regex_lock), +#define INIT_OPS_HASH(opsname) \ + .func_hash = &opsname.local_hash, \ + .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), #else -#define INIT_REGEX_LOCK(opsname) +#define INIT_OPS_HASH(opsname) #endif static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, + INIT_OPS_HASH(ftrace_list_end) }; /* ftrace_enabled is a method to turn ftrace on or off */ @@ -140,7 +142,8 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops) { #ifdef CONFIG_DYNAMIC_FTRACE if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) { - mutex_init(&ops->regex_lock); + mutex_init(&ops->local_hash.regex_lock); + ops->func_hash = &ops->local_hash; ops->flags |= FTRACE_OPS_FL_INITIALIZED; } #endif @@ -899,7 +902,7 @@ static void unregister_ftrace_profiler(void) static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(ftrace_profile_ops) + INIT_OPS_HASH(ftrace_profile_ops) }; static int register_ftrace_profiler(void) @@ -1081,11 +1084,12 @@ static const struct ftrace_hash empty_hash = { #define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) static struct ftrace_ops global_ops = { - .func = ftrace_stub, - .notrace_hash = EMPTY_HASH, - .filter_hash = EMPTY_HASH, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(global_ops) + .func = ftrace_stub, + .local_hash.notrace_hash = EMPTY_HASH, + .local_hash.filter_hash = EMPTY_HASH, + INIT_OPS_HASH(global_ops) + .flags = FTRACE_OPS_FL_RECURSION_SAFE | + FTRACE_OPS_FL_INITIALIZED, }; struct ftrace_page { @@ -1226,8 +1230,8 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) void ftrace_free_filter(struct ftrace_ops *ops) { ftrace_ops_init(ops); - free_ftrace_hash(ops->filter_hash); - free_ftrace_hash(ops->notrace_hash); + free_ftrace_hash(ops->func_hash->filter_hash); + free_ftrace_hash(ops->func_hash->notrace_hash); } static struct ftrace_hash *alloc_ftrace_hash(int size_bits) @@ -1382,8 +1386,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) return 0; #endif - filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); - notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); + filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); + notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); if ((ftrace_hash_empty(filter_hash) || ftrace_lookup_ip(filter_hash, ip)) && @@ -1554,14 +1558,14 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * gets inversed. */ if (filter_hash) { - hash = ops->filter_hash; - other_hash = ops->notrace_hash; + hash = ops->func_hash->filter_hash; + other_hash = ops->func_hash->notrace_hash; if (ftrace_hash_empty(hash)) all = 1; } else { inc = !inc; - hash = ops->notrace_hash; - other_hash = ops->filter_hash; + hash = ops->func_hash->notrace_hash; + other_hash = ops->func_hash->filter_hash; /* * If the notrace hash has no items, * then there's nothing to do. @@ -2436,8 +2440,8 @@ static inline int ops_traces_mod(struct ftrace_ops *ops) * Filter_hash being empty will default to trace module. * But notrace hash requires a test of individual module functions. */ - return ftrace_hash_empty(ops->filter_hash) && - ftrace_hash_empty(ops->notrace_hash); + return ftrace_hash_empty(ops->func_hash->filter_hash) && + ftrace_hash_empty(ops->func_hash->notrace_hash); } /* @@ -2459,12 +2463,12 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) return 0; /* The function must be in the filter */ - if (!ftrace_hash_empty(ops->filter_hash) && - !ftrace_lookup_ip(ops->filter_hash, rec->ip)) + if (!ftrace_hash_empty(ops->func_hash->filter_hash) && + !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) return 0; /* If in notrace hash, we ignore it too */ - if (ftrace_lookup_ip(ops->notrace_hash, rec->ip)) + if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) return 0; return 1; @@ -2785,10 +2789,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } else { rec = &iter->pg->records[iter->idx++]; if (((iter->flags & FTRACE_ITER_FILTER) && - !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || + !(ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))) || ((iter->flags & FTRACE_ITER_NOTRACE) && - !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) || + !ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) || ((iter->flags & FTRACE_ITER_ENABLED) && !(rec->flags & FTRACE_FL_ENABLED))) { @@ -2837,9 +2841,9 @@ static void *t_start(struct seq_file *m, loff_t *pos) * functions are enabled. */ if ((iter->flags & FTRACE_ITER_FILTER && - ftrace_hash_empty(ops->filter_hash)) || + ftrace_hash_empty(ops->func_hash->filter_hash)) || (iter->flags & FTRACE_ITER_NOTRACE && - ftrace_hash_empty(ops->notrace_hash))) { + ftrace_hash_empty(ops->func_hash->notrace_hash))) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -3001,12 +3005,12 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, iter->ops = ops; iter->flags = flag; - mutex_lock(&ops->regex_lock); + mutex_lock(&ops->func_hash->regex_lock); if (flag & FTRACE_ITER_NOTRACE) - hash = ops->notrace_hash; + hash = ops->func_hash->notrace_hash; else - hash = ops->filter_hash; + hash = ops->func_hash->filter_hash; if (file->f_mode & FMODE_WRITE) { const int size_bits = FTRACE_HASH_DEFAULT_BITS; @@ -3041,7 +3045,7 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, file->private_data = iter; out_unlock: - mutex_unlock(&ops->regex_lock); + mutex_unlock(&ops->func_hash->regex_lock); return ret; } @@ -3279,7 +3283,7 @@ static struct ftrace_ops trace_probe_ops __read_mostly = { .func = function_trace_probe_call, .flags = FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(trace_probe_ops) + INIT_OPS_HASH(trace_probe_ops) }; static int ftrace_probe_registered; @@ -3342,7 +3346,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_probe *entry; - struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash; + struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; struct ftrace_hash *hash; struct ftrace_page *pg; struct dyn_ftrace *rec; @@ -3359,7 +3363,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, if (WARN_ON(not)) return -EINVAL; - mutex_lock(&trace_probe_ops.regex_lock); + mutex_lock(&trace_probe_ops.func_hash->regex_lock); hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) { @@ -3428,7 +3432,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, out_unlock: mutex_unlock(&ftrace_lock); out: - mutex_unlock(&trace_probe_ops.regex_lock); + mutex_unlock(&trace_probe_ops.func_hash->regex_lock); free_ftrace_hash(hash); return count; @@ -3446,7 +3450,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, struct ftrace_func_entry *rec_entry; struct ftrace_func_probe *entry; struct ftrace_func_probe *p; - struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash; + struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; struct list_head free_list; struct ftrace_hash *hash; struct hlist_node *tmp; @@ -3468,7 +3472,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, return; } - mutex_lock(&trace_probe_ops.regex_lock); + mutex_lock(&trace_probe_ops.func_hash->regex_lock); hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) @@ -3521,7 +3525,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, mutex_unlock(&ftrace_lock); out_unlock: - mutex_unlock(&trace_probe_ops.regex_lock); + mutex_unlock(&trace_probe_ops.func_hash->regex_lock); free_ftrace_hash(hash); } @@ -3717,12 +3721,12 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, if (unlikely(ftrace_disabled)) return -ENODEV; - mutex_lock(&ops->regex_lock); + mutex_lock(&ops->func_hash->regex_lock); if (enable) - orig_hash = &ops->filter_hash; + orig_hash = &ops->func_hash->filter_hash; else - orig_hash = &ops->notrace_hash; + orig_hash = &ops->func_hash->notrace_hash; if (reset) hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); @@ -3752,7 +3756,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, mutex_unlock(&ftrace_lock); out_regex_unlock: - mutex_unlock(&ops->regex_lock); + mutex_unlock(&ops->func_hash->regex_lock); free_ftrace_hash(hash); return ret; @@ -3975,15 +3979,15 @@ int ftrace_regex_release(struct inode *inode, struct file *file) trace_parser_put(parser); - mutex_lock(&iter->ops->regex_lock); + mutex_lock(&iter->ops->func_hash->regex_lock); if (file->f_mode & FMODE_WRITE) { filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); if (filter_hash) - orig_hash = &iter->ops->filter_hash; + orig_hash = &iter->ops->func_hash->filter_hash; else - orig_hash = &iter->ops->notrace_hash; + orig_hash = &iter->ops->func_hash->notrace_hash; mutex_lock(&ftrace_lock); ret = ftrace_hash_move(iter->ops, filter_hash, @@ -3994,7 +3998,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file) mutex_unlock(&ftrace_lock); } - mutex_unlock(&iter->ops->regex_lock); + mutex_unlock(&iter->ops->func_hash->regex_lock); free_ftrace_hash(iter->hash); kfree(iter); @@ -4611,7 +4615,7 @@ void __init ftrace_init(void) static struct ftrace_ops global_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(global_ops) + INIT_OPS_HASH(global_ops) }; static int __init ftrace_nodyn_init(void) @@ -4713,7 +4717,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops control_ops = { .func = ftrace_ops_control_func, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(control_ops) + INIT_OPS_HASH(control_ops) }; static inline void -- cgit v1.2.3 From d4261e5650004d6d51137553ea5433d5828562dc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 19 Aug 2014 16:02:12 +0200 Subject: bonding: create netlink event when bonding option is changed Userspace needs to be notified if one changes some option. Signed-off-by: Jiri Pirko Acked-by: Veaceslav Falico Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 2 ++ include/linux/netdevice.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index dc73463c2c23..d8dc17faa6b4 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -625,6 +625,8 @@ int __bond_opt_set(struct bonding *bond, out: if (ret) bond_opt_error_interpret(bond, opt, ret, val); + else + call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev); return ret; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 38377392d082..7e2b0b8b5cd7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1982,6 +1982,7 @@ struct pcpu_sw_netstats { #define NETDEV_CHANGEUPPER 0x0015 #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ +#define NETDEV_CHANGEINFODATA 0x0018 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.3 From b5b822050ca3c4fc1f475100cc197cc00ba2d492 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 22 Aug 2014 16:17:38 +0800 Subject: f2fs: use macro for code readability This patch introduces DEF_NIDS_PER_INODE/GET_ORPHAN_BLOCKS/F2FS_CP_PACKS macro instead of numbers in code for readability. change log from v1: o fix typo pointed out by Jaegeuk Kim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 21 ++++++++++----------- include/linux/f2fs_fs.h | 13 ++++++++++--- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c9c08d52ecfd..ec3b7a5381fa 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -443,8 +443,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + - (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); + unsigned short orphan_blocks = + (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); struct page *page = NULL; struct ino_entry *orphan = NULL; @@ -837,7 +837,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); - for (i = 0; i < 3; i++) { + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); ckpt->cur_node_blkoff[i] = @@ -845,7 +845,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->alloc_type[i + CURSEG_HOT_NODE] = curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); } - for (i = 0; i < 3; i++) { + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { ckpt->cur_data_segno[i] = cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); ckpt->cur_data_blkoff[i] = @@ -860,24 +860,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi); - if (data_sum_blocks < 3) + if (data_sum_blocks < NR_CURSEG_DATA_TYPE) set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) - / F2FS_ORPHANS_PER_BLOCK; + orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); if (is_umount) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks); } @@ -1022,8 +1021,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * for cp pack we can have max 1020*504 orphan entries */ sbi->n_orphans = 0; - sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) - * F2FS_ORPHANS_PER_BLOCK; + sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - + NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 0ed77f32c9ac..08ed2b0a96e6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -90,6 +90,8 @@ struct f2fs_super_block { #define CP_ORPHAN_PRESENT_FLAG 0x00000002 #define CP_UMOUNT_FLAG 0x00000001 +#define F2FS_CP_PACKS 2 /* # of checkpoint packs */ + struct f2fs_checkpoint { __le64 checkpoint_ver; /* checkpoint block version number */ __le64 user_block_count; /* # of user blocks */ @@ -126,6 +128,9 @@ struct f2fs_checkpoint { */ #define F2FS_ORPHANS_PER_BLOCK 1020 +#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \ + F2FS_ORPHANS_PER_BLOCK) + struct f2fs_orphan_block { __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ __le32 reserved; /* reserved */ @@ -147,6 +152,7 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 #define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(fi) addrs_per_inode(fi) #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ @@ -166,8 +172,9 @@ struct f2fs_extent { #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) \ - - sizeof(__le32) * (DEF_ADDRS_PER_INODE + 5 - 1)) +#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) -\ + sizeof(__le32) * (DEF_ADDRS_PER_INODE + \ + DEF_NIDS_PER_INODE - 1)) struct f2fs_inode { __le16 i_mode; /* file mode */ @@ -197,7 +204,7 @@ struct f2fs_inode { __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ - __le32 i_nid[5]; /* direct(2), indirect(2), + __le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2), double_indirect(1) node id */ } __packed; -- cgit v1.2.3 From 7c3af975257383ece54b83c0505d3e0656cb7daf Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 8 Aug 2014 11:00:57 -0400 Subject: nfs: don't sleep with inode lock in lock_and_join_requests This handles the 'nonblock=false' case in nfs_lock_and_join_requests. If the group is already locked and blocking is allowed, drop the inode lock and wait for the group lock to be cleared before trying it all again. This should fix warnings found in peterz's tree (sched/wait branch), where might_sleep() checks are added to wait.[ch]. Reported-by: Fengguang Wu Signed-off-by: Weston Andros Adamson Reviewed-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 17 +++++++++++++++++ fs/nfs/write.c | 12 +++++++++++- include/linux/nfs_page.h | 1 + 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 30c9626f96b0..4ec67f8d70aa 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -167,6 +167,23 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) return -EAGAIN; } +/* + * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it + * @req - a request in the group + * + * This is a blocking call to wait for the group lock to be cleared. + */ +void +nfs_page_group_lock_wait(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + wait_on_bit(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); +} + /* * nfs_page_group_unlock - unlock the head of the page group * @req - request in group that is to be unlocked diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e056f617adf2..175d5d073ccf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -478,13 +478,23 @@ try_again: return NULL; } - /* lock each request in the page group */ + /* holding inode lock, so always make a non-blocking call to try the + * page group lock */ ret = nfs_page_group_lock(head, true); if (ret < 0) { spin_unlock(&inode->i_lock); + + if (!nonblock && ret == -EAGAIN) { + nfs_page_group_lock_wait(head); + nfs_release_request(head); + goto try_again; + } + nfs_release_request(head); return ERR_PTR(ret); } + + /* lock each request in the page group */ subreq = head; do { /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6ad2bbcad405..6c3e06ee2fb7 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -123,6 +123,7 @@ extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *, bool); +extern void nfs_page_group_lock_wait(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3 From 989e04c5bc3ff77d65e1f0d87bf7904dfa30d41c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 22 Aug 2014 14:15:22 -0700 Subject: tcp: improve undo on timeout Upon timeout, undo (via both timestamps/Eifel and DSACKs) was disabled if any retransmits were still in flight. The concern was perhaps that spurious retransmission sent in a previous recovery episode may trigger DSACKs to falsely undo the current recovery. However, this inadvertently misses undo opportunities (using either TCP timestamps or DSACKs) when timeout occurs during a loss episode, i.e. recurring timeouts or timeout during fast recovery. In these cases some retransmissions will be in flight but we should allow undo. Furthermore, we should only reset undo_marker and undo_retrans upon timeout if we are starting a new recovery episode. Finally, when we do reset our undo state, we now do so in a manner similar to tcp_enter_recovery(), so that we require a DSACK for each of the outstsanding retransmissions. This will achieve the original goal by requiring that we receive the same number of DSACKs as retransmissions. This patch increases the undo events by 50% on Google servers. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- net/ipv4/tcp_input.c | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fa5258f322e7..e567f0dbf282 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -276,7 +276,7 @@ struct tcp_sock { u32 retrans_stamp; /* Timestamp of the last retransmit, * also used in SYN-SENT to remember stamp of * the first SYN. */ - u32 undo_marker; /* tracking retrans started here. */ + u32 undo_marker; /* snd_una upon a new recovery episode. */ int undo_retrans; /* number of undoable retransmissions. */ u32 total_retrans; /* Total retransmits for entire connection */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a906e0200ff2..aba4926ca095 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1888,21 +1888,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static void tcp_clear_retrans_partial(struct tcp_sock *tp) +void tcp_clear_retrans(struct tcp_sock *tp) { tp->retrans_out = 0; tp->lost_out = 0; - tp->undo_marker = 0; tp->undo_retrans = -1; + tp->fackets_out = 0; + tp->sacked_out = 0; } -void tcp_clear_retrans(struct tcp_sock *tp) +static inline void tcp_init_undo(struct tcp_sock *tp) { - tcp_clear_retrans_partial(tp); - - tp->fackets_out = 0; - tp->sacked_out = 0; + tp->undo_marker = tp->snd_una; + /* Retransmission still in flight may cause DSACKs later. */ + tp->undo_retrans = tp->retrans_out ? : -1; } /* Enter Loss state. If we detect SACK reneging, forget all SACK information @@ -1925,18 +1925,18 @@ void tcp_enter_loss(struct sock *sk) tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); + tcp_init_undo(tp); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tcp_clear_retrans_partial(tp); + tp->retrans_out = 0; + tp->lost_out = 0; if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - tp->undo_marker = tp->snd_una; - skb = tcp_write_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { @@ -1950,9 +1950,6 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; @@ -2671,8 +2668,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->prior_ssthresh = 0; - tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out ? : -1; + tcp_init_undo(tp); if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) -- cgit v1.2.3 From 53f3cc46336b9e514c98556b4a009a69ed808d3b Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sat, 23 Aug 2014 14:45:47 +0400 Subject: pata_platform: Remove useless irq_flags field IRQ flags can be obtained from resource structure, there are no need to use additional field in the platform_data to store these values. This patch removes this field and convert existing users of this driver to use IRQ flags from the resources. Signed-off-by: Alexander Shiyan Signed-off-by: Tejun Heo --- arch/blackfin/mach-bf537/boards/cm_bf537e.c | 3 +-- arch/blackfin/mach-bf537/boards/cm_bf537u.c | 3 +-- arch/blackfin/mach-bf537/boards/stamp.c | 3 +-- arch/blackfin/mach-bf537/boards/tcm_bf537.c | 3 +-- arch/blackfin/mach-bf561/boards/cm_bf561.c | 3 +-- drivers/ata/pata_of_platform.c | 2 -- drivers/ata/pata_platform.c | 4 +--- include/linux/ata_platform.h | 5 ----- 8 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c index 1e7290ef3525..1e1014df5e9e 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c @@ -733,7 +733,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -750,7 +749,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c index c7495dc74690..d056db9e5592 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c @@ -587,7 +587,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -604,7 +603,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index de19b8a56007..88a19fc9844d 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -2462,7 +2462,6 @@ static struct platform_device bfin_sport0_device = { #define PATA_INT IRQ_PF5 static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 1, - .irq_flags = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -2479,7 +2478,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; #elif defined(CF_IDE_NAND_CARD_USE_CF_IN_COMMON_MEMORY_MODE) diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c index 6b988ad653d8..ed309c9a62b6 100644 --- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c +++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c @@ -589,7 +589,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -606,7 +605,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c index e862f7823e68..c6db52ba3a06 100644 --- a/arch/blackfin/mach-bf561/boards/cm_bf561.c +++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c @@ -354,7 +354,6 @@ static struct platform_device bfin_sir0_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -371,7 +370,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c index 6af1c9b9a464..64965398914a 100644 --- a/drivers/ata/pata_of_platform.c +++ b/drivers/ata/pata_of_platform.c @@ -43,8 +43,6 @@ static int pata_of_platform_probe(struct platform_device *ofdev) } irq_res = platform_get_resource(ofdev, IORESOURCE_IRQ, 0); - if (irq_res) - irq_res->flags = 0; prop = of_get_property(dn, "reg-shift", NULL); if (prop) diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index a5579b55e332..f8cff3e247c5 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -118,7 +118,7 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res, */ if (irq_res && irq_res->start > 0) { irq = irq_res->start; - irq_flags = irq_res->flags; + irq_flags = irq_res->flags & IRQF_TRIGGER_MASK; } /* @@ -213,8 +213,6 @@ static int pata_platform_probe(struct platform_device *pdev) * And the IRQ */ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (irq_res) - irq_res->flags = pp_info ? pp_info->irq_flags : 0; return __pata_platform_probe(&pdev->dev, io_res, ctl_res, irq_res, pp_info ? pp_info->ioport_shift : 0, diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index b9fde17f767c..5c618a084225 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -8,11 +8,6 @@ struct pata_platform_info { * spacing used by ata_std_ports(). */ unsigned int ioport_shift; - /* - * Indicate platform specific irq types and initial - * IRQ flags when call request_irq() - */ - unsigned int irq_flags; }; extern int __pata_platform_probe(struct device *dev, -- cgit v1.2.3 From 3af20efc0f83cdc65ce56ec108c0e81f602364df Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:39 -0700 Subject: net: phy: broadcom: extract all registers to brcmphy.h Commit 439d39a9ac8fbbba9c04581361188f33f21ced50 ("net: phy: broadcom: extract register definitions") added a bunch of registers to brcmphy.h but left some to broadcom.c, move all of them to the header file since the BCM54xx and BCM7xxx PHY drivers do share all of these registers. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 104 --------------------------------------------- include/linux/brcmphy.h | 103 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 34088d60da74..6001d76d8676 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -25,110 +25,6 @@ #define BRCM_PHY_REV(phydev) \ ((phydev)->drv->phy_id & ~((phydev)->drv->phy_id_mask)) -/* - * Broadcom LED source encodings. These are used in BCM5461, BCM5481, - * BCM5482, and possibly some others. - */ -#define BCM_LED_SRC_LINKSPD1 0x0 -#define BCM_LED_SRC_LINKSPD2 0x1 -#define BCM_LED_SRC_XMITLED 0x2 -#define BCM_LED_SRC_ACTIVITYLED 0x3 -#define BCM_LED_SRC_FDXLED 0x4 -#define BCM_LED_SRC_SLAVE 0x5 -#define BCM_LED_SRC_INTR 0x6 -#define BCM_LED_SRC_QUALITY 0x7 -#define BCM_LED_SRC_RCVLED 0x8 -#define BCM_LED_SRC_MULTICOLOR1 0xa -#define BCM_LED_SRC_OPENSHORT 0xb -#define BCM_LED_SRC_OFF 0xe /* Tied high */ -#define BCM_LED_SRC_ON 0xf /* Tied low */ - - -/* - * BCM5482: Shadow registers - * Shadow values go into bits [14:10] of register 0x1c to select a shadow - * register to access. - */ -/* 00101: Spare Control Register 3 */ -#define BCM54XX_SHD_SCR3 0x05 -#define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 -#define BCM54XX_SHD_SCR3_DLLAPD_DIS 0x0002 -#define BCM54XX_SHD_SCR3_TRDDAPD 0x0004 - -/* 01010: Auto Power-Down */ -#define BCM54XX_SHD_APD 0x0a -#define BCM54XX_SHD_APD_EN 0x0020 - -#define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ - /* LED3 / ~LINKSPD[2] selector */ -#define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) - /* LED1 / ~LINKSPD[1] selector */ -#define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0) -#define BCM54XX_SHD_RGMII_MODE 0x0b /* 01011: RGMII Mode Selector */ -#define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ -#define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ -#define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ -#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ -#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ - - -/* - * EXPANSION SHADOW ACCESS REGISTERS. (PHY REG 0x15, 0x16, and 0x17) - */ -#define MII_BCM54XX_EXP_AADJ1CH0 0x001f -#define MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN 0x0200 -#define MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF 0x0100 -#define MII_BCM54XX_EXP_AADJ1CH3 0x601f -#define MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ 0x0002 -#define MII_BCM54XX_EXP_EXP08 0x0F08 -#define MII_BCM54XX_EXP_EXP08_RJCT_2MHZ 0x0001 -#define MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE 0x0200 -#define MII_BCM54XX_EXP_EXP75 0x0f75 -#define MII_BCM54XX_EXP_EXP75_VDACCTRL 0x003c -#define MII_BCM54XX_EXP_EXP75_CM_OSC 0x0001 -#define MII_BCM54XX_EXP_EXP96 0x0f96 -#define MII_BCM54XX_EXP_EXP96_MYST 0x0010 -#define MII_BCM54XX_EXP_EXP97 0x0f97 -#define MII_BCM54XX_EXP_EXP97_MYST 0x0c0c - -/* - * BCM5482: Secondary SerDes registers - */ -#define BCM5482_SSD_1000BX_CTL 0x00 /* 1000BASE-X Control */ -#define BCM5482_SSD_1000BX_CTL_PWRDOWN 0x0800 /* Power-down SSD */ -#define BCM5482_SSD_SGMII_SLAVE 0x15 /* SGMII Slave Register */ -#define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ -#define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ - - -/*****************************************************************************/ -/* Fast Ethernet Transceiver definitions. */ -/*****************************************************************************/ - -#define MII_BRCM_FET_INTREG 0x1a /* Interrupt register */ -#define MII_BRCM_FET_IR_MASK 0x0100 /* Mask all interrupts */ -#define MII_BRCM_FET_IR_LINK_EN 0x0200 /* Link status change enable */ -#define MII_BRCM_FET_IR_SPEED_EN 0x0400 /* Link speed change enable */ -#define MII_BRCM_FET_IR_DUPLEX_EN 0x0800 /* Duplex mode change enable */ -#define MII_BRCM_FET_IR_ENABLE 0x4000 /* Interrupt enable */ - -#define MII_BRCM_FET_BRCMTEST 0x1f /* Brcm test register */ -#define MII_BRCM_FET_BT_SRE 0x0080 /* Shadow register enable */ - - -/*** Shadow register definitions ***/ - -#define MII_BRCM_FET_SHDW_MISCCTRL 0x10 /* Shadow misc ctrl */ -#define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */ - -#define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */ -#define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003 -#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001 - -#define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ -#define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ - - MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 61219b9b3445..be31bf9f60c2 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -92,4 +92,107 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +/* + * Broadcom LED source encodings. These are used in BCM5461, BCM5481, + * BCM5482, and possibly some others. + */ +#define BCM_LED_SRC_LINKSPD1 0x0 +#define BCM_LED_SRC_LINKSPD2 0x1 +#define BCM_LED_SRC_XMITLED 0x2 +#define BCM_LED_SRC_ACTIVITYLED 0x3 +#define BCM_LED_SRC_FDXLED 0x4 +#define BCM_LED_SRC_SLAVE 0x5 +#define BCM_LED_SRC_INTR 0x6 +#define BCM_LED_SRC_QUALITY 0x7 +#define BCM_LED_SRC_RCVLED 0x8 +#define BCM_LED_SRC_MULTICOLOR1 0xa +#define BCM_LED_SRC_OPENSHORT 0xb +#define BCM_LED_SRC_OFF 0xe /* Tied high */ +#define BCM_LED_SRC_ON 0xf /* Tied low */ + + +/* + * BCM5482: Shadow registers + * Shadow values go into bits [14:10] of register 0x1c to select a shadow + * register to access. + */ +/* 00101: Spare Control Register 3 */ +#define BCM54XX_SHD_SCR3 0x05 +#define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 +#define BCM54XX_SHD_SCR3_DLLAPD_DIS 0x0002 +#define BCM54XX_SHD_SCR3_TRDDAPD 0x0004 + +/* 01010: Auto Power-Down */ +#define BCM54XX_SHD_APD 0x0a +#define BCM54XX_SHD_APD_EN 0x0020 + +#define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ + /* LED3 / ~LINKSPD[2] selector */ +#define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) + /* LED1 / ~LINKSPD[1] selector */ +#define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0) +#define BCM54XX_SHD_RGMII_MODE 0x0b /* 01011: RGMII Mode Selector */ +#define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ +#define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ +#define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ +#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ +#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ + + +/* + * EXPANSION SHADOW ACCESS REGISTERS. (PHY REG 0x15, 0x16, and 0x17) + */ +#define MII_BCM54XX_EXP_AADJ1CH0 0x001f +#define MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN 0x0200 +#define MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF 0x0100 +#define MII_BCM54XX_EXP_AADJ1CH3 0x601f +#define MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ 0x0002 +#define MII_BCM54XX_EXP_EXP08 0x0F08 +#define MII_BCM54XX_EXP_EXP08_RJCT_2MHZ 0x0001 +#define MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE 0x0200 +#define MII_BCM54XX_EXP_EXP75 0x0f75 +#define MII_BCM54XX_EXP_EXP75_VDACCTRL 0x003c +#define MII_BCM54XX_EXP_EXP75_CM_OSC 0x0001 +#define MII_BCM54XX_EXP_EXP96 0x0f96 +#define MII_BCM54XX_EXP_EXP96_MYST 0x0010 +#define MII_BCM54XX_EXP_EXP97 0x0f97 +#define MII_BCM54XX_EXP_EXP97_MYST 0x0c0c + +/* + * BCM5482: Secondary SerDes registers + */ +#define BCM5482_SSD_1000BX_CTL 0x00 /* 1000BASE-X Control */ +#define BCM5482_SSD_1000BX_CTL_PWRDOWN 0x0800 /* Power-down SSD */ +#define BCM5482_SSD_SGMII_SLAVE 0x15 /* SGMII Slave Register */ +#define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ +#define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ + + +/*****************************************************************************/ +/* Fast Ethernet Transceiver definitions. */ +/*****************************************************************************/ + +#define MII_BRCM_FET_INTREG 0x1a /* Interrupt register */ +#define MII_BRCM_FET_IR_MASK 0x0100 /* Mask all interrupts */ +#define MII_BRCM_FET_IR_LINK_EN 0x0200 /* Link status change enable */ +#define MII_BRCM_FET_IR_SPEED_EN 0x0400 /* Link speed change enable */ +#define MII_BRCM_FET_IR_DUPLEX_EN 0x0800 /* Duplex mode change enable */ +#define MII_BRCM_FET_IR_ENABLE 0x4000 /* Interrupt enable */ + +#define MII_BRCM_FET_BRCMTEST 0x1f /* Brcm test register */ +#define MII_BRCM_FET_BT_SRE 0x0080 /* Shadow register enable */ + + +/*** Shadow register definitions ***/ + +#define MII_BRCM_FET_SHDW_MISCCTRL 0x10 /* Shadow misc ctrl */ +#define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */ + +#define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */ +#define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003 +#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001 + +#define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ +#define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 705314797b8b997554b7e9d0ea7b65a497356e53 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:40 -0700 Subject: net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h The shadow register 0x1C is used both by the BCM54xxx PHYs and the BCM7xxx internal PHYs, move the accessors to a common location so both drivers can use them. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 18 ------------------ include/linux/brcmphy.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 6001d76d8676..854f2c9a7b2b 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -29,24 +29,6 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); -/* - * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T - * 0x1c shadow registers. - */ -static int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) -{ - phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); - return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); -} - -static int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, u16 val) -{ - return phy_write(phydev, MII_BCM54XX_SHD, - MII_BCM54XX_SHD_WRITE | - MII_BCM54XX_SHD_VAL(shadow) | - MII_BCM54XX_SHD_DATA(val)); -} - /* Indirect register access functions for the Expansion Registers */ static int bcm54xx_exp_read(struct phy_device *phydev, u16 regnum) { diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index be31bf9f60c2..722cf26567fa 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -195,4 +195,24 @@ #define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ #define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ +/* + * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T + * 0x1c shadow registers. + */ +static inline int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) +{ + phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); + return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); +} + +static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, + u16 val) +{ + return phy_write(phydev, MII_BCM54XX_SHD, + MII_BCM54XX_SHD_WRITE | + MII_BCM54XX_SHD_VAL(shadow) | + MII_BCM54XX_SHD_DATA(val)); +} + + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 66ce7fb9807b036058aa380bfd2b3851ae25ce39 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:43 -0700 Subject: net: phy: export phy_{read,write}_mmd_indirect Some PHY drivers might need to access Clause 45 registers in Clause 22 compatibility mode to e.g: properly advertise EEE support when disabled by default. Export these two helper functions: phy_read_mmd_indirect() and phy_write_mmd_indirect() for drivers to use them. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 6 ++++-- include/linux/phy.h | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c94e2a27446a..e7a5893f32ff 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -955,7 +955,7 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Read reg 14 // Read MMD data */ -static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, +int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad, int addr) { struct phy_driver *phydrv = phydev->drv; @@ -971,6 +971,7 @@ static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, } return value; } +EXPORT_SYMBOL(phy_read_mmd_indirect); /** * phy_write_mmd_indirect - writes data to the MMD registers @@ -988,7 +989,7 @@ static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Write reg 14 // Write MMD data */ -static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, +void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, int devad, int addr, u32 data) { struct phy_driver *phydrv = phydev->drv; @@ -1002,6 +1003,7 @@ static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, phydrv->write_mmd_indirect(phydev, prtad, devad, addr, data); } } +EXPORT_SYMBOL(phy_write_mmd_indirect); /** * phy_init_eee - init and check the EEE feature diff --git a/include/linux/phy.h b/include/linux/phy.h index ed39956b5613..d090cfcaa167 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -597,6 +597,19 @@ static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff)); } +/** + * phy_read_mmd_indirect - reads data from the MMD registers + * @phydev: The PHY device bus + * @prtad: MMD Address + * @devad: MMD DEVAD + * @addr: PHY address on the MII bus + * + * Description: it reads data from the MMD registers (clause 22 to access to + * clause 45) of the specified phy address. + */ +int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, + int devad, int addr); + /** * phy_read - Convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -668,6 +681,20 @@ static inline int phy_write_mmd(struct phy_device *phydev, int devad, return mdiobus_write(phydev->bus, phydev->addr, regnum, val); } +/** + * phy_write_mmd_indirect - writes data to the MMD registers + * @phydev: The PHY device + * @prtad: MMD Address + * @devad: MMD DEVAD + * @addr: PHY address on the MII bus + * @data: data to write in the MMD register + * + * Description: Write data from the MMD registers of the specified + * phy address. + */ +void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, + int devad, int addr, u32 data); + struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -- cgit v1.2.3 From b8f9a02924bbeb0c46ca4c19561cbe765b80e264 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:45 -0700 Subject: net: phy: bcm7xxx: enable EEE at the PHY level The 28nm Gigabit PHY on BCM7xxx chips comes out of reset with absolutely no EEE capabilities, such that we would actually return that we do not support EEE when accessing 3.20 (MDIO_PCS_EEE_ABLE) registers. Poke through the vendor-specific C45 register to enable EEE globally at the PHY level, and advertise supported EEE modes. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 31 +++++++++++++++++++++++++++++++ include/linux/brcmphy.h | 3 +++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 29e256a4ed57..e98c510b75c5 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Broadcom BCM7xxx internal PHY registers */ #define MII_BCM7XXX_CHANNEL_WIDTH 0x2000 @@ -167,6 +168,32 @@ static int bcm7xxx_apd_enable(struct phy_device *phydev) return bcm54xx_shadow_write(phydev, BCM54XX_SHD_APD, val); } +static int bcm7xxx_eee_enable(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, + MDIO_MMD_AN, phydev->addr); + if (val < 0) + return val; + + /* Enable general EEE feature at the PHY level */ + val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X; + + phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, + MDIO_MMD_AN, phydev->addr, val); + + /* Advertise supported modes */ + val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, + MDIO_MMD_AN, phydev->addr); + + val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T); + phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, + MDIO_MMD_AN, phydev->addr, val); + + return 0; +} + static int bcm7xxx_28nm_config_init(struct phy_device *phydev) { int ret; @@ -179,6 +206,10 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev) if (ret) return ret; + ret = bcm7xxx_eee_enable(phydev); + if (ret) + return ret; + return bcm7xxx_apd_enable(phydev); } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 722cf26567fa..ee1431d976fa 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -214,5 +214,8 @@ static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, MII_BCM54XX_SHD_DATA(val)); } +#define BRCM_CL45VEN_EEE_CONTROL 0x803d +#define LPI_FEATURE_EN 0x8000 +#define LPI_FEATURE_EN_DIG1000X 0x4000 #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 690e36e726d00d2528bc569809048adf61550d80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 23 Aug 2014 12:13:41 -0700 Subject: net: Allow raw buffers to be passed into the flow dissector. Drivers, and perhaps other entities we have not yet considered, sometimes want to know how deep the protocol headers go before deciding how large of an SKB to allocate and how much of the packet to place into the linear SKB area. For example, consider a driver which has a device which DMAs into pools of pages and then tells the driver where the data went in the DMA descriptor(s). The driver can then build an SKB and reference most of the data via SKB fragments (which are page/offset/length triplets). However at least some of the front of the packet should be placed into the linear SKB area, which comes before the fragments, so that packet processing can get at the headers efficiently. The first thing each protocol layer is going to do is a "pskb_may_pull()" so we might as well aggregate as much of this as possible while we're building the SKB in the driver. Part of supporting this is that we don't have an SKB yet, so we want to be able to let the flow dissector operate on a raw buffer in order to compute the offset of the end of the headers. So now we have a __skb_flow_dissect() which takes an explicit data pointer and length. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++------ include/net/flow_keys.h | 14 ++++++++++++-- net/core/flow_dissector.c | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index abde271c18ae..18ddf9684a27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2567,20 +2567,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) { - int hlen = skb_headlen(skb); - if (hlen - offset >= len) - return skb->data + offset; + return data + offset; - if (skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || + skb_copy_bits(skb, offset, buffer, len) < 0) return NULL; return buffer; } +static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + return __skb_header_pointer(skb, offset, len, skb->data, + skb_headlen(skb), buffer); +} + /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 6667a054763a..4040f63932c5 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -27,7 +27,17 @@ struct flow_keys { u8 ip_proto; }; -bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow); -__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto); +bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + void *data, int hlen); +static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) +{ + return __skb_flow_dissect(skb, flow, NULL, 0); +} +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); +static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} u32 flow_hash_from_keys(struct flow_keys *keys); #endif diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5f362c1d0332..660c6492fb78 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -34,29 +34,40 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i * The function will try to retrieve the ports at offset thoff + poff where poff * is the protocol port offset returned from proto_ports_offset */ -__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen) { int poff = proto_ports_offset(ip_proto); + if (!data) { + data = skb->data; + hlen = skb_headlen(skb); + } + if (poff >= 0) { __be32 *ports, _ports; - ports = skb_header_pointer(skb, thoff + poff, - sizeof(_ports), &_ports); + ports = __skb_header_pointer(skb, thoff + poff, + sizeof(_ports), data, hlen, &_ports); if (ports) return *ports; } return 0; } -EXPORT_SYMBOL(skb_flow_get_ports); +EXPORT_SYMBOL(__skb_flow_get_ports); -bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) +bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, void *data, int hlen) { int nhoff = skb_network_offset(skb); u8 ip_proto; __be16 proto = skb->protocol; + if (!data) { + data = skb->data; + hlen = skb_headlen(skb); + } + memset(flow, 0, sizeof(*flow)); again: @@ -65,7 +76,7 @@ again: const struct iphdr *iph; struct iphdr _iph; ip: - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph || iph->ihl < 5) return false; nhoff += iph->ihl * 4; @@ -83,7 +94,7 @@ ip: __be32 flow_label; ipv6: - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph) return false; @@ -113,7 +124,7 @@ ipv6: const struct vlan_hdr *vlan; struct vlan_hdr _vlan; - vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); + vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); if (!vlan) return false; @@ -126,7 +137,7 @@ ipv6: struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; - hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; proto = hdr->proto; @@ -151,7 +162,7 @@ ipv6: __be16 proto; } *hdr, _hdr; - hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; /* @@ -171,8 +182,9 @@ ipv6: const struct ethhdr *eth; struct ethhdr _eth; - eth = skb_header_pointer(skb, nhoff, - sizeof(_eth), &_eth); + eth = __skb_header_pointer(skb, nhoff, + sizeof(_eth), + data, hlen, &_eth); if (!eth) return false; proto = eth->h_proto; @@ -194,12 +206,12 @@ ipv6: flow->n_proto = proto; flow->ip_proto = ip_proto; - flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); + flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); flow->thoff = (u16) nhoff; return true; } -EXPORT_SYMBOL(skb_flow_dissect); +EXPORT_SYMBOL(__skb_flow_dissect); static u32 hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) -- cgit v1.2.3 From 179033b3e064d2cd3f5f9945e76b0a0f0fbf4883 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Aug 2014 11:48:26 -0400 Subject: perf: Add PERF_EVENT_STATE_EXIT state for events with exited task Adding new perf event state to indicate that the monitored task has exited. In this case the event stays alive until the owner task exits or close the event fd while providing the last data through the read syscall and ring buffer. Instead it needs to propagate the error info (monitored task has died) via poll and read syscalls by returning POLLHUP and 0 respectively. Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140811120102.GY9918@twins.programming.kicks-ass.net Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-t5y3w8jjx6tfo5w8y6oajsjq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 1 + kernel/events/core.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f0a1036b1911..893a0d07986f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -269,6 +269,7 @@ struct pmu { * enum perf_event_active_state - the states of a event */ enum perf_event_active_state { + PERF_EVENT_STATE_EXIT = -3, PERF_EVENT_STATE_ERROR = -2, PERF_EVENT_STATE_OFF = -1, PERF_EVENT_STATE_INACTIVE = 0, diff --git a/kernel/events/core.c b/kernel/events/core.c index 4575dd6e59ea..d8cb4d21a346 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3600,7 +3600,8 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ - if (event->state == PERF_EVENT_STATE_ERROR) + if ((event->state == PERF_EVENT_STATE_ERROR) || + (event->state == PERF_EVENT_STATE_EXIT)) return 0; if (count < event->read_size) @@ -3630,6 +3631,10 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) unsigned int events = POLLHUP; poll_wait(file, &event->waitq, wait); + + if (event->state == PERF_EVENT_STATE_EXIT) + return events; + /* * Pin the event->rb by taking event->mmap_mutex; otherwise * perf_event_set_output() can swizzle our rb and make us miss wakeups. @@ -7588,6 +7593,9 @@ __perf_event_exit_task(struct perf_event *child_event, if (child_event->parent) { sync_child_event(child_event, child); free_event(child_event); + } else { + child_event->state = PERF_EVENT_STATE_EXIT; + perf_event_wakeup(child_event); } } -- cgit v1.2.3 From 1b05756c48ea07ced9604ef01d11194d936da163 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 5 Aug 2014 22:02:34 +0200 Subject: netfilter: ipset: Fix warn: integer overflows 'sizeof(*map) + size * set->dsize' Dan Carpenter reported that the static checker emits the warning net/netfilter/ipset/ip_set_list_set.c:600 init_list_set() warn: integer overflows 'sizeof(*map) + size * set->dsize' Limit the maximal number of elements in list type of sets. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_list.h | 1 + net/netfilter/ipset/ip_set_list_set.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_list.h b/include/linux/netfilter/ipset/ip_set_list.h index 68c2aea897f5..fe2622a00151 100644 --- a/include/linux/netfilter/ipset/ip_set_list.h +++ b/include/linux/netfilter/ipset/ip_set_list.h @@ -6,5 +6,6 @@ #define IP_SET_LIST_DEFAULT_SIZE 8 #define IP_SET_LIST_MIN_SIZE 4 +#define IP_SET_LIST_MAX_SIZE 65536 #endif /* __IP_SET_LIST_H */ diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 3e2317f3cf68..f87adbad6076 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -597,7 +597,9 @@ init_list_set(struct net *net, struct ip_set *set, u32 size) struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + + min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize, + GFP_KERNEL); if (!map) return false; -- cgit v1.2.3 From 573e8fca255a27e3573b51f9b183d62641c47a3d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:33:47 -0700 Subject: net: skb_gro_checksum_* functions Add skb_gro_checksum_validate, skb_gro_checksum_validate_zero_check, and skb_gro_checksum_simple_validate, and __skb_gro_checksum_complete. These are the cognates of the normal checksum functions but are used in the gro_receive path and operate on GRO related fields in sk_buffs. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 76 +++++++++++++++++++++++++++++++++++++++++++++-- net/core/dev.c | 34 ++++++++++++++++++++- 2 files changed, 107 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e2b0b8b5cd7..eb73444e1bd0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1883,7 +1883,13 @@ struct napi_gro_cb { u16 proto; /* Used in udp_gro_receive */ - u16 udp_mark; + u8 udp_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number encapsulation layers crossed */ + u8 encapsulation; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2154,11 +2160,77 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (NAPI_GRO_CB(skb)->csum_valid) NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(start, len, 0)); } +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. + */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return (skb->ip_summed != CHECKSUM_PARTIAL && + (skb->ip_summed != CHECKSUM_UNNECESSARY || + (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level + * checksum or an encapsulated one during GRO. This saves work + * if we fallback to normal path with the packet. + */ +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (NAPI_GRO_CB(skb)->encapsulation) + skb->encapsulation = 1; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->encapsulation = 0; + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/net/core/dev.c b/net/core/dev.c index 1421dad4cb29..b6a718ec11c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3962,7 +3962,13 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; gro_list_prepare(napi, skb); - NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + } else { + NAPI_GRO_CB(skb)->csum_valid = 0; + } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3975,6 +3981,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encapsulation = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; @@ -4205,6 +4212,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_frags); +/* Compute the checksum from gro_offset and return the folded value + * after adding in any pseudo checksum. + */ +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb) +{ + __wsum wsum; + __sum16 sum; + + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0); + + /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ + sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + + NAPI_GRO_CB(skb)->csum = wsum; + NAPI_GRO_CB(skb)->csum_valid = 1; + + return sum; +} +EXPORT_SYMBOL(__skb_gro_checksum_complete); + /* * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. -- cgit v1.2.3 From a98406e22c12e514bac28fec0a49dc793edaf3a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Aug 2014 17:03:28 +0200 Subject: random32: improvements to prandom_bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch addresses a couple of minor items, mostly addesssing prandom_bytes(): 1) prandom_bytes{,_state}() should use size_t for length arguments, 2) We can use put_unaligned() when filling the array instead of open coding it [ perhaps some archs will further benefit from their own arch specific implementation when GCC cannot make up for it ], 3) Fix a typo, 4) Better use unsigned int as type for getting the arch seed, 5) Make use of prandom_u32_max() for timer slack. Regarding the change to put_unaligned(), callers of prandom_bytes() which internally invoke prandom_bytes_state(), don't bother as they expect the array to be filled randomly and don't have any control of the internal state what-so-ever (that's also why we have periodic reseeding there, etc), so they really don't care. Now for the direct callers of prandom_bytes_state(), which are solely located in test cases for MTD devices, that is, drivers/mtd/tests/{oobtest.c,pagetest.c,subpagetest.c}: These tests basically fill a test write-vector through prandom_bytes_state() with an a-priori defined seed each time and write that to a MTD device. Later on, they set up a read-vector and read back that blocks from the device. So in the verification phase, the write-vector is being re-setup [ so same seed and prandom_bytes_state() called ], and then memcmp()'ed against the read-vector to check if the data is the same. Akinobu, Lothar and I also tested this patch and it runs through the 3 relevant MTD test cases w/o any errors on the nandsim device (simulator for MTD devs) for x86_64, ppc64, ARM (i.MX28, i.MX53 and i.MX6): # modprobe nandsim first_id_byte=0x20 second_id_byte=0xac \ third_id_byte=0x00 fourth_id_byte=0x15 # modprobe mtd_oobtest dev=0 # modprobe mtd_pagetest dev=0 # modprobe mtd_subpagetest dev=0 We also don't have any users depending directly on a particular result of the PRNG (except the PRNG self-test itself), and that's just fine as it e.g. allowed us easily to do things like upgrading from taus88 to taus113. Signed-off-by: Daniel Borkmann Tested-by: Akinobu Mita Tested-by: Lothar Waßmann Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/random.h | 4 ++-- lib/random32.c | 39 ++++++++++++++++++--------------------- 2 files changed, 20 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 57fbbffd77a0..b05856e16b75 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -26,7 +26,7 @@ unsigned int get_random_int(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); u32 prandom_u32(void); -void prandom_bytes(void *buf, int nbytes); +void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); @@ -35,7 +35,7 @@ struct rnd_state { }; u32 prandom_u32_state(struct rnd_state *state); -void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); /** * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) diff --git a/lib/random32.c b/lib/random32.c index c9b6bf3afe0c..0bee183fa18f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); @@ -96,27 +97,23 @@ EXPORT_SYMBOL(prandom_u32); * This is used for pseudo-randomness with no outside seeding. * For more random results, use prandom_bytes(). */ -void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes) +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) { - unsigned char *p = buf; - int i; - - for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) { - u32 random = prandom_u32_state(state); - int j; + u8 *ptr = buf; - for (j = 0; j < sizeof(u32); j++) { - p[i + j] = random; - random >>= BITS_PER_BYTE; - } + while (bytes >= sizeof(u32)) { + put_unaligned(prandom_u32_state(state), (u32 *) ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); } - if (i < bytes) { - u32 random = prandom_u32_state(state); - for (; i < bytes; i++) { - p[i] = random; - random >>= BITS_PER_BYTE; - } + if (bytes > 0) { + u32 rem = prandom_u32_state(state); + do { + *ptr++ = (u8) rem; + bytes--; + rem >>= BITS_PER_BYTE; + } while (bytes > 0); } } EXPORT_SYMBOL(prandom_bytes_state); @@ -126,7 +123,7 @@ EXPORT_SYMBOL(prandom_bytes_state); * @buf: where to copy the pseudo-random bytes to * @bytes: the requested number of bytes */ -void prandom_bytes(void *buf, int bytes) +void prandom_bytes(void *buf, size_t bytes) { struct rnd_state *state = &get_cpu_var(net_rand_state); @@ -137,7 +134,7 @@ EXPORT_SYMBOL(prandom_bytes); static void prandom_warmup(struct rnd_state *state) { - /* Calling RNG ten times to satify recurrence condition */ + /* Calling RNG ten times to satisfy recurrence condition */ prandom_u32_state(state); prandom_u32_state(state); prandom_u32_state(state); @@ -152,7 +149,7 @@ static void prandom_warmup(struct rnd_state *state) static u32 __extract_hwseed(void) { - u32 val = 0; + unsigned int val = 0; (void)(arch_get_random_seed_int(&val) || arch_get_random_int(&val)); @@ -228,7 +225,7 @@ static void __prandom_timer(unsigned long dontcare) prandom_seed(entropy); /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + (prandom_u32() % 40); + expires = 40 + prandom_u32_max(40); seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); add_timer(&seed_timer); -- cgit v1.2.3 From 4798248e4e023170e937a65a1d30fcc52496dd42 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 22 Aug 2014 16:21:53 -0700 Subject: net: Add ops->ndo_xmit_flush() Signed-off-by: David S. Miller --- drivers/net/wan/dlci.c | 2 +- drivers/usb/gadget/function/f_ncm.c | 2 +- include/linux/netdevice.h | 35 +++++++++++++++++++++++++++++++++++ net/atm/mpc.c | 2 +- net/core/dev.c | 5 ++--- net/core/netpoll.c | 3 +-- net/core/pktgen.c | 4 +--- net/packet/af_packet.c | 3 +-- net/sched/sch_teql.c | 3 +-- 9 files changed, 44 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 43c9960dce1c..81b22a180aad 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -193,7 +193,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) struct dlci_local *dlp = netdev_priv(dev); if (skb) - dlp->slave->netdev_ops->ndo_start_xmit(skb, dlp->slave); + netdev_start_xmit(skb, dlp->slave); return NETDEV_TX_OK; } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index bcdc882cd415..cb5d646db6a7 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1101,7 +1101,7 @@ static void ncm_tx_tasklet(unsigned long data) /* Only send if data is available. */ if (ncm->skb_tx_data) { ncm->timer_force_tx = true; - ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev); + netdev_start_xmit(NULL, ncm->netdev); ncm->timer_force_tx = false; } } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eb73444e1bd0..220c50984688 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -782,6 +782,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * + * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); + * A driver implements this function when it wishes to support + * deferred TX queue flushing. The idea is that the expensive + * operation to trigger TX queue processing can be done after + * N calls to ndo_start_xmit rather than being done every single + * time. In this regime ndo_start_xmit will be called one or more + * times, and then a final ndo_xmit_flush call will be made to + * have the driver tell the device about the new pending TX queue + * entries. The kernel keeps track of which queues need flushing + * by monitoring skb->queue_mapping of the packets it submits to + * ndo_start_xmit. This is the queue value that will be passed + * to ndo_xmit_flush. + * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); * Called to decide which queue to when device supports multiple @@ -1005,6 +1018,7 @@ struct net_device_ops { int (*ndo_stop)(struct net_device *dev); netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, struct net_device *dev); + void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, void *accel_priv, @@ -3430,6 +3444,27 @@ int __init dev_proc_init(void); #define dev_proc_init() 0 #endif +static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, + struct sk_buff *skb, struct net_device *dev) +{ + netdev_tx_t ret; + u16 q; + + q = skb->queue_mapping; + ret = ops->ndo_start_xmit(skb, dev); + if (dev_xmit_complete(ret) && ops->ndo_xmit_flush) + ops->ndo_xmit_flush(dev, q); + + return ret; +} + +static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + return __netdev_start_xmit(ops, skb, dev); +} + int netdev_class_create_file_ns(struct class_attribute *class_attr, const void *ns); void netdev_class_remove_file_ns(struct class_attribute *class_attr, diff --git a/net/atm/mpc.c b/net/atm/mpc.c index e8e0e7a8a23d..d662da161e5a 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, } non_ip: - return mpc->old_ops->ndo_start_xmit(skb, dev); + return __netdev_start_xmit(mpc->old_ops, skb, dev); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) diff --git a/net/core/dev.c b/net/core/dev.c index b6a718ec11c1..26d296c2447c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2602,7 +2602,6 @@ EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { - const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; unsigned int skb_len; @@ -2667,7 +2666,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = ops->ndo_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); @@ -2686,7 +2685,7 @@ gso: skb_len = nskb->len; trace_net_dev_start_xmit(nskb, dev); - rc = ops->ndo_start_xmit(nskb, dev); + rc = netdev_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 907fb5e36c02..a5ad06828d67 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644); static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { - const struct net_device_ops *ops = dev->netdev_ops; int status = NETDEV_TX_OK; netdev_features_t features; @@ -92,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = ops->ndo_start_xmit(skb, dev); + status = netdev_start_xmit(skb, dev); if (status == NETDEV_TX_OK) txq_trans_update(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8b849ddfef2e..83e2b4b19eb7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3285,8 +3285,6 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = pkt_dev->odev; - netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) - = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; u16 queue_map; int ret; @@ -3339,7 +3337,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = (*xmit)(pkt_dev->skb, odev); + ret = netdev_start_xmit(pkt_dev->skb, odev); switch (ret) { case NETDEV_TX_OK: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 93896d2092f6..0dfa990d4eaa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -240,7 +240,6 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - const struct net_device_ops *ops = dev->netdev_ops; netdev_features_t features; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -262,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) { - ret = ops->ndo_start_xmit(skb, dev); + ret = netdev_start_xmit(skb, dev); if (ret == NETDEV_TX_OK) txq_trans_update(txq); } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index bd33793b527e..64cd93ca8104 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -301,7 +301,6 @@ restart: do { struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); - const struct net_device_ops *slave_ops = slave->netdev_ops; if (slave_txq->qdisc_sleeping != q) continue; @@ -317,7 +316,7 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { + netdev_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); -- cgit v1.2.3 From d1c85c2ebe7ffe1f1b27846bd1ba0944c513d822 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Fri, 22 Aug 2014 10:40:15 +0800 Subject: netfilter: HAVE_JUMP_LABEL instead of CONFIG_JUMP_LABEL Use HAVE_JUMP_LABEL as elsewhere in the kernel to ensure that the toolchain has the required support in addition to CONFIG_JUMP_LABEL being set. Signed-off-by: Zhouyi Zhou Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 5 +++-- net/netfilter/core.c | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2077489f9887..2517ece98820 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) @@ -99,9 +100,9 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -#if defined(CONFIG_JUMP_LABEL) -#include +#ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a93c97f106d4..024a2e25c8a4 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -72,7 +72,7 @@ int nf_register_hook(struct nf_hook_ops *reg) } list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; @@ -84,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); -- cgit v1.2.3 From 2ee507c472939db4b146d545352b8a7c79ef47f8 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 31 Jul 2014 10:29:48 -0700 Subject: sched: Add function single_task_running to let a task check if it is the only task running on a cpu This function will help an async task processing batched jobs from workqueue decide if it wants to keep processing on more chunks of batched work that can be delayed, or to accumulate more work for more efficient batched processing later. If no other tasks are running on the cpu, the batching process can take advantgae of the available cpu cycles to a make decision to continue processing the existing accumulated work to minimize delay, otherwise it will yield. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- include/linux/sched.h | 1 + kernel/sched/core.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..e6d2c056d8e0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -167,6 +167,7 @@ extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); +extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ec1a286684a5..59965ec0b7de 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2366,6 +2366,18 @@ unsigned long nr_running(void) return sum; } +/* + * Check if only the current task is running on the cpu. + */ +bool single_task_running(void) +{ + if (cpu_rq(smp_processor_id())->nr_running == 1) + return true; + else + return false; +} +EXPORT_SYMBOL(single_task_running); + unsigned long long nr_context_switches(void) { int i; -- cgit v1.2.3 From f153566570fb9e32c2f59182883f4f66048788fb Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 22 Aug 2014 21:48:00 +0100 Subject: iio:trigger: modify return value for iio_trigger_get Instead of a void function, return the trigger pointer. Whilst not in of itself a fix, this makes the following set of 7 fixes cleaner than they would otherwise be. Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron Cc: Stable@vger.kernel.org --- include/linux/iio/trigger.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 4b79ffe7b188..fa76c79a52a1 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -84,10 +84,12 @@ static inline void iio_trigger_put(struct iio_trigger *trig) put_device(&trig->dev); } -static inline void iio_trigger_get(struct iio_trigger *trig) +static inline struct iio_trigger *iio_trigger_get(struct iio_trigger *trig) { get_device(&trig->dev); __module_get(trig->ops->owner); + + return trig; } /** -- cgit v1.2.3 From 48ea526a6877d605c961aa37fae33f3227b29424 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 25 Aug 2014 16:06:53 +0300 Subject: net/mlx4: Use is_kdump_kernel() to detect kdump kernel Use is_kdump_kernel() to detect kdump kernel, instead of reset_devices. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 071f6b234604..783dd099abd1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -1275,7 +1276,7 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, /* Returns true if running in low memory profile (kdump kernel) */ static inline bool mlx4_low_memory_profile(void) { - return reset_devices; + return is_kdump_kernel(); } #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 7d5929c1f34304ca5a970cfde8044053e56aa8c9 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 25 Aug 2014 16:15:32 -0700 Subject: mtd: nand: omap: Revert to using software ECC by default For v3.12 and prior, 1-bit Hamming code ECC via software was the default choice. Commit c66d039197e4 in v3.13 changed the behaviour to use 1-bit Hamming code via Hardware using a different ECC layout i.e. (ROM code layout) than what is used by software ECC. This ECC layout change causes NAND filesystems created in v3.12 and prior to be unusable in v3.13 and later. So revert back to using software ECC by default if an ECC scheme is not explicitely specified. This defect can be observed on the following boards during legacy boot -omap3beagle -omap3touchbook -overo -am3517crane -devkit8000 -ldp -3430sdp Signed-off-by: Roger Quadros Tested-by: Grazvydas Ignotas Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-flash.c | 2 +- arch/arm/mach-omap2/gpmc-nand.c | 3 ++- drivers/mtd/nand/omap2.c | 14 +++++++++++--- include/linux/platform_data/mtd-nand-omap2.h | 13 +++++++++++-- 4 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c index e87f2a83d6bf..2d245c2e641c 100644 --- a/arch/arm/mach-omap2/board-flash.c +++ b/arch/arm/mach-omap2/board-flash.c @@ -142,7 +142,7 @@ __init board_nand_init(struct mtd_partition *nand_parts, u8 nr_parts, u8 cs, board_nand_data.nr_parts = nr_parts; board_nand_data.devsize = nand_type; - board_nand_data.ecc_opt = OMAP_ECC_HAM1_CODE_HW; + board_nand_data.ecc_opt = OMAP_ECC_HAM1_CODE_SW; gpmc_nand_init(&board_nand_data, gpmc_t); } #endif /* CONFIG_MTD_NAND_OMAP2 || CONFIG_MTD_NAND_OMAP2_MODULE */ diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c index 8897ad7035fd..cb7764314f17 100644 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ b/arch/arm/mach-omap2/gpmc-nand.c @@ -49,7 +49,8 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) return 0; /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */ - if (ecc_opt == OMAP_ECC_HAM1_CODE_HW) + if (ecc_opt == OMAP_ECC_HAM1_CODE_HW || + ecc_opt == OMAP_ECC_HAM1_CODE_SW) return 1; else return 0; diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index f0ed92e210a1..4dd617897eee 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1794,9 +1794,12 @@ static int omap_nand_probe(struct platform_device *pdev) } /* populate MTD interface based on ECC scheme */ - nand_chip->ecc.layout = &omap_oobinfo; ecclayout = &omap_oobinfo; switch (info->ecc_opt) { + case OMAP_ECC_HAM1_CODE_SW: + nand_chip->ecc.mode = NAND_ECC_SOFT; + break; + case OMAP_ECC_HAM1_CODE_HW: pr_info("nand: using OMAP_ECC_HAM1_CODE_HW\n"); nand_chip->ecc.mode = NAND_ECC_HW; @@ -1848,7 +1851,7 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.priv = nand_bch_init(mtd, nand_chip->ecc.size, nand_chip->ecc.bytes, - &nand_chip->ecc.layout); + &ecclayout); if (!nand_chip->ecc.priv) { pr_err("nand: error: unable to use s/w BCH library\n"); err = -EINVAL; @@ -1923,7 +1926,7 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.priv = nand_bch_init(mtd, nand_chip->ecc.size, nand_chip->ecc.bytes, - &nand_chip->ecc.layout); + &ecclayout); if (!nand_chip->ecc.priv) { pr_err("nand: error: unable to use s/w BCH library\n"); err = -EINVAL; @@ -2012,6 +2015,9 @@ static int omap_nand_probe(struct platform_device *pdev) goto return_error; } + if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW) + goto scan_tail; + /* all OOB bytes from oobfree->offset till end off OOB are free */ ecclayout->oobfree->length = mtd->oobsize - ecclayout->oobfree->offset; /* check if NAND device's OOB is enough to store ECC signatures */ @@ -2021,7 +2027,9 @@ static int omap_nand_probe(struct platform_device *pdev) err = -EINVAL; goto return_error; } + nand_chip->ecc.layout = ecclayout; +scan_tail: /* second phase scan */ if (nand_scan_tail(mtd)) { err = -ENXIO; diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 660c029d694f..16ec262dfcc8 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -21,8 +21,17 @@ enum nand_io { }; enum omap_ecc { - /* 1-bit ECC calculation by GPMC, Error detection by Software */ - OMAP_ECC_HAM1_CODE_HW = 0, + /* + * 1-bit ECC: calculation and correction by SW + * ECC stored at end of spare area + */ + OMAP_ECC_HAM1_CODE_SW = 0, + + /* + * 1-bit ECC: calculation by GPMC, Error detection by Software + * ECC layout compatible with ROM code layout + */ + OMAP_ECC_HAM1_CODE_HW, /* 4-bit ECC calculation by GPMC, Error detection by Software */ OMAP_ECC_BCH4_CODE_HW_DETECTION_SW, /* 4-bit ECC calculation by GPMC, Error detection by ELM */ -- cgit v1.2.3 From 0b725a2ca61bedc33a2a63d0451d528b268cf975 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Aug 2014 15:51:53 -0700 Subject: net: Remove ndo_xmit_flush netdev operation, use signalling instead. As reported by Jesper Dangaard Brouer, for high packet rates the overhead of having another indirect call in the TX path is non-trivial. There is the indirect call itself, and then there is all of the reloading of the state to refetch the tail pointer value and then write the device register. Move to a more passive scheme, which requires very light modifications to the device drivers. The signal is a new skb->xmit_more value, if it is non-zero it means that more SKBs are pending to be transmitted on the same queue as the current SKB. And therefore, the driver may elide the tail pointer update. Right now skb->xmit_more is always zero. Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 36 +++++++++++-------------------- drivers/net/virtio_net.c | 12 +++-------- include/linux/netdevice.h | 25 ++------------------- include/linux/skbuff.h | 2 ++ 4 files changed, 19 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b9c020a05fb8..89c29b40d61c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -136,7 +136,6 @@ static void igb_update_phy_info(unsigned long); static void igb_watchdog(unsigned long); static void igb_watchdog_task(struct work_struct *); static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); -static void igb_xmit_flush(struct net_device *netdev, u16 queue); static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); @@ -2076,7 +2075,6 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, .ndo_start_xmit = igb_xmit_frame, - .ndo_xmit_flush = igb_xmit_flush, .ndo_get_stats64 = igb_get_stats64, .ndo_set_rx_mode = igb_set_rx_mode, .ndo_set_mac_address = igb_set_mac, @@ -4917,6 +4915,14 @@ static void igb_tx_map(struct igb_ring *tx_ring, tx_ring->next_to_use = i; + if (!skb->xmit_more) { + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } return; dma_error: @@ -5052,20 +5058,17 @@ out_drop: return NETDEV_TX_OK; } -static struct igb_ring *__igb_tx_queue_mapping(struct igb_adapter *adapter, unsigned int r_idx) +static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, + struct sk_buff *skb) { + unsigned int r_idx = skb->queue_mapping; + if (r_idx >= adapter->num_tx_queues) r_idx = r_idx % adapter->num_tx_queues; return adapter->tx_ring[r_idx]; } -static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, - struct sk_buff *skb) -{ - return __igb_tx_queue_mapping(adapter, skb->queue_mapping); -} - static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { @@ -5094,21 +5097,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); } -static void igb_xmit_flush(struct net_device *netdev, u16 queue) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct igb_ring *tx_ring; - - tx_ring = __igb_tx_queue_mapping(adapter, queue); - - writel(tx_ring->next_to_use, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); -} - /** * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 62421086d3e6..f0c2824f5e0f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -953,15 +953,10 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) } } - return NETDEV_TX_OK; -} + if (!skb->xmit_more) + virtqueue_kick(sq->vq); -static void xmit_flush(struct net_device *dev, u16 qnum) -{ - struct virtnet_info *vi = netdev_priv(dev); - struct send_queue *sq = &vi->sq[qnum]; - - virtqueue_kick(sq->vq); + return NETDEV_TX_OK; } /* @@ -1393,7 +1388,6 @@ static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, .ndo_start_xmit = start_xmit, - .ndo_xmit_flush = xmit_flush, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = virtnet_set_mac_address, .ndo_set_rx_mode = virtnet_set_rx_mode, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 220c50984688..039b23786c22 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -782,19 +782,6 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * - * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); - * A driver implements this function when it wishes to support - * deferred TX queue flushing. The idea is that the expensive - * operation to trigger TX queue processing can be done after - * N calls to ndo_start_xmit rather than being done every single - * time. In this regime ndo_start_xmit will be called one or more - * times, and then a final ndo_xmit_flush call will be made to - * have the driver tell the device about the new pending TX queue - * entries. The kernel keeps track of which queues need flushing - * by monitoring skb->queue_mapping of the packets it submits to - * ndo_start_xmit. This is the queue value that will be passed - * to ndo_xmit_flush. - * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); * Called to decide which queue to when device supports multiple @@ -1018,7 +1005,6 @@ struct net_device_ops { int (*ndo_stop)(struct net_device *dev); netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, struct net_device *dev); - void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, void *accel_priv, @@ -3447,15 +3433,8 @@ int __init dev_proc_init(void); static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev) { - netdev_tx_t ret; - u16 q; - - q = skb->queue_mapping; - ret = ops->ndo_start_xmit(skb, dev); - if (dev_xmit_complete(ret) && ops->ndo_xmit_flush) - ops->ndo_xmit_flush(dev, q); - - return ret; + skb->xmit_more = 0; + return ops->ndo_start_xmit(skb, dev); } static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 18ddf9684a27..9b3802a197a8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -452,6 +452,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @tc_verd: traffic control verdict * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @xmit_more: More SKBs are pending for this queue * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -558,6 +559,7 @@ struct sk_buff { __u16 queue_mapping; kmemcheck_bitfield_begin(flags2); + __u8 xmit_more:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif -- cgit v1.2.3 From b4bbb107d73bbc0d92c9ae7fd8e69580aa9381e7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 27 Jun 2014 11:56:58 +0200 Subject: dma-mapping: Provide write-combine allocations Provide an implementation for dma_{alloc,free,mmap}_writecombine() when the architecture supports DMA attributes. Signed-off-by: Thierry Reding Acked-by: Arnd Bergmann Signed-off-by: Marek Szyprowski --- arch/arm/include/asm/dma-mapping.h | 16 ---------------- include/asm-generic/dma-mapping-common.h | 8 -------- include/linux/dma-mapping.h | 26 ++++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index c45b61a4b4a5..85738b200023 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -265,22 +265,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs); -static inline void *dma_alloc_writecombine(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs); -} - -static inline void dma_free_writecombine(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); -} - /* * This can be called during early boot to increase the size of the atomic * coherent DMA pool above the default value of 256KiB. It must be called diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index de8bf89940f8..d137431bf26f 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -205,14 +205,6 @@ dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) -static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); -} - int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 931b70986272..d5d388160f42 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -263,6 +263,32 @@ struct dma_attrs; #define dma_unmap_sg_attrs(dev, sgl, nents, dir, attrs) \ dma_unmap_sg(dev, sgl, nents, dir) +#else +static inline void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs); +} + +static inline void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs); +} + +static inline int dma_mmap_writecombine(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, + size_t size) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); +} #endif /* CONFIG_HAVE_DMA_ATTRS */ #ifdef CONFIG_NEED_DMA_MAP_STATE -- cgit v1.2.3 From 170fd0b1f6108b48df4369afa0ee29a83e922748 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 30 Jul 2014 14:36:18 +0300 Subject: ieee80211: Support parsing TPC report element in action frames TPC report element is contained in spectrum management's tpc report action frames and in radio measurement's link measurement report action frames. Add a function which checks whether an action frame contains this element. This may be needed by the drivers in order to set the correct tx power value in these frames. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 63ab3873c5ed..8018c915ee63 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -838,6 +838,16 @@ enum ieee80211_vht_opmode_bits { #define WLAN_SA_QUERY_TR_ID_LEN 2 +/** + * struct ieee80211_tpc_report_ie + * + * This structure refers to "TPC Report element" + */ +struct ieee80211_tpc_report_ie { + u8 tx_power; + u8 link_margin; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -973,6 +983,13 @@ struct ieee80211_mgmt { u8 action_code; u8 operating_mode; } __packed vht_opmode_notif; + struct { + u8 action_code; + u8 dialog_token; + u8 tpc_elem_id; + u8 tpc_elem_length; + struct ieee80211_tpc_report_ie tpc; + } __packed tpc_report; } u; } __packed action; } u; @@ -1865,6 +1882,7 @@ enum ieee80211_category { WLAN_CATEGORY_DLS = 2, WLAN_CATEGORY_BACK = 3, WLAN_CATEGORY_PUBLIC = 4, + WLAN_CATEGORY_RADIO_MEASUREMENT = 5, WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, @@ -2378,4 +2396,51 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, #define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024)) #define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x)) +/** + * ieee80211_action_contains_tpc - checks if the frame contains TPC element + * @skb: the skb containing the frame, length will be checked + * + * This function checks if it's either TPC report action frame or Link + * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5 + * and 8.5.7.5 accordingly. + */ +static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (!ieee80211_is_action(mgmt->frame_control)) + return false; + + if (skb->len < IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.tpc_report)) + return false; + + /* + * TPC report - check that: + * category = 0 (Spectrum Management) or 5 (Radio Measurement) + * spectrum management action = 3 (TPC/Link Measurement report) + * TPC report EID = 35 + * TPC report element length = 2 + * + * The spectrum management's tpc_report struct is used here both for + * parsing tpc_report and radio measurement's link measurement report + * frame, since the relevant part is identical in both frames. + */ + if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT && + mgmt->u.action.category != WLAN_CATEGORY_RADIO_MEASUREMENT) + return false; + + /* both spectrum mgmt and link measurement have same action code */ + if (mgmt->u.action.u.tpc_report.action_code != + WLAN_ACTION_SPCT_TPC_RPRT) + return false; + + if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || + mgmt->u.action.u.tpc_report.tpc_elem_length != + sizeof(struct ieee80211_tpc_report_ie)) + return false; + + return true; +} + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 4a32fea9d78f2d2315c0072757b197d5a304dc8b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:27 -0500 Subject: scheduler: Replace __get_cpu_var with this_cpu_ptr Convert all uses of __get_cpu_var for address calculation to use this_cpu_ptr instead. [Uses of __get_cpu_var with cpumask_var_t are no longer handled by this patch] Cc: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/kernel_stat.h | 4 ++-- kernel/events/callchain.c | 4 ++-- kernel/events/core.c | 24 ++++++++++++------------ kernel/sched/sched.h | 4 ++-- kernel/taskstats.c | 2 +- kernel/time/tick-sched.c | 4 ++-- kernel/user-return-notifier.c | 4 ++-- 7 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ecbc52f9ff77..8422b4ed6882 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -44,8 +44,8 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); /* Must have preemption disabled for this to be meaningful. */ -#define kstat_this_cpu (&__get_cpu_var(kstat)) -#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat)) +#define kstat_this_cpu this_cpu_ptr(&kstat) +#define kcpustat_this_cpu this_cpu_ptr(&kernel_cpustat) #define kstat_cpu(cpu) per_cpu(kstat, cpu) #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 97b67df8fbfe..c4f63e68a35c 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -137,7 +137,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) int cpu; struct callchain_cpus_entries *entries; - *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + *rctx = get_recursion_context(this_cpu_ptr(callchain_recursion)); if (*rctx == -1) return NULL; @@ -153,7 +153,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) static void put_callchain_entry(int rctx) { - put_recursion_context(__get_cpu_var(callchain_recursion), rctx); + put_recursion_context(this_cpu_ptr(callchain_recursion), rctx); } struct perf_callchain_entry * diff --git a/kernel/events/core.c b/kernel/events/core.c index 1cf24b3e42ec..4d44e40a0483 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -239,7 +239,7 @@ static void perf_duration_warn(struct irq_work *w) u64 avg_local_sample_len; u64 local_samples_len; - local_samples_len = __get_cpu_var(running_sample_length); + local_samples_len = __this_cpu_read(running_sample_length); avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; printk_ratelimited(KERN_WARNING @@ -261,10 +261,10 @@ void perf_sample_event_took(u64 sample_len_ns) return; /* decay the counter by 1 average sample */ - local_samples_len = __get_cpu_var(running_sample_length); + local_samples_len = __this_cpu_read(running_sample_length); local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES; local_samples_len += sample_len_ns; - __get_cpu_var(running_sample_length) = local_samples_len; + __this_cpu_write(running_sample_length, local_samples_len); /* * note: this will be biased artifically low until we have @@ -877,7 +877,7 @@ static DEFINE_PER_CPU(struct list_head, rotation_list); static void perf_pmu_rotate_start(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - struct list_head *head = &__get_cpu_var(rotation_list); + struct list_head *head = this_cpu_ptr(&rotation_list); WARN_ON(!irqs_disabled()); @@ -2389,7 +2389,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * to check if we have to switch out PMU state. * cgroup event are system-wide mode only */ - if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_out(task, next); } @@ -2632,11 +2632,11 @@ void __perf_event_task_sched_in(struct task_struct *prev, * to check if we have to switch in PMU state. * cgroup event are system-wide mode only */ - if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_in(prev, task); /* check for system-wide branch_stack events */ - if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) + if (atomic_read(this_cpu_ptr(&perf_branch_stack_events))) perf_branch_stack_sched_in(prev, task); } @@ -2891,7 +2891,7 @@ bool perf_event_can_stop_tick(void) void perf_event_task_tick(void) { - struct list_head *head = &__get_cpu_var(rotation_list); + struct list_head *head = this_cpu_ptr(&rotation_list); struct perf_cpu_context *cpuctx, *tmp; struct perf_event_context *ctx; int throttled; @@ -5671,7 +5671,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct perf_event *event; struct hlist_head *head; @@ -5690,7 +5690,7 @@ end: int perf_swevent_get_recursion_context(void) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); return get_recursion_context(swhash->recursion); } @@ -5698,7 +5698,7 @@ EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); inline void perf_swevent_put_recursion_context(int rctx) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); put_recursion_context(swhash->recursion, rctx); } @@ -5727,7 +5727,7 @@ static void perf_swevent_read(struct perf_event *event) static int perf_swevent_add(struct perf_event *event, int flags) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct hw_perf_event *hwc = &event->hw; struct hlist_head *head; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 579712f4e9d5..77d92f8130e8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -650,10 +650,10 @@ static inline int cpu_of(struct rq *rq) DECLARE_PER_CPU(struct rq, runqueues); #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -#define this_rq() (&__get_cpu_var(runqueues)) +#define this_rq() this_cpu_ptr(&runqueues) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -#define raw_rq() (&__raw_get_cpu_var(runqueues)) +#define raw_rq() raw_cpu_ptr(&runqueues) static inline u64 rq_clock(struct rq *rq) { diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 13d2f7cd65db..b312fcc73024 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -638,7 +638,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) fill_tgid_exit(tsk); } - listeners = __this_cpu_ptr(&listener_array); + listeners = raw_cpu_ptr(&listener_array); if (list_empty(&listeners->list)) return; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 73f90932282b..3cadc112519f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -924,7 +924,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) */ void tick_nohz_idle_exit(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; local_irq_disable(); @@ -1041,7 +1041,7 @@ static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now) static inline void tick_nohz_irq_enter(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; if (!ts->idle_active && !ts->tick_stopped) diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 394f70b17162..9586b670a5b2 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c @@ -14,7 +14,7 @@ static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); void user_return_notifier_register(struct user_return_notifier *urn) { set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); - hlist_add_head(&urn->link, &__get_cpu_var(return_notifier_list)); + hlist_add_head(&urn->link, this_cpu_ptr(&return_notifier_list)); } EXPORT_SYMBOL_GPL(user_return_notifier_register); @@ -25,7 +25,7 @@ EXPORT_SYMBOL_GPL(user_return_notifier_register); void user_return_notifier_unregister(struct user_return_notifier *urn) { hlist_del(&urn->link); - if (hlist_empty(&__get_cpu_var(return_notifier_list))) + if (hlist_empty(this_cpu_ptr(&return_notifier_list))) clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); } EXPORT_SYMBOL_GPL(user_return_notifier_unregister); -- cgit v1.2.3 From 47405a253da4d8ca4b18ad537423083fdd790440 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:56 -0500 Subject: percpu: Remove __this_cpu_ptr The __this_cpu_ptr macro is no longer in use so drop it. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/percpu-defs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cfd56046ecec..420032d41d27 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -257,9 +257,6 @@ do { \ #define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -/* keep until we have removed all uses of __this_cpu_ptr */ -#define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) - /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. -- cgit v1.2.3 From bf3baca6c54ce8a2f51687296f868dfe20d33f13 Mon Sep 17 00:00:00 2001 From: James Ban Date: Wed, 27 Aug 2014 11:47:07 +0900 Subject: regulator: da9211: support device tree This is a patch for supporting device tree of DA9211/DA9213. Signed-off-by: James Ban Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/da9211.txt | 63 ++++++++++++++++ drivers/regulator/da9211-regulator.c | 85 ++++++++++++++++++++-- include/linux/regulator/da9211.h | 2 +- 3 files changed, 142 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/regulator/da9211.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/da9211.txt b/Documentation/devicetree/bindings/regulator/da9211.txt new file mode 100644 index 000000000000..240019a82f9a --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/da9211.txt @@ -0,0 +1,63 @@ +* Dialog Semiconductor DA9211/DA9213 Voltage Regulator + +Required properties: +- compatible: "dlg,da9211" or "dlg,da9213". +- reg: I2C slave address, usually 0x68. +- interrupts: the interrupt outputs of the controller +- regulators: A node that houses a sub-node for each regulator within the + device. Each sub-node is identified using the node's name, with valid + values listed below. The content of each sub-node is defined by the + standard binding for regulators; see regulator.txt. + BUCKA and BUCKB. + +Optional properties: +- Any optional property defined in regulator.txt + +Example 1) DA9211 + + pmic: da9211@68 { + compatible = "dlg,da9211"; + reg = <0x68>; + interrupts = <3 27>; + + regulators { + BUCKA { + regulator-name = "VBUCKA"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <2000000>; + regulator-max-microamp = <5000000>; + }; + BUCKB { + regulator-name = "VBUCKB"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <2000000>; + regulator-max-microamp = <5000000>; + }; + }; + }; + +Example 2) DA92113 + pmic: da9213@68 { + compatible = "dlg,da9213"; + reg = <0x68>; + interrupts = <3 27>; + + regulators { + BUCKA { + regulator-name = "VBUCKA"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <3000000>; + regulator-max-microamp = <6000000>; + }; + BUCKB { + regulator-name = "VBUCKB"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <3000000>; + regulator-max-microamp = <6000000>; + }; + }; + }; diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index a26f1d283c57..5aabbac1b524 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "da9211-regulator.h" @@ -236,6 +237,59 @@ static struct regulator_desc da9211_regulators[] = { DA9211_BUCK(BUCKB), }; +#ifdef CONFIG_OF +static struct of_regulator_match da9211_matches[] = { + [DA9211_ID_BUCKA] = { .name = "BUCKA" }, + [DA9211_ID_BUCKB] = { .name = "BUCKB" }, + }; + +static struct da9211_pdata *da9211_parse_regulators_dt( + struct device *dev) +{ + struct da9211_pdata *pdata; + struct device_node *node; + int i, num, n; + + node = of_get_child_by_name(dev->of_node, "regulators"); + if (!node) { + dev_err(dev, "regulators node not found\n"); + return ERR_PTR(-ENODEV); + } + + num = of_regulator_match(dev, node, da9211_matches, + ARRAY_SIZE(da9211_matches)); + of_node_put(node); + if (num < 0) { + dev_err(dev, "Failed to match regulators\n"); + return ERR_PTR(-EINVAL); + } + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + pdata->num_buck = num; + + n = 0; + for (i = 0; i < ARRAY_SIZE(da9211_matches); i++) { + if (!da9211_matches[i].init_data) + continue; + + pdata->init_data[n] = da9211_matches[i].init_data; + + n++; + }; + + return pdata; +} +#else +static struct da9211_pdata *da9211_parse_regulators_dt( + struct device *dev) +{ + return ERR_PTR(-ENODEV); +} +#endif + static irqreturn_t da9211_irq_handler(int irq, void *data) { struct da9211 *chip = data; @@ -306,7 +360,7 @@ static int da9211_regulator_init(struct da9211 *chip) } for (i = 0; i < chip->num_regulator; i++) { - config.init_data = &(chip->pdata->init_data[i]); + config.init_data = chip->pdata->init_data[i]; config.dev = chip->dev; config.driver_data = chip; config.regmap = chip->regmap; @@ -332,6 +386,21 @@ static int da9211_regulator_init(struct da9211 *chip) return 0; } + +static const struct i2c_device_id da9211_i2c_id[] = { + {"da9211", DA9211}, + {"da9213", DA9213}, + {}, +}; + +#ifdef CONFIG_OF +static const struct of_device_id da9211_dt_ids[] = { + { .compatible = "dlg,da9211", .data = &da9211_i2c_id[0] }, + { .compatible = "dlg,da9213", .data = &da9211_i2c_id[1] }, + {}, +}; +#endif + /* * I2C driver interface functions */ @@ -377,6 +446,14 @@ static int da9211_i2c_probe(struct i2c_client *i2c, return -ENODEV; } + if (!chip->pdata) + chip->pdata = da9211_parse_regulators_dt(chip->dev); + + if (IS_ERR(chip->pdata)) { + dev_err(chip->dev, "No regulators defined for the platform\n"); + return PTR_ERR(chip->pdata); + } + chip->chip_irq = i2c->irq; if (chip->chip_irq != 0) { @@ -401,12 +478,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c, return ret; } -static const struct i2c_device_id da9211_i2c_id[] = { - {"da9211", DA9211}, - {"da9213", DA9213}, - {}, -}; - MODULE_DEVICE_TABLE(i2c, da9211_i2c_id); static struct i2c_driver da9211_regulator_driver = { diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 658c3c33f4c0..5479394fefce 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -32,6 +32,6 @@ struct da9211_pdata { * 2 : 2 phase 2 buck */ int num_buck; - struct regulator_init_data *init_data; + struct regulator_init_data *init_data[DA9211_MAX_REGULATORS]; }; #endif -- cgit v1.2.3 From 6a4c264313c4ae32dc53821a9c57e0dc9696fb81 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:21:23 +0930 Subject: module: rename KERNEL_PARAM_FL_NOARG to avoid confusion Make it clear this is about kernel_param_ops, not kernel_param (which will soon have a flags field of its own). No functional changes. Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- kernel/module.c | 2 +- kernel/params.c | 6 +++--- security/apparmor/lsm.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 494f99e852da..16fdddab856a 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -42,7 +42,7 @@ struct kernel_param; * NOARG - the parameter allows for no argument (foo instead of foo=1) */ enum { - KERNEL_PARAM_FL_NOARG = (1 << 0) + KERNEL_PARAM_OPS_FL_NOARG = (1 << 0) }; struct kernel_param_ops { diff --git a/kernel/module.c b/kernel/module.c index 03214bd288e9..8a0dc91eddbc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -135,7 +135,7 @@ static int param_set_bool_enable_only(const char *val, } static const struct kernel_param_ops param_ops_bool_enable_only = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool_enable_only, .get = param_get_bool, }; diff --git a/kernel/params.c b/kernel/params.c index 34f527023794..8a484fc8bde8 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -104,7 +104,7 @@ static int parse_one(char *param, return 0; /* No one handled NULL, so do it here. */ if (!val && - !(params[i].ops->flags & KERNEL_PARAM_FL_NOARG)) + !(params[i].ops->flags & KERNEL_PARAM_OPS_FL_NOARG)) return -EINVAL; pr_debug("handling %s with %p\n", param, params[i].ops->set); @@ -318,7 +318,7 @@ int param_get_bool(char *buffer, const struct kernel_param *kp) EXPORT_SYMBOL(param_get_bool); struct kernel_param_ops param_ops_bool = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool, .get = param_get_bool, }; @@ -369,7 +369,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp) EXPORT_SYMBOL(param_set_bint); struct kernel_param_ops param_ops_bint = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bint, .get = param_get_int, }; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 998100093332..65ca451a764d 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -668,7 +668,7 @@ static int param_set_aabool(const char *val, const struct kernel_param *kp); static int param_get_aabool(char *buffer, const struct kernel_param *kp); #define param_check_aabool param_check_bool static struct kernel_param_ops param_ops_aabool = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aabool, .get = param_get_aabool }; @@ -685,7 +685,7 @@ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp); #define param_check_aalockpolicy param_check_bool static struct kernel_param_ops param_ops_aalockpolicy = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aalockpolicy, .get = param_get_aalockpolicy }; -- cgit v1.2.3 From 91f9d330cc14932084c37751997213cb0e7ea882 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:22:23 +0930 Subject: module: make it possible to have unsafe, tainting module params Add flags field to struct kernel_params, and add the first flag: unsafe parameter. Modifying a kernel parameter with the unsafe flag set, either via the kernel command line or sysfs, will issue a warning and taint the kernel. Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- drivers/tty/serial/8250/8250_core.c | 2 +- include/linux/moduleparam.h | 44 +++++++++++++++++++++++++++++-------- kernel/params.c | 11 ++++++++++ 3 files changed, 47 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 1d42dba6121d..bd672948f2f1 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -3587,7 +3587,7 @@ static void __used s8250_options(void) #ifdef CONFIG_SERIAL_8250_RSA __module_param_call(MODULE_PARAM_PREFIX, probe_rsa, ¶m_array_ops, .arr = &__param_arr_probe_rsa, - 0444, -1); + 0444, -1, 0); #endif } #else diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 16fdddab856a..1e3ffb839daa 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -56,11 +56,21 @@ struct kernel_param_ops { void (*free)(void *arg); }; +/* + * Flags available for kernel_param + * + * UNSAFE - the parameter is dangerous and setting it will taint the kernel + */ +enum { + KERNEL_PARAM_FL_UNSAFE = (1 << 0) +}; + struct kernel_param { const char *name; const struct kernel_param_ops *ops; u16 perm; - s16 level; + s8 level; + u8 flags; union { void *arg; const struct kparam_string *str; @@ -137,7 +147,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define module_param_cb(name, ops, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0) /** * _param_cb - general callback for a module/cmdline parameter @@ -149,7 +159,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define __level_param_cb(name, ops, arg, perm, level) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, level) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, level, 0) #define core_param_cb(name, ops, arg, perm) \ __level_param_cb(name, ops, arg, perm, 1) @@ -184,14 +194,14 @@ struct kparam_array /* This is the fundamental function for registering boot/module parameters. */ -#define __module_param_call(prefix, name, ops, arg, perm, level) \ +#define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ /* Default value instead of permissions? */ \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ - level, { arg } } + level, flags, { arg } } /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ @@ -199,7 +209,7 @@ struct kparam_array { 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, -1) + (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) /* We don't get oldget: it's often a new-style param_get_uint, etc. */ static inline int @@ -279,7 +289,7 @@ static inline void __kernel_param_unlock(void) */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, ¶m_ops_##type, &var, perm, -1) + __module_param_call("", name, ¶m_ops_##type, &var, perm, -1, 0) #endif /* !MODULE */ /** @@ -297,7 +307,7 @@ static inline void __kernel_param_unlock(void) = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_ops_string, \ - .str = &__param_string_##name, perm, -1); \ + .str = &__param_string_##name, perm, -1, 0);\ __MODULE_PARM_TYPE(name, "string") /** @@ -346,6 +356,22 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } +/** + * param_check_unsafe - Warn and taint the kernel if setting dangerous options. + * + * This gets called from all the standard param setters, but can be used from + * custom setters as well. + */ +static inline void +param_check_unsafe(const struct kernel_param *kp) +{ + if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { + pr_warn("Setting dangerous option %s - tainting kernel\n", + kp->name); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } +} + extern struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); @@ -444,7 +470,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ - perm, -1); \ + perm, -1, 0); \ __MODULE_PARM_TYPE(name, "array of " #type) extern struct kernel_param_ops param_array_ops; diff --git a/kernel/params.c b/kernel/params.c index 8a484fc8bde8..ad8d04563c3a 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -233,6 +233,7 @@ char *parse_args(const char *doing, #define STANDARD_PARAM_DEF(name, type, format, strtolfn) \ int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ + param_check_unsafe(kp); \ return strtolfn(val, 0, (type *)kp->arg); \ } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ @@ -265,6 +266,8 @@ int param_set_charp(const char *val, const struct kernel_param *kp) return -ENOSPC; } + param_check_unsafe(kp); + maybe_kfree_parameter(*(char **)kp->arg); /* This is a hack. We can't kmalloc in early boot, and we @@ -302,6 +305,8 @@ EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, const struct kernel_param *kp) { + param_check_unsafe(kp); + /* No equals means "set"... */ if (!val) val = "1"; @@ -331,6 +336,8 @@ int param_set_invbool(const char *val, const struct kernel_param *kp) bool boolval; struct kernel_param dummy; + param_check_unsafe(kp); + dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) @@ -357,6 +364,8 @@ int param_set_bint(const char *val, const struct kernel_param *kp) bool v; int ret; + param_check_unsafe(kp); + /* Match bool exactly, by re-using it. */ boolkp = *kp; boolkp.arg = &v; @@ -476,6 +485,8 @@ int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; + param_check_unsafe(kp); + if (strlen(val)+1 > kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); -- cgit v1.2.3 From 3baee201b06cfaff84c2c5ddc551b192bb3eaed3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:23:23 +0930 Subject: module: add module_param_unsafe and module_param_named_unsafe Add the helpers to be used by modules wishing to expose unsafe debugging or testing module parameters that taint the kernel when set. Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1e3ffb839daa..9531f9f9729e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -122,6 +122,12 @@ struct kparam_array #define module_param(name, type, perm) \ module_param_named(name, name, type, perm) +/** + * module_param_unsafe - same as module_param but taints kernel + */ +#define module_param_unsafe(name, type, perm) \ + module_param_named_unsafe(name, name, type, perm) + /** * module_param_named - typesafe helper for a renamed module/cmdline parameter * @name: a valid C identifier which is the parameter name. @@ -138,6 +144,14 @@ struct kparam_array module_param_cb(name, ¶m_ops_##type, &value, perm); \ __MODULE_PARM_TYPE(name, #type) +/** + * module_param_named_unsafe - same as module_param_named but taints kernel + */ +#define module_param_named_unsafe(name, value, type, perm) \ + param_check_##type(name, &(value)); \ + module_param_cb_unsafe(name, ¶m_ops_##type, &value, perm); \ + __MODULE_PARM_TYPE(name, #type) + /** * module_param_cb - general callback for a module/cmdline parameter * @name: a valid C identifier which is the parameter name. @@ -149,6 +163,10 @@ struct kparam_array #define module_param_cb(name, ops, arg, perm) \ __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0) +#define module_param_cb_unsafe(name, ops, arg, perm) \ + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, \ + KERNEL_PARAM_FL_UNSAFE) + /** * _param_cb - general callback for a module/cmdline parameter * to be evaluated before certain initcall level -- cgit v1.2.3 From 7a486d3781295b5298cbf9556928a76d26896863 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 27 Aug 2014 06:25:23 +0930 Subject: param: check for tainting before calling set op. This means every set op doesn't need to call it, and it can move into params.c. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 16 ---------------- kernel/params.c | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 9531f9f9729e..593501996574 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -374,22 +374,6 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } -/** - * param_check_unsafe - Warn and taint the kernel if setting dangerous options. - * - * This gets called from all the standard param setters, but can be used from - * custom setters as well. - */ -static inline void -param_check_unsafe(const struct kernel_param *kp) -{ - if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { - pr_warn("Setting dangerous option %s - tainting kernel\n", - kp->name); - add_taint(TAINT_USER, LOCKDEP_STILL_OK); - } -} - extern struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); diff --git a/kernel/params.c b/kernel/params.c index ad8d04563c3a..041b5899d5e2 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -83,6 +83,15 @@ bool parameq(const char *a, const char *b) return parameqn(a, b, strlen(a)+1); } +static void param_check_unsafe(const struct kernel_param *kp) +{ + if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { + pr_warn("Setting dangerous option %s - tainting kernel\n", + kp->name); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } +} + static int parse_one(char *param, char *val, const char *doing, @@ -109,6 +118,7 @@ static int parse_one(char *param, pr_debug("handling %s with %p\n", param, params[i].ops->set); mutex_lock(¶m_lock); + param_check_unsafe(¶ms[i]); err = params[i].ops->set(val, ¶ms[i]); mutex_unlock(¶m_lock); return err; @@ -233,7 +243,6 @@ char *parse_args(const char *doing, #define STANDARD_PARAM_DEF(name, type, format, strtolfn) \ int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ - param_check_unsafe(kp); \ return strtolfn(val, 0, (type *)kp->arg); \ } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ @@ -266,8 +275,6 @@ int param_set_charp(const char *val, const struct kernel_param *kp) return -ENOSPC; } - param_check_unsafe(kp); - maybe_kfree_parameter(*(char **)kp->arg); /* This is a hack. We can't kmalloc in early boot, and we @@ -305,8 +312,6 @@ EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, const struct kernel_param *kp) { - param_check_unsafe(kp); - /* No equals means "set"... */ if (!val) val = "1"; @@ -336,8 +341,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp) bool boolval; struct kernel_param dummy; - param_check_unsafe(kp); - dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) @@ -364,8 +367,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp) bool v; int ret; - param_check_unsafe(kp); - /* Match bool exactly, by re-using it. */ boolkp = *kp; boolkp.arg = &v; @@ -485,8 +486,6 @@ int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; - param_check_unsafe(kp); - if (strlen(val)+1 > kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); @@ -563,6 +562,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr, return -EPERM; mutex_lock(¶m_lock); + param_check_unsafe(attribute->param); err = attribute->param->ops->set(buf, attribute->param); mutex_unlock(¶m_lock); if (!err) -- cgit v1.2.3 From 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:15:00 +0200 Subject: KVM: Introduce gfn_to_hva_memslot_prot To support read-only memory regions on arm and arm64, we have a need to resolve a gfn to an hva given a pointer to a memslot to avoid looping through the memslots twice and to reuse the hva error checking of gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and refactor gfn_to_hva_prot() to use this function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..6d8a658ec174 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -528,6 +528,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, + bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5a0817ee996e..76c92a7249c4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); * If writable is set to false, the hva returned by this function is only * allowed to be read. */ -unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, + gfn_t gfn, bool *writable) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); if (!kvm_is_error_hva(hva) && writable) @@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return hva; } +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +{ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + + return gfn_to_hva_memslot_prot(slot, gfn, writable); +} + static int kvm_read_hva(void *data, void __user *hva, int len) { return __copy_from_user(data, hva, len); -- cgit v1.2.3 From 5c21403d74af2c9cd635a34c2f9199681a5b813e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Aug 2014 14:39:04 -0700 Subject: net: Update sk_buff flag bit availability comment. We lost one when xmit_more was added. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9b3802a197a8..b69b7b512c06 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -580,7 +580,7 @@ struct sk_buff { __u8 encap_hdr_csum:1; __u8 csum_valid:1; __u8 csum_complete_sw:1; - /* 2/4 bit hole (depending on ndisc_nodetype presence) */ + /* 1/3 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL -- cgit v1.2.3 From 3e8a72d1dae374cf6fc1dba97cec663585845ff9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:46 -0700 Subject: net: dsa: reduce number of protocol hooks DSA is currently registering one packet_type function per EtherType it needs to intercept in the receive path of a DSA-enabled Ethernet device. Right now we have three of them: trailer, DSA and eDSA, and there might be more in the future, this will not scale to the addition of new protocols. This patch proceeds with adding a new layer of abstraction and two new functions: dsa_switch_rcv() which will dispatch into the tag-protocol specific receive function implemented by net/dsa/tag_*.c dsa_slave_xmit() which will dispatch into the tag-protocol specific transmit function implemented by net/dsa/tag_*.c When we do create the per-port slave network devices, we iterate over the switch protocol to assign the DSA-specific receive and transmit operations. A new fake ethertype value is used: ETH_P_XDSA to illustrate the fact that this is no longer going to look like ETH_P_DSA or ETH_P_TRAILER like it used to be. This allows us to greatly simplify the check in eth_type_trans() and always override the skb->protocol with ETH_P_XDSA for Ethernet switches tagged protocol, while also reducing the number repetitive slave netdevice_ops assignments. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 28 ++++++++++++--------------- include/net/dsa.h | 20 +++---------------- include/uapi/linux/if_ether.h | 1 + net/dsa/dsa.c | 39 ++++++++++++++++++++----------------- net/dsa/dsa_priv.h | 9 +++------ net/dsa/slave.c | 45 ++++++++++++++----------------------------- net/dsa/tag_dsa.c | 8 ++++---- net/dsa/tag_edsa.c | 8 ++++---- net/dsa/tag_trailer.c | 8 ++++---- net/ethernet/eth.c | 7 ++----- 10 files changed, 68 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 039b23786c22..1875dc71422a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1781,24 +1781,13 @@ void dev_net_set(struct net_device *dev, struct net *net) #endif } -static inline bool netdev_uses_dsa_tags(struct net_device *dev) +static inline bool netdev_uses_dsa(struct net_device *dev) { -#ifdef CONFIG_NET_DSA_TAG_DSA - if (dev->dsa_ptr != NULL) - return dsa_uses_dsa_tags(dev->dsa_ptr); -#endif - - return 0; -} - -static inline bool netdev_uses_trailer_tags(struct net_device *dev) -{ -#ifdef CONFIG_NET_DSA_TAG_TRAILER - if (dev->dsa_ptr != NULL) - return dsa_uses_trailer_tags(dev->dsa_ptr); +#ifdef CONFIG_NET_DSA + return dev->dsa_ptr != NULL; +#else + return false; #endif - - return 0; } /** @@ -1933,6 +1922,13 @@ struct udp_offload { struct offload_callbacks callbacks; }; +struct dsa_device_ops { + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev); + int (*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); +}; + + /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; diff --git a/include/net/dsa.h b/include/net/dsa.h index 6efce384451e..6e26f1e4d8ce 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -59,6 +59,8 @@ struct dsa_platform_data { struct dsa_chip_data *chip; }; +struct dsa_device_ops; + struct dsa_switch_tree { /* * Configuration data for the platform device that owns @@ -71,6 +73,7 @@ struct dsa_switch_tree { * protocol to use. */ struct net_device *master_netdev; + const struct dsa_device_ops *ops; __be16 tag_protocol; /* @@ -186,21 +189,4 @@ static inline void *ds_to_priv(struct dsa_switch *ds) return (void *)(ds + 1); } -/* - * The original DSA tag format and some other tag formats have no - * ethertype, which means that we need to add a little hack to the - * networking receive path to make sure that received frames get - * the right ->protocol assigned to them when one of those tag - * formats is in use. - */ -static inline bool dsa_uses_dsa_tags(struct dsa_switch_tree *dst) -{ - return !!(dst->tag_protocol == htons(ETH_P_DSA)); -} - -static inline bool dsa_uses_trailer_tags(struct dsa_switch_tree *dst) -{ - return !!(dst->tag_protocol == htons(ETH_P_TRAILER)); -} - #endif diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 0f8210b8e0bc..aa63ed023c2b 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -128,6 +128,7 @@ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ +#define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ /* * This is an Ethernet frame header. diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0a49632fac47..92e71d2a2ccd 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -608,6 +608,24 @@ static void dsa_shutdown(struct platform_device *pdev) { } +static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + + if (unlikely(dst == NULL)) { + kfree_skb(skb); + return 0; + } + + return dst->ops->rcv(skb, dev, pt, orig_dev); +} + +struct packet_type dsa_pack_type __read_mostly = { + .type = cpu_to_be16(ETH_P_XDSA), + .func = dsa_switch_rcv, +}; + static const struct of_device_id dsa_of_match_table[] = { { .compatible = "marvell,dsa", }, {} @@ -633,30 +651,15 @@ static int __init dsa_init_module(void) if (rc) return rc; -#ifdef CONFIG_NET_DSA_TAG_DSA - dev_add_pack(&dsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - dev_add_pack(&edsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER - dev_add_pack(&trailer_packet_type); -#endif + dev_add_pack(&dsa_pack_type); + return 0; } module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { -#ifdef CONFIG_NET_DSA_TAG_TRAILER - dev_remove_pack(&trailer_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - dev_remove_pack(&edsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_DSA - dev_remove_pack(&dsa_packet_type); -#endif + dev_remove_pack(&dsa_pack_type); platform_driver_unregister(&dsa_driver); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index d4cf5cc747e3..218d75d16f6f 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -45,16 +45,13 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds, int port, char *name); /* tag_dsa.c */ -netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type dsa_packet_type; +extern const struct dsa_device_ops dsa_netdev_ops; /* tag_edsa.c */ -netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type edsa_packet_type; +extern const struct dsa_device_ops edsa_netdev_ops; /* tag_trailer.c */ -netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type trailer_packet_type; +extern const struct dsa_device_ops trailer_netdev_ops; #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 45a1e34c89e0..ad1a913533aa 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -171,6 +171,14 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } +static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch_tree *dst = p->parent->dst; + + return dst->ops->xmit(skb, dev); +} + /* ethtool operations *******************************************************/ static int @@ -293,42 +301,16 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_sset_count = dsa_slave_get_sset_count, }; -#ifdef CONFIG_NET_DSA_TAG_DSA -static const struct net_device_ops dsa_netdev_ops = { +static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_init = dsa_slave_init, .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, - .ndo_start_xmit = dsa_xmit, + .ndo_start_xmit = dsa_slave_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA -static const struct net_device_ops edsa_netdev_ops = { - .ndo_init = dsa_slave_init, - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = edsa_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_do_ioctl = dsa_slave_ioctl, -}; -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER -static const struct net_device_ops trailer_netdev_ops = { - .ndo_init = dsa_slave_init, - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = trailer_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_do_ioctl = dsa_slave_ioctl, -}; -#endif /* slave device setup *******************************************************/ struct net_device * @@ -349,21 +331,22 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; + slave_dev->netdev_ops = &dsa_slave_netdev_ops; switch (ds->dst->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case htons(ETH_P_DSA): - slave_dev->netdev_ops = &dsa_netdev_ops; + ds->dst->ops = &dsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_EDSA case htons(ETH_P_EDSA): - slave_dev->netdev_ops = &edsa_netdev_ops; + ds->dst->ops = &edsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_TRAILER case htons(ETH_P_TRAILER): - slave_dev->netdev_ops = &trailer_netdev_ops; + ds->dst->ops = &trailer_netdev_ops; break; #endif default: diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index cacce1e22f9c..d7dbc5bda5c0 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -16,7 +16,7 @@ #define DSA_HLEN 4 -netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *dsa_header; @@ -186,7 +186,7 @@ out: return 0; } -struct packet_type dsa_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_DSA), - .func = dsa_rcv, +const struct dsa_device_ops dsa_netdev_ops = { + .xmit = dsa_xmit, + .rcv = dsa_rcv, }; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index e70c43c25e64..6b30abe89183 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -17,7 +17,7 @@ #define DSA_HLEN 4 #define EDSA_HLEN 8 -netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *edsa_header; @@ -205,7 +205,7 @@ out: return 0; } -struct packet_type edsa_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_EDSA), - .func = edsa_rcv, +const struct dsa_device_ops edsa_netdev_ops = { + .xmit = edsa_xmit, + .rcv = edsa_rcv, }; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 94bc260d015d..5fe9444842c5 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -14,7 +14,7 @@ #include #include "dsa_priv.h" -netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct sk_buff *nskb; @@ -114,7 +114,7 @@ out: return 0; } -struct packet_type trailer_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TRAILER), - .func = trailer_rcv, +const struct dsa_device_ops trailer_netdev_ops = { + .xmit = trailer_xmit, + .rcv = trailer_rcv, }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f405e0592407..5cebca134585 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -181,11 +181,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. */ - if (unlikely(netdev_uses_dsa_tags(dev))) - return htons(ETH_P_DSA); - - if (unlikely(netdev_uses_trailer_tags(dev))) - return htons(ETH_P_TRAILER); + if (unlikely(netdev_uses_dsa(dev))) + return htons(ETH_P_XDSA); if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN)) return eth->h_proto; -- cgit v1.2.3 From 464c3668f065baeacfffa9d421959d21069389fe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:48 -0700 Subject: net: phy: provide stub for fixed_phy_set_link_update In preparation for updating the DSA code and avoid using ifdefs there, provide an empty stub for fixed_phy_set_link_update when CONFIG_FIXED_PHY is not selected. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy_fixed.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index ae612acebb53..941138664c1d 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -18,6 +18,9 @@ extern int fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); extern void fixed_phy_del(int phy_addr); +extern int fixed_phy_set_link_update(struct phy_device *phydev, + int (*link_update)(struct net_device *, + struct fixed_phy_status *)); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) @@ -34,14 +37,12 @@ static inline int fixed_phy_del(int phy_addr) { return -ENODEV; } -#endif /* CONFIG_FIXED_PHY */ - -/* - * This function issued only by fixed_phy-aware drivers, no need - * protect it with #ifdef - */ -extern int fixed_phy_set_link_update(struct phy_device *phydev, +static inline int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, - struct fixed_phy_status *)); + struct fixed_phy_status *)) +{ + return -ENODEV; +} +#endif /* CONFIG_FIXED_PHY */ #endif /* __PHY_FIXED_H */ -- cgit v1.2.3 From 5aed85cec29882d1c4b4b2a01cb75a99efdbe4ed Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:52 -0700 Subject: net: dsa: allow switches to work without tagging In case switch port tagging is disabled (voluntarily, or the switch just does not support it), allow us to continue using the defined set of dsa_device_ops in net/dsa/slave.c. We introduce dsa_protocol_is_tagged() to check whether we need to override skb->protocol and go through the DSA-specifif packet_type function, or if we just go on and receive the SKB through the normal path. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++--- include/net/dsa.h | 5 +++++ net/dsa/slave.c | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1875dc71422a..429801370d0c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1784,10 +1784,10 @@ void dev_net_set(struct net_device *dev, struct net *net) static inline bool netdev_uses_dsa(struct net_device *dev) { #ifdef CONFIG_NET_DSA - return dev->dsa_ptr != NULL; -#else - return false; + if (dev->dsa_ptr != NULL) + return dsa_uses_tagged_protocol(dev->dsa_ptr); #endif + return false; } /** diff --git a/include/net/dsa.h b/include/net/dsa.h index dc357454ae3b..1035f6452d79 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -198,4 +198,9 @@ static inline void *ds_to_priv(struct dsa_switch *ds) return (void *)(ds + 1); } +static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) +{ + return dst->tag_protocol != 0; +} + #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 03d2894a0f8a..241c2a1684cb 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -181,6 +181,17 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) return dst->ops->xmit(skb, dev); } +static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + skb->dev = p->parent->dst->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} + /* ethtool operations *******************************************************/ static int @@ -314,6 +325,11 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_do_ioctl = dsa_slave_ioctl, }; +static const struct dsa_device_ops notag_netdev_ops = { + .xmit = dsa_slave_notag_xmit, + .rcv = NULL, +}; + static void dsa_slave_adjust_link(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -415,7 +431,8 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, break; #endif default: - BUG(); + ds->dst->ops = ¬ag_netdev_ops; + break; } SET_NETDEV_DEV(slave_dev, parent); -- cgit v1.2.3 From 97fdaab4699de3a2a91001efef60bb0622de1c53 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:25 -0700 Subject: net: phy: broadcom: fix PHY_BCM_OUI_4 PHY_BCM_OUI_4 is missing two significant digits that actually make it an OUI, add those missing bits so it becomes usable again for matching. Fixes: b560a58c45c6 ("net: phy: add Broadcom BCM7xxx internal PHY driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ee1431d976fa..921f17ca4c26 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -21,7 +21,7 @@ #define PHY_BCM_OUI_1 0x00206000 #define PHY_BCM_OUI_2 0x0143bc00 #define PHY_BCM_OUI_3 0x03625c00 -#define PHY_BCM_OUI_4 0x600d0000 +#define PHY_BCM_OUI_4 0x600d8400 #define PHY_BCM_OUI_5 0x03625e00 -- cgit v1.2.3 From 11bf2bbd596add62a86a74fc7aedc0b86c6ec154 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:26 -0700 Subject: net: phy: broadcom: add new Broadcom OUI Broadcom started to use a new OUI for its 2013 and newer products: D4-01-29 which translates into 0xae025000 for a 32-bits OUI, add its definition. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 921f17ca4c26..cbcfad36d4c0 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -23,7 +23,7 @@ #define PHY_BCM_OUI_3 0x03625c00 #define PHY_BCM_OUI_4 0x600d8400 #define PHY_BCM_OUI_5 0x03625e00 - +#define PHY_BCM_OUI_6 0xae025000 #define PHY_BCM_FLAGS_MODE_COPPER 0x00000001 #define PHY_BCM_FLAGS_MODE_1000BX 0x00000002 -- cgit v1.2.3 From 430ad68ffb5fa632a277162e5995cd6f7a39fb78 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:27 -0700 Subject: net: phy: bcm7xxx: add BCM7250 and BCM7364 PHY entries Add two new entries to the Broadcom BCM7xxx internal PHY driver for BCM7250 and BCM7364 chips. Those chips share the usual 28nm process Gigabit PHY sequence and require the same workarounds so far. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 4 ++++ include/linux/brcmphy.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 948c7086679a..09dd6e1dc6e1 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -335,6 +335,8 @@ static int bcm7xxx_dummy_config_init(struct phy_device *phydev) } static struct phy_driver bcm7xxx_driver[] = { + BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"), + BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), @@ -367,6 +369,8 @@ static struct phy_driver bcm7xxx_driver[] = { } }; static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { + { PHY_ID_BCM7250, 0xfffffff0, }, + { PHY_ID_BCM7364, 0xfffffff0, }, { PHY_ID_BCM7366, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index cbcfad36d4c0..5bd35cc0d471 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,6 +13,8 @@ #define PHY_ID_BCM5461 0x002060c0 #define PHY_ID_BCM57780 0x03625d90 +#define PHY_ID_BCM7250 0xae025280 +#define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7445 0x600d8510 -- cgit v1.2.3 From 1f58d9465c568eb47cab939bbc4f30ae51863295 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 8 Aug 2014 13:06:30 +0200 Subject: dma-buf/fence: Fix one more kerneldoc warning The seqno_fence_init() function's cond argument isn't described in the kerneldoc comment. Fix that to silence a warning when building DocBook documentation. Signed-off-by: Thierry Reding Signed-off-by: Sumit Semwal --- include/linux/seqno-fence.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seqno-fence.h b/include/linux/seqno-fence.h index 3d6003de4b0d..a1ba6a5ccdd6 100644 --- a/include/linux/seqno-fence.h +++ b/include/linux/seqno-fence.h @@ -62,6 +62,7 @@ to_seqno_fence(struct fence *fence) * @context: the execution context this fence is a part of * @seqno_ofs: the offset within @sync_buf * @seqno: the sequence # to signal on + * @cond: fence wait condition * @ops: the fence_ops for operations on this seqno fence * * This function initializes a struct seqno_fence with passed parameters, -- cgit v1.2.3 From a8dbfeedfe47a19a4712749eb2444b1d7ea1150e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Aug 2014 14:31:24 -0700 Subject: regulator: fix kernel-doc warnings in header files Fix kernel-doc warnings in regulator header files: Warning(..//include/linux/regulator/machine.h:140): No description found for parameter 'ramp_disable' Warning(..//include/linux/regulator/driver.h:279): No description found for parameter 'linear_ranges' Warning(..//include/linux/regulator/driver.h:279): No description found for parameter 'n_linear_ranges' Signed-off-by: Randy Dunlap Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ include/linux/regulator/machine.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bbe03a1924c0..4efa1ed8a2b0 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -218,6 +218,8 @@ enum regulator_type { * @linear_min_sel: Minimal selector for starting linear mapping * @fixed_uV: Fixed voltage of rails. * @ramp_delay: Time to settle down after voltage change (unit: uV/us) + * @linear_ranges: A constant table of possible voltage ranges. + * @n_linear_ranges: Number of entries in the @linear_ranges table. * @volt_table: Voltage mapping table (if table based mapping) * * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_ diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 730e638c5589..0b08d05d470b 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -85,6 +85,7 @@ struct regulator_state { * bootloader then it will be enabled when the constraints are * applied. * @apply_uV: Apply the voltage constraint when initialising. + * @ramp_disable: Disable ramp delay when initialising or when setting voltage. * * @input_uV: Input voltage for regulator when supplied by another regulator. * -- cgit v1.2.3 From 4ba2968420fa9d0604b6a6a5c61bfa8d0fa84ae0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 26 Aug 2014 19:12:21 -0500 Subject: percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t __get_cpu_var can paper over differences in the definitions of cpumask_var_t and either use the address of the cpumask variable directly or perform a fetch of the address of the struct cpumask allocated elsewhere. This is important particularly when using per cpu cpumask_var_t declarations because in one case we have an offset into a per cpu area to handle and in the other case we need to fetch a pointer from the offset. This patch introduces a new macro this_cpu_cpumask_var_ptr() that is defined where cpumask_var_t is defined and performs the proper actions. All use cases where __get_cpu_var is used with cpumask_var_t are converted to the use of this_cpu_cpumask_var_ptr(). Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/perf_event_p4.h | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 3 +-- arch/x86/oprofile/op_model_p4.c | 2 +- include/linux/cpumask.h | 11 +++++++++++ kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 2 +- 7 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 85e13ccf15c4..d725382c2ae0 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -189,7 +189,7 @@ static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP if (smp_num_siblings == 2) - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 6ce600f9bc78..1f5d5f2ffae6 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -42,8 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) * We are to modify mask, so we need an own copy * and be sure it's manipulated with irq off. */ - ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); /* * The idea is to send one IPI per cluster. diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 98ab13058f89..ad1d91f475ab 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -372,7 +372,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 2997af6d2ccd..0a9a6da21e74 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -666,10 +666,19 @@ static inline size_t cpumask_size(void) * * This code makes NR_CPUS length memcopy and brings to a memory corruption. * cpumask_copy() provide safe copy functionality. + * + * Note that there is another evil here: If you define a cpumask_var_t + * as a percpu variable then the way to obtain the address of the cpumask + * structure differently influences what this_cpu_* operation needs to be + * used. Please use this_cpu_cpumask_var_t in those cases. The direct use + * of this_cpu_ptr() or this_cpu_read() will lead to failures when the + * other type of cpumask_var_t implementation is configured. */ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) + bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); @@ -681,6 +690,8 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; +#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) + static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 255ce138b652..4a608cfaecbd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1158,7 +1158,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); static int find_later_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); + struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl); int this_cpu = smp_processor_id(); int best_cpu, cpu = task_cpu(task); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bfa3c86d0d68..197d659c144c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6539,7 +6539,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_group *group; struct rq *busiest; unsigned long flags; - struct cpumask *cpus = __get_cpu_var(load_balance_mask); + struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); struct lb_env env = { .sd = sd, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5f6edca4fafd..a4c50fce9b90 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1526,7 +1526,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); -- cgit v1.2.3 From d37aba521379203b740a2929e6e6f6bd2485f5d7 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:54:18 +0200 Subject: ARM: tegra: remove unused tegra_emc.h The header file include/linux/platform_data/tegra_emc.h does not seem to be used anywhere. It was orphaned by a7cbe92c "ARM: tegra: remove tegra EMC scaling driver". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: Stephen Warren --- include/linux/platform_data/tegra_emc.h | 34 --------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 include/linux/platform_data/tegra_emc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/tegra_emc.h b/include/linux/platform_data/tegra_emc.h deleted file mode 100644 index df67505e98f8..000000000000 --- a/include/linux/platform_data/tegra_emc.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2011 Google, Inc. - * - * Author: - * Colin Cross - * Olof Johansson - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __TEGRA_EMC_H_ -#define __TEGRA_EMC_H_ - -#define TEGRA_EMC_NUM_REGS 46 - -struct tegra_emc_table { - unsigned long rate; - u32 regs[TEGRA_EMC_NUM_REGS]; -}; - -struct tegra_emc_pdata { - int num_tables; - struct tegra_emc_table *tables; -}; - -#endif -- cgit v1.2.3 From 2b8941b962a9f24d61c2b3c2e889928e6cf3d82b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 27 Aug 2014 11:17:56 -0400 Subject: NFSD: Update some as-yet unused 4.2 error codes Recent NFS v4.2 drafts have removed NFS4ERR_METADATA_NOTSUPP and reassigned the error code to NFS4ERR_UNION_NOTSUPP. I also add in the NFS4ERR_OFFLOAD_NO_REQS error code. We're not using any of these yet, so there's no harm done. Signed-off-by: Anna Schumaker Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 2 +- include/linux/nfs4.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 847daf37e566..747f3b95bd11 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) -#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) +#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP) #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index a1e3064a8d99..79b2a0f5c318 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -235,10 +235,11 @@ enum nfsstat4 { /* nfs42 */ NFS4ERR_PARTNER_NOTSUPP = 10088, NFS4ERR_PARTNER_NO_AUTH = 10089, - NFS4ERR_METADATA_NOTSUPP = 10090, + NFS4ERR_UNION_NOTSUPP = 10090, NFS4ERR_OFFLOAD_DENIED = 10091, NFS4ERR_WRONG_LFS = 10092, NFS4ERR_BADLABEL = 10093, + NFS4ERR_OFFLOAD_NO_REQS = 10094, }; static inline bool seqid_mutating_err(u32 err) -- cgit v1.2.3 From db9ee220361de03ee86388f9ea5e529eaad5323c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 27 Aug 2014 18:40:07 -0400 Subject: jbd2: fix descriptor block size handling errors with journal_csum It turns out that there are some serious problems with the on-disk format of journal checksum v2. The foremost is that the function to calculate descriptor tag size returns sizes that are too big. This causes alignment issues on some architectures and is compounded by the fact that some parts of jbd2 use the structure size (incorrectly) to determine the presence of a 64bit journal instead of checking the feature flags. Therefore, introduce journal checksum v3, which enlarges the descriptor block tag format to allow for full 32-bit checksums of journal blocks, fix the journal tag function to return the correct sizes, and fix the jbd2 recovery code to use feature flags to determine 64bitness. Add a few function helpers so we don't have to open-code quite so many pieces. Switching to a 16-byte block size was found to increase journal size overhead by a maximum of 0.1%, to convert a 32-bit journal with no checksumming to a 32-bit journal with checksum v3 enabled. Signed-off-by: Darrick J. Wong Reported-by: TR Reardon Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 5 +++-- fs/jbd2/commit.c | 21 +++++++++++--------- fs/jbd2/journal.c | 56 ++++++++++++++++++++++++++++++++++------------------ fs/jbd2/recovery.c | 26 +++++++++++++----------- fs/jbd2/revoke.c | 6 +++--- include/linux/jbd2.h | 30 +++++++++++++++++++++++----- 6 files changed, 95 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b9..0b28b36e7915 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb) if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - /* journal checksum v2 */ + /* journal checksum v3 */ compat = 0; - incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; } else { /* journal checksum v1 */ compat = JBD2_FEATURE_COMPAT_CHECKSUM; @@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) jbd2_journal_clear_features(sbi->s_journal, JBD2_FEATURE_COMPAT_CHECKSUM, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | + JBD2_FEATURE_INCOMPAT_CSUM_V3 | JBD2_FEATURE_INCOMPAT_CSUM_V2); } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6fac74349856..b73e0215baa7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) struct commit_header *h; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; h = (struct commit_header *)(bh->b_data); @@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) return checksum; } -static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, +static void write_tag_block(journal_t *j, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); - if (tag_bytes > JBD2_TAG_SIZE32) + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT)) tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } @@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j, struct jbd2_journal_block_tail *tail; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - @@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j, static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, struct buffer_head *bh, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; struct page *page = bh->b_page; __u8 *addr; __u32 csum32; __be32 seq; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; seq = cpu_to_be32(sequence); @@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, bh->b_size); kunmap_atomic(addr); - /* We only have space to store the lower 16 bits of the crc32c. */ - tag->t_checksum = cpu_to_be16(csum32); + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + tag3->t_checksum = cpu_to_be32(csum32); + else + tag->t_checksum = cpu_to_be16(csum32); } /* * jbd2_journal_commit_transaction @@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) LIST_HEAD(io_bufs); LIST_HEAD(log_bufs); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_block_tail); /* @@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag_flag |= JBD2_FLAG_SAME_UUID; tag = (journal_block_tag_t *) tagp; - write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); + write_tag_block(journal, tag, jh2bh(jh)->b_blocknr); tag->t_flags = cpu_to_be16(tag_flag); jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], commit_transaction->t_tid); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 67b8e303946c..19d74d86d99c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug); /* Checksumming functions */ static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; @@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return sb->s_checksum == jbd2_superblock_csum(j, sb); @@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; sb->s_checksum = jbd2_superblock_csum(j, sb); @@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3(journal) && + JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " "at the same time!\n"); goto out; } + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { + /* Can't have checksum v2 and v3 at the same time! */ + printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " + "at the same time!\n"); + goto out; + } + if (!jbd2_verify_csum_type(journal, sb)) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); goto out; } /* Load the checksum driver */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3(journal)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); @@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal) } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) return 0; - /* Asking for checksumming v2 and v1? Only give them v2. */ - if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && + /* If enabling v2 checksums, turn on v3 instead */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { + incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; + } + + /* Asking for checksumming v3 and v1? Only give them v3. */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && compat & JBD2_FEATURE_COMPAT_CHECKSUM) compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; @@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, sb = journal->j_superblock; - /* If enabling v2 checksums, update superblock */ - if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + /* If enabling v3 checksums, update superblock */ + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { sb->s_checksum_type = JBD2_CRC32C_CHKSUM; sb->s_feature_compat &= ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); @@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, /* If enabling v1 checksums, downgrade superblock */ if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) sb->s_feature_incompat &= - ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | + JBD2_FEATURE_INCOMPAT_CSUM_V3); sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_ro_compat |= cpu_to_be32(ro); @@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode) */ size_t journal_tag_bytes(journal_t *journal) { - journal_block_tag_t tag; - size_t x = 0; + size_t sz; + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return sizeof(journal_block_tag3_t); + + sz = sizeof(journal_block_tag_t); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - x += sizeof(tag.t_checksum); + sz += sizeof(__u16); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return x + JBD2_TAG_SIZE64; + return sz; else - return x + JBD2_TAG_SIZE32; + return sz - sizeof(__u32); } /* diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 00e9703d7dc6..9b329b55ffe3 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - @@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) int nr = 0, size = journal->j_blocksize; int tag_bytes = journal_tag_bytes(journal); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) size -= sizeof(struct jbd2_journal_block_tail); tagp = &bh->b_data[sizeof(journal_header_t)]; @@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal) return err; } -static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) +static inline unsigned long long read_tag_block(journal_t *journal, + journal_block_tag_t *tag) { unsigned long long block = be32_to_cpu(tag->t_blocknr); - if (tag_bytes > JBD2_TAG_SIZE32) + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; return block; } @@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; h = buf; @@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, void *buf, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; __u32 csum32; __be32 seq; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; seq = cpu_to_be32(sequence); csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); - return tag->t_checksum == cpu_to_be16(csum32); + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return tag3->t_checksum == cpu_to_be32(csum32); + else + return tag->t_checksum == cpu_to_be16(csum32); } static int do_one_pass(journal_t *journal, @@ -513,8 +518,7 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JBD2_DESCRIPTOR_BLOCK: /* Verify checksum first */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) descr_csum_size = sizeof(struct jbd2_journal_block_tail); if (descr_csum_size > 0 && @@ -575,7 +579,7 @@ static int do_one_pass(journal_t *journal, unsigned long long blocknr; J_ASSERT(obh != NULL); - blocknr = read_tag_block(tag_bytes, + blocknr = read_tag_block(journal, tag); /* If the block has been @@ -814,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 198c9c10276d..d5e95a175c92 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -91,8 +91,8 @@ #include #include #include -#endif #include +#endif static struct kmem_cache *jbd2_revoke_record_cache; static struct kmem_cache *jbd2_revoke_table_cache; @@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal, offset = *offsetp; /* Do we need to leave space at the end for a checksum? */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); /* Make sure we have a descriptor with space left for the record */ @@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) struct jbd2_journal_revoke_tail *tail; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d5b50a19463c..0dae71e9971c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -159,7 +159,11 @@ typedef struct journal_header_s * journal_block_tag (in the descriptor). The other h_chksum* fields are * not used. * - * Checksum v1 and v2 are mutually exclusive features. + * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses + * journal_block_tag3_t to store a full 32-bit checksum. Everything else + * is the same as v2. + * + * Checksum v1, v2, and v3 are mutually exclusive features. */ struct commit_header { __be32 h_magic; @@ -179,6 +183,14 @@ struct commit_header { * raw struct shouldn't be used for pointer math or sizeof() - use * journal_tag_bytes(journal) instead to compute this. */ +typedef struct journal_block_tag3_s +{ + __be32 t_blocknr; /* The on-disk block number */ + __be32 t_flags; /* See below */ + __be32 t_blocknr_high; /* most-significant high 32bits. */ + __be32 t_checksum; /* crc32c(uuid+seq+block) */ +} journal_block_tag3_t; + typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ @@ -187,9 +199,6 @@ typedef struct journal_block_tag_s __be32 t_blocknr_high; /* most-significant high 32bits. */ } journal_block_tag_t; -#define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) -#define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) - /* Tail of descriptor block, for checksumming */ struct jbd2_journal_block_tail { __be32 t_checksum; /* crc32c(uuid+descr_block) */ @@ -284,6 +293,7 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 +#define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM @@ -291,7 +301,8 @@ typedef struct journal_superblock_s #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ JBD2_FEATURE_INCOMPAT_64BIT | \ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ - JBD2_FEATURE_INCOMPAT_CSUM_V2) + JBD2_FEATURE_INCOMPAT_CSUM_V2 | \ + JBD2_FEATURE_INCOMPAT_CSUM_V3) #ifdef __KERNEL__ @@ -1296,6 +1307,15 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); +static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) +{ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) || + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return 1; + + return 0; +} + /* * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for * transaction control blocks. -- cgit v1.2.3 From abdc08a3a263a20e49534a36291d657bf53dda5b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 19 Aug 2014 10:06:09 -0700 Subject: gpio: change gpiochip_request_own_desc() prototype The current prototype of gpiochip_request_own_desc() requires to obtain a pointer to a descriptor. This is in contradiction to all other GPIO request schemes, and imposes an extra step of obtaining a descriptor to drivers. Most drivers actually cannot even perform that step since the function that does it (gpichip_get_desc()) is gpiolib-private. Change gpiochip_request_own_desc() to return a descriptor from a (chip, hwnum) tuple and update users of this function (currently gpiolib-acpi only). Signed-off-by: Alexandre Courbot Tested-by: Mika Westerberg Signed-off-by: Linus Walleij --- Documentation/gpio/driver.txt | 3 ++- drivers/gpio/gpiolib-acpi.c | 20 +++----------------- drivers/gpio/gpiolib.c | 18 ++++++++++++++---- include/linux/gpio/driver.h | 3 ++- 4 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt index 18790c237977..23b751a10d7b 100644 --- a/Documentation/gpio/driver.txt +++ b/Documentation/gpio/driver.txt @@ -178,7 +178,8 @@ does not help since it pins the module to the kernel forever (it calls try_module_get()). A GPIO driver can use the following functions instead to request and free descriptors without being pinned to the kernel forever. - int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) + struct gpio_desc *gpiochip_request_own_desc(struct gpio_desc *desc, + const char *label) void gpiochip_free_own_desc(struct gpio_desc *desc) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 84540025aa08..f9103e72e2a4 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -145,14 +145,8 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, if (!handler) return AE_BAD_PARAMETER; - desc = gpiochip_get_desc(chip, pin); + desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event"); if (IS_ERR(desc)) { - dev_err(chip->dev, "Failed to get GPIO descriptor\n"); - return AE_ERROR; - } - - ret = gpiochip_request_own_desc(desc, "ACPI:Event"); - if (ret) { dev_err(chip->dev, "Failed to request GPIO\n"); return AE_ERROR; } @@ -420,22 +414,14 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, } } if (!found) { - int ret; - - desc = gpiochip_get_desc(chip, pin); + desc = gpiochip_request_own_desc(chip, pin, + "ACPI:OpRegion"); if (IS_ERR(desc)) { status = AE_ERROR; mutex_unlock(&achip->conn_lock); goto out; } - ret = gpiochip_request_own_desc(desc, "ACPI:OpRegion"); - if (ret) { - status = AE_ERROR; - mutex_unlock(&achip->conn_lock); - goto out; - } - switch (agpio->io_restriction) { case ACPI_IO_RESTRICT_INPUT: gpiod_direction_input(desc); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 15cc0bb65dda..a5831d6a9b91 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -895,12 +895,22 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested); * allows the GPIO chip module to be unloaded as needed (we assume that the * GPIO chip driver handles freeing the GPIOs it has requested). */ -int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) +struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, + const char *label) { - if (!desc || !desc->chip) - return -EINVAL; + struct gpio_desc *desc = gpiochip_get_desc(chip, hwnum); + int err; + + if (IS_ERR(desc)) { + chip_err(chip, "failed to get GPIO descriptor\n"); + return desc; + } + + err = __gpiod_request(desc, label); + if (err < 0) + return ERR_PTR(err); - return __gpiod_request(desc, label); + return desc; } EXPORT_SYMBOL_GPL(gpiochip_request_own_desc); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index e78a2373e374..a2de58fffd19 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -166,7 +166,8 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip, #endif /* CONFIG_GPIO_IRQCHIP */ -int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label); +struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, + const char *label); void gpiochip_free_own_desc(struct gpio_desc *desc); #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 7179569aeb52197fd2a9909ba226c4c9cc0e2e2a Mon Sep 17 00:00:00 2001 From: Heiko Stübner Date: Thu, 28 Aug 2014 12:36:04 -0700 Subject: regulator: core: Add REGULATOR_EVENT_PRE_VOLTAGE_CHANGE (and ABORT) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases we need to know when a regulator is about to be changed. Add a way for clients to be notified. Note that for set_voltage() we don't necessarily know what voltage we'll end up with, so we tell the client what the range will be so they can prepare. Signed-off-by: Heiko Stübner Signed-off-by: Doug Anderson Signed-off-by: Mark Brown --- drivers/regulator/core.c | 63 +++++++++++++++++++++++++++++++++----- include/linux/regulator/consumer.h | 20 ++++++++++++ 2 files changed, 76 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..dabc8e8862c8 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -102,7 +102,7 @@ static int _regulator_disable(struct regulator_dev *rdev); static int _regulator_get_voltage(struct regulator_dev *rdev); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); -static void _notifier_call_chain(struct regulator_dev *rdev, +static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); @@ -2369,6 +2369,55 @@ int regulator_is_supported_voltage(struct regulator *regulator, } EXPORT_SYMBOL_GPL(regulator_is_supported_voltage); +static int _regulator_call_set_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV, + unsigned *selector) +{ + struct pre_voltage_change_data data; + int ret; + + data.old_uV = _regulator_get_voltage(rdev); + data.min_uV = min_uV; + data.max_uV = max_uV; + ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, + &data); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; + + ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, selector); + if (ret >= 0) + return ret; + + _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, + (void *)data.old_uV); + + return ret; +} + +static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev, + int uV, unsigned selector) +{ + struct pre_voltage_change_data data; + int ret; + + data.old_uV = _regulator_get_voltage(rdev); + data.min_uV = uV; + data.max_uV = uV; + ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, + &data); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; + + ret = rdev->desc->ops->set_voltage_sel(rdev, selector); + if (ret >= 0) + return ret; + + _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, + (void *)data.old_uV); + + return ret; +} + static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { @@ -2396,8 +2445,8 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, } if (rdev->desc->ops->set_voltage) { - ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, - &selector); + ret = _regulator_call_set_voltage(rdev, min_uV, max_uV, + &selector); if (ret >= 0) { if (rdev->desc->ops->list_voltage) @@ -2432,8 +2481,8 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, if (old_selector == selector) ret = 0; else - ret = rdev->desc->ops->set_voltage_sel( - rdev, ret); + ret = _regulator_call_set_voltage_sel( + rdev, best_val, selector); } else { ret = -EINVAL; } @@ -3079,11 +3128,11 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier); /* notify regulator consumers and downstream regulator consumers. * Note mutex must be held by caller. */ -static void _notifier_call_chain(struct regulator_dev *rdev, +static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { /* call rdev chain first */ - blocking_notifier_call_chain(&rdev->notifier, event, data); + return blocking_notifier_call_chain(&rdev->notifier, event, data); } /** diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index f8a8733068a7..d347c805f923 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -93,7 +93,12 @@ struct regmap; * OVER_TEMP Regulator over temp. * FORCE_DISABLE Regulator forcibly shut down by software. * VOLTAGE_CHANGE Regulator voltage changed. + * Data passed is old voltage cast to (void *). * DISABLE Regulator was disabled. + * PRE_VOLTAGE_CHANGE Regulator is about to have voltage changed. + * Data passed is "struct pre_voltage_change_data" + * ABORT_VOLTAGE_CHANGE Regulator voltage change failed for some reason. + * Data passed is old voltage cast to (void *). * * NOTE: These events can be OR'ed together when passed into handler. */ @@ -106,6 +111,21 @@ struct regmap; #define REGULATOR_EVENT_FORCE_DISABLE 0x20 #define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40 #define REGULATOR_EVENT_DISABLE 0x80 +#define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE 0x100 +#define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200 + +/** + * struct pre_voltage_change_data - Data sent with PRE_VOLTAGE_CHANGE event + * + * @old_uV: Current voltage before change. + * @min_uV: Min voltage we'll change to. + * @max_uV: Max voltage we'll change to. + */ +struct pre_voltage_change_data { + unsigned long old_uV; + unsigned long min_uV; + unsigned long max_uV; +}; struct regulator; -- cgit v1.2.3 From 656473003bc7e056c3bbd4a4d9832dad01e86f76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 7 ++----- arch/arm64/include/asm/kvm_host.h | 6 ++---- arch/ia64/include/asm/kvm_host.h | 3 --- arch/mips/include/asm/kvm_host.h | 5 ----- arch/powerpc/include/asm/kvm_host.h | 5 ----- arch/s390/include/asm/kvm_host.h | 5 +++-- arch/x86/include/asm/kvm_host.h | 4 ---- include/linux/kvm_host.h | 6 ------ include/linux/kvm_types.h | 14 ++++++++++++++ 9 files changed, 21 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..4843397b812c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #include -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -149,20 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..766147baae0b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -22,6 +22,8 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include +#include #include #include #include @@ -41,7 +43,6 @@ #define KVM_VCPU_MAX_FEATURES 3 -struct kvm_vcpu; int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); @@ -164,18 +165,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index db95f570705f..fccc09d04649 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -234,9 +234,6 @@ struct kvm_vm_data { #define KVM_REQ_PTC_G 32 #define KVM_REQ_RESUME 33 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 7a3fc67bd7f9..b93bc80ed7e7 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -96,11 +96,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98d9dd50d063..0e597283c5c6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,7 +53,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -76,10 +75,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 773bef7614d8..d71291d3fb6f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include #include #include +#include #include #include #include @@ -431,8 +434,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) } #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm_async_pf; struct kvm_arch_async_pf { unsigned long pfault_token; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ac0f90e26a0b..567fface45f8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..e1cb915a1096 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,8 +140,6 @@ static inline bool is_error_page(struct page *page) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -325,8 +323,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -1036,8 +1032,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + #include /* -- cgit v1.2.3 From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beggining was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/mips.c | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 16 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index aea259e9431f..032a8538318a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 132bb0d9c5ad..005a7b5fd0aa 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -87,7 +87,7 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b5045e3e05f8..be9970a59497 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5e14dcaf844e..ec6b9acb6bea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { long status; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 0b24d6622ec1..f2c249796ea8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -762,7 +762,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 0ec7490d70bd..e3b21e51ff7e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -77,7 +77,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 237cc0cc80a2..604000882352 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -682,7 +682,7 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72c3fc085207..da505237a664 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -384,7 +384,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, } EXPORT_SYMBOL_GPL(kvmppc_ld); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f6dd90684b97..a76a124dff48 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -452,7 +452,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b8fe1ae777db..628e992eeded 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -100,7 +100,7 @@ int test_vfacility(unsigned long nr) } /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 567fface45f8..73e4149eda33 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -661,8 +661,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int (*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7cd230e55118..8ef704037370 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -622,7 +622,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d70550d0bcff..671ca5edc709 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2728,7 +2728,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2792,7 +2792,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c10408ef9ab1..a375dfc42f6a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -6959,7 +6959,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6970,7 +6970,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -7051,10 +7051,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e1cb915a1096..e098dce179df 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -630,8 +630,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1d03967def40..7176929a4cda 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk) cpumask_set_cpu(cpu, cpus_hardware_enabled); - r = kvm_arch_hardware_enable(NULL); + r = kvm_arch_hardware_enable(); if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); @@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk) if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; cpumask_clear_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_disable(NULL); + kvm_arch_hardware_disable(); } static void hardware_disable(void) -- cgit v1.2.3 From 10c51b56232d24f150e39884a9e749fd99cbc60c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 27 Aug 2014 11:11:27 +0200 Subject: net: add skb_get_tx_queue() helper Replace occurences of skb_get_queue_mapping() and follow-up netdev_get_tx_queue() with an actual helper function. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ net/core/netpoll.c | 2 +- net/core/pktgen.c | 4 +--- net/packet/af_packet.c | 4 +--- net/sched/sch_generic.c | 6 ++++-- 5 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 429801370d0c..dfc1d8b8bd0f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1747,6 +1747,12 @@ struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, return &dev->_tx[index]; } +static inline struct netdev_queue *skb_get_tx_queue(const struct net_device *dev, + const struct sk_buff *skb) +{ + return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); +} + static inline void netdev_for_each_tx_queue(struct net_device *dev, void (*f)(struct net_device *, struct netdev_queue *, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a5ad06828d67..12b1df976562 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -115,7 +115,7 @@ static void queue_process(struct work_struct *work) continue; } - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); local_irq_save(flags); HARD_TX_LOCK(dev, txq, smp_processor_id()); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 83e2b4b19eb7..d81b540096c3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3286,7 +3286,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; - u16 queue_map; int ret; /* If device is offline, then don't send */ @@ -3324,8 +3323,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->delay && pkt_dev->last_ok) spin(pkt_dev, pkt_dev->next_tx); - queue_map = skb_get_queue_mapping(pkt_dev->skb); - txq = netdev_get_tx_queue(odev, queue_map); + txq = skb_get_tx_queue(odev, pkt_dev->skb); local_bh_disable(); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0dfa990d4eaa..b7a7f5a721bd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -243,7 +243,6 @@ static int packet_direct_xmit(struct sk_buff *skb) netdev_features_t features; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; - u16 queue_map; if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) @@ -254,8 +253,7 @@ static int packet_direct_xmit(struct sk_buff *skb) __skb_linearize(skb)) goto drop; - queue_map = skb_get_queue_mapping(skb); - txq = netdev_get_tx_queue(dev, queue_map); + txq = skb_get_tx_queue(dev, skb); local_bh_disable(); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fc04fe93c2da..05b3f5d104af 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -63,7 +63,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) if (unlikely(skb)) { /* check the reason of requeuing without tx lock first */ - txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; @@ -183,10 +183,12 @@ static inline int qdisc_restart(struct Qdisc *q) skb = dequeue_skb(q); if (unlikely(!skb)) return 0; + WARN_ON_ONCE(skb_dst_is_noref(skb)); + root_lock = qdisc_lock(q); dev = qdisc_dev(q); - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); return sch_direct_xmit(skb, q, dev, txq, root_lock); } -- cgit v1.2.3 From b95089d00c04712a9d4655d5c638930ac24b7bd3 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 27 Aug 2014 16:47:48 +0300 Subject: net/mlx4: Move the tunnel steering helper function to mlx4_core Move the function which we use to set VXLAN DMFS (flow-steering) rules from mlx4_en to mlx4_core. This refactoring will allow the mlx4_ib driver to call the helper for the use case of user-space RAW Ethernet QPs, such that they can serve VXLAN traffic too. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 31 ++------------------- drivers/net/ethernet/mellanox/mlx4/mcg.c | 38 ++++++++++++++++++++++++++ include/linux/mlx4/device.h | 3 ++ 3 files changed, 43 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index bb536aa613f4..abddcf8c40aa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -474,39 +474,12 @@ static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *ad int qpn, u64 *reg_id) { int err; - struct mlx4_spec_list spec_eth_outer = { {NULL} }; - struct mlx4_spec_list spec_vxlan = { {NULL} }; - struct mlx4_spec_list spec_eth_inner = { {NULL} }; - - struct mlx4_net_trans_rule rule = { - .queue_mode = MLX4_NET_TRANS_Q_FIFO, - .exclusive = 0, - .allow_loopback = 1, - .promisc_mode = MLX4_FS_REGULAR, - .priority = MLX4_DOMAIN_NIC, - }; - - __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) return 0; /* do nothing */ - rule.port = priv->port; - rule.qpn = qpn; - INIT_LIST_HEAD(&rule.list); - - spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH; - memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN); - memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN); - - spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN; /* any vxlan header */ - spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH; /* any inner eth header */ - - list_add_tail(&spec_eth_outer.list, &rule.list); - list_add_tail(&spec_vxlan.list, &rule.list); - list_add_tail(&spec_eth_inner.list, &rule.list); - - err = mlx4_flow_attach(priv->mdev->dev, &rule, reg_id); + err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn, + MLX4_DOMAIN_NIC, reg_id); if (err) { en_err(priv, "failed to add vxlan steering rule, err %d\n", err); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index d80e7a6fac74..ca0f98c95105 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1020,6 +1020,44 @@ int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) } EXPORT_SYMBOL_GPL(mlx4_flow_detach); +int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr, + int port, int qpn, u16 prio, u64 *reg_id) +{ + int err; + struct mlx4_spec_list spec_eth_outer = { {NULL} }; + struct mlx4_spec_list spec_vxlan = { {NULL} }; + struct mlx4_spec_list spec_eth_inner = { {NULL} }; + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_REGULAR, + }; + + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + rule.port = port; + rule.qpn = qpn; + rule.priority = prio; + INIT_LIST_HEAD(&rule.list); + + spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN); + memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + + spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN; /* any vxlan header */ + spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH; /* any inner eth header */ + + list_add_tail(&spec_eth_outer.list, &rule.list); + list_add_tail(&spec_vxlan.list, &rule.list); + list_add_tail(&spec_eth_inner.list, &rule.list); + + err = mlx4_flow_attach(dev, &rule, reg_id); + return err; +} +EXPORT_SYMBOL(mlx4_tunnel_steer_add); + int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 071f6b234604..511c6e0d21a9 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1196,6 +1196,9 @@ int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id); int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id); +int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr, + int port, int qpn, u16 prio, u64 *reg_id); + void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val); -- cgit v1.2.3 From 18fe8db5f2b53e4ac67b47048f24f50c57a2a759 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:31 +0200 Subject: include/linux/cycx_x25.h: Remove unused header The header file include/linux/cycx_x25.h does not seem to be used anywhere. It was orphaned by 6fcdf4facb "wanrouter: delete now orphaned header content, files/drivers". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/cycx_x25.h | 125 ----------------------------------------------- 1 file changed, 125 deletions(-) delete mode 100644 include/linux/cycx_x25.h (limited to 'include/linux') diff --git a/include/linux/cycx_x25.h b/include/linux/cycx_x25.h deleted file mode 100644 index 362bf19d6cf1..000000000000 --- a/include/linux/cycx_x25.h +++ /dev/null @@ -1,125 +0,0 @@ -#ifndef _CYCX_X25_H -#define _CYCX_X25_H -/* -* cycx_x25.h Cyclom X.25 firmware API definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdla_x25.h by Gene Kozin <74604.152@compuserve.com> -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2000/04/02 acme dprintk and cycx_debug -* 1999/01/03 acme judicious use of data types -* 1999/01/02 acme #define X25_ACK_N3 0x4411 -* 1998/12/28 acme cleanup: lot'o'things removed -* commands listed, -* TX25Cmd & TX25Config structs -* typedef'ed -*/ -#ifndef PACKED -#define PACKED __attribute__((packed)) -#endif - -/* X.25 shared memory layout. */ -#define X25_MBOX_OFFS 0x300 /* general mailbox block */ -#define X25_RXMBOX_OFFS 0x340 /* receive mailbox */ - -/* Debug */ -#define dprintk(level, format, a...) if (cycx_debug >= level) printk(format, ##a) - -extern unsigned int cycx_debug; - -/* Data Structures */ -/* X.25 Command Block. */ -struct cycx_x25_cmd { - u16 command; - u16 link; /* values: 0 or 1 */ - u16 len; /* values: 0 thru 0x205 (517) */ - u32 buf; -} PACKED; - -/* Defines for the 'command' field. */ -#define X25_CONNECT_REQUEST 0x4401 -#define X25_CONNECT_RESPONSE 0x4402 -#define X25_DISCONNECT_REQUEST 0x4403 -#define X25_DISCONNECT_RESPONSE 0x4404 -#define X25_DATA_REQUEST 0x4405 -#define X25_ACK_TO_VC 0x4406 -#define X25_INTERRUPT_RESPONSE 0x4407 -#define X25_CONFIG 0x4408 -#define X25_CONNECT_INDICATION 0x4409 -#define X25_CONNECT_CONFIRM 0x440A -#define X25_DISCONNECT_INDICATION 0x440B -#define X25_DISCONNECT_CONFIRM 0x440C -#define X25_DATA_INDICATION 0x440E -#define X25_INTERRUPT_INDICATION 0x440F -#define X25_ACK_FROM_VC 0x4410 -#define X25_ACK_N3 0x4411 -#define X25_CONNECT_COLLISION 0x4413 -#define X25_N3WIN 0x4414 -#define X25_LINE_ON 0x4415 -#define X25_LINE_OFF 0x4416 -#define X25_RESET_REQUEST 0x4417 -#define X25_LOG 0x4500 -#define X25_STATISTIC 0x4600 -#define X25_TRACE 0x4700 -#define X25_N2TRACEXC 0x4702 -#define X25_N3TRACEXC 0x4703 - -/** - * struct cycx_x25_config - cyclom2x x25 firmware configuration - * @link - link number - * @speed - line speed - * @clock - internal/external - * @n2 - # of level 2 retransm.(values: 1 thru FF) - * @n2win - level 2 window (values: 1 thru 7) - * @n3win - level 3 window (values: 1 thru 7) - * @nvc - # of logical channels (values: 1 thru 64) - * @pktlen - level 3 packet length - log base 2 of size - * @locaddr - my address - * @remaddr - remote address - * @t1 - time, in seconds - * @t2 - time, in seconds - * @t21 - time, in seconds - * @npvc - # of permanent virt. circuits (1 thru nvc) - * @t23 - time, in seconds - * @flags - see dosx25.doc, in portuguese, for details - */ -struct cycx_x25_config { - u8 link; - u8 speed; - u8 clock; - u8 n2; - u8 n2win; - u8 n3win; - u8 nvc; - u8 pktlen; - u8 locaddr; - u8 remaddr; - u16 t1; - u16 t2; - u8 t21; - u8 npvc; - u8 t23; - u8 flags; -} PACKED; - -struct cycx_x25_stats { - u16 rx_crc_errors; - u16 rx_over_errors; - u16 n2_tx_frames; - u16 n2_rx_frames; - u16 tx_timeouts; - u16 rx_timeouts; - u16 n3_tx_packets; - u16 n3_rx_packets; - u16 tx_aborts; - u16 rx_aborts; -} PACKED; -#endif /* _CYCX_X25_H */ -- cgit v1.2.3 From fbd74659d4513816a6249b0db491e8d831803520 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:32 +0200 Subject: include/linux/i82593.h: Remove unused header The header file include/linux/i82593.h does not seem to be used anywhere. It was orphaned by 8a594170 "drivers/net: delete intel i825xx based znet notebook driver". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/i82593.h | 229 ------------------------------------------------- 1 file changed, 229 deletions(-) delete mode 100644 include/linux/i82593.h (limited to 'include/linux') diff --git a/include/linux/i82593.h b/include/linux/i82593.h deleted file mode 100644 index afac5c7a323d..000000000000 --- a/include/linux/i82593.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Definitions for Intel 82593 CSMA/CD Core LAN Controller - * The definitions are taken from the 1992 users manual with Intel - * order number 297125-001. - * - * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp - * - * Copyright 1994, Anders Klemets - * - * HISTORY - * i82593.h,v - * Revision 1.4 2005/11/4 09:15:00 baroniunas - * Modified copyright with permission of author as follows: - * - * "If I82539.H is the only file with my copyright statement - * that is included in the Source Forge project, then you have - * my approval to change the copyright statement to be a GPL - * license, in the way you proposed on October 10." - * - * Revision 1.1 1996/07/17 15:23:12 root - * Initial revision - * - * Revision 1.3 1995/04/05 15:13:58 adj - * Initial alpha release - * - * Revision 1.2 1994/06/16 23:57:31 klemets - * Mirrored all the fields in the configuration block. - * - * Revision 1.1 1994/06/02 20:25:34 klemets - * Initial revision - * - * - */ -#ifndef _I82593_H -#define _I82593_H - -/* Intel 82593 CSMA/CD Core LAN Controller */ - -/* Port 0 Command Register definitions */ - -/* Execution operations */ -#define OP0_NOP 0 /* CHNL = 0 */ -#define OP0_SWIT_TO_PORT_1 0 /* CHNL = 1 */ -#define OP0_IA_SETUP 1 -#define OP0_CONFIGURE 2 -#define OP0_MC_SETUP 3 -#define OP0_TRANSMIT 4 -#define OP0_TDR 5 -#define OP0_DUMP 6 -#define OP0_DIAGNOSE 7 -#define OP0_TRANSMIT_NO_CRC 9 -#define OP0_RETRANSMIT 12 -#define OP0_ABORT 13 -/* Reception operations */ -#define OP0_RCV_ENABLE 8 -#define OP0_RCV_DISABLE 10 -#define OP0_STOP_RCV 11 -/* Status pointer control operations */ -#define OP0_FIX_PTR 15 /* CHNL = 1 */ -#define OP0_RLS_PTR 15 /* CHNL = 0 */ -#define OP0_RESET 14 - -#define CR0_CHNL (1 << 4) /* 0=Channel 0, 1=Channel 1 */ -#define CR0_STATUS_0 0x00 -#define CR0_STATUS_1 0x20 -#define CR0_STATUS_2 0x40 -#define CR0_STATUS_3 0x60 -#define CR0_INT_ACK (1 << 7) /* 0=No ack, 1=acknowledge */ - -/* Port 0 Status Register definitions */ - -#define SR0_NO_RESULT 0 /* dummy */ -#define SR0_EVENT_MASK 0x0f -#define SR0_IA_SETUP_DONE 1 -#define SR0_CONFIGURE_DONE 2 -#define SR0_MC_SETUP_DONE 3 -#define SR0_TRANSMIT_DONE 4 -#define SR0_TDR_DONE 5 -#define SR0_DUMP_DONE 6 -#define SR0_DIAGNOSE_PASSED 7 -#define SR0_TRANSMIT_NO_CRC_DONE 9 -#define SR0_RETRANSMIT_DONE 12 -#define SR0_EXECUTION_ABORTED 13 -#define SR0_END_OF_FRAME 8 -#define SR0_RECEPTION_ABORTED 10 -#define SR0_DIAGNOSE_FAILED 15 -#define SR0_STOP_REG_HIT 11 - -#define SR0_CHNL (1 << 4) -#define SR0_EXECUTION (1 << 5) -#define SR0_RECEPTION (1 << 6) -#define SR0_INTERRUPT (1 << 7) -#define SR0_BOTH_RX_TX (SR0_EXECUTION | SR0_RECEPTION) - -#define SR3_EXEC_STATE_MASK 0x03 -#define SR3_EXEC_IDLE 0 -#define SR3_TX_ABORT_IN_PROGRESS 1 -#define SR3_EXEC_ACTIVE 2 -#define SR3_ABORT_IN_PROGRESS 3 -#define SR3_EXEC_CHNL (1 << 2) -#define SR3_STP_ON_NO_RSRC (1 << 3) -#define SR3_RCVING_NO_RSRC (1 << 4) -#define SR3_RCV_STATE_MASK 0x60 -#define SR3_RCV_IDLE 0x00 -#define SR3_RCV_READY 0x20 -#define SR3_RCV_ACTIVE 0x40 -#define SR3_RCV_STOP_IN_PROG 0x60 -#define SR3_RCV_CHNL (1 << 7) - -/* Port 1 Command Register definitions */ - -#define OP1_NOP 0 -#define OP1_SWIT_TO_PORT_0 1 -#define OP1_INT_DISABLE 2 -#define OP1_INT_ENABLE 3 -#define OP1_SET_TS 5 -#define OP1_RST_TS 7 -#define OP1_POWER_DOWN 8 -#define OP1_RESET_RING_MNGMT 11 -#define OP1_RESET 14 -#define OP1_SEL_RST 15 - -#define CR1_STATUS_4 0x00 -#define CR1_STATUS_5 0x20 -#define CR1_STATUS_6 0x40 -#define CR1_STOP_REG_UPDATE (1 << 7) - -/* Receive frame status bits */ - -#define RX_RCLD (1 << 0) -#define RX_IA_MATCH (1 << 1) -#define RX_NO_AD_MATCH (1 << 2) -#define RX_NO_SFD (1 << 3) -#define RX_SRT_FRM (1 << 7) -#define RX_OVRRUN (1 << 8) -#define RX_ALG_ERR (1 << 10) -#define RX_CRC_ERR (1 << 11) -#define RX_LEN_ERR (1 << 12) -#define RX_RCV_OK (1 << 13) -#define RX_TYP_LEN (1 << 15) - -/* Transmit status bits */ - -#define TX_NCOL_MASK 0x0f -#define TX_FRTL (1 << 4) -#define TX_MAX_COL (1 << 5) -#define TX_HRT_BEAT (1 << 6) -#define TX_DEFER (1 << 7) -#define TX_UND_RUN (1 << 8) -#define TX_LOST_CTS (1 << 9) -#define TX_LOST_CRS (1 << 10) -#define TX_LTCOL (1 << 11) -#define TX_OK (1 << 13) -#define TX_COLL (1 << 15) - -struct i82593_conf_block { - u_char fifo_limit : 4, - forgnesi : 1, - fifo_32 : 1, - d6mod : 1, - throttle_enb : 1; - u_char throttle : 6, - cntrxint : 1, - contin : 1; - u_char addr_len : 3, - acloc : 1, - preamb_len : 2, - loopback : 2; - u_char lin_prio : 3, - tbofstop : 1, - exp_prio : 3, - bof_met : 1; - u_char : 4, - ifrm_spc : 4; - u_char : 5, - slottim_low : 3; - u_char slottim_hi : 3, - : 1, - max_retr : 4; - u_char prmisc : 1, - bc_dis : 1, - : 1, - crs_1 : 1, - nocrc_ins : 1, - crc_1632 : 1, - : 1, - crs_cdt : 1; - u_char cs_filter : 3, - crs_src : 1, - cd_filter : 3, - : 1; - u_char : 2, - min_fr_len : 6; - u_char lng_typ : 1, - lng_fld : 1, - rxcrc_xf : 1, - artx : 1, - sarec : 1, - tx_jabber : 1, /* why is this called max_len in the manual? */ - hash_1 : 1, - lbpkpol : 1; - u_char : 6, - fdx : 1, - : 1; - u_char dummy_6 : 6, /* supposed to be ones */ - mult_ia : 1, - dis_bof : 1; - u_char dummy_1 : 1, /* supposed to be one */ - tx_ifs_retrig : 2, - mc_all : 1, - rcv_mon : 2, - frag_acpt : 1, - tstrttrs : 1; - u_char fretx : 1, - runt_eop : 1, - hw_sw_pin : 1, - big_endn : 1, - syncrqs : 1, - sttlen : 1, - tx_eop : 1, - rx_eop : 1; - u_char rbuf_size : 5, - rcvstop : 1, - : 2; -}; - -#define I82593_MAX_MULTICAST_ADDRESSES 128 /* Hardware hashed filter */ - -#endif /* _I82593_H */ -- cgit v1.2.3 From 6fb7c3778f0fba0bad099c30e834c413c4f8bcb5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:33 +0200 Subject: include/linux/phonedev.h: Remove unused header The header file include/linux/phonedev.h does not seem to be used anywhere. It was orphaned by 7326446c "Staging: remove telephony drivers". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/phonedev.h | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 include/linux/phonedev.h (limited to 'include/linux') diff --git a/include/linux/phonedev.h b/include/linux/phonedev.h deleted file mode 100644 index 4269de99e320..000000000000 --- a/include/linux/phonedev.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __LINUX_PHONEDEV_H -#define __LINUX_PHONEDEV_H - -#include - -#ifdef __KERNEL__ - -#include - -struct phone_device { - struct phone_device *next; - const struct file_operations *f_op; - int (*open) (struct phone_device *, struct file *); - int board; /* Device private index */ - int minor; -}; - -extern int phonedev_init(void); -#define PHONE_MAJOR 100 -extern int phone_register_device(struct phone_device *, int unit); -#define PHONE_UNIT_ANY -1 -extern void phone_unregister_device(struct phone_device *); - -#endif -#endif -- cgit v1.2.3 From de20fe8e2cc3c4ca13fdb529e6720d9d199333fe Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:35 -0700 Subject: net: Allocate a new 16 bits for flags in skbuff Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b69b7b512c06..3c9574c80933 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -598,6 +598,10 @@ struct sk_buff { __u32 reserved_tailroom; }; + kmemcheck_bitfield_begin(flags3); + /* 16 bit hole */ + kmemcheck_bitfield_end(flags3); + __be16 inner_protocol; __u16 inner_transport_header; __u16 inner_network_header; -- cgit v1.2.3 From 77cffe23c1f88835f6bd7b47bfa0c060c2969828 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:46 -0700 Subject: net: Clarification of CHECKSUM_UNNECESSARY This patch: - Clarifies the specific requirements of devices returning CHECKSUM_UNNECESSARY (comments in skbuff.h). - Adds csum_level field to skbuff. This is used to express how many checksums are covered by CHECKSUM_UNNECESSARY (stores n - 1). This replaces the overloading of skb->encapsulation, that field is is now only used to indicate inner headers are valid. - Change __skb_checksum_validate_needed to "consume" each checksum as indicated by csum_level as layers of the the packet are parsed. - Remove skb_pop_rcv_encapsulation, no longer needed in the new csum_level model. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 -- include/linux/skbuff.h | 74 ++++++++++++++++++++++++++++++++++---------------- net/ipv4/gre_demux.c | 1 - 3 files changed, 51 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index beb377b2d4b7..67527f3d3be2 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1158,8 +1158,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; - skb_pop_rcv_encapsulation(skb); - vs->rcv(vs, skb, vxh->vx_vni); return 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3c9574c80933..c93b5859a772 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -47,11 +47,29 @@ * * The hardware you're dealing with doesn't calculate the full checksum * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums - * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will - * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still - * undefined in this case though. It is a bad option, but, unfortunately, - * nowadays most vendors do this. Apparently with the secret goal to sell - * you new devices, when you will add new protocol to your host, f.e. IPv6 8) + * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY + * if their checksums are okay. skb->csum is still undefined in this case + * though. It is a bad option, but, unfortunately, nowadays most vendors do + * this. Apparently with the secret goal to sell you new devices, when you + * will add new protocol to your host, f.e. IPv6 8) + * + * CHECKSUM_UNNECESSARY is applicable to following protocols: + * TCP: IPv6 and IPv4. + * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a + * zero UDP checksum for either IPv4 or IPv6, the networking stack + * may perform further validation in this case. + * GRE: only if the checksum is present in the header. + * SCTP: indicates the CRC in SCTP header has been validated. + * + * skb->csum_level indicates the number of consecutive checksums found in + * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. + * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet + * and a device is able to verify the checksums for UDP (possibly zero), + * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * two. If the device were only able to verify the UDP checksum and not + * GRE, either because it doesn't support GRE checksum of because GRE + * checksum is bad, skb->csum_level would be set to zero (TCP checksum is + * not considered in this case). * * CHECKSUM_COMPLETE: * @@ -112,6 +130,9 @@ #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 +/* Maximum value in skb->csum_level */ +#define SKB_MAX_CSUM_LEVEL 3 + #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -571,11 +592,7 @@ struct sk_buff { __u8 wifi_acked:1; __u8 no_fcs:1; __u8 head_frag:1; - /* Encapsulation protocol and NIC drivers should use - * this flag to indicate to each other if the skb contains - * encapsulated packet or not and maybe use the inner packet - * headers if needed - */ + /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; @@ -599,7 +616,8 @@ struct sk_buff { }; kmemcheck_bitfield_begin(flags3); - /* 16 bit hole */ + __u8 csum_level:2; + /* 14 bit hole */ kmemcheck_bitfield_end(flags3); __be16 inner_protocol; @@ -1866,18 +1884,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) return pskb_may_pull(skb, skb_network_offset(skb) + len); } -static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb) -{ - /* Only continue with checksum unnecessary if device indicated - * it is valid across encapsulation (skb->encapsulation was set). - */ - if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation) - skb->ip_summed = CHECKSUM_NONE; - - skb->encapsulation = 0; - skb->csum_valid = 0; -} - /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the @@ -2798,6 +2804,27 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 0 : __skb_checksum_complete(skb); } +static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level == 0) + skb->ip_summed = CHECKSUM_NONE; + else + skb->csum_level--; + } +} + +static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level < SKB_MAX_CSUM_LEVEL) + skb->csum_level++; + } else if (skb->ip_summed == CHECKSUM_NONE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = 0; + } +} + /* Check if we need to perform checksum complete validation. * * Returns true if checksum complete is needed, false otherwise @@ -2809,6 +2836,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, { if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { skb->csum_valid = 1; + __skb_decr_checksum_unnecessary(skb); return false; } diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 7c1a8ff974dd..0485bf7f8f03 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -125,7 +125,6 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, *csum_err = true; return -EINVAL; } - skb_pop_rcv_encapsulation(skb); options++; } -- cgit v1.2.3 From 662880f4420340aad4f9a62a349c6c9d4faa1a5d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:56 -0700 Subject: net: Allow GRO to use and set levels of checksum unnecessary Allow GRO path to "consume" checksums provided in CHECKSUM_UNNECESSARY and to report new checksums verfied for use in fallback to normal path. Change GRO checksum path to track csum_level using a csum_cnt field in NAPI_GRO_CB. On GRO initialization, if ip_summed is CHECKSUM_UNNECESSARY set NAPI_GRO_CB(skb)->csum_cnt to skb->csum_level + 1. For each checksum verified, decrement NAPI_GRO_CB(skb)->csum_cnt while its greater than zero. If a checksum is verfied and NAPI_GRO_CB(skb)->csum_cnt == 0, we have verified a deeper checksum than originally indicated in skbuf so increment csum_level (or initialize to CHECKSUM_UNNECESSARY if ip_summed is CHECKSUM_NONE or CHECKSUM_COMPLETE). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 26 ++++++++++++-------------- net/core/dev.c | 24 ++++++++++++++++-------- net/ipv4/gre_offload.c | 7 ++----- net/ipv4/udp_offload.c | 5 +++-- 4 files changed, 33 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dfc1d8b8bd0f..456eb1fe51e8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1883,8 +1883,8 @@ struct napi_gro_cb { /* GRO checksum is valid */ u8 csum_valid:1; - /* Number encapsulation layers crossed */ - u8 encapsulation; + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2179,8 +2179,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, __sum16 check) { return (skb->ip_summed != CHECKSUM_PARTIAL && - (skb->ip_summed != CHECKSUM_UNNECESSARY || - (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) && + NAPI_GRO_CB(skb)->csum_cnt == 0 && (!zero_okay || check)); } @@ -2196,18 +2195,17 @@ static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, return __skb_gro_checksum_complete(skb); } -/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level - * checksum or an encapsulated one during GRO. This saves work - * if we fallback to normal path with the packet. - */ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (NAPI_GRO_CB(skb)->encapsulation) - skb->encapsulation = 1; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->encapsulation = 0; + if (NAPI_GRO_CB(skb)->csum_cnt > 0) { + /* Consume a checksum from CHECKSUM_UNNECESSARY */ + NAPI_GRO_CB(skb)->csum_cnt--; + } else { + /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we + * verified a new top level checksum or an encapsulated one + * during GRO. This saves work if we fallback to normal path. + */ + __skb_incr_checksum_unnecessary(skb); } } diff --git a/net/core/dev.c b/net/core/dev.c index 26d296c2447c..a6077ef56345 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3962,13 +3962,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff gro_list_prepare(napi, skb); - if (skb->ip_summed == CHECKSUM_COMPLETE) { - NAPI_GRO_CB(skb)->csum = skb->csum; - NAPI_GRO_CB(skb)->csum_valid = 1; - } else { - NAPI_GRO_CB(skb)->csum_valid = 0; - } - rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { if (ptype->type != type || !ptype->callbacks.gro_receive) @@ -3980,7 +3973,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; - NAPI_GRO_CB(skb)->encapsulation = 0; + + /* Setup for GRO checksum validation */ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + NAPI_GRO_CB(skb)->csum_cnt = 0; + break; + case CHECKSUM_UNNECESSARY: + NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; + NAPI_GRO_CB(skb)->csum_valid = 0; + break; + default: + NAPI_GRO_CB(skb)->csum_cnt = 0; + NAPI_GRO_CB(skb)->csum_valid = 0; + } pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index d1bd16937d93..a4d7965fb880 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -172,12 +172,9 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, } /* Don't bother verifying checksum if we're going to flush anyway. */ - if (greh->flags & GRE_CSUM) { - if (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_simple_validate(skb)) + if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_simple_validate(skb)) goto out_unlock; - NAPI_GRO_CB(skb)->encapsulation++; - } flush = 0; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8ed460e3753c..a6adff98382a 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -238,12 +238,13 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, int flush = 1; if (NAPI_GRO_CB(skb)->udp_mark || - (!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid)) + (skb->ip_summed != CHECKSUM_PARTIAL && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the udp gro layer */ NAPI_GRO_CB(skb)->udp_mark = 1; - NAPI_GRO_CB(skb)->encapsulation++; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); -- cgit v1.2.3 From 4e00517945bed110f1b8de580cce97626e9ef0b5 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 31 Aug 2014 21:10:51 +0200 Subject: regulator: max1586: add device-tree support Add device-tree support to max1586. The driver can still be used with the legacy platform data, or the new device-tree way. This work is heavily inspired by the device-tree support of its cousin max8660 driver. Signed-off-by: Robert Jarzmik Signed-off-by: Mark Brown --- drivers/regulator/max1586.c | 81 ++++++++++++++++++++++++++++++++++++++- include/linux/regulator/max1586.h | 2 +- 2 files changed, 80 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c index d23d0577754b..5c04a7159baa 100644 --- a/drivers/regulator/max1586.c +++ b/drivers/regulator/max1586.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #define MAX1586_V3_MAX_VSEL 31 #define MAX1586_V6_MAX_VSEL 3 @@ -157,13 +159,87 @@ static struct regulator_desc max1586_reg[] = { }, }; +int of_get_max1586_platform_data(struct device *dev, + struct max1586_platform_data *pdata) +{ + struct max1586_subdev_data *sub; + struct of_regulator_match rmatch[ARRAY_SIZE(max1586_reg)]; + struct device_node *np = dev->of_node; + int i, matched; + + if (of_property_read_u32(np, "v3-gain", + &pdata->v3_gain) < 0) { + dev_err(dev, "%s has no 'v3-gain' property\n", np->full_name); + return -EINVAL; + } + + np = of_get_child_by_name(np, "regulators"); + if (!np) { + dev_err(dev, "missing 'regulators' subnode in DT\n"); + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(rmatch); i++) + rmatch[i].name = max1586_reg[i].name; + + matched = of_regulator_match(dev, np, rmatch, ARRAY_SIZE(rmatch)); + of_node_put(np); + /* + * If matched is 0, ie. neither Output_V3 nor Output_V6 have been found, + * return 0, which signals the normal situation where no subregulator is + * available. This is normal because the max1586 doesn't provide any + * readback support, so the subregulators can't report any status + * anyway. If matched < 0, return the error. + */ + if (matched <= 0) + return matched; + + pdata->subdevs = devm_kzalloc(dev, sizeof(struct max1586_subdev_data) * + matched, GFP_KERNEL); + if (!pdata->subdevs) + return -ENOMEM; + + pdata->num_subdevs = matched; + sub = pdata->subdevs; + + for (i = 0; i < matched; i++) { + sub->id = i; + sub->name = rmatch[i].of_node->name; + sub->platform_data = rmatch[i].init_data; + sub++; + } + + return 0; +} + +static const struct of_device_id max1586_of_match[] = { + { .compatible = "maxim,max1586", }, + {}, +}; +MODULE_DEVICE_TABLE(of, max1586_of_match); + static int max1586_pmic_probe(struct i2c_client *client, const struct i2c_device_id *i2c_id) { - struct max1586_platform_data *pdata = dev_get_platdata(&client->dev); + struct max1586_platform_data *pdata, pdata_of; struct regulator_config config = { }; struct max1586_data *max1586; - int i, id; + int i, id, ret; + const struct of_device_id *match; + + pdata = dev_get_platdata(&client->dev); + if (client->dev.of_node && !pdata) { + match = of_match_device(of_match_ptr(max1586_of_match), + &client->dev); + if (!match) { + dev_err(&client->dev, "Error: No device match found\n"); + return -ENODEV; + } + ret = of_get_max1586_platform_data(&client->dev, &pdata_of); + if (ret < 0) + return ret; + pdata = &pdata_of; + } max1586 = devm_kzalloc(&client->dev, sizeof(struct max1586_data), GFP_KERNEL); @@ -229,6 +305,7 @@ static struct i2c_driver max1586_pmic_driver = { .driver = { .name = "max1586", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(max1586_of_match), }, .id_table = max1586_id, }; diff --git a/include/linux/regulator/max1586.h b/include/linux/regulator/max1586.h index de9a7fae20be..cedd0febe882 100644 --- a/include/linux/regulator/max1586.h +++ b/include/linux/regulator/max1586.h @@ -40,7 +40,7 @@ */ struct max1586_subdev_data { int id; - char *name; + const char *name; struct regulator_init_data *platform_data; }; -- cgit v1.2.3 From 068765ba7987e73d4381edfe47b70aa121c7155c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Sep 2014 13:47:49 +0200 Subject: PM / sleep: Mechanism for aborting system suspends unconditionally It sometimes may be necessary to abort a system suspend in progress or wake up the system from suspend-to-idle even if the pm_wakeup_event()/pm_stay_awake() mechanism is not enabled. For this purpose, introduce a new global variable pm_abort_suspend and make pm_wakeup_pending() check its value. Also add routines for manipulating that variable. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 16 +++++++++++++++- include/linux/suspend.h | 4 ++++ kernel/power/process.c | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index eb1bd2ecad8b..c2744b30d5d9 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -24,6 +24,9 @@ */ bool events_check_enabled __read_mostly; +/* If set and the system is suspending, terminate the suspend. */ +static bool pm_abort_suspend __read_mostly; + /* * Combined counters of registered wakeup events and wakeup events in progress. * They need to be modified together atomically, so it's better to use one @@ -719,7 +722,18 @@ bool pm_wakeup_pending(void) pm_print_active_wakeup_sources(); } - return ret; + return ret || pm_abort_suspend; +} + +void pm_system_wakeup(void) +{ + pm_abort_suspend = true; + freeze_wake(); +} + +void pm_wakeup_clear(void) +{ + pm_abort_suspend = false; } /** diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 519064e0c943..06a9910827c2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_wakeup_pending(void); +extern void pm_system_wakeup(void); +extern void pm_wakeup_clear(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); @@ -418,6 +420,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } +static inline void pm_system_wakeup(void) {} +static inline void pm_wakeup_clear(void) {} static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} diff --git a/kernel/power/process.c b/kernel/power/process.c index 4ee194eb524b..7b323221b9ee 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -129,6 +129,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); + pm_wakeup_clear(); printk("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); -- cgit v1.2.3 From cab303be91dc47942bc25de33dc1140123540800 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 Aug 2014 11:44:31 +0200 Subject: genirq: Add sanity checks for PM options on shared interrupt lines Account the IRQF_NO_SUSPEND and IRQF_RESUME_EARLY actions on shared interrupt lines and yell loudly if there is a mismatch. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/irqdesc.h | 10 ++++++++++ kernel/irq/internals.h | 10 ++++++++++ kernel/irq/manage.c | 4 ++++ kernel/irq/pm.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 472c021a2d4f..cb1a31e448ae 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -36,6 +36,11 @@ struct irq_desc; * @threads_oneshot: bitfield to handle shared oneshot threads * @threads_active: number of irqaction threads currently running * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers + * @nr_actions: number of installed actions on this descriptor + * @no_suspend_depth: number of irqactions on a irq descriptor with + * IRQF_NO_SUSPEND set + * @force_resume_depth: number of irqactions on a irq descriptor with + * IRQF_FORCE_RESUME set * @dir: /proc/irq/ procfs entry * @name: flow handler name for /proc/interrupts output */ @@ -68,6 +73,11 @@ struct irq_desc { unsigned long threads_oneshot; atomic_t threads_active; wait_queue_head_t wait_for_threads; +#ifdef CONFIG_PM_SLEEP + unsigned int nr_actions; + unsigned int no_suspend_depth; + unsigned int force_resume_depth; +#endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index af2821178900..c402502a5111 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -194,3 +194,13 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d __this_cpu_inc(*desc->kstat_irqs); __this_cpu_inc(kstat.irqs_sum); } + +#ifdef CONFIG_PM_SLEEP +void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); +void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); +#else +static inline void +irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } +static inline void +irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { } +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index fa564e8db996..0a9104b4608b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1200,6 +1200,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) new->irq = irq; *old_ptr = new; + irq_pm_install_action(desc, new); + /* Reset broken irq detection when installing new handler */ desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -1318,6 +1320,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) /* Found it - now remove it from the list of entries: */ *action_ptr = action->next; + irq_pm_remove_action(desc, action); + /* If this was the last handler, shut down the IRQ line: */ if (!desc->action) { irq_shutdown(desc); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index b84141dcee5e..1b1b67a73218 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -13,6 +13,42 @@ #include "internals.h" +/* + * Called from __setup_irq() with desc->lock held after @action has + * been installed in the action chain. + */ +void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) +{ + desc->nr_actions++; + + if (action->flags & IRQF_FORCE_RESUME) + desc->force_resume_depth++; + + WARN_ON_ONCE(desc->force_resume_depth && + desc->force_resume_depth != desc->nr_actions); + + if (action->flags & IRQF_NO_SUSPEND) + desc->no_suspend_depth++; + + WARN_ON_ONCE(desc->no_suspend_depth && + desc->no_suspend_depth != desc->nr_actions); +} + +/* + * Called from __free_irq() with desc->lock held after @action has + * been removed from the action chain. + */ +void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) +{ + desc->nr_actions--; + + if (action->flags & IRQF_FORCE_RESUME) + desc->force_resume_depth--; + + if (action->flags & IRQF_NO_SUSPEND) + desc->no_suspend_depth--; +} + static void suspend_device_irq(struct irq_desc *desc, int irq) { if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)) -- cgit v1.2.3 From b76f16748fa61801b1a1fd3ffb6f25ee228a35e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Aug 2014 13:54:09 +0200 Subject: genirq: Mark wakeup sources as armed on suspend This allows us to utilize this information in the irq_may_run() check without adding another conditional to the fast path. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/irq.h | 8 ++++++++ kernel/irq/pm.c | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 62af59242ddc..03f48d936f66 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -173,6 +173,7 @@ struct irq_data { * IRQD_IRQ_DISABLED - Disabled state of the interrupt * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt + * IRQD_WAKEUP_ARMED - Wakeup mode armed */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -186,6 +187,7 @@ enum { IRQD_IRQ_DISABLED = (1 << 16), IRQD_IRQ_MASKED = (1 << 17), IRQD_IRQ_INPROGRESS = (1 << 18), + IRQD_WAKEUP_ARMED = (1 << 19), }; static inline bool irqd_is_setaffinity_pending(struct irq_data *d) @@ -257,6 +259,12 @@ static inline bool irqd_irq_inprogress(struct irq_data *d) return d->state_use_accessors & IRQD_IRQ_INPROGRESS; } +static inline bool irqd_is_wakeup_armed(struct irq_data *d) +{ + return d->state_use_accessors & IRQD_WAKEUP_ARMED; +} + + /* * Functions for chained handlers which can be enabled/disabled by the * standard disable_irq/enable_irq calls. Must be called with diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index cf0ce0163db9..766930eaeed9 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -54,6 +54,9 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) if (!desc->action || desc->no_suspend_depth) return false; + if (irqd_is_wakeup_set(&desc->irq_data)) + irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + desc->istate |= IRQS_SUSPENDED; __disable_irq(desc, irq); @@ -101,6 +104,8 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs); static void resume_irq(struct irq_desc *desc, int irq) { + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + if (desc->istate & IRQS_SUSPENDED) goto resume; -- cgit v1.2.3 From 9ce7a25849e80cfb264f4995f832b932c1987e1a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Aug 2014 14:00:16 +0200 Subject: genirq: Simplify wakeup mechanism Currently we suspend wakeup interrupts by lazy disabling them and check later whether the interrupt has fired, but that's not sufficient for suspend to idle as there is no way to check that once we transitioned into the CPU idle state. So we change the mechanism in the following way: 1) Leave the wakeup interrupts enabled across suspend 2) Add a check to irq_may_run() which is called at the beginning of each flow handler whether the interrupt is an armed wakeup source. This check is basically free as it just extends the existing check for IRQD_IRQ_INPROGRESS. So no new conditional in the hot path. If the IRQD_WAKEUP_ARMED flag is set, then the interrupt is disabled, marked as pending/suspended and the pm core is notified about the wakeup event. Signed-off-by: Thomas Gleixner [ rjw: syscore.c and put irq_pm_check_wakeup() into pm.c ] Signed-off-by: Rafael J. Wysocki --- drivers/base/syscore.c | 7 +++--- include/linux/interrupt.h | 5 ----- kernel/irq/chip.c | 20 ++++++++++++++++- kernel/irq/internals.h | 2 ++ kernel/irq/pm.c | 55 +++++++++++++++++++++++++---------------------- 5 files changed, 53 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index dbb8350ea8dc..8d98a329f6ea 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include static LIST_HEAD(syscore_ops_list); @@ -54,9 +54,8 @@ int syscore_suspend(void) pr_debug("Checking wakeup interrupts\n"); /* Return error code if there are any wakeup interrupts pending. */ - ret = check_wakeup_irqs(); - if (ret) - return ret; + if (pm_wakeup_pending()) + return -EBUSY; WARN_ONCE(!irqs_disabled(), "Interrupts enabled before system core suspend.\n"); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 698ad053d064..69517a24bc50 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -193,11 +193,6 @@ extern void irq_wake_thread(unsigned int irq, void *dev_id); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); -#ifdef CONFIG_PM_SLEEP -extern int check_wakeup_irqs(void); -#else -static inline int check_wakeup_irqs(void) { return 0; } -#endif /** * struct irq_affinity_notify - context for notification of IRQ affinity changes diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6baf86085571..e7917ff8a486 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -344,8 +344,26 @@ static bool irq_check_poll(struct irq_desc *desc) static bool irq_may_run(struct irq_desc *desc) { - if (!irqd_irq_inprogress(&desc->irq_data)) + unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; + + /* + * If the interrupt is not in progress and is not an armed + * wakeup interrupt, proceed. + */ + if (!irqd_has_set(&desc->irq_data, mask)) return true; + + /* + * If the interrupt is an armed wakeup source, mark it pending + * and suspended, disable it and notify the pm core about the + * event. + */ + if (irq_pm_check_wakeup(desc)) + return false; + + /* + * Handle a potential concurrent poll on a different core. + */ return irq_check_poll(desc); } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index c402502a5111..4332d766619d 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -196,9 +196,11 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d } #ifdef CONFIG_PM_SLEEP +bool irq_pm_check_wakeup(struct irq_desc *desc); void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); #else +static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } static inline void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } static inline void diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 766930eaeed9..3ca532592704 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -9,10 +9,24 @@ #include #include #include +#include #include #include "internals.h" +bool irq_pm_check_wakeup(struct irq_desc *desc) +{ + if (irqd_is_wakeup_armed(&desc->irq_data)) { + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; + desc->depth++; + irq_disable(desc); + pm_system_wakeup(); + return true; + } + return false; +} + /* * Called from __setup_irq() with desc->lock held after @action has * been installed in the action chain. @@ -54,8 +68,16 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) if (!desc->action || desc->no_suspend_depth) return false; - if (irqd_is_wakeup_set(&desc->irq_data)) + if (irqd_is_wakeup_set(&desc->irq_data)) { irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + /* + * We return true here to force the caller to issue + * synchronize_irq(). We need to make sure that the + * IRQD_WAKEUP_ARMED is visible before we return from + * suspend_device_irqs(). + */ + return true; + } desc->istate |= IRQS_SUSPENDED; __disable_irq(desc, irq); @@ -79,9 +101,13 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) * for this purpose. * * So we disable all interrupts and mark them IRQS_SUSPENDED except - * for those which are unused and those which are marked as not + * for those which are unused, those which are marked as not * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND - * set. + * set and those which are marked as active wakeup sources. + * + * The active wakeup sources are handled by the flow handler entry + * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the + * interrupt and notifies the pm core about the wakeup. */ void suspend_device_irqs(void) { @@ -173,26 +199,3 @@ void resume_device_irqs(void) resume_irqs(false); } EXPORT_SYMBOL_GPL(resume_device_irqs); - -/** - * check_wakeup_irqs - check if any wake-up interrupts are pending - */ -int check_wakeup_irqs(void) -{ - struct irq_desc *desc; - int irq; - - for_each_irq_desc(irq, desc) { - /* - * Only interrupts which are marked as wakeup source - * and have not been disabled before the suspend check - * can abort suspend. - */ - if (irqd_is_wakeup_set(&desc->irq_data)) { - if (desc->depth == 1 && desc->istate & IRQS_PENDING) - return -EBUSY; - } - } - - return 0; -} -- cgit v1.2.3 From 10b3ad8c21bb4b135768c30dd4c51a1c744da699 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Aug 2014 21:07:24 -0700 Subject: net: Do txq_trans_update() in netdev_start_xmit() That way we don't have to audit every call site to make sure it is doing this properly. Signed-off-by: David S. Miller --- drivers/net/wan/dlci.c | 6 ++++-- include/linux/netdevice.h | 10 ++++++++-- net/core/dev.c | 7 ++----- net/core/netpoll.c | 4 +--- net/core/pktgen.c | 3 +-- net/packet/af_packet.c | 7 ++----- net/sched/sch_teql.c | 3 +-- 7 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 81b22a180aad..6427e8283419 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -192,8 +192,10 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) { struct dlci_local *dlp = netdev_priv(dev); - if (skb) - netdev_start_xmit(skb, dlp->slave); + if (skb) { + struct netdev_queue *txq = skb_get_tx_queue(dev, skb); + netdev_start_xmit(skb, dlp->slave, txq); + } return NETDEV_TX_OK; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 456eb1fe51e8..16171802ea7d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3437,11 +3437,17 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, return ops->ndo_start_xmit(skb, dev); } -static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) +static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; + int rc; - return __netdev_start_xmit(ops, skb, dev); + rc = __netdev_start_xmit(ops, skb, dev); + if (rc == NETDEV_TX_OK) + txq_trans_update(txq); + + return rc; } int netdev_class_create_file_ns(struct class_attribute *class_attr, diff --git a/net/core/dev.c b/net/core/dev.c index a6077ef56345..6392adaaa22f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2666,10 +2666,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev, txq); trace_net_dev_xmit(skb, rc, dev, skb_len); - if (rc == NETDEV_TX_OK) - txq_trans_update(txq); return rc; } @@ -2685,7 +2683,7 @@ gso: skb_len = nskb->len; trace_net_dev_start_xmit(nskb, dev); - rc = netdev_start_xmit(nskb, dev); + rc = netdev_start_xmit(nskb, dev, txq); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) @@ -2694,7 +2692,6 @@ gso: skb->next = nskb; return rc; } - txq_trans_update(txq); if (unlikely(netif_xmit_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 12b1df976562..05bc57edaa81 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -91,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = netdev_start_xmit(skb, dev); - if (status == NETDEV_TX_OK) - txq_trans_update(txq); + status = netdev_start_xmit(skb, dev, txq); out: return status; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d81b540096c3..34bd2ff9f121 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3335,11 +3335,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = netdev_start_xmit(pkt_dev->skb, odev); + ret = netdev_start_xmit(pkt_dev->skb, odev, txq); switch (ret) { case NETDEV_TX_OK: - txq_trans_update(txq); pkt_dev->last_ok = 1; pkt_dev->sofar++; pkt_dev->seq_num++; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b7a7f5a721bd..fe305a05a8fc 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -258,11 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb) local_bh_disable(); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_xmit_frozen_or_drv_stopped(txq)) { - ret = netdev_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) - txq_trans_update(txq); - } + if (!netif_xmit_frozen_or_drv_stopped(txq)) + ret = netdev_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 64cd93ca8104..193dc2cba1ec 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -316,8 +316,7 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - netdev_start_xmit(skb, slave) == NETDEV_TX_OK) { - txq_trans_update(slave_txq); + netdev_start_xmit(skb, slave, slave_txq) == NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); -- cgit v1.2.3 From fa2dbdc253c2aee2a760c64de454cb62469ec11d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Aug 2014 21:55:22 -0700 Subject: net: Pass a "more" indication down into netdev_start_xmit() code paths. For now it will always be false. Signed-off-by: David S. Miller --- drivers/net/wan/dlci.c | 2 +- include/linux/netdevice.h | 9 +++++---- net/atm/mpc.c | 2 +- net/core/dev.c | 2 +- net/core/netpoll.c | 2 +- net/core/pktgen.c | 2 +- net/packet/af_packet.c | 2 +- net/sched/sch_teql.c | 3 ++- 8 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 6427e8283419..ae6ecf401189 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -194,7 +194,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) if (skb) { struct netdev_queue *txq = skb_get_tx_queue(dev, skb); - netdev_start_xmit(skb, dlp->slave, txq); + netdev_start_xmit(skb, dlp->slave, txq, false); } return NETDEV_TX_OK; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 16171802ea7d..5050218c5b7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3431,19 +3431,20 @@ int __init dev_proc_init(void); #endif static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, - struct sk_buff *skb, struct net_device *dev) + struct sk_buff *skb, struct net_device *dev, + bool more) { - skb->xmit_more = 0; + skb->xmit_more = more ? 1 : 0; return ops->ndo_start_xmit(skb, dev); } static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) + struct netdev_queue *txq, bool more) { const struct net_device_ops *ops = dev->netdev_ops; int rc; - rc = __netdev_start_xmit(ops, skb, dev); + rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) txq_trans_update(txq); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index d662da161e5a..0e982222d425 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, } non_ip: - return __netdev_start_xmit(mpc->old_ops, skb, dev); + return __netdev_start_xmit(mpc->old_ops, skb, dev, false); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) diff --git a/net/core/dev.c b/net/core/dev.c index ab7bb809711e..f0ed5a611a97 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2610,7 +2610,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev, txq); + rc = netdev_start_xmit(skb, dev, txq, false); trace_net_dev_xmit(skb, rc, dev, len); return rc; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 05bc57edaa81..e6645b4f330a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -91,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = netdev_start_xmit(skb, dev, txq); + status = netdev_start_xmit(skb, dev, txq, false); out: return status; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 34bd2ff9f121..5b36a9428c59 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3335,7 +3335,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = netdev_start_xmit(pkt_dev->skb, odev, txq); + ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false); switch (ret) { case NETDEV_TX_OK: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fe305a05a8fc..87d20f48ff06 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -259,7 +259,7 @@ static int packet_direct_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) - ret = netdev_start_xmit(skb, dev, txq); + ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 193dc2cba1ec..aaa8d03ed054 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -316,7 +316,8 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - netdev_start_xmit(skb, slave, slave_txq) == NETDEV_TX_OK) { + netdev_start_xmit(skb, slave, slave_txq, false) == + NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); -- cgit v1.2.3 From 50cbe9ab5f8d92d2d4a327b56e96559d8f63a1fa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 30 Aug 2014 19:13:51 -0700 Subject: net: Validate xmit SKBs right when we pull them out of the qdisc. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 6 +----- net/sched/sch_generic.c | 5 ++++- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5050218c5b7f..47c49ba2dcf4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2827,6 +2827,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); +struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 704a5434f77d..75bc5b068a13 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2656,7 +2656,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur return skb; } -static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) +struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) { netdev_features_t features; @@ -2719,10 +2719,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { int rc = NETDEV_TX_OK; - skb = validate_xmit_skb(skb, dev); - if (!skb) - return rc; - if (likely(!skb->next)) return xmit_one(skb, dev, txq, false); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 05b3f5d104af..f178798a5836 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -70,8 +70,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) } else skb = NULL; } else { - if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) + if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) { skb = q->dequeue(q); + if (skb) + skb = validate_xmit_skb(skb, qdisc_dev(q)); + } } return skb; -- cgit v1.2.3 From ce93718fb7cdbc064c3000ff59e4d3200bdfa744 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 30 Aug 2014 19:22:20 -0700 Subject: net: Don't keep around original SKB when we software segment GSO frames. Just maintain the list properly by returning the head of the remaining SKB list from dev_hard_start_xmit(). Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +-- net/core/dev.c | 79 +++++++++-------------------------------------- net/sched/sch_generic.c | 2 +- 3 files changed, 17 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 47c49ba2dcf4..202c25a9aadf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2828,8 +2828,8 @@ int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev); -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq); +struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, int *ret); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 75bc5b068a13..c89da4f306b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2485,52 +2485,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) return 0; } -struct dev_gso_cb { - void (*destructor)(struct sk_buff *skb); -}; - -#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) - -static void dev_gso_skb_destructor(struct sk_buff *skb) -{ - struct dev_gso_cb *cb; - - kfree_skb_list(skb->next); - skb->next = NULL; - - cb = DEV_GSO_CB(skb); - if (cb->destructor) - cb->destructor(skb); -} - -/** - * dev_gso_segment - Perform emulated hardware segmentation on skb. - * @skb: buffer to segment - * @features: device features as applicable to this skb - * - * This function segments the given skb and stores the list of segments - * in skb->next. - */ -static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) -{ - struct sk_buff *segs; - - segs = skb_gso_segment(skb, features); - - /* Verifying header integrity only. */ - if (!segs) - return 0; - - if (IS_ERR(segs)) - return PTR_ERR(segs); - - skb->next = segs; - DEV_GSO_CB(skb)->destructor = skb->destructor; - skb->destructor = dev_gso_skb_destructor; - - return 0; -} - /* If MPLS offload request, verify we are testing hardware MPLS features * instead of standard features for the netdev. */ @@ -2682,8 +2636,13 @@ struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) features &= dev->hw_enc_features; if (netif_needs_gso(skb, features)) { - if (unlikely(dev_gso_segment(skb, features))) - goto out_kfree_skb; + struct sk_buff *segs; + + segs = skb_gso_segment(skb, features); + kfree_skb(skb); + if (IS_ERR(segs)) + segs = NULL; + skb = segs; } else { if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) @@ -2714,26 +2673,16 @@ out_null: return NULL; } -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) +struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, int *ret) { - int rc = NETDEV_TX_OK; - - if (likely(!skb->next)) - return xmit_one(skb, dev, txq, false); - - skb->next = xmit_list(skb->next, dev, txq, &rc); - if (likely(skb->next == NULL)) { - skb->destructor = DEV_GSO_CB(skb)->destructor; - consume_skb(skb); - return rc; + if (likely(!skb->next)) { + *ret = xmit_one(skb, dev, txq, false); + return skb; } - kfree_skb(skb); - - return rc; + return xmit_list(skb, dev, txq, ret); } -EXPORT_SYMBOL_GPL(dev_hard_start_xmit); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -2945,7 +2894,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); - rc = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &rc); __this_cpu_dec(xmit_recursion); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f178798a5836..a8bf9f9928bd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -129,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) - ret = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); -- cgit v1.2.3 From 5a21232983aa7acfe7fd26170832a9e0a4a7b4ae Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:41 -0700 Subject: net: Support for csum_bad in skbuff This flag indicates that an invalid checksum was detected in the packet. __skb_mark_checksum_bad helper function was added to set this. Checksums can be marked bad from a driver or the GRO path (the latter is implemented in this patch). csum_bad is checked in __skb_checksum_validate_complete (i.e. calling that when ip_summed == CHECKSUM_NONE). csum_bad works in conjunction with ip_summed value. In the case that ip_summed is CHECKSUM_NONE and csum_bad is set, this implies that the first (or next) checksum encountered in the packet is bad. When ip_summed is CHECKSUM_UNNECESSARY, the first checksum after the last one validated is bad. For example, if ip_summed == CHECKSUM_UNNECESSARY, csum_level == 1, and csum_bad is set-- then the third checksum in the packet is bad. In the normal path, the packet will be dropped when processing the protocol layer of the bad checksum: __skb_decr_checksum_unnecessary called twice for the good checksums changing ip_summed to CHECKSUM_NONE so that __skb_checksum_validate_complete is called to validate the third checksum and that will fail since csum_bad is set. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- include/linux/skbuff.h | 21 ++++++++++++++++++++- net/core/dev.c | 2 +- 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 202c25a9aadf..a0ab6d9d400a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2216,7 +2216,9 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ __ret = __skb_gro_checksum_validate_complete(skb, \ compute_pseudo(skb, proto)); \ - if (!__ret) \ + if (__ret) \ + __skb_mark_checksum_bad(skb); \ + else \ skb_gro_incr_csum_unnecessary(skb); \ __ret; \ }) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c93b5859a772..23710a243439 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -617,7 +617,8 @@ struct sk_buff { kmemcheck_bitfield_begin(flags3); __u8 csum_level:2; - /* 14 bit hole */ + __u8 csum_bad:1; + /* 13 bit hole */ kmemcheck_bitfield_end(flags3); __be16 inner_protocol; @@ -2825,6 +2826,21 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) } } +static inline void __skb_mark_checksum_bad(struct sk_buff *skb) +{ + /* Mark current checksum as bad (typically called from GRO + * path). In the case that ip_summed is CHECKSUM_NONE + * this must be the first checksum encountered in the packet. + * When ip_summed is CHECKSUM_UNNECESSARY, this is the first + * checksum after the last one validated. For UDP, a zero + * checksum can not be marked as bad. + */ + + if (skb->ip_summed == CHECKSUM_NONE || + skb->ip_summed == CHECKSUM_UNNECESSARY) + skb->csum_bad = 1; +} + /* Check if we need to perform checksum complete validation. * * Returns true if checksum complete is needed, false otherwise @@ -2866,6 +2882,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, skb->csum_valid = 1; return 0; } + } else if (skb->csum_bad) { + /* ip_summed == CHECKSUM_NONE in this case */ + return 1; } skb->csum = psum; diff --git a/net/core/dev.c b/net/core/dev.c index 6857d57aa294..3774afc3bebf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3918,7 +3918,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (!(skb->dev->features & NETIF_F_GRO)) goto normal; - if (skb_is_gso(skb) || skb_has_frag_list(skb)) + if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) goto normal; gro_list_prepare(napi, skb); -- cgit v1.2.3 From d96535a17dbbafd567961d14c08c0984ddda9c3c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:42 -0700 Subject: net: Infrastructure for checksum unnecessary conversions For normal path, added skb_checksum_try_convert which is called to attempt to convert CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE. The primary condition to allow this is that ip_summed is CHECKSUM_NONE and csum_valid is true, which will be the state after consuming a CHECKSUM_UNNECESSARY. For GRO path, added skb_gro_checksum_try_convert which is the GRO analogue of skb_checksum_try_convert. The primary condition to allow this is that NAPI_GRO_CB(skb)->csum_cnt == 0 and NAPI_GRO_CB(skb)->csum_valid is set. This implies that we have consumed all available CHECKSUM_UNNECESSARY checksums in the GRO path. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 20 ++++++++++++++++++++ include/linux/skbuff.h | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a0ab6d9d400a..5be20a7bbb0d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2233,6 +2233,26 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) #define skb_gro_checksum_simple_validate(skb) \ __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) +static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid); +} + +static inline void __skb_gro_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + NAPI_GRO_CB(skb)->csum = ~pseudo; + NAPI_GRO_CB(skb)->csum_valid = 1; +} + +#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_gro_checksum_convert_check(skb)) \ + __skb_gro_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 23710a243439..02529fcad1ac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2942,6 +2942,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) +static inline bool __skb_checksum_convert_check(struct sk_buff *skb) +{ + return (skb->ip_summed == CHECKSUM_NONE && + skb->csum_valid && !skb->csum_bad); +} + +static inline void __skb_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + skb->csum = ~pseudo; + skb->ip_summed = CHECKSUM_COMPLETE; +} + +#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_checksum_convert_check(skb)) \ + __skb_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) -- cgit v1.2.3 From 2abb7cdc0dc84e99b76ef983a1ae1978922aa9b3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:43 -0700 Subject: udp: Add support for doing checksum unnecessary conversion Add support for doing CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion in UDP tunneling path. In the normal UDP path, we call skb_checksum_try_convert after locating the UDP socket. The check is that checksum conversion is enabled for the socket (new flag in UDP socket) and that checksum field is non-zero. In the UDP GRO path, we call skb_gro_checksum_try_convert after checksum is validated and checksum field is non-zero. Since this is already in GRO we assume that checksum conversion is always wanted. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 16 +++++++++++++++- net/ipv4/udp.c | 4 ++++ net/ipv4/udp_offload.c | 25 +++++++++++++++++-------- net/ipv6/udp.c | 4 ++++ net/ipv6/udp_offload.c | 24 +++++++++++++++++------- 5 files changed, 57 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 247cfdcc4b08..ee3277593222 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,7 +49,11 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ + no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + convert_csum:1;/* On receive, convert checksum + * unnecessary to checksum complete + * if possible. + */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -98,6 +102,16 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } +static inline void udp_set_convert_csum(struct sock *sk, bool val) +{ + udp_sk(sk)->convert_csum = val; +} + +static inline bool udp_get_convert_csum(struct sock *sk) +{ + return udp_sk(sk)->convert_csum; +} + #define udp_portaddr_for_each_entry(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3549c21fe5f7..0da3849fd35b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1788,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_compute_pseudo); + ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index a6adff98382a..84e0e05c9c0e 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -290,16 +290,25 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head, { struct udphdr *uh = udp_gro_udphdr(skb); - /* Don't bother verifying checksum if we're going to flush anyway. */ - if (unlikely(!uh) || - (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, - inet_gro_compute_pseudo))) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + if (unlikely(!uh)) + goto flush; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (!NAPI_GRO_CB(skb)->flush) + goto skip; + + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo); +skip: return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } int udp_gro_complete(struct sk_buff *skb, int nhoff) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 12fcce8fba46..f6ba535b6feb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_compute_pseudo); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b13e377e9c53..89cb9a9b8537 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -134,16 +134,26 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head, { struct udphdr *uh = udp_gro_udphdr(skb); + if (unlikely(!uh)) + goto flush; + /* Don't bother verifying checksum if we're going to flush anyway. */ - if (unlikely(!uh) || - (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, - ip6_gro_compute_pseudo))) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + if (!NAPI_GRO_CB(skb)->flush) + goto skip; + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo); + +skip: return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } int udp6_gro_complete(struct sk_buff *skb, int nhoff) -- cgit v1.2.3 From 0dbc8b7afef6e4fddcfebcbacbeb269a0a3b06d5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 1 Sep 2014 15:15:40 +0200 Subject: gpio: move varargs hack outside #ifdef GPIOLIB commit 39b2bbe3d715cf5013b5c48695ccdd25bd3bf120 "gpio: add flags argument to gpiod_get*() functions" added a dynamic flags argument to all the GPIOD getter functions, however this did not cover the stubs so when people used gpiod stubs to compile out descriptor code, compilation failed. Solve this by: - Also rename all the stub functions __gpiod_* - Moving the vararg hack outside of #ifdef CONFIG_GPIOLIB so these will always be available. Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 105 ++++++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index c7e17de732f3..12f146fa6604 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -38,60 +38,32 @@ enum gpiod_flags { struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags); -#define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags) -#define gpiod_get(varargs...) __gpiod_get(varargs, 0) struct gpio_desc *__must_check __gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags); -#define __gpiod_get_index(dev, con_id, index, flags, ...) \ - __gpiod_get_index(dev, con_id, index, flags) -#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, 0) struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags); -#define __gpiod_get_optional(dev, con_id, flags, ...) \ - __gpiod_get_optional(dev, con_id, flags) -#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, 0) struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags); -#define __gpiod_get_index_optional(dev, con_id, index, flags, ...) \ - __gpiod_get_index_optional(dev, con_id, index, flags) -#define gpiod_get_index_optional(varargs...) \ - __gpiod_get_index_optional(varargs, 0) - void gpiod_put(struct gpio_desc *desc); struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags); -#define __devm_gpiod_get(dev, con_id, flags, ...) \ - __devm_gpiod_get(dev, con_id, flags) -#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, 0) struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags); -#define __devm_gpiod_get_index(dev, con_id, index, flags, ...) \ - __devm_gpiod_get_index(dev, con_id, index, flags) -#define devm_gpiod_get_index(varargs...) __devm_gpiod_get_index(varargs, 0) struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags); -#define __devm_gpiod_get_optional(dev, con_id, flags, ...) \ - __devm_gpiod_get_optional(dev, con_id, flags) -#define devm_gpiod_get_optional(varargs...) \ - __devm_gpiod_get_optional(varargs, 0) struct gpio_desc *__must_check __devm_gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags); -#define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...) \ - __devm_gpiod_get_index_optional(dev, con_id, index, flags) -#define devm_gpiod_get_index_optional(varargs...) \ - __devm_gpiod_get_index_optional(varargs, 0) - void devm_gpiod_put(struct device *dev, struct gpio_desc *desc); int gpiod_get_direction(const struct gpio_desc *desc); @@ -124,27 +96,31 @@ int desc_to_gpio(const struct gpio_desc *desc); #else /* CONFIG_GPIOLIB */ -static inline struct gpio_desc *__must_check gpiod_get(struct device *dev, - const char *con_id) +static inline struct gpio_desc *__must_check __gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } -static inline struct gpio_desc *__must_check gpiod_get_index(struct device *dev, - const char *con_id, - unsigned int idx) +static inline struct gpio_desc *__must_check +__gpiod_get_index(struct device *dev, + const char *con_id, + unsigned int idx, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline struct gpio_desc *__must_check -gpiod_get_optional(struct device *dev, const char *con_id) +__gpiod_get_optional(struct device *dev, const char *con_id, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline struct gpio_desc *__must_check -gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index) +__gpiod_get_index_optional(struct device *dev, const char *con_id, + unsigned int index, enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } @@ -157,28 +133,33 @@ static inline void gpiod_put(struct gpio_desc *desc) WARN_ON(1); } -static inline struct gpio_desc *__must_check devm_gpiod_get(struct device *dev, - const char *con_id) +static inline struct gpio_desc *__must_check +__devm_gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline -struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, - const char *con_id, - unsigned int idx) +struct gpio_desc *__must_check +__devm_gpiod_get_index(struct device *dev, + const char *con_id, + unsigned int idx, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline struct gpio_desc *__must_check -devm_gpiod_get_optional(struct device *dev, const char *con_id) +__devm_gpiod_get_optional(struct device *dev, const char *con_id, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline struct gpio_desc *__must_check -devm_gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index) +__devm_gpiod_get_index_optional(struct device *dev, const char *con_id, + unsigned int index, enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } @@ -303,9 +284,43 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) return -EINVAL; } - #endif /* CONFIG_GPIOLIB */ +/* + * Vararg-hacks! This is done to transition the kernel to always pass + * the options flags argument to the below functions. During a transition + * phase these vararg macros make both old-and-newstyle code compile, + * but when all calls to the elder API are removed, these should go away + * and the __gpiod_get() etc functions above be renamed just gpiod_get() + * etc. + */ +#define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags) +#define gpiod_get(varargs...) __gpiod_get(varargs, 0) +#define __gpiod_get_index(dev, con_id, index, flags, ...) \ + __gpiod_get_index(dev, con_id, index, flags) +#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, 0) +#define __gpiod_get_optional(dev, con_id, flags, ...) \ + __gpiod_get_optional(dev, con_id, flags) +#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, 0) +#define __gpiod_get_index_optional(dev, con_id, index, flags, ...) \ + __gpiod_get_index_optional(dev, con_id, index, flags) +#define gpiod_get_index_optional(varargs...) \ + __gpiod_get_index_optional(varargs, 0) +#define __devm_gpiod_get(dev, con_id, flags, ...) \ + __devm_gpiod_get(dev, con_id, flags) +#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, 0) +#define __devm_gpiod_get_index(dev, con_id, index, flags, ...) \ + __devm_gpiod_get_index(dev, con_id, index, flags) +#define devm_gpiod_get_index(varargs...) __devm_gpiod_get_index(varargs, 0) +#define __devm_gpiod_get_optional(dev, con_id, flags, ...) \ + __devm_gpiod_get_optional(dev, con_id, flags) +#define devm_gpiod_get_optional(varargs...) \ + __devm_gpiod_get_optional(varargs, 0) +#define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...) \ + __devm_gpiod_get_index_optional(dev, con_id, index, flags) +#define devm_gpiod_get_index_optional(varargs...) \ + __devm_gpiod_get_index_optional(varargs, 0) + #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS) int gpiod_export(struct gpio_desc *desc, bool direction_may_change); -- cgit v1.2.3 From fbff66108352d19b5cffa7dce26d7638c9dd4d70 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 28 Aug 2014 04:43:09 -0700 Subject: security: Silence shadow warning Renaming an unused formal parameter in the static inline function security_inode_init_security eliminates many W=2 warnings. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: James Morris --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 623f90e5f38d..3b3aeb1b74cb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2108,7 +2108,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, static inline int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, - const initxattrs initxattrs, + const initxattrs xattrs, void *fs_data) { return 0; -- cgit v1.2.3 From ea2fdf842365066c82ab941086c6a1741ced4f2a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 28 Aug 2014 13:58:53 +0200 Subject: usb: phy: samsung: remove old common USB PHY code drivers/usb/phy/phy-samsung-usb[2,3] drivers got replaced by drivers/phy/phy-samsung-usb[2,3] ones and the old common Samsung USB PHY code is no longer used. Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Kyungmin Park Reviewed-by: Vivek Gautam Reviewed-by: Jingoo Han Acked-by: Kishon Vijay Abraham I Cc: Kamil Debski Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-samsung-usb.c | 241 -------------------- drivers/usb/phy/phy-samsung-usb.h | 317 --------------------------- include/linux/platform_data/samsung-usbphy.h | 27 --- 3 files changed, 585 deletions(-) delete mode 100644 drivers/usb/phy/phy-samsung-usb.c delete mode 100644 drivers/usb/phy/phy-samsung-usb.h delete mode 100644 include/linux/platform_data/samsung-usbphy.h (limited to 'include/linux') diff --git a/drivers/usb/phy/phy-samsung-usb.c b/drivers/usb/phy/phy-samsung-usb.c deleted file mode 100644 index ac025ca08425..000000000000 --- a/drivers/usb/phy/phy-samsung-usb.c +++ /dev/null @@ -1,241 +0,0 @@ -/* linux/drivers/usb/phy/phy-samsung-usb.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Author: Praveen Paneri - * - * Samsung USB-PHY helper driver with common function calls; - * interacts with Samsung USB 2.0 PHY controller driver and later - * with Samsung USB 3.0 PHY driver. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "phy-samsung-usb.h" - -int samsung_usbphy_parse_dt(struct samsung_usbphy *sphy) -{ - struct device_node *usbphy_sys; - - /* Getting node for system controller interface for usb-phy */ - usbphy_sys = of_get_child_by_name(sphy->dev->of_node, "usbphy-sys"); - if (!usbphy_sys) { - dev_err(sphy->dev, "No sys-controller interface for usb-phy\n"); - return -ENODEV; - } - - sphy->pmuregs = of_iomap(usbphy_sys, 0); - - if (sphy->pmuregs == NULL) { - dev_err(sphy->dev, "Can't get usb-phy pmu control register\n"); - goto err0; - } - - sphy->sysreg = of_iomap(usbphy_sys, 1); - - /* - * Not returning error code here, since this situation is not fatal. - * Few SoCs may not have this switch available - */ - if (sphy->sysreg == NULL) - dev_warn(sphy->dev, "Can't get usb-phy sysreg cfg register\n"); - - of_node_put(usbphy_sys); - - return 0; - -err0: - of_node_put(usbphy_sys); - return -ENXIO; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_parse_dt); - -/* - * Set isolation here for phy. - * Here 'on = true' would mean USB PHY block is isolated, hence - * de-activated and vice-versa. - */ -void samsung_usbphy_set_isolation_4210(struct samsung_usbphy *sphy, bool on) -{ - void __iomem *reg = NULL; - u32 reg_val; - u32 en_mask = 0; - - if (!sphy->pmuregs) { - dev_warn(sphy->dev, "Can't set pmu isolation\n"); - return; - } - - if (sphy->phy_type == USB_PHY_TYPE_DEVICE) { - reg = sphy->pmuregs + sphy->drv_data->devphy_reg_offset; - en_mask = sphy->drv_data->devphy_en_mask; - } else if (sphy->phy_type == USB_PHY_TYPE_HOST) { - reg = sphy->pmuregs + sphy->drv_data->hostphy_reg_offset; - en_mask = sphy->drv_data->hostphy_en_mask; - } - - reg_val = readl(reg); - - if (on) - reg_val &= ~en_mask; - else - reg_val |= en_mask; - - writel(reg_val, reg); - - if (sphy->drv_data->cpu_type == TYPE_EXYNOS4X12) { - writel(reg_val, sphy->pmuregs + EXYNOS4X12_PHY_HSIC_CTRL0); - writel(reg_val, sphy->pmuregs + EXYNOS4X12_PHY_HSIC_CTRL1); - } -} -EXPORT_SYMBOL_GPL(samsung_usbphy_set_isolation_4210); - -/* - * Configure the mode of working of usb-phy here: HOST/DEVICE. - */ -void samsung_usbphy_cfg_sel(struct samsung_usbphy *sphy) -{ - u32 reg; - - if (!sphy->sysreg) { - dev_warn(sphy->dev, "Can't configure specified phy mode\n"); - return; - } - - reg = readl(sphy->sysreg); - - if (sphy->phy_type == USB_PHY_TYPE_DEVICE) - reg &= ~EXYNOS_USB20PHY_CFG_HOST_LINK; - else if (sphy->phy_type == USB_PHY_TYPE_HOST) - reg |= EXYNOS_USB20PHY_CFG_HOST_LINK; - - writel(reg, sphy->sysreg); -} -EXPORT_SYMBOL_GPL(samsung_usbphy_cfg_sel); - -/* - * PHYs are different for USB Device and USB Host. - * This make sure that correct PHY type is selected before - * any operation on PHY. - */ -int samsung_usbphy_set_type(struct usb_phy *phy, - enum samsung_usb_phy_type phy_type) -{ - struct samsung_usbphy *sphy = phy_to_sphy(phy); - - sphy->phy_type = phy_type; - - return 0; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_set_type); - -int samsung_usbphy_rate_to_clksel_64xx(struct samsung_usbphy *sphy, - unsigned long rate) -{ - unsigned int clksel; - - switch (rate) { - case 12 * MHZ: - clksel = PHYCLK_CLKSEL_12M; - break; - case 24 * MHZ: - clksel = PHYCLK_CLKSEL_24M; - break; - case 48 * MHZ: - clksel = PHYCLK_CLKSEL_48M; - break; - default: - dev_err(sphy->dev, - "Invalid reference clock frequency: %lu\n", rate); - return -EINVAL; - } - - return clksel; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_rate_to_clksel_64xx); - -int samsung_usbphy_rate_to_clksel_4x12(struct samsung_usbphy *sphy, - unsigned long rate) -{ - unsigned int clksel; - - switch (rate) { - case 9600 * KHZ: - clksel = FSEL_CLKSEL_9600K; - break; - case 10 * MHZ: - clksel = FSEL_CLKSEL_10M; - break; - case 12 * MHZ: - clksel = FSEL_CLKSEL_12M; - break; - case 19200 * KHZ: - clksel = FSEL_CLKSEL_19200K; - break; - case 20 * MHZ: - clksel = FSEL_CLKSEL_20M; - break; - case 24 * MHZ: - clksel = FSEL_CLKSEL_24M; - break; - case 50 * MHZ: - clksel = FSEL_CLKSEL_50M; - break; - default: - dev_err(sphy->dev, - "Invalid reference clock frequency: %lu\n", rate); - return -EINVAL; - } - - return clksel; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_rate_to_clksel_4x12); - -/* - * Returns reference clock frequency selection value - */ -int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy) -{ - struct clk *ref_clk; - unsigned long rate; - int refclk_freq; - - /* - * In exynos5250 USB host and device PHY use - * external crystal clock XXTI - */ - if (sphy->drv_data->cpu_type == TYPE_EXYNOS5250) - ref_clk = clk_get(sphy->dev, "ext_xtal"); - else - ref_clk = clk_get(sphy->dev, "xusbxti"); - if (IS_ERR(ref_clk)) { - dev_err(sphy->dev, "Failed to get reference clock\n"); - return PTR_ERR(ref_clk); - } - - rate = clk_get_rate(ref_clk); - refclk_freq = sphy->drv_data->rate_to_clksel(sphy, rate); - - clk_put(ref_clk); - - return refclk_freq; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_get_refclk_freq); diff --git a/drivers/usb/phy/phy-samsung-usb.h b/drivers/usb/phy/phy-samsung-usb.h deleted file mode 100644 index 4eef45555971..000000000000 --- a/drivers/usb/phy/phy-samsung-usb.h +++ /dev/null @@ -1,317 +0,0 @@ -/* linux/drivers/usb/phy/phy-samsung-usb.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Samsung USB-PHY transceiver; talks to S3C HS OTG controller, EHCI-S5P and - * OHCI-EXYNOS controllers. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include - -/* Register definitions */ - -#define SAMSUNG_PHYPWR (0x00) -#define PHYPWR_NORMAL_MASK (0x19 << 0) -#define PHYPWR_OTG_DISABLE (0x1 << 4) -#define PHYPWR_ANALOG_POWERDOWN (0x1 << 3) -#define PHYPWR_FORCE_SUSPEND (0x1 << 1) -/* For Exynos4 */ -#define PHYPWR_NORMAL_MASK_PHY0 (0x39 << 0) -#define PHYPWR_SLEEP_PHY0 (0x1 << 5) - -#define SAMSUNG_PHYCLK (0x04) -#define PHYCLK_MODE_USB11 (0x1 << 6) -#define PHYCLK_EXT_OSC (0x1 << 5) -#define PHYCLK_COMMON_ON_N (0x1 << 4) -#define PHYCLK_ID_PULL (0x1 << 2) -#define PHYCLK_CLKSEL_MASK (0x3 << 0) -#define PHYCLK_CLKSEL_48M (0x0 << 0) -#define PHYCLK_CLKSEL_12M (0x2 << 0) -#define PHYCLK_CLKSEL_24M (0x3 << 0) - -#define SAMSUNG_RSTCON (0x08) -#define RSTCON_PHYLINK_SWRST (0x1 << 2) -#define RSTCON_HLINK_SWRST (0x1 << 1) -#define RSTCON_SWRST (0x1 << 0) - -/* EXYNOS4X12 */ -#define EXYNOS4X12_PHY_HSIC_CTRL0 (0x04) -#define EXYNOS4X12_PHY_HSIC_CTRL1 (0x08) -#define PHYPWR_NORMAL_MASK_HSIC1 (0x7 << 12) -#define PHYPWR_NORMAL_MASK_HSIC0 (0x7 << 9) -#define PHYPWR_NORMAL_MASK_PHY1 (0x7 << 6) -#define RSTCON_HOSTPHY_SWRST (0xf << 3) - -/* EXYNOS5 */ -#define EXYNOS5_PHY_HOST_CTRL0 (0x00) -#define HOST_CTRL0_PHYSWRSTALL (0x1 << 31) -#define HOST_CTRL0_REFCLKSEL_MASK (0x3 << 19) -#define HOST_CTRL0_REFCLKSEL_XTAL (0x0 << 19) -#define HOST_CTRL0_REFCLKSEL_EXTL (0x1 << 19) -#define HOST_CTRL0_REFCLKSEL_CLKCORE (0x2 << 19) -#define HOST_CTRL0_FSEL_MASK (0x7 << 16) -#define HOST_CTRL0_FSEL(_x) ((_x) << 16) -#define FSEL_CLKSEL_50M (0x7) -#define FSEL_CLKSEL_24M (0x5) -#define FSEL_CLKSEL_20M (0x4) -#define FSEL_CLKSEL_19200K (0x3) -#define FSEL_CLKSEL_12M (0x2) -#define FSEL_CLKSEL_10M (0x1) -#define FSEL_CLKSEL_9600K (0x0) -#define HOST_CTRL0_TESTBURNIN (0x1 << 11) -#define HOST_CTRL0_RETENABLE (0x1 << 10) -#define HOST_CTRL0_COMMONON_N (0x1 << 9) -#define HOST_CTRL0_SIDDQ (0x1 << 6) -#define HOST_CTRL0_FORCESLEEP (0x1 << 5) -#define HOST_CTRL0_FORCESUSPEND (0x1 << 4) -#define HOST_CTRL0_WORDINTERFACE (0x1 << 3) -#define HOST_CTRL0_UTMISWRST (0x1 << 2) -#define HOST_CTRL0_LINKSWRST (0x1 << 1) -#define HOST_CTRL0_PHYSWRST (0x1 << 0) - -#define EXYNOS5_PHY_HOST_TUNE0 (0x04) - -#define EXYNOS5_PHY_HSIC_CTRL1 (0x10) - -#define EXYNOS5_PHY_HSIC_TUNE1 (0x14) - -#define EXYNOS5_PHY_HSIC_CTRL2 (0x20) - -#define EXYNOS5_PHY_HSIC_TUNE2 (0x24) -#define HSIC_CTRL_REFCLKSEL_MASK (0x3 << 23) -#define HSIC_CTRL_REFCLKSEL (0x2 << 23) -#define HSIC_CTRL_REFCLKDIV_MASK (0x7f << 16) -#define HSIC_CTRL_REFCLKDIV(_x) ((_x) << 16) -#define HSIC_CTRL_REFCLKDIV_12 (0x24 << 16) -#define HSIC_CTRL_REFCLKDIV_15 (0x1c << 16) -#define HSIC_CTRL_REFCLKDIV_16 (0x1a << 16) -#define HSIC_CTRL_REFCLKDIV_19_2 (0x15 << 16) -#define HSIC_CTRL_REFCLKDIV_20 (0x14 << 16) -#define HSIC_CTRL_SIDDQ (0x1 << 6) -#define HSIC_CTRL_FORCESLEEP (0x1 << 5) -#define HSIC_CTRL_FORCESUSPEND (0x1 << 4) -#define HSIC_CTRL_WORDINTERFACE (0x1 << 3) -#define HSIC_CTRL_UTMISWRST (0x1 << 2) -#define HSIC_CTRL_PHYSWRST (0x1 << 0) - -#define EXYNOS5_PHY_HOST_EHCICTRL (0x30) -#define HOST_EHCICTRL_ENAINCRXALIGN (0x1 << 29) -#define HOST_EHCICTRL_ENAINCR4 (0x1 << 28) -#define HOST_EHCICTRL_ENAINCR8 (0x1 << 27) -#define HOST_EHCICTRL_ENAINCR16 (0x1 << 26) - -#define EXYNOS5_PHY_HOST_OHCICTRL (0x34) -#define HOST_OHCICTRL_SUSPLGCY (0x1 << 3) -#define HOST_OHCICTRL_APPSTARTCLK (0x1 << 2) -#define HOST_OHCICTRL_CNTSEL (0x1 << 1) -#define HOST_OHCICTRL_CLKCKTRST (0x1 << 0) - -#define EXYNOS5_PHY_OTG_SYS (0x38) -#define OTG_SYS_PHYLINK_SWRESET (0x1 << 14) -#define OTG_SYS_LINKSWRST_UOTG (0x1 << 13) -#define OTG_SYS_PHY0_SWRST (0x1 << 12) -#define OTG_SYS_REFCLKSEL_MASK (0x3 << 9) -#define OTG_SYS_REFCLKSEL_XTAL (0x0 << 9) -#define OTG_SYS_REFCLKSEL_EXTL (0x1 << 9) -#define OTG_SYS_REFCLKSEL_CLKCORE (0x2 << 9) -#define OTG_SYS_IDPULLUP_UOTG (0x1 << 8) -#define OTG_SYS_COMMON_ON (0x1 << 7) -#define OTG_SYS_FSEL_MASK (0x7 << 4) -#define OTG_SYS_FSEL(_x) ((_x) << 4) -#define OTG_SYS_FORCESLEEP (0x1 << 3) -#define OTG_SYS_OTGDISABLE (0x1 << 2) -#define OTG_SYS_SIDDQ_UOTG (0x1 << 1) -#define OTG_SYS_FORCESUSPEND (0x1 << 0) - -#define EXYNOS5_PHY_OTG_TUNE (0x40) - -/* EXYNOS5: USB 3.0 DRD */ -#define EXYNOS5_DRD_LINKSYSTEM (0x04) -#define LINKSYSTEM_FLADJ_MASK (0x3f << 1) -#define LINKSYSTEM_FLADJ(_x) ((_x) << 1) -#define LINKSYSTEM_XHCI_VERSION_CONTROL (0x1 << 27) - -#define EXYNOS5_DRD_PHYUTMI (0x08) -#define PHYUTMI_OTGDISABLE (0x1 << 6) -#define PHYUTMI_FORCESUSPEND (0x1 << 1) -#define PHYUTMI_FORCESLEEP (0x1 << 0) - -#define EXYNOS5_DRD_PHYPIPE (0x0c) - -#define EXYNOS5_DRD_PHYCLKRST (0x10) -#define PHYCLKRST_SSC_REFCLKSEL_MASK (0xff << 23) -#define PHYCLKRST_SSC_REFCLKSEL(_x) ((_x) << 23) -#define PHYCLKRST_SSC_RANGE_MASK (0x03 << 21) -#define PHYCLKRST_SSC_RANGE(_x) ((_x) << 21) -#define PHYCLKRST_SSC_EN (0x1 << 20) -#define PHYCLKRST_REF_SSP_EN (0x1 << 19) -#define PHYCLKRST_REF_CLKDIV2 (0x1 << 18) -#define PHYCLKRST_MPLL_MULTIPLIER_MASK (0x7f << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_100MHZ_REF (0x19 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_50M_REF (0x02 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF (0x68 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF (0x7d << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF (0x02 << 11) -#define PHYCLKRST_FSEL_MASK (0x3f << 5) -#define PHYCLKRST_FSEL(_x) ((_x) << 5) -#define PHYCLKRST_FSEL_PAD_100MHZ (0x27 << 5) -#define PHYCLKRST_FSEL_PAD_24MHZ (0x2a << 5) -#define PHYCLKRST_FSEL_PAD_20MHZ (0x31 << 5) -#define PHYCLKRST_FSEL_PAD_19_2MHZ (0x38 << 5) -#define PHYCLKRST_RETENABLEN (0x1 << 4) -#define PHYCLKRST_REFCLKSEL_MASK (0x03 << 2) -#define PHYCLKRST_REFCLKSEL_PAD_REFCLK (0x2 << 2) -#define PHYCLKRST_REFCLKSEL_EXT_REFCLK (0x3 << 2) -#define PHYCLKRST_PORTRESET (0x1 << 1) -#define PHYCLKRST_COMMONONN (0x1 << 0) - -#define EXYNOS5_DRD_PHYREG0 (0x14) - -#define EXYNOS5_DRD_PHYREG1 (0x18) - -#define EXYNOS5_DRD_PHYPARAM0 (0x1c) -#define PHYPARAM0_REF_USE_PAD (0x1 << 31) -#define PHYPARAM0_REF_LOSLEVEL_MASK (0x1f << 26) -#define PHYPARAM0_REF_LOSLEVEL (0x9 << 26) - -#define EXYNOS5_DRD_PHYPARAM1 (0x20) -#define PHYPARAM1_PCS_TXDEEMPH_MASK (0x1f << 0) -#define PHYPARAM1_PCS_TXDEEMPH (0x1c) - -#define EXYNOS5_DRD_PHYTERM (0x24) - -#define EXYNOS5_DRD_PHYTEST (0x28) -#define PHYTEST_POWERDOWN_SSP (0x1 << 3) -#define PHYTEST_POWERDOWN_HSP (0x1 << 2) - -#define EXYNOS5_DRD_PHYADP (0x2c) - -#define EXYNOS5_DRD_PHYBATCHG (0x30) -#define PHYBATCHG_UTMI_CLKSEL (0x1 << 2) - -#define EXYNOS5_DRD_PHYRESUME (0x34) - -#define EXYNOS5_DRD_LINKPORT (0x44) - -#ifndef MHZ -#define MHZ (1000*1000) -#endif - -#ifndef KHZ -#define KHZ (1000) -#endif - -#define EXYNOS_USBHOST_PHY_CTRL_OFFSET (0x4) -#define S3C64XX_USBPHY_ENABLE (0x1 << 16) -#define EXYNOS_USBPHY_ENABLE (0x1 << 0) -#define EXYNOS_USB20PHY_CFG_HOST_LINK (0x1 << 0) - -enum samsung_cpu_type { - TYPE_S3C64XX, - TYPE_EXYNOS4210, - TYPE_EXYNOS4X12, - TYPE_EXYNOS5250, -}; - -struct samsung_usbphy; - -/* - * struct samsung_usbphy_drvdata - driver data for various SoC variants - * @cpu_type: machine identifier - * @devphy_en_mask: device phy enable mask for PHY CONTROL register - * @hostphy_en_mask: host phy enable mask for PHY CONTROL register - * @devphy_reg_offset: offset to DEVICE PHY CONTROL register from - * mapped address of system controller. - * @hostphy_reg_offset: offset to HOST PHY CONTROL register from - * mapped address of system controller. - * - * Here we have a separate mask for device type phy. - * Having different masks for host and device type phy helps - * in setting independent masks in case of SoCs like S5PV210, - * in which PHY0 and PHY1 enable bits belong to same register - * placed at position 0 and 1 respectively. - * Although for newer SoCs like exynos these bits belong to - * different registers altogether placed at position 0. - */ -struct samsung_usbphy_drvdata { - int cpu_type; - int devphy_en_mask; - int hostphy_en_mask; - u32 devphy_reg_offset; - u32 hostphy_reg_offset; - int (*rate_to_clksel)(struct samsung_usbphy *, unsigned long); - void (*set_isolation)(struct samsung_usbphy *, bool); - void (*phy_enable)(struct samsung_usbphy *); - void (*phy_disable)(struct samsung_usbphy *); -}; - -/* - * struct samsung_usbphy - transceiver driver state - * @phy: transceiver structure - * @plat: platform data - * @dev: The parent device supplied to the probe function - * @clk: usb phy clock - * @regs: usb phy controller registers memory base - * @pmuregs: USB device PHY_CONTROL register memory base - * @sysreg: USB2.0 PHY_CFG register memory base - * @ref_clk_freq: reference clock frequency selection - * @drv_data: driver data available for different SoCs - * @phy_type: Samsung SoCs specific phy types: #HOST - * #DEVICE - * @phy_usage: usage count for phy - * @lock: lock for phy operations - */ -struct samsung_usbphy { - struct usb_phy phy; - struct samsung_usbphy_data *plat; - struct device *dev; - struct clk *clk; - void __iomem *regs; - void __iomem *pmuregs; - void __iomem *sysreg; - int ref_clk_freq; - const struct samsung_usbphy_drvdata *drv_data; - enum samsung_usb_phy_type phy_type; - atomic_t phy_usage; - spinlock_t lock; -}; - -#define phy_to_sphy(x) container_of((x), struct samsung_usbphy, phy) - -static const struct of_device_id samsung_usbphy_dt_match[]; - -static inline const struct samsung_usbphy_drvdata -*samsung_usbphy_get_driver_data(struct platform_device *pdev) -{ - if (pdev->dev.of_node) { - const struct of_device_id *match; - match = of_match_node(samsung_usbphy_dt_match, - pdev->dev.of_node); - return match->data; - } - - return (struct samsung_usbphy_drvdata *) - platform_get_device_id(pdev)->driver_data; -} - -extern int samsung_usbphy_parse_dt(struct samsung_usbphy *sphy); -extern void samsung_usbphy_set_isolation_4210(struct samsung_usbphy *sphy, - bool on); -extern void samsung_usbphy_cfg_sel(struct samsung_usbphy *sphy); -extern int samsung_usbphy_set_type(struct usb_phy *phy, - enum samsung_usb_phy_type phy_type); -extern int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy); -extern int samsung_usbphy_rate_to_clksel_64xx(struct samsung_usbphy *sphy, - unsigned long rate); -extern int samsung_usbphy_rate_to_clksel_4x12(struct samsung_usbphy *sphy, - unsigned long rate); diff --git a/include/linux/platform_data/samsung-usbphy.h b/include/linux/platform_data/samsung-usbphy.h deleted file mode 100644 index 1bd24cba982b..000000000000 --- a/include/linux/platform_data/samsung-usbphy.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * http://www.samsung.com/ - * Author: Praveen Paneri - * - * Defines platform data for samsung usb phy driver. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef __SAMSUNG_USBPHY_PLATFORM_H -#define __SAMSUNG_USBPHY_PLATFORM_H - -/** - * samsung_usbphy_data - Platform data for USB PHY driver. - * @pmu_isolation: Function to control usb phy isolation in PMU. - */ -struct samsung_usbphy_data { - void (*pmu_isolation)(int on); -}; - -extern void samsung_usbphy_set_pdata(struct samsung_usbphy_data *pd); - -#endif /* __SAMSUNG_USBPHY_PLATFORM_H */ -- cgit v1.2.3 From 9067359faf890b3a18ab38c792d458fba77b32b4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 2 Sep 2014 02:03:12 -0700 Subject: Revert "leds: convert blink timer to workqueue" This reverts commit 8b37e1bef5a6b60e949e28a4db3006e4b00bd758. It's broken as it changes led_blink_set() in a way that it can now sleep (while synchronously waiting for workqueue to be cancelled). That's a problem, because it's possible that this function gets called from atomic context (tpt_trig_timer() takes a readlock and thus disables preemption). This has been brought up 3 weeks ago already [1] but no proper fix has materialized, and I keep seeing the problem since 3.17-rc1. [1] https://lkml.org/lkml/2014/8/16/128 BUG: sleeping function called from invalid context at kernel/workqueue.c:2650 in_atomic(): 1, irqs_disabled(): 0, pid: 2335, name: wpa_supplicant 5 locks held by wpa_supplicant/2335: #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x12/0x20 #1: (&wdev->mtx){+.+.+.}, at: [] cfg80211_mgd_wext_siwessid+0x5c/0x180 [cfg80211] #2: (&local->mtx){+.+.+.}, at: [] ieee80211_prep_connection+0x17a/0x9a0 [mac80211] #3: (&local->chanctx_mtx){+.+.+.}, at: [] ieee80211_vif_use_channel+0x5d/0x2a0 [mac80211] #4: (&trig->leddev_list_lock){.+.+..}, at: [] tpt_trig_timer+0xec/0x170 [mac80211] CPU: 0 PID: 2335 Comm: wpa_supplicant Not tainted 3.17.0-rc3 #1 Hardware name: LENOVO 7470BN2/7470BN2, BIOS 6DET38WW (2.02 ) 12/19/2008 ffff8800360b5a50 ffff8800751f76d8 ffffffff8159e97f ffff8800360b5a30 ffff8800751f76e8 ffffffff810739a5 ffff8800751f77b0 ffffffff8106862f ffffffff810685d0 0aa2209200000000 ffff880000000004 ffff8800361c59d0 Call Trace: [] dump_stack+0x4d/0x66 [] __might_sleep+0xe5/0x120 [] flush_work+0x5f/0x270 [] ? mod_delayed_work_on+0x80/0x80 [] ? mark_held_locks+0x6a/0x90 [] ? __cancel_work_timer+0x6f/0x100 [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [] __cancel_work_timer+0x7b/0x100 [] cancel_delayed_work_sync+0xe/0x10 [] led_blink_set+0x1b/0x40 [] tpt_trig_timer+0x110/0x170 [mac80211] [] ieee80211_mod_tpt_led_trig+0x9d/0x160 [mac80211] [] __ieee80211_recalc_idle+0x98/0x140 [mac80211] [] ieee80211_idle_off+0xe/0x10 [mac80211] [] ieee80211_add_chanctx+0x3b/0x220 [mac80211] [] ieee80211_new_chanctx+0x44/0xf0 [mac80211] [] ieee80211_vif_use_channel+0x1fa/0x2a0 [mac80211] [] ieee80211_prep_connection+0x188/0x9a0 [mac80211] [] ieee80211_mgd_auth+0x256/0x2e0 [mac80211] [] ieee80211_auth+0x13/0x20 [mac80211] [] cfg80211_mlme_auth+0x106/0x270 [cfg80211] [] cfg80211_conn_do_work+0x155/0x3b0 [cfg80211] [] cfg80211_connect+0x3f0/0x540 [cfg80211] [] cfg80211_mgd_wext_connect+0x158/0x1f0 [cfg80211] [] cfg80211_mgd_wext_siwessid+0xde/0x180 [cfg80211] [] ? cfg80211_wext_giwessid+0x50/0x50 [cfg80211] [] cfg80211_wext_siwessid+0x1d/0x40 [cfg80211] [] ioctl_standard_iw_point+0x14c/0x3e0 [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [] ioctl_standard_call+0x8a/0xd0 [] ? ioctl_standard_iw_point+0x3e0/0x3e0 [] wireless_process_ioctl.constprop.10+0xb6/0x100 [] wext_handle_ioctl+0x5d/0xb0 [] dev_ioctl+0x329/0x620 [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [] sock_ioctl+0x142/0x2e0 [] do_vfs_ioctl+0x300/0x520 [] ? sysret_check+0x1b/0x56 [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x1a/0x1f wlan0: send auth to 00:0b:6b:3c:8c:e4 (try 1/3) wlan0: authenticated wlan0: associate with 00:0b:6b:3c:8c:e4 (try 1/3) wlan0: RX AssocResp from 00:0b:6b:3c:8c:e4 (capab=0x431 status=0 aid=2) wlan0: associated IPv6: ADDRCONF(NETDEV_CHANGE): wlan0: link becomes ready cfg80211: Calling CRDA for country: NA wlan0: Limiting TX power to 27 (27 - 0) dBm as advertised by 00:0b:6b:3c:8c:e4 ================================= [ INFO: inconsistent lock state ] 3.17.0-rc3 #1 Not tainted --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/0/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ((&(&led_cdev->blink_work)->work)){+.?...}, at: [] flush_work+0x0/0x270 {SOFTIRQ-ON-W} state was registered at: [] __lock_acquire+0x30e/0x1a30 [] lock_acquire+0x91/0x110 [] flush_work+0x38/0x270 [] __cancel_work_timer+0x7b/0x100 [] cancel_delayed_work_sync+0xe/0x10 [] led_blink_set+0x1b/0x40 [] tpt_trig_timer+0x110/0x170 [mac80211] [] ieee80211_mod_tpt_led_trig+0x9d/0x160 [mac80211] [] __ieee80211_recalc_idle+0x98/0x140 [mac80211] [] ieee80211_idle_off+0xe/0x10 [mac80211] [] ieee80211_add_chanctx+0x3b/0x220 [mac80211] [] ieee80211_new_chanctx+0x44/0xf0 [mac80211] [] ieee80211_vif_use_channel+0x1fa/0x2a0 [mac80211] [] ieee80211_prep_connection+0x188/0x9a0 [mac80211] [] ieee80211_mgd_auth+0x256/0x2e0 [mac80211] [] ieee80211_auth+0x13/0x20 [mac80211] [] cfg80211_mlme_auth+0x106/0x270 [cfg80211] [] cfg80211_conn_do_work+0x155/0x3b0 [cfg80211] [] cfg80211_connect+0x3f0/0x540 [cfg80211] [] cfg80211_mgd_wext_connect+0x158/0x1f0 [cfg80211] [] cfg80211_mgd_wext_siwessid+0xde/0x180 [cfg80211] [] cfg80211_wext_siwessid+0x1d/0x40 [cfg80211] [] ioctl_standard_iw_point+0x14c/0x3e0 [] ioctl_standard_call+0x8a/0xd0 [] wireless_process_ioctl.constprop.10+0xb6/0x100 [] wext_handle_ioctl+0x5d/0xb0 [] dev_ioctl+0x329/0x620 [] sock_ioctl+0x142/0x2e0 [] do_vfs_ioctl+0x300/0x520 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x1a/0x1f irq event stamp: 493416 hardirqs last enabled at (493416): [] __cancel_work_timer+0x6f/0x100 hardirqs last disabled at (493415): [] try_to_grab_pending+0x1f/0x160 softirqs last enabled at (493408): [] _local_bh_enable+0x1d/0x50 softirqs last disabled at (493409): [] irq_exit+0xa5/0xb0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock((&(&led_cdev->blink_work)->work)); lock((&(&led_cdev->blink_work)->work)); *** DEADLOCK *** 2 locks held by swapper/0/0: #0: (((&tpt_trig->timer))){+.-...}, at: [] call_timer_fn+0x0/0x180 #1: (&trig->leddev_list_lock){.+.?..}, at: [] tpt_trig_timer+0xec/0x170 [mac80211] stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.17.0-rc3 #1 Hardware name: LENOVO 7470BN2/7470BN2, BIOS 6DET38WW (2.02 ) 12/19/2008 ffffffff8246eb30 ffff88007c203b00 ffffffff8159e97f ffffffff81a194c0 ffff88007c203b50 ffffffff81599c29 0000000000000001 ffffffff00000001 ffff880000000000 0000000000000006 ffffffff81a194c0 ffffffff81093ad0 Call Trace: [] dump_stack+0x4d/0x66 [] print_usage_bug+0x1f4/0x205 [] ? check_usage_backwards+0x140/0x140 [] mark_lock+0x223/0x2b0 [] __lock_acquire+0x2b0/0x1a30 [] lock_acquire+0x91/0x110 [] ? mod_delayed_work_on+0x80/0x80 [] ? __ieee80211_get_rx_led_name+0x10/0x10 [mac80211] [] flush_work+0x38/0x270 [] ? mod_delayed_work_on+0x80/0x80 [] ? mark_held_locks+0x6a/0x90 [] ? __cancel_work_timer+0x6f/0x100 [] ? __ieee80211_get_rx_led_name+0x10/0x10 [mac80211] [] ? trace_hardirqs_on_caller+0xad/0x1c0 [] ? __ieee80211_get_rx_led_name+0x10/0x10 [mac80211] [] __cancel_work_timer+0x7b/0x100 [] cancel_delayed_work_sync+0xe/0x10 [] led_blink_set+0x1b/0x40 [] tpt_trig_timer+0x110/0x170 [mac80211] [] call_timer_fn+0x75/0x180 [] ? process_timeout+0x10/0x10 [] ? __ieee80211_get_rx_led_name+0x10/0x10 [mac80211] [] run_timer_softirq+0x1fc/0x2f0 [] __do_softirq+0x115/0x2e0 [] irq_exit+0xa5/0xb0 [] do_IRQ+0x53/0xf0 [] common_interrupt+0x6f/0x6f [] ? cpuidle_enter_state+0x6e/0x180 [] cpuidle_enter+0x12/0x20 [] cpu_startup_entry+0x330/0x360 [] rest_init+0xc1/0xd0 [] ? csum_partial_copy_generic+0x170/0x170 [] start_kernel+0x44f/0x45a [] ? set_init_arg+0x53/0x53 [] x86_64_start_reservations+0x2a/0x2c [] x86_64_start_kernel+0xf1/0xf4 Cc: Vincent Donnefort Cc: Hugh Dickins Cc: Tejun Heo Signed-off-by: Jiri Kosina Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 14 +++++++------- drivers/leds/led-core.c | 11 +++++------ include/linux/leds.h | 3 ++- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 129729d35478..aa29198fca3e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -15,10 +15,10 @@ #include #include #include +#include #include #include #include -#include #include "leds.h" static struct class *leds_class; @@ -97,10 +97,9 @@ static const struct attribute_group *led_groups[] = { NULL, }; -static void led_work_function(struct work_struct *ws) +static void led_timer_function(unsigned long data) { - struct led_classdev *led_cdev = - container_of(ws, struct led_classdev, blink_work.work); + struct led_classdev *led_cdev = (void *)data; unsigned long brightness; unsigned long delay; @@ -144,8 +143,7 @@ static void led_work_function(struct work_struct *ws) } } - queue_delayed_work(system_wq, &led_cdev->blink_work, - msecs_to_jiffies(delay)); + mod_timer(&led_cdev->blink_timer, jiffies + msecs_to_jiffies(delay)); } static void set_brightness_delayed(struct work_struct *ws) @@ -233,7 +231,9 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed); - INIT_DELAYED_WORK(&led_cdev->blink_work, led_work_function); + init_timer(&led_cdev->blink_timer); + led_cdev->blink_timer.function = led_timer_function; + led_cdev->blink_timer.data = (unsigned long)led_cdev; #ifdef CONFIG_LEDS_TRIGGERS led_trigger_set_default(led_cdev); diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 4bb116867b88..71b40d3bf776 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "leds.h" DECLARE_RWSEM(leds_list_lock); @@ -52,7 +51,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev, return; } - queue_delayed_work(system_wq, &led_cdev->blink_work, 1); + mod_timer(&led_cdev->blink_timer, jiffies + 1); } @@ -76,7 +75,7 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - cancel_delayed_work_sync(&led_cdev->blink_work); + del_timer_sync(&led_cdev->blink_timer); led_cdev->flags &= ~LED_BLINK_ONESHOT; led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP; @@ -91,7 +90,7 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev, int invert) { if ((led_cdev->flags & LED_BLINK_ONESHOT) && - delayed_work_pending(&led_cdev->blink_work)) + timer_pending(&led_cdev->blink_timer)) return; led_cdev->flags |= LED_BLINK_ONESHOT; @@ -108,7 +107,7 @@ EXPORT_SYMBOL(led_blink_set_oneshot); void led_stop_software_blink(struct led_classdev *led_cdev) { - cancel_delayed_work_sync(&led_cdev->blink_work); + del_timer_sync(&led_cdev->blink_timer); led_cdev->blink_delay_on = 0; led_cdev->blink_delay_off = 0; } @@ -117,7 +116,7 @@ EXPORT_SYMBOL_GPL(led_stop_software_blink); void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness) { - /* delay brightness setting if need to stop soft-blink work */ + /* delay brightness setting if need to stop soft-blink timer */ if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) { led_cdev->delayed_set_value = brightness; schedule_work(&led_cdev->set_brightness_work); diff --git a/include/linux/leds.h b/include/linux/leds.h index 6a599dce7f9d..e43686472197 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -15,6 +15,7 @@ #include #include #include +#include #include struct device; @@ -68,7 +69,7 @@ struct led_classdev { const char *default_trigger; /* Trigger to use */ unsigned long blink_delay_on, blink_delay_off; - struct delayed_work blink_work; + struct timer_list blink_timer; int blink_brightness; struct work_struct set_brightness_work; -- cgit v1.2.3 From 5835d96e9ce4efdba8c6cefffc2f1575925456de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Sep 2014 14:46:04 -0400 Subject: percpu: implement [__]alloc_percpu_gfp() Now that pcpu_alloc_area() can allocate only from populated areas, it's easy to add atomic allocation support to [__]alloc_percpu(). Update pcpu_alloc() so that it accepts @gfp and skips all the blocking operations and allocates only from the populated areas if @gfp doesn't contain GFP_KERNEL. New interface functions [__]alloc_percpu_gfp() are added. While this means that atomic allocations are possible, this isn't complete yet as there's no mechanism to ensure that certain amount of populated areas is kept available and atomic allocations may keep failing under certain conditions. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 9 +++++-- mm/percpu.c | 64 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 6f61b61b7996..d1b416da25ed 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -122,11 +122,16 @@ extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); +extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); -#define alloc_percpu(type) \ - (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) +#define alloc_percpu_gfp(type, gfp) \ + (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \ + __alignof__(type), gfp) +#define alloc_percpu(type) \ + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) #endif /* __LINUX_PERCPU_H */ diff --git a/mm/percpu.c b/mm/percpu.c index 577d84fb3002..c52b93117dc2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -151,11 +151,6 @@ static struct pcpu_chunk *pcpu_first_chunk; static struct pcpu_chunk *pcpu_reserved_chunk; static int pcpu_reserved_chunk_limit; -/* - * Free path accesses and alters only the index data structures and can be - * safely called from atomic context. When memory needs to be returned to - * the system, free path schedules reclaim_work. - */ static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ @@ -727,20 +722,21 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * @reserved: allocate from the reserved chunk if available + * @gfp: allocation flags * - * Allocate percpu area of @size bytes aligned at @align. - * - * CONTEXT: - * Does GFP_KERNEL allocation. + * Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't + * contain %GFP_KERNEL, the allocation is atomic. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) +static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, + gfp_t gfp) { static int warn_limit = 10; struct pcpu_chunk *chunk; const char *err; + bool is_atomic = !(gfp & GFP_KERNEL); int slot, off, new_alloc, cpu, ret; unsigned long flags; void __percpu *ptr; @@ -773,14 +769,15 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) while ((new_alloc = pcpu_need_to_extend(chunk))) { spin_unlock_irqrestore(&pcpu_lock, flags); - if (pcpu_extend_area_map(chunk, new_alloc) < 0) { + if (is_atomic || + pcpu_extend_area_map(chunk, new_alloc) < 0) { err = "failed to extend area map of reserved chunk"; goto fail; } spin_lock_irqsave(&pcpu_lock, flags); } - off = pcpu_alloc_area(chunk, size, align, false); + off = pcpu_alloc_area(chunk, size, align, is_atomic); if (off >= 0) goto area_found; @@ -797,6 +794,8 @@ restart: new_alloc = pcpu_need_to_extend(chunk); if (new_alloc) { + if (is_atomic) + continue; spin_unlock_irqrestore(&pcpu_lock, flags); if (pcpu_extend_area_map(chunk, new_alloc) < 0) { @@ -811,7 +810,7 @@ restart: goto restart; } - off = pcpu_alloc_area(chunk, size, align, false); + off = pcpu_alloc_area(chunk, size, align, is_atomic); if (off >= 0) goto area_found; } @@ -824,6 +823,9 @@ restart: * tasks to create chunks simultaneously. Serialize and create iff * there's still no empty chunk after grabbing the mutex. */ + if (is_atomic) + goto fail; + mutex_lock(&pcpu_alloc_mutex); if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { @@ -846,7 +848,7 @@ area_found: spin_unlock_irqrestore(&pcpu_lock, flags); /* populate if not all pages are already there */ - if (true) { + if (!is_atomic) { int page_start, page_end, rs, re; mutex_lock(&pcpu_alloc_mutex); @@ -884,9 +886,9 @@ area_found: fail_unlock: spin_unlock_irqrestore(&pcpu_lock, flags); fail: - if (warn_limit) { - pr_warning("PERCPU: allocation failed, size=%zu align=%zu, " - "%s\n", size, align, err); + if (!is_atomic && warn_limit) { + pr_warning("PERCPU: allocation failed, size=%zu align=%zu atomic=%d, %s\n", + size, align, is_atomic, err); dump_stack(); if (!--warn_limit) pr_info("PERCPU: limit reached, disable warning\n"); @@ -895,22 +897,34 @@ fail: } /** - * __alloc_percpu - allocate dynamic percpu area + * __alloc_percpu_gfp - allocate dynamic percpu area * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) + * @gfp: allocation flags * - * Allocate zero-filled percpu area of @size bytes aligned at @align. - * Might sleep. Might trigger writeouts. - * - * CONTEXT: - * Does GFP_KERNEL allocation. + * Allocate zero-filled percpu area of @size bytes aligned at @align. If + * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can + * be called from any context but is a lot more likely to fail. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ +void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) +{ + return pcpu_alloc(size, align, false, gfp); +} +EXPORT_SYMBOL_GPL(__alloc_percpu_gfp); + +/** + * __alloc_percpu - allocate dynamic percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL). + */ void __percpu *__alloc_percpu(size_t size, size_t align) { - return pcpu_alloc(size, align, false); + return pcpu_alloc(size, align, false, GFP_KERNEL); } EXPORT_SYMBOL_GPL(__alloc_percpu); @@ -932,7 +946,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); */ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) { - return pcpu_alloc(size, align, true); + return pcpu_alloc(size, align, true, GFP_KERNEL); } /** -- cgit v1.2.3 From 1a4d76076cda69b0abf15463a8cebc172406da25 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Sep 2014 14:46:05 -0400 Subject: percpu: implement asynchronous chunk population The percpu allocator now supports atomic allocations by only allocating from already populated areas but the mechanism to ensure that there's adequate amount of populated areas was missing. This patch expands pcpu_balance_work so that in addition to freeing excess free chunks it also populates chunks to maintain an adequate level of populated areas. pcpu_alloc() schedules pcpu_balance_work if the amount of free populated areas is too low or after an atomic allocation failure. * PERPCU_DYNAMIC_RESERVE is increased by two pages to account for PCPU_EMPTY_POP_PAGES_LOW. * pcpu_async_enabled is added to gate both async jobs - chunk->map_extend_work and pcpu_balance_work - so that we don't end up scheduling them while the needed subsystems aren't up yet. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 4 +- mm/percpu.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d1b416da25ed..a3aa63e47637 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -48,9 +48,9 @@ * intelligent way to determine this would be nice. */ #if BITS_PER_LONG > 32 -#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#define PERCPU_DYNAMIC_RESERVE (28 << 10) #else -#define PERCPU_DYNAMIC_RESERVE (12 << 10) +#define PERCPU_DYNAMIC_RESERVE (20 << 10) #endif extern void *pcpu_base_addr; diff --git a/mm/percpu.c b/mm/percpu.c index 28a830590b4c..867efd38d879 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -78,6 +78,8 @@ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ #define PCPU_ATOMIC_MAP_MARGIN_LOW 32 #define PCPU_ATOMIC_MAP_MARGIN_HIGH 64 +#define PCPU_EMPTY_POP_PAGES_LOW 2 +#define PCPU_EMPTY_POP_PAGES_HIGH 4 #ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ @@ -168,9 +170,22 @@ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ */ static int pcpu_nr_empty_pop_pages; -/* balance work is used to populate or destroy chunks asynchronously */ +/* + * Balance work is used to populate or destroy chunks asynchronously. We + * try to keep the number of populated free pages between + * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one + * empty chunk. + */ static void pcpu_balance_workfn(struct work_struct *work); static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn); +static bool pcpu_async_enabled __read_mostly; +static bool pcpu_atomic_alloc_failed; + +static void pcpu_schedule_balance_work(void) +{ + if (pcpu_async_enabled) + schedule_work(&pcpu_balance_work); +} static bool pcpu_addr_in_first_chunk(void *addr) { @@ -386,7 +401,8 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic) margin = 3; if (chunk->map_alloc < - chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) + chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW && + pcpu_async_enabled) schedule_work(&chunk->map_extend_work); } else { margin = PCPU_ATOMIC_MAP_MARGIN_HIGH; @@ -1005,6 +1021,9 @@ area_found: if (chunk != pcpu_reserved_chunk) pcpu_nr_empty_pop_pages -= occ_pages; + if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) + pcpu_schedule_balance_work(); + /* clear the areas and return address relative to base address */ for_each_possible_cpu(cpu) memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); @@ -1023,6 +1042,11 @@ fail: if (!--warn_limit) pr_info("PERCPU: limit reached, disable warning\n"); } + if (is_atomic) { + /* see the flag handling in pcpu_blance_workfn() */ + pcpu_atomic_alloc_failed = true; + pcpu_schedule_balance_work(); + } return NULL; } @@ -1080,7 +1104,7 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) } /** - * pcpu_balance_workfn - reclaim fully free chunks, workqueue function + * pcpu_balance_workfn - manage the amount of free chunks and populated pages * @work: unused * * Reclaim all fully free chunks except for the first one. @@ -1090,7 +1114,12 @@ static void pcpu_balance_workfn(struct work_struct *work) LIST_HEAD(to_free); struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; + int slot, nr_to_pop, ret; + /* + * There's no reason to keep around multiple unused chunks and VM + * areas can be scarce. Destroy all free chunks except for one. + */ mutex_lock(&pcpu_alloc_mutex); spin_lock_irq(&pcpu_lock); @@ -1118,6 +1147,74 @@ static void pcpu_balance_workfn(struct work_struct *work) pcpu_destroy_chunk(chunk); } + /* + * Ensure there are certain number of free populated pages for + * atomic allocs. Fill up from the most packed so that atomic + * allocs don't increase fragmentation. If atomic allocation + * failed previously, always populate the maximum amount. This + * should prevent atomic allocs larger than PAGE_SIZE from keeping + * failing indefinitely; however, large atomic allocs are not + * something we support properly and can be highly unreliable and + * inefficient. + */ +retry_pop: + if (pcpu_atomic_alloc_failed) { + nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH; + /* best effort anyway, don't worry about synchronization */ + pcpu_atomic_alloc_failed = false; + } else { + nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - + pcpu_nr_empty_pop_pages, + 0, PCPU_EMPTY_POP_PAGES_HIGH); + } + + for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) { + int nr_unpop = 0, rs, re; + + if (!nr_to_pop) + break; + + spin_lock_irq(&pcpu_lock); + list_for_each_entry(chunk, &pcpu_slot[slot], list) { + nr_unpop = pcpu_unit_pages - chunk->nr_populated; + if (nr_unpop) + break; + } + spin_unlock_irq(&pcpu_lock); + + if (!nr_unpop) + continue; + + /* @chunk can't go away while pcpu_alloc_mutex is held */ + pcpu_for_each_unpop_region(chunk, rs, re, 0, pcpu_unit_pages) { + int nr = min(re - rs, nr_to_pop); + + ret = pcpu_populate_chunk(chunk, rs, rs + nr); + if (!ret) { + nr_to_pop -= nr; + spin_lock_irq(&pcpu_lock); + pcpu_chunk_populated(chunk, rs, rs + nr); + spin_unlock_irq(&pcpu_lock); + } else { + nr_to_pop = 0; + } + + if (!nr_to_pop) + break; + } + } + + if (nr_to_pop) { + /* ran out of chunks to populate, create a new one and retry */ + chunk = pcpu_create_chunk(); + if (chunk) { + spin_lock_irq(&pcpu_lock); + pcpu_chunk_relocate(chunk, -1); + spin_unlock_irq(&pcpu_lock); + goto retry_pop; + } + } + mutex_unlock(&pcpu_alloc_mutex); } @@ -1160,7 +1257,7 @@ void free_percpu(void __percpu *ptr) list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) if (pos != chunk) { - schedule_work(&pcpu_balance_work); + pcpu_schedule_balance_work(); break; } } @@ -2187,3 +2284,15 @@ void __init percpu_init_late(void) spin_unlock_irqrestore(&pcpu_lock, flags); } } + +/* + * Percpu allocator is initialized early during boot when neither slab or + * workqueue is available. Plug async management until everything is up + * and running. + */ +static int __init percpu_enable_async(void) +{ + pcpu_async_enabled = true; + return 0; +} +subsys_initcall(percpu_enable_async); -- cgit v1.2.3 From d07e9c178f188f76851aca8ab49a78da13dda006 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 29 Aug 2014 15:13:21 +0200 Subject: PM / domains: Make generic_pm_domain.name const Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 7c1d252b20c0..ebc4c76ffb73 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -60,7 +60,7 @@ struct generic_pm_domain { struct mutex lock; struct dev_power_governor *gov; struct work_struct power_off_work; - char *name; + const char *name; unsigned int in_progress; /* Number of devices being suspended now */ atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ -- cgit v1.2.3 From 76ba59f8366f2d9282cb5bda9de75b4b68cbe55f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 26 Aug 2014 11:03:16 +0100 Subject: genirq: Add irq_domain-aware core IRQ handler Calling irq_find_mapping from outside a irq_{enter,exit} section is unsafe and produces ugly messages if CONFIG_PROVE_RCU is enabled: If coming from the idle state, the rcu_read_lock call in irq_find_mapping will generate an unpleasant warning: =============================== [ INFO: suspicious RCU usage. ] 3.16.0-rc1+ #135 Not tainted ------------------------------- include/linux/rcupdate.h:871 rcu_read_lock() used illegally while idle! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 1 lock held by swapper/0/0: #0: (rcu_read_lock){......}, at: [] irq_find_mapping+0x4c/0x198 As this issue is fairly widespread and involves at least three different architectures, a possible solution is to add a new handle_domain_irq entry point into the generic IRQ code that the interrupt controller code can call. This new function takes an irq_domain, and calls into irq_find_domain inside the irq_{enter,exit} block. An additional "lookup" parameter is used to allow non-domain architecture code to be replaced by this as well. Interrupt controllers can then be updated to use the new mechanism. This code is sitting behind a new CONFIG_HANDLE_DOMAIN_IRQ, as not all architectures implement set_irq_regs (yes, mn10300, I'm looking at you...). Reported-by: Vladimir Murzin Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1409047421-27649-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irqdesc.h | 19 +++++++++++++++++++ kernel/irq/Kconfig | 3 +++ kernel/irq/irqdesc.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 472c021a2d4f..ff24667cd86c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -12,6 +12,8 @@ struct irq_affinity_notify; struct proc_dir_entry; struct module; struct irq_desc; +struct irq_domain; +struct pt_regs; /** * struct irq_desc - interrupt descriptor @@ -118,6 +120,23 @@ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *de int generic_handle_irq(unsigned int irq); +#ifdef CONFIG_HANDLE_DOMAIN_IRQ +/* + * Convert a HW interrupt number to a logical one using a IRQ domain, + * and handle the result interrupt number. Return -EINVAL if + * conversion failed. Providing a NULL domain indicates that the + * conversion has already been done. + */ +int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, + bool lookup, struct pt_regs *regs); + +static inline int handle_domain_irq(struct irq_domain *domain, + unsigned int hwirq, struct pt_regs *regs) +{ + return __handle_domain_irq(domain, hwirq, true, regs); +} +#endif + /* Test to see if a driver has successfully requested an irq */ static inline int irq_has_action(unsigned int irq) { diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index d269cecdfbf0..225086b2652e 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -55,6 +55,9 @@ config GENERIC_IRQ_CHIP config IRQ_DOMAIN bool +config HANDLE_DOMAIN_IRQ + bool + config IRQ_DOMAIN_DEBUG bool "Expose hardware/virtual IRQ mapping via debugfs" depends on IRQ_DOMAIN && DEBUG_FS diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 1487a123db5c..a1782f88f0af 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internals.h" @@ -336,6 +337,47 @@ int generic_handle_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(generic_handle_irq); +#ifdef CONFIG_HANDLE_DOMAIN_IRQ +/** + * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * @lookup: Whether to perform the domain lookup or not + * @regs: Register file coming from the low-level handling code + * + * Returns: 0 on success, or -EINVAL if conversion has failed + */ +int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, + bool lookup, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned int irq = hwirq; + int ret = 0; + + irq_enter(); + +#ifdef CONFIG_IRQ_DOMAIN + if (lookup) + irq = irq_find_mapping(domain, hwirq); +#endif + + /* + * Some hardware gives randomly wrong interrupts. Rather + * than crashing, do something sensible. + */ + if (unlikely(!irq || irq >= nr_irqs)) { + ack_bad_irq(irq); + ret = -EINVAL; + } else { + generic_handle_irq(irq); + } + + irq_exit(); + set_irq_regs(old_regs); + return ret; +} +#endif + /* Dynamic interrupt handling */ /** -- cgit v1.2.3 From a4412fc9486ec85686c6c7929e7e829f62ae377e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:14 -0700 Subject: seccomp,x86,arm,mips,s390: Remove nr parameter from secure_computing The secure_computing function took a syscall number parameter, but it only paid any attention to that parameter if seccomp mode 1 was enabled. Rather than coming up with a kludge to get the parameter to work in mode 2, just remove the parameter. To avoid churn in arches that don't have seccomp filters (and may not even support syscall_get_nr right now), this leaves the parameter in secure_computing_strict, which is now a real function. For ARM, this is a bit ugly due to the fact that ARM conditionally supports seccomp filters. Fixing that would probably only be a couple of lines of code, but it should be coordinated with the audit maintainers. This will be a slight slowdown on some arches. The right fix is to pass in all of seccomp_data instead of trying to make just the syscall nr part be fast. This is a prerequisite for making two-phase seccomp work cleanly. Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: x86@kernel.org Cc: Kees Cook Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- arch/arm/kernel/ptrace.c | 7 ++++- arch/mips/kernel/ptrace.c | 2 +- arch/s390/kernel/ptrace.c | 2 +- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/vsyscall_64.c | 2 +- include/linux/seccomp.h | 21 +++++++------- kernel/seccomp.c | 64 ++++++++++++++++++++++++++++++------------- 7 files changed, 66 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 0c27ed6f3f23..5e772a21ab97 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -933,8 +933,13 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) current_thread_info()->syscall = scno; /* Do the secure computing check first; failures should be fast. */ - if (secure_computing(scno) == -1) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER + if (secure_computing() == -1) return -1; +#else + /* XXX: remove this once OABI gets fixed */ + secure_computing_strict(scno); +#endif if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 645b3c4fcfba..f7aac5b57b4b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -770,7 +770,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) long ret = 0; user_exit(); - if (secure_computing(syscall) == -1) + if (secure_computing() == -1) return -1; if (test_thread_flag(TIF_SYSCALL_TRACE) && diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 5dc7ad9e2fbf..bebacad48305 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -803,7 +803,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; /* Do the secure computing check first. */ - if (secure_computing(regs->gprs[2])) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. */ ret = -1; goto out; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 678c0ada3b3c..93c182a00506 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1471,7 +1471,7 @@ long syscall_trace_enter(struct pt_regs *regs) regs->flags |= X86_EFLAGS_TF; /* do the secure computing check first */ - if (secure_computing(regs->orig_ax)) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. */ ret = -1L; goto out; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e1e1e80fc6a6..957779f4eb40 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,7 +216,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; - tmp = secure_computing(syscall_nr); + tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 5d586a45a319..aa3c040230be 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -27,19 +27,17 @@ struct seccomp { struct seccomp_filter *filter; }; -extern int __secure_computing(int); -static inline int secure_computing(int this_syscall) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +extern int __secure_computing(void); +static inline int secure_computing(void) { if (unlikely(test_thread_flag(TIF_SECCOMP))) - return __secure_computing(this_syscall); + return __secure_computing(); return 0; } - -/* A wrapper for architectures supporting only SECCOMP_MODE_STRICT. */ -static inline void secure_computing_strict(int this_syscall) -{ - BUG_ON(secure_computing(this_syscall) != 0); -} +#else +extern void secure_computing_strict(int this_syscall); +#endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, char __user *); @@ -56,8 +54,11 @@ static inline int seccomp_mode(struct seccomp *s) struct seccomp { }; struct seccomp_filter { }; -static inline int secure_computing(int this_syscall) { return 0; } +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +static inline int secure_computing(void) { return 0; } +#else static inline void secure_computing_strict(int this_syscall) { return; } +#endif static inline long prctl_get_seccomp(void) { diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 44eb005c6695..5e738e0dd2e9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -23,8 +23,11 @@ /* #define SECCOMP_DEBUG 1 */ -#ifdef CONFIG_SECCOMP_FILTER +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include +#endif + +#ifdef CONFIG_SECCOMP_FILTER #include #include #include @@ -172,7 +175,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(int syscall) +static u32 seccomp_run_filters(void) { struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); struct seccomp_data sd; @@ -564,10 +567,43 @@ static int mode1_syscalls_32[] = { }; #endif -int __secure_computing(int this_syscall) +static void __secure_computing_strict(int this_syscall) +{ + int *syscall_whitelist = mode1_syscalls; +#ifdef CONFIG_COMPAT + if (is_compat_task()) + syscall_whitelist = mode1_syscalls_32; +#endif + do { + if (*syscall_whitelist == this_syscall) + return; + } while (*++syscall_whitelist); + +#ifdef SECCOMP_DEBUG + dump_stack(); +#endif + audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); + do_exit(SIGKILL); +} + +#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER +void secure_computing_strict(int this_syscall) +{ + int mode = current->seccomp.mode; + + if (mode == 0) + return; + else if (mode == SECCOMP_MODE_STRICT) + __secure_computing_strict(this_syscall); + else + BUG(); +} +#else +int __secure_computing(void) { + struct pt_regs *regs = task_pt_regs(current); + int this_syscall = syscall_get_nr(current, regs); int exit_sig = 0; - int *syscall; u32 ret; /* @@ -578,23 +614,12 @@ int __secure_computing(int this_syscall) switch (current->seccomp.mode) { case SECCOMP_MODE_STRICT: - syscall = mode1_syscalls; -#ifdef CONFIG_COMPAT - if (is_compat_task()) - syscall = mode1_syscalls_32; -#endif - do { - if (*syscall == this_syscall) - return 0; - } while (*++syscall); - exit_sig = SIGKILL; - ret = SECCOMP_RET_KILL; - break; + __secure_computing_strict(this_syscall); + return 0; #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: { int data; - struct pt_regs *regs = task_pt_regs(current); - ret = seccomp_run_filters(this_syscall); + ret = seccomp_run_filters(); data = ret & SECCOMP_RET_DATA; ret &= SECCOMP_RET_ACTION; switch (ret) { @@ -652,9 +677,10 @@ int __secure_computing(int this_syscall) #ifdef CONFIG_SECCOMP_FILTER skip: audit_seccomp(this_syscall, exit_sig, ret); -#endif return -1; +#endif } +#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ long prctl_get_seccomp(void) { -- cgit v1.2.3 From 13aa72f0fd0a9f98a41cefb662487269e2f1ad65 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:15 -0700 Subject: seccomp: Refactor the filter callback and the API The reason I did this is to add a seccomp API that will be usable for an x86 fast path. The x86 entry code needs to use a rather expensive slow path for a syscall that might be visible to things like ptrace. By splitting seccomp into two phases, we can check whether we need the slow path and then use the fast path in if the filter allows the syscall or just returns some errno. As a side effect, I think the new code is much easier to understand than the old code. This has one user-visible effect: the audit record written for SECCOMP_RET_TRACE is now a simple indication that SECCOMP_RET_TRACE happened. It used to depend in a complicated way on what the tracer did. I couldn't make much sense of it. Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/linux/seccomp.h | 6 ++ kernel/seccomp.c | 190 +++++++++++++++++++++++++++++++----------------- 2 files changed, 130 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index aa3c040230be..38851085e481 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -35,6 +35,12 @@ static inline int secure_computing(void) return __secure_computing(); return 0; } + +#define SECCOMP_PHASE1_OK 0 +#define SECCOMP_PHASE1_SKIP 1 + +extern u32 seccomp_phase1(void); +int seccomp_phase2(u32 phase1_result); #else extern void secure_computing_strict(int this_syscall); #endif diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5e738e0dd2e9..6c8528ce9df9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -21,8 +21,6 @@ #include #include -/* #define SECCOMP_DEBUG 1 */ - #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include #endif @@ -601,10 +599,21 @@ void secure_computing_strict(int this_syscall) #else int __secure_computing(void) { - struct pt_regs *regs = task_pt_regs(current); - int this_syscall = syscall_get_nr(current, regs); - int exit_sig = 0; - u32 ret; + u32 phase1_result = seccomp_phase1(); + + if (likely(phase1_result == SECCOMP_PHASE1_OK)) + return 0; + else if (likely(phase1_result == SECCOMP_PHASE1_SKIP)) + return -1; + else + return seccomp_phase2(phase1_result); +} + +#ifdef CONFIG_SECCOMP_FILTER +static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) +{ + u32 filter_ret, action; + int data; /* * Make sure that any changes to mode from another thread have @@ -612,73 +621,122 @@ int __secure_computing(void) */ rmb(); - switch (current->seccomp.mode) { + filter_ret = seccomp_run_filters(); + data = filter_ret & SECCOMP_RET_DATA; + action = filter_ret & SECCOMP_RET_ACTION; + + switch (action) { + case SECCOMP_RET_ERRNO: + /* Set the low-order 16-bits as a errno. */ + syscall_set_return_value(current, regs, + -data, 0); + goto skip; + + case SECCOMP_RET_TRAP: + /* Show the handler the original registers. */ + syscall_rollback(current, regs); + /* Let the filter pass back 16 bits of data. */ + seccomp_send_sigsys(this_syscall, data); + goto skip; + + case SECCOMP_RET_TRACE: + return filter_ret; /* Save the rest for phase 2. */ + + case SECCOMP_RET_ALLOW: + return SECCOMP_PHASE1_OK; + + case SECCOMP_RET_KILL: + default: + audit_seccomp(this_syscall, SIGSYS, action); + do_exit(SIGSYS); + } + + unreachable(); + +skip: + audit_seccomp(this_syscall, 0, action); + return SECCOMP_PHASE1_SKIP; +} +#endif + +/** + * seccomp_phase1() - run fast path seccomp checks on the current syscall + * + * This only reads pt_regs via the syscall_xyz helpers. The only change + * it will make to pt_regs is via syscall_set_return_value, and it will + * only do that if it returns SECCOMP_PHASE1_SKIP. + * + * It may also call do_exit or force a signal; these actions must be + * safe. + * + * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should + * be processed normally. + * + * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be + * invoked. In this case, seccomp_phase1 will have set the return value + * using syscall_set_return_value. + * + * If it returns anything else, then the return value should be passed + * to seccomp_phase2 from a context in which ptrace hooks are safe. + */ +u32 seccomp_phase1(void) +{ + int mode = current->seccomp.mode; + struct pt_regs *regs = task_pt_regs(current); + int this_syscall = syscall_get_nr(current, regs); + + switch (mode) { case SECCOMP_MODE_STRICT: - __secure_computing_strict(this_syscall); - return 0; + __secure_computing_strict(this_syscall); /* may call do_exit */ + return SECCOMP_PHASE1_OK; #ifdef CONFIG_SECCOMP_FILTER - case SECCOMP_MODE_FILTER: { - int data; - ret = seccomp_run_filters(); - data = ret & SECCOMP_RET_DATA; - ret &= SECCOMP_RET_ACTION; - switch (ret) { - case SECCOMP_RET_ERRNO: - /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, regs, - -data, 0); - goto skip; - case SECCOMP_RET_TRAP: - /* Show the handler the original registers. */ - syscall_rollback(current, regs); - /* Let the filter pass back 16 bits of data. */ - seccomp_send_sigsys(this_syscall, data); - goto skip; - case SECCOMP_RET_TRACE: - /* Skip these calls if there is no tracer. */ - if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { - syscall_set_return_value(current, regs, - -ENOSYS, 0); - goto skip; - } - /* Allow the BPF to provide the event message */ - ptrace_event(PTRACE_EVENT_SECCOMP, data); - /* - * The delivery of a fatal signal during event - * notification may silently skip tracer notification. - * Terminating the task now avoids executing a system - * call that may not be intended. - */ - if (fatal_signal_pending(current)) - break; - if (syscall_get_nr(current, regs) < 0) - goto skip; /* Explicit request to skip. */ - - return 0; - case SECCOMP_RET_ALLOW: - return 0; - case SECCOMP_RET_KILL: - default: - break; - } - exit_sig = SIGSYS; - break; - } + case SECCOMP_MODE_FILTER: + return __seccomp_phase1_filter(this_syscall, regs); #endif default: BUG(); } +} -#ifdef SECCOMP_DEBUG - dump_stack(); -#endif - audit_seccomp(this_syscall, exit_sig, ret); - do_exit(exit_sig); -#ifdef CONFIG_SECCOMP_FILTER -skip: - audit_seccomp(this_syscall, exit_sig, ret); - return -1; -#endif +/** + * seccomp_phase2() - finish slow path seccomp work for the current syscall + * @phase1_result: The return value from seccomp_phase1() + * + * This must be called from a context in which ptrace hooks can be used. + * + * Returns 0 if the syscall should be processed or -1 to skip the syscall. + */ +int seccomp_phase2(u32 phase1_result) +{ + struct pt_regs *regs = task_pt_regs(current); + u32 action = phase1_result & SECCOMP_RET_ACTION; + int data = phase1_result & SECCOMP_RET_DATA; + + BUG_ON(action != SECCOMP_RET_TRACE); + + audit_seccomp(syscall_get_nr(current, regs), 0, action); + + /* Skip these calls if there is no tracer. */ + if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { + syscall_set_return_value(current, regs, + -ENOSYS, 0); + return -1; + } + + /* Allow the BPF to provide the event message */ + ptrace_event(PTRACE_EVENT_SECCOMP, data); + /* + * The delivery of a fatal signal during event + * notification may silently skip tracer notification. + * Terminating the task now avoids executing a system + * call that may not be intended. + */ + if (fatal_signal_pending(current)) + do_exit(SIGSYS); + if (syscall_get_nr(current, regs) < 0) + return -1; /* Explicit request to skip. */ + + return 0; } #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ -- cgit v1.2.3 From d39bd00deabe57420f2a3669eb71b0e0c4997184 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:16 -0700 Subject: seccomp: Allow arch code to provide seccomp_data populate_seccomp_data is expensive: it works by inspecting task_pt_regs and various other bits to piece together all the information, and it's does so in multiple partially redundant steps. Arch-specific code in the syscall entry path can do much better. Admittedly this adds a bit of additional room for error, but the speedup should be worth it. Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/linux/seccomp.h | 2 +- kernel/seccomp.c | 32 +++++++++++++++++++------------- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 38851085e481..a19ddacdac30 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -39,7 +39,7 @@ static inline int secure_computing(void) #define SECCOMP_PHASE1_OK 0 #define SECCOMP_PHASE1_SKIP 1 -extern u32 seccomp_phase1(void); +extern u32 seccomp_phase1(struct seccomp_data *sd); int seccomp_phase2(u32 phase1_result); #else extern void secure_computing_strict(int this_syscall); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 6c8528ce9df9..1285cb205d49 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -173,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(void) +static u32 seccomp_run_filters(struct seccomp_data *sd) { struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); - struct seccomp_data sd; + struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ @@ -186,14 +186,17 @@ static u32 seccomp_run_filters(void) /* Make sure cross-thread synced filter points somewhere sane. */ smp_read_barrier_depends(); - populate_seccomp_data(&sd); + if (!sd) { + populate_seccomp_data(&sd_local); + sd = &sd_local; + } /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ for (; f; f = f->prev) { - u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -599,7 +602,7 @@ void secure_computing_strict(int this_syscall) #else int __secure_computing(void) { - u32 phase1_result = seccomp_phase1(); + u32 phase1_result = seccomp_phase1(NULL); if (likely(phase1_result == SECCOMP_PHASE1_OK)) return 0; @@ -610,7 +613,7 @@ int __secure_computing(void) } #ifdef CONFIG_SECCOMP_FILTER -static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) +static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) { u32 filter_ret, action; int data; @@ -621,20 +624,20 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) */ rmb(); - filter_ret = seccomp_run_filters(); + filter_ret = seccomp_run_filters(sd); data = filter_ret & SECCOMP_RET_DATA; action = filter_ret & SECCOMP_RET_ACTION; switch (action) { case SECCOMP_RET_ERRNO: /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, regs, + syscall_set_return_value(current, task_pt_regs(current), -data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers. */ - syscall_rollback(current, regs); + syscall_rollback(current, task_pt_regs(current)); /* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; @@ -661,11 +664,14 @@ skip: /** * seccomp_phase1() - run fast path seccomp checks on the current syscall + * @arg sd: The seccomp_data or NULL * * This only reads pt_regs via the syscall_xyz helpers. The only change * it will make to pt_regs is via syscall_set_return_value, and it will * only do that if it returns SECCOMP_PHASE1_SKIP. * + * If sd is provided, it will not read pt_regs at all. + * * It may also call do_exit or force a signal; these actions must be * safe. * @@ -679,11 +685,11 @@ skip: * If it returns anything else, then the return value should be passed * to seccomp_phase2 from a context in which ptrace hooks are safe. */ -u32 seccomp_phase1(void) +u32 seccomp_phase1(struct seccomp_data *sd) { int mode = current->seccomp.mode; - struct pt_regs *regs = task_pt_regs(current); - int this_syscall = syscall_get_nr(current, regs); + int this_syscall = sd ? sd->nr : + syscall_get_nr(current, task_pt_regs(current)); switch (mode) { case SECCOMP_MODE_STRICT: @@ -691,7 +697,7 @@ u32 seccomp_phase1(void) return SECCOMP_PHASE1_OK; #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: - return __seccomp_phase1_filter(this_syscall, regs); + return __seccomp_phase1_filter(this_syscall, sd); #endif default: BUG(); -- cgit v1.2.3 From 26ae4980b5e4739af93543a147facb421fb78ae8 Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Fri, 15 Aug 2014 16:07:48 -0500 Subject: fsl_ifc: Fix csor_ext position in fsl_ifc_regs According to Freescale manuals, the IFC_CSORn_EXT register is located immediately _after_ the bank's IFC_CSORn register. This patch adjusts the csor_ext member of and reserved register arrays immediately surrounding the csor_cs structure to provide proper access to this register. Signed-off-by: Aaron Sierra Acked-by: Prabhakar Kushwaha Signed-off-by: Scott Wood --- include/linux/fsl_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index f49ddb1b2273..84d60cb841b1 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -781,13 +781,13 @@ struct fsl_ifc_regs { __be32 amask; u32 res4[0x2]; } amask_cs[FSL_IFC_BANK_COUNT]; - u32 res5[0x17]; + u32 res5[0x18]; struct { - __be32 csor_ext; __be32 csor; + __be32 csor_ext; u32 res6; } csor_cs[FSL_IFC_BANK_COUNT]; - u32 res7[0x19]; + u32 res7[0x18]; struct { __be32 ftim[4]; u32 res8[0x8]; -- cgit v1.2.3 From 940001762ac514810e305aab356983829e5fa82a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 3 Sep 2014 09:22:36 +0800 Subject: lib/rhashtable: allow user to set the minimum shifts of shrinking Although rhashtable library allows user to specify a quiet big size for user's created hash table, the table may be shrunk to a very small size - HASH_MIN_SIZE(4) after object is removed from the table at the first time. Subsequently, even if the total amount of objects saved in the table is quite lower than user's initial setting in a long time, the hash table size is still dynamically adjusted by rhashtable_shrink() or rhashtable_expand() each time object is inserted or removed from the table. However, as synchronize_rcu() has to be called when table is shrunk or expanded by the two functions, we should permit user to set the minimum table size through configuring the minimum number of shifts according to user specific requirement, avoiding these expensive actions of shrinking or expanding because of calling synchronize_rcu(). Signed-off-by: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ lib/rhashtable.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 36826c0166c5..fb298e9d6d3a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -44,6 +44,7 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding + * @min_shift: Minimum number of shifts while shrinking * @hashfn: Function to hash key * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand @@ -57,6 +58,7 @@ struct rhashtable_params { size_t head_offset; u32 hash_rnd; size_t max_shift; + size_t min_shift; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; bool (*grow_decision)(const struct rhashtable *ht, diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a2c78810ebc1..8dfec3f26d4c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -298,7 +298,7 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) ASSERT_RHT_MUTEX(ht); - if (tbl->size <= HASH_MIN_SIZE) + if (ht->shift <= ht->p.min_shift) return 0; ntbl = bucket_table_alloc(tbl->size / 2, flags); @@ -506,9 +506,10 @@ void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); -static size_t rounded_hashtable_size(unsigned int nelem) +static size_t rounded_hashtable_size(struct rhashtable_params *params) { - return max(roundup_pow_of_two(nelem * 4 / 3), HASH_MIN_SIZE); + return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + 1UL << params->min_shift); } /** @@ -566,8 +567,11 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) (!params->key_len && !params->obj_hashfn)) return -EINVAL; + params->min_shift = max_t(size_t, params->min_shift, + ilog2(HASH_MIN_SIZE)); + if (params->nelem_hint) - size = rounded_hashtable_size(params->nelem_hint); + size = rounded_hashtable_size(params); tbl = bucket_table_alloc(size, GFP_KERNEL); if (tbl == NULL) -- cgit v1.2.3 From 03e9f0cac5da6af85758276cb4624caf5911f2b9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 3 Sep 2014 13:02:56 +0200 Subject: pinctrl: clean up after enable refactoring commit 2243a87d90b42eb38bc281957df3e57c712b5e56 "pinctrl: avoid duplicated calling enable_pinmux_setting for a pin" removed the .disable callback from the struct pinmux_ops, making the .enable() callback the only remaining callback. However .enable() is a bad name as it seems to imply that a muxing can also be disabled. Rename the callback to .set_mux() and also take this opportunity to clean out any remaining mentions of .disable() from the documentation. Acked-by: Stephen Warren Acked-by: Bjorn Andersson Acked-by: Fan Wu Signed-off-by: Linus Walleij --- Documentation/pinctrl.txt | 14 ++------------ drivers/pinctrl/berlin/berlin.c | 8 ++++---- drivers/pinctrl/mvebu/pinctrl-mvebu.c | 6 +++--- drivers/pinctrl/nomadik/pinctrl-abx500.c | 6 +++--- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 6 +++--- drivers/pinctrl/pinctrl-adi2.c | 6 +++--- drivers/pinctrl/pinctrl-as3722.c | 4 ++-- drivers/pinctrl/pinctrl-at91.c | 6 +++--- drivers/pinctrl/pinctrl-bcm281xx.c | 8 ++++---- drivers/pinctrl/pinctrl-bcm2835.c | 4 ++-- drivers/pinctrl/pinctrl-imx.c | 6 +++--- drivers/pinctrl/pinctrl-imx1-core.c | 6 +++--- drivers/pinctrl/pinctrl-lantiq.c | 8 ++++---- drivers/pinctrl/pinctrl-mxs.c | 6 +++--- drivers/pinctrl/pinctrl-palmas.c | 5 +++-- drivers/pinctrl/pinctrl-rockchip.c | 6 +++--- drivers/pinctrl/pinctrl-single.c | 4 ++-- drivers/pinctrl/pinctrl-st.c | 6 +++--- drivers/pinctrl/pinctrl-tb10x.c | 4 ++-- drivers/pinctrl/pinctrl-tegra-xusb.c | 8 ++++---- drivers/pinctrl/pinctrl-tegra.c | 7 ++++--- drivers/pinctrl/pinctrl-tz1090-pdc.c | 7 ++++--- drivers/pinctrl/pinctrl-tz1090.c | 6 +++--- drivers/pinctrl/pinctrl-u300.c | 6 +++--- drivers/pinctrl/pinmux.c | 10 +++++----- drivers/pinctrl/qcom/pinctrl-msm.c | 8 ++++---- drivers/pinctrl/samsung/pinctrl-exynos5440.c | 7 ++++--- drivers/pinctrl/samsung/pinctrl-samsung.c | 7 ++++--- drivers/pinctrl/sh-pfc/pinctrl.c | 6 +++--- drivers/pinctrl/sirf/pinctrl-sirf.c | 7 ++++--- drivers/pinctrl/spear/pinctrl-spear.c | 4 ++-- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 8 ++++---- drivers/pinctrl/vt8500/pinctrl-wmt.c | 8 ++++---- include/linux/pinctrl/pinmux.h | 7 +++---- 34 files changed, 110 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index 23f1590f49fe..b8f2147b96dd 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt @@ -702,7 +702,7 @@ static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, +int foo_set_mux(struct pinctrl_dev *pctldev, unsigned selector, unsigned group) { u8 regbit = (1 << selector + group); @@ -711,21 +711,11 @@ int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -void foo_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - u8 regbit = (1 << selector + group); - - writeb((readb(MUX) & ~(regbit)), MUX) - return 0; -} - struct pinmux_ops foo_pmxops = { .get_functions_count = foo_get_functions_count, .get_function_name = foo_get_fname, .get_function_groups = foo_get_groups, - .enable = foo_enable, - .disable = foo_disable, + .set_mux = foo_set_mux, }; /* Pinmux operations are handled by some pin controller */ diff --git a/drivers/pinctrl/berlin/berlin.c b/drivers/pinctrl/berlin/berlin.c index 86db2235ab00..61f1bf0e60ba 100644 --- a/drivers/pinctrl/berlin/berlin.c +++ b/drivers/pinctrl/berlin/berlin.c @@ -170,9 +170,9 @@ berlin_pinctrl_find_function_by_name(struct berlin_pinctrl *pctrl, return NULL; } -static int berlin_pinmux_enable(struct pinctrl_dev *pctrl_dev, - unsigned function, - unsigned group) +static int berlin_pinmux_set(struct pinctrl_dev *pctrl_dev, + unsigned function, + unsigned group) { struct berlin_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctrl_dev); const struct berlin_desc_group *group_desc = pctrl->desc->groups + group; @@ -197,7 +197,7 @@ static const struct pinmux_ops berlin_pinmux_ops = { .get_functions_count = &berlin_pinmux_get_functions_count, .get_function_name = &berlin_pinmux_get_function_name, .get_function_groups = &berlin_pinmux_get_function_groups, - .enable = &berlin_pinmux_enable, + .set_mux = &berlin_pinmux_set, }; static int berlin_pinctrl_add_function(struct berlin_pinctrl *pctrl, diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c index 9908374f8f92..f3b426cdaf8f 100644 --- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c +++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c @@ -259,8 +259,8 @@ static int mvebu_pinmux_get_groups(struct pinctrl_dev *pctldev, unsigned fid, return 0; } -static int mvebu_pinmux_enable(struct pinctrl_dev *pctldev, unsigned fid, - unsigned gid) +static int mvebu_pinmux_set(struct pinctrl_dev *pctldev, unsigned fid, + unsigned gid) { struct mvebu_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); struct mvebu_pinctrl_function *func = &pctl->functions[fid]; @@ -344,7 +344,7 @@ static const struct pinmux_ops mvebu_pinmux_ops = { .get_function_groups = mvebu_pinmux_get_groups, .gpio_request_enable = mvebu_pinmux_gpio_request_enable, .gpio_set_direction = mvebu_pinmux_gpio_set_direction, - .enable = mvebu_pinmux_enable, + .set_mux = mvebu_pinmux_set, }; static int mvebu_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c index a53a689a2bfa..b0289824cf73 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.c +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c @@ -709,8 +709,8 @@ static int abx500_pmx_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int abx500_pmx_set(struct pinctrl_dev *pctldev, unsigned function, + unsigned group) { struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev); struct gpio_chip *chip = &pct->chip; @@ -784,7 +784,7 @@ static const struct pinmux_ops abx500_pinmux_ops = { .get_functions_count = abx500_pmx_get_funcs_cnt, .get_function_name = abx500_pmx_get_func_name, .get_function_groups = abx500_pmx_get_func_groups, - .enable = abx500_pmx_enable, + .set_mux = abx500_pmx_set, .gpio_request_enable = abx500_gpio_request_enable, .gpio_disable_free = abx500_gpio_disable_free, }; diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index e7cab07eef47..c093d75a022a 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1647,8 +1647,8 @@ static int nmk_pmx_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int nmk_pmx_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int nmk_pmx_set(struct pinctrl_dev *pctldev, unsigned function, + unsigned group) { struct nmk_pinctrl *npct = pinctrl_dev_get_drvdata(pctldev); const struct nmk_pingroup *g; @@ -1810,7 +1810,7 @@ static const struct pinmux_ops nmk_pinmux_ops = { .get_functions_count = nmk_pmx_get_funcs_cnt, .get_function_name = nmk_pmx_get_func_name, .get_function_groups = nmk_pmx_get_func_groups, - .enable = nmk_pmx_enable, + .set_mux = nmk_pmx_set, .gpio_request_enable = nmk_gpio_request_enable, .gpio_disable_free = nmk_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c index b092b93c67a1..e02943b0285d 100644 --- a/drivers/pinctrl/pinctrl-adi2.c +++ b/drivers/pinctrl/pinctrl-adi2.c @@ -619,8 +619,8 @@ static struct pinctrl_ops adi_pctrl_ops = { .get_group_pins = adi_get_group_pins, }; -static int adi_pinmux_enable(struct pinctrl_dev *pctldev, unsigned func_id, - unsigned group_id) +static int adi_pinmux_set(struct pinctrl_dev *pctldev, unsigned func_id, + unsigned group_id) { struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev); struct gpio_port *port; @@ -698,7 +698,7 @@ static int adi_pinmux_request_gpio(struct pinctrl_dev *pctldev, } static struct pinmux_ops adi_pinmux_ops = { - .enable = adi_pinmux_enable, + .set_mux = adi_pinmux_set, .get_functions_count = adi_pinmux_get_funcs_count, .get_function_name = adi_pinmux_get_func_name, .get_function_groups = adi_pinmux_get_groups, diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 0e4ec91f4d49..1f790a4b83fe 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -230,7 +230,7 @@ static int as3722_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int as3722_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int as3722_pinctrl_set(struct pinctrl_dev *pctldev, unsigned function, unsigned group) { struct as3722_pctrl_info *as_pci = pinctrl_dev_get_drvdata(pctldev); @@ -327,7 +327,7 @@ static const struct pinmux_ops as3722_pinmux_ops = { .get_functions_count = as3722_pinctrl_get_funcs_count, .get_function_name = as3722_pinctrl_get_func_name, .get_function_groups = as3722_pinctrl_get_func_groups, - .enable = as3722_pinctrl_enable, + .set_mux = as3722_pinctrl_set, .gpio_request_enable = as3722_pinctrl_gpio_request_enable, .gpio_set_direction = as3722_pinctrl_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index af1ba4fc150d..4839c36a3a1c 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -554,8 +554,8 @@ static void at91_mux_gpio_enable(void __iomem *pio, unsigned mask, bool input) writel_relaxed(mask, pio + (input ? PIO_ODR : PIO_OER)); } -static int at91_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int at91_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); const struct at91_pmx_pin *pins_conf = info->groups[group].pins_conf; @@ -684,7 +684,7 @@ static const struct pinmux_ops at91_pmx_ops = { .get_functions_count = at91_pmx_get_funcs_count, .get_function_name = at91_pmx_get_func_name, .get_function_groups = at91_pmx_get_groups, - .enable = at91_pmx_enable, + .set_mux = at91_pmx_set, .gpio_request_enable = at91_gpio_request_enable, .gpio_disable_free = at91_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-bcm281xx.c b/drivers/pinctrl/pinctrl-bcm281xx.c index c5ca9e633fff..a26e0c2ba33e 100644 --- a/drivers/pinctrl/pinctrl-bcm281xx.c +++ b/drivers/pinctrl/pinctrl-bcm281xx.c @@ -1055,9 +1055,9 @@ static int bcm281xx_pinctrl_get_fcn_groups(struct pinctrl_dev *pctldev, return 0; } -static int bcm281xx_pinmux_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int bcm281xx_pinmux_set(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct bcm281xx_pinctrl_data *pdata = pinctrl_dev_get_drvdata(pctldev); const struct bcm281xx_pin_function *f = &pdata->functions[function]; @@ -1084,7 +1084,7 @@ static struct pinmux_ops bcm281xx_pinctrl_pinmux_ops = { .get_functions_count = bcm281xx_pinctrl_get_fcns_count, .get_function_name = bcm281xx_pinctrl_get_fcn_name, .get_function_groups = bcm281xx_pinctrl_get_fcn_groups, - .enable = bcm281xx_pinmux_enable, + .set_mux = bcm281xx_pinmux_set, }; static int bcm281xx_pinctrl_pin_config_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-bcm2835.c b/drivers/pinctrl/pinctrl-bcm2835.c index 5bcfd7ace0cd..eabba02f71f9 100644 --- a/drivers/pinctrl/pinctrl-bcm2835.c +++ b/drivers/pinctrl/pinctrl-bcm2835.c @@ -830,7 +830,7 @@ static int bcm2835_pmx_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int bcm2835_pmx_enable(struct pinctrl_dev *pctldev, +static int bcm2835_pmx_set(struct pinctrl_dev *pctldev, unsigned func_selector, unsigned group_selector) { @@ -869,7 +869,7 @@ static const struct pinmux_ops bcm2835_pmx_ops = { .get_functions_count = bcm2835_pmx_get_functions_count, .get_function_name = bcm2835_pmx_get_function_name, .get_function_groups = bcm2835_pmx_get_function_groups, - .enable = bcm2835_pmx_enable, + .set_mux = bcm2835_pmx_set, .gpio_disable_free = bcm2835_pmx_gpio_disable_free, .gpio_set_direction = bcm2835_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index 946d594a64dd..570e5acb4a6a 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -179,8 +179,8 @@ static const struct pinctrl_ops imx_pctrl_ops = { }; -static int imx_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct imx_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev); const struct imx_pinctrl_soc_info *info = ipctl->info; @@ -298,7 +298,7 @@ static const struct pinmux_ops imx_pmx_ops = { .get_functions_count = imx_pmx_get_funcs_count, .get_function_name = imx_pmx_get_func_name, .get_function_groups = imx_pmx_get_groups, - .enable = imx_pmx_enable, + .set_mux = imx_pmx_set, }; static int imx_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-imx1-core.c b/drivers/pinctrl/pinctrl-imx1-core.c index 483420757c9f..176a3e62f1cf 100644 --- a/drivers/pinctrl/pinctrl-imx1-core.c +++ b/drivers/pinctrl/pinctrl-imx1-core.c @@ -298,8 +298,8 @@ static const struct pinctrl_ops imx1_pctrl_ops = { }; -static int imx1_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int imx1_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct imx1_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev); const struct imx1_pinctrl_soc_info *info = ipctl->info; @@ -385,7 +385,7 @@ static const struct pinmux_ops imx1_pmx_ops = { .get_functions_count = imx1_pmx_get_funcs_count, .get_function_name = imx1_pmx_get_func_name, .get_function_groups = imx1_pmx_get_groups, - .enable = imx1_pmx_enable, + .set_mux = imx1_pmx_set, }; static int imx1_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-lantiq.c b/drivers/pinctrl/pinctrl-lantiq.c index d22ca252b80d..296e5b37f768 100644 --- a/drivers/pinctrl/pinctrl-lantiq.c +++ b/drivers/pinctrl/pinctrl-lantiq.c @@ -257,9 +257,9 @@ static int match_group_mux(const struct ltq_pin_group *grp, return ret; } -static int ltq_pmx_enable(struct pinctrl_dev *pctrldev, - unsigned func, - unsigned group) +static int ltq_pmx_set(struct pinctrl_dev *pctrldev, + unsigned func, + unsigned group) { struct ltq_pinmux_info *info = pinctrl_dev_get_drvdata(pctrldev); const struct ltq_pin_group *pin_grp = &info->grps[group]; @@ -316,7 +316,7 @@ static const struct pinmux_ops ltq_pmx_ops = { .get_functions_count = ltq_pmx_func_count, .get_function_name = ltq_pmx_func_name, .get_function_groups = ltq_pmx_get_groups, - .enable = ltq_pmx_enable, + .set_mux = ltq_pmx_set, .gpio_request_enable = ltq_pmx_gpio_request_enable, }; diff --git a/drivers/pinctrl/pinctrl-mxs.c b/drivers/pinctrl/pinctrl-mxs.c index 40c76f26998c..67035091f8fd 100644 --- a/drivers/pinctrl/pinctrl-mxs.c +++ b/drivers/pinctrl/pinctrl-mxs.c @@ -195,8 +195,8 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int mxs_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct mxs_pinctrl_data *d = pinctrl_dev_get_drvdata(pctldev); struct mxs_group *g = &d->soc->groups[group]; @@ -223,7 +223,7 @@ static const struct pinmux_ops mxs_pinmux_ops = { .get_functions_count = mxs_pinctrl_get_funcs_count, .get_function_name = mxs_pinctrl_get_func_name, .get_function_groups = mxs_pinctrl_get_func_groups, - .enable = mxs_pinctrl_enable, + .set_mux = mxs_pinctrl_set_mux, }; static int mxs_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-palmas.c b/drivers/pinctrl/pinctrl-palmas.c index f13d0e78a41c..e3079d3d19fe 100644 --- a/drivers/pinctrl/pinctrl-palmas.c +++ b/drivers/pinctrl/pinctrl-palmas.c @@ -685,7 +685,8 @@ static int palmas_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int palmas_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int palmas_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned function, unsigned group) { struct palmas_pctrl_chip_info *pci = pinctrl_dev_get_drvdata(pctldev); @@ -742,7 +743,7 @@ static const struct pinmux_ops palmas_pinmux_ops = { .get_functions_count = palmas_pinctrl_get_funcs_count, .get_function_name = palmas_pinctrl_get_func_name, .get_function_groups = palmas_pinctrl_get_func_groups, - .enable = palmas_pinctrl_enable, + .set_mux = palmas_pinctrl_set_mux, }; static int palmas_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 5e8b2e04cd7a..b91b966add8f 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -813,8 +813,8 @@ static int rockchip_pmx_get_groups(struct pinctrl_dev *pctldev, return 0; } -static int rockchip_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); const unsigned int *pins = info->groups[group].pins; @@ -889,7 +889,7 @@ static const struct pinmux_ops rockchip_pmx_ops = { .get_functions_count = rockchip_pmx_get_funcs_count, .get_function_name = rockchip_pmx_get_func_name, .get_function_groups = rockchip_pmx_get_groups, - .enable = rockchip_pmx_enable, + .set_mux = rockchip_pmx_set, .gpio_set_direction = rockchip_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 784de13facf3..9032422ad18f 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -447,7 +447,7 @@ static int pcs_get_function(struct pinctrl_dev *pctldev, unsigned pin, return 0; } -static int pcs_enable(struct pinctrl_dev *pctldev, unsigned fselector, +static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector, unsigned group) { struct pcs_device *pcs; @@ -519,7 +519,7 @@ static const struct pinmux_ops pcs_pinmux_ops = { .get_functions_count = pcs_get_functions_count, .get_function_name = pcs_get_function_name, .get_function_groups = pcs_get_function_groups, - .enable = pcs_enable, + .set_mux = pcs_sex_mux, .gpio_request_enable = pcs_request_gpio, }; diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 5475374d803f..6c4c41bed1e3 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -914,8 +914,8 @@ static struct st_pio_control *st_get_pio_control( return &bank->pc; } -static int st_pmx_enable(struct pinctrl_dev *pctldev, unsigned fselector, - unsigned group) +static int st_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned fselector, + unsigned group) { struct st_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); struct st_pinconf *conf = info->groups[group].pin_conf; @@ -951,7 +951,7 @@ static struct pinmux_ops st_pmxops = { .get_functions_count = st_pmx_get_funcs_count, .get_function_name = st_pmx_get_fname, .get_function_groups = st_pmx_get_groups, - .enable = st_pmx_enable, + .set_mux = st_pmx_set_mux, .gpio_set_direction = st_pmx_set_gpio_direction, }; diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index 71c5d4f0c538..3b9bfcf717ac 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -697,7 +697,7 @@ static void tb10x_gpio_disable_free(struct pinctrl_dev *pctl, mutex_unlock(&state->mutex); } -static int tb10x_pctl_enable(struct pinctrl_dev *pctl, +static int tb10x_pctl_set_mux(struct pinctrl_dev *pctl, unsigned func_selector, unsigned group_selector) { struct tb10x_pinctrl *state = pinctrl_dev_get_drvdata(pctl); @@ -744,7 +744,7 @@ static struct pinmux_ops tb10x_pinmux_ops = { .get_function_groups = tb10x_get_function_groups, .gpio_request_enable = tb10x_gpio_request_enable, .gpio_disable_free = tb10x_gpio_disable_free, - .enable = tb10x_pctl_enable, + .set_mux = tb10x_pctl_set_mux, }; static struct pinctrl_desc tb10x_pindesc = { diff --git a/drivers/pinctrl/pinctrl-tegra-xusb.c b/drivers/pinctrl/pinctrl-tegra-xusb.c index a06620474845..4648c9e5c582 100644 --- a/drivers/pinctrl/pinctrl-tegra-xusb.c +++ b/drivers/pinctrl/pinctrl-tegra-xusb.c @@ -281,9 +281,9 @@ static int tegra_xusb_padctl_get_function_groups(struct pinctrl_dev *pinctrl, return 0; } -static int tegra_xusb_padctl_pinmux_enable(struct pinctrl_dev *pinctrl, - unsigned int function, - unsigned int group) +static int tegra_xusb_padctl_pinmux_set(struct pinctrl_dev *pinctrl, + unsigned int function, + unsigned int group) { struct tegra_xusb_padctl *padctl = pinctrl_dev_get_drvdata(pinctrl); const struct tegra_xusb_padctl_lane *lane; @@ -311,7 +311,7 @@ static const struct pinmux_ops tegra_xusb_padctl_pinmux_ops = { .get_functions_count = tegra_xusb_padctl_get_functions_count, .get_function_name = tegra_xusb_padctl_get_function_name, .get_function_groups = tegra_xusb_padctl_get_function_groups, - .enable = tegra_xusb_padctl_pinmux_enable, + .set_mux = tegra_xusb_padctl_pinmux_set, }; static int tegra_xusb_padctl_pinconf_group_get(struct pinctrl_dev *pinctrl, diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c index 150af5503c09..e5949d51bc52 100644 --- a/drivers/pinctrl/pinctrl-tegra.c +++ b/drivers/pinctrl/pinctrl-tegra.c @@ -262,8 +262,9 @@ static int tegra_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int tegra_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int tegra_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); const struct tegra_pingroup *g; @@ -294,7 +295,7 @@ static const struct pinmux_ops tegra_pinmux_ops = { .get_functions_count = tegra_pinctrl_get_funcs_count, .get_function_name = tegra_pinctrl_get_func_name, .get_function_groups = tegra_pinctrl_get_func_groups, - .enable = tegra_pinctrl_enable, + .set_mux = tegra_pinctrl_set_mux, }; static int tegra_pinconf_reg(struct tegra_pmx *pmx, diff --git a/drivers/pinctrl/pinctrl-tz1090-pdc.c b/drivers/pinctrl/pinctrl-tz1090-pdc.c index 41e81a35cabb..3bb6a3b78864 100644 --- a/drivers/pinctrl/pinctrl-tz1090-pdc.c +++ b/drivers/pinctrl/pinctrl-tz1090-pdc.c @@ -547,8 +547,9 @@ static void tz1090_pdc_pinctrl_mux(struct tz1090_pdc_pmx *pmx, __global_unlock2(flags); } -static int tz1090_pdc_pinctrl_enable(struct pinctrl_dev *pctldev, - unsigned int function, unsigned int group) +static int tz1090_pdc_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned int function, + unsigned int group) { struct tz1090_pdc_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); const struct tz1090_pdc_pingroup *grp = &tz1090_pdc_groups[group]; @@ -634,7 +635,7 @@ static struct pinmux_ops tz1090_pdc_pinmux_ops = { .get_functions_count = tz1090_pdc_pinctrl_get_funcs_count, .get_function_name = tz1090_pdc_pinctrl_get_func_name, .get_function_groups = tz1090_pdc_pinctrl_get_func_groups, - .enable = tz1090_pdc_pinctrl_enable, + .set_mux = tz1090_pdc_pinctrl_set_mux, .gpio_request_enable = tz1090_pdc_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pdc_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-tz1090.c b/drivers/pinctrl/pinctrl-tz1090.c index 24082216842e..48d36413b99f 100644 --- a/drivers/pinctrl/pinctrl-tz1090.c +++ b/drivers/pinctrl/pinctrl-tz1090.c @@ -1415,8 +1415,8 @@ found_mux: * the effect is the same as enabling the function on each individual pin in the * group. */ -static int tz1090_pinctrl_enable(struct pinctrl_dev *pctldev, - unsigned int function, unsigned int group) +static int tz1090_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned int function, unsigned int group) { struct tz1090_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); struct tz1090_pingroup *grp; @@ -1517,7 +1517,7 @@ static struct pinmux_ops tz1090_pinmux_ops = { .get_functions_count = tz1090_pinctrl_get_funcs_count, .get_function_name = tz1090_pinctrl_get_func_name, .get_function_groups = tz1090_pinctrl_get_func_groups, - .enable = tz1090_pinctrl_enable, + .set_mux = tz1090_pinctrl_set_mux, .gpio_request_enable = tz1090_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c index 0959bb36450f..e9c7113d81f2 100644 --- a/drivers/pinctrl/pinctrl-u300.c +++ b/drivers/pinctrl/pinctrl-u300.c @@ -955,8 +955,8 @@ static void u300_pmx_endisable(struct u300_pmx *upmx, unsigned selector, } } -static int u300_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int u300_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct u300_pmx *upmx; @@ -994,7 +994,7 @@ static const struct pinmux_ops u300_pmx_ops = { .get_functions_count = u300_pmx_get_funcs_count, .get_function_name = u300_pmx_get_func_name, .get_function_groups = u300_pmx_get_groups, - .enable = u300_pmx_enable, + .set_mux = u300_pmx_set_mux, }; static int u300_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin, diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index c055daf9a80f..b874458dcb88 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -41,7 +41,7 @@ int pinmux_check_ops(struct pinctrl_dev *pctldev) !ops->get_functions_count || !ops->get_function_name || !ops->get_function_groups || - !ops->enable) { + !ops->set_mux) { dev_err(pctldev->dev, "pinmux ops lacks necessary functions\n"); return -EINVAL; } @@ -445,15 +445,15 @@ int pinmux_enable_setting(struct pinctrl_setting const *setting) desc->mux_setting = &(setting->data.mux); } - ret = ops->enable(pctldev, setting->data.mux.func, - setting->data.mux.group); + ret = ops->set_mux(pctldev, setting->data.mux.func, + setting->data.mux.group); if (ret) - goto err_enable; + goto err_set_mux; return 0; -err_enable: +err_set_mux: for (i = 0; i < num_pins; i++) { desc = pin_desc_get(pctldev, pins[i]); if (desc) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 80a64cad907d..01eab47a746f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -134,9 +134,9 @@ static int msm_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int msm_pinmux_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); const struct msm_pingroup *g; @@ -170,7 +170,7 @@ static const struct pinmux_ops msm_pinmux_ops = { .get_functions_count = msm_get_functions_count, .get_function_name = msm_get_function_name, .get_function_groups = msm_get_function_groups, - .enable = msm_pinmux_enable, + .set_mux = msm_pinmux_set_mux, }; static int msm_config_reg(struct msm_pinctrl *pctrl, diff --git a/drivers/pinctrl/samsung/pinctrl-exynos5440.c b/drivers/pinctrl/samsung/pinctrl-exynos5440.c index 603da2f9dd95..b995ec2c5d16 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos5440.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos5440.c @@ -364,8 +364,9 @@ static void exynos5440_pinmux_setup(struct pinctrl_dev *pctldev, unsigned select } /* enable a specified pinmux by writing to registers */ -static int exynos5440_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int exynos5440_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned selector, + unsigned group) { exynos5440_pinmux_setup(pctldev, selector, group, true); return 0; @@ -387,7 +388,7 @@ static const struct pinmux_ops exynos5440_pinmux_ops = { .get_functions_count = exynos5440_get_functions_count, .get_function_name = exynos5440_pinmux_get_fname, .get_function_groups = exynos5440_pinmux_get_groups, - .enable = exynos5440_pinmux_enable, + .set_mux = exynos5440_pinmux_set_mux, .gpio_set_direction = exynos5440_pinmux_gpio_set_direction, }; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index b07406da333c..4a47691c32b1 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -401,8 +401,9 @@ static void samsung_pinmux_setup(struct pinctrl_dev *pctldev, unsigned selector, } /* enable a specified pinmux by writing to registers */ -static int samsung_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int samsung_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned selector, + unsigned group) { samsung_pinmux_setup(pctldev, selector, group, true); return 0; @@ -413,7 +414,7 @@ static const struct pinmux_ops samsung_pinmux_ops = { .get_functions_count = samsung_get_functions_count, .get_function_name = samsung_pinmux_get_fname, .get_function_groups = samsung_pinmux_get_groups, - .enable = samsung_pinmux_enable, + .set_mux = samsung_pinmux_set_mux, }; /* set or get the pin config settings for a specified pin */ diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index 11db3ee39d40..910deaefa0ac 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -312,8 +312,8 @@ static int sh_pfc_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int sh_pfc_func_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int sh_pfc_func_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev); struct sh_pfc *pfc = pmx->pfc; @@ -442,7 +442,7 @@ static const struct pinmux_ops sh_pfc_pinmux_ops = { .get_functions_count = sh_pfc_get_functions_count, .get_function_name = sh_pfc_get_function_name, .get_function_groups = sh_pfc_get_function_groups, - .enable = sh_pfc_func_enable, + .set_mux = sh_pfc_func_set_mux, .gpio_request_enable = sh_pfc_gpio_request_enable, .gpio_disable_free = sh_pfc_gpio_disable_free, .gpio_set_direction = sh_pfc_gpio_set_direction, diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index 45f93f952a39..6f252fd7cb0c 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -179,8 +179,9 @@ static void sirfsoc_pinmux_endisable(struct sirfsoc_pmx *spmx, } } -static int sirfsoc_pinmux_enable(struct pinctrl_dev *pmxdev, unsigned selector, - unsigned group) +static int sirfsoc_pinmux_set_mux(struct pinctrl_dev *pmxdev, + unsigned selector, + unsigned group) { struct sirfsoc_pmx *spmx; @@ -237,7 +238,7 @@ static int sirfsoc_pinmux_request_gpio(struct pinctrl_dev *pmxdev, } static struct pinmux_ops sirfsoc_pinmux_ops = { - .enable = sirfsoc_pinmux_enable, + .set_mux = sirfsoc_pinmux_set_mux, .get_functions_count = sirfsoc_pinmux_get_funcs_count, .get_function_name = sirfsoc_pinmux_get_func_name, .get_function_groups = sirfsoc_pinmux_get_groups, diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c index f72cc4e192bd..abdb05ac43dc 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.c +++ b/drivers/pinctrl/spear/pinctrl-spear.c @@ -268,7 +268,7 @@ static int spear_pinctrl_endisable(struct pinctrl_dev *pctldev, return 0; } -static int spear_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int spear_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned function, unsigned group) { return spear_pinctrl_endisable(pctldev, function, group, true); @@ -338,7 +338,7 @@ static const struct pinmux_ops spear_pinmux_ops = { .get_functions_count = spear_pinctrl_get_funcs_count, .get_function_name = spear_pinctrl_get_func_name, .get_function_groups = spear_pinctrl_get_func_groups, - .enable = spear_pinctrl_enable, + .set_mux = spear_pinctrl_set_mux, .gpio_request_enable = gpio_request_enable, .gpio_disable_free = gpio_disable_free, }; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 3df66e366c87..ef9d804e55de 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -393,9 +393,9 @@ static void sunxi_pmx_set(struct pinctrl_dev *pctldev, spin_unlock_irqrestore(&pctl->lock, flags); } -static int sunxi_pmx_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int sunxi_pmx_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); struct sunxi_pinctrl_group *g = pctl->groups + group; @@ -441,7 +441,7 @@ static const struct pinmux_ops sunxi_pmx_ops = { .get_functions_count = sunxi_pmx_get_funcs_cnt, .get_function_name = sunxi_pmx_get_func_name, .get_function_groups = sunxi_pmx_get_func_groups, - .enable = sunxi_pmx_enable, + .set_mux = sunxi_pmx_set_mux, .gpio_set_direction = sunxi_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 8cea355f9a81..d055d63309e4 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -131,9 +131,9 @@ static int wmt_set_pinmux(struct wmt_pinctrl_data *data, unsigned func, return 0; } -static int wmt_pmx_enable(struct pinctrl_dev *pctldev, - unsigned func_selector, - unsigned group_selector) +static int wmt_pmx_set_mux(struct pinctrl_dev *pctldev, + unsigned func_selector, + unsigned group_selector) { struct wmt_pinctrl_data *data = pinctrl_dev_get_drvdata(pctldev); u32 pinnum = data->pins[group_selector].number; @@ -168,7 +168,7 @@ static struct pinmux_ops wmt_pinmux_ops = { .get_functions_count = wmt_pmx_get_functions_count, .get_function_name = wmt_pmx_get_function_name, .get_function_groups = wmt_pmx_get_function_groups, - .enable = wmt_pmx_enable, + .set_mux = wmt_pmx_set_mux, .gpio_disable_free = wmt_pmx_gpio_disable_free, .gpio_set_direction = wmt_pmx_gpio_set_direction, }; diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index 3097aafbeb24..511bda9ed4bf 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -39,13 +39,12 @@ struct pinctrl_dev; * name can be used with the generic @pinctrl_ops to retrieve the * actual pins affected. The applicable groups will be returned in * @groups and the number of groups in @num_groups - * @enable: enable a certain muxing function with a certain pin group. The + * @set_mux: enable a certain muxing function with a certain pin group. The * driver does not need to figure out whether enabling this function * conflicts some other use of the pins in that group, such collisions * are handled by the pinmux subsystem. The @func_selector selects a * certain function whereas @group_selector selects a certain set of pins * to be used. On simple controllers the latter argument may be ignored - * @disable: disable a certain muxing selector with a certain pin group * @gpio_request_enable: requests and enables GPIO on a certain pin. * Implement this only if you can mux every pin individually as GPIO. The * affected GPIO range is passed along with an offset(pin number) into that @@ -68,8 +67,8 @@ struct pinmux_ops { unsigned selector, const char * const **groups, unsigned * const num_groups); - int (*enable) (struct pinctrl_dev *pctldev, unsigned func_selector, - unsigned group_selector); + int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector, + unsigned group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset); -- cgit v1.2.3 From 3af0dbd592fe0a92002f16e341519ba03e92adf7 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Mon, 1 Sep 2014 11:19:52 +0800 Subject: gpio: mcp23s08 to support both device tree and platform data Device tree is not enabled in some architecture where gpio driver mcp23s08 is still required. v2-changes: - Parse device tree properties into platform data other than individual variables. v3-changes: - Use of_node in gpio_chip device structure, because the struct device * always has an of_node which is NULL when OF is not used. Signed-off-by: Sonic Zhang Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 1 - drivers/gpio/gpio-mcp23s08.c | 64 +++++++++++++++++++++++--------------------- include/linux/spi/mcp23s08.h | 18 +++++++++++++ 3 files changed, 52 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index ec27ec0be8c2..ec398bee7c60 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -806,7 +806,6 @@ config GPIO_MAX7301 config GPIO_MCP23S08 tristate "Microchip MCP23xxx I/O expander" - depends on OF_GPIO depends on (SPI_MASTER && !I2C) || I2C help SPI/I2C driver for Microchip MCP23S08/MCP23S17/MCP23008/MCP23017 diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index 6f183d9b487e..8488e2fd307c 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -479,7 +479,7 @@ static int mcp23s08_irq_setup(struct mcp23s08 *mcp) mutex_init(&mcp->irq_lock); - mcp->irq_domain = irq_domain_add_linear(chip->of_node, chip->ngpio, + mcp->irq_domain = irq_domain_add_linear(chip->dev->of_node, chip->ngpio, &irq_domain_simple_ops, mcp); if (!mcp->irq_domain) return -ENODEV; @@ -581,7 +581,7 @@ done: static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, void *data, unsigned addr, unsigned type, - unsigned base, unsigned pullups) + struct mcp23s08_platform_data *pdata, int cs) { int status; bool mirror = false; @@ -635,7 +635,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, return -EINVAL; } - mcp->chip.base = base; + mcp->chip.base = pdata->base; mcp->chip.can_sleep = true; mcp->chip.dev = dev; mcp->chip.owner = THIS_MODULE; @@ -648,11 +648,9 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, if (status < 0) goto fail; - mcp->irq_controller = of_property_read_bool(mcp->chip.of_node, - "interrupt-controller"); + mcp->irq_controller = pdata->irq_controller; if (mcp->irq && mcp->irq_controller && (type == MCP_TYPE_017)) - mirror = of_property_read_bool(mcp->chip.of_node, - "microchip,irq-mirror"); + mirror = pdata->mirror; if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN) || mirror) { /* mcp23s17 has IOCON twice, make sure they are in sync */ @@ -668,7 +666,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, } /* configure ~100K pullups */ - status = mcp->ops->write(mcp, MCP_GPPU, pullups); + status = mcp->ops->write(mcp, MCP_GPPU, pdata->chip[cs].pullups); if (status < 0) goto fail; @@ -768,25 +766,29 @@ MODULE_DEVICE_TABLE(of, mcp23s08_i2c_of_match); static int mcp230xx_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct mcp23s08_platform_data *pdata; + struct mcp23s08_platform_data *pdata, local_pdata; struct mcp23s08 *mcp; - int status, base, pullups; + int status; const struct of_device_id *match; match = of_match_device(of_match_ptr(mcp23s08_i2c_of_match), &client->dev); - pdata = dev_get_platdata(&client->dev); - if (match || !pdata) { - base = -1; - pullups = 0; + if (match) { + pdata = &local_pdata; + pdata->base = -1; + pdata->chip[0].pullups = 0; + pdata->irq_controller = of_property_read_bool( + client->dev.of_node, + "interrupt-controller"); + pdata->mirror = of_property_read_bool(client->dev.of_node, + "microchip,irq-mirror"); client->irq = irq_of_parse_and_map(client->dev.of_node, 0); } else { - if (!gpio_is_valid(pdata->base)) { + pdata = dev_get_platdata(&client->dev); + if (!pdata || !gpio_is_valid(pdata->base)) { dev_dbg(&client->dev, "invalid platform data\n"); return -EINVAL; } - base = pdata->base; - pullups = pdata->chip[0].pullups; } mcp = kzalloc(sizeof(*mcp), GFP_KERNEL); @@ -795,7 +797,7 @@ static int mcp230xx_probe(struct i2c_client *client, mcp->irq = client->irq; status = mcp23s08_probe_one(mcp, &client->dev, client, client->addr, - id->driver_data, base, pullups); + id->driver_data, pdata, 0); if (status) goto fail; @@ -863,14 +865,12 @@ static void mcp23s08_i2c_exit(void) { } static int mcp23s08_probe(struct spi_device *spi) { - struct mcp23s08_platform_data *pdata; + struct mcp23s08_platform_data *pdata, local_pdata; unsigned addr; int chips = 0; struct mcp23s08_driver_data *data; int status, type; - unsigned base = -1, - ngpio = 0, - pullups[ARRAY_SIZE(pdata->chip)]; + unsigned ngpio = 0; const struct of_device_id *match; u32 spi_present_mask = 0; @@ -893,11 +893,18 @@ static int mcp23s08_probe(struct spi_device *spi) return -ENODEV; } + pdata = &local_pdata; + pdata->base = -1; for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) { - pullups[addr] = 0; + pdata->chip[addr].pullups = 0; if (spi_present_mask & (1 << addr)) chips++; } + pdata->irq_controller = of_property_read_bool( + spi->dev.of_node, + "interrupt-controller"); + pdata->mirror = of_property_read_bool(spi->dev.of_node, + "microchip,irq-mirror"); } else { type = spi_get_device_id(spi)->driver_data; pdata = dev_get_platdata(&spi->dev); @@ -917,10 +924,7 @@ static int mcp23s08_probe(struct spi_device *spi) return -EINVAL; } spi_present_mask |= 1 << addr; - pullups[addr] = pdata->chip[addr].pullups; } - - base = pdata->base; } if (!chips) @@ -938,13 +942,13 @@ static int mcp23s08_probe(struct spi_device *spi) chips--; data->mcp[addr] = &data->chip[chips]; status = mcp23s08_probe_one(data->mcp[addr], &spi->dev, spi, - 0x40 | (addr << 1), type, base, - pullups[addr]); + 0x40 | (addr << 1), type, pdata, + addr); if (status < 0) goto fail; - if (base != -1) - base += (type == MCP_TYPE_S17) ? 16 : 8; + if (pdata->base != -1) + pdata->base += (type == MCP_TYPE_S17) ? 16 : 8; ngpio += (type == MCP_TYPE_S17) ? 16 : 8; } data->ngpio = ngpio; diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 2d676d5aaa89..aa07d7b32568 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -22,4 +22,22 @@ struct mcp23s08_platform_data { * base to base+15 (or base+31 for s17 variant). */ unsigned base; + /* Marks the device as a interrupt controller. + * NOTE: The interrupt functionality is only supported for i2c + * versions of the chips. The spi chips can also do the interrupts, + * but this is not supported by the linux driver yet. + */ + bool irq_controller; + + /* Sets the mirror flag in the IOCON register. Devices + * with two interrupt outputs (these are the devices ending with 17 and + * those that have 16 IOs) have two IO banks: IO 0-7 form bank 1 and + * IO 8-15 are bank 2. These chips have two different interrupt outputs: + * One for bank 1 and another for bank 2. If irq-mirror is set, both + * interrupts are generated regardless of the bank that an input change + * occurred on. If it is not set, the interrupt are only generated for + * the bank they belong to. + * On devices with only one interrupt output this property is useless. + */ + bool mirror; }; -- cgit v1.2.3 From 0d37899363b0e5486f8800231b7edd75e8b60942 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 3 Sep 2014 20:01:55 +0200 Subject: pinctrl: generic: Fix PIN_CONFIG_DRIVE_OPEN_SOURCE source/drain doc mismatch PIN_CONFIG_DRIVE_OPEN_SOURCE enables open source, not open drain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index a15f10727eb8..d578a60eff23 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -57,7 +57,7 @@ * which are then pulled up with an external resistor. Setting this * config will enable open drain mode, the argument is ignored. * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source - * (open emitter). Setting this config will enable open drain mode, the + * (open emitter). Setting this config will enable open source mode, the * argument is ignored. * @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current * passed as argument. The argument is in mA. -- cgit v1.2.3 From 87fed556d08d21dd7dd3e0222c94c187e4c2d5e2 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 3 Sep 2014 10:35:13 +0200 Subject: bcma: get info about flash type SoC booted from MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is an ongoing work on cleaning MIPS's nvram support so it could be re-used on other platforms (bcm53xx to say precisely). This will require a bit of extra logic in bcma this patch implements. Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/bcma/driver_mips.c | 62 ++++++++++++++++++++++++++++++++++++++++++ include/linux/bcma/bcma_regs.h | 5 ++++ 2 files changed, 67 insertions(+) (limited to 'include/linux') diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 11115bbe115c..004d6aa671ce 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -21,6 +21,14 @@ #include #include +enum bcma_boot_dev { + BCMA_BOOT_DEV_UNK = 0, + BCMA_BOOT_DEV_ROM, + BCMA_BOOT_DEV_PARALLEL, + BCMA_BOOT_DEV_SERIAL, + BCMA_BOOT_DEV_NAND, +}; + static const char * const part_probes[] = { "bcm47xxpart", NULL }; static struct physmap_flash_data bcma_pflash_data = { @@ -229,11 +237,51 @@ u32 bcma_cpu_clock(struct bcma_drv_mips *mcore) } EXPORT_SYMBOL(bcma_cpu_clock); +static enum bcma_boot_dev bcma_boot_dev(struct bcma_bus *bus) +{ + struct bcma_drv_cc *cc = &bus->drv_cc; + u8 cc_rev = cc->core->id.rev; + + if (cc_rev == 42) { + struct bcma_device *core; + + core = bcma_find_core(bus, BCMA_CORE_NS_ROM); + if (core) { + switch (bcma_aread32(core, BCMA_IOST) & + BCMA_NS_ROM_IOST_BOOT_DEV_MASK) { + case BCMA_NS_ROM_IOST_BOOT_DEV_NOR: + return BCMA_BOOT_DEV_SERIAL; + case BCMA_NS_ROM_IOST_BOOT_DEV_NAND: + return BCMA_BOOT_DEV_NAND; + case BCMA_NS_ROM_IOST_BOOT_DEV_ROM: + default: + return BCMA_BOOT_DEV_ROM; + } + } + } else { + if (cc_rev == 38) { + if (cc->status & BCMA_CC_CHIPST_5357_NAND_BOOT) + return BCMA_BOOT_DEV_NAND; + else if (cc->status & BIT(5)) + return BCMA_BOOT_DEV_ROM; + } + + if ((cc->capabilities & BCMA_CC_CAP_FLASHT) == + BCMA_CC_FLASHT_PARA) + return BCMA_BOOT_DEV_PARALLEL; + else + return BCMA_BOOT_DEV_SERIAL; + } + + return BCMA_BOOT_DEV_SERIAL; +} + static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) { struct bcma_bus *bus = mcore->core->bus; struct bcma_drv_cc *cc = &bus->drv_cc; struct bcma_pflash *pflash = &cc->pflash; + enum bcma_boot_dev boot_dev; switch (cc->capabilities & BCMA_CC_CAP_FLASHT) { case BCMA_CC_FLASHT_STSER: @@ -269,6 +317,20 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) bcma_nflash_init(cc); } } + + /* Determine flash type this SoC boots from */ + boot_dev = bcma_boot_dev(bus); + switch (boot_dev) { + case BCMA_BOOT_DEV_PARALLEL: + case BCMA_BOOT_DEV_SERIAL: + /* TODO: Init NVRAM using BCMA_SOC_FLASH2 window */ + break; + case BCMA_BOOT_DEV_NAND: + /* TODO: Init NVRAM using BCMA_SOC_FLASH1 window */ + break; + default: + break; + } } void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index 917dcd7965e7..e64ae7bf80a1 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -39,6 +39,11 @@ #define BCMA_RESET_CTL_RESET 0x0001 #define BCMA_RESET_ST 0x0804 +#define BCMA_NS_ROM_IOST_BOOT_DEV_MASK 0x0003 +#define BCMA_NS_ROM_IOST_BOOT_DEV_NOR 0x0000 +#define BCMA_NS_ROM_IOST_BOOT_DEV_NAND 0x0001 +#define BCMA_NS_ROM_IOST_BOOT_DEV_ROM 0x0002 + /* BCMA PCI config space registers. */ #define BCMA_PCI_PMCSR 0x44 #define BCMA_PCI_PE 0x100 -- cgit v1.2.3 From 40bea039593dfc7f3f9814dab844f6db43ae580b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 13 Aug 2014 18:50:16 +0200 Subject: nohz: Restore NMI safe local irq work for local nohz kick The local nohz kick is currently used by perf which needs it to be NMI-safe. Recent commit though (7d1311b93e58ed55f3a31cc8f94c4b8fe988a2b9) changed its implementation to fire the local kick using the remote kick API. It was convenient to make the code more generic but the remote kick isn't NMI-safe. As a result: WARNING: CPU: 3 PID: 18062 at kernel/irq_work.c:72 irq_work_queue_on+0x11e/0x140() CPU: 3 PID: 18062 Comm: trinity-subchil Not tainted 3.16.0+ #34 0000000000000009 00000000903774d1 ffff880244e06c00 ffffffff9a7f1e37 0000000000000000 ffff880244e06c38 ffffffff9a0791dd ffff880244fce180 0000000000000003 ffff880244e06d58 ffff880244e06ef8 0000000000000000 Call Trace: [] dump_stack+0x4e/0x7a [] warn_slowpath_common+0x7d/0xa0 [] warn_slowpath_null+0x1a/0x20 [] irq_work_queue_on+0x11e/0x140 [] tick_nohz_full_kick_cpu+0x57/0x90 [] __perf_event_overflow+0x275/0x350 [] ? perf_event_task_disable+0xa0/0xa0 [] ? x86_perf_event_set_period+0xbf/0x150 [] perf_event_overflow+0x14/0x20 [] intel_pmu_handle_irq+0x206/0x410 [] ? arch_vtime_task_switch+0x63/0x130 [] perf_event_nmi_handler+0x2b/0x50 [] nmi_handle+0xd2/0x390 [] ? nmi_handle+0x5/0x390 [] ? lock_release+0xab/0x330 [] default_do_nmi+0x72/0x1c0 [] ? cpuacct_account_field+0xcf/0x200 [] do_nmi+0xb8/0x100 Lets fix this by restoring the use of local irq work for the nohz local kick. Reported-by: Catalin Iacob Reported-and-tested-by: Dave Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 7 +------ kernel/time/tick-sched.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 059052306831..9a82c7dc3fdd 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -183,13 +183,8 @@ static inline bool tick_nohz_full_cpu(int cpu) extern void tick_nohz_init(void); extern void __tick_nohz_full_check(void); +extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); - -static inline void tick_nohz_full_kick(void) -{ - tick_nohz_full_kick_cpu(smp_processor_id()); -} - extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(struct task_struct *tsk); #else diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 99aa6ee3908f..f654a8a298fa 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -224,6 +224,20 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { .func = nohz_full_kick_work_func, }; +/* + * Kick this CPU if it's full dynticks in order to force it to + * re-evaluate its dependency on the tick and restart it if necessary. + * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(), + * is NMI safe. + */ +void tick_nohz_full_kick(void) +{ + if (!tick_nohz_full_cpu(smp_processor_id())) + return; + + irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); +} + /* * Kick the CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. -- cgit v1.2.3 From 8d38821cbcf51292cd5a23469d03bd38932a3ba9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Aug 2014 14:15:10 +0200 Subject: resources: Add device-managed request/release_resource() Provide device-managed implementations of the request_resource() and release_resource() functions. Upon failure to request a resource, the new devm_request_resource() function will output an error message for consistent error reporting. Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas Acked-by: Tejun Heo --- Documentation/driver-model/devres.txt | 2 + include/linux/ioport.h | 5 +++ kernel/resource.c | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index d14710b04439..befc3fe12ba6 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -264,8 +264,10 @@ IIO IO region devm_release_mem_region() devm_release_region() + devm_release_resource() devm_request_mem_region() devm_request_region() + devm_request_resource() IOMAP devm_ioport_map() diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 142ec544167c..2c5250222278 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -215,6 +215,11 @@ static inline int __deprecated check_region(resource_size_t s, /* Wrappers for managed devices */ struct device; + +extern int devm_request_resource(struct device *dev, struct resource *root, + struct resource *new); +extern void devm_release_resource(struct device *dev, struct resource *new); + #define devm_request_region(dev,start,n,name) \ __devm_request_region(dev, &ioport_resource, (start), (n), (name)) #define devm_request_mem_region(dev,start,n,name) \ diff --git a/kernel/resource.c b/kernel/resource.c index da14b8d09296..ca24f19f9d18 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1248,6 +1248,76 @@ int release_mem_region_adjustable(struct resource *parent, /* * Managed region resource */ +static void devm_resource_release(struct device *dev, void *ptr) +{ + struct resource **r = ptr; + + release_resource(*r); +} + +/** + * devm_request_resource() - request and reserve an I/O or memory resource + * @dev: device for which to request the resource + * @root: root of the resource tree from which to request the resource + * @new: descriptor of the resource to request + * + * This is a device-managed version of request_resource(). There is usually + * no need to release resources requested by this function explicitly since + * that will be taken care of when the device is unbound from its driver. + * If for some reason the resource needs to be released explicitly, because + * of ordering issues for example, drivers must call devm_release_resource() + * rather than the regular release_resource(). + * + * When a conflict is detected between any existing resources and the newly + * requested resource, an error message will be printed. + * + * Returns 0 on success or a negative error code on failure. + */ +int devm_request_resource(struct device *dev, struct resource *root, + struct resource *new) +{ + struct resource *conflict, **ptr; + + ptr = devres_alloc(devm_resource_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + *ptr = new; + + conflict = request_resource_conflict(root, new); + if (conflict) { + dev_err(dev, "resource collision: %pR conflicts with %s %pR\n", + new, conflict->name, conflict); + devres_free(ptr); + return -EBUSY; + } + + devres_add(dev, ptr); + return 0; +} +EXPORT_SYMBOL(devm_request_resource); + +static int devm_resource_match(struct device *dev, void *res, void *data) +{ + struct resource **ptr = res; + + return *ptr == data; +} + +/** + * devm_release_resource() - release a previously requested resource + * @dev: device for which to release the resource + * @new: descriptor of the resource to release + * + * Releases a resource previously requested using devm_request_resource(). + */ +void devm_release_resource(struct device *dev, struct resource *new) +{ + WARN_ON(devres_release(dev, devm_resource_release, devm_resource_match, + new)); +} +EXPORT_SYMBOL(devm_release_resource); + struct region_devres { struct resource *parent; resource_size_t start; -- cgit v1.2.3 From efd01a72e7ec99ed583151fbf16b176cd2158967 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 5 Aug 2014 14:08:55 +0200 Subject: PCI/AER: Make standalone includable The header file references u16 and u32 types, but they are not defined in the header nor does the header pull in the necessary includes for them. This causes build breakage when the file is included without any of the dependencies being satisfied from somewhere else. Fix this by including linux/types.h (for u16 and u32). [bhelgaas: removed pci_dev declaration (already added by 5ccb8225abf2)] Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas --- include/linux/aer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index c826d1c28f9c..4fef65e57023 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -7,6 +7,8 @@ #ifndef _AER_H_ #define _AER_H_ +#include + #define AER_NONFATAL 0 #define AER_FATAL 1 #define AER_CORRECTABLE 2 -- cgit v1.2.3 From 3b5e6454aaf6b4439b19400d8365e2ec2d24e411 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 4 Sep 2014 22:04:42 -0400 Subject: fs/buffer.c: support buffer cache allocations with gfp modifiers A buffer cache is allocated from movable area because it is referred for a while and released soon. But some filesystems are taking buffer cache for a long time and it can disturb page migration. New APIs are introduced to allocate buffer cache with user specific flag. *_gfp APIs are for user want to set page allocation flag for page cache allocation. And *_unmovable APIs are for the user wants to allocate page cache from non-movable area. Signed-off-by: Gioh Kim Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/buffer.c | 45 +++++++++++++++++++++++++------------------ include/linux/buffer_head.h | 47 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 8f05111bbb8b..9a6029e0dd71 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, */ static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size, int sizebits) + pgoff_t index, int size, int sizebits, gfp_t gfp) { struct inode *inode = bdev->bd_inode; struct page *page; @@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, int ret = 0; /* Will call free_more_memory() */ gfp_t gfp_mask; - gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; - gfp_mask |= __GFP_MOVABLE; + gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; + /* * XXX: __getblk_slow() can not really deal with failure and * will endlessly loop on improvised global reclaim. Prefer @@ -1058,7 +1058,7 @@ failed: * that page was dirty, the buffers are set dirty also. */ static int -grow_buffers(struct block_device *bdev, sector_t block, int size) +grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) { pgoff_t index; int sizebits; @@ -1085,11 +1085,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) } /* Create a page with the proper size buffers.. */ - return grow_dev_page(bdev, block, index, size, sizebits); + return grow_dev_page(bdev, block, index, size, sizebits, gfp); } -static struct buffer_head * -__getblk_slow(struct block_device *bdev, sector_t block, int size) +struct buffer_head * +__getblk_slow(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || @@ -1111,13 +1112,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) if (bh) return bh; - ret = grow_buffers(bdev, block, size); + ret = grow_buffers(bdev, block, size, gfp); if (ret < 0) return NULL; if (ret == 0) free_more_memory(); } } +EXPORT_SYMBOL(__getblk_slow); /* * The relationship between dirty buffers and dirty pages: @@ -1371,24 +1373,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__find_get_block); /* - * __getblk will locate (and, if necessary, create) the buffer_head + * __getblk_gfp() will locate (and, if necessary, create) the buffer_head * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() - * attempt is failing. FIXME, perhaps? + * __getblk_gfp() will lock up the machine if grow_dev_page's + * try_to_free_buffers() attempt is failing. FIXME, perhaps? */ struct buffer_head * -__getblk(struct block_device *bdev, sector_t block, unsigned size) +__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { struct buffer_head *bh = __find_get_block(bdev, block, size); might_sleep(); if (bh == NULL) - bh = __getblk_slow(bdev, block, size); + bh = __getblk_slow(bdev, block, size, gfp); return bh; } -EXPORT_SYMBOL(__getblk); +EXPORT_SYMBOL(__getblk_gfp); /* * Do async read-ahead on a buffer.. @@ -1404,24 +1407,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__breadahead); /** - * __bread() - reads a specified block and returns the bh + * __bread_gfp() - reads a specified block and returns the bh * @bdev: the block_device to read from * @block: number of block * @size: size (in bytes) to read - * + * @gfp: page allocation flag + * * Reads a specified block, and returns buffer head that contains it. + * The page cache can be allocated from non-movable area + * not to prevent page migration if you set gfp to zero. * It returns NULL if the block was unreadable. */ struct buffer_head * -__bread(struct block_device *bdev, sector_t block, unsigned size) +__bread_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { - struct buffer_head *bh = __getblk(bdev, block, size); + struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); if (likely(bh) && !buffer_uptodate(bh)) bh = __bread_slow(bh); return bh; } -EXPORT_SYMBOL(__bread); +EXPORT_SYMBOL(__bread_gfp); /* * invalidate_bh_lrus() is called rarely - but not only at unmount. diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 324329ceea1e..73b45225a7ca 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk(struct block_device *bdev, sector_t block, - unsigned size); +struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size); +struct buffer_head *__bread_gfp(struct block_device *, + sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -295,7 +296,13 @@ static inline void bforget(struct buffer_head *bh) static inline struct buffer_head * sb_bread(struct super_block *sb, sector_t block) { - return __bread(sb->s_bdev, block, sb->s_blocksize); + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); +} + +static inline struct buffer_head * +sb_bread_unmovable(struct super_block *sb, sector_t block) +{ + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0); } static inline void @@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb, sector_t block) static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { - return __getblk(sb->s_bdev, block, sb->s_blocksize); + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } static inline struct buffer_head * @@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, 0); +} + +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); +} + +/** + * __bread() - reads a specified block and returns the bh + * @bdev: the block_device to read from + * @block: number of block + * @size: size (in bytes) to read + * + * Reads a specified block, and returns buffer head that contains it. + * The page cache is allocated from movable area so that it can be migrated. + * It returns NULL if the block was unreadable. + */ +static inline struct buffer_head * +__bread(struct block_device *bdev, sector_t block, unsigned size) +{ + return __bread_gfp(bdev, block, size, __GFP_MOVABLE); +} + extern int __set_page_dirty_buffers(struct page *page); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From a9fe8e29945d56f35235a3a0fba99b4cf181d211 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 2 Sep 2014 15:49:26 +0200 Subject: ipv4: implement igmp_qrv sysctl to tune igmp robustness variable As in IPv6 people might increase the igmp query robustness variable to make sure unsolicited state change reports aren't lost on the network. Add and document this new knob to igmp code. RFCs allow tuning this parameter back to first IGMP RFC, so we also use this setting for all counters, including source specific multicast. Also take over sysctl value when upping the interface and don't reuse the last one seen on the interface. Cc: Flavio Leitner Signed-off-by: Hannes Frederic Sowa Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 5 +++++ include/linux/igmp.h | 1 + net/ipv4/igmp.c | 31 +++++++++++++++---------------- net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ 4 files changed, 31 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index cfc71ac0f764..db2383cb1df9 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -844,6 +844,11 @@ igmp_max_memberships - INTEGER conf/all/* is special, changes the settings for all interfaces +igmp_qrv - INTEGER + Controls the IGMP query robustness variable (see RFC2236 8.1). + Default: 2 (as specified by RFC2236 8.1) + Minimum: 1 (as specified by RFC6636 4.5) + log_martians - BOOLEAN Log packets with impossible addresses to kernel log. log_martians for the interface will be enabled if at least one of diff --git a/include/linux/igmp.h b/include/linux/igmp.h index f47550d75f85..2c677afeea47 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -39,6 +39,7 @@ static inline struct igmpv3_query * extern int sysctl_igmp_max_memberships; extern int sysctl_igmp_max_msf; +extern int sysctl_igmp_qrv; struct ip_sf_socklist { unsigned int sl_max; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 890c4258804c..4146153d875d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -117,7 +117,7 @@ #define IGMP_V2_Unsolicited_Report_Interval (10*HZ) #define IGMP_V3_Unsolicited_Report_Interval (1*HZ) #define IGMP_Query_Response_Interval (10*HZ) -#define IGMP_Unsolicited_Report_Count 2 +#define IGMP_Query_Robustness_Variable 2 #define IGMP_Initial_Report_Delay (1) @@ -756,8 +756,7 @@ static void igmp_ifc_event(struct in_device *in_dev) { if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_start_timer(in_dev, 1); } @@ -1086,8 +1085,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1226,8 +1224,7 @@ static void igmp_group_added(struct ip_mc_list *im) } /* else, v3 */ - im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_event(in_dev); #endif } @@ -1322,7 +1319,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); - im->unsolicit_count = IGMP_Unsolicited_Report_Count; + im->unsolicit_count = sysctl_igmp_qrv; #endif im->next_rcu = in_dev->mc_list; @@ -1460,7 +1457,7 @@ void ip_mc_init_dev(struct in_device *in_dev) (unsigned long)in_dev); setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); - in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; + in_dev->mr_qrv = sysctl_igmp_qrv; #endif spin_lock_init(&in_dev->mc_tomb_lock); @@ -1474,6 +1471,9 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); +#ifdef CONFIG_IP_MULTICAST + in_dev->mr_qrv = sysctl_igmp_qrv; +#endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for_each_pmc_rtnl(in_dev, pmc) @@ -1540,7 +1540,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) */ int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; - +#ifdef CONFIG_IP_MULTICAST +int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable; +#endif static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) @@ -1575,8 +1577,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1639,8 +1640,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -1818,8 +1818,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 79a007c52558..45d156dacd61 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -450,6 +450,16 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_qrv", + .data = &sysctl_igmp_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, +#endif { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, -- cgit v1.2.3 From 6b44f519017b219a12b37173c7eef8dfce2c0100 Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Sun, 24 Aug 2014 17:12:27 +0000 Subject: sched/wait: Document timeout corner case The timeout may elapse without 0 being returned, such as when waiting on an unused queue. Document this possibility. Signed-off-by: Scot Doyle Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LNX.2.11.1408241710070.6462@localhost.localdomain Signed-off-by: Ingo Molnar --- include/linux/wait.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 6fb1ba5f9b2f..034f6fc7c65f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -280,9 +280,11 @@ do { \ * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * - * The function returns 0 if the @timeout elapsed, or the remaining - * jiffies (at least 1) if the @condition evaluated to %true before - * the @timeout elapsed. + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * or the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed. */ #define wait_event_timeout(wq, condition, timeout) \ ({ \ @@ -363,9 +365,11 @@ do { \ * change the result of the wait condition. * * Returns: - * 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by - * a signal, or the remaining jiffies (at least 1) if the @condition - * evaluated to %true before the @timeout elapsed. + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was + * interrupted by a signal. */ #define wait_event_interruptible_timeout(wq, condition, timeout) \ ({ \ -- cgit v1.2.3 From 2740f0cf8ec8bc7ee6a58f68841759e367dda98f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Sep 2014 15:24:58 +0300 Subject: cfg80211: add Intel Mobile Communications copyright Our legal structure changed at some point (see wikipedia), but we forgot to immediately switch over to the new copyright notice. For files that we have modified in the time since the change, add the proper copyright notice now. Signed-off-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + include/net/cfg80211.h | 1 + net/wireless/chan.c | 1 + net/wireless/core.c | 1 + net/wireless/nl80211.c | 1 + net/wireless/reg.c | 1 + net/wireless/scan.c | 1 + net/wireless/util.c | 1 + 8 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 8018c915ee63..77d4f26e0235 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -6,6 +6,7 @@ * Copyright (c) 2002-2003, Jouni Malinen * Copyright (c) 2005, Devicescape Software, Inc. * Copyright (c) 2006, Michael Wu + * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ab21299c8f4d..6be68bb31918 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4,6 +4,7 @@ * 802.11 device and configuration interface * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 992b34070bcb..72d81e2154d5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -4,6 +4,7 @@ * any point in time. * * Copyright 2009 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include diff --git a/net/wireless/core.c b/net/wireless/core.c index a207eb116829..9698fe709251 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -2,6 +2,7 @@ * This is the linux wireless configuration interface. * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3011401f52c0..d876146498dd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2,6 +2,7 @@ * This is the new netlink-based wireless configuration interface. * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1afdf45db38f..8bfc9b172a49 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3,6 +3,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2007 Johannes Berg * Copyright 2008-2011 Luis R. Rodriguez + * Copyright 2013-2014 Intel Mobile Communications GmbH * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 620a4b40d466..bda39f149810 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2,6 +2,7 @@ * cfg80211 scan result handling * * Copyright 2008 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include #include diff --git a/net/wireless/util.c b/net/wireless/util.c index 728f1c0dc70d..a8b2816ffcb9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2,6 +2,7 @@ * Wireless utility functions * * Copyright 2007-2009 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include #include -- cgit v1.2.3 From 60a3b2253c413cf601783b070507d7dd6620c954 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Sep 2014 22:53:44 +0200 Subject: net: bpf: make eBPF interpreter images read-only With eBPF getting more extended and exposure to user space is on it's way, hardening the memory range the interpreter uses to steer its command flow seems appropriate. This patch moves the to be interpreted bytecode to read-only pages. In case we execute a corrupted BPF interpreter image for some reason e.g. caused by an attacker which got past a verifier stage, it would not only provide arbitrary read/write memory access but arbitrary function calls as well. After setting up the BPF interpreter image, its contents do not change until destruction time, thus we can setup the image on immutable made pages in order to mitigate modifications to that code. The idea is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks"). This is possible because bpf_prog is not part of sk_filter anymore. After setup bpf_prog cannot be altered during its life-time. This prevents any modifications to the entire bpf_prog structure (incl. function/JIT image pointer). Every eBPF program (including classic BPF that are migrated) have to call bpf_prog_select_runtime() to select either interpreter or a JIT image as a last setup step, and they all are being freed via bpf_prog_free(), including non-JIT. Therefore, we can easily integrate this into the eBPF life-time, plus since we directly allocate a bpf_prog, we have no performance penalty. Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual inspection of kernel_page_tables. Brad Spengler proposed the same idea via Twitter during development of this patch. Joint work with Hannes Frederic Sowa. Suggested-by: Brad Spengler Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 3 +- arch/mips/net/bpf_jit.c | 3 +- arch/powerpc/net/bpf_jit_comp.c | 3 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 3 +- arch/x86/net/bpf_jit_comp.c | 18 ++++------ include/linux/filter.h | 49 ++++++++++++++++++++++--- kernel/bpf/core.c | 80 +++++++++++++++++++++++++++++++++++++++-- kernel/seccomp.c | 7 ++-- lib/test_bpf.c | 2 +- net/core/filter.c | 6 ++-- 11 files changed, 144 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a37b989a2f91..a76623bcf722 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -930,5 +930,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 05a56619ece2..cfa83cf2447d 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1427,5 +1427,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 3afa6f4c1957..40c53ff59124 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -697,5 +697,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 61e45b7c04d7..f2833c5b218a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -887,5 +887,5 @@ void bpf_jit_free(struct bpf_prog *fp) module_free(NULL, header); free_filter: - kfree(fp); + bpf_prog_unlock_free(fp); } diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 1f76c22a6a75..f7a736b645e8 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -812,5 +812,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b08a98c59530..39ccfbb4a723 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -972,23 +972,17 @@ out: kfree(addrs); } -static void bpf_jit_free_deferred(struct work_struct *work) +void bpf_jit_free(struct bpf_prog *fp) { - struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; + if (!fp->jited) + goto free_filter; + set_memory_rw(addr, header->pages); module_free(NULL, header); - kfree(fp); -} -void bpf_jit_free(struct bpf_prog *fp) -{ - if (fp->jited) { - INIT_WORK(&fp->work, bpf_jit_free_deferred); - schedule_work(&fp->work); - } else { - kfree(fp); - } +free_filter: + bpf_prog_unlock_free(fp); } diff --git a/include/linux/filter.h b/include/linux/filter.h index a5227ab8ccb1..c78994593355 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -9,6 +9,11 @@ #include #include #include +#include + +struct sk_buff; +struct sock; +struct seccomp_data; /* Internally used and optimized filter representation with extended * instruction set based on top of classic BPF. @@ -320,20 +325,23 @@ struct sock_fprog_kern { struct sock_filter *filter; }; -struct sk_buff; -struct sock; -struct seccomp_data; +struct bpf_work_struct { + struct bpf_prog *prog; + struct work_struct work; +}; struct bpf_prog { + u32 pages; /* Number of allocated pages */ u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ + struct bpf_work_struct *work; /* Deferred free work struct */ unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); + /* Instructions for interpreter */ union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; - struct work_struct work; }; }; @@ -353,6 +361,26 @@ static inline unsigned int bpf_prog_size(unsigned int proglen) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +{ + set_memory_ro((unsigned long)fp, fp->pages); +} + +static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +{ + set_memory_rw((unsigned long)fp, fp->pages); +} +#else +static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +{ +} + +static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +{ +} +#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ + int sk_filter(struct sock *sk, struct sk_buff *skb); void bpf_prog_select_runtime(struct bpf_prog *fp); @@ -361,6 +389,17 @@ void bpf_prog_free(struct bpf_prog *fp); int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len); +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); +struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, + gfp_t gfp_extra_flags); +void __bpf_prog_free(struct bpf_prog *fp); + +static inline void bpf_prog_unlock_free(struct bpf_prog *fp) +{ + bpf_prog_unlock_ro(fp); + __bpf_prog_free(fp); +} + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); void bpf_prog_destroy(struct bpf_prog *fp); @@ -450,7 +489,7 @@ static inline void bpf_jit_compile(struct bpf_prog *fp) static inline void bpf_jit_free(struct bpf_prog *fp) { - kfree(fp); + bpf_prog_unlock_free(fp); } #endif /* CONFIG_BPF_JIT */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7f0dbcbb34af..b54bb2c2e494 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -22,6 +22,7 @@ */ #include #include +#include #include /* Registers */ @@ -63,6 +64,67 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_work_struct *ws; + struct bpf_prog *fp; + + size = round_up(size, PAGE_SIZE); + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + if (fp == NULL) + return NULL; + + ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags); + if (ws == NULL) { + vfree(fp); + return NULL; + } + + fp->pages = size / PAGE_SIZE; + fp->work = ws; + + return fp; +} +EXPORT_SYMBOL_GPL(bpf_prog_alloc); + +struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, + gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_prog *fp; + + BUG_ON(fp_old == NULL); + + size = round_up(size, PAGE_SIZE); + if (size <= fp_old->pages * PAGE_SIZE) + return fp_old; + + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + if (fp != NULL) { + memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); + fp->pages = size / PAGE_SIZE; + + /* We keep fp->work from fp_old around in the new + * reallocated structure. + */ + fp_old->work = NULL; + __bpf_prog_free(fp_old); + } + + return fp; +} +EXPORT_SYMBOL_GPL(bpf_prog_realloc); + +void __bpf_prog_free(struct bpf_prog *fp) +{ + kfree(fp->work); + vfree(fp); +} +EXPORT_SYMBOL_GPL(__bpf_prog_free); + /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs * anyway later on, so do not let the compiler omit it. @@ -523,12 +585,26 @@ void bpf_prog_select_runtime(struct bpf_prog *fp) /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); + /* Lock whole bpf_prog as read-only */ + bpf_prog_lock_ro(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); -/* free internal BPF program */ +static void bpf_prog_free_deferred(struct work_struct *work) +{ + struct bpf_work_struct *ws; + + ws = container_of(work, struct bpf_work_struct, work); + bpf_jit_free(ws->prog); +} + +/* Free internal BPF program */ void bpf_prog_free(struct bpf_prog *fp) { - bpf_jit_free(fp); + struct bpf_work_struct *ws = fp->work; + + INIT_WORK(&ws->work, bpf_prog_free_deferred); + ws->prog = fp; + schedule_work(&ws->work); } EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 44eb005c6695..84922befea84 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -395,16 +395,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(bpf_prog_size(new_len), - GFP_KERNEL|__GFP_NOWARN); + filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); if (!filter->prog) goto free_filter; ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); if (ret) goto free_filter_prog; - kfree(fp); + kfree(fp); atomic_set(&filter->usage, 1); filter->prog->len = new_len; @@ -413,7 +412,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) return filter; free_filter_prog: - kfree(filter->prog); + __bpf_prog_free(filter->prog); free_filter: kfree(filter); free_prog: diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8c66c6aace04..9a67456ba29a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(flen), 0); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; diff --git a/net/core/filter.c b/net/core/filter.c index d814b8a89d0f..37f8eb06fdee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); + fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. @@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); if (!fp) return -ENOMEM; @@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - prog = kmalloc(bpf_fsize, GFP_KERNEL); + prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) return -ENOMEM; -- cgit v1.2.3 From f0db9b073415848709dd59a6394969882f517da9 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Wed, 3 Sep 2014 03:17:20 +0530 Subject: ethtool: Add generic options for tunables This patch adds new ethtool cmd, ETHTOOL_GTUNABLE & ETHTOOL_STUNABLE for getting tunable values from driver. Add get_tunable and set_tunable to ethtool_ops. Driver implements these functions for getting/setting tunable value. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 +++ include/uapi/linux/ethtool.h | 28 +++++++++++++++ net/core/ethtool.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e658229fee39..c1a2d60dfb82 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -257,6 +257,10 @@ struct ethtool_ops { struct ethtool_eeprom *, u8 *); int (*get_eee)(struct net_device *, struct ethtool_eee *); int (*set_eee)(struct net_device *, struct ethtool_eee *); + int (*get_tunable)(struct net_device *, + const struct ethtool_tunable *, void *); + int (*set_tunable)(struct net_device *, + const struct ethtool_tunable *, const void *); }; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e3c7a719c76b..7a364f2f3d3f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -209,6 +209,32 @@ struct ethtool_value { __u32 data; }; +enum tunable_id { + ETHTOOL_ID_UNSPEC, + ETHTOOL_RX_COPYBREAK, +}; + +enum tunable_type_id { + ETHTOOL_TUNABLE_UNSPEC, + ETHTOOL_TUNABLE_U8, + ETHTOOL_TUNABLE_U16, + ETHTOOL_TUNABLE_U32, + ETHTOOL_TUNABLE_U64, + ETHTOOL_TUNABLE_STRING, + ETHTOOL_TUNABLE_S8, + ETHTOOL_TUNABLE_S16, + ETHTOOL_TUNABLE_S32, + ETHTOOL_TUNABLE_S64, +}; + +struct ethtool_tunable { + __u32 cmd; + __u32 id; + __u32 type_id; + __u32 len; + void *data[0]; +}; + /** * struct ethtool_regs - hardware register dump * @cmd: Command number = %ETHTOOL_GREGS @@ -1152,6 +1178,8 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ #define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ +#define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ +#define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 17cb912793fa..27e61b886520 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1621,6 +1621,80 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } +static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) +{ + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + if (tuna->len != sizeof(u32) || + tuna->type_id != ETHTOOL_TUNABLE_U32) + return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) +{ + int ret; + struct ethtool_tunable tuna; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data; + + if (!ops->get_tunable) + return -EOPNOTSUPP; + if (copy_from_user(&tuna, useraddr, sizeof(tuna))) + return -EFAULT; + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; + data = kmalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + ret = ops->get_tunable(dev, &tuna, data); + if (ret) + goto out; + useraddr += sizeof(tuna); + ret = -EFAULT; + if (copy_to_user(useraddr, data, tuna.len)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) +{ + int ret; + struct ethtool_tunable tuna; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data; + + if (!ops->set_tunable) + return -EOPNOTSUPP; + if (copy_from_user(&tuna, useraddr, sizeof(tuna))) + return -EFAULT; + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; + data = kmalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + useraddr += sizeof(tuna); + ret = -EFAULT; + if (copy_from_user(data, useraddr, tuna.len)) + goto out; + ret = ops->set_tunable(dev, &tuna, data); + +out: + kfree(data); + return ret; +} + /* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -1670,6 +1744,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: case ETHTOOL_GEEE: + case ETHTOOL_GTUNABLE: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -1857,6 +1932,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GMODULEEEPROM: rc = ethtool_get_module_eeprom(dev, useraddr); break; + case ETHTOOL_GTUNABLE: + rc = ethtool_get_tunable(dev, useraddr); + break; + case ETHTOOL_STUNABLE: + rc = ethtool_set_tunable(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From e793c0f70e9bdf4a2e71c151a1a3cf85c4db92ad Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 4 Sep 2014 23:44:36 +0900 Subject: net: treewide: Fix typo found in DocBook/networking.xml This patch fix spelling typo found in DocBook/networking.xml. It is because the neworking.xml is generated from comments in the source, I have to fix typo in comments within the source. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- include/net/wimax.h | 2 +- include/trace/events/irq.h | 4 ++-- net/core/datagram.c | 2 +- net/core/gen_estimator.c | 2 +- net/core/gen_stats.c | 2 +- net/core/skbuff.c | 4 ++-- net/core/sock.c | 4 ++-- net/socket.c | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 38377392d082..c8e388e5fccc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3176,7 +3176,7 @@ static inline int __dev_uc_sync(struct net_device *dev, } /** - * __dev_uc_unsync - Remove synchonized addresses from device + * __dev_uc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * @@ -3220,7 +3220,7 @@ static inline int __dev_mc_sync(struct net_device *dev, } /** - * __dev_mc_unsync - Remove synchonized addresses from device + * __dev_mc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * diff --git a/include/net/wimax.h b/include/net/wimax.h index e52ef5357e08..c52b68577cb0 100644 --- a/include/net/wimax.h +++ b/include/net/wimax.h @@ -290,7 +290,7 @@ struct wimax_dev; * This operation has to be synchronous, and return only when the * reset is complete. In case of having had to resort to bus/cold * reset implying a device disconnection, the call is allowed to - * return inmediately. + * return immediately. * NOTE: wimax_dev->mutex is NOT locked when this op is being * called; however, wimax_dev->mutex_reset IS locked to ensure * serialization of calls to wimax_reset(). diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 1c09820df585..3608bebd3d9c 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -107,7 +107,7 @@ DECLARE_EVENT_CLASS(softirq, * @vec_nr: softirq vector number * * When used in combination with the softirq_exit tracepoint - * we can determine the softirq handler runtine. + * we can determine the softirq handler routine. */ DEFINE_EVENT(softirq, softirq_entry, @@ -121,7 +121,7 @@ DEFINE_EVENT(softirq, softirq_entry, * @vec_nr: softirq vector number * * When used in combination with the softirq_entry tracepoint - * we can determine the softirq handler runtine. + * we can determine the softirq handler routine. */ DEFINE_EVENT(softirq, softirq_exit, diff --git a/net/core/datagram.c b/net/core/datagram.c index 488dd1a825c0..fdbc9a81d4c2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -775,7 +775,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) EXPORT_SYMBOL(__skb_checksum_complete); /** - * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec. + * skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec. * @skb: skbuff * @hlen: hardware length * @iov: io vector diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 6b5b6e7013ca..9d33dfffca19 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -197,7 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * as destination. A new timer with the interval specified in the * configuration TLV is created. Upon each interval, the latest statistics * will be read from &bstats and the estimated rate will be stored in - * &rate_est with the statistics lock grabed during this period. + * &rate_est with the statistics lock grabbed during this period. * * Returns 0 on success or a negative error code. * diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 9d3d9e78397b..2ddbce4cce14 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -206,7 +206,7 @@ EXPORT_SYMBOL(gnet_stats_copy_queue); * @st: application specific statistics data * @len: length of data * - * Appends the application sepecific statistics to the top level TLV created by + * Appends the application specific statistics to the top level TLV created by * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping * handle is in backward compatibility mode. * diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 163b673f9e62..da1378a3e2c7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2647,7 +2647,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read); * skb_seq_read() will return the remaining part of the block. * * Note 1: The size of each block of data returned can be arbitrary, - * this limitation is the cost for zerocopy seqeuental + * this limitation is the cost for zerocopy sequential * reads of potentially non linear data. * * Note 2: Fragment lists within fragments are not implemented @@ -2781,7 +2781,7 @@ EXPORT_SYMBOL(skb_find_text); /** * skb_append_datato_frags - append the user data to a skb * @sk: sock structure - * @skb: skb structure to be appened with user data. + * @skb: skb structure to be appended with user data. * @getfrag: call back function to be used for getting the user data * @from: pointer to user message iov * @length: length of the iov message diff --git a/net/core/sock.c b/net/core/sock.c index 29870571c42f..d372b4bd3f99 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -166,7 +166,7 @@ EXPORT_SYMBOL(sk_ns_capable); /** * sk_capable - Socket global capability test * @sk: Socket to use a capability on or through - * @cap: The global capbility to use + * @cap: The global capability to use * * Test to see if the opener of the socket had when the socket was * created and the current process has the capability @cap in all user @@ -183,7 +183,7 @@ EXPORT_SYMBOL(sk_capable); * @sk: Socket to use a capability on or through * @cap: The capability to use * - * Test to see if the opener of the socket had when the socke was created + * Test to see if the opener of the socket had when the socket was created * and the current process has the capability @cap over the network namespace * the socket is a member of. */ diff --git a/net/socket.c b/net/socket.c index 4eb09b34b2d3..2e2586e2dee1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2601,7 +2601,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) * * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the - * socket interface. The value ops->family coresponds to the + * socket interface. The value ops->family corresponds to the * socket system call protocol family. */ int sock_register(const struct net_proto_family *ops) -- cgit v1.2.3 From 62bccb8cdb69051b95a55ab0c489e3cab261c8ef Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 4 Sep 2014 13:31:35 -0400 Subject: net-timestamp: Make the clone operation stand-alone from phy timestamping The phy timestamping takes a different path than the regular timestamping does in that it will create a clone first so that the packets needing to be timestamped can be placed in a queue, or the context block could be used. In order to support these use cases I am pulling the core of the code out so it can be used in other drivers beyond just phy devices. In addition I have added a destructor named sock_efree which is meant to provide a simple way for dropping the reference to skb exceptions that aren't part of either the receive or send windows for the socket, and I have removed some duplication in spots where this destructor could be used in place of sock_edemux. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 6 +++--- include/linux/skbuff.h | 2 ++ include/net/sock.h | 1 + net/core/skbuff.c | 32 +++++++++++++++++++++++++------- net/core/sock.c | 6 ++++++ net/core/timestamping.c | 14 +++----------- 6 files changed, 40 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index d5991ac46ab0..87648b306551 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1148,7 +1148,7 @@ static void dp83640_remove(struct phy_device *phydev) kfree_skb(skb); while ((skb = skb_dequeue(&dp83640->tx_queue)) != NULL) - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); clock = dp83640_clock_get(dp83640->clock); @@ -1405,7 +1405,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, case HWTSTAMP_TX_ONESTEP_SYNC: if (is_sync(skb, type)) { - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); return; } /* fall through */ @@ -1416,7 +1416,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, case HWTSTAMP_TX_OFF: default: - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); break; } } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 02529fcad1ac..1cf0cfaef10a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2690,6 +2690,8 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } +struct sk_buff *skb_clone_sk(struct sk_buff *skb); + #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index 3fde6130789d..e02be37a3d91 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1574,6 +1574,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, void sock_wfree(struct sk_buff *skb); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); +void sock_efree(struct sk_buff *skb); void sock_edemux(struct sk_buff *skb); int sock_setsockopt(struct socket *sock, int level, int op, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 697e696c914b..a936a401564e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3511,6 +3511,27 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) } EXPORT_SYMBOL(sock_dequeue_err_skb); +struct sk_buff *skb_clone_sk(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct sk_buff *clone; + + if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt)) + return NULL; + + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + sock_put(sk); + return NULL; + } + + clone->sk = sk; + clone->destructor = sock_efree; + + return clone; +} +EXPORT_SYMBOL(skb_clone_sk); + static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, int tstype) @@ -3540,14 +3561,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, { struct sock *sk = skb->sk; - skb->sk = NULL; + /* take a reference to prevent skb_orphan() from freeing the socket */ + sock_hold(sk); - if (hwtstamps) { - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - } else { - kfree_skb(skb); - } + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); sock_put(sk); } diff --git a/net/core/sock.c b/net/core/sock.c index f1a638ee93d9..d04005c51724 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1637,6 +1637,12 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +void sock_efree(struct sk_buff *skb) +{ + sock_put(skb->sk); +} +EXPORT_SYMBOL(sock_efree); + void sock_edemux(struct sk_buff *skb) { struct sock *sk = skb->sk; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index f48a59fd8f39..43d3dd62fcc8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -36,10 +36,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) { struct phy_device *phydev; struct sk_buff *clone; - struct sock *sk = skb->sk; unsigned int type; - if (!sk) + if (!skb->sk) return; type = classify(skb); @@ -48,16 +47,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) phydev = skb->dev->phydev; if (likely(phydev->drv->txtstamp)) { - if (!atomic_inc_not_zero(&sk->sk_refcnt)) + clone = skb_clone_sk(skb); + if (!clone) return; - - clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) { - sock_put(sk); - return; - } - - clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } } -- cgit v1.2.3 From 56193d1bce2b2759cb4bdcc00cd05544894a0c90 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 5 Sep 2014 19:20:26 -0400 Subject: net: Add function for parsing the header length out of linear ethernet frames This patch updates some of the flow_dissector api so that it can be used to parse the length of ethernet buffers stored in fragments. Most of the changes needed were to __skb_get_poff as it needed to be updated to support sending a linear buffer instead of a skb. I have split __skb_get_poff into two functions, the first is skb_get_poff and it retains the functionality of the original __skb_get_poff. The other function is __skb_get_poff which now works much like __skb_flow_dissect in relation to skb_flow_dissect in that it provides the same functionality but works with just a data buffer and hlen instead of needing an skb. Signed-off-by: Alexander Duyck Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 1 + include/linux/skbuff.h | 4 +++- include/net/flow_keys.h | 2 ++ net/core/filter.c | 2 +- net/core/flow_dissector.c | 46 +++++++++++++++++++++++++++++++-------------- net/ethernet/eth.c | 27 ++++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 9c5529dc6d07..733980fce8e3 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -29,6 +29,7 @@ #include #ifdef __KERNEL__ +u32 eth_get_headlen(void *data, unsigned int max_len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1cf0cfaef10a..07c9fdd0c126 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3218,7 +3218,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); -u32 __skb_get_poff(const struct sk_buff *skb); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); /** * skb_head_is_locked - Determine if the skb->head is locked down diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 9a03f73c4974..7ee2df083542 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -40,4 +40,6 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); } u32 flow_hash_from_keys(struct flow_keys *keys); +unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, + __be16 protocol); #endif diff --git a/net/core/filter.c b/net/core/filter.c index 37f8eb06fdee..fa5b7d0f77ac 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -113,7 +113,7 @@ static unsigned int pkt_type_offset(void) static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - return __skb_get_poff((struct sk_buff *)(unsigned long) ctx); + return skb_get_poff((struct sk_buff *)(unsigned long) ctx); } static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 12f48ca7a0b0..8560dea58803 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -13,6 +13,7 @@ #include #include #include +#include /* copy saddr & daddr, possibly using 64bit load/store * Equivalent to : flow->src = iph->saddr; @@ -117,6 +118,13 @@ ipv6: flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); + /* skip the flow label processing if skb is NULL. The + * assumption here is that if there is no skb we are not + * looking for flow info as much as we are length. + */ + if (!skb) + break; + flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can @@ -165,6 +173,9 @@ ipv6: return false; } } + case htons(ETH_P_FCOE): + flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + /* fall through */ default: return false; } @@ -316,26 +327,18 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, } EXPORT_SYMBOL(__skb_tx_hash); -/* __skb_get_poff() returns the offset to the payload as far as it could - * be dissected. The main user is currently BPF, so that we can dynamically - * truncate packets without needing to push actual payload to the user - * space and can analyze headers only, instead. - */ -u32 __skb_get_poff(const struct sk_buff *skb) +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen) { - struct flow_keys keys; - u32 poff = 0; + u32 poff = keys->thoff; - if (!skb_flow_dissect(skb, &keys)) - return 0; - - poff += keys.thoff; - switch (keys.ip_proto) { + switch (keys->ip_proto) { case IPPROTO_TCP: { const struct tcphdr *tcph; struct tcphdr _tcph; - tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + tcph = __skb_header_pointer(skb, poff, sizeof(_tcph), + data, hlen, &_tcph); if (!tcph) return poff; @@ -369,6 +372,21 @@ u32 __skb_get_poff(const struct sk_buff *skb) return poff; } +/* skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); +} + static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5cebca134585..33a140e15834 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -145,6 +145,33 @@ int eth_rebuild_header(struct sk_buff *skb) } EXPORT_SYMBOL(eth_rebuild_header); +/** + * eth_get_headlen - determine the the length of header for an ethernet frame + * @data: pointer to start of frame + * @len: total length of frame + * + * Make a best effort attempt to pull the length for all of the headers for + * a given frame in a linear buffer. + */ +u32 eth_get_headlen(void *data, unsigned int len) +{ + const struct ethhdr *eth = (const struct ethhdr *)data; + struct flow_keys keys; + + /* this should never happen, but better safe than sorry */ + if (len < sizeof(*eth)) + return len; + + /* parse any remaining L2/L3 headers, check for L4 */ + if (!__skb_flow_dissect(NULL, &keys, data, + eth->h_proto, sizeof(*eth), len)) + return max_t(u32, keys.thoff, sizeof(*eth)); + + /* parse for any L4 headers */ + return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); +} +EXPORT_SYMBOL(eth_get_headlen); + /** * eth_type_trans - determine the packet's protocol ID. * @skb: received socket data -- cgit v1.2.3 From dec38b5ce6a9edb406c60c2670b26a1a4262fdb9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 6 Sep 2014 01:11:12 +0100 Subject: regulator: isl9305: Add Intersil ISL9305/H driver The ISL9305 and ISL9305H are mini-PMICs offering two DCDC regulators and two LDO regulators. While there are some register differences between them these do not affect the current Linux driver as the relevant features are not yet supported. Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/isl9305.txt | 36 +++ drivers/regulator/Kconfig | 6 + drivers/regulator/Makefile | 1 + drivers/regulator/isl9305.c | 247 +++++++++++++++++++++ include/linux/platform_data/isl9305.h | 30 +++ 5 files changed, 320 insertions(+) create mode 100644 Documentation/devicetree/bindings/regulator/isl9305.txt create mode 100644 drivers/regulator/isl9305.c create mode 100644 include/linux/platform_data/isl9305.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/isl9305.txt b/Documentation/devicetree/bindings/regulator/isl9305.txt new file mode 100644 index 000000000000..a626fc1bbf0d --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/isl9305.txt @@ -0,0 +1,36 @@ +Intersil ISL9305/ISL9305H voltage regulator + +Required properties: + +- compatible: "isl,isl9305" or "isl,isl9305h" +- reg: I2C slave address, usually 0x68. +- regulators: A node that houses a sub-node for each regulator within the + device. Each sub-node is identified using the node's name, with valid + values being "dcd1", "dcd2", "ldo1" and "ldo2". The content of each sub-node + is defined by the standard binding for regulators; see regulator.txt. +- VINDCD1-supply: A phandle to a regulator node supplying VINDCD1. + VINDCD2-supply: A phandle to a regulator node supplying VINDCD2. + VINLDO1-supply: A phandle to a regulator node supplying VINLDO1. + VINLDO2-supply: A phandle to a regulator node supplying VINLDO2. + +Optional properties: +- Per-regulator optional properties are defined in regulator.txt + +Example + + pmic: isl9305@68 { + compatible = "isl,isl9305"; + reg = <0x68>; + + VINDCD1-supply = <&system_power>; + VINDCD2-supply = <&system_power>; + VINLDO1-supply = <&system_power>; + VINLDO2-supply = <&system_power>; + + regulators { + dcd1 { + regulator-name = "VDD_DSP"; + regulator-always-on; + }; + }; + }; diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index e6b98ed4c12f..3c8b6f401e4f 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -250,6 +250,12 @@ config REGULATOR_HI6421 21 general purpose LDOs, 3 dedicated LDOs, and 5 BUCKs. All of them come with support to either ECO (idle) or sleep mode. +config REGULATOR_ISL9305 + tristate "Intersil ISL9305 regulator" + depends on I2C + help + This driver supports ISL9305 voltage regulator chip. + config REGULATOR_ISL6271A tristate "Intersil ISL6271A Power regulator" depends on I2C diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 5513e2c141b1..e12fee8c943b 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_REGULATOR_FAN53555) += fan53555.o obj-$(CONFIG_REGULATOR_GPIO) += gpio-regulator.o obj-$(CONFIG_REGULATOR_HI6421) += hi6421-regulator.o obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o +obj-$(CONFIG_REGULATOR_ISL9305) += isl9305.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o obj-$(CONFIG_REGULATOR_LP872X) += lp872x.o diff --git a/drivers/regulator/isl9305.c b/drivers/regulator/isl9305.c new file mode 100644 index 000000000000..b0d12d186b68 --- /dev/null +++ b/drivers/regulator/isl9305.c @@ -0,0 +1,247 @@ +/* + * isl9305 - Intersil ISL9305 DCDC regulator + * + * Copyright 2014 Linaro Ltd + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Registers + */ +#define ISL9305_DCD1OUT 0x0 +#define ISL9305_DCD2OUT 0x1 +#define ISL9305_LDO1OUT 0x2 +#define ISL9305_LDO2OUT 0x3 +#define ISL9305_DCD_PARAMETER 0x4 +#define ISL9305_SYSTEM_PARAMETER 0x5 +#define ISL9305_DCD_SRCTL 0x6 + +#define ISL9305_MAX_REG ISL9305_DCD_SRCTL + +/* + * DCD_PARAMETER + */ +#define ISL9305_DCD_PHASE 0x40 +#define ISL9305_DCD2_ULTRA 0x20 +#define ISL9305_DCD1_ULTRA 0x10 +#define ISL9305_DCD2_BLD 0x08 +#define ISL9305_DCD1_BLD 0x04 +#define ISL9305_DCD2_MODE 0x02 +#define ISL9305_DCD1_MODE 0x01 + +/* + * SYSTEM_PARAMETER + */ +#define ISL9305_I2C_EN 0x40 +#define ISL9305_DCDPOR_MASK 0x30 +#define ISL9305_LDO2_EN 0x08 +#define ISL9305_LDO1_EN 0x04 +#define ISL9305_DCD2_EN 0x02 +#define ISL9305_DCD1_EN 0x01 + +/* + * DCD_SRCTL + */ +#define ISL9305_DCD2SR_MASK 0xc0 +#define ISL9305_DCD1SR_MASK 0x07 + +static const struct regulator_ops isl9305_ops = { + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .list_voltage = regulator_list_voltage_linear, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, +}; + +static const struct regulator_desc isl9305_regulators[] = { + [ISL9305_DCD1] = { + .name = "DCD1", + .n_voltages = 0x70, + .min_uV = 825000, + .uV_step = 25000, + .vsel_reg = ISL9305_DCD1OUT, + .vsel_mask = 0x7f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_DCD1_EN, + .supply_name = "VINDCD1", + .ops = &isl9305_ops, + }, + [ISL9305_DCD2] = { + .name = "DCD2", + .n_voltages = 0x70, + .min_uV = 825000, + .uV_step = 25000, + .vsel_reg = ISL9305_DCD2OUT, + .vsel_mask = 0x7f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_DCD2_EN, + .supply_name = "VINDCD2", + .ops = &isl9305_ops, + }, + [ISL9305_LDO1] = { + .name = "LDO1", + .n_voltages = 0x37, + .min_uV = 900000, + .uV_step = 50000, + .vsel_reg = ISL9305_LDO1OUT, + .vsel_mask = 0x3f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_LDO1_EN, + .supply_name = "VINLDO1", + .ops = &isl9305_ops, + }, + [ISL9305_LDO2] = { + .name = "LDO2", + .n_voltages = 0x37, + .min_uV = 900000, + .uV_step = 50000, + .vsel_reg = ISL9305_LDO2OUT, + .vsel_mask = 0x3f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_LDO2_EN, + .supply_name = "VINLDO2", + .ops = &isl9305_ops, + }, +}; + +#ifdef CONFIG_OF +static struct of_regulator_match isl9305_reg_matches[] = { + [ISL9305_DCD1] = { .name = "dcd1" }, + [ISL9305_DCD2] = { .name = "dcd2" }, + [ISL9305_LDO1] = { .name = "ldo1" }, + [ISL9305_LDO2] = { .name = "ldo2" }, +}; + +static struct of_regulator_match *isl9305_parse_dt(struct i2c_client *i2c) +{ + struct device_node *node = i2c->dev.of_node; + struct of_regulator_match *matches; + struct device_node *regs; + int count; + + regs = of_get_child_by_name(node, "regulators"); + if (!regs) + return NULL; + + matches = devm_kmemdup(&i2c->dev, isl9305_reg_matches, + sizeof(isl9305_reg_matches), GFP_KERNEL); + if (!matches) + return NULL; + + count = of_regulator_match(&i2c->dev, regs, matches, + ARRAY_SIZE(isl9305_reg_matches)); + of_node_put(regs); + if ((count < 0) || (count > ARRAY_SIZE(isl9305_reg_matches))) + return NULL; + + return matches; +} +#else +static struct of_regulator_match *isl9305_parse_dt(struct i2c_client *i2c) +{ + return NULL; +} +#endif + +static const struct regmap_config isl9305_regmap = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = ISL9305_MAX_REG, + .cache_type = REGCACHE_RBTREE, +}; + +static int isl9305_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct regulator_config config = { }; + struct isl9305_pdata *pdata = i2c->dev.platform_data; + struct of_regulator_match *of_matches; + struct regulator_dev *rdev; + struct regmap *regmap; + int i, ret; + + of_matches = isl9305_parse_dt(i2c); + + regmap = devm_regmap_init_i2c(i2c, &isl9305_regmap); + if (IS_ERR(regmap)) { + ret = PTR_ERR(regmap); + dev_err(&i2c->dev, "Failed to create regmap: %d\n", ret); + return ret; + } + + config.dev = &i2c->dev; + + for (i = 0; i < ARRAY_SIZE(isl9305_regulators); i++) { + config.of_node = NULL; + config.init_data = NULL; + + if (of_matches) { + config.init_data = of_matches[i].init_data; + config.of_node = of_matches[i].of_node; + } + + if (!config.init_data && pdata) + config.init_data = pdata->init_data[i]; + + rdev = devm_regulator_register(&i2c->dev, + &isl9305_regulators[i], + &config); + if (IS_ERR(rdev)) { + ret = PTR_ERR(rdev); + dev_err(&i2c->dev, "Failed to register %s: %d\n", + isl9305_regulators[i].name, ret); + return ret; + } + } + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id isl9305_dt_ids[] = { + { .compatible = "isl,isl9305" }, + { .compatible = "isl,isl9305h" }, + {}, +}; +#endif + +static const struct i2c_device_id isl9305_i2c_id[] = { + { "isl9305", }, + { "isl9305h", }, + { } +}; +MODULE_DEVICE_TABLE(i2c, isl9305_i2c_id); + +static struct i2c_driver isl9305_regulator_driver = { + .driver = { + .name = "isl9305", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(isl9305_dt_ids), + }, + .probe = isl9305_i2c_probe, + .id_table = isl9305_i2c_id, +}; + +module_i2c_driver(isl9305_regulator_driver); + +MODULE_AUTHOR("Mark Brown"); +MODULE_DESCRIPTION("Intersil ISL9305 DCDC regulator"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/isl9305.h b/include/linux/platform_data/isl9305.h new file mode 100644 index 000000000000..1419133fa69e --- /dev/null +++ b/include/linux/platform_data/isl9305.h @@ -0,0 +1,30 @@ +/* + * isl9305 - Intersil ISL9305 DCDC regulator + * + * Copyright 2014 Linaro Ltd + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __ISL9305_H +#define __ISL9305_H + +#define ISL9305_DCD1 0 +#define ISL9305_DCD2 1 +#define ISL9305_LDO1 2 +#define ISL9305_LDO2 3 + +#define ISL9305_MAX_REGULATOR ISL9305_LDO2 + +struct regulator_init_data; + +struct isl9305_pdata { + struct regulator_init_data *init_data[ISL9305_MAX_REGULATOR]; +}; + +#endif -- cgit v1.2.3 From a8adcc9012d8502e06ba7b3f966bad8f2c58edc3 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Wed, 27 Aug 2014 23:44:08 +0530 Subject: power_supply: Add boot and calibration attributes Usually PMIC's come with coulomb counting mechanism which can be used to implement a Fuel Gauginig solution in Software itself. One of key input to these SW Fuel Gauge solutioons is the boot up parameters like boot voltage and boot current. This patch adds the VOLTAGE_BOOT and CURRENT_BOOT power supply attributes to report bootup voltage and current. This patch also adds CALIBRATE power supply attribute which useful is for calibrating the battery/coulomb counter. Signed-off-by: Ramakrishna Pallala Signed-off-by: Sebastian Reichel --- Documentation/power/power_supply_class.txt | 6 ++++++ drivers/power/power_supply_sysfs.c | 3 +++ include/linux/power_supply.h | 5 +++++ 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 48cff881cb8a..82dacc06e355 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -101,6 +101,10 @@ VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that these ones should be used if hardware could only guess (measure and retain) the thresholds of a given power supply. +VOLTAGE_BOOT - Reports the voltage measured during boot + +CURRENT_BOOT - Reports the current measured during boot + CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when battery considered full/empty. @@ -123,6 +127,8 @@ the current drawn from a charging source. CHARGE_TERM_CURRENT - Charge termination current used to detect the end of charge condition. +CALIBRATE - battery or coulomb counter calibration status + CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger. CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the power supply object. diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 750a20275664..7e4726d37211 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -149,9 +149,11 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(voltage_now), POWER_SUPPLY_ATTR(voltage_avg), POWER_SUPPLY_ATTR(voltage_ocv), + POWER_SUPPLY_ATTR(voltage_boot), POWER_SUPPLY_ATTR(current_max), POWER_SUPPLY_ATTR(current_now), POWER_SUPPLY_ATTR(current_avg), + POWER_SUPPLY_ATTR(current_boot), POWER_SUPPLY_ATTR(power_now), POWER_SUPPLY_ATTR(power_avg), POWER_SUPPLY_ATTR(charge_full_design), @@ -193,6 +195,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(type), POWER_SUPPLY_ATTR(scope), POWER_SUPPLY_ATTR(charge_term_current), + POWER_SUPPLY_ATTR(calibrate), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f3dea41dbcd2..de59a28b1b5b 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -102,9 +102,11 @@ enum power_supply_property { POWER_SUPPLY_PROP_VOLTAGE_NOW, POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_VOLTAGE_OCV, + POWER_SUPPLY_PROP_VOLTAGE_BOOT, POWER_SUPPLY_PROP_CURRENT_MAX, POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CURRENT_AVG, + POWER_SUPPLY_PROP_CURRENT_BOOT, POWER_SUPPLY_PROP_POWER_NOW, POWER_SUPPLY_PROP_POWER_AVG, POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, @@ -146,6 +148,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */ POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, + POWER_SUPPLY_PROP_CALIBRATE, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, @@ -291,6 +294,7 @@ static inline bool power_supply_is_amp_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_CURRENT_MAX: case POWER_SUPPLY_PROP_CURRENT_NOW: case POWER_SUPPLY_PROP_CURRENT_AVG: + case POWER_SUPPLY_PROP_CURRENT_BOOT: return 1; default: break; @@ -315,6 +319,7 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_VOLTAGE_NOW: case POWER_SUPPLY_PROP_VOLTAGE_AVG: case POWER_SUPPLY_PROP_VOLTAGE_OCV: + case POWER_SUPPLY_PROP_VOLTAGE_BOOT: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX: case POWER_SUPPLY_PROP_POWER_NOW: -- cgit v1.2.3 From 521d24ee598bd8a8b71d7ac76ce2c0da0e548406 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Tue, 8 Jul 2014 18:26:18 -0400 Subject: rcu: Return bool type in rcu_lockdep_current_cpu_online() Return true instead of 1 in rcu_lockdep_current_cpu_online() as this has bool as return type. Signed-off-by: Pranith Kumar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d231aa17b1d7..7e47e44bce03 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -349,7 +349,7 @@ bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ static inline bool rcu_lockdep_current_cpu_online(void) { - return 1; + return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ -- cgit v1.2.3 From 85b39d305bfe809a11ff2770d380be3e2465beec Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 8 Jul 2014 15:17:59 -0700 Subject: rcu: Uninline rcu_read_lock_held() This commit uninlines rcu_read_lock_held(). According to "size vmlinux" this saves 28549 in .text: - 5541731 3014560 14757888 23314179 + 5513182 3026848 14757888 23297918 Note: it looks as if the data grows by 12288 bytes but this is not true, it does not actually grow. But .data starts with ALIGN(THREAD_SIZE) and since .text shrinks the padding grows, and thus .data grows too as it seen by /bin/size. diff System.map: - ffffffff81510000 D _sdata - ffffffff81510000 D init_thread_union + ffffffff81509000 D _sdata + ffffffff8150c000 D init_thread_union Perhaps we can change vmlinux.lds.S to .data itself, so that /bin/size can't "wrongly" report that .data grows if .text shinks. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 36 +----------------------------------- kernel/rcu/update.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7e47e44bce03..321ed0d4e675 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -371,41 +371,7 @@ extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); -/** - * rcu_read_lock_held() - might we be in RCU read-side critical section? - * - * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU - * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, - * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. This is useful for debug checks in functions that - * require that they be called within an RCU read-side critical section. - * - * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot - * and while lockdep is disabled. - * - * Note that rcu_read_lock() and the matching rcu_read_unlock() must - * occur in the same context, for example, it is illegal to invoke - * rcu_read_unlock() in process context if the matching rcu_read_lock() - * was invoked from within an irq handler. - * - * Note that rcu_read_lock() is disallowed if the CPU is either idle or - * offline from an RCU perspective, so check for those as well. - */ -static inline int rcu_read_lock_held(void) -{ - if (!debug_lockdep_rcu_enabled()) - return 1; - if (!rcu_is_watching()) - return 0; - if (!rcu_lockdep_current_cpu_online()) - return 0; - return lock_is_held(&rcu_lock_map); -} - -/* - * rcu_read_lock_bh_held() is defined out of line to avoid #include-file - * hell. - */ +int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); /** diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4056d7992a6c..ea8ea7b16e11 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -136,6 +136,38 @@ int notrace debug_lockdep_rcu_enabled(void) } EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); +/** + * rcu_read_lock_held() - might we be in RCU read-side critical section? + * + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU + * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, + * this assumes we are in an RCU read-side critical section unless it can + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. + * + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. + * + * Note that rcu_read_lock() and the matching rcu_read_unlock() must + * occur in the same context, for example, it is illegal to invoke + * rcu_read_unlock() in process context if the matching rcu_read_lock() + * was invoked from within an irq handler. + * + * Note that rcu_read_lock() is disallowed if the CPU is either idle or + * offline from an RCU perspective, so check for those as well. + */ +int rcu_read_lock_held(void) +{ + if (!debug_lockdep_rcu_enabled()) + return 1; + if (!rcu_is_watching()) + return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; + return lock_is_held(&rcu_lock_map); +} +EXPORT_SYMBOL_GPL(rcu_read_lock_held); + /** * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * -- cgit v1.2.3 From eea203fea3484598280a07fe503e025e886297fb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 14 Jul 2014 09:16:15 -0400 Subject: rcu: Use pr_alert/pr_cont for printing logs User pr_alert/pr_cont for printing the logs from rcutorture module directly instead of writing it to a buffer and then printing it. This allows us from not having to allocate such buffers. Also remove a resulting empty function. I tested this using the parse-torture.sh script as follows: $ dmesg | grep torture > log.txt $ bash parse-torture.sh log.txt test $ There were no warnings which means that parsing went fine. Signed-off-by: Joe Perches Signed-off-by: Pranith Kumar Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 2 +- kernel/rcu/rcutorture.c | 127 +++++++++++++++++++++--------------------------- kernel/torture.c | 16 +++--- 3 files changed, 64 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 5ca58fcbaf1b..fec46f8c08eb 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -51,7 +51,7 @@ /* Definitions for online/offline exerciser. */ int torture_onoff_init(long ooholdoff, long oointerval); -char *torture_onoff_stats(char *page); +void torture_onoff_stats(void); bool torture_onoff_failures(void); /* Low-rider random number generator. */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 7e67711cbae8..ff4f0c756dee 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -242,7 +242,7 @@ struct rcu_torture_ops { void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); void (*cb_barrier)(void); void (*fqs)(void); - void (*stats)(char *page); + void (*stats)(void); int irq_capable; int can_boost; const char *name; @@ -525,21 +525,21 @@ static void srcu_torture_barrier(void) srcu_barrier(&srcu_ctl); } -static void srcu_torture_stats(char *page) +static void srcu_torture_stats(void) { int cpu; int idx = srcu_ctl.completed & 0x1; - page += sprintf(page, "%s%s per-CPU(idx=%d):", - torture_type, TORTURE_FLAG, idx); + pr_alert("%s%s per-CPU(idx=%d):", + torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { long c0, c1; c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; - page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1); + pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } - sprintf(page, "\n"); + pr_cont("\n"); } static void srcu_torture_synchronize_expedited(void) @@ -1031,10 +1031,15 @@ rcu_torture_reader(void *arg) } /* - * Create an RCU-torture statistics message in the specified buffer. + * Print torture statistics. Caller must ensure that there is only + * one call to this function at a given time!!! This is normally + * accomplished by relying on the module system to only have one copy + * of the module loaded, and then by giving the rcu_torture_stats + * kthread full control (or the init/cleanup functions when rcu_torture_stats + * thread is not running). */ static void -rcu_torture_printk(char *page) +rcu_torture_stats_print(void) { int cpu; int i; @@ -1052,55 +1057,60 @@ rcu_torture_printk(char *page) if (pipesummary[i] != 0) break; } - page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, - "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", - rcu_torture_current, - rcu_torture_current_version, - list_empty(&rcu_torture_freelist), - atomic_read(&n_rcu_torture_alloc), - atomic_read(&n_rcu_torture_alloc_fail), - atomic_read(&n_rcu_torture_free)); - page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", - atomic_read(&n_rcu_torture_mberror), - n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror); - page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", - n_rcu_torture_boost_failure, - n_rcu_torture_boosts, - n_rcu_torture_timers); - page = torture_onoff_stats(page); - page += sprintf(page, "barrier: %ld/%ld:%ld", - n_barrier_successes, - n_barrier_attempts, - n_rcu_torture_barrier_error); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", + rcu_torture_current, + rcu_torture_current_version, + list_empty(&rcu_torture_freelist), + atomic_read(&n_rcu_torture_alloc), + atomic_read(&n_rcu_torture_alloc_fail), + atomic_read(&n_rcu_torture_free)); + pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ", + atomic_read(&n_rcu_torture_mberror), + n_rcu_torture_boost_ktrerror, + n_rcu_torture_boost_rterror); + pr_cont("rtbf: %ld rtb: %ld nt: %ld ", + n_rcu_torture_boost_failure, + n_rcu_torture_boosts, + n_rcu_torture_timers); + torture_onoff_stats(); + pr_cont("barrier: %ld/%ld:%ld\n", + n_barrier_successes, + n_barrier_attempts, + n_rcu_torture_barrier_error); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); if (atomic_read(&n_rcu_torture_mberror) != 0 || n_rcu_torture_barrier_error != 0 || n_rcu_torture_boost_ktrerror != 0 || n_rcu_torture_boost_rterror != 0 || n_rcu_torture_boost_failure != 0 || i > 1) { - page += sprintf(page, "!!! "); + pr_cont("%s", "!!! "); atomic_inc(&n_rcu_torture_error); WARN_ON_ONCE(1); } - page += sprintf(page, "Reader Pipe: "); + pr_cont("Reader Pipe: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", pipesummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Reader Batch: "); + pr_cont(" %ld", pipesummary[i]); + pr_cont("\n"); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("Reader Batch: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", batchsummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Free-Block Circulation: "); + pr_cont(" %ld", batchsummary[i]); + pr_cont("\n"); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("Free-Block Circulation: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { - page += sprintf(page, " %d", - atomic_read(&rcu_torture_wcount[i])); + pr_cont(" %d", atomic_read(&rcu_torture_wcount[i])); } - page += sprintf(page, "\n"); + pr_cont("\n"); + if (cur_ops->stats) - cur_ops->stats(page); + cur_ops->stats(); if (rtcv_snap == rcu_torture_current_version && rcu_torture_current != NULL) { int __maybe_unused flags; @@ -1109,40 +1119,15 @@ rcu_torture_printk(char *page) rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed); - page += sprintf(page, - "??? Writer stall state %d g%lu c%lu f%#x\n", - rcu_torture_writer_state, - gpnum, completed, flags); + pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n", + rcu_torture_writer_state, + gpnum, completed, flags); show_rcu_gp_kthreads(); rcutorture_trace_dump(); } rtcv_snap = rcu_torture_current_version; } -/* - * Print torture statistics. Caller must ensure that there is only - * one call to this function at a given time!!! This is normally - * accomplished by relying on the module system to only have one copy - * of the module loaded, and then by giving the rcu_torture_stats - * kthread full control (or the init/cleanup functions when rcu_torture_stats - * thread is not running). - */ -static void -rcu_torture_stats_print(void) -{ - int size = nr_cpu_ids * 200 + 8192; - char *buf; - - buf = kmalloc(size, GFP_KERNEL); - if (!buf) { - pr_err("rcu-torture: Out of memory, need: %d", size); - return; - } - rcu_torture_printk(buf); - pr_alert("%s", buf); - kfree(buf); -} - /* * Periodically prints torture statistics, if periodic statistics printing * was specified via the stat_interval module parameter. diff --git a/kernel/torture.c b/kernel/torture.c index d600af21f022..ede8b25ec1ae 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -211,18 +211,16 @@ EXPORT_SYMBOL_GPL(torture_onoff_cleanup); /* * Print online/offline testing statistics. */ -char *torture_onoff_stats(char *page) +void torture_onoff_stats(void) { #ifdef CONFIG_HOTPLUG_CPU - page += sprintf(page, - "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", - n_online_successes, n_online_attempts, - n_offline_successes, n_offline_attempts, - min_online, max_online, - min_offline, max_offline, - sum_online, sum_offline, HZ); + pr_cont("onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", + n_online_successes, n_online_attempts, + n_offline_successes, n_offline_attempts, + min_online, max_online, + min_offline, max_offline, + sum_online, sum_offline, HZ); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ - return page; } EXPORT_SYMBOL_GPL(torture_onoff_stats); -- cgit v1.2.3 From 8315f42295d2667a7f942f154b73a86fd7cb2227 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Jun 2014 13:42:20 -0700 Subject: rcu: Add call_rcu_tasks() This commit adds a new RCU-tasks flavor of RCU, which provides call_rcu_tasks(). This RCU flavor's quiescent states are voluntary context switch (not preemption!) and userspace execution (not the idle loop -- use some sort of schedule_on_each_cpu() if you need to handle the idle tasks. Note that unlike other RCU flavors, these quiescent states occur in tasks, not necessarily CPUs. Includes fixes from Steven Rostedt. This RCU flavor is assumed to have very infrequent latency-tolerant updaters. This assumption permits significant simplifications, including a single global callback list protected by a single global lock, along with a single task-private linked list containing all tasks that have not yet passed through a quiescent state. If experience shows this assumption to be incorrect, the required additional complexity will be added. Suggested-by: Steven Rostedt Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 9 +++ include/linux/rcupdate.h | 36 ++++++++++ include/linux/sched.h | 23 ++++--- init/Kconfig | 10 +++ kernel/rcu/tiny.c | 2 + kernel/rcu/tree.c | 2 + kernel/rcu/update.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 242 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2bb4c4f3531a..dffd9258ee60 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -117,6 +117,14 @@ extern struct group_info init_groups; #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif +#ifdef CONFIG_TASKS_RCU +#define INIT_TASK_RCU_TASKS(tsk) \ + .rcu_tasks_holdout = false, \ + .rcu_tasks_holdout_list = \ + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), +#else +#define INIT_TASK_RCU_TASKS(tsk) +#endif extern struct cred init_cred; @@ -224,6 +232,7 @@ extern struct task_group root_task_group; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ + INIT_TASK_RCU_TASKS(tsk) \ INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d231aa17b1d7..3432063f4c87 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -197,6 +197,26 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); +/** + * call_rcu_tasks() - Queue an RCU for invocation task-based grace period + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_tasks() assumes + * that the read-side critical sections end at a voluntary context + * switch (not a preemption!), entry into idle, or transition to usermode + * execution. As such, there are no read-side primitives analogous to + * rcu_read_lock() and rcu_read_unlock() because this primitive is intended + * to determine that all tasks have passed through a safe state, not so + * much for data-strcuture synchronization. + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. + */ +void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); + #ifdef CONFIG_PREEMPT_RCU void __rcu_read_lock(void); @@ -294,6 +314,22 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, rcu_irq_exit(); \ } while (0) +/* + * Note a voluntary context switch for RCU-tasks benefit. This is a + * macro rather than an inline function to avoid #include hell. + */ +#ifdef CONFIG_TASKS_RCU +#define rcu_note_voluntary_context_switch(t) \ + do { \ + preempt_disable(); /* Exclude synchronize_sched(); */ \ + if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ + ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ + preempt_enable(); \ + } while (0) +#else /* #ifdef CONFIG_TASKS_RCU */ +#define rcu_note_voluntary_context_switch(t) do { } while (0) +#endif /* #else #ifdef CONFIG_TASKS_RCU */ + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..eaacac4ae77d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1270,6 +1270,11 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; +#endif /* #ifdef CONFIG_TASKS_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -2000,28 +2005,24 @@ extern void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask); #ifdef CONFIG_PREEMPT_RCU - #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#endif /* #ifdef CONFIG_PREEMPT_RCU */ static inline void rcu_copy_process(struct task_struct *p) { +#ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; -#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); +#endif /* #ifdef CONFIG_TASKS_RCU */ } -#else - -static inline void rcu_copy_process(struct task_struct *p) -{ -} - -#endif - static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { diff --git a/init/Kconfig b/init/Kconfig index e84c6423a2e5..c4539c4e177f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -507,6 +507,16 @@ config PREEMPT_RCU This option enables preemptible-RCU code that is common between TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU. +config TASKS_RCU + bool "Task_based RCU implementation using voluntary context switch" + default n + help + This option enables a task-based RCU implementation that uses + only voluntary context switch (not preemption!), idle, and + user-mode execution as quiescent states. + + If unsure, say N. + config RCU_STALL_COMMON def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE ) help diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index d9efcc13008c..717f00854fc0 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -254,6 +254,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_sched_qs(cpu); else if (!in_softirq()) rcu_bh_qs(cpu); + if (user) + rcu_note_voluntary_context_switch(current); } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1b70cb6fbe3c..8ad91d1e317d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2410,6 +2410,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) invoke_rcu_core(); + if (user) + rcu_note_voluntary_context_switch(current); trace_rcu_utilization(TPS("End scheduler-tick")); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4056d7992a6c..19b3dacb0753 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -47,6 +47,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS @@ -347,3 +348,173 @@ static int __init check_cpu_stall_init(void) early_initcall(check_cpu_stall_init); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ + +#ifdef CONFIG_TASKS_RCU + +/* + * Simple variant of RCU whose quiescent states are voluntary context switch, + * user-space execution, and idle. As such, grace periods can take one good + * long time. There are no read-side primitives similar to rcu_read_lock() + * and rcu_read_unlock() because this implementation is intended to get + * the system into a safe state for some of the manipulations involved in + * tracing and the like. Finally, this implementation does not support + * high call_rcu_tasks() rates from multiple CPUs. If this is required, + * per-CPU callback lists will be needed. + */ + +/* Global list of callbacks and associated lock. */ +static struct rcu_head *rcu_tasks_cbs_head; +static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; +static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); + +/* Post an RCU-tasks callback. */ +void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) +{ + unsigned long flags; + + rhp->next = NULL; + rhp->func = func; + raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags); + *rcu_tasks_cbs_tail = rhp; + rcu_tasks_cbs_tail = &rhp->next; + raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); +} +EXPORT_SYMBOL_GPL(call_rcu_tasks); + +/* See if the current task has stopped holding out, remove from list if so. */ +static void check_holdout_task(struct task_struct *t) +{ + if (!ACCESS_ONCE(t->rcu_tasks_holdout) || + t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || + !ACCESS_ONCE(t->on_rq)) { + ACCESS_ONCE(t->rcu_tasks_holdout) = false; + list_del_rcu(&t->rcu_tasks_holdout_list); + put_task_struct(t); + } +} + +/* RCU-tasks kthread that detects grace periods and invokes callbacks. */ +static int __noreturn rcu_tasks_kthread(void *arg) +{ + unsigned long flags; + struct task_struct *g, *t; + struct rcu_head *list; + struct rcu_head *next; + LIST_HEAD(rcu_tasks_holdouts); + + /* FIXME: Add housekeeping affinity. */ + + /* + * Each pass through the following loop makes one check for + * newly arrived callbacks, and, if there are some, waits for + * one RCU-tasks grace period and then invokes the callbacks. + * This loop is terminated by the system going down. ;-) + */ + for (;;) { + + /* Pick up any new callbacks. */ + raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags); + list = rcu_tasks_cbs_head; + rcu_tasks_cbs_head = NULL; + rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; + raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); + + /* If there were none, wait a bit and start over. */ + if (!list) { + schedule_timeout_interruptible(HZ); + WARN_ON(signal_pending(current)); + continue; + } + + /* + * Wait for all pre-existing t->on_rq and t->nvcsw + * transitions to complete. Invoking synchronize_sched() + * suffices because all these transitions occur with + * interrupts disabled. Without this synchronize_sched(), + * a read-side critical section that started before the + * grace period might be incorrectly seen as having started + * after the grace period. + * + * This synchronize_sched() also dispenses with the + * need for a memory barrier on the first store to + * ->rcu_tasks_holdout, as it forces the store to happen + * after the beginning of the grace period. + */ + synchronize_sched(); + + /* + * There were callbacks, so we need to wait for an + * RCU-tasks grace period. Start off by scanning + * the task list for tasks that are not already + * voluntarily blocked. Mark these tasks and make + * a list of them in rcu_tasks_holdouts. + */ + rcu_read_lock(); + for_each_process_thread(g, t) { + if (t != current && ACCESS_ONCE(t->on_rq) && + !is_idle_task(t)) { + get_task_struct(t); + t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw); + ACCESS_ONCE(t->rcu_tasks_holdout) = true; + list_add(&t->rcu_tasks_holdout_list, + &rcu_tasks_holdouts); + } + } + rcu_read_unlock(); + + /* + * Each pass through the following loop scans the list + * of holdout tasks, removing any that are no longer + * holdouts. When the list is empty, we are done. + */ + while (!list_empty(&rcu_tasks_holdouts)) { + schedule_timeout_interruptible(HZ); + WARN_ON(signal_pending(current)); + rcu_read_lock(); + list_for_each_entry_rcu(t, &rcu_tasks_holdouts, + rcu_tasks_holdout_list) + check_holdout_task(t); + rcu_read_unlock(); + } + + /* + * Because ->on_rq and ->nvcsw are not guaranteed + * to have a full memory barriers prior to them in the + * schedule() path, memory reordering on other CPUs could + * cause their RCU-tasks read-side critical sections to + * extend past the end of the grace period. However, + * because these ->nvcsw updates are carried out with + * interrupts disabled, we can use synchronize_sched() + * to force the needed ordering on all such CPUs. + * + * This synchronize_sched() also confines all + * ->rcu_tasks_holdout accesses to be within the grace + * period, avoiding the need for memory barriers for + * ->rcu_tasks_holdout accesses. + */ + synchronize_sched(); + + /* Invoke the callbacks. */ + while (list) { + next = list->next; + local_bh_disable(); + list->func(list); + local_bh_enable(); + list = next; + cond_resched(); + } + } +} + +/* Spawn rcu_tasks_kthread() at boot time. */ +static int __init rcu_spawn_tasks_kthread(void) +{ + struct task_struct __maybe_unused *t; + + t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); + BUG_ON(IS_ERR(t)); + return 0; +} +early_initcall(rcu_spawn_tasks_kthread); + +#endif /* #ifdef CONFIG_TASKS_RCU */ -- cgit v1.2.3 From bde6c3aa993066acb0d6ce32ecabe03b9d5df92d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2014 11:26:57 -0700 Subject: rcu: Provide cond_resched_rcu_qs() to force quiescent states in long loops RCU-tasks requires the occasional voluntary context switch from CPU-bound in-kernel tasks. In some cases, this requires instrumenting cond_resched(). However, there is some reluctance to countenance unconditionally instrumenting cond_resched() (see http://lwn.net/Articles/603252/), so this commit creates a separate cond_resched_rcu_qs() that may be used in place of cond_resched() in locations prone to long-duration in-kernel looping. This commit currently instruments only RCU-tasks. Future possibilities include also instrumenting RCU, RCU-bh, and RCU-sched in order to reduce IPI usage. Signed-off-by: Paul E. McKenney --- fs/file.c | 2 +- include/linux/rcupdate.h | 13 +++++++++++++ kernel/rcu/rcutorture.c | 4 ++-- kernel/rcu/tree.c | 12 ++++++------ kernel/rcu/tree_plugin.h | 2 +- mm/mlock.c | 2 +- 6 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/file.c b/fs/file.c index 66923fe3176e..1cafc4c9275b 100644 --- a/fs/file.c +++ b/fs/file.c @@ -367,7 +367,7 @@ static struct fdtable *close_files(struct files_struct * files) struct file * file = xchg(&fdt->fd[i], NULL); if (file) { filp_close(file, files); - cond_resched(); + cond_resched_rcu_qs(); } } i++; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3432063f4c87..473350462d04 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -330,6 +330,19 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, #define rcu_note_voluntary_context_switch(t) do { } while (0) #endif /* #else #ifdef CONFIG_TASKS_RCU */ +/** + * cond_resched_rcu_qs - Report potential quiescent states to RCU + * + * This macro resembles cond_resched(), except that it is defined to + * report potential quiescent states to RCU-tasks even if the cond_resched() + * machinery were to be shut off, as some advocate for PREEMPT kernels. + */ +#define cond_resched_rcu_qs() \ +do { \ + rcu_note_voluntary_context_switch(current); \ + cond_resched(); \ +} while (0) + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 948a7693748e..178716713e11 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -667,7 +667,7 @@ static int rcu_torture_boost(void *arg) } call_rcu_time = jiffies; } - cond_resched(); + cond_resched_rcu_qs(); stutter_wait("rcu_torture_boost"); if (torture_must_stop()) goto checkwait; @@ -1019,7 +1019,7 @@ rcu_torture_reader(void *arg) __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); cur_ops->readunlock(idx); - cond_resched(); + cond_resched_rcu_qs(); stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); if (irqreader && cur_ops->irq_capable) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8ad91d1e317d..e23dad0661e2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1647,7 +1647,7 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->level, rnp->grplo, rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); - cond_resched(); + cond_resched_rcu_qs(); } mutex_unlock(&rsp->onoff_mutex); @@ -1736,7 +1736,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) /* smp_mb() provided by prior unlock-lock pair. */ nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); - cond_resched(); + cond_resched_rcu_qs(); } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); @@ -1785,7 +1785,7 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Locking provides needed memory barrier. */ if (rcu_gp_init(rsp)) break; - cond_resched(); + cond_resched_rcu_qs(); flush_signals(current); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -1828,10 +1828,10 @@ static int __noreturn rcu_gp_kthread(void *arg) trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("fqsend")); - cond_resched(); + cond_resched_rcu_qs(); } else { /* Deal with stray signal. */ - cond_resched(); + cond_resched_rcu_qs(); flush_signals(current); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -2434,7 +2434,7 @@ static void force_qs_rnp(struct rcu_state *rsp, struct rcu_node *rnp; rcu_for_each_leaf_node(rsp, rnp) { - cond_resched(); + cond_resched_rcu_qs(); mask = 0; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a7997e272564..7672586d3920 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1848,7 +1848,7 @@ static int rcu_oom_notify(struct notifier_block *self, get_online_cpus(); for_each_online_cpu(cpu) { smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); - cond_resched(); + cond_resched_rcu_qs(); } put_online_cpus(); diff --git a/mm/mlock.c b/mm/mlock.c index ce84cb0b83ef..ab3150c26711 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -789,7 +789,7 @@ static int do_mlockall(int flags) /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); - cond_resched(); + cond_resched_rcu_qs(); } out: return 0; -- cgit v1.2.3 From 53c6d4edf874d3cbc031a53738c6cba9277faea5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2014 12:22:23 -0700 Subject: rcu: Add synchronous grace-period waiting for RCU-tasks It turns out to be easier to add the synchronous grace-period waiting functions to RCU-tasks than to work around their absense in rcutorture, so this commit adds them. The key point is that the existence of call_rcu_tasks() means that rcutorture needs an rcu_barrier_tasks(). Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 ++ kernel/rcu/update.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 473350462d04..640152fedcde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -216,6 +216,8 @@ void synchronize_sched(void); * memory ordering guarantees. */ void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); +void synchronize_rcu_tasks(void); +void rcu_barrier_tasks(void); #ifdef CONFIG_PREEMPT_RCU diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 19b3dacb0753..5fd1ddbfcc55 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -381,6 +381,61 @@ void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) } EXPORT_SYMBOL_GPL(call_rcu_tasks); +/** + * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed. + * + * Control will return to the caller some time after a full rcu-tasks + * grace period has elapsed, in other words after all currently + * executing rcu-tasks read-side critical sections have elapsed. These + * read-side critical sections are delimited by calls to schedule(), + * cond_resched_rcu_qs(), idle execution, userspace execution, calls + * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched(). + * + * This is a very specialized primitive, intended only for a few uses in + * tracing and other situations requiring manipulation of function + * preambles and profiling hooks. The synchronize_rcu_tasks() function + * is not (yet) intended for heavy use from multiple CPUs. + * + * Note that this guarantee implies further memory-ordering guarantees. + * On systems with more than one CPU, when synchronize_rcu_tasks() returns, + * each CPU is guaranteed to have executed a full memory barrier since the + * end of its last RCU-tasks read-side critical section whose beginning + * preceded the call to synchronize_rcu_tasks(). In addition, each CPU + * having an RCU-tasks read-side critical section that extends beyond + * the return from synchronize_rcu_tasks() is guaranteed to have executed + * a full memory barrier after the beginning of synchronize_rcu_tasks() + * and before the beginning of that RCU-tasks read-side critical section. + * Note that these guarantees include CPUs that are offline, idle, or + * executing in user mode, as well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned + * to its caller on CPU B, then both CPU A and CPU B are guaranteed + * to have executed a full memory barrier during the execution of + * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU + * (but again only if the system has more than one CPU). + */ +void synchronize_rcu_tasks(void) +{ + /* Complain if the scheduler has not started. */ + rcu_lockdep_assert(!rcu_scheduler_active, + "synchronize_rcu_tasks called too soon"); + + /* Wait for the grace period. */ + wait_rcu_gp(call_rcu_tasks); +} + +/** + * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks. + * + * Although the current implementation is guaranteed to wait, it is not + * obligated to, for example, if there are no pending callbacks. + */ +void rcu_barrier_tasks(void) +{ + /* There is only one callback queue, so this is easy. ;-) */ + synchronize_rcu_tasks(); +} + /* See if the current task has stopped holding out, remove from list if so. */ static void check_holdout_task(struct task_struct *t) { -- cgit v1.2.3 From 3f95aa81d265223fdb13ea2b59883766a05adbdf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Aug 2014 06:10:23 -0700 Subject: rcu: Make TASKS_RCU handle tasks that are almost done exiting Once a task has passed exit_notify() in the do_exit() code path, it is no longer on the task lists, and is therefore no longer visible to rcu_tasks_kthread(). This means that an almost-exited task might be preempted while within a trampoline, and this task won't be waited on by rcu_tasks_kthread(). This commit fixes this bug by adding an srcu_struct. An exiting task does srcu_read_lock() just before calling exit_notify(), and does the corresponding srcu_read_unlock() after doing the final preempt_disable(). This means that rcu_tasks_kthread() can do synchronize_srcu() to wait for all mostly-exited tasks to reach their final preempt_disable() region, and then use synchronize_sched() to wait for those tasks to finish exiting. Reported-by: Oleg Nesterov Suggested-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 3 +++ kernel/exit.c | 3 +++ kernel/rcu/update.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 640152fedcde..54b2ebb20313 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -321,6 +321,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, * macro rather than an inline function to avoid #include hell. */ #ifdef CONFIG_TASKS_RCU +#define TASKS_RCU(x) x +extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ preempt_disable(); /* Exclude synchronize_sched(); */ \ @@ -329,6 +331,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, preempt_enable(); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ +#define TASKS_RCU(x) do { } while (0) #define rcu_note_voluntary_context_switch(t) do { } while (0) #endif /* #else #ifdef CONFIG_TASKS_RCU */ diff --git a/kernel/exit.c b/kernel/exit.c index 32c58f7433a3..d13f2eec4bb8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -667,6 +667,7 @@ void do_exit(long code) { struct task_struct *tsk = current; int group_dead; + TASKS_RCU(int tasks_rcu_i); profile_task_exit(tsk); @@ -775,6 +776,7 @@ void do_exit(long code) */ flush_ptrace_hw_breakpoint(tsk); + TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu)); exit_notify(tsk, group_dead); proc_exit_connector(tsk); #ifdef CONFIG_NUMA @@ -814,6 +816,7 @@ void do_exit(long code) if (tsk->nr_dirtied) __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); exit_rcu(); + TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); /* * The setting of TASK_RUNNING by try_to_wake_up() may be delayed diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5fd1ddbfcc55..403fc4ae539e 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head; static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); +/* Track exiting tasks in order to allow them to be waited for. */ +DEFINE_SRCU(tasks_rcu_exit_srcu); + +/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ +static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3; +module_param(rcu_task_stall_timeout, int, 0644); + /* Post an RCU-tasks callback. */ void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) { @@ -517,6 +524,15 @@ static int __noreturn rcu_tasks_kthread(void *arg) } rcu_read_unlock(); + /* + * Wait for tasks that are in the process of exiting. + * This does only part of the job, ensuring that all + * tasks that were previously exiting reach the point + * where they have disabled preemption, allowing the + * later synchronize_sched() to finish the job. + */ + synchronize_srcu(&tasks_rcu_exit_srcu); + /* * Each pass through the following loop scans the list * of holdout tasks, removing any that are no longer @@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg) * ->rcu_tasks_holdout accesses to be within the grace * period, avoiding the need for memory barriers for * ->rcu_tasks_holdout accesses. + * + * In addition, this synchronize_sched() waits for exiting + * tasks to complete their final preempt_disable() region + * of execution, cleaning up after the synchronize_srcu() + * above. */ synchronize_sched(); -- cgit v1.2.3 From 69c604557ce34015629b325b85ff1a4996038a3b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2014 11:59:36 -0700 Subject: rcutorture: Add torture tests for RCU-tasks This commit adds torture tests for RCU-tasks. It also fixes a bug that would segfault for an RCU flavor lacking a callback-barrier function. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 1 + kernel/rcu/rcutorture.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 54b2ebb20313..a3123f53a4ce 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -55,6 +55,7 @@ enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, RCU_SCHED_FLAVOR, + RCU_TASKS_FLAVOR, SRCU_FLAVOR, INVALID_RCU_FLAVOR }; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 178716713e11..75b1abf78c48 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -601,6 +601,52 @@ static struct rcu_torture_ops sched_ops = { .name = "sched" }; +#ifdef CONFIG_TASKS_RCU + +/* + * Definitions for RCU-tasks torture testing. + */ + +static int tasks_torture_read_lock(void) +{ + return 0; +} + +static void tasks_torture_read_unlock(int idx) +{ +} + +static void rcu_tasks_torture_deferred_free(struct rcu_torture *p) +{ + call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb); +} + +static struct rcu_torture_ops tasks_ops = { + .ttype = RCU_TASKS_FLAVOR, + .init = rcu_sync_torture_init, + .readlock = tasks_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = tasks_torture_read_unlock, + .completed = rcu_no_completed, + .deferred_free = rcu_tasks_torture_deferred_free, + .sync = synchronize_rcu_tasks, + .exp_sync = synchronize_rcu_tasks, + .call = call_rcu_tasks, + .cb_barrier = rcu_barrier_tasks, + .fqs = NULL, + .stats = NULL, + .irq_capable = 1, + .name = "tasks" +}; + +#define RCUTORTURE_TASKS_OPS &tasks_ops, + +#else /* #ifdef CONFIG_TASKS_RCU */ + +#define RCUTORTURE_TASKS_OPS + +#endif /* #else #ifdef CONFIG_TASKS_RCU */ + /* * RCU torture priority-boost testing. Runs one real-time thread per * CPU for moderate bursts, repeatedly registering RCU callbacks and @@ -1295,7 +1341,8 @@ static int rcu_torture_barrier_cbs(void *arg) if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); } while (!torture_must_stop()); - cur_ops->cb_barrier(); + if (cur_ops->cb_barrier != NULL) + cur_ops->cb_barrier(); destroy_rcu_head_on_stack(&rcu); torture_kthread_stopping("rcu_torture_barrier_cbs"); return 0; @@ -1534,6 +1581,7 @@ rcu_torture_init(void) int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, + RCUTORTURE_TASKS_OPS }; if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable)) -- cgit v1.2.3 From 176f8f7a52cc6d09d686f0d900abda6942a52fbb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Aug 2014 17:43:50 -0700 Subject: rcu: Make TASKS_RCU handle nohz_full= CPUs Currently TASKS_RCU would ignore a CPU running a task in nohz_full= usermode execution. There would be neither a context switch nor a scheduling-clock interrupt to tell TASKS_RCU that the task in question had passed through a quiescent state. The grace period would therefore extend indefinitely. This commit therefore makes RCU's dyntick-idle subsystem record the task_struct structure of the task that is running in dyntick-idle mode on each CPU. The TASKS_RCU grace period can then access this information and record a quiescent state on behalf of any CPU running in dyntick-idle usermode. Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 3 ++- include/linux/sched.h | 2 ++ kernel/rcu/tree.c | 2 ++ kernel/rcu/tree.h | 2 ++ kernel/rcu/tree_plugin.h | 16 ++++++++++++++++ kernel/rcu/update.c | 4 +++- 6 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dffd9258ee60..03b274873b06 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -121,7 +121,8 @@ extern struct group_info init_groups; #define INIT_TASK_RCU_TASKS(tsk) \ .rcu_tasks_holdout = false, \ .rcu_tasks_holdout_list = \ - LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \ + .rcu_tasks_idle_cpu = -1, #else #define INIT_TASK_RCU_TASKS(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index eaacac4ae77d..ec8b34722bcc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1274,6 +1274,7 @@ struct task_struct { unsigned long rcu_tasks_nvcsw; bool rcu_tasks_holdout; struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -2020,6 +2021,7 @@ static inline void rcu_copy_process(struct task_struct *p) #ifdef CONFIG_TASKS_RCU p->rcu_tasks_holdout = false; INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; #endif /* #ifdef CONFIG_TASKS_RCU */ } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e23dad0661e2..c880f5387b1f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -526,6 +526,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, atomic_inc(&rdtp->dynticks); smp_mb__after_atomic(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); + rcu_dynticks_task_enter(); /* * It is illegal to enter an extended quiescent state while @@ -642,6 +643,7 @@ void rcu_irq_exit(void) static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, int user) { + rcu_dynticks_task_exit(); smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6a86eb7bac45..3a92000c354f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -605,6 +605,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, static void rcu_bind_gp_kthread(void); static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); static bool rcu_nohz_full_cpu(struct rcu_state *rsp); +static void rcu_dynticks_task_enter(void); +static void rcu_dynticks_task_exit(void); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7672586d3920..e466b40052a7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -3036,3 +3036,19 @@ static void rcu_bind_gp_kthread(void) housekeeping_affine(current); #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ } + +/* Record the current task on dyntick-idle entry. */ +static void rcu_dynticks_task_enter(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id(); +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} + +/* Record no current task on dyntick-idle exit. */ +static void rcu_dynticks_task_exit(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1; +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index e1d71741958f..2658de4a5975 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -463,7 +463,9 @@ static void check_holdout_task(struct task_struct *t, { if (!ACCESS_ONCE(t->rcu_tasks_holdout) || t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || - !ACCESS_ONCE(t->on_rq)) { + !ACCESS_ONCE(t->on_rq) || + (IS_ENABLED(CONFIG_NO_HZ_FULL) && + !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { ACCESS_ONCE(t->rcu_tasks_holdout) = false; list_del_rcu(&t->rcu_tasks_holdout_list); put_task_struct(t); -- cgit v1.2.3 From 01a81330344b09028881c953a51d1106a9e63518 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Aug 2014 05:23:35 -0700 Subject: rcu: Remove redundant preempt_disable() from rcu_note_voluntary_context_switch() In theory, synchronize_sched() requires a read-side critical section to order against. In practice, preemption can be thought of as being disabled across every machine instruction, at least for those machine instructions that are not in the idle loop and not on offline CPUs. So this commit removes the redundant preempt_disable() from rcu_note_voluntary_context_switch(). Please note that the single instruction in question is the store of zero to ->rcu_tasks_holdout. The "if" is simply a performance optimization that avoids unnecessary stores. To see this, keep in mind that both the "if" condition and the store are in a quiescent state. Therefore, even if the task is preempted for a full grace period (presumably due to its having done a context switch beforehand), the store will be recording a legitimate quiescent state. Reported-by: Lai Jiangshan Signed-off-by: Paul E. McKenney Conflicts: include/linux/rcupdate.h --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a3123f53a4ce..132e1e34cdca 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -326,10 +326,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ - preempt_disable(); /* Exclude synchronize_sched(); */ \ if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ - preempt_enable(); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) -- cgit v1.2.3 From 1d082fd061884a587c490c4fc8a2056ce1e47624 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Aug 2014 16:01:53 -0700 Subject: rcu: Remove local_irq_disable() in rcu_preempt_note_context_switch() The rcu_preempt_note_context_switch() function is on a scheduling fast path, so it would be good to avoid disabling irqs. The reason that irqs are disabled is to synchronize process-level and irq-handler access to the task_struct ->rcu_read_unlock_special bitmask. This commit therefore makes ->rcu_read_unlock_special instead be a union of bools with a short allowing single-access checks in RCU's __rcu_read_unlock(). This results in the process-level and irq-handler accesses being simple loads and stores, so that irqs need no longer be disabled. This commit therefore removes the irq disabling from rcu_preempt_note_context_switch(). Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 2 +- include/linux/sched.h | 16 +++++++++------- kernel/rcu/tree_plugin.h | 32 +++++++++++++++----------------- kernel/rcu/update.c | 2 +- 4 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 03b274873b06..77fc43f8fb72 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -111,7 +111,7 @@ extern struct group_info init_groups; #ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ - .rcu_read_unlock_special = 0, \ + .rcu_read_unlock_special.s = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ INIT_TASK_RCU_TREE_PREEMPT() #else diff --git a/include/linux/sched.h b/include/linux/sched.h index ec8b34722bcc..42888d715fb1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1212,6 +1212,13 @@ struct sched_dl_entity { struct hrtimer dl_timer; }; +union rcu_special { + struct { + bool blocked; + bool need_qs; + } b; + short s; +}; struct rcu_node; enum perf_event_task_context { @@ -1264,7 +1271,7 @@ struct task_struct { #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; - char rcu_read_unlock_special; + union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TREE_PREEMPT_RCU @@ -2005,16 +2012,11 @@ extern void task_clear_jobctl_trapping(struct task_struct *task); extern void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask); -#ifdef CONFIG_PREEMPT_RCU -#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - static inline void rcu_copy_process(struct task_struct *p) { #ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special = 0; + p->rcu_read_unlock_special.s = 0; p->rcu_blocked_node = NULL; INIT_LIST_HEAD(&p->rcu_node_entry); #endif /* #ifdef CONFIG_PREEMPT_RCU */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e466b40052a7..0981c0cd70fe 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -155,9 +155,8 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * not in a quiescent state. There might be any number of tasks blocked * while in an RCU read-side critical section. * - * Unlike the other rcu_*_qs() functions, callers to this function - * must disable irqs in order to protect the assignment to - * ->rcu_read_unlock_special. + * As with the other rcu_*_qs() functions, callers to this function + * must disable preemption. */ static void rcu_preempt_qs(int cpu) { @@ -166,7 +165,7 @@ static void rcu_preempt_qs(int cpu) if (rdp->passed_quiesce == 0) trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); rdp->passed_quiesce = 1; - current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + current->rcu_read_unlock_special.b.need_qs = false; } /* @@ -190,14 +189,14 @@ static void rcu_preempt_note_context_switch(int cpu) struct rcu_node *rnp; if (t->rcu_read_lock_nesting > 0 && - (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + !t->rcu_read_unlock_special.b.blocked) { /* Possibly blocking in an RCU read-side critical section. */ rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + t->rcu_read_unlock_special.b.blocked = true; t->rcu_blocked_node = rnp; /* @@ -239,7 +238,7 @@ static void rcu_preempt_note_context_switch(int cpu) : rnp->gpnum + 1); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else if (t->rcu_read_lock_nesting < 0 && - t->rcu_read_unlock_special) { + t->rcu_read_unlock_special.s) { /* * Complete exit from RCU read-side critical section on @@ -257,9 +256,7 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. */ - local_irq_save(flags); rcu_preempt_qs(cpu); - local_irq_restore(flags); } /* @@ -340,7 +337,7 @@ void rcu_read_unlock_special(struct task_struct *t) bool drop_boost_mutex = false; #endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; - int special; + union rcu_special special; /* NMI handlers cannot block and cannot safely manipulate state. */ if (in_nmi()) @@ -350,12 +347,13 @@ void rcu_read_unlock_special(struct task_struct *t) /* * If RCU core is waiting for this CPU to exit critical section, - * let it know that we have done so. + * let it know that we have done so. Because irqs are disabled, + * t->rcu_read_unlock_special cannot change. */ special = t->rcu_read_unlock_special; - if (special & RCU_READ_UNLOCK_NEED_QS) { + if (special.b.need_qs) { rcu_preempt_qs(smp_processor_id()); - if (!t->rcu_read_unlock_special) { + if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); return; } @@ -368,8 +366,8 @@ void rcu_read_unlock_special(struct task_struct *t) } /* Clean up if blocked during RCU read-side critical section. */ - if (special & RCU_READ_UNLOCK_BLOCKED) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + if (special.b.blocked) { + t->rcu_read_unlock_special.b.blocked = false; /* * Remove this task from the list it blocked on. The @@ -658,7 +656,7 @@ static void rcu_preempt_check_callbacks(int cpu) } if (t->rcu_read_lock_nesting > 0 && per_cpu(rcu_preempt_data, cpu).qs_pending) - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; + t->rcu_read_unlock_special.b.need_qs = true; } #ifdef CONFIG_RCU_BOOST @@ -941,7 +939,7 @@ void exit_rcu(void) return; t->rcu_read_lock_nesting = 1; barrier(); - t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; + t->rcu_read_unlock_special.b.blocked = true; __rcu_read_unlock(); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9487b4898e51..6fb911558562 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -93,7 +93,7 @@ void __rcu_read_unlock(void) barrier(); /* critical section before exit code. */ t->rcu_read_lock_nesting = INT_MIN; barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; -- cgit v1.2.3 From 284a8c93af47306beed967a303d84730b32bab39 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Aug 2014 16:38:46 -0700 Subject: rcu: Per-CPU operation cleanups to rcu_*_qs() functions The rcu_bh_qs(), rcu_preempt_qs(), and rcu_sched_qs() functions use old-style per-CPU variable access and write to ->passed_quiesce even if it is already set. This commit therefore updates to use the new-style per-CPU variable access functions and avoids the spurious writes. This commit also eliminates the "cpu" argument to these functions because they are always invoked on the indicated CPU. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutiny.h | 2 +- kernel/rcu/tiny.c | 10 +++++----- kernel/rcu/tree.c | 34 ++++++++++++++++++---------------- kernel/rcu/tree_plugin.h | 27 +++++++++++++++------------ kernel/softirq.c | 2 +- 6 files changed, 42 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 132e1e34cdca..2fab0e37afe0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -261,8 +261,8 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); +void rcu_sched_qs(void); +void rcu_bh_qs(void); void rcu_check_callbacks(int cpu, int user); struct notifier_block; void rcu_idle_enter(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index d40a6a451330..38cc5b1e252d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -80,7 +80,7 @@ static inline void kfree_call_rcu(struct rcu_head *head, static inline void rcu_note_context_switch(int cpu) { - rcu_sched_qs(cpu); + rcu_sched_qs(); } /* diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 717f00854fc0..61b8d2ccc2cb 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval) current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } - rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ + rcu_sched_qs(); /* implies rcu_bh_inc() */ barrier(); rcu_dynticks_nesting = newval; } @@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) * are at it, given that any rcu quiescent state is also an rcu_bh * quiescent state. Use "+" instead of "||" to defeat short circuiting. */ -void rcu_sched_qs(int cpu) +void rcu_sched_qs(void) { unsigned long flags; @@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu) /* * Record an rcu_bh quiescent state. */ -void rcu_bh_qs(int cpu) +void rcu_bh_qs(void) { unsigned long flags; @@ -251,9 +251,9 @@ void rcu_check_callbacks(int cpu, int user) { RCU_TRACE(check_cpu_stalls()); if (user || rcu_is_cpu_rrupt_from_idle()) - rcu_sched_qs(cpu); + rcu_sched_qs(); else if (!in_softirq()) - rcu_bh_qs(cpu); + rcu_bh_qs(); if (user) rcu_note_voluntary_context_switch(current); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c880f5387b1f..4c340625ffd4 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -188,22 +188,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) * one since the start of the grace period, this just sets a flag. * The caller must have disabled preemption. */ -void rcu_sched_qs(int cpu) +void rcu_sched_qs(void) { - struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; + if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_sched"), + __this_cpu_read(rcu_sched_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_sched_data.passed_quiesce, 1); + } } -void rcu_bh_qs(int cpu) +void rcu_bh_qs(void) { - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; + if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_bh"), + __this_cpu_read(rcu_bh_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_bh_data.passed_quiesce, 1); + } } static DEFINE_PER_CPU(int, rcu_sched_qs_mask); @@ -278,7 +280,7 @@ static void rcu_momentary_dyntick_idle(void) void rcu_note_context_switch(int cpu) { trace_rcu_utilization(TPS("Start context switch")); - rcu_sched_qs(cpu); + rcu_sched_qs(); rcu_preempt_note_context_switch(cpu); if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) rcu_momentary_dyntick_idle(); @@ -2395,8 +2397,8 @@ void rcu_check_callbacks(int cpu, int user) * at least not while the corresponding CPU is online. */ - rcu_sched_qs(cpu); - rcu_bh_qs(cpu); + rcu_sched_qs(); + rcu_bh_qs(); } else if (!in_softirq()) { @@ -2407,7 +2409,7 @@ void rcu_check_callbacks(int cpu, int user) * critical section, so note it. */ - rcu_bh_qs(cpu); + rcu_bh_qs(); } rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0981c0cd70fe..25e692a36280 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -158,14 +158,16 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * As with the other rcu_*_qs() functions, callers to this function * must disable preemption. */ -static void rcu_preempt_qs(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; - current->rcu_read_unlock_special.b.need_qs = false; +static void rcu_preempt_qs(void) +{ + if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_preempt"), + __this_cpu_read(rcu_preempt_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); + barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ + current->rcu_read_unlock_special.b.need_qs = false; + } } /* @@ -256,7 +258,7 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. */ - rcu_preempt_qs(cpu); + rcu_preempt_qs(); } /* @@ -352,7 +354,7 @@ void rcu_read_unlock_special(struct task_struct *t) */ special = t->rcu_read_unlock_special; if (special.b.need_qs) { - rcu_preempt_qs(smp_processor_id()); + rcu_preempt_qs(); if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); return; @@ -651,11 +653,12 @@ static void rcu_preempt_check_callbacks(int cpu) struct task_struct *t = current; if (t->rcu_read_lock_nesting == 0) { - rcu_preempt_qs(cpu); + rcu_preempt_qs(); return; } if (t->rcu_read_lock_nesting > 0 && - per_cpu(rcu_preempt_data, cpu).qs_pending) + per_cpu(rcu_preempt_data, cpu).qs_pending && + !per_cpu(rcu_preempt_data, cpu).passed_quiesce) t->rcu_read_unlock_special.b.need_qs = true; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 5918d227730f..348ec763b104 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -278,7 +278,7 @@ restart: pending >>= softirq_bit; } - rcu_bh_qs(smp_processor_id()); + rcu_bh_qs(); local_irq_disable(); pending = local_softirq_pending(); -- cgit v1.2.3 From 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:29 +0900 Subject: percpu_counter: add @gfp to percpu_counter_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_counter_init() so that !GFP_KERNEL allocation masks can be used with percpu_counters too. We could have left percpu_counter_init() alone and added percpu_counter_init_gfp(); however, the number of users isn't that high and introducing _gfp variants to all percpu data structures would be quite ugly, so let's just do the conversion. This is the one with the most users. Other percpu data structures are a lot easier to convert. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Acked-by: Jan Kara Acked-by: "David S. Miller" Cc: x86@kernel.org Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andrew Morton --- arch/x86/kvm/mmu.c | 2 +- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent-tree.c | 2 +- fs/ext2/super.c | 6 +++--- fs/ext3/super.c | 6 +++--- fs/ext4/super.c | 14 +++++++++----- fs/file_table.c | 2 +- fs/quota/dquot.c | 2 +- fs/super.c | 3 ++- include/linux/percpu_counter.h | 10 ++++++---- include/net/dst_ops.h | 2 +- include/net/inet_frag.h | 2 +- lib/flex_proportions.c | 4 ++-- lib/percpu_counter.c | 4 ++-- lib/proportions.c | 6 +++--- mm/backing-dev.c | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 2 +- mm/shmem.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_memcontrol.c | 2 +- net/sctp/protocol.c | 2 +- 23 files changed, 49 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da7..5bd53f206f4f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4534,7 +4534,7 @@ int kvm_mmu_module_init(void) if (!mmu_page_header_cache) goto nomem; - if (percpu_counter_init(&kvm_total_used_mmu_pages, 0)) + if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) goto nomem; register_shrinker(&mmu_shrinker); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 08e65e9cf2aa..61dae01788d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1180,7 +1180,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) if (!writers) return ERR_PTR(-ENOMEM); - ret = percpu_counter_init(&writers->counter, 0); + ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL); if (ret < 0) { kfree(writers); return ERR_PTR(ret); @@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb, goto fail_srcu; } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_bdi; @@ -2193,13 +2193,13 @@ int open_ctree(struct super_block *sb, fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * (1 + ilog2(nr_cpu_ids)); - ret = percpu_counter_init(&fs_info->delalloc_bytes, 0); + ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_dirty_metadata_bytes; } - ret = percpu_counter_init(&fs_info->bio_counter, 0); + ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_delalloc_bytes; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813537f362f9..94ec71eda86b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3493,7 +3493,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (!found) return -ENOMEM; - ret = percpu_counter_init(&found->total_bytes_pinned, 0); + ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL); if (ret) { kfree(found); return ret; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b88edc05c230..170dc41e8bf4 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1067,14 +1067,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext2_count_free_blocks(sb)); + ext2_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext2_count_free_inodes(sb)); + ext2_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext2_count_dirs(sb)); + ext2_count_dirs(sb), GFP_KERNEL); } if (err) { ext2_msg(sb, KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 08cdfe5461e3..eba021b88440 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2039,14 +2039,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount2; } err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); + ext3_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); + ext3_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); + ext3_count_dirs(sb), GFP_KERNEL); } if (err) { ext3_msg(sb, KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b9..e25ca8fdde7d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3891,7 +3891,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Register extent status tree shrinker */ ext4_es_register_shrinker(sbi); - if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) { + err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); + if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount3; } @@ -4105,17 +4106,20 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); - err = percpu_counter_init(&sbi->s_freeclusters_counter, block); + err = percpu_counter_init(&sbi->s_freeclusters_counter, block, + GFP_KERNEL); if (!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); - err = percpu_counter_init(&sbi->s_freeinodes_counter, freei); + err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, + GFP_KERNEL); } if (!err) err = percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); + ext4_count_dirs(sb), GFP_KERNEL); if (!err) - err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); + err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, + GFP_KERNEL); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount6; diff --git a/fs/file_table.c b/fs/file_table.c index 385bfd31512a..0bab12b20460 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -331,5 +331,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - percpu_counter_init(&nr_files, 0); + percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f2d0eee9d1f1..8b663b2d9562 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2725,7 +2725,7 @@ static int __init dquot_init(void) panic("Cannot create dquot hash table"); for (i = 0; i < _DQST_DQSTAT_LAST; i++) { - ret = percpu_counter_init(&dqstats.counter[i], 0); + ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL); if (ret) panic("Cannot create dquot stat counters"); } diff --git a/fs/super.c b/fs/super.c index b9a214d2fe98..1b836107acee 100644 --- a/fs/super.c +++ b/fs/super.c @@ -175,7 +175,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) goto fail; for (i = 0; i < SB_FREEZE_LEVELS; i++) { - if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) + if (percpu_counter_init(&s->s_writers.counter[i], 0, + GFP_KERNEL) < 0) goto fail; lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], &type->s_writers_key[i], 0); diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index d5dd4657c8d6..50e50095c8d1 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_SMP @@ -26,14 +27,14 @@ struct percpu_counter { extern int percpu_counter_batch; -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key); -#define percpu_counter_init(fbc, value) \ +#define percpu_counter_init(fbc, value, gfp) \ ({ \ static struct lock_class_key __key; \ \ - __percpu_counter_init(fbc, value, &__key); \ + __percpu_counter_init(fbc, value, gfp, &__key); \ }) void percpu_counter_destroy(struct percpu_counter *fbc); @@ -89,7 +90,8 @@ struct percpu_counter { s64 count; }; -static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount) +static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, + gfp_t gfp) { fbc->count = amount; return 0; diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 2f26dfb8450e..1f99a1de0e4f 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -63,7 +63,7 @@ static inline void dst_entries_add(struct dst_ops *dst, int val) static inline int dst_entries_init(struct dst_ops *dst) { - return percpu_counter_init(&dst->pcpuc_entries, 0); + return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL); } static inline void dst_entries_destroy(struct dst_ops *dst) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 65a8855e99fe..8d1765577acc 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -151,7 +151,7 @@ static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) static inline void init_frag_mem_limit(struct netns_frags *nf) { - percpu_counter_init(&nf->mem, 0); + percpu_counter_init(&nf->mem, 0, GFP_KERNEL); } static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index ebf3bac460b0..b9d026bfcf38 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -40,7 +40,7 @@ int fprop_global_init(struct fprop_global *p) p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... */ - err = percpu_counter_init(&p->events, 1); + err = percpu_counter_init(&p->events, 1, GFP_KERNEL); if (err) return err; seqcount_init(&p->sequence); @@ -172,7 +172,7 @@ int fprop_local_init_percpu(struct fprop_local_percpu *pl) { int err; - err = percpu_counter_init(&pl->events, 0); + err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); if (err) return err; pl->period = 0; diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 3fde78275cd1..48144cdae819 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -112,7 +112,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) } EXPORT_SYMBOL(__percpu_counter_sum); -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { unsigned long flags __maybe_unused; @@ -120,7 +120,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, raw_spin_lock_init(&fbc->lock); lockdep_set_class(&fbc->lock, key); fbc->count = amount; - fbc->counters = alloc_percpu(s32); + fbc->counters = alloc_percpu_gfp(s32, gfp); if (!fbc->counters) return -ENOMEM; diff --git a/lib/proportions.c b/lib/proportions.c index 05df84801b56..ca95f8d54384 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0); + err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0); + err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -193,7 +193,7 @@ int prop_local_init_percpu(struct prop_local_percpu *pl) raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0); + return percpu_counter_init(&pl->events, 0, GFP_KERNEL); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f0..f19a818be2d3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -455,7 +455,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { - err = percpu_counter_init(&bdi->bdi_stat[i], 0); + err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL); if (err) goto err; } diff --git a/mm/mmap.c b/mm/mmap.c index c1f2ea4a0b99..d7ec93e25fa1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3196,7 +3196,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index a881d9673c6b..bd1808e194a7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -539,7 +539,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } diff --git a/mm/shmem.c b/mm/shmem.c index 0e5fb225007c..d4bc55d3f107 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2993,7 +2993,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #endif spin_lock_init(&sbinfo->stat_lock); - if (percpu_counter_init(&sbinfo->used_blocks, 0)) + if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index de2c1e719305..e421eddf67b4 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1115,7 +1115,7 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0); + rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); if (rc) goto out_fail; rc = -ENOBUFS; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541f26a67ba2..d59c2604c247 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3188,8 +3188,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); - percpu_counter_init(&tcp_sockets_allocated, 0); - percpu_counter_init(&tcp_orphan_count, 0); + percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); + percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 3af522622fad..1d191357bf88 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -32,7 +32,7 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) res_parent = &parent_cg->memory_allocated; res_counter_init(&cg_proto->memory_allocated, res_parent); - percpu_counter_init(&cg_proto->sockets_allocated, 0); + percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6240834f4b95..f00a85a3fddd 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1341,7 +1341,7 @@ static __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - status = percpu_counter_init(&sctp_sockets_allocated, 0); + status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL); if (status) goto err_percpu_counter_init; -- cgit v1.2.3 From 20ae00792c6f1f18fc4fc5965445a145df92827e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: proportions: add @gfp to init functions Percpu allocator now supports allocation mask. Add @gfp to [flex_]proportions init functions so that !GFP_KERNEL allocation masks can be used with them too. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Peter Zijlstra --- include/linux/flex_proportions.h | 5 +++-- include/linux/proportions.h | 5 +++-- lib/flex_proportions.c | 8 ++++---- lib/proportions.c | 10 +++++----- mm/backing-dev.c | 2 +- mm/page-writeback.c | 2 +- 6 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index 4ebc49fae391..0d348e011a6e 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * When maximum proportion of some event type is specified, this is the @@ -32,7 +33,7 @@ struct fprop_global { seqcount_t sequence; }; -int fprop_global_init(struct fprop_global *p); +int fprop_global_init(struct fprop_global *p, gfp_t gfp); void fprop_global_destroy(struct fprop_global *p); bool fprop_new_period(struct fprop_global *p, int periods); @@ -79,7 +80,7 @@ struct fprop_local_percpu { raw_spinlock_t lock; /* Protect period and numerator */ }; -int fprop_local_init_percpu(struct fprop_local_percpu *pl); +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 26a8a4ed9b07..00e8e8fa7358 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -12,6 +12,7 @@ #include #include #include +#include struct prop_global { /* @@ -40,7 +41,7 @@ struct prop_descriptor { struct mutex mutex; /* serialize the prop_global switch */ }; -int prop_descriptor_init(struct prop_descriptor *pd, int shift); +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp); void prop_change_shift(struct prop_descriptor *pd, int new_shift); /* @@ -61,7 +62,7 @@ struct prop_local_percpu { raw_spinlock_t lock; /* protect the snapshot state */ }; -int prop_local_init_percpu(struct prop_local_percpu *pl); +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp); void prop_local_destroy_percpu(struct prop_local_percpu *pl); void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl); void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl, diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index b9d026bfcf38..8f25652f40d4 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -34,13 +34,13 @@ */ #include -int fprop_global_init(struct fprop_global *p) +int fprop_global_init(struct fprop_global *p, gfp_t gfp) { int err; p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... */ - err = percpu_counter_init(&p->events, 1, GFP_KERNEL); + err = percpu_counter_init(&p->events, 1, gfp); if (err) return err; seqcount_init(&p->sequence); @@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p, */ #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int fprop_local_init_percpu(struct fprop_local_percpu *pl) +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp) { int err; - err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); + err = percpu_counter_init(&pl->events, 0, gfp); if (err) return err; pl->period = 0; diff --git a/lib/proportions.c b/lib/proportions.c index ca95f8d54384..6f724298f67a 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -73,7 +73,7 @@ #include #include -int prop_descriptor_init(struct prop_descriptor *pd, int shift) +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp) { int err; @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[0].events, 0, gfp); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[1].events, 0, gfp); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int prop_local_init_percpu(struct prop_local_percpu *pl) +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp) { raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0, GFP_KERNEL); + return percpu_counter_init(&pl->events, 0, gfp); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f19a818be2d3..64ec49d1772b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -470,7 +470,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->write_bandwidth = INIT_BW; bdi->avg_write_bandwidth = INIT_BW; - err = fprop_local_init_percpu(&bdi->completions); + err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL); if (err) { err: diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 91d73ef1744d..508599403721 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1777,7 +1777,7 @@ void __init page_writeback_init(void) writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); - fprop_global_init(&writeout_completions); + fprop_global_init(&writeout_completions, GFP_KERNEL); } /** -- cgit v1.2.3 From a34375ef9e65340a138fc0be287de5c940d260fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: percpu-refcount: add @gfp to percpu_ref_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_ref_init() so that !GFP_KERNEL allocation masks can be used with percpu_refs too. This patch doesn't make any functional difference. v2: blk-mq conversion was missing. Updated. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Benjamin LaHaise Cc: Li Zefan Cc: Nicholas A. Bellinger Cc: Jens Axboe --- block/blk-mq.c | 3 ++- drivers/target/target_core_tpg.c | 3 ++- fs/aio.c | 4 ++-- include/linux/percpu-refcount.h | 3 ++- kernel/cgroup.c | 6 +++--- lib/percpu-refcount.c | 6 ++++-- 6 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5189cb1e478a..702df07b980d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1776,7 +1776,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, + GFP_KERNEL)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index fddfae61222f..4ab6da338585 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -819,7 +819,8 @@ int core_tpg_add_lun( { int ret; - ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, + GFP_KERNEL); if (ret < 0) return ret; diff --git a/fs/aio.c b/fs/aio.c index bd7ec2cc2674..93fbcc0f5696 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -666,10 +666,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) INIT_LIST_HEAD(&ctx->active_reqs); - if (percpu_ref_init(&ctx->users, free_ioctx_users)) + if (percpu_ref_init(&ctx->users, free_ioctx_users, GFP_KERNEL)) goto err; - if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, GFP_KERNEL)) goto err; ctx->cpu = alloc_percpu(struct kioctx_cpu); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3dfbf237cd8f..ee8325122dbd 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -49,6 +49,7 @@ #include #include #include +#include struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); @@ -66,7 +67,7 @@ struct percpu_ref { }; int __must_check percpu_ref_init(struct percpu_ref *ref, - percpu_ref_func_t *release); + percpu_ref_func_t *release, gfp_t gfp); void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7dc8788cfd52..589b4d89a0a5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1628,7 +1628,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) goto out; root_cgrp->id = ret; - ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release); + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, GFP_KERNEL); if (ret) goto out; @@ -4487,7 +4487,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, init_and_link_css(css, ss, cgrp); - err = percpu_ref_init(&css->refcnt, css_release); + err = percpu_ref_init(&css->refcnt, css_release, GFP_KERNEL); if (err) goto err_free_css; @@ -4555,7 +4555,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, goto out_unlock; } - ret = percpu_ref_init(&cgrp->self.refcnt, css_release); + ret = percpu_ref_init(&cgrp->self.refcnt, css_release, GFP_KERNEL); if (ret) goto out_free_cgrp; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe5a3342e960..ff9903264a91 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -40,6 +40,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * percpu_ref_init - initialize a percpu refcount * @ref: percpu_ref to initialize * @release: function which will be called when refcount hits 0 + * @gfp: allocation mask to use * * Initializes the refcount in single atomic counter mode with a refcount of 1; * analagous to atomic_set(ref, 1). @@ -47,11 +48,12 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * Note that @release must not sleep - it may potentially be called from RCU * callback context by percpu_ref_kill(). */ -int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) +int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, + gfp_t gfp) { atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); - ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned); + ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned, gfp); if (!ref->pcpu_count_ptr) return -ENOMEM; -- cgit v1.2.3 From e78c3496790ee8a36522a838b59b388e8a709e65 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 16 Aug 2014 13:40:10 -0400 Subject: time, signal: Protect resource use statistics with seqlock Both times() and clock_gettime(CLOCK_PROCESS_CPUTIME_ID) have scalability issues on large systems, due to both functions being serialized with a lock. The lock protects against reporting a wrong value, due to a thread in the task group exiting, its statistics reporting up to the signal struct, and that exited task's statistics being counted twice (or not at all). Protecting that with a lock results in times() and clock_gettime() being completely serialized on large systems. This can be fixed by using a seqlock around the events that gather and propagate statistics. As an additional benefit, the protection code can be moved into thread_group_cputime(), slightly simplifying the calling functions. In the case of posix_cpu_clock_get_task() things can be simplified a lot, because the calling function already ensures that the task sticks around, and the rest is now taken care of in thread_group_cputime(). This way the statistics reporting code can run lockless. Signed-off-by: Rik van Riel Signed-off-by: Peter Zijlstra (Intel) Cc: Alex Thorlton Cc: Andrew Morton Cc: Daeseok Youn Cc: David Rientjes Cc: Dongsheng Yang Cc: Geert Uytterhoeven Cc: Guillaume Morin Cc: Ionut Alexa Cc: Kees Cook Cc: Linus Torvalds Cc: Li Zefan Cc: Michal Hocko Cc: Michal Schmidt Cc: Oleg Nesterov Cc: Vladimir Davydov Cc: umgwanakikbuti@gmail.com Cc: fweisbec@gmail.com Cc: srao@redhat.com Cc: lwoodman@redhat.com Cc: atheurer@redhat.com Link: http://lkml.kernel.org/r/20140816134010.26a9b572@annuminas.surriel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/exit.c | 4 ++++ kernel/fork.c | 1 + kernel/sched/cputime.c | 33 ++++++++++++++++++++------------- kernel/sys.c | 2 -- kernel/time/posix-cpu-timers.c | 14 -------------- 6 files changed, 26 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..dd9eb4807389 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -645,6 +645,7 @@ struct signal_struct { * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ + seqlock_t stats_lock; cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; diff --git a/kernel/exit.c b/kernel/exit.c index b93d46dab6fc..fa09b86609db 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -127,6 +127,7 @@ static void __exit_signal(struct task_struct *tsk) * the signal_struct. */ task_cputime(tsk, &utime, &stime); + write_seqlock(&sig->stats_lock); sig->utime += utime; sig->stime += stime; sig->gtime += task_gtime(tsk); @@ -140,6 +141,7 @@ static void __exit_signal(struct task_struct *tsk) sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig->nr_threads--; __unhash_process(tsk, group_dead); + write_sequnlock(&sig->stats_lock); /* * Do this under ->siglock, we can race with another thread @@ -1042,6 +1044,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) spin_lock_irq(&p->real_parent->sighand->siglock); psig = p->real_parent->signal; sig = p->signal; + write_seqlock(&psig->stats_lock); psig->cutime += tgutime + sig->cutime; psig->cstime += tgstime + sig->cstime; psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; @@ -1064,6 +1067,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) psig->cmaxrss = maxrss; task_io_accounting_add(&psig->ioac, &p->ioac); task_io_accounting_add(&psig->ioac, &sig->ioac); + write_sequnlock(&psig->stats_lock); spin_unlock_irq(&p->real_parent->sighand->siglock); } diff --git a/kernel/fork.c b/kernel/fork.c index 0cf9cdb6e491..9387ae8ab048 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1068,6 +1068,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->curr_target = tsk; init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); + seqlock_init(&sig->stats_lock); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 3e52836359ba..49b7cfe98f7a 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -288,18 +288,28 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) struct signal_struct *sig = tsk->signal; cputime_t utime, stime; struct task_struct *t; - - times->utime = sig->utime; - times->stime = sig->stime; - times->sum_exec_runtime = sig->sum_sched_runtime; + unsigned int seq, nextseq; rcu_read_lock(); - for_each_thread(tsk, t) { - task_cputime(t, &utime, &stime); - times->utime += utime; - times->stime += stime; - times->sum_exec_runtime += task_sched_runtime(t); - } + /* Attempt a lockless read on the first round. */ + nextseq = 0; + do { + seq = nextseq; + read_seqbegin_or_lock(&sig->stats_lock, &seq); + times->utime = sig->utime; + times->stime = sig->stime; + times->sum_exec_runtime = sig->sum_sched_runtime; + + for_each_thread(tsk, t) { + task_cputime(t, &utime, &stime); + times->utime += utime; + times->stime += stime; + times->sum_exec_runtime += task_sched_runtime(t); + } + /* If lockless access failed, take the lock. */ + nextseq = 1; + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry(&sig->stats_lock, seq); rcu_read_unlock(); } @@ -611,9 +621,6 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) cputime_adjust(&cputime, &p->prev_cputime, ut, st); } -/* - * Must be called with siglock held. - */ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) { struct task_cputime cputime; diff --git a/kernel/sys.c b/kernel/sys.c index ce8129192a26..b6636643cbd1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -862,11 +862,9 @@ void do_sys_times(struct tms *tms) { cputime_t tgutime, tgstime, cutime, cstime; - spin_lock_irq(¤t->sighand->siglock); thread_group_cputime_adjusted(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; - spin_unlock_irq(¤t->sighand->siglock); tms->tms_utime = cputime_to_clock_t(tgutime); tms->tms_stime = cputime_to_clock_t(tgstime); tms->tms_cutime = cputime_to_clock_t(cutime); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 3b8946416a5f..492b986195d5 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -272,22 +272,8 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk, if (same_thread_group(tsk, current)) err = cpu_clock_sample(which_clock, tsk, &rtn); } else { - unsigned long flags; - struct sighand_struct *sighand; - - /* - * while_each_thread() is not yet entirely RCU safe, - * keep locking the group while sampling process - * clock for now. - */ - sighand = lock_task_sighand(tsk, &flags); - if (!sighand) - return err; - if (tsk == current || thread_group_leader(tsk)) err = cpu_clock_sample_group(which_clock, tsk, &rtn); - - unlock_task_sighand(tsk, &flags); } if (!err) -- cgit v1.2.3 From 0b750b3baa2d64f1b77aecc10f20deeb28efe60d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 5 Sep 2014 18:08:47 +0200 Subject: HID: usbhid: add always-poll quirk Add quirk to make sure that a device is always polled for input events even if it hasn't been opened. This is needed for devices that disconnects from the bus unless the interrupt endpoint has been polled at least once or when not responding to an input event (e.g. after having shut down X). Signed-off-by: Johan Hovold Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 26 +++++++++++++++++++++++--- include/linux/hid.h | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 79cf503e37bf..ddd547ad6d7e 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -82,7 +82,7 @@ static int hid_start_in(struct hid_device *hid) struct usbhid_device *usbhid = hid->driver_data; spin_lock_irqsave(&usbhid->lock, flags); - if (hid->open > 0 && + if ((hid->open > 0 || hid->quirks & HID_QUIRK_ALWAYS_POLL) && !test_bit(HID_DISCONNECTED, &usbhid->iofl) && !test_bit(HID_SUSPENDED, &usbhid->iofl) && !test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) { @@ -292,6 +292,8 @@ static void hid_irq_in(struct urb *urb) case 0: /* success */ usbhid_mark_busy(usbhid); usbhid->retry_delay = 0; + if ((hid->quirks & HID_QUIRK_ALWAYS_POLL) && !hid->open) + break; hid_input_report(urb->context, HID_INPUT_REPORT, urb->transfer_buffer, urb->actual_length, 1); @@ -735,8 +737,10 @@ void usbhid_close(struct hid_device *hid) if (!--hid->open) { spin_unlock_irq(&usbhid->lock); hid_cancel_delayed_stuff(usbhid); - usb_kill_urb(usbhid->urbin); - usbhid->intf->needs_remote_wakeup = 0; + if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) { + usb_kill_urb(usbhid->urbin); + usbhid->intf->needs_remote_wakeup = 0; + } } else { spin_unlock_irq(&usbhid->lock); } @@ -1134,6 +1138,19 @@ static int usbhid_start(struct hid_device *hid) set_bit(HID_STARTED, &usbhid->iofl); + if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { + ret = usb_autopm_get_interface(usbhid->intf); + if (ret) + goto fail; + usbhid->intf->needs_remote_wakeup = 1; + ret = hid_start_in(hid); + if (ret) { + dev_err(&hid->dev, + "failed to start in urb: %d\n", ret); + } + usb_autopm_put_interface(usbhid->intf); + } + /* Some keyboards don't work until their LEDs have been set. * Since BIOSes do set the LEDs, it must be safe for any device * that supports the keyboard boot protocol. @@ -1166,6 +1183,9 @@ static void usbhid_stop(struct hid_device *hid) if (WARN_ON(!usbhid)) return; + if (hid->quirks & HID_QUIRK_ALWAYS_POLL) + usbhid->intf->needs_remote_wakeup = 0; + clear_bit(HID_STARTED, &usbhid->iofl); spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); diff --git a/include/linux/hid.h b/include/linux/hid.h index f53c4a9cca1d..26ee25fced27 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -287,6 +287,7 @@ struct hid_item { #define HID_QUIRK_HIDINPUT_FORCE 0x00000080 #define HID_QUIRK_NO_EMPTY_INPUT 0x00000100 #define HID_QUIRK_NO_INIT_INPUT_REPORTS 0x00000200 +#define HID_QUIRK_ALWAYS_POLL 0x00000400 #define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID 0x00020000 #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP 0x00040000 -- cgit v1.2.3 From c8d6591752e96c550cb98b781326d72d8eedcc79 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Wed, 3 Sep 2014 06:48:37 -0700 Subject: mac80211: support DTPC IE (from Cisco Client eXtensions) Linux already supports 802.11h, where the access point can tell the client to reduce its transmission power. However, 802.11h is only defined for 5 GHz, where the need for this is much smaller than on 2.4 GHz. Cisco has their own solution, called DTPC (Dynamic Transmit Power Control). Cisco APs on a controller sometimes but not always send 802.11h; they always send DTPC, even on 2.4 GHz. This patch adds support for parsing and honoring the DTPC IE in addition to the 802.11h element (they do not always contain the same limits, so both must be honored); the format is not documented, but very simple. Tested (on top of wireless.git and on 3.16.1) against a Cisco Aironet 1142 joined to a Cisco 2504 WLC, by setting various transmit power levels for the given access points and observing the results. The Wireshark 802.11 dissector agrees with the interpretation of the element, except for negative numbers, which seem to never happen anyway. Signed-off-by: Steinar H. Gunderson Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 ++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 60 +++++++++++++++++++++++++++++++++++++--------- net/mac80211/util.c | 25 +++++++++++++++++++ 4 files changed, 77 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 77d4f26e0235..61a09f0644aa 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1824,7 +1824,8 @@ enum ieee80211_eid { WLAN_EID_DMG_TSPEC = 146, WLAN_EID_DMG_AT = 147, WLAN_EID_DMG_CAP = 148, - /* 149-150 reserved for Cisco */ + /* 149 reserved for Cisco */ + WLAN_EID_CISCO_VENDOR_SPECIFIC = 150, WLAN_EID_DMG_OPERATION = 151, WLAN_EID_DMG_BSS_PARAM_CHANGE = 152, WLAN_EID_DMG_BEAM_REFINEMENT = 153, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6b7bdd8fae29..c2aaec4dfcf0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1369,6 +1369,7 @@ struct ieee802_11_elems { const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; const u8 *country_elem; const u8 *pwr_constr_elem; + const u8 *cisco_dtpc_elem; const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0b812a88f95f..a7b92f5f7161 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1230,14 +1230,30 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, return have_chan_pwr; } +static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, + const u8 *cisco_dtpc_ie, + int *pwr_level) +{ + /* From practical testing, the first data byte of the DTPC element + * seems to contain the requested dBm level, and the CLI on Cisco + * APs clearly state the range is -127 to 127 dBm, which indicates + * a signed byte, although it seemingly never actually goes negative. + * The other byte seems to always be zero. + */ + *pwr_level = (__s8)cisco_dtpc_ie[4]; +} + static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, struct ieee80211_mgmt *mgmt, const u8 *country_ie, u8 country_ie_len, - const u8 *pwr_constr_ie) + const u8 *pwr_constr_ie, + const u8 *cisco_dtpc_ie) { - bool has_80211h_pwr = false; - int chan_pwr, pwr_reduction_80211h; + bool has_80211h_pwr = false, has_cisco_pwr = false; + int chan_pwr = 0, pwr_reduction_80211h = 0; + int pwr_level_cisco, pwr_level_80211h; int new_ap_level; if (country_ie && pwr_constr_ie && @@ -1246,16 +1262,35 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, has_80211h_pwr = ieee80211_find_80211h_pwr_constr( sdata, channel, country_ie, country_ie_len, pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h); - new_ap_level = max_t(int, 0, chan_pwr - pwr_reduction_80211h); + pwr_level_80211h = + max_t(int, 0, chan_pwr - pwr_reduction_80211h); + } + + if (cisco_dtpc_ie) { + ieee80211_find_cisco_dtpc( + sdata, channel, cisco_dtpc_ie, &pwr_level_cisco); + has_cisco_pwr = true; } - if (!has_80211h_pwr) + if (!has_80211h_pwr && !has_cisco_pwr) return 0; - sdata_info(sdata, - "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", - new_ap_level, chan_pwr, pwr_reduction_80211h, - sdata->u.mgd.bssid); + /* If we have both 802.11h and Cisco DTPC, apply both limits + * by picking the smallest of the two power levels advertised. + */ + if (has_80211h_pwr && + (!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) { + sdata_info(sdata, + "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", + pwr_level_80211h, chan_pwr, pwr_reduction_80211h, + sdata->u.mgd.bssid); + new_ap_level = pwr_level_80211h; + } else { /* has_cisco_pwr is always true here. */ + sdata_info(sdata, + "Limiting TX power to %d dBm as advertised by %pM\n", + pwr_level_cisco, sdata->u.mgd.bssid); + new_ap_level = pwr_level_cisco; + } if (sdata->ap_power_level == new_ap_level) return 0; @@ -2923,7 +2958,9 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, /* * This is the canonical list of information elements we care about, * the filter code also gives us all changes to the Microsoft OUI - * (00:50:F2) vendor IE which is used for WMM which we need to track. + * (00:50:F2) vendor IE which is used for WMM which we need to track, + * as well as the DTPC IE (part of the Cisco OUI) used for signaling + * changes to requested client power. * * We implement beacon filtering in software since that means we can * avoid processing the frame here and in cfg80211, and userspace @@ -3232,7 +3269,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt, elems.country_elem, elems.country_elem_len, - elems.pwr_constr_elem); + elems.pwr_constr_elem, + elems.cisco_dtpc_elem); ieee80211_bss_info_change_notify(sdata, changed); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b444f27a788e..3c61060a4d2b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1015,6 +1015,31 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, } elems->pwr_constr_elem = pos; break; + case WLAN_EID_CISCO_VENDOR_SPECIFIC: + /* Lots of different options exist, but we only care + * about the Dynamic Transmit Power Control element. + * First check for the Cisco OUI, then for the DTPC + * tag (0x00). + */ + if (elen < 4) { + elem_parse_failed = true; + break; + } + + if (pos[0] != 0x00 || pos[1] != 0x40 || + pos[2] != 0x96 || pos[3] != 0x00) + break; + + if (elen != 6) { + elem_parse_failed = true; + break; + } + + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + + elems->cisco_dtpc_elem = pos; + break; case WLAN_EID_TIMEOUT_INTERVAL: if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) elems->timeout_int = (void *)pos; -- cgit v1.2.3 From 4930d247af29f849cd1bddd65be2400684dc886e Mon Sep 17 00:00:00 2001 From: Gaël PORTAY Date: Sat, 6 Sep 2014 19:52:35 +0200 Subject: ARM: at91/tclib: move initialization from alloc to probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move resource retrieval from atmel_tc_alloc to tc_probe to avoid lately reporting resource related issues when a TC block user request a TC block. Moreover, resources retrieval are usually done in the probe function, thus moving them add some consistency with other drivers. Initialization is done once, ie not every time a tc block is requested. If it fails, the device is not appended to the list of tc blocks. Furhermore, the device id is retrieved at probe as well, avoiding parsing DT every time the user requests of tc block. Signed-off-by: Gaël PORTAY Acked-by: Thierry Reding Acked-by: Boris Brezillon Signed-off-by: Nicolas Ferre --- drivers/clocksource/tcb_clksrc.c | 2 +- drivers/misc/atmel_tclib.c | 71 +++++++++++++--------------------------- drivers/pwm/pwm-atmel-tcb.c | 2 +- include/linux/atmel_tc.h | 8 +++-- 4 files changed, 29 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index a8d7ea14f183..f922e81d531b 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -279,7 +279,7 @@ static int __init tcb_clksrc_init(void) int i; int ret; - tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK, clksrc.name); + tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK); if (!tc) { pr_debug("can't alloc TC for clocksource\n"); return -ENODEV; diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index b514a2d4485b..d505d1e0857b 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -35,60 +35,31 @@ static LIST_HEAD(tc_list); /** * atmel_tc_alloc - allocate a specified TC block * @block: which block to allocate - * @name: name to be associated with the iomem resource * * Caller allocates a block. If it is available, a pointer to a * pre-initialized struct atmel_tc is returned. The caller can access * the registers directly through the "regs" field. */ -struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name) +struct atmel_tc *atmel_tc_alloc(unsigned block) { struct atmel_tc *tc; struct platform_device *pdev = NULL; - struct resource *r; - size_t size; spin_lock(&tc_list_lock); list_for_each_entry(tc, &tc_list, node) { - if (tc->pdev->dev.of_node) { - if (of_alias_get_id(tc->pdev->dev.of_node, "tcb") - == block) { - pdev = tc->pdev; - break; - } - } else if (tc->pdev->id == block) { + if (tc->allocated) + continue; + + if ((tc->pdev->dev.of_node && tc->id == block) || + (tc->pdev->id == block)) { pdev = tc->pdev; + tc->allocated = true; break; } } - - if (!pdev || tc->iomem) - goto fail; - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) - goto fail; - - size = resource_size(r); - r = request_mem_region(r->start, size, name); - if (!r) - goto fail; - - tc->regs = ioremap(r->start, size); - if (!tc->regs) - goto fail_ioremap; - - tc->iomem = r; - -out: spin_unlock(&tc_list_lock); - return tc; -fail_ioremap: - release_mem_region(r->start, size); -fail: - tc = NULL; - goto out; + return pdev ? tc : NULL; } EXPORT_SYMBOL_GPL(atmel_tc_alloc); @@ -96,19 +67,14 @@ EXPORT_SYMBOL_GPL(atmel_tc_alloc); * atmel_tc_free - release a specified TC block * @tc: Timer/counter block that was returned by atmel_tc_alloc() * - * This reverses the effect of atmel_tc_alloc(), unmapping the I/O - * registers, invalidating the resource returned by that routine and - * making the TC available to other drivers. + * This reverses the effect of atmel_tc_alloc(), invalidating the resource + * returned by that routine and making the TC available to other drivers. */ void atmel_tc_free(struct atmel_tc *tc) { spin_lock(&tc_list_lock); - if (tc->regs) { - iounmap(tc->regs); - release_mem_region(tc->iomem->start, resource_size(tc->iomem)); - tc->regs = NULL; - tc->iomem = NULL; - } + if (tc->allocated) + tc->allocated = false; spin_unlock(&tc_list_lock); } EXPORT_SYMBOL_GPL(atmel_tc_free); @@ -142,9 +108,7 @@ static int __init tc_probe(struct platform_device *pdev) struct atmel_tc *tc; struct clk *clk; int irq; - - if (!platform_get_resource(pdev, IORESOURCE_MEM, 0)) - return -EINVAL; + struct resource *r; irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -160,12 +124,21 @@ static int __init tc_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tc->regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(tc->regs)) + return PTR_ERR(tc->regs); + /* Now take SoC information if available */ if (pdev->dev.of_node) { const struct of_device_id *match; match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node); if (match) tc->tcb_config = match->data; + + tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb"); + } else { + tc->id = pdev->id; } tc->clk[0] = clk; diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index f3dcd02390f1..d56e5b717431 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -379,7 +379,7 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev) return err; } - tc = atmel_tc_alloc(tcblock, "tcb-pwm"); + tc = atmel_tc_alloc(tcblock); if (tc == NULL) { dev_err(&pdev->dev, "failed to allocate Timer Counter Block\n"); return -ENOMEM; diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index 89a931babecf..d8aa88461c64 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -44,12 +44,13 @@ struct atmel_tcb_config { /** * struct atmel_tc - information about a Timer/Counter Block * @pdev: physical device - * @iomem: resource associated with the I/O register * @regs: mapping through which the I/O registers can be accessed + * @id: block id * @tcb_config: configuration data from SoC * @irq: irq for each of the three channels * @clk: internal clock source for each of the three channels * @node: list node, for tclib internal use + * @allocated: if already used, for tclib internal use * * On some platforms, each TC channel has its own clocks and IRQs, * while on others, all TC channels share the same clock and IRQ. @@ -61,15 +62,16 @@ struct atmel_tcb_config { */ struct atmel_tc { struct platform_device *pdev; - struct resource *iomem; void __iomem *regs; + int id; const struct atmel_tcb_config *tcb_config; int irq[3]; struct clk *clk[3]; struct list_head node; + bool allocated; }; -extern struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name); +extern struct atmel_tc *atmel_tc_alloc(unsigned block); extern void atmel_tc_free(struct atmel_tc *tc); /* platform-specific ATMEL_TC_TIMER_CLOCKx divisors (0 means 32KiHz) */ -- cgit v1.2.3 From 84f462371cc07272a17e2ae96c3540f795db273a Mon Sep 17 00:00:00 2001 From: Gaël PORTAY Date: Sat, 6 Sep 2014 19:52:36 +0200 Subject: ARM: at91/tclib: mask interruptions at shutdown and probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shutdown properly the timer counter block by masking interruptions. Otherwise, a segmentation may happen when kexec-ing a new kernel (see backtrace below). An interruption may happen before the handler is set, leading to a kernel segmentation fault. Furthermore, we make sure the interruptions are masked when the driver is initialized. This will prevent freshly kexec-ed kernel from crashing when launched from a kernel which does not properly mask interruptions at shutdown. The backtrace below happened after kexec-ing a new kernel, from a kernel that did not shut down properly leaving interruptions unmasked. Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 80000005 [#1] ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 3.16.0+ #144 task: c1828aa0 ti: c182a000 task.ti: c182a000 PC is at 0x0 LR is at ch2_irq+0x28/0x30 pc : [<00000000>] lr : [] psr: 000000d3 sp : c182bd38 ip : c182bd48 fp : c182bd44 r10: c0373390 r9 : c1825b00 r8 : 60000053 r7 : 00000000 r6 : 00000000 r5 : 00000013 r4 : c036e800 r3 : 00000000 r2 : 00002004 r1 : c036e760 r0 : c036e760 Flags: nzcv IRQs off FIQs off Mode SVC_32 ISA ARM Segment kernel Control: 0005317f Table: 20004000 DAC: 00000017 Process swapper (pid: 1, stack limit = 0xc182a1c0) Stack: (0xc182bd38 to 0xc182c000) bd20: c182bd7c c182bd48 bd40: c0045430 c01db8ec 00000000 c18c6f40 c182bd74 c1825b00 c035cec4 00000000 bd60: c182be2c 60000053 c1825b34 00000000 c182bd94 c182bd80 c0045570 c0045408 bd80: 00000000 c1825b00 c182bdac c182bd98 c0047f34 c0045550 00000013 c036619c bda0: c182bdc4 c182bdb0 c0044da4 c0047e98 0000007f 00000013 c182bde4 c182bdc8 bdc0: c0009e34 c0044d8c fefff000 c0046728 60000053 ffffffff c182bdf4 c182bde8 bde0: c00086a8 c0009ddc c182be74 c182bdf8 c000cb80 c0008674 00000000 00000013 be00: 00000000 00014200 c1825b00 c036e800 00000013 c035ed98 60000053 c1825b34 be20: 00000000 c182be74 c182be20 c182be40 c0047994 c0046728 60000053 ffffffff be40: 00000013 c036e800 c182be64 c1825b00 00000013 c036e800 c035ed98 c03874bc be60: 00000004 c036e700 c182be94 c182be78 c004689c c0046398 c036e760 c18c6080 be80: 00000000 c035ed10 c182bedc c182be98 c0348b08 c004684c 0000000c c034dac8 bea0: 004c4b3f c028c338 c036e760 00000013 c014ecc8 c18e67e0 c035b9c0 c0348884 bec0: c035b9c0 c182a020 00000000 00000000 c182bf54 c182bee0 c00089fc c0348894 bee0: c00da51c c1ffcc78 c182bf0c c182bef8 c002d100 c002d09c c1ffcc78 00000000 bf00: c182bf54 c182bf10 c002d308 c0336570 c182bf3c c0334e44 00000003 00000003 bf20: 00000030 c0334b44 c0044d74 00000003 00000003 c034dac8 c0350a94 c0373440 bf40: c0373440 00000030 c182bf94 c182bf58 c0336d24 c000890c 00000003 00000003 bf60: c0336560 c182bf64 c182bf64 6e616e0d 00000000 c0272fc8 00000000 00000000 bf80: 00000000 00000000 c182bfac c182bf98 c0272fd8 c0336bd8 c182a000 00000000 bfa0: 00000000 c182bfb0 c00095d0 c0272fd8 00000000 00000000 00000000 00000000 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 374d27cd 33cc33e4 Backtrace: [] (ch2_irq) from [] (handle_irq_event_percpu+0x38/0x148) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x30/0x40) r10:00000000 r9:c1825b34 r8:60000053 r7:c182be2c r6:00000000 r5:c035cec4 r4:c1825b00 [] (handle_irq_event) from [] (handle_fasteoi_irq+0xac/0x11c) r4:c1825b00 r3:00000000 [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x28/0x38) r5:c036619c r4:00000013 [] (generic_handle_irq) from [] (handle_IRQ+0x68/0x88) r4:00000013 r3:0000007f [] (handle_IRQ) from [] (at91_aic_handle_irq+0x44/0x4c) r6:ffffffff r5:60000053 r4:c0046728 r3:fefff000 [] (at91_aic_handle_irq) from [] (__irq_svc+0x40/0x4c) Exception stack(0xc182bdf8 to 0xc182be40) bde0: 00000000 00000013 be00: 00000000 00014200 c1825b00 c036e800 00000013 c035ed98 60000053 c1825b34 be20: 00000000 c182be74 c182be20 c182be40 c0047994 c0046728 60000053 ffffffff [] (__setup_irq) from [] (setup_irq+0x60/0x8c) r10:c036e700 r9:00000004 r8:c03874bc r7:c035ed98 r6:c036e800 r5:00000013 r4:c1825b00 [] (setup_irq) from [] (tcb_clksrc_init+0x284/0x31c) r6:c035ed10 r5:00000000 r4:c18c6080 r3:c036e760 [] (tcb_clksrc_init) from [] (do_one_initcall+0x100/0x1b4) r10:00000000 r9:00000000 r8:c182a020 r7:c035b9c0 r6:c0348884 r5:c035b9c0 r4:c18e67e0 [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x224) r9:00000030 r8:c0373440 r7:c0373440 r6:c0350a94 r5:c034dac8 r4:00000003 [] (kernel_init_freeable) from [] (kernel_init+0x10/0xec) r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c0272fc8 r4:00000000 [] (kernel_init) from [] (ret_from_fork+0x14/0x24) r4:00000000 r3:c182a000 Code: bad PC value ---[ end trace 5b30f0017e282e47 ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Gaël PORTAY Acked-by: Boris Brezillon Acked-by: Daniel Lezcano Signed-off-by: Nicolas Ferre --- drivers/misc/atmel_tclib.c | 16 ++++++++++++++++ include/linux/atmel_tc.h | 5 +++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index d505d1e0857b..0ca05c3ec8d6 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -109,6 +109,7 @@ static int __init tc_probe(struct platform_device *pdev) struct clk *clk; int irq; struct resource *r; + unsigned int i; irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -157,18 +158,33 @@ static int __init tc_probe(struct platform_device *pdev) if (tc->irq[2] < 0) tc->irq[2] = irq; + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); + spin_lock(&tc_list_lock); list_add_tail(&tc->node, &tc_list); spin_unlock(&tc_list_lock); + platform_set_drvdata(pdev, tc); + return 0; } +static void tc_shutdown(struct platform_device *pdev) +{ + int i; + struct atmel_tc *tc = platform_get_drvdata(pdev); + + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); +} + static struct platform_driver tc_driver = { .driver = { .name = "atmel_tcb", .of_match_table = of_match_ptr(atmel_tcb_dt_ids), }, + .shutdown = tc_shutdown, }; static int __init tc_init(void) diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index d8aa88461c64..b87c1c7c242a 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -260,5 +260,10 @@ extern const u8 atmel_tc_divisors[5]; #define ATMEL_TC_LDRAS (1 << 5) /* RA loading */ #define ATMEL_TC_LDRBS (1 << 6) /* RB loading */ #define ATMEL_TC_ETRGS (1 << 7) /* external trigger */ +#define ATMEL_TC_ALL_IRQ (ATMEL_TC_COVFS | ATMEL_TC_LOVRS | \ + ATMEL_TC_CPAS | ATMEL_TC_CPBS | \ + ATMEL_TC_CPCS | ATMEL_TC_LDRAS | \ + ATMEL_TC_LDRBS | ATMEL_TC_ETRGS) \ + /* all IRQs */ #endif -- cgit v1.2.3 From ff9ea323816dc1c8ac7144afd4eab3ac97704430 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:56 +0900 Subject: block, bdi: an active gendisk always has a request_queue associated with it bdev_get_queue() returns the request_queue associated with the specified block_device. blk_get_backing_dev_info() makes use of bdev_get_queue() to determine the associated bdi given a block_device. All the callers of bdev_get_queue() including blk_get_backing_dev_info() assume that bdev_get_queue() may return NULL and implement NULL handling; however, bdev_get_queue() requires the passed in block_device is opened and attached to its gendisk. Because an active gendisk always has a valid request_queue associated with it, bdev_get_queue() can never return NULL and neither can blk_get_backing_dev_info(). Make it clear that neither of the two functions can return NULL and remove NULL handling from all the callers. Signed-off-by: Tejun Heo Cc: Chris Mason Cc: Dave Chinner Signed-off-by: Jens Axboe --- block/blk-core.c | 10 +++------- block/compat_ioctl.c | 4 ---- block/ioctl.c | 4 ---- fs/block_dev.c | 2 -- fs/btrfs/disk-io.c | 2 +- fs/xfs/xfs_buf.c | 2 -- include/linux/blkdev.h | 2 +- 7 files changed, 5 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 93603e6ff479..817446175489 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q) * @bdev: device * * Locates the passed device's request queue and returns the address of its - * backing_dev_info - * - * Will return NULL if the request queue cannot be located. + * backing_dev_info. This function can only be called if @bdev is opened + * and the return value is never NULL. */ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { - struct backing_dev_info *ret = NULL; struct request_queue *q = bdev_get_queue(bdev); - if (q) - ret = &q->backing_dev_info; - return ret; + return &q->backing_dev_info; } EXPORT_SYMBOL(blk_get_backing_dev_info); diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 18b282ce361e..f678c733df40 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -709,8 +709,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return compat_put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: /* compatible */ @@ -731,8 +729,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKGETSIZE: diff --git a/block/ioctl.c b/block/ioctl.c index d6cda8147c91..6c7bf903742f 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -356,8 +356,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); @@ -386,8 +384,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if(!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKBSZSET: diff --git a/fs/block_dev.c b/fs/block_dev.c index 6d7274619bf9..d3251eca6429 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1173,8 +1173,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - bdi = &default_backing_dev_info; bdev_inode_switch_bdi(bdev->bd_inode, bdi); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0ed9e664f7d..39ff591ae1b4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1694,7 +1694,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); - if (bdi && bdi_congested(bdi, bdi_bits)) { + if (bdi_congested(bdi, bdi_bits)) { ret = 1; break; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cd7b8ca9b064..497fcde381d7 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1678,8 +1678,6 @@ xfs_alloc_buftarg( btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; btp->bt_bdi = blk_get_backing_dev_info(bdev); - if (!btp->bt_bdi) - goto error; if (xfs_setsize_buftarg_early(btp, bdev)) goto error; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 518b46555b80..e267bf0db559 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,7 +865,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { - return bdev->bd_disk->queue; + return bdev->bd_disk->queue; /* this is never NULL */ } /* -- cgit v1.2.3 From e36f1dfce0b45d347927568efe1088821758cc3c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:57 +0900 Subject: bdi: remove unused stuff Two flags and one bdi_writeback field are no longer used. Remove them. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e488e9459a93..2103a7fa3fd8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -28,12 +28,10 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ BDI_registered, /* bdi_register() was done */ BDI_writeback_running, /* Writeback is in progress */ - BDI_unused, /* Available bits start here */ }; typedef int (congested_fn)(void *, int); @@ -50,7 +48,6 @@ enum bdi_stat_item { struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ - unsigned int nr; unsigned long last_old_flush; /* last old data flush */ -- cgit v1.2.3 From 018a17bdc8658ad448497c84d4ba21b6985820ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:04:01 +0900 Subject: bdi: reimplement bdev_inode_switch_bdi() A block_device may be attached to different gendisks and thus different bdis over time. bdev_inode_switch_bdi() is used to switch the associated bdi. The function assumes that the inode could be dirty and transfers it between bdis if so. This is a bit nasty in that it reaches into bdi internals. This patch reimplements the function so that it writes out the inode if dirty. This is a lot simpler and can be implemented without exposing bdi internals. Signed-off-by: Tejun Heo Cc: Alexander Viro Signed-off-by: Jens Axboe --- fs/block_dev.c | 32 +++++++++++--------------------- include/linux/backing-dev.h | 1 - mm/backing-dev.c | 2 +- 3 files changed, 12 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/block_dev.c b/fs/block_dev.c index d3251eca6429..cc8d68ac29aa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,32 +50,22 @@ inline struct block_device *I_BDEV(struct inode *inode) EXPORT_SYMBOL(I_BDEV); /* - * Move the inode from its current bdi to a new bdi. If the inode is dirty we - * need to move it onto the dirty list of @dst so that the inode is always on - * the right list. + * Move the inode from its current bdi to a new bdi. Make sure the inode + * is clean before moving so that it doesn't linger on the old bdi. */ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { - struct backing_dev_info *old = inode->i_data.backing_dev_info; - bool wakeup_bdi = false; - - if (unlikely(dst == old)) /* deadlock avoidance */ - return; - bdi_lock_two(&old->wb, &dst->wb); - spin_lock(&inode->i_lock); - inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) { - if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) - wakeup_bdi = true; - list_move(&inode->i_wb_list, &dst->wb.b_dirty); + while (true) { + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_DIRTY)) { + inode->i_data.backing_dev_info = dst; + spin_unlock(&inode->i_lock); + return; + } + spin_unlock(&inode->i_lock); + WARN_ON_ONCE(write_inode_now(inode, true)); } - spin_unlock(&inode->i_lock); - spin_unlock(&old->wb.list_lock); - spin_unlock(&dst->wb.list_lock); - - if (wakeup_bdi) - bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2103a7fa3fd8..5da6012b7a14 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -121,7 +121,6 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b65fe93ad612..7d63d5e9d3de 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -40,7 +40,7 @@ LIST_HEAD(bdi_list); /* bdi_wq serves all asynchronous writeback tasks */ struct workqueue_struct *bdi_wq; -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) +static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { spin_lock(&wb1->list_lock); -- cgit v1.2.3 From e676253b19b2d269cccf67fdb1592120a0cd0676 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 5 Aug 2014 11:45:59 +0200 Subject: serial/8250: Add support for RS485 IOCTLs This patch allow the users of the 8250 infrastructure to define a handler for RS485 configration. If no handler is defined the 8250 driver will work as usual. Signed-off-by: Ricardo Ribalda Delgado Acked-by: Alan Cox -- v2:Change suggested by Alan "One Thousand Gnomes": - Move rs485 structure further down on the uart_8250_port structure drivers/tty/serial/8250/8250_core.c | 39 +++++++++++++++++++++++++++++++++++++ include/linux/serial_8250.h | 3 +++ 2 files changed, 42 insertions(+) Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 39 +++++++++++++++++++++++++++++++++++++ include/linux/serial_8250.h | 3 +++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 1d42dba6121d..b28ed1beb50f 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -2843,6 +2843,42 @@ serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) return 0; } +static int serial8250_ioctl(struct uart_port *port, unsigned int cmd, + unsigned long arg) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + int ret; + struct serial_rs485 rs485_config; + + if (!up->rs485_config) + return -ENOIOCTLCMD; + + switch (cmd) { + case TIOCSRS485: + if (copy_from_user(&rs485_config, (void __user *)arg, + sizeof(rs485_config))) + return -EFAULT; + + ret = up->rs485_config(up, &rs485_config); + if (ret) + return ret; + + memcpy(&up->rs485, &rs485_config, sizeof(rs485_config)); + + return 0; + case TIOCGRS485: + if (copy_to_user((void __user *)arg, &up->rs485, + sizeof(up->rs485))) + return -EFAULT; + return 0; + default: + break; + } + + return -ENOIOCTLCMD; +} + static const char * serial8250_type(struct uart_port *port) { @@ -2872,6 +2908,7 @@ static struct uart_ops serial8250_pops = { .request_port = serial8250_request_port, .config_port = serial8250_config_port, .verify_port = serial8250_verify_port, + .ioctl = serial8250_ioctl, #ifdef CONFIG_CONSOLE_POLL .poll_get_char = serial8250_get_poll_char, .poll_put_char = serial8250_put_poll_char, @@ -3388,6 +3425,8 @@ int serial8250_register_8250_port(struct uart_8250_port *up) uart->port.fifosize = up->port.fifosize; uart->tx_loadsz = up->tx_loadsz; uart->capabilities = up->capabilities; + uart->rs485_config = up->rs485_config; + uart->rs485 = up->rs485; /* Take tx_loadsz from fifosize if it wasn't set separately */ if (uart->port.fifosize && !uart->tx_loadsz) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index f93649e22c43..6dd671765312 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -96,10 +96,13 @@ struct uart_8250_port { unsigned char msr_saved_flags; struct uart_8250_dma *dma; + struct serial_rs485 rs485; /* 8250 specific callbacks */ int (*dl_read)(struct uart_8250_port *); void (*dl_write)(struct uart_8250_port *, int); + int (*rs485_config)(struct uart_8250_port *, + struct serial_rs485 *rs485); }; static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) -- cgit v1.2.3 From 55e15c949fd05d247a889df0ed0177a676fec665 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:17 +0200 Subject: PM / domains: Remove the pm_genpd_add|remove_callbacks APIs There are no users of these APIs. To simplify the generic power domain let's remove them. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 106 -------------------------------------------- include/linux/pm_domain.h | 19 -------- 2 files changed, 125 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index eee55c1e5fde..e613e3cb96d8 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1743,112 +1743,6 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, return ret; } -/** - * pm_genpd_add_callbacks - Add PM domain callbacks to a given device. - * @dev: Device to add the callbacks to. - * @ops: Set of callbacks to add. - * @td: Timing data to add to the device along with the callbacks (optional). - * - * Every call to this routine should be balanced with a call to - * __pm_genpd_remove_callbacks() and they must not be nested. - */ -int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops, - struct gpd_timing_data *td) -{ - struct generic_pm_domain_data *gpd_data_new, *gpd_data = NULL; - int ret = 0; - - if (!(dev && ops)) - return -EINVAL; - - gpd_data_new = __pm_genpd_alloc_dev_data(dev); - if (!gpd_data_new) - return -ENOMEM; - - pm_runtime_disable(dev); - device_pm_lock(); - - ret = dev_pm_get_subsys_data(dev); - if (ret) - goto out; - - spin_lock_irq(&dev->power.lock); - - if (dev->power.subsys_data->domain_data) { - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - } else { - gpd_data = gpd_data_new; - dev->power.subsys_data->domain_data = &gpd_data->base; - } - gpd_data->refcount++; - gpd_data->ops = *ops; - if (td) - gpd_data->td = *td; - - spin_unlock_irq(&dev->power.lock); - - out: - device_pm_unlock(); - pm_runtime_enable(dev); - - if (gpd_data != gpd_data_new) - __pm_genpd_free_dev_data(dev, gpd_data_new); - - return ret; -} -EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); - -/** - * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. - * @dev: Device to remove the callbacks from. - * @clear_td: If set, clear the device's timing data too. - * - * This routine can only be called after pm_genpd_add_callbacks(). - */ -int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) -{ - struct generic_pm_domain_data *gpd_data = NULL; - bool remove = false; - int ret = 0; - - if (!(dev && dev->power.subsys_data)) - return -EINVAL; - - pm_runtime_disable(dev); - device_pm_lock(); - - spin_lock_irq(&dev->power.lock); - - if (dev->power.subsys_data->domain_data) { - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - gpd_data->ops = (struct gpd_dev_ops){ NULL }; - if (clear_td) - gpd_data->td = (struct gpd_timing_data){ 0 }; - - if (--gpd_data->refcount == 0) { - dev->power.subsys_data->domain_data = NULL; - remove = true; - } - } else { - ret = -EINVAL; - } - - spin_unlock_irq(&dev->power.lock); - - device_pm_unlock(); - pm_runtime_enable(dev); - - if (ret) - return ret; - - dev_pm_put_subsys_data(dev); - if (remove) - __pm_genpd_free_dev_data(dev, gpd_data); - - return 0; -} -EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks); - /** * pm_genpd_attach_cpuidle - Connect the given PM domain with cpuidle. * @genpd: PM domain to be connected with cpuidle. diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index ebc4c76ffb73..9ae2b9e88d61 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -151,10 +151,6 @@ extern int pm_genpd_add_subdomain_names(const char *master_name, const char *subdomain_name); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops, - struct gpd_timing_data *td); -extern int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td); extern int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state); extern int pm_genpd_name_attach_cpuidle(const char *name, int state); extern int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd); @@ -217,16 +213,6 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops, - struct gpd_timing_data *td) -{ - return -ENOSYS; -} -static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) -{ - return -ENOSYS; -} static inline int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int st) { return -ENOSYS; @@ -281,11 +267,6 @@ static inline int pm_genpd_name_add_device(const char *domain_name, return __pm_genpd_name_add_device(domain_name, dev, NULL); } -static inline int pm_genpd_remove_callbacks(struct device *dev) -{ - return __pm_genpd_remove_callbacks(dev, true); -} - #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); -- cgit v1.2.3 From 67da6d4bf43c4208433ef8f3ee487401b4dc9c74 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:18 +0200 Subject: PM / domains: Ignore callbacks for subsys generic_pm_domain_data In a step of simplifying the generic power domain let's move away from using these callbacks. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 131 +++----------------------------------------- include/linux/pm_domain.h | 1 - 2 files changed, 8 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e613e3cb96d8..aa5b14c1e643 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -25,10 +25,6 @@ __routine = genpd->dev_ops.callback; \ if (__routine) { \ __ret = __routine(dev); \ - } else { \ - __routine = dev_gpd_data(dev)->ops.callback; \ - if (__routine) \ - __ret = __routine(dev); \ } \ __ret; \ }) @@ -1871,10 +1867,6 @@ static int pm_genpd_default_save_state(struct device *dev) { int (*cb)(struct device *__dev); - cb = dev_gpd_data(dev)->ops.save_state; - if (cb) - return cb(dev); - if (dev->type && dev->type->pm) cb = dev->type->pm->runtime_suspend; else if (dev->class && dev->class->pm) @@ -1898,10 +1890,6 @@ static int pm_genpd_default_restore_state(struct device *dev) { int (*cb)(struct device *__dev); - cb = dev_gpd_data(dev)->ops.restore_state; - if (cb) - return cb(dev); - if (dev->type && dev->type->pm) cb = dev->type->pm->runtime_resume; else if (dev->class && dev->class->pm) @@ -1917,109 +1905,6 @@ static int pm_genpd_default_restore_state(struct device *dev) return cb ? cb(dev) : 0; } -#ifdef CONFIG_PM_SLEEP - -/** - * pm_genpd_default_suspend - Default "device suspend" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_suspend(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend; - - return cb ? cb(dev) : pm_generic_suspend(dev); -} - -/** - * pm_genpd_default_suspend_late - Default "late device suspend" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_suspend_late(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend_late; - - return cb ? cb(dev) : pm_generic_suspend_late(dev); -} - -/** - * pm_genpd_default_resume_early - Default "early device resume" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_resume_early(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume_early; - - return cb ? cb(dev) : pm_generic_resume_early(dev); -} - -/** - * pm_genpd_default_resume - Default "device resume" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_resume(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume; - - return cb ? cb(dev) : pm_generic_resume(dev); -} - -/** - * pm_genpd_default_freeze - Default "device freeze" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_freeze(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze; - - return cb ? cb(dev) : pm_generic_freeze(dev); -} - -/** - * pm_genpd_default_freeze_late - Default "late device freeze" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_freeze_late(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; - - return cb ? cb(dev) : pm_generic_freeze_late(dev); -} - -/** - * pm_genpd_default_thaw_early - Default "early device thaw" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_thaw_early(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; - - return cb ? cb(dev) : pm_generic_thaw_early(dev); -} - -/** - * pm_genpd_default_thaw - Default "device thaw" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_thaw(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw; - - return cb ? cb(dev) : pm_generic_thaw(dev); -} - -#else /* !CONFIG_PM_SLEEP */ - -#define pm_genpd_default_suspend NULL -#define pm_genpd_default_suspend_late NULL -#define pm_genpd_default_resume_early NULL -#define pm_genpd_default_resume NULL -#define pm_genpd_default_freeze NULL -#define pm_genpd_default_freeze_late NULL -#define pm_genpd_default_thaw_early NULL -#define pm_genpd_default_thaw NULL - -#endif /* !CONFIG_PM_SLEEP */ - /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. @@ -2071,14 +1956,14 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; - genpd->dev_ops.suspend = pm_genpd_default_suspend; - genpd->dev_ops.suspend_late = pm_genpd_default_suspend_late; - genpd->dev_ops.resume_early = pm_genpd_default_resume_early; - genpd->dev_ops.resume = pm_genpd_default_resume; - genpd->dev_ops.freeze = pm_genpd_default_freeze; - genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late; - genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early; - genpd->dev_ops.thaw = pm_genpd_default_thaw; + genpd->dev_ops.suspend = pm_generic_suspend; + genpd->dev_ops.suspend_late = pm_generic_suspend_late; + genpd->dev_ops.resume_early = pm_generic_resume_early; + genpd->dev_ops.resume = pm_generic_resume; + genpd->dev_ops.freeze = pm_generic_freeze; + genpd->dev_ops.freeze_late = pm_generic_freeze_late; + genpd->dev_ops.thaw_early = pm_generic_thaw_early; + genpd->dev_ops.thaw = pm_generic_thaw; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9ae2b9e88d61..436ea149d40c 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -108,7 +108,6 @@ struct gpd_timing_data { struct generic_pm_domain_data { struct pm_domain_data base; - struct gpd_dev_ops ops; struct gpd_timing_data td; struct notifier_block nb; struct mutex lock; -- cgit v1.2.3 From 1e0407ca54d28db8e5f02e437ff21cc6416c0be8 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:19 +0200 Subject: PM / domains: Remove system PM callbacks from gpd_dev_ops There no users of these callbacks, let's simplify the generic power domain by removing them. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 64 ++++++--------------------------------------- include/linux/pm_domain.h | 8 ------ 2 files changed, 8 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index aa5b14c1e643..e777609ccc77 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -770,46 +770,6 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev); } -static int genpd_suspend_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, suspend, dev); -} - -static int genpd_suspend_late(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, suspend_late, dev); -} - -static int genpd_resume_early(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, resume_early, dev); -} - -static int genpd_resume_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, resume, dev); -} - -static int genpd_freeze_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, freeze, dev); -} - -static int genpd_freeze_late(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, freeze_late, dev); -} - -static int genpd_thaw_early(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, thaw_early, dev); -} - -static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, thaw, dev); -} - /** * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. @@ -991,7 +951,7 @@ static int pm_genpd_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_suspend_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev); } /** @@ -1012,7 +972,7 @@ static int pm_genpd_suspend_late(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_suspend_late(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_suspend_late(dev); } /** @@ -1099,7 +1059,7 @@ static int pm_genpd_resume_early(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_resume_early(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_resume_early(dev); } /** @@ -1120,7 +1080,7 @@ static int pm_genpd_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_resume_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_resume(dev); } /** @@ -1141,7 +1101,7 @@ static int pm_genpd_freeze(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_freeze_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev); } /** @@ -1163,7 +1123,7 @@ static int pm_genpd_freeze_late(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_freeze_late(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_freeze_late(dev); } /** @@ -1227,7 +1187,7 @@ static int pm_genpd_thaw_early(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_thaw_early(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_thaw_early(dev); } /** @@ -1248,7 +1208,7 @@ static int pm_genpd_thaw(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_thaw_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev); } /** @@ -1956,14 +1916,6 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; - genpd->dev_ops.suspend = pm_generic_suspend; - genpd->dev_ops.suspend_late = pm_generic_suspend_late; - genpd->dev_ops.resume_early = pm_generic_resume_early; - genpd->dev_ops.resume = pm_generic_resume; - genpd->dev_ops.freeze = pm_generic_freeze; - genpd->dev_ops.freeze_late = pm_generic_freeze_late; - genpd->dev_ops.thaw_early = pm_generic_thaw_early; - genpd->dev_ops.thaw = pm_generic_thaw; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 436ea149d40c..b6343d4b9b04 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -35,14 +35,6 @@ struct gpd_dev_ops { int (*stop)(struct device *dev); int (*save_state)(struct device *dev); int (*restore_state)(struct device *dev); - int (*suspend)(struct device *dev); - int (*suspend_late)(struct device *dev); - int (*resume_early)(struct device *dev); - int (*resume)(struct device *dev); - int (*freeze)(struct device *dev); - int (*freeze_late)(struct device *dev); - int (*thaw_early)(struct device *dev); - int (*thaw)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From c5d79ec2a5715489cff16a0d1cf4fa9108a5509e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:22 +0200 Subject: PM / domains: Remove dev_irq_safe from genpd config The genpd dev_irq_safe configuration somewhat overlaps with the runtime PM pm_runtime_irq_safe() option. Also, currently genpd don't have a good way to deal with these device. So, until we figured out if and how to support this in genpd, let's remove the option to configure it. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 4 ---- include/linux/pm_domain.h | 1 - 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e777609ccc77..4298a2bcd228 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -615,8 +615,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - might_sleep_if(!genpd->dev_irq_safe); - stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL; if (stop_ok && !stop_ok(dev)) return -EBUSY; @@ -661,8 +659,6 @@ static int pm_genpd_runtime_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - might_sleep_if(!genpd->dev_irq_safe); - /* If power.irq_safe, the PM domain is never powered off. */ if (dev->power.irq_safe) return genpd_start_dev_no_timing(genpd, dev); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b6343d4b9b04..360c94ceeebb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -63,7 +63,6 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ bool suspend_power_off; /* Power status before system suspend */ - bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); s64 power_off_latency_ns; int (*power_on)(struct generic_pm_domain *domain); -- cgit v1.2.3 From d47e6464ae6c96735d4706f5cb0537fe717b6b00 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:24 +0200 Subject: PM / domains: Remove pm_genpd_syscore_switch() API The pm_genpd_syscore_poweroff() API and pm_genpd_syscore_poweron() API makes the pm_genpd_syscore_switch() API redundant. Moreover, since there are no active users, let's just remove it. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 17 ++++++++++++++--- include/linux/pm_domain.h | 16 ++++------------ 2 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index b910d0f6ff60..601e35b2fa71 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1292,13 +1292,13 @@ static void pm_genpd_complete(struct device *dev) } /** - * pm_genpd_syscore_switch - Switch power during system core suspend or resume. + * genpd_syscore_switch - Switch power during system core suspend or resume. * @dev: Device that normally is marked as "always on" to switch power for. * * This routine may only be called during the system core (syscore) suspend or * resume phase for devices whose "always on" flags are set. */ -void pm_genpd_syscore_switch(struct device *dev, bool suspend) +static void genpd_syscore_switch(struct device *dev, bool suspend) { struct generic_pm_domain *genpd; @@ -1314,7 +1314,18 @@ void pm_genpd_syscore_switch(struct device *dev, bool suspend) genpd->suspended_count--; } } -EXPORT_SYMBOL_GPL(pm_genpd_syscore_switch); + +void pm_genpd_syscore_poweroff(struct device *dev) +{ + genpd_syscore_switch(dev, true); +} +EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweroff); + +void pm_genpd_syscore_poweron(struct device *dev) +{ + genpd_syscore_switch(dev, false); +} +EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #else diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 360c94ceeebb..3997af691600 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -266,19 +266,11 @@ static inline void pm_genpd_poweroff_unused(void) {} #endif #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP -extern void pm_genpd_syscore_switch(struct device *dev, bool suspend); +extern void pm_genpd_syscore_poweroff(struct device *dev); +extern void pm_genpd_syscore_poweron(struct device *dev); #else -static inline void pm_genpd_syscore_switch(struct device *dev, bool suspend) {} +static inline void pm_genpd_syscore_poweroff(struct device *dev) {} +static inline void pm_genpd_syscore_poweron(struct device *dev) {} #endif -static inline void pm_genpd_syscore_poweroff(struct device *dev) -{ - pm_genpd_syscore_switch(dev, true); -} - -static inline void pm_genpd_syscore_poweron(struct device *dev) -{ - pm_genpd_syscore_switch(dev, false); -} - #endif /* _LINUX_PM_DOMAIN_H */ -- cgit v1.2.3 From d971f0b0eaaf3f2086bf21bbd64f7ea7e2f28459 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:25 +0200 Subject: PM / domains: Remove genpd_queue_power_off_work() API There are no active users of this API. Let's remove it and if future needs shows up we could consider to have a get/put API instead. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 5 ++++- include/linux/pm_domain.h | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 601e35b2fa71..d71d0458d41f 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -422,7 +422,7 @@ static bool genpd_abort_poweroff(struct generic_pm_domain *genpd) * Queue up the execution of pm_genpd_poweroff() unless it's already been done * before. */ -void genpd_queue_power_off_work(struct generic_pm_domain *genpd) +static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) { queue_work(pm_wq, &genpd->power_off_work); } @@ -729,6 +729,9 @@ static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb, return NOTIFY_DONE; } +static inline void +genpd_queue_power_off_work(struct generic_pm_domain *genpd) {} + static inline void genpd_power_off_work_fn(struct work_struct *work) {} #define pm_genpd_runtime_suspend NULL diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 3997af691600..c19c90b839b8 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -258,10 +258,8 @@ static inline int pm_genpd_name_add_device(const char *domain_name, } #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME -extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); #else -static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} static inline void pm_genpd_poweroff_unused(void) {} #endif -- cgit v1.2.3 From 0f574d4c3a7a325cbbef28ee738dedca9851e957 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:30 +0200 Subject: PM / domains: Remove default_stop_ok() API There are currently no need to export default_stop_ok() as an API, instead let's keep it local to the PM domain governor. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain_governor.c | 7 ++----- include/linux/pm_domain.h | 6 ------ 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index a089e3bcdfbc..d88a62e104d4 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -42,7 +42,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data) * default_stop_ok - Default PM domain governor routine for stopping devices. * @dev: Device to check. */ -bool default_stop_ok(struct device *dev) +static bool default_stop_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; unsigned long flags; @@ -229,10 +229,7 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) #else /* !CONFIG_PM_RUNTIME */ -bool default_stop_ok(struct device *dev) -{ - return false; -} +static inline bool default_stop_ok(struct device *dev) { return false; } #define default_power_down_ok NULL #define always_on_power_down_ok NULL diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index c19c90b839b8..3b59f296e6ec 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -151,8 +151,6 @@ extern void pm_genpd_init(struct generic_pm_domain *genpd, extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern int pm_genpd_name_poweron(const char *domain_name); -extern bool default_stop_ok(struct device *dev); - extern struct dev_power_governor pm_domain_always_on_gov; #else @@ -231,10 +229,6 @@ static inline int pm_genpd_name_poweron(const char *domain_name) { return -ENOSYS; } -static inline bool default_stop_ok(struct device *dev) -{ - return false; -} #define simple_qos_governor NULL #define pm_domain_always_on_gov NULL #endif -- cgit v1.2.3 From ae3c511c2d72161b11e93866203b59a3a37dfac7 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:31 +0200 Subject: PM / domains: Keep declaration of dev_power_governors together This is a pure code cleanup in the header file for the PM domain. No functional change. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 3b59f296e6ec..aa03586c94a2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -117,8 +117,6 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern struct dev_power_governor simple_qos_governor; - extern struct generic_pm_domain *dev_to_genpd(struct device *dev); extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, @@ -151,6 +149,7 @@ extern void pm_genpd_init(struct generic_pm_domain *genpd, extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern int pm_genpd_name_poweron(const char *domain_name); +extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; #else -- cgit v1.2.3 From 8a949b07e4062cbd07e04e6a47249e69ca65b944 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 2 Sep 2014 17:39:19 -0400 Subject: serial: core: Document lock requirement for UPF_* flags updates The flags field of struct uart_port can only be safely modified if the port mutex is held; no other lock prevents concurrent changes from corrupting the field. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cf3a1e789bf5..8cb267b1fcd5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -152,6 +152,7 @@ struct uart_port { unsigned long sysrq; /* sysrq timeout */ #endif + /* flags must be updated while holding port mutex */ upf_t flags; #define UPF_FOURPORT ((__force upf_t) (1 << 1)) -- cgit v1.2.3 From b99b121b2aa42e60e5b73fdd3a49863337839c7b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 5 Sep 2014 21:02:37 +0200 Subject: tty: serial: 8250_core: allow to overwrite & export serial8250_startup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OMAP version of the 8250 can actually use 1:1 serial8250_startup(). However it needs to be extended by a wake up irq which should to be requested & enabled at ->startup() time and disabled at ->shutdown() time. v2…v3: properly copy callbacks v1…v2: add shutdown callback Acked-by: Alan Cox Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 25 +++++++++++++++++++++++-- include/linux/serial_8250.h | 2 ++ include/linux/serial_core.h | 2 ++ 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 90e0d5e2b7ee..872c020607a8 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -1930,7 +1930,7 @@ static void serial8250_put_poll_char(struct uart_port *port, #endif /* CONFIG_CONSOLE_POLL */ -static int serial8250_startup(struct uart_port *port) +int serial8250_do_startup(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; @@ -2181,8 +2181,16 @@ dont_test_tx_en: return 0; } +EXPORT_SYMBOL_GPL(serial8250_do_startup); -static void serial8250_shutdown(struct uart_port *port) +static int serial8250_startup(struct uart_port *port) +{ + if (port->startup) + return port->startup(port); + return serial8250_do_startup(port); +} + +void serial8250_do_shutdown(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; @@ -2232,6 +2240,15 @@ static void serial8250_shutdown(struct uart_port *port) if (port->irq) serial_unlink_irq_chain(up); } +EXPORT_SYMBOL_GPL(serial8250_do_shutdown); + +static void serial8250_shutdown(struct uart_port *port) +{ + if (port->shutdown) + port->shutdown(port); + else + serial8250_do_shutdown(port); +} static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud) { @@ -3466,6 +3483,10 @@ int serial8250_register_8250_port(struct uart_8250_port *up) /* Possibly override set_termios call */ if (up->port.set_termios) uart->port.set_termios = up->port.set_termios; + if (up->port.startup) + uart->port.startup = up->port.startup; + if (up->port.shutdown) + uart->port.shutdown = up->port.shutdown; if (up->port.pm) uart->port.pm = up->port.pm; if (up->port.handle_break) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 6dd671765312..6fc9d7bee05e 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -124,6 +124,8 @@ extern void serial8250_early_out(struct uart_port *port, int offset, int value); extern int setup_early_serial8250_console(char *cmdline); extern void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); +extern int serial8250_do_startup(struct uart_port *port); +extern void serial8250_do_shutdown(struct uart_port *port); extern void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate); extern int fsl8250_handle_irq(struct uart_port *port); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8cb267b1fcd5..3bd7d55eebce 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -122,6 +122,8 @@ struct uart_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + int (*startup)(struct uart_port *port); + void (*shutdown)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); -- cgit v1.2.3 From 413fffc3a1db7f270afdf1ecb35c1edc013acc68 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 28 Aug 2014 11:22:23 +0530 Subject: cpufreq: Add support for per-policy driver data Drivers supporting multiple clusters or multiple 'struct cpufreq_policy' instances may need to keep per-policy data. If the core doesn't provide support for that, they might do it in the most unoptimized way: 'per-cpu' data. This patch adds another field in struct cpufreq_policy: 'driver_data'. It isn't accessed by core and is for driver's internal use only. Tested-by: Stephen Boyd Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7d1955afa62c..138336b6bb04 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -112,6 +112,9 @@ struct cpufreq_policy { spinlock_t transition_lock; wait_queue_head_t transition_wait; struct task_struct *transition_task; /* Task which is doing the transition */ + + /* For cpufreq driver's internal use */ + void *driver_data; }; /* Only for ACPI */ -- cgit v1.2.3 From 8ca28610e5e37193cd61fefa4310941e28de10ca Mon Sep 17 00:00:00 2001 From: Hans-Christian Egtvedt Date: Thu, 7 Aug 2014 15:14:06 +0200 Subject: mmc: include linux/types.h for bool definition in atmel-mci.h This patch adds an include of linux/types.h to make sure bool is defined before utilized in this header file. Signed-off-by: Hans-Christian Egtvedt Acked-by: Ludovic Desroches Signed-off-by: Ulf Hansson --- include/linux/atmel-mci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h index 4c7a4b2104bf..91b77f8d495d 100644 --- a/include/linux/atmel-mci.h +++ b/include/linux/atmel-mci.h @@ -1,6 +1,8 @@ #ifndef __LINUX_ATMEL_MCI_H #define __LINUX_ATMEL_MCI_H +#include + #define ATMCI_MAX_NR_SLOTS 2 /** -- cgit v1.2.3 From b3683994843a0ede0e19daccd1ac32a46b21eb39 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Wed, 13 Aug 2014 13:34:01 +0800 Subject: mmc: Correct the value of MMC_NUM_PHY_PARTITION eMMC card can support up to 7 physical partitions, including 2 boot, 1 RPMB and 4 GPs. Change MMC_NUM_PHY_PARTITION from 6 to 7, which is the correct value. Signed-off-by: Yi Sun Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index d424b9de3aff..bde5147a4221 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -214,11 +214,12 @@ enum mmc_blk_status { }; /* The number of MMC physical partitions. These consist of: - * boot partitions (2), general purpose partitions (4) in MMC v4.4. + * boot partitions (2), general purpose partitions (4) and + * RPMB partition (1) in MMC v4.4. */ #define MMC_NUM_BOOT_PARTITION 2 #define MMC_NUM_GP_PARTITION 4 -#define MMC_NUM_PHY_PARTITION 6 +#define MMC_NUM_PHY_PARTITION 7 #define MAX_MMC_PART_NAME_LEN 20 /* -- cgit v1.2.3 From 3d705d14fe4c72be83bae1610680e209ee226b9d Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 19 Aug 2014 10:45:51 +0200 Subject: mmc: implement Driver Stage Register handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some eMMC and SD cards implement a DSR register that allows to tune raise/fall times and drive strength of the CMD and DATA outputs. The values to use depend on the card in use and the host. It might be needed to reduce the drive strength to prevent voltage peaks above the host's specification. Implement a 'dsr' devicetree property that allows to specify the value to set the DSR to. For non-dt setups the new members of mmc_host can be set by board code. This patch was initially authored by Sascha Hauer. It contains improvements authored by Markus Niebel and Uwe Kleine-König. Signed-off-by: Sascha Hauer Signed-off-by: Markus Niebel Signed-off-by: Uwe Kleine-König Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/mmc.txt | 2 ++ drivers/mmc/core/host.c | 8 ++++++++ drivers/mmc/core/mmc.c | 8 ++++++++ drivers/mmc/core/mmc_ops.c | 20 ++++++++++++++++++++ drivers/mmc/core/mmc_ops.h | 1 + drivers/mmc/core/sd.c | 8 ++++++++ include/linux/mmc/card.h | 3 ++- include/linux/mmc/host.h | 3 +++ 8 files changed, 52 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt index 431716e37a39..b52628b18a53 100644 --- a/Documentation/devicetree/bindings/mmc/mmc.txt +++ b/Documentation/devicetree/bindings/mmc/mmc.txt @@ -40,6 +40,8 @@ Optional properties: - mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported - mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported - mmc-hs400-1_2v: eMMC HS400 mode(1.2V I/O) is supported +- dsr: Value the card's (optional) Driver Stage Register (DSR) should be + programmed with. Valid range: [0 .. 0xffff]. *NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line polarity properties, we have to fix the meaning of the "normal" and "inverted" diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 95cceae96944..d572b2beb65a 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -452,6 +452,14 @@ int mmc_of_parse(struct mmc_host *host) if (of_find_property(np, "mmc-hs400-1_2v", &len)) host->caps2 |= MMC_CAP2_HS400_1_2V | MMC_CAP2_HS200_1_2V_SDR; + host->dsr_req = !of_property_read_u32(np, "dsr", &host->dsr); + if (host->dsr_req && (host->dsr & ~0xffff)) { + dev_err(host->parent, + "device tree specified broken value for DSR: 0x%x, ignoring\n", + host->dsr); + host->dsr_req = 0; + } + return 0; out: diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 1eda8dd8c867..31c43165f8bc 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -162,6 +162,7 @@ static int mmc_decode_csd(struct mmc_card *card) csd->read_partial = UNSTUFF_BITS(resp, 79, 1); csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->dsr_imp = UNSTUFF_BITS(resp, 76, 1); csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3); csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); csd->write_partial = UNSTUFF_BITS(resp, 21, 1); @@ -1271,6 +1272,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, goto free_card; } + /* + * handling only for cards supporting DSR and hosts requesting + * DSR configuration + */ + if (card->csd.dsr_imp && host->dsr_req) + mmc_set_dsr(host); + /* * Select card, as all following commands rely on that. */ diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index f51b5ba3bbea..ba0275e90617 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -93,6 +93,26 @@ int mmc_deselect_cards(struct mmc_host *host) return _mmc_select_card(host, NULL); } +/* + * Write the value specified in the device tree or board code into the optional + * 16 bit Driver Stage Register. This can be used to tune raise/fall times and + * drive strength of the DAT and CMD outputs. The actual meaning of a given + * value is hardware dependant. + * The presence of the DSR register can be determined from the CSD register, + * bit 76. + */ +int mmc_set_dsr(struct mmc_host *host) +{ + struct mmc_command cmd = {0}; + + cmd.opcode = MMC_SET_DSR; + + cmd.arg = (host->dsr << 16) | 0xffff; + cmd.flags = MMC_RSP_NONE | MMC_CMD_AC; + + return mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); +} + int mmc_go_idle(struct mmc_host *host) { int err; diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h index 80ae9f4e0293..390dac665b2a 100644 --- a/drivers/mmc/core/mmc_ops.h +++ b/drivers/mmc/core/mmc_ops.h @@ -14,6 +14,7 @@ int mmc_select_card(struct mmc_card *card); int mmc_deselect_cards(struct mmc_host *host); +int mmc_set_dsr(struct mmc_host *host); int mmc_go_idle(struct mmc_host *host); int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr); int mmc_all_send_cid(struct mmc_host *host, u32 *cid); diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 0c44510bf717..25913889cbaa 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -127,6 +127,7 @@ static int mmc_decode_csd(struct mmc_card *card) csd->read_partial = UNSTUFF_BITS(resp, 79, 1); csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->dsr_imp = UNSTUFF_BITS(resp, 76, 1); csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3); csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); csd->write_partial = UNSTUFF_BITS(resp, 21, 1); @@ -953,6 +954,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, mmc_decode_cid(card); } + /* + * handling only for cards supporting DSR and hosts requesting + * DSR configuration + */ + if (card->csd.dsr_imp && host->dsr_req) + mmc_set_dsr(host); + /* * Select card, as all following commands rely on that. */ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index bde5147a4221..0ea795f5feb9 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -42,7 +42,8 @@ struct mmc_csd { unsigned int read_partial:1, read_misalign:1, write_partial:1, - write_misalign:1; + write_misalign:1, + dsr_imp:1; }; struct mmc_ext_csd { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 7960424d0bc0..4cbf61476999 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -365,6 +365,9 @@ struct mmc_host { unsigned int slotno; /* used for sdio acpi binding */ + int dsr_req; /* DSR value is valid */ + u32 dsr; /* optional driver stage (DSR) value */ + unsigned long private[0] ____cacheline_aligned; }; -- cgit v1.2.3 From 384b2cbd56a02efb16358ed7c0c039e4afca5ed0 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 24 Aug 2014 19:58:48 -0700 Subject: mmc: tmio: care about DMA tx/rx addr offset Basically, SD_BUF0 Tx/Rx addresses are same in normal TMIO controller, but, it is different on Renesas R-Car SDHI controller if it uses DMAC (Rx address needs to add 0x2000 to Tx address) This patch adds new .dma_rx_offset and cares it Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Acked-by: Laurent Pinchart Acked-by: Ben Dooks Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 3 +++ drivers/mmc/host/tmio_mmc_dma.c | 2 +- include/linux/mfd/tmio.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 2dafe28c8df9..f0818cd0242c 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -39,6 +39,7 @@ struct sh_mobile_sdhi_of_data { unsigned long tmio_flags; unsigned long capabilities; unsigned long capabilities2; + dma_addr_t dma_rx_offset; }; static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = { @@ -56,6 +57,7 @@ static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = { .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, .capabilities2 = MMC_CAP2_NO_MULTI_READ, + .dma_rx_offset = 0x2000, }; static const struct of_device_id sh_mobile_sdhi_of_match[] = { @@ -228,6 +230,7 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->flags |= of_data->tmio_flags; mmc_data->capabilities |= of_data->capabilities; mmc_data->capabilities2 |= of_data->capabilities2; + dma_priv->dma_rx_offset = of_data->dma_rx_offset; } /* SD control register space size is 0x100, 0x200 for bus_shift=1 */ diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index eb8f1d5c34b1..bd646b041085 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -312,7 +312,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat if (pdata->dma->chan_priv_rx) cfg.slave_id = pdata->dma->slave_id_rx; cfg.direction = DMA_DEV_TO_MEM; - cfg.src_addr = cfg.dst_addr; + cfg.src_addr = cfg.dst_addr + pdata->dma->dma_rx_offset; cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; cfg.dst_addr = 0; ret = dmaengine_slave_config(host->chan_rx, &cfg); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 8f6f2e91e7ae..777e29b1ad0f 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -96,6 +96,7 @@ struct tmio_mmc_dma { int slave_id_tx; int slave_id_rx; int alignment_shift; + dma_addr_t dma_rx_offset; bool (*filter)(struct dma_chan *chan, void *arg); }; -- cgit v1.2.3 From b8d11962c2d83c984d5afd091e5b725ad2fd5607 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:00:25 -0700 Subject: mmc: tmio: control multiple block transfer mode Renesas SDHI has "Multiple Block Transfer Mode" settings on SD_CMD register which controls CMD12 automatically. This patch cares it, because CMD12 is not needed when CMD53 (= SD_IO_RW_EXTENDED) [Kuninori Morimoto: tidyuped for upstreaming enabled this flags for all SH-Mobile/R-Car] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_pio.c | 10 ++++++++++ include/linux/mfd/tmio.h | 6 ++++++ 3 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index f0818cd0242c..4727586b063e 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -225,6 +225,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |= TMIO_MMC_SDIO_IRQ; + /* + * All SDHI have CMD12 controll bit + */ + mmc_data->flags |= TMIO_MMC_HAVE_CMD12_CTRL; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 3d0ad737abea..c2804827be9f 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -310,6 +311,7 @@ static void tmio_mmc_done_work(struct work_struct *work) #define TRANSFER_READ 0x1000 #define TRANSFER_MULTI 0x2000 #define SECURITY_CMD 0x4000 +#define NO_CMD12_ISSUE 0x4000 /* TMIO_MMC_HAVE_CMD12_CTRL */ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd) { @@ -346,6 +348,14 @@ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command if (data->blocks > 1) { sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x100); c |= TRANSFER_MULTI; + + /* + * Disable auto CMD12 at IO_RW_EXTENDED when + * multiple block transfer + */ + if ((host->pdata->flags & TMIO_MMC_HAVE_CMD12_CTRL) && + (cmd->opcode == SD_IO_RW_EXTENDED)) + c |= NO_CMD12_ISSUE; } if (data->flags & MMC_DATA_READ) c |= TRANSFER_READ; diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 777e29b1ad0f..7432d95b08e2 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -83,6 +83,12 @@ */ #define TMIO_MMC_HAVE_HIGH_REG (1 << 6) +/* + * Some controllers have CMD12 automatically + * issue/non-issue register + */ +#define TMIO_MMC_HAVE_CMD12_CTRL (1 << 7) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From 6b98757e53cb0e93b02db4067c14afcb32c90615 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:00:52 -0700 Subject: mmc: tmio: add TMIO_MMC_SDIO_STATUS_QUIRK Renesas R-Car SDHI should set reserved bits on CTL_SDIO_STATUS register when writing. This patch adds new TMIO_MMC_SDIO_STATUS_QUIRK flags for this purpose [Kuninori Morimoto: tidyuped for upstreaming enabled this flags for all SH-Mobile/R-Car] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_pio.c | 7 ++++++- include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 4727586b063e..ebcfc659b799 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -230,6 +230,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |= TMIO_MMC_HAVE_CMD12_CTRL; + /* + * All SDHI need SDIO_INFO1 reserved bit + */ + mmc_data->flags |= TMIO_MMC_SDIO_STATUS_QUIRK; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index c2804827be9f..c5ffa92ccb18 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -665,6 +665,7 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid) struct mmc_host *mmc = host->mmc; struct tmio_mmc_data *pdata = host->pdata; unsigned int ireg, status; + unsigned int sdio_status; if (!(pdata->flags & TMIO_MMC_SDIO_IRQ)) return IRQ_HANDLED; @@ -672,7 +673,11 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid) status = sd_ctrl_read16(host, CTL_SDIO_STATUS); ireg = status & TMIO_SDIO_MASK_ALL & ~host->sdcard_irq_mask; - sd_ctrl_write16(host, CTL_SDIO_STATUS, status & ~TMIO_SDIO_MASK_ALL); + sdio_status = status & ~TMIO_SDIO_MASK_ALL; + if (pdata->flags & TMIO_MMC_SDIO_STATUS_QUIRK) + sdio_status |= 6; + + sd_ctrl_write16(host, CTL_SDIO_STATUS, sdio_status); if (mmc->caps & MMC_CAP_SDIO_IRQ && ireg & TMIO_SDIO_STAT_IOIRQ) mmc_signal_sdio_irq(mmc); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 7432d95b08e2..a7493ae01b58 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -89,6 +89,11 @@ */ #define TMIO_MMC_HAVE_CMD12_CTRL (1 << 7) +/* + * Some controllers needs to set 1 on SDIO status reserved bits + */ +#define TMIO_MMC_SDIO_STATUS_QUIRK (1 << 8) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From e85dd04ea8c8d32ba8eae278959d28df34338e9d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 24 Aug 2014 20:01:54 -0700 Subject: mmc: tmio: remove Renesas specific #ifdef This patch adds new TMIO_MMC_HAVE_CTL_DMA_REG flag, and remove Renesas specific #ifdef from tmio driver Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_dma.c | 6 ++---- include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index ebcfc659b799..a077da02bb8e 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -235,6 +235,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |= TMIO_MMC_SDIO_STATUS_QUIRK; + /* + * All SDHI have DMA control register + */ + mmc_data->flags |= TMIO_MMC_HAVE_CTL_DMA_REG; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index bd646b041085..7d077388b9eb 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -28,10 +28,8 @@ void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) if (!host->chan_tx || !host->chan_rx) return; -#if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE) - /* Switch DMA mode on or off - SuperH specific? */ - sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0); -#endif + if (host->pdata->flags & TMIO_MMC_HAVE_CTL_DMA_REG) + sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0); } void tmio_mmc_abort_dma(struct tmio_mmc_host *host) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index a7493ae01b58..adcb0cdb2fdb 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -94,6 +94,11 @@ */ #define TMIO_MMC_SDIO_STATUS_QUIRK (1 << 8) +/* + * Some controllers have DMA enable/disable register + */ +#define TMIO_MMC_HAVE_CTL_DMA_REG (1 << 9) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From da29fe2bf573f0ae56fdc2e790387cb73fc8c6f8 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:03:00 -0700 Subject: mmc: tmio: add actual clock support as option Some controller is supporting actual clock on SD_CLK_CTRL :: DIV[7:0]. Renesas SH-Mobile SDHI doesn't support, but, Renesas R-Car SDHI supports it. This patch adds new TMIO_MMC_CLK_ACTUAL flag for it. [Kuninori Morimoto: tidyuped for upstreaming] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 6 ++++-- drivers/mmc/host/tmio_mmc_pio.c | 5 +++++ include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index a077da02bb8e..d5d493719491 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -49,12 +49,14 @@ static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = { }; static const struct sh_mobile_sdhi_of_data of_rcar_gen1_compatible = { - .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE | + TMIO_MMC_CLK_ACTUAL, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, }; static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = { - .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE | + TMIO_MMC_CLK_ACTUAL, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, .capabilities2 = MMC_CAP2_NO_MULTI_READ, .dma_rx_offset = 0x2000, diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 7cfe939992de..ba454131f9a8 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -159,6 +159,11 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, for (clock = host->mmc->f_min, clk = 0x80000080; new_clock >= (clock<<1); clk >>= 1) clock <<= 1; + + /* 1/1 clock is option */ + if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) && + ((clk >> 22) & 0x1)) + clk |= 0xff; } if (host->set_clk_div) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index adcb0cdb2fdb..90436d523e5e 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -99,6 +99,11 @@ */ #define TMIO_MMC_HAVE_CTL_DMA_REG (1 << 9) +/* + * Some controllers allows to set SDx actual clock + */ +#define TMIO_MMC_CLK_ACTUAL (1 << 10) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From 51da2240906cb94e8f6ba55e403b6206df6fb2dd Mon Sep 17 00:00:00 2001 From: Yuvaraj CD Date: Fri, 22 Aug 2014 19:17:50 +0530 Subject: mmc: dw_mmc: use mmc_regulator_get_supply to handle regulators This patch makes use of mmc_regulator_get_supply() to handle the vmmc and vqmmc regulators.Also it moves the code handling the these regulators to dw_mci_set_ios().It turned on the vmmc and vqmmc during MMC_POWER_UP and MMC_POWER_ON,and turned off during MMC_POWER_OFF. Signed-off-by: Yuvaraj Kumar C D Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 72 ++++++++++++++++++++++------------------------ include/linux/mmc/dw_mmc.h | 2 +- 2 files changed, 36 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 7f227e964265..aadb0d6aa63f 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -936,6 +936,7 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct dw_mci_slot *slot = mmc_priv(mmc); const struct dw_mci_drv_data *drv_data = slot->host->drv_data; u32 regs; + int ret; switch (ios->bus_width) { case MMC_BUS_WIDTH_4: @@ -974,12 +975,38 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) switch (ios->power_mode) { case MMC_POWER_UP: + if (!IS_ERR(mmc->supply.vmmc)) { + ret = mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, + ios->vdd); + if (ret) { + dev_err(slot->host->dev, + "failed to enable vmmc regulator\n"); + /*return, if failed turn on vmmc*/ + return; + } + } + if (!IS_ERR(mmc->supply.vqmmc) && !slot->host->vqmmc_enabled) { + ret = regulator_enable(mmc->supply.vqmmc); + if (ret < 0) + dev_err(slot->host->dev, + "failed to enable vqmmc regulator\n"); + else + slot->host->vqmmc_enabled = true; + } set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags); regs = mci_readl(slot->host, PWREN); regs |= (1 << slot->id); mci_writel(slot->host, PWREN, regs); break; case MMC_POWER_OFF: + if (!IS_ERR(mmc->supply.vmmc)) + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0); + + if (!IS_ERR(mmc->supply.vqmmc) && slot->host->vqmmc_enabled) { + regulator_disable(mmc->supply.vqmmc); + slot->host->vqmmc_enabled = false; + } + regs = mci_readl(slot->host, PWREN); regs &= ~(1 << slot->id); mci_writel(slot->host, PWREN, regs); @@ -2110,7 +2137,13 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) mmc->f_max = freq[1]; } - mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + /*if there are external regulators, get them*/ + ret = mmc_regulator_get_supply(mmc); + if (ret == -EPROBE_DEFER) + goto err_setup_bus; + + if (!mmc->ocr_avail) + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; if (host->pdata->caps) mmc->caps = host->pdata->caps; @@ -2176,7 +2209,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) err_setup_bus: mmc_free_host(mmc); - return -EINVAL; + return ret; } static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id) @@ -2469,24 +2502,6 @@ int dw_mci_probe(struct dw_mci *host) } } - host->vmmc = devm_regulator_get_optional(host->dev, "vmmc"); - if (IS_ERR(host->vmmc)) { - ret = PTR_ERR(host->vmmc); - if (ret == -EPROBE_DEFER) - goto err_clk_ciu; - - dev_info(host->dev, "no vmmc regulator found: %d\n", ret); - host->vmmc = NULL; - } else { - ret = regulator_enable(host->vmmc); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(host->dev, - "regulator_enable fail: %d\n", ret); - goto err_clk_ciu; - } - } - host->quirks = host->pdata->quirks; spin_lock_init(&host->lock); @@ -2630,8 +2645,6 @@ err_workqueue: err_dmaunmap: if (host->use_dma && host->dma_ops->exit) host->dma_ops->exit(host); - if (host->vmmc) - regulator_disable(host->vmmc); err_clk_ciu: if (!IS_ERR(host->ciu_clk)) @@ -2667,9 +2680,6 @@ void dw_mci_remove(struct dw_mci *host) if (host->use_dma && host->dma_ops->exit) host->dma_ops->exit(host); - if (host->vmmc) - regulator_disable(host->vmmc); - if (!IS_ERR(host->ciu_clk)) clk_disable_unprepare(host->ciu_clk); @@ -2686,9 +2696,6 @@ EXPORT_SYMBOL(dw_mci_remove); */ int dw_mci_suspend(struct dw_mci *host) { - if (host->vmmc) - regulator_disable(host->vmmc); - return 0; } EXPORT_SYMBOL(dw_mci_suspend); @@ -2697,15 +2704,6 @@ int dw_mci_resume(struct dw_mci *host) { int i, ret; - if (host->vmmc) { - ret = regulator_enable(host->vmmc); - if (ret) { - dev_err(host->dev, - "failed to enable regulator: %d\n", ret); - return ret; - } - } - if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) { ret = -ENODEV; return ret; diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 29ce014ab421..84e2827d0f0b 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -188,7 +188,7 @@ struct dw_mci { /* Workaround flags */ u32 quirks; - struct regulator *vmmc; /* Power regulator */ + bool vqmmc_enabled; unsigned long irq_flags; /* IRQ flags */ int irq; }; -- cgit v1.2.3 From 0173055842cd1d9ed3984e70891c22dbf2f29372 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 22 Aug 2014 19:17:51 +0530 Subject: mmc: dw_mmc: Support voltage changes For UHS cards we need the ability to switch voltages from 3.3V to 1.8V. Add support to the dw_mmc driver to handle this. Note that dw_mmc needs a little bit of extra code since the interface needs a special bit programmed to the CMD register while CMD11 is progressing. This means adding a few extra states to the state machine to track. Signed-off-by: Doug Anderson Signed-off-by: Yuvaraj Kumar C D Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 137 ++++++++++++++++++++++++++++++++++++++++++--- drivers/mmc/host/dw_mmc.h | 5 +- include/linux/mmc/dw_mmc.h | 2 + 3 files changed, 134 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index aadb0d6aa63f..23719249182b 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -234,10 +235,13 @@ err: } #endif /* defined(CONFIG_DEBUG_FS) */ +static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg); + static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd) { struct mmc_data *data; struct dw_mci_slot *slot = mmc_priv(mmc); + struct dw_mci *host = slot->host; const struct dw_mci_drv_data *drv_data = slot->host->drv_data; u32 cmdr; cmd->error = -EINPROGRESS; @@ -253,6 +257,34 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd) else if (cmd->opcode != MMC_SEND_STATUS && cmd->data) cmdr |= SDMMC_CMD_PRV_DAT_WAIT; + if (cmd->opcode == SD_SWITCH_VOLTAGE) { + u32 clk_en_a; + + /* Special bit makes CMD11 not die */ + cmdr |= SDMMC_CMD_VOLT_SWITCH; + + /* Change state to continue to handle CMD11 weirdness */ + WARN_ON(slot->host->state != STATE_SENDING_CMD); + slot->host->state = STATE_SENDING_CMD11; + + /* + * We need to disable low power mode (automatic clock stop) + * while doing voltage switch so we don't confuse the card, + * since stopping the clock is a specific part of the UHS + * voltage change dance. + * + * Note that low power mode (SDMMC_CLKEN_LOW_PWR) will be + * unconditionally turned back on in dw_mci_setup_bus() if it's + * ever called with a non-zero clock. That shouldn't happen + * until the voltage change is all done. + */ + clk_en_a = mci_readl(host, CLKENA); + clk_en_a &= ~(SDMMC_CLKEN_LOW_PWR << slot->id); + mci_writel(host, CLKENA, clk_en_a); + mci_send_cmd(slot, SDMMC_CMD_UPD_CLK | + SDMMC_CMD_PRV_DAT_WAIT, 0); + } + if (cmd->flags & MMC_RSP_PRESENT) { /* We expect a response, so set this bit */ cmdr |= SDMMC_CMD_RESP_EXP; @@ -775,11 +807,15 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) unsigned int clock = slot->clock; u32 div; u32 clk_en_a; + u32 sdmmc_cmd_bits = SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT; + + /* We must continue to set bit 28 in CMD until the change is complete */ + if (host->state == STATE_WAITING_CMD11_DONE) + sdmmc_cmd_bits |= SDMMC_CMD_VOLT_SWITCH; if (!clock) { mci_writel(host, CLKENA, 0); - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); } else if (clock != host->current_speed || force_clkinit) { div = host->bus_hz / clock; if (host->bus_hz % clock && host->bus_hz > clock) @@ -803,15 +839,13 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) mci_writel(host, CLKSRC, 0); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* set clock to desired speed */ mci_writel(host, CLKDIV, div); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* enable clock; only low power if no SDIO */ clk_en_a = SDMMC_CLKEN_ENABLE << slot->id; @@ -820,8 +854,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) mci_writel(host, CLKENA, clk_en_a); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* keep the clock with reflecting clock dividor */ slot->__clk_old = clock << div; @@ -897,6 +930,17 @@ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, slot->mrq = mrq; + if (host->state == STATE_WAITING_CMD11_DONE) { + dev_warn(&slot->mmc->class_dev, + "Voltage change didn't complete\n"); + /* + * this case isn't expected to happen, so we can + * either crash here or just try to continue on + * in the closest possible state + */ + host->state = STATE_IDLE; + } + if (host->state == STATE_IDLE) { host->state = STATE_SENDING_CMD; dw_mci_start_request(host, slot); @@ -973,6 +1017,9 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* Slot specific timing and width adjustment */ dw_mci_setup_bus(slot, false); + if (slot->host->state == STATE_WAITING_CMD11_DONE && ios->clock != 0) + slot->host->state = STATE_IDLE; + switch (ios->power_mode) { case MMC_POWER_UP: if (!IS_ERR(mmc->supply.vmmc)) { @@ -1016,6 +1063,59 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } } +static int dw_mci_card_busy(struct mmc_host *mmc) +{ + struct dw_mci_slot *slot = mmc_priv(mmc); + u32 status; + + /* + * Check the busy bit which is low when DAT[3:0] + * (the data lines) are 0000 + */ + status = mci_readl(slot->host, STATUS); + + return !!(status & SDMMC_STATUS_BUSY); +} + +static int dw_mci_switch_voltage(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct dw_mci_slot *slot = mmc_priv(mmc); + struct dw_mci *host = slot->host; + u32 uhs; + u32 v18 = SDMMC_UHS_18V << slot->id; + int min_uv, max_uv; + int ret; + + /* + * Program the voltage. Note that some instances of dw_mmc may use + * the UHS_REG for this. For other instances (like exynos) the UHS_REG + * does no harm but you need to set the regulator directly. Try both. + */ + uhs = mci_readl(host, UHS_REG); + if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) { + min_uv = 2700000; + max_uv = 3600000; + uhs &= ~v18; + } else { + min_uv = 1700000; + max_uv = 1950000; + uhs |= v18; + } + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, min_uv, max_uv); + + if (ret) { + dev_err(&mmc->class_dev, + "Regulator set error %d: %d - %d\n", + ret, min_uv, max_uv); + return ret; + } + } + mci_writel(host, UHS_REG, uhs); + + return 0; +} + static int dw_mci_get_ro(struct mmc_host *mmc) { int read_only; @@ -1158,6 +1258,9 @@ static const struct mmc_host_ops dw_mci_ops = { .get_cd = dw_mci_get_cd, .enable_sdio_irq = dw_mci_enable_sdio_irq, .execute_tuning = dw_mci_execute_tuning, + .card_busy = dw_mci_card_busy, + .start_signal_voltage_switch = dw_mci_switch_voltage, + }; static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq) @@ -1181,7 +1284,11 @@ static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq) dw_mci_start_request(host, slot); } else { dev_vdbg(host->dev, "list empty\n"); - host->state = STATE_IDLE; + + if (host->state == STATE_SENDING_CMD11) + host->state = STATE_WAITING_CMD11_DONE; + else + host->state = STATE_IDLE; } spin_unlock(&host->lock); @@ -1292,8 +1399,10 @@ static void dw_mci_tasklet_func(unsigned long priv) switch (state) { case STATE_IDLE: + case STATE_WAITING_CMD11_DONE: break; + case STATE_SENDING_CMD11: case STATE_SENDING_CMD: if (!test_and_clear_bit(EVENT_CMD_COMPLETE, &host->pending_events)) @@ -1894,6 +2003,14 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) } if (pending) { + /* Check volt switch first, since it can look like an error */ + if ((host->state == STATE_SENDING_CMD11) && + (pending & SDMMC_INT_VOLT_SWITCH)) { + mci_writel(host, RINTSTS, SDMMC_INT_VOLT_SWITCH); + pending &= ~SDMMC_INT_VOLT_SWITCH; + dw_mci_cmd_interrupt(host, pending); + } + if (pending & DW_MCI_CMD_ERROR_FLAGS) { mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS); host->cmd_status = pending; @@ -1999,7 +2116,9 @@ static void dw_mci_work_routine_card(struct work_struct *work) switch (host->state) { case STATE_IDLE: + case STATE_WAITING_CMD11_DONE: break; + case STATE_SENDING_CMD11: case STATE_SENDING_CMD: mrq->cmd->error = -ENOMEDIUM; if (!mrq->data) diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 08fd956d81f3..01b99e8a9190 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -99,6 +99,7 @@ #define SDMMC_INT_HLE BIT(12) #define SDMMC_INT_FRUN BIT(11) #define SDMMC_INT_HTO BIT(10) +#define SDMMC_INT_VOLT_SWITCH BIT(10) /* overloads bit 10! */ #define SDMMC_INT_DRTO BIT(9) #define SDMMC_INT_RTO BIT(8) #define SDMMC_INT_DCRC BIT(7) @@ -113,6 +114,7 @@ /* Command register defines */ #define SDMMC_CMD_START BIT(31) #define SDMMC_CMD_USE_HOLD_REG BIT(29) +#define SDMMC_CMD_VOLT_SWITCH BIT(28) #define SDMMC_CMD_CCS_EXP BIT(23) #define SDMMC_CMD_CEATA_RD BIT(22) #define SDMMC_CMD_UPD_CLK BIT(21) @@ -130,6 +132,7 @@ /* Status register defines */ #define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF) #define SDMMC_STATUS_DMA_REQ BIT(31) +#define SDMMC_STATUS_BUSY BIT(9) /* FIFOTH register defines */ #define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \ ((r) & 0xFFF) << 16 | \ @@ -150,7 +153,7 @@ #define SDMMC_GET_VERID(x) ((x) & 0xFFFF) /* Card read threshold */ #define SDMMC_SET_RD_THLD(v, x) (((v) & 0x1FFF) << 16 | (x)) - +#define SDMMC_UHS_18V BIT(0) /* All ctrl reset bits */ #define SDMMC_CTRL_ALL_RESET_FLAGS \ (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET) diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 84e2827d0f0b..001366927cf4 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -26,6 +26,8 @@ enum dw_mci_state { STATE_DATA_BUSY, STATE_SENDING_STOP, STATE_DATA_ERROR, + STATE_SENDING_CMD11, + STATE_WAITING_CMD11_DONE, }; enum { -- cgit v1.2.3 From e99783a45220a2c5f5a598e0e81213ecf2dbcf2f Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Sat, 30 Aug 2014 12:40:40 +0900 Subject: mmc: sdhci: handle busy-end interrupt during command It is fully legal for a controller to start handling busy-end interrupt before it has signaled that the command has completed. So make sure we do things in the proper order, Or it results that command interrupt is ignored so it can cause unexpected operations. This is founded at some toshiba emmc with the bellow warning. "mmc0: Got command interrupt 0x00000001 even though no command operation was in progress." This issue has been also reported by Youssef TRIKI: It is not specific to Toshiba devices, and happens with eMMC devices as well as SD card which support Auto-CMD12 rather than CMD23. Also, similar patch is submitted by: Gwendal Grignou Changes since v1: Fixed conflict with the next of git.linaro.org/people/ulf.hansson/mmc.git and Tested if issue is fixed again. Signed-off-by: Hankyung Yu Signed-off-by: Chanho Min Tested-by: Youssef TRIKI Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 17 +++++++++++++++-- include/linux/mmc/sdhci.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b5e22b8c4885..335cdf3b5224 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1016,6 +1016,7 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) mod_timer(&host->timer, timeout); host->cmd = cmd; + host->busy_handle = 0; sdhci_prepare_data(host, cmd); @@ -2261,8 +2262,12 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask) if (host->cmd->data) DBG("Cannot wait for busy signal when also " "doing a data transfer"); - else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ)) + else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ) + && !host->busy_handle) { + /* Mark that command complete before busy is ended */ + host->busy_handle = 1; return; + } /* The controller does not support the end-of-busy IRQ, * fall through and take the SDHCI_INT_RESPONSE */ @@ -2330,7 +2335,15 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) return; } if (intmask & SDHCI_INT_DATA_END) { - sdhci_finish_command(host); + /* + * Some cards handle busy-end interrupt + * before the command completed, so make + * sure we do things in the proper order. + */ + if (host->busy_handle) + sdhci_finish_command(host); + else + host->busy_handle = 1; return; } } diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 09ebe57d5ce9..0aa85ca0c788 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -146,6 +146,7 @@ struct sdhci_host { struct mmc_command *cmd; /* Current command */ struct mmc_data *data; /* Current data request */ unsigned int data_early:1; /* Data finished before cmd */ + unsigned int busy_handle:1; /* Handling the order of Busy-end */ struct sg_mapping_iter sg_miter; /* SG state for PIO */ unsigned int blocks; /* remaining PIO blocks */ -- cgit v1.2.3 From 2e47e84245adcb1b3872210678b6146f674fb3ff Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 2 Sep 2014 19:08:53 -0700 Subject: mmc: Add .multi_io_quirk callback for multi I/O HW bug Historically, we have been using MMC_CAP* to handle host HW issues and currently the block layer uses MMC_CAP2_NO_MULTI_READ flag for a multi I/O HW bug workaround. There are a few tweaks needed to make MMC_CAP2_NO_MULTI_READ suite all situations. Therefore let's add an optional host ops callback to enable host drivers to return the number of blocks it allows per request. In a future patch and when host drivers have converted to the new callback, MMC_CAP2_NO_MULTI_READ shall be removed. Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/card/block.c | 10 ++++++++++ include/linux/mmc/host.h | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index ede41f05c392..adab9038f6f7 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1402,6 +1402,16 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, if (card->host->caps2 & MMC_CAP2_NO_MULTI_READ && rq_data_dir(req) == READ) brq->data.blocks = 1; + + /* + * Some controllers have HW issues while operating + * in multiple I/O mode + */ + if (card->host->ops->multi_io_quirk) + brq->data.blocks = card->host->ops->multi_io_quirk(card, + (rq_data_dir(req) == READ) ? + MMC_DATA_READ : MMC_DATA_WRITE, + brq->data.blocks); } if (brq->data.blocks > 1 || do_rel_wr) { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4cbf61476999..10e2bd6985ae 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -139,6 +139,13 @@ struct mmc_host_ops { int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv); void (*hw_reset)(struct mmc_host *host); void (*card_event)(struct mmc_host *host); + + /* + * Optional callback to support controllers with HW issues for multiple + * I/O. Returns the number of supported blocks for the request. + */ + int (*multi_io_quirk)(struct mmc_card *card, + unsigned int direction, int blk_size); }; struct mmc_card; -- cgit v1.2.3 From bbf0208d39121bd8873b032459cb2b5f35e14593 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 8 Sep 2014 23:45:25 -0700 Subject: mmc: use .multi_io_quirk on tmio_mmc Now, tmio_mmc can use .multi_io_quirk callback instead of MMC_CAP2_NO_MULTI_READ flags. let's use it. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_pio.c | 13 +++++++++++++ include/linux/mfd/tmio.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index ba454131f9a8..ff5ff0f725c9 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -970,12 +970,25 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc) return ret; } +static int tmio_multi_io_quirk(struct mmc_card *card, + unsigned int direction, int blk_size) +{ + struct tmio_mmc_host *host = mmc_priv(card->host); + struct tmio_mmc_data *pdata = host->pdata; + + if (pdata->multi_io_quirk) + return pdata->multi_io_quirk(card, direction, blk_size); + + return blk_size; +} + static const struct mmc_host_ops tmio_mmc_ops = { .request = tmio_mmc_request, .set_ios = tmio_mmc_set_ios, .get_ro = tmio_mmc_get_ro, .get_cd = mmc_gpio_get_cd, .enable_sdio_irq = tmio_mmc_enable_sdio_irq, + .multi_io_quirk = tmio_multi_io_quirk, }; static int tmio_mmc_init_ocr(struct tmio_mmc_host *host) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 90436d523e5e..57388171610d 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,8 @@ struct tmio_mmc_data { /* clock management callbacks */ int (*clk_enable)(struct platform_device *pdev, unsigned int *f); void (*clk_disable)(struct platform_device *pdev); + int (*multi_io_quirk)(struct mmc_card *card, + unsigned int direction, int blk_size); }; /* -- cgit v1.2.3 From 0abb71feb228ddbd17e0dfa13216541e036bb549 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 8 Sep 2014 23:46:49 -0700 Subject: mmc: remove MMC_CAP2_NO_MULTI_READ flags Now, mmc framework uses multi_io_quirk for I/O HW bug workaround. MMC_CAP2_NO_MULTI_READ flag is no longer needed Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/card/block.c | 5 ----- include/linux/mmc/host.h | 1 - 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index adab9038f6f7..413f984cb76d 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1398,11 +1398,6 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, if (disable_multi) brq->data.blocks = 1; - /* Some controllers can't do multiblock reads due to hw bugs */ - if (card->host->caps2 & MMC_CAP2_NO_MULTI_READ && - rq_data_dir(req) == READ) - brq->data.blocks = 1; - /* * Some controllers have HW issues while operating * in multiple I/O mode diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 10e2bd6985ae..797ae657dc3d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -272,7 +272,6 @@ struct mmc_host { #define MMC_CAP2_BOOTPART_NOACC (1 << 0) /* Boot partition no access */ #define MMC_CAP2_FULL_PWR_CYCLE (1 << 2) /* Can do full power cycle */ -#define MMC_CAP2_NO_MULTI_READ (1 << 3) /* Multiblock reads don't work */ #define MMC_CAP2_HS200_1_8V_SDR (1 << 5) /* can support */ #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ -- cgit v1.2.3 From 9d2fa2428ae149ba3a5b7a4ceb0a9e11f1882b3b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 27 Aug 2014 13:00:51 +0200 Subject: mmc: slot-gpio: add gpiod variant to get wp GPIO This makes it possible to get the write protect (read only) GPIO line from a GPIO descriptor. Written to exactly mirror the card detect function. Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/slot-gpio.c | 48 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/slot-gpio.h | 3 +++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 908c2b29e79f..e3fce4493fab 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -325,6 +325,54 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, } EXPORT_SYMBOL(mmc_gpiod_request_cd); +/** + * mmc_gpiod_request_ro - request a gpio descriptor for write protection + * @host: mmc host + * @con_id: function within the GPIO consumer + * @idx: index of the GPIO to obtain in the consumer + * @override_active_level: ignore %GPIO_ACTIVE_LOW flag + * @debounce: debounce time in microseconds + * + * Use this function in place of mmc_gpio_request_ro() to use the GPIO + * descriptor API. Note that it is paired with mmc_gpiod_free_ro() not + * mmc_gpio_free_ro(). + * + * Returns zero on success, else an error. + */ +int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce) +{ + struct mmc_gpio *ctx; + struct gpio_desc *desc; + int ret; + + ret = mmc_gpio_alloc(host); + if (ret < 0) + return ret; + + ctx = host->slot.handler_priv; + + if (!con_id) + con_id = ctx->ro_label; + + desc = devm_gpiod_get_index(host->parent, con_id, idx, GPIOD_IN); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + if (debounce) { + ret = gpiod_set_debounce(desc, debounce); + if (ret < 0) + return ret; + } + + ctx->override_ro_active_level = override_active_level; + ctx->ro_gpio = desc; + + return 0; +} +EXPORT_SYMBOL(mmc_gpiod_request_ro); + /** * mmc_gpiod_free_cd - free the card-detection gpio descriptor * @host: mmc host diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index d2433381e828..a0d0442c15bf 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -25,6 +25,9 @@ void mmc_gpio_free_cd(struct mmc_host *host); int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, unsigned int debounce); +int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce); void mmc_gpiod_free_cd(struct mmc_host *host); void mmc_gpiod_request_cd_irq(struct mmc_host *host); -- cgit v1.2.3 From 3034a146820c26fe6da66a45f6340fe87fe0983a Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 27 Jun 2014 18:15:44 +0300 Subject: ima: pass 'opened' flag to identify newly created files Empty files and missing xattrs do not guarantee that a file was just created. This patch passes FILE_CREATED flag to IMA to reliably identify new files. Signed-off-by: Dmitry Kasatkin Signed-off-by: Mimi Zohar Cc: 3.14+ --- fs/namei.c | 2 +- fs/nfsd/vfs.c | 2 +- include/linux/ima.h | 4 ++-- security/integrity/ima/ima.h | 4 ++-- security/integrity/ima/ima_appraise.c | 4 ++-- security/integrity/ima/ima_main.c | 16 ++++++++-------- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 985c6f368485..005771f97189 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3058,7 +3058,7 @@ opened: error = open_check_o_direct(file); if (error) goto exit_fput; - error = ima_file_check(file, op->acc_mode); + error = ima_file_check(file, op->acc_mode, *opened); if (error) goto exit_fput; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 140c496f612c..d49c778faecb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -709,7 +709,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, host_err = PTR_ERR(*filp); *filp = NULL; } else { - host_err = ima_file_check(*filp, may_flags); + host_err = ima_file_check(*filp, may_flags, 0); if (may_flags & NFSD_MAY_64BIT_COOKIE) (*filp)->f_mode |= FMODE_64BITHASH; diff --git a/include/linux/ima.h b/include/linux/ima.h index 7cf5e9b32550..120ccc53fcb7 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -15,7 +15,7 @@ struct linux_binprm; #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask); +extern int ima_file_check(struct file *file, int mask, int opened); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_module_check(struct file *file); @@ -27,7 +27,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm) return 0; } -static inline int ima_file_check(struct file *file, int mask) +static inline int ima_file_check(struct file *file, int mask, int opened) { return 0; } diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 57da4bd7ba0c..0fb456c20eda 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -177,7 +177,7 @@ void ima_delete_rules(void); int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len); + int xattr_len, int opened); int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file); enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, @@ -193,7 +193,7 @@ static inline int ima_appraise_measurement(int func, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len) + int xattr_len, int opened) { return INTEGRITY_UNKNOWN; } diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index a4605d677248..225fd944a4ef 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -183,7 +183,7 @@ int ima_read_xattr(struct dentry *dentry, int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len) + int xattr_len, int opened) { static const char op[] = "appraise_data"; char *cause = "unknown"; @@ -203,7 +203,7 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, cause = "missing-hash"; status = INTEGRITY_NOLABEL; - if (inode->i_size == 0) { + if (opened & FILE_CREATED) { iint->flags |= IMA_NEW_FILE; status = INTEGRITY_PASS; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 0a2298f90c9c..f82cf9b8e92b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -157,7 +157,7 @@ void ima_file_free(struct file *file) } static int process_measurement(struct file *file, const char *filename, - int mask, int function) + int mask, int function, int opened) { struct inode *inode = file_inode(file); struct integrity_iint_cache *iint; @@ -226,7 +226,7 @@ static int process_measurement(struct file *file, const char *filename, xattr_value, xattr_len); if (action & IMA_APPRAISE_SUBMASK) rc = ima_appraise_measurement(_func, iint, file, pathname, - xattr_value, xattr_len); + xattr_value, xattr_len, opened); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); kfree(pathbuf); @@ -255,7 +255,7 @@ out: int ima_file_mmap(struct file *file, unsigned long prot) { if (file && (prot & PROT_EXEC)) - return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK); + return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK, 0); return 0; } @@ -277,7 +277,7 @@ int ima_bprm_check(struct linux_binprm *bprm) return process_measurement(bprm->file, (strcmp(bprm->filename, bprm->interp) == 0) ? bprm->filename : bprm->interp, - MAY_EXEC, BPRM_CHECK); + MAY_EXEC, BPRM_CHECK, 0); } /** @@ -290,12 +290,12 @@ int ima_bprm_check(struct linux_binprm *bprm) * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ -int ima_file_check(struct file *file, int mask) +int ima_file_check(struct file *file, int mask, int opened) { ima_rdwr_violation_check(file); return process_measurement(file, NULL, mask & (MAY_READ | MAY_WRITE | MAY_EXEC), - FILE_CHECK); + FILE_CHECK, opened); } EXPORT_SYMBOL_GPL(ima_file_check); @@ -318,7 +318,7 @@ int ima_module_check(struct file *file) #endif return 0; /* We rely on module signature checking */ } - return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK); + return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK, 0); } int ima_fw_from_file(struct file *file, char *buf, size_t size) @@ -329,7 +329,7 @@ int ima_fw_from_file(struct file *file, char *buf, size_t size) return -EACCES; /* INTEGRITY_UNKNOWN */ return 0; } - return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK); + return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK, 0); } static int __init init_ima(void) -- cgit v1.2.3 From ef979a26e3d521d51dbd9950e46a69e303073171 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 9 Sep 2014 08:56:48 +0800 Subject: usb: gadget: add reset API at usb_gadget_driver Adding reset API for UDC bus reset handler is useful for below two issues. Current disconnect API at usb_gadget_driver is also invoked at udc's bus reset handler, but the document says it is invoked when the host is disconnected. Besides, we may expect the gadget_driver to do different things for host sends bus reset and host disconnects gadget, eg, we may not want to flush dirty page for mass storage at bus reset, and want to do it at disconnection. Acked-by: Alan Stern Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index c540557b564b..598a6e9b2850 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -817,6 +817,8 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget) * Called in a context that permits sleeping. * @suspend: Invoked on USB suspend. May be called in_interrupt. * @resume: Invoked on USB resume. May be called in_interrupt. + * @reset: Invoked on USB bus reset. It is mandatory for all gadget drivers + * and should be called in_interrupt. * @driver: Driver model state for this driver. * * Devices are disabled till a gadget driver successfully bind()s, which @@ -874,6 +876,7 @@ struct usb_gadget_driver { void (*disconnect)(struct usb_gadget *); void (*suspend)(struct usb_gadget *); void (*resume)(struct usb_gadget *); + void (*reset)(struct usb_gadget *); /* FIXME support safe rmmod */ struct device_driver driver; -- cgit v1.2.3 From 02ab695bb37ee9ad515df0d0790d5977505dd04a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:17 -0700 Subject: net: filter: add "load 64-bit immediate" eBPF instruction add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register. All previous instructions were 8-byte. This is first 16-byte instruction. Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction: insn[0].code = BPF_LD | BPF_DW | BPF_IMM insn[0].dst_reg = destination register insn[0].imm = lower 32-bit insn[1].code = 0 insn[1].imm = upper 32-bit All unused fields must be zero. Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads 32-bit immediate value into a register. x64 JITs it as single 'movabsq %rax, imm64' arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn Note that old eBPF programs are binary compatible with new interpreter. It helps eBPF programs load 64-bit constant into a register with one instruction instead of using two registers and 4 instructions: BPF_MOV32_IMM(R1, imm32) BPF_ALU64_IMM(BPF_LSH, R1, 32) BPF_MOV32_IMM(R2, imm32) BPF_ALU64_REG(BPF_OR, R1, R2) User space generated programs will use this instruction to load constants only. To tell kernel that user space needs a pointer the _pseudo_ variant of this instruction may be added later, which will use extra bits of encoding to indicate what type of pointer user space is asking kernel to provide. For example 'off' or 'src_reg' fields can be used for such purpose. src_reg = 1 could mean that user space is asking kernel to validate and load in-kernel map pointer. src_reg = 2 could mean that user space needs readonly data section pointer src_reg = 3 could mean that user space needs a pointer to per-cpu local data All such future pseudo instructions will not be carrying the actual pointer as part of the instruction, but rather will be treated as a request to kernel to provide one. The kernel will verify the request_for_a_pointer, then will drop _pseudo_ marking and will store actual internal pointer inside the instruction, so the end result is the interpreter and JITs never see pseudo BPF_LD_IMM64 insns and only operate on generic BPF_LD_IMM64 that loads 64-bit immediate into a register. User space never operates on direct pointers and verifier can easily recognize request_for_pointer vs other instructions. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 8 +++++++- arch/x86/net/bpf_jit_comp.c | 17 +++++++++++++++++ include/linux/filter.h | 18 ++++++++++++++++++ kernel/bpf/core.c | 5 +++++ lib/test_bpf.c | 21 +++++++++++++++++++++ 5 files changed, 68 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index c48a9704bda8..81916ab5d96f 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -951,7 +951,7 @@ Size modifier is one of ... Mode modifier is one of: - BPF_IMM 0x00 /* classic BPF only, reserved in eBPF */ + BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ BPF_ABS 0x20 BPF_IND 0x40 BPF_MEM 0x60 @@ -995,6 +995,12 @@ BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and 2 byte atomic increments are not supported. +eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists +of two consecutive 'struct bpf_insn' 8-byte blocks and interpreted as single +instruction that loads 64-bit immediate value into a dst_reg. +Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads +32-bit immediate value into a register. + Testing ------- diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 39ccfbb4a723..06f8c17f5484 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -393,6 +393,23 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); break; + case BPF_LD | BPF_IMM | BPF_DW: + if (insn[1].code != 0 || insn[1].src_reg != 0 || + insn[1].dst_reg != 0 || insn[1].off != 0) { + /* verifier must catch invalid insns */ + pr_err("invalid BPF_LD_IMM64 insn\n"); + return -EINVAL; + } + + /* movabsq %rax, imm64 */ + EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); + EMIT(insn[0].imm, 4); + EMIT(insn[1].imm, 4); + + insn++; + i++; + break; + /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_DIV | BPF_X: diff --git a/include/linux/filter.h b/include/linux/filter.h index c78994593355..bf323da77950 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -166,6 +166,24 @@ enum { .off = 0, \ .imm = IMM }) +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b54bb2c2e494..2c2bfaacce66 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -242,6 +242,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, + [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; void *ptr; int off; @@ -301,6 +302,10 @@ select_insn: ALU64_MOV_K: DST = IMM; CONT; + LD_IMM_DW: + DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32; + insn++; + CONT; ALU64_ARSH_X: (*(s64 *) &DST) >>= SRC; CONT; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 9a67456ba29a..413890815d3e 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1735,6 +1735,27 @@ static struct bpf_test tests[] = { { }, { { 1, 0 } }, }, + { + "load 64-bit immediate", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x567800001234L), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_ALU64_IMM(BPF_LSH, R3, 32), + BPF_ALU64_IMM(BPF_RSH, R3, 32), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, }; static struct net_device dev; -- cgit v1.2.3 From daedfb22451dd02b35c0549566cbb7cc06bdd53b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:18 -0700 Subject: net: filter: split filter.h and expose eBPF to user space allow user space to generate eBPF programs uapi/linux/bpf.h: eBPF instruction set definition linux/filter.h: the rest This patch only moves macro definitions, but practically it freezes existing eBPF instruction set, though new instructions can still be added in the future. These eBPF definitions cannot go into uapi/linux/filter.h, since the names may conflict with existing applications. Full eBPF ISA description is in Documentation/networking/filter.txt Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 56 +--------------------------------------- include/uapi/linux/Kbuild | 1 + include/uapi/linux/bpf.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 55 deletions(-) create mode 100644 include/uapi/linux/bpf.h (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index bf323da77950..8f82ef3f1cdd 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -10,58 +10,12 @@ #include #include #include +#include struct sk_buff; struct sock; struct seccomp_data; -/* Internally used and optimized filter representation with extended - * instruction set based on top of classic BPF. - */ - -/* instruction classes */ -#define BPF_ALU64 0x07 /* alu mode in double word width */ - -/* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ -#define BPF_XADD 0xc0 /* exclusive add */ - -/* alu/jmp fields */ -#define BPF_MOV 0xb0 /* mov reg to reg */ -#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ - -/* change endianness of a register */ -#define BPF_END 0xd0 /* flags for endianness conversion: */ -#define BPF_TO_LE 0x00 /* convert to little-endian */ -#define BPF_TO_BE 0x08 /* convert to big-endian */ -#define BPF_FROM_LE BPF_TO_LE -#define BPF_FROM_BE BPF_TO_BE - -#define BPF_JNE 0x50 /* jump != */ -#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ -#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ -#define BPF_CALL 0x80 /* function call */ -#define BPF_EXIT 0x90 /* function return */ - -/* Register numbers */ -enum { - BPF_REG_0 = 0, - BPF_REG_1, - BPF_REG_2, - BPF_REG_3, - BPF_REG_4, - BPF_REG_5, - BPF_REG_6, - BPF_REG_7, - BPF_REG_8, - BPF_REG_9, - BPF_REG_10, - __MAX_BPF_REG, -}; - -/* BPF has 10 general purpose 64-bit registers and stack frame. */ -#define MAX_BPF_REG __MAX_BPF_REG - /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function * calls in BPF_CALL instruction. @@ -322,14 +276,6 @@ enum { #define SK_RUN_FILTER(filter, ctx) \ (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) -struct bpf_insn { - __u8 code; /* opcode */ - __u8 dst_reg:4; /* dest register */ - __u8 src_reg:4; /* source register */ - __s16 off; /* signed offset */ - __s32 imm; /* signed immediate constant */ -}; - #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 24e9033f8b3f..fb3f7b675229 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -67,6 +67,7 @@ header-y += bfs_fs.h header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h +header-y += bpf.h header-y += bpqether.h header-y += bsg.h header-y += btrfs.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h new file mode 100644 index 000000000000..479ed0b6be16 --- /dev/null +++ b/include/uapi/linux/bpf.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ + +#include + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +#endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From dc8ecdd3a3fccf73fcb07711cde064ce5727f9d1 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 1 Sep 2014 23:11:06 +0200 Subject: bcma: move bus struct setup into early part of host specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change is important for SoC host. In future we will want to know chip ID (needed for early MIPS boot) before doing cores scanning. Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/host_pci.c | 3 +++ drivers/bcma/host_soc.c | 3 +++ drivers/bcma/main.c | 2 -- drivers/bcma/scan.c | 7 ------- include/linux/bcma/bcma.h | 1 - 5 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index f032ed6dd459..1e5ac0a79696 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -208,6 +208,9 @@ static int bcma_host_pci_probe(struct pci_dev *dev, bus->boardinfo.vendor = bus->host_pci->subsystem_vendor; bus->boardinfo.type = bus->host_pci->subsystem_device; + /* Initialize struct, detect chip */ + bcma_init_bus(bus); + /* Register */ err = bcma_bus_register(bus); if (err) diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 1edd7e064621..379e0d4ebe72 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -178,6 +178,9 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) bus->hosttype = BCMA_HOSTTYPE_SOC; bus->ops = &bcma_host_soc_ops; + /* Initialize struct, detect chip */ + bcma_init_bus(bus); + /* Register */ err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips); if (err) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 0ff8d58831ef..9f6b0cb9a12c 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -334,8 +334,6 @@ int __init bcma_bus_early_register(struct bcma_bus *bus, struct bcma_device *core; struct bcma_device_id match; - bcma_init_bus(bus); - match.manuf = BCMA_MANUF_BCM; match.id = bcma_cc_core_id(bus); match.class = BCMA_CL_SIM; diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index e9bd77249a4c..e2b990303042 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -438,9 +438,6 @@ void bcma_init_bus(struct bcma_bus *bus) s32 tmp; struct bcma_chipinfo *chipinfo = &(bus->chipinfo); - if (bus->init_done) - return; - INIT_LIST_HEAD(&bus->cores); bus->nr_cores = 0; @@ -452,8 +449,6 @@ void bcma_init_bus(struct bcma_bus *bus) chipinfo->pkg = (tmp & BCMA_CC_ID_PKG) >> BCMA_CC_ID_PKG_SHIFT; bcma_info(bus, "Found chip with id 0x%04X, rev 0x%02X and package 0x%02X\n", chipinfo->id, chipinfo->rev, chipinfo->pkg); - - bus->init_done = true; } int bcma_bus_scan(struct bcma_bus *bus) @@ -463,8 +458,6 @@ int bcma_bus_scan(struct bcma_bus *bus) int err, core_num = 0; - bcma_init_bus(bus); - erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM); if (bus->hosttype == BCMA_HOSTTYPE_SOC) { eromptr = ioremap_nocache(erombase, BCMA_CORE_SIZE); diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 0272e49135d0..c1ba87d1548e 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -332,7 +332,6 @@ struct bcma_bus { struct bcma_device *mapped_core; struct list_head cores; u8 nr_cores; - u8 init_done:1; u8 num; struct bcma_drv_cc drv_cc; -- cgit v1.2.3 From a395135ddebb0a06052b84c309eb6cb68b79c797 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 1 Sep 2014 23:11:07 +0200 Subject: bcma: use separated function to initialize bus on SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required to split SoC bus init into two phases. The later one (which includes scanning) should be called when kalloc is available. Cc: Ralf Baechle Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. Linville --- arch/mips/bcm47xx/setup.c | 4 ++++ drivers/bcma/host_soc.c | 11 +++++++++-- include/linux/bcma/bcma_soc.h | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 2b63e7e7d3d3..fff6ed4978c7 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -201,6 +201,10 @@ static void __init bcm47xx_register_bcma(void) pr_warn("bcm47xx: someone else already registered a bcma SPROM callback handler (err %d)\n", err); err = bcma_host_soc_register(&bcm47xx_bus.bcma); + if (err) + panic("Failed to register BCMA bus (err %d)", err); + + err = bcma_host_soc_init(&bcm47xx_bus.bcma); if (err) panic("Failed to initialize BCMA bus (err %d)", err); diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 379e0d4ebe72..718e054dd727 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -165,7 +165,6 @@ static const struct bcma_host_ops bcma_host_soc_ops = { int __init bcma_host_soc_register(struct bcma_soc *soc) { struct bcma_bus *bus = &soc->bus; - int err; /* iomap only first core. We have to read some register on this core * to scan the bus. @@ -181,7 +180,15 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) /* Initialize struct, detect chip */ bcma_init_bus(bus); - /* Register */ + return 0; +} + +int __init bcma_host_soc_init(struct bcma_soc *soc) +{ + struct bcma_bus *bus = &soc->bus; + int err; + + /* Scan bus and initialize it */ err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips); if (err) iounmap(bus->mmio); diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h index 4203c5593b9f..f24d245f8394 100644 --- a/include/linux/bcma/bcma_soc.h +++ b/include/linux/bcma/bcma_soc.h @@ -10,6 +10,7 @@ struct bcma_soc { }; int __init bcma_host_soc_register(struct bcma_soc *soc); +int __init bcma_host_soc_init(struct bcma_soc *soc); int bcma_bus_register(struct bcma_bus *bus); -- cgit v1.2.3 From 23a2f39c8f4035eade7f226eb7ada30c78d9eee3 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 8 Sep 2014 22:53:35 +0200 Subject: bcma: store more alternative addresses Each core could have more than one alternative address. There are cores with 8 alternative addresses for different functions. The PHY control in the Chip common B core is done through the 2. alternative address and not the first one. Signed-off-by: Hauke Mehrtens CC: linux-usb@vger.kernel.org Signed-off-by: John W. Linville --- drivers/bcma/scan.c | 9 +++++---- drivers/usb/host/bcma-hcd.c | 2 +- include/linux/bcma/bcma.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index e2b990303042..06be7282cbc4 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -276,7 +276,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, struct bcma_device *core) { u32 tmp; - u8 i, j; + u8 i, j, k; s32 cia, cib; u8 ports[2], wrappers[2]; @@ -367,6 +367,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, core->addr = tmp; /* get & parse slave ports */ + k = 0; for (i = 0; i < ports[1]; i++) { for (j = 0; ; j++) { tmp = bcma_erom_get_addr_desc(bus, eromptr, @@ -376,9 +377,9 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, /* pr_debug("erom: slave port %d " * "has %d descriptors\n", i, j); */ break; - } else { - if (i == 0 && j == 0) - core->addr1 = tmp; + } else if (k < ARRAY_SIZE(core->addr_s)) { + core->addr_s[k] = tmp; + k++; } } } diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c index 205f4a336583..cd6d0afb6b8f 100644 --- a/drivers/usb/host/bcma-hcd.c +++ b/drivers/usb/host/bcma-hcd.c @@ -237,7 +237,7 @@ static int bcma_hcd_probe(struct bcma_device *dev) bcma_hcd_init_chip(dev); /* In AI chips EHCI is addrspace 0, OHCI is 1 */ - ohci_addr = dev->addr1; + ohci_addr = dev->addr_s[0]; if ((chipinfo->id == 0x5357 || chipinfo->id == 0x4749) && chipinfo->rev == 0) ohci_addr = 0x18009000; diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index c1ba87d1548e..7fc16c991291 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -267,7 +267,7 @@ struct bcma_device { u8 core_unit; u32 addr; - u32 addr1; + u32 addr_s[8]; u32 wrap; void __iomem *io_addr; -- cgit v1.2.3 From 1716bcf3f76fe71e98d4851a3eb73ea3d93d4773 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 8 Sep 2014 22:53:36 +0200 Subject: bcma: add support for chipcommon B core This core is used on BCM4708 to configure the PCIe and USB3 PHYs and it contains the addresses to the Device Management unit. This will be used by the PCIe driver first. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/Makefile | 1 + drivers/bcma/bcma_private.h | 4 ++ drivers/bcma/driver_chipcommon_b.c | 61 +++++++++++++++++++++++++++++ drivers/bcma/main.c | 10 +++++ drivers/bcma/scan.c | 1 + include/linux/bcma/bcma.h | 1 + include/linux/bcma/bcma_driver_chipcommon.h | 8 ++++ 7 files changed, 86 insertions(+) create mode 100644 drivers/bcma/driver_chipcommon_b.c (limited to 'include/linux') diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile index 91290f7f61b8..838b4b9d352f 100644 --- a/drivers/bcma/Makefile +++ b/drivers/bcma/Makefile @@ -1,5 +1,6 @@ bcma-y += main.o scan.o core.o sprom.o bcma-y += driver_chipcommon.o driver_chipcommon_pmu.o +bcma-y += driver_chipcommon_b.o bcma-$(CONFIG_BCMA_SFLASH) += driver_chipcommon_sflash.o bcma-$(CONFIG_BCMA_NFLASH) += driver_chipcommon_nflash.o bcma-y += driver_pci.o diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 09b632ad0fe2..b40be43c6f31 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -50,6 +50,10 @@ void bcma_chipco_serial_init(struct bcma_drv_cc *cc); extern struct platform_device bcma_pflash_dev; #endif /* CONFIG_BCMA_DRIVER_MIPS */ +/* driver_chipcommon_b.c */ +int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb); +void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb); + /* driver_chipcommon_pmu.c */ u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc); u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc); diff --git a/drivers/bcma/driver_chipcommon_b.c b/drivers/bcma/driver_chipcommon_b.c new file mode 100644 index 000000000000..c20b5f4ff290 --- /dev/null +++ b/drivers/bcma/driver_chipcommon_b.c @@ -0,0 +1,61 @@ +/* + * Broadcom specific AMBA + * ChipCommon B Unit driver + * + * Copyright 2014, Hauke Mehrtens + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bcma_private.h" +#include +#include + +static bool bcma_wait_reg(struct bcma_bus *bus, void __iomem *addr, u32 mask, + u32 value, int timeout) +{ + unsigned long deadline = jiffies + timeout; + u32 val; + + do { + val = readl(addr); + if ((val & mask) == value) + return true; + cpu_relax(); + udelay(10); + } while (!time_after_eq(jiffies, deadline)); + + bcma_err(bus, "Timeout waiting for register %p\n", addr); + + return false; +} + +void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value) +{ + struct bcma_bus *bus = ccb->core->bus; + + writel(offset, ccb->mii + 0x00); + bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100); + writel(value, ccb->mii + 0x04); + bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100); +} +EXPORT_SYMBOL_GPL(bcma_chipco_b_mii_write); + +int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb) +{ + if (ccb->setup_done) + return 0; + + ccb->setup_done = 1; + ccb->mii = ioremap_nocache(ccb->core->addr_s[1], BCMA_CORE_SIZE); + if (!ccb->mii) + return -ENOMEM; + + return 0; +} + +void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb) +{ + if (ccb->mii) + iounmap(ccb->mii); +} diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 297a2d46985a..c421403cab43 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -173,6 +173,7 @@ static int bcma_register_devices(struct bcma_bus *bus) switch (core->id.id) { case BCMA_CORE_4706_CHIPCOMMON: case BCMA_CORE_CHIPCOMMON: + case BCMA_CORE_NS_CHIPCOMMON_B: case BCMA_CORE_PCI: case BCMA_CORE_PCIE: case BCMA_CORE_PCIE2: @@ -287,6 +288,13 @@ int bcma_bus_register(struct bcma_bus *bus) bcma_core_chipcommon_init(&bus->drv_cc); } + /* Init CC core */ + core = bcma_find_core(bus, BCMA_CORE_NS_CHIPCOMMON_B); + if (core) { + bus->drv_cc_b.core = core; + bcma_core_chipcommon_b_init(&bus->drv_cc_b); + } + /* Init MIPS core */ core = bcma_find_core(bus, BCMA_CORE_MIPS_74K); if (core) { @@ -341,6 +349,8 @@ void bcma_bus_unregister(struct bcma_bus *bus) else if (err) bcma_err(bus, "Can not unregister GPIO driver: %i\n", err); + bcma_core_chipcommon_b_free(&bus->drv_cc_b); + cores[0] = bcma_find_core(bus, BCMA_CORE_MIPS_74K); cores[1] = bcma_find_core(bus, BCMA_CORE_PCIE); cores[2] = bcma_find_core(bus, BCMA_CORE_4706_MAC_GBIT_COMMON); diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index 06be7282cbc4..b3a403c136fb 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -314,6 +314,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, /* Some specific cores don't need wrappers */ switch (core->id.id) { case BCMA_CORE_4706_MAC_GBIT_COMMON: + case BCMA_CORE_NS_CHIPCOMMON_B: /* Not used yet: case BCMA_CORE_OOB_ROUTER: */ break; default: diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 7fc16c991291..634597917670 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -335,6 +335,7 @@ struct bcma_bus { u8 num; struct bcma_drv_cc drv_cc; + struct bcma_drv_cc_b drv_cc_b; struct bcma_drv_pci drv_pci[2]; struct bcma_drv_pcie2 drv_pcie2; struct bcma_drv_mips drv_mips; diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 63d105cd14a3..db6fa217f98b 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -644,6 +644,12 @@ struct bcma_drv_cc { #endif }; +struct bcma_drv_cc_b { + struct bcma_device *core; + u8 setup_done:1; + void __iomem *mii; +}; + /* Register access */ #define bcma_cc_read32(cc, offset) \ bcma_read32((cc)->core, offset) @@ -699,4 +705,6 @@ extern void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid); extern u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc); +void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value); + #endif /* LINUX_BCMA_DRIVER_CC_H_ */ -- cgit v1.2.3 From d0449b90f80f263e17e8b3ce31442e45121dc46c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 22 Aug 2014 10:18:42 -0400 Subject: locks: Remove unused conf argument from lm_grant This argument is always NULL so don't pass it around. [jlayton: remove dependencies on previous patches in series] Signed-off-by: Joe Perches Signed-off-by: Jeff Layton --- fs/dlm/plock.c | 8 ++++---- fs/lockd/svclock.c | 12 +++--------- include/linux/fs.h | 2 +- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index f704458ea5f5..e0ab3a93eeff 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -30,7 +30,7 @@ struct plock_op { struct plock_xop { struct plock_op xop; - void *callback; + int (*callback)(struct file_lock *fl, int result); void *fl; void *file; struct file_lock flc; @@ -190,7 +190,7 @@ static int dlm_plock_callback(struct plock_op *op) struct file *file; struct file_lock *fl; struct file_lock *flc; - int (*notify)(void *, void *, int) = NULL; + int (*notify)(struct file_lock *fl, int result) = NULL; struct plock_xop *xop = (struct plock_xop *)op; int rv = 0; @@ -209,7 +209,7 @@ static int dlm_plock_callback(struct plock_op *op) notify = xop->callback; if (op->info.rv) { - notify(fl, NULL, op->info.rv); + notify(fl, op->info.rv); goto out; } @@ -228,7 +228,7 @@ static int dlm_plock_callback(struct plock_op *op) (unsigned long long)op->info.number, file, fl); } - rv = notify(fl, NULL, 0); + rv = notify(fl, 0); if (rv) { /* XXX: We need to cancel the fs lock here: */ log_print("dlm_plock_callback: lock granted after lock request " diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index ab798a88ec1d..2a6170133c1d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -667,22 +667,16 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l * deferred rpc for GETLK and SETLK. */ static void -nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf, - int result) +nlmsvc_update_deferred_block(struct nlm_block *block, int result) { block->b_flags |= B_GOT_CALLBACK; if (result == 0) block->b_granted = 1; else block->b_flags |= B_TIMED_OUT; - if (conf) { - if (block->b_fl) - __locks_copy_lock(block->b_fl, conf); - } } -static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, - int result) +static int nlmsvc_grant_deferred(struct file_lock *fl, int result) { struct nlm_block *block; int rc = -ENOENT; @@ -697,7 +691,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, rc = -ENOLCK; break; } - nlmsvc_update_deferred_block(block, conf, result); + nlmsvc_update_deferred_block(block, result); } else if (result == 0) block->b_granted = 1; diff --git a/include/linux/fs.h b/include/linux/fs.h index 94187721ad41..908af4f81680 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -869,7 +869,7 @@ struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); unsigned long (*lm_owner_key)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ - int (*lm_grant)(struct file_lock *, struct file_lock *, int); + int (*lm_grant)(struct file_lock *, int); void (*lm_break)(struct file_lock *); int (*lm_change)(struct file_lock **, int); }; -- cgit v1.2.3 From 3fe0fff18fe87c6a2179837de68d1174903c6367 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 22 Aug 2014 10:18:42 -0400 Subject: locks: Rename __locks_copy_lock() to locks_copy_conflock() Jeff advice, " Right now __locks_copy_lock is only used to copy conflocks. It would be good to rename that to something more distinct (i.e.locks_copy_conflock), to make it clear that we're generating a conflock there." v5: change order from 3/6 to 2/6 v4: new patch only renaming function name Signed-off-by: Kinglong Mee Signed-off-by: Jeff Layton --- fs/locks.c | 10 +++++----- include/linux/fs.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index bb08857f90b5..ec9becd02d3d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -281,7 +281,7 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) /* * Initialize a new lock from an existing file_lock structure. */ -void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) +void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { new->fl_owner = fl->fl_owner; new->fl_pid = fl->fl_pid; @@ -293,14 +293,14 @@ void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) new->fl_ops = NULL; new->fl_lmops = NULL; } -EXPORT_SYMBOL(__locks_copy_lock); +EXPORT_SYMBOL(locks_copy_conflock); void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { /* "new" must be a freshly-initialized lock */ WARN_ON_ONCE(new->fl_ops); - __locks_copy_lock(new, fl); + locks_copy_conflock(new, fl); new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; @@ -735,7 +735,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) break; } if (cfl) { - __locks_copy_lock(fl, cfl); + locks_copy_conflock(fl, cfl); if (cfl->fl_nspid) fl->fl_pid = pid_vnr(cfl->fl_nspid); } else @@ -941,7 +941,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (!posix_locks_conflict(request, fl)) continue; if (conflock) - __locks_copy_lock(conflock, fl); + locks_copy_conflock(conflock, fl); error = -EAGAIN; if (!(request->fl_flags & FL_SLEEP)) goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 908af4f81680..5ab86f44b697 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -966,7 +966,7 @@ void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); -extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); +extern void locks_copy_conflock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); @@ -1026,7 +1026,7 @@ static inline void locks_init_lock(struct file_lock *fl) return; } -static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) +static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; } -- cgit v1.2.3 From 5c97d7b1479982a48cf2129062b880c2555049ac Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 22 Aug 2014 10:18:43 -0400 Subject: locks: New ops in lock_manager_operations for get/put owner NFSD or other lockmanager may increase the owner's reference, so adds two new options for copying and releasing owner. v5: change order from 2/6 to 3/6 v4: rename lm_copy_owner/lm_release_owner to lm_get_owner/lm_put_owner Reviewed-by: Jeff Layton Signed-off-by: Kinglong Mee Signed-off-by: Jeff Layton --- fs/locks.c | 12 ++++++++++-- include/linux/fs.h | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index ec9becd02d3d..5e83f3a99377 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -230,8 +230,12 @@ void locks_release_private(struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } - fl->fl_lmops = NULL; + if (fl->fl_lmops) { + if (fl->fl_lmops->lm_put_owner) + fl->fl_lmops->lm_put_owner(fl); + fl->fl_lmops = NULL; + } } EXPORT_SYMBOL_GPL(locks_release_private); @@ -274,8 +278,12 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); new->fl_ops = fl->fl_ops; } - if (fl->fl_lmops) + + if (fl->fl_lmops) { + if (fl->fl_lmops->lm_get_owner) + fl->fl_lmops->lm_get_owner(new, fl); new->fl_lmops = fl->fl_lmops; + } } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 5ab86f44b697..3b07ce2698de 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -868,6 +868,8 @@ struct file_lock_operations { struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); unsigned long (*lm_owner_key)(struct file_lock *); + void (*lm_get_owner)(struct file_lock *, struct file_lock *); + void (*lm_put_owner)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); void (*lm_break)(struct file_lock *); -- cgit v1.2.3 From 09802fd2a8caea2a2147fca8d7975697c5de573d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 10:18:44 -0400 Subject: lockd: rip out deferred lock handling from testlock codepath As Kinglong points out, the nlm_block->b_fl field is no longer used at all. Also, vfs_test_lock in the generic locking code will only return FILE_LOCK_DEFERRED if FL_SLEEP is set, and it isn't here. The only other place that returns that value is the DLM lock code, but it only does that in dlm_posix_lock, never in dlm_posix_get. Remove all of the deferred locking code from the testlock codepath since it doesn't appear to ever be used anyway. I do have a small concern that this might cause a behavior change in the case where you have a block already sitting on the list when the testlock request comes in, but that looks like it doesn't really work properly anyway. I think it's best to just pass that down to vfs_test_lock and let the filesystem report that instead of trying to infer what's going on with the lock by looking at an existing block. Cc: cluster-devel@redhat.com Signed-off-by: Jeff Layton Reviewed-by: Kinglong Mee --- fs/lockd/svclock.c | 55 +++++---------------------------------------- include/linux/lockd/lockd.h | 1 - 2 files changed, 6 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index acfa94d5b489..13db95f54176 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -245,7 +245,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, block->b_daemon = rqstp->rq_server; block->b_host = host; block->b_file = file; - block->b_fl = NULL; file->f_count++; /* Add to file's list of blocks */ @@ -295,7 +294,6 @@ static void nlmsvc_free_block(struct kref *kref) nlmsvc_freegrantargs(block->b_call); nlmsvc_release_call(block->b_call); nlm_release_file(block->b_file); - kfree(block->b_fl); kfree(block); } @@ -508,7 +506,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_host *host, struct nlm_lock *lock, struct nlm_lock *conflock, struct nlm_cookie *cookie) { - struct nlm_block *block = NULL; int error; __be32 ret; @@ -519,63 +516,26 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); - /* Get existing block (in case client is busy-waiting) */ - block = nlmsvc_lookup_block(file, lock); - - if (block == NULL) { - struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - - if (conf == NULL) - return nlm_granted; - block = nlmsvc_create_block(rqstp, host, file, lock, cookie); - if (block == NULL) { - kfree(conf); - return nlm_granted; - } - block->b_fl = conf; - } - if (block->b_flags & B_QUEUED) { - dprintk("lockd: nlmsvc_testlock deferred block %p flags %d fl %p\n", - block, block->b_flags, block->b_fl); - if (block->b_flags & B_TIMED_OUT) { - nlmsvc_unlink_block(block); - ret = nlm_lck_denied; - goto out; - } - if (block->b_flags & B_GOT_CALLBACK) { - nlmsvc_unlink_block(block); - if (block->b_fl != NULL - && block->b_fl->fl_type != F_UNLCK) { - lock->fl = *block->b_fl; - goto conf_lock; - } else { - ret = nlm_granted; - goto out; - } - } - ret = nlm_drop_reply; - goto out; - } - if (locks_in_grace(SVC_NET(rqstp))) { ret = nlm_lck_denied_grace_period; goto out; } + error = vfs_test_lock(file->f_file, &lock->fl); - if (error == FILE_LOCK_DEFERRED) { - ret = nlmsvc_defer_lock_rqst(rqstp, block); - goto out; - } if (error) { + /* We can't currently deal with deferred test requests */ + if (error == FILE_LOCK_DEFERRED) + WARN_ON_ONCE(1); + ret = nlm_lck_denied_nolocks; goto out; } + if (lock->fl.fl_type == F_UNLCK) { ret = nlm_granted; goto out; } -conf_lock: dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", lock->fl.fl_type, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -589,8 +549,6 @@ conf_lock: locks_release_private(&lock->fl); ret = nlm_lck_denied; out: - if (block) - nlmsvc_release_block(block); return ret; } @@ -661,7 +619,6 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l * This is a callback from the filesystem for VFS file lock requests. * It will be used if lm_grant is defined and the filesystem can not * respond to the request immediately. - * For GETLK request it will copy the reply to the nlm_block. * For SETLK or SETLKW request it will get the local posix lock. * In all cases it will move the block to the head of nlm_blocked q where * nlmsvc_retry_blocked() can send back a reply for SETLKW or revisit the diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 219d79627c05..ff82a32871b5 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -178,7 +178,6 @@ struct nlm_block { unsigned char b_granted; /* VFS granted lock */ struct nlm_file * b_file; /* file in question */ struct cache_req * b_cache_req; /* deferred request handling */ - struct file_lock * b_fl; /* set for GETLK */ struct cache_deferred_req * b_deferred_req; unsigned int b_flags; /* block flags */ #define B_QUEUED 1 /* lock queued */ -- cgit v1.2.3 From 699688a416524c3cea9eafaca69fc6c06c13c02e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 10:18:44 -0400 Subject: locks: remove lock_may_read and lock_may_write There are no callers of these functions. Signed-off-by: Jeff Layton --- fs/locks.c | 80 ------------------------------------------------------ include/linux/fs.h | 14 ---------- 2 files changed, 94 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index c3991dc80137..5200ffd2ba9b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2597,86 +2597,6 @@ static int __init proc_locks_init(void) module_init(proc_locks_init); #endif -/** - * lock_may_read - checks that the region is free of locks - * @inode: the inode that is being read - * @start: the first byte to read - * @len: the number of bytes to read - * - * Emulates Windows locking requirements. Whole-file - * mandatory locks (share modes) can prohibit a read and - * byte-range POSIX locks can prohibit a read if they overlap. - * - * N.B. this function is only ever called - * from knfsd and ownership of locks is never checked. - */ -int lock_may_read(struct inode *inode, loff_t start, unsigned long len) -{ - struct file_lock *fl; - int result = 1; - - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (IS_POSIX(fl)) { - if (fl->fl_type == F_RDLCK) - continue; - if ((fl->fl_end < start) || (fl->fl_start > (start + len))) - continue; - } else if (IS_FLOCK(fl)) { - if (!(fl->fl_type & LOCK_MAND)) - continue; - if (fl->fl_type & LOCK_READ) - continue; - } else - continue; - result = 0; - break; - } - spin_unlock(&inode->i_lock); - return result; -} - -EXPORT_SYMBOL(lock_may_read); - -/** - * lock_may_write - checks that the region is free of locks - * @inode: the inode that is being written - * @start: the first byte to write - * @len: the number of bytes to write - * - * Emulates Windows locking requirements. Whole-file - * mandatory locks (share modes) can prohibit a write and - * byte-range POSIX locks can prohibit a write if they overlap. - * - * N.B. this function is only ever called - * from knfsd and ownership of locks is never checked. - */ -int lock_may_write(struct inode *inode, loff_t start, unsigned long len) -{ - struct file_lock *fl; - int result = 1; - - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (IS_POSIX(fl)) { - if ((fl->fl_end < start) || (fl->fl_start > (start + len))) - continue; - } else if (IS_FLOCK(fl)) { - if (!(fl->fl_type & LOCK_MAND)) - continue; - if (fl->fl_type & LOCK_WRITE) - continue; - } else - continue; - result = 0; - break; - } - spin_unlock(&inode->i_lock); - return result; -} - -EXPORT_SYMBOL(lock_may_write); - static int __init filelock_init(void) { int i; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3b07ce2698de..458f733c96bd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -985,8 +985,6 @@ extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **); extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); -extern int lock_may_read(struct inode *, loff_t start, unsigned long count); -extern int lock_may_write(struct inode *, loff_t start, unsigned long count); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1117,18 +1115,6 @@ static inline int lease_modify(struct file_lock **before, int arg) { return -EINVAL; } - -static inline int lock_may_read(struct inode *inode, loff_t start, - unsigned long len) -{ - return 1; -} - -static inline int lock_may_write(struct inode *inode, loff_t start, - unsigned long len) -{ - return 1; -} #endif /* !CONFIG_FILE_LOCKING */ -- cgit v1.2.3 From 1c994a0909a556508c2cc26ab5d9e13c5ce33aa0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Aug 2014 06:49:41 -0400 Subject: locks: consolidate "nolease" routines GFS2 and NFS have setlease routines that always just return -EINVAL. Turn that into a generic routine that can live in fs/libfs.c. Cc: Cc: Steven Whitehouse Cc: Signed-off-by: Jeff Layton Acked-by: Trond Myklebust Reviewed-by: Christoph Hellwig --- fs/gfs2/file.c | 22 +--------------------- fs/libfs.c | 16 ++++++++++++++++ fs/nfs/file.c | 13 +------------ fs/nfs/internal.h | 1 - fs/nfs/nfs4file.c | 2 +- include/linux/fs.h | 1 + 6 files changed, 20 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 26b3f952e6b1..2c02478a86b0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -912,26 +912,6 @@ out_uninit: #ifdef CONFIG_GFS2_FS_LOCKING_DLM -/** - * gfs2_setlease - acquire/release a file lease - * @file: the file pointer - * @arg: lease type - * @fl: file lock - * - * We don't currently have a way to enforce a lease across the whole - * cluster; until we do, disable leases (by just returning -EINVAL), - * unless the administrator has requested purely local locking. - * - * Locking: called under i_lock - * - * Returns: errno - */ - -static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) -{ - return -EINVAL; -} - /** * gfs2_lock - acquire/release a posix lock on a file * @file: the file pointer @@ -1069,7 +1049,7 @@ const struct file_operations gfs2_file_fops = { .flock = gfs2_flock, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .setlease = gfs2_setlease, + .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/libfs.c b/fs/libfs.c index 88e3e00e2eca..29012a303ef8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1075,3 +1075,19 @@ struct inode *alloc_anon_inode(struct super_block *s) return inode; } EXPORT_SYMBOL(alloc_anon_inode); + +/** + * simple_nosetlease - generic helper for prohibiting leases + * @filp: file pointer + * @arg: type of lease to obtain + * @flp: new lease supplied for insertion + * + * Generic helper for filesystems that do not wish to allow leases to be set. + * All arguments are ignored and it just returns -EINVAL. + */ +int +simple_nosetlease(struct file *filp, long arg, struct file_lock **flp) +{ + return -EINVAL; +} +EXPORT_SYMBOL(simple_nosetlease); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 524dd80d1898..8c4048ecdad1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -891,17 +891,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) } EXPORT_SYMBOL_GPL(nfs_flock); -/* - * There is no protocol support for leases, so we have no way to implement - * them correctly in the face of opens by other clients. - */ -int nfs_setlease(struct file *file, long arg, struct file_lock **fl) -{ - dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg); - return -EINVAL; -} -EXPORT_SYMBOL_GPL(nfs_setlease); - const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = new_sync_read, @@ -918,6 +907,6 @@ const struct file_operations nfs_file_operations = { .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, - .setlease = nfs_setlease, + .setlease = simple_nosetlease, }; EXPORT_SYMBOL_GPL(nfs_file_operations); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9056622d2230..94d922ebb5ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -346,7 +346,6 @@ int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); int nfs_flock(struct file *, int, struct file_lock *); int nfs_check_flags(int); -int nfs_setlease(struct file *, long, struct file_lock **); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index a816f0627a6c..3e987ad9ae25 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -131,5 +131,5 @@ const struct file_operations nfs4_file_operations = { .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, - .setlease = nfs_setlease, + .setlease = simple_nosetlease, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 458f733c96bd..435e3d9ec5cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2599,6 +2599,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata); extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +extern int simple_nosetlease(struct file *, long, struct file_lock **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); -- cgit v1.2.3 From e0b93eddfe17dcb7d644eb5d6ad02a86fc41a977 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 11:27:32 -0400 Subject: security: make security_file_set_fowner, f_setown and __f_setown void return security_file_set_fowner always returns 0, so make it f_setown and __f_setown void return functions and fix up the error handling in the callers. Cc: linux-security-module@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig --- drivers/net/tun.c | 4 +--- drivers/tty/tty_io.c | 3 ++- fs/fcntl.c | 21 +++++++-------------- fs/locks.c | 2 +- fs/notify/dnotify/dnotify.c | 8 +------- include/linux/fs.h | 4 ++-- include/linux/security.h | 8 ++++---- net/socket.c | 3 ++- security/capability.c | 4 ++-- security/security.c | 4 ++-- security/selinux/hooks.c | 4 +--- security/smack/smack_lsm.c | 3 +-- 12 files changed, 26 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index acaaf6784179..186ce541c657 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2152,9 +2152,7 @@ static int tun_chr_fasync(int fd, struct file *file, int on) goto out; if (on) { - ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0); - if (ret) - goto out; + __f_setown(file, task_pid(current), PIDTYPE_PID, 0); tfile->flags |= TUN_FASYNC; } else tfile->flags &= ~TUN_FASYNC; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 8fbad3410c75..aea3b66f7bf2 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2163,8 +2163,9 @@ static int __tty_fasync(int fd, struct file *filp, int on) } get_pid(pid); spin_unlock_irqrestore(&tty->ctrl_lock, flags); - retval = __f_setown(filp, pid, type, 0); + __f_setown(filp, pid, type, 0); put_pid(pid); + retval = 0; } out: return retval; diff --git a/fs/fcntl.c b/fs/fcntl.c index 22d1c3df61ac..99d440a4a6ba 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -98,26 +98,19 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, write_unlock_irq(&filp->f_owner.lock); } -int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, +void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { - int err; - - err = security_file_set_fowner(filp); - if (err) - return err; - + security_file_set_fowner(filp); f_modown(filp, pid, type, force); - return 0; } EXPORT_SYMBOL(__f_setown); -int f_setown(struct file *filp, unsigned long arg, int force) +void f_setown(struct file *filp, unsigned long arg, int force) { enum pid_type type; struct pid *pid; int who = arg; - int result; type = PIDTYPE_PID; if (who < 0) { type = PIDTYPE_PGID; @@ -125,9 +118,8 @@ int f_setown(struct file *filp, unsigned long arg, int force) } rcu_read_lock(); pid = find_vpid(who); - result = __f_setown(filp, pid, type, force); + __f_setown(filp, pid, type, force); rcu_read_unlock(); - return result; } EXPORT_SYMBOL(f_setown); @@ -181,7 +173,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg) if (owner.pid && !pid) ret = -ESRCH; else - ret = __f_setown(filp, pid, type, 1); + __f_setown(filp, pid, type, 1); rcu_read_unlock(); return ret; @@ -302,7 +294,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, force_successful_syscall_return(); break; case F_SETOWN: - err = f_setown(filp, arg, 1); + f_setown(filp, arg, 1); + err = 0; break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); diff --git a/fs/locks.c b/fs/locks.c index 5200ffd2ba9b..f5f648e003dd 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1776,7 +1776,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new)) new = NULL; - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); + __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); out_unlock: spin_unlock(&inode->i_lock); if (fl) diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index abc8cbcfe90e..caaaf9dfe353 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -346,13 +346,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) goto out; } - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); - if (error) { - /* if we added, we must shoot */ - if (dn_mark == new_dn_mark) - destroy = 1; - goto out; - } + __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); error = attach_dn(dn, dn_mark, id, fd, filp, mask); /* !error means that we attached the dn to the dn_mark, so don't free it */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 435e3d9ec5cf..96528f73dda4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1139,8 +1139,8 @@ extern void fasync_free(struct fasync_struct *); /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); -extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern int f_setown(struct file *filp, unsigned long arg, int force); +extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); +extern void f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); diff --git a/include/linux/security.h b/include/linux/security.h index 623f90e5f38d..b10e7af95d3b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1559,7 +1559,7 @@ struct security_operations { int (*file_lock) (struct file *file, unsigned int cmd); int (*file_fcntl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*file_set_fowner) (struct file *file); + void (*file_set_fowner) (struct file *file); int (*file_send_sigiotask) (struct task_struct *tsk, struct fown_struct *fown, int sig); int (*file_receive) (struct file *file); @@ -1834,7 +1834,7 @@ int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg); -int security_file_set_fowner(struct file *file); +void security_file_set_fowner(struct file *file); int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); @@ -2312,9 +2312,9 @@ static inline int security_file_fcntl(struct file *file, unsigned int cmd, return 0; } -static inline int security_file_set_fowner(struct file *file) +static inline void security_file_set_fowner(struct file *file) { - return 0; + return; } static inline int security_file_send_sigiotask(struct task_struct *tsk, diff --git a/net/socket.c b/net/socket.c index 95ee7d8682e7..769c9671847e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1069,7 +1069,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = -EFAULT; if (get_user(pid, (int __user *)argp)) break; - err = f_setown(sock->file, pid, 1); + f_setown(sock->file, pid, 1); + err = 0; break; case FIOGETOWN: case SIOCGPGRP: diff --git a/security/capability.c b/security/capability.c index a74fde6a7468..d68c57a62bcf 100644 --- a/security/capability.c +++ b/security/capability.c @@ -343,9 +343,9 @@ static int cap_file_fcntl(struct file *file, unsigned int cmd, return 0; } -static int cap_file_set_fowner(struct file *file) +static void cap_file_set_fowner(struct file *file) { - return 0; + return; } static int cap_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/security.c b/security/security.c index e41b1a8d7644..18b35c63fc0c 100644 --- a/security/security.c +++ b/security/security.c @@ -775,9 +775,9 @@ int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_fcntl(file, cmd, arg); } -int security_file_set_fowner(struct file *file) +void security_file_set_fowner(struct file *file) { - return security_ops->file_set_fowner(file); + security_ops->file_set_fowner(file); } int security_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b0e940497e23..ada0d0bf3463 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3346,14 +3346,12 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd, return err; } -static int selinux_file_set_fowner(struct file *file) +static void selinux_file_set_fowner(struct file *file) { struct file_security_struct *fsec; fsec = file->f_security; fsec->fown_sid = current_sid(); - - return 0; } static int selinux_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e6ab307ce86e..69e5635d89e5 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1390,12 +1390,11 @@ static int smack_mmap_file(struct file *file, * Returns 0 * Further research may be required on this one. */ -static int smack_file_set_fowner(struct file *file) +static void smack_file_set_fowner(struct file *file) { struct smack_known *skp = smk_of_current(); file->f_security = skp->smk_known; - return 0; } /** -- cgit v1.2.3 From 2ae4c673e3cbd69bc2decf6d7f5961f3c7b9b38b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Sep 2014 15:43:52 -0700 Subject: f2fs: retain inconsistency information to initiate fsck.f2fs This patch adds sbi->need_fsck to conduct fsck.f2fs later. This flag can only be removed by fsck.f2fs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 1 + include/linux/f2fs_fs.h | 1 + 4 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index cb5cb4ca1814..5af97d99106e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -882,6 +882,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + if (sbi->need_fsck) + set_ckpt_flags(ckpt, CP_FSCK_FLAG); + /* update SIT/NAT bitmap */ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 222ff5b4d1e1..210c62df08c3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -434,6 +434,7 @@ struct f2fs_sb_info { struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ int s_dirty; /* dirty flag for checkpoint */ + bool need_fsck; /* need fsck.f2fs to fix */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41bdf511003d..a6923041f41a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -849,6 +849,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; + sbi->need_fsck = false; } /* diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 08ed2b0a96e6..9ca1ff3d4662 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -85,6 +85,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 #define CP_COMPACT_SUM_FLAG 0x00000004 #define CP_ORPHAN_PRESENT_FLAG 0x00000002 -- cgit v1.2.3 From 87354059881ce9315181604dc17076c535f4d744 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 22 Jul 2014 20:41:42 -0400 Subject: ftrace: Add helper function ftrace_ops_get_func() Add the helper function to what the mcount trampoline is to call for a ftrace_ops function. This helper will be used by arch code in the future to set up dynamic trampolines. But as this does the same tests that are performed in choosing what function to call for the default mcount trampoline, might as well use it to clean up the existing code. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 47 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f0b0edbf55a9..ef37286547fc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,6 +56,8 @@ struct ftrace_ops; typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs); +ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); + /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are * set in the flags member. diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 17b606362ab4..dabf734f909c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -259,20 +259,12 @@ static void update_ftrace_function(void) * then have the mcount trampoline call the function directly. */ if (ftrace_ops_list == &ftrace_list_end || - (ftrace_ops_list->next == &ftrace_list_end && - !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && - !FTRACE_FORCE_LIST_FUNC)) { + (ftrace_ops_list->next == &ftrace_list_end)) { + /* Set the ftrace_ops that the arch callback uses */ set_function_trace_op = ftrace_ops_list; - /* - * If the func handles its own recursion, call it directly. - * Otherwise call the recursion protected function that - * will call the ftrace ops function. - */ - if (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) - func = ftrace_ops_list->func; - else - func = ftrace_ops_recurs_func; + + func = ftrace_ops_get_func(ftrace_ops_list); } else { /* Just use the default ftrace_ops */ set_function_trace_op = &ftrace_list_end; @@ -4856,6 +4848,37 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, trace_clear_recursion(bit); } +/** + * ftrace_ops_get_func - get the function a trampoline should call + * @ops: the ops to get the function for + * + * Normally the mcount trampoline will call the ops->func, but there + * are times that it should not. For example, if the ops does not + * have its own recursion protection, then it should call the + * ftrace_ops_recurs_func() instead. + * + * Returns the function that the trampoline should call for @ops. + */ +ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) +{ + /* + * If this is a dynamic ops or we force list func, + * then it needs to call the list anyway. + */ + if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) + return ftrace_ops_list_func; + + /* + * If the func handles its own recursion, call it directly. + * Otherwise call the recursion protected function that + * will call the ftrace ops function. + */ + if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE)) + return ftrace_ops_recurs_func; + + return ops->func; +} + static void clear_ftrace_swapper(void) { struct task_struct *p; -- cgit v1.2.3 From 738cbe72adc5c8f2016c4c68aa5162631d4f27e1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:47 +0200 Subject: net: bpf: consolidate JIT binary allocator Introduced in commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") and later on replicated in aa2d2c73c21f ("s390/bpf,jit: address randomize and write protect jit code") for s390 architecture, write protection for BPF JIT images got added and a random start address of the JIT code, so that it's not on a page boundary anymore. Since both use a very similar allocator for the BPF binary header, we can consolidate this code into the BPF core as it's mostly JIT independant anyway. This will also allow for future archs that support DEBUG_SET_MODULE_RONX to just reuse instead of reimplementing it. JIT tested on x86_64 and s390x with BPF test suite. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Eric Dumazet Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 45 ++++++++------------------------------- arch/x86/net/bpf_jit_comp.c | 50 ++++++++++---------------------------------- include/linux/filter.h | 13 ++++++++++++ kernel/bpf/core.c | 39 ++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f2833c5b218a..b734f975c22e 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -5,11 +5,9 @@ * * Author(s): Martin Schwidefsky */ -#include #include #include #include -#include #include #include #include @@ -148,6 +146,12 @@ struct bpf_jit { ret; \ }) +static void bpf_jit_fill_hole(void *area, unsigned int size) +{ + /* Fill whole space with illegal instructions */ + memset(area, 0, size); +} + static void bpf_jit_prologue(struct bpf_jit *jit) { /* Save registers and create stack frame if necessary */ @@ -780,38 +784,6 @@ out: return -1; } -/* - * Note: for security reasons, bpf code will follow a randomly - * sized amount of illegal instructions. - */ -struct bpf_binary_header { - unsigned int pages; - u8 image[]; -}; - -static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, - u8 **image_ptr) -{ - struct bpf_binary_header *header; - unsigned int sz, hole; - - /* Most BPF filters are really small, but if some of them fill a page, - * allow at least 128 extra bytes for illegal instructions. - */ - sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE); - header = module_alloc(sz); - if (!header) - return NULL; - memset(header, 0, sz); - header->pages = sz / PAGE_SIZE; - hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header)); - /* Insert random number of illegal instructions before BPF code - * and make sure the first instruction starts at an even address. - */ - *image_ptr = &header->image[(prandom_u32() % hole) & -2]; - return header; -} - void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; @@ -850,7 +822,8 @@ void bpf_jit_compile(struct bpf_prog *fp) size = prg_len + lit_len; if (size >= BPF_SIZE_MAX) goto out; - header = bpf_alloc_binary(size, &jit.start); + header = bpf_jit_binary_alloc(size, &jit.start, + 2, bpf_jit_fill_hole); if (!header) goto out; jit.prg = jit.mid = jit.start + prg_len; @@ -884,7 +857,7 @@ void bpf_jit_free(struct bpf_prog *fp) goto free_filter; set_memory_rw(addr, header->pages); - module_free(NULL, header); + bpf_jit_binary_free(header); free_filter: bpf_prog_unlock_free(fp); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 06f8c17f5484..9de0b5476b0c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -8,12 +8,10 @@ * as published by the Free Software Foundation; version 2 * of the License. */ -#include -#include #include #include #include -#include +#include int bpf_jit_enable __read_mostly; @@ -109,39 +107,6 @@ static inline void bpf_flush_icache(void *start, void *end) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) -struct bpf_binary_header { - unsigned int pages; - /* Note : for security reasons, bpf code will follow a randomly - * sized amount of int3 instructions - */ - u8 image[]; -}; - -static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, - u8 **image_ptr) -{ - unsigned int sz, hole; - struct bpf_binary_header *header; - - /* Most of BPF filters are really small, - * but if some of them fill a page, allow at least - * 128 extra bytes to insert a random section of int3 - */ - sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE); - header = module_alloc(sz); - if (!header) - return NULL; - - memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ - - header->pages = sz / PAGE_SIZE; - hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header)); - - /* insert a random number of int3 instructions before BPF code */ - *image_ptr = &header->image[prandom_u32() % hole]; - return header; -} - /* pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_REG + 1) @@ -206,6 +171,12 @@ static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } +static void jit_fill_hole(void *area, unsigned int size) +{ + /* fill whole space with int3 instructions */ + memset(area, 0xcc, size); +} + struct jit_context { unsigned int cleanup_addr; /* epilogue code offset */ bool seen_ld_abs; @@ -959,7 +930,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) if (proglen <= 0) { image = NULL; if (header) - module_free(NULL, header); + bpf_jit_binary_free(header); goto out; } if (image) { @@ -969,7 +940,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog) break; } if (proglen == oldproglen) { - header = bpf_alloc_binary(proglen, &image); + header = bpf_jit_binary_alloc(proglen, &image, + 1, jit_fill_hole); if (!header) goto out; } @@ -998,7 +970,7 @@ void bpf_jit_free(struct bpf_prog *fp) goto free_filter; set_memory_rw(addr, header->pages); - module_free(NULL, header); + bpf_jit_binary_free(header); free_filter: bpf_prog_unlock_free(fp); diff --git a/include/linux/filter.h b/include/linux/filter.h index 8f82ef3f1cdd..868764fcffb8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -289,6 +289,11 @@ struct sock_fprog_kern { struct sock_filter *filter; }; +struct bpf_binary_header { + unsigned int pages; + u8 image[]; +}; + struct bpf_work_struct { struct bpf_prog *prog; struct work_struct work; @@ -358,6 +363,14 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); +typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_jit_binary_free(struct bpf_binary_header *hdr); + static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { bpf_prog_unlock_ro(fp); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2c2bfaacce66..8ee520f0ec70 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -20,9 +20,12 @@ * Andi Kleen - Fix a few bad bugs and races. * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ + #include #include #include +#include +#include #include /* Registers */ @@ -125,6 +128,42 @@ void __bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(__bpf_prog_free); +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns) +{ + struct bpf_binary_header *hdr; + unsigned int size, hole, start; + + /* Most of BPF filters are really small, but if some of them + * fill a page, allow at least 128 extra bytes to insert a + * random section of illegal instructions. + */ + size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + hdr = module_alloc(size); + if (hdr == NULL) + return NULL; + + /* Fill space with illegal/arch-dep instructions. */ + bpf_fill_ill_insns(hdr, size); + + hdr->pages = size / PAGE_SIZE; + hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), + PAGE_SIZE - sizeof(*hdr)); + start = (prandom_u32() % hole) & ~(alignment - 1); + + /* Leave a random number of instructions before BPF code. */ + *image_ptr = &hdr->image[start]; + + return hdr; +} + +void bpf_jit_binary_free(struct bpf_binary_header *hdr) +{ + module_free(NULL, hdr); +} + /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs * anyway later on, so do not let the compiler omit it. -- cgit v1.2.3 From 286aad3c4014ca825c447e07e24f8929e6d266d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:49 +0200 Subject: net: bpf: be friendly to kmemcheck Reported by Mikulas Patocka, kmemcheck currently barks out a false positive since we don't have special kmemcheck annotation for bitfields used in bpf_prog structure. We currently have jited:1, len:31 and thus when accessing len while CONFIG_KMEMCHECK enabled, kmemcheck throws a warning that we're reading uninitialized memory. As we don't need the whole bit universe for pages member, we can just split it to u16 and use a bool flag for jited instead of a bitfield. Signed-off-by: Mikulas Patocka Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 2 +- arch/mips/net/bpf_jit.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 6 +++--- net/core/filter.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 2d1a5b93d91c..6b45f649eff0 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -933,7 +933,7 @@ void bpf_jit_compile(struct bpf_prog *fp) set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)ctx.target; - fp->jited = 1; + fp->jited = true; out: kfree(ctx.offsets); return; diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index cfa83cf2447d..0e97ccd29fe3 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1417,7 +1417,7 @@ void bpf_jit_compile(struct bpf_prog *fp) bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; - fp->jited = 1; + fp->jited = true; out: kfree(ctx.offsets); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 40c53ff59124..cbae2dfd053c 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -686,7 +686,7 @@ void bpf_jit_compile(struct bpf_prog *fp) ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b734f975c22e..555f5c7e83ab 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -842,7 +842,7 @@ void bpf_jit_compile(struct bpf_prog *fp) if (jit.start) { set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.start; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index f7a736b645e8..b2ad9dc5425e 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -801,7 +801,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (image) { bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9de0b5476b0c..d56cd1f515bd 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -955,7 +955,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, image + proglen); set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; - prog->jited = 1; + prog->jited = true; } out: kfree(addrs); diff --git a/include/linux/filter.h b/include/linux/filter.h index 868764fcffb8..4b59edead908 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -300,9 +300,9 @@ struct bpf_work_struct { }; struct bpf_prog { - u32 pages; /* Number of allocated pages */ - u32 jited:1, /* Is our filter JIT'ed? */ - len:31; /* Number of filter blocks */ + u16 pages; /* Number of allocated pages */ + bool jited; /* Is our filter JIT'ed? */ + u32 len; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct bpf_work_struct *work; /* Deferred free work struct */ unsigned int (*bpf_func)(const struct sk_buff *skb, diff --git a/net/core/filter.c b/net/core/filter.c index fa5b7d0f77ac..dfc716ffa44b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -972,7 +972,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) int err; fp->bpf_func = NULL; - fp->jited = 0; + fp->jited = false; err = bpf_check_classic(fp->insns, fp->len); if (err) { -- cgit v1.2.3 From a0c7b164ad115ec0556dc0904ee2218cbc5cedfa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Sep 2014 23:13:57 +0100 Subject: regulator: of: Provide simplified DT parsing method Currently regulator drivers which support DT all repeat very similar code to supply a list of known regulator identifiers to be matched with DT, convert that to platform data which is then matched up with the regulators as they are registered. This is both fiddly to get right and for devices which can use the standard helpers to provide their operations is the main source of code in the driver. Since this code is essentially identical for most drivers we can factor it out into the core, moving the identifiers in the match table into the regulator descriptors and also allowing drivers to pass in the name of the subnode to search. When a driver provides an of_match string for the regulator the core will attempt to use that to obtain init_data, allowing the driver to remove all explicit code for DT parsing and simply provide data instead. The current code leaks the phandles for the child nodes, this will be addressed incrementally and makes no practical difference for FDT anyway as the DT data structures are never freed. Signed-off-by: Mark Brown --- drivers/regulator/core.c | 10 +++++--- drivers/regulator/internal.h | 4 ++++ drivers/regulator/of_regulator.c | 51 ++++++++++++++++++++++++++++++++++++++++ include/linux/regulator/driver.h | 4 ++++ 4 files changed, 66 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..496002efd961 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3516,12 +3516,17 @@ regulator_register(const struct regulator_desc *regulator_desc, return ERR_PTR(-EINVAL); } - init_data = config->init_data; - rdev = kzalloc(sizeof(struct regulator_dev), GFP_KERNEL); if (rdev == NULL) return ERR_PTR(-ENOMEM); + init_data = regulator_of_get_init_data(dev, regulator_desc, + &rdev->dev.of_node); + if (!init_data) { + init_data = config->init_data; + rdev->dev.of_node = of_node_get(config->of_node); + } + mutex_lock(®ulator_list_mutex); mutex_init(&rdev->mutex); @@ -3548,7 +3553,6 @@ regulator_register(const struct regulator_desc *regulator_desc, /* register with sysfs */ rdev->dev.class = ®ulator_class; - rdev->dev.of_node = of_node_get(config->of_node); rdev->dev.parent = dev; dev_set_name(&rdev->dev, "regulator.%d", atomic_inc_return(®ulator_no) - 1); diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index 84bbda10c396..a6043ad32ead 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -35,4 +35,8 @@ struct regulator { struct dentry *debugfs; }; +struct regulator_init_data *regulator_of_get_init_data(struct device *dev, + const struct regulator_desc *desc, + struct device_node **node); + #endif diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index ee5e67bc8d5b..7a51814abdc5 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -14,8 +14,11 @@ #include #include #include +#include #include +#include "internal.h" + static void of_get_regulation_constraints(struct device_node *np, struct regulator_init_data **init_data) { @@ -189,3 +192,51 @@ int of_regulator_match(struct device *dev, struct device_node *node, return count; } EXPORT_SYMBOL_GPL(of_regulator_match); + +struct regulator_init_data *regulator_of_get_init_data(struct device *dev, + const struct regulator_desc *desc, + struct device_node **node) +{ + struct device_node *search, *child; + struct regulator_init_data *init_data = NULL; + const char *name; + + if (!dev->of_node || !desc->of_match) + return NULL; + + if (desc->regulators_node) + search = of_get_child_by_name(dev->of_node, + desc->regulators_node); + else + search = dev->of_node; + + if (!search) { + dev_err(dev, "Failed to find regulator container node\n"); + return NULL; + } + + for_each_child_of_node(search, child) { + name = of_get_property(child, "regulator-compatible", NULL); + if (!name) + name = child->name; + + if (strcmp(desc->of_match, name)) + continue; + + init_data = of_get_regulator_init_data(dev, child); + if (!init_data) { + dev_err(dev, + "failed to parse DT for regulator %s\n", + child->name); + break; + } + + of_node_get(child); + *node = child; + break; + } + + of_node_put(search); + + return init_data; +} diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bbe03a1924c0..c35f5f97a147 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -203,6 +203,8 @@ enum regulator_type { * * @name: Identifying name for the regulator. * @supply_name: Identifying the regulator supply + * @of_match: Name used to identify regulator in DT. + * @regulators_node: Name of node containing regulator definitions in DT. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. @@ -242,6 +244,8 @@ enum regulator_type { struct regulator_desc { const char *name; const char *supply_name; + const char *of_match; + const char *regulators_node; int id; bool continuous_voltage_range; unsigned n_voltages; -- cgit v1.2.3 From e1effa0144a1ddf5b456c388ffaf784f3c5163fd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 Aug 2014 17:19:38 -0400 Subject: ftrace: Annotate the ops operation on update Add three new flags for ftrace_ops: FTRACE_OPS_FL_ADDING FTRACE_OPS_FL_REMOVING FTRACE_OPS_FL_MODIFYING These will be set for the ftrace_ops when they are first added to the function tracing, being removed from function tracing or just having their functions changed from function tracing, respectively. This will be needed to remove the tramp_hash, which can grow quite big. The tramp_hash is used to note what functions a ftrace_ops is using a trampoline for. Denoting which ftrace_ops is being modified, will allow us to use the ftrace_ops hashes themselves, which are much smaller as they have a global flag to denote if a ftrace_ops is tracing all functions, as well as a notrace hash if the ftrace_ops is tracing all but a few. The tramp_hash just creates a hash item for every function, which can go into the 10s of thousands if all functions are using the ftrace_ops trampoline. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 45 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ef37286547fc..d9216f6385d9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -91,6 +91,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * INITIALIZED - The ftrace_ops has already been initialized (first use time * register_ftrace_function() is called, it will initialized the ops) * DELETED - The ops are being deleted, do not let them be registered again. + * ADDING - The ops is in the process of being added. + * REMOVING - The ops is in the process of being removed. + * MODIFYING - The ops is in the process of changing its filter functions. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -102,6 +105,9 @@ enum { FTRACE_OPS_FL_STUB = 1 << 6, FTRACE_OPS_FL_INITIALIZED = 1 << 7, FTRACE_OPS_FL_DELETED = 1 << 8, + FTRACE_OPS_FL_ADDING = 1 << 9, + FTRACE_OPS_FL_REMOVING = 1 << 10, + FTRACE_OPS_FL_MODIFYING = 1 << 11, }; #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 858ac16f8492..e43c793093e5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1057,6 +1057,12 @@ static struct pid * const ftrace_swapper_pid = &init_struct_pid; static struct ftrace_ops *removed_ops; +/* + * Set when doing a global update, like enabling all recs or disabling them. + * It is not set when just updating a single ftrace_ops. + */ +static bool update_all_ops; + #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD #endif @@ -2366,6 +2372,13 @@ static void ftrace_run_update_code(int command) FTRACE_WARN_ON(ret); } +static void ftrace_run_modify_code(struct ftrace_ops *ops, int command) +{ + ops->flags |= FTRACE_OPS_FL_MODIFYING; + ftrace_run_update_code(command); + ops->flags &= ~FTRACE_OPS_FL_MODIFYING; +} + static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; @@ -2387,6 +2400,13 @@ static void ftrace_startup_enable(int command) ftrace_run_update_code(command); } +static void ftrace_startup_all(int command) +{ + update_all_ops = true; + ftrace_startup_enable(command); + update_all_ops = false; +} + static int ftrace_startup(struct ftrace_ops *ops, int command) { int ret; @@ -2401,12 +2421,22 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) ftrace_start_up++; command |= FTRACE_UPDATE_CALLS; - ops->flags |= FTRACE_OPS_FL_ENABLED; + /* + * Note that ftrace probes uses this to start up + * and modify functions it will probe. But we still + * set the ADDING flag for modification, as probes + * do not have trampolines. If they add them in the + * future, then the probes will need to distinguish + * between adding and updating probes. + */ + ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); + ops->flags &= ~FTRACE_OPS_FL_ADDING; + return 0; } @@ -2456,11 +2486,12 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * If the ops uses a trampoline, then it needs to be * tested first on update. */ + ops->flags |= FTRACE_OPS_FL_REMOVING; removed_ops = ops; ftrace_run_update_code(command); - removed_ops = NULL; + ops->flags &= ~FTRACE_OPS_FL_REMOVING; /* * Dynamic ops may be freed, we must make sure that all @@ -3373,7 +3404,7 @@ static void __enable_ftrace_function_probe(void) if (ftrace_probe_registered) { /* still need to update the function call sites */ if (ftrace_enabled) - ftrace_run_update_code(FTRACE_UPDATE_CALLS); + ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS); return; } @@ -3792,7 +3823,7 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) static void ftrace_ops_update_code(struct ftrace_ops *ops) { if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) - ftrace_run_update_code(FTRACE_UPDATE_CALLS); + ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS); } static int @@ -4717,6 +4748,7 @@ core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } +static inline void ftrace_startup_all(int command) { } /* Keep as macros so we do not need to define the commands */ # define ftrace_startup(ops, command) \ ({ \ @@ -5016,7 +5048,8 @@ static int ftrace_pid_add(int p) set_ftrace_pid_task(pid); ftrace_update_pid_func(); - ftrace_startup_enable(0); + + ftrace_startup_all(0); mutex_unlock(&ftrace_lock); return 0; @@ -5045,7 +5078,7 @@ static void ftrace_pid_reset(void) } ftrace_update_pid_func(); - ftrace_startup_enable(0); + ftrace_startup_all(0); mutex_unlock(&ftrace_lock); } -- cgit v1.2.3 From fef5aeeee9e3717e7aea991a7ae9ff6a7a2d4c85 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 24 Jul 2014 12:25:47 -0400 Subject: ftrace: Replace tramp_hash with old_*_hash to save space Allowing function callbacks to declare their own trampolines requires that each ftrace_ops that has a trampoline must have some sort of accounting that keeps track of which ops has a trampoline attached to a record. The easy way to solve this was to add a "tramp_hash" that created a hash entry for every function that a ops uses with a trampoline. But since we can have literally tens of thousands of functions being traced, that means we need tens of thousands of descriptors to map the ops to the function in the hash. This is quite expensive and can cause enabling and disabling the function graph tracer to take some time to start and stop. It can take up to several seconds to disable or enable all functions in the function graph tracer for this reason. The better approach albeit more complex, is to keep track of how ops are being enabled and disabled, and use that along with the counting of the number of ops attached to records, to determive what ops has a trampoline attached to a record at enabling and disabling of tracing. To do this, the tramp_hash has been replaced with an old_filter_hash and old_notrace_hash, which get the copy of the ops filter_hash and notrace_hash respectively. The old hashes is kept until the ops has been modified or removed and the old hashes are used with the logic of the accounting to determine the ops that have the trampoline of a record. The reason this has less of a footprint is due to the trick that an "empty" hash in the filter_hash means "all functions" and an empty hash in the notrace hash means "no functions" in the hash. This is much more efficienct, doesn't have the delay, and takes up much less memory, as we do not need to map all the functions but just figure out which functions are mapped at the time it is enabled or disabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 239 +++++++++++++++++-------------------------------- 2 files changed, 85 insertions(+), 156 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d9216f6385d9..662697babd48 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -140,7 +140,7 @@ struct ftrace_ops { int nr_trampolines; struct ftrace_ops_hash local_hash; struct ftrace_ops_hash *func_hash; - struct ftrace_hash *tramp_hash; + struct ftrace_ops_hash old_hash; unsigned long trampoline; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e43c793093e5..d325a1e76554 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1373,6 +1373,21 @@ update: return 0; } +static bool hash_contains_ip(unsigned long ip, + struct ftrace_ops_hash *hash) +{ + /* + * The function record is a match if it exists in the filter + * hash and not in the notrace hash. Note, an emty hash is + * considered a match for the filter hash, but an empty + * notrace hash is considered not in the notrace hash. + */ + return (ftrace_hash_empty(hash->filter_hash) || + ftrace_lookup_ip(hash->filter_hash, ip)) && + (ftrace_hash_empty(hash->notrace_hash) || + !ftrace_lookup_ip(hash->notrace_hash, ip)); +} + /* * Test the hashes for this ops to see if we want to call * the ops->func or not. @@ -1388,8 +1403,7 @@ update: static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { - struct ftrace_hash *filter_hash; - struct ftrace_hash *notrace_hash; + struct ftrace_ops_hash hash; int ret; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS @@ -1402,13 +1416,10 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) return 0; #endif - filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); - notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); + hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); + hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); - if ((ftrace_hash_empty(filter_hash) || - ftrace_lookup_ip(filter_hash, ip)) && - (ftrace_hash_empty(notrace_hash) || - !ftrace_lookup_ip(notrace_hash, ip))) + if (hash_contains_ip(ip, &hash)) ret = 1; else ret = 0; @@ -1520,46 +1531,6 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) return keep_regs; } -static void ftrace_remove_tramp(struct ftrace_ops *ops, - struct dyn_ftrace *rec) -{ - /* If TRAMP is not set, no ops should have a trampoline for this */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - return; - - rec->flags &= ~FTRACE_FL_TRAMP; - - if ((!ftrace_hash_empty(ops->func_hash->filter_hash) && - !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) || - ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) - return; - /* - * The tramp_hash entry will be removed at time - * of update. - */ - ops->nr_trampolines--; -} - -static void ftrace_clear_tramps(struct dyn_ftrace *rec, struct ftrace_ops *ops) -{ - struct ftrace_ops *op; - - /* If TRAMP is not set, no ops should have a trampoline for this */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - return; - - do_for_each_ftrace_op(op, ftrace_ops_list) { - /* - * This function is called to clear other tramps - * not the one that is being updated. - */ - if (op == ops) - continue; - if (op->nr_trampolines) - ftrace_remove_tramp(op, rec); - } while_for_each_ftrace_op(op); -} - static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int filter_hash, bool inc) @@ -1648,18 +1619,16 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * function, and the ops has a trampoline registered * for it, then we can call it directly. */ - if (ftrace_rec_count(rec) == 1 && ops->trampoline) { + if (ftrace_rec_count(rec) == 1 && ops->trampoline) rec->flags |= FTRACE_FL_TRAMP; - ops->nr_trampolines++; - } else { + else /* * If we are adding another function callback * to this function, and the previous had a * custom trampoline in use, then we need to go * back to the default trampoline. */ - ftrace_clear_tramps(rec, ops); - } + rec->flags &= ~FTRACE_FL_TRAMP; /* * If any ops wants regs saved for this function @@ -1672,9 +1641,6 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, return; rec->flags--; - if (ops->trampoline && !ftrace_rec_count(rec)) - ftrace_remove_tramp(ops, rec); - /* * If the rec had REGS enabled and the ops that is * being removed had REGS set, then see if there is @@ -1688,6 +1654,17 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags &= ~FTRACE_FL_REGS; } + /* + * If the rec had TRAMP enabled, then it needs to + * be cleared. As TRAMP can only be enabled iff + * there is only a single ops attached to it. + * In otherwords, always disable it on decrementing. + * In the future, we may set it if rec count is + * decremented to one, and the ops that is left + * has a trampoline. + */ + rec->flags &= ~FTRACE_FL_TRAMP; + /* * flags will be cleared in ftrace_check_record() * if rec count is zero. @@ -1910,15 +1887,14 @@ static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; do_for_each_ftrace_op(op, ftrace_ops_list) { if (!op->trampoline) continue; - if (ftrace_lookup_ip(op->func_hash->filter_hash, rec->ip) && - (ftrace_hash_empty(op->func_hash->notrace_hash) || - !ftrace_lookup_ip(op->func_hash->notrace_hash, rec->ip))) + if (hash_contains_ip(ip, op->func_hash)) return op; } while_for_each_ftrace_op(op); @@ -1929,18 +1905,51 @@ static struct ftrace_ops * ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; - /* Removed ops need to be tested first */ - if (removed_ops && removed_ops->tramp_hash) { - if (ftrace_lookup_ip(removed_ops->tramp_hash, rec->ip)) + /* + * Need to check removed ops first. + * If they are being removed, and this rec has a tramp, + * and this rec is in the ops list, then it would be the + * one with the tramp. + */ + if (removed_ops) { + if (hash_contains_ip(ip, &removed_ops->old_hash)) return removed_ops; } + /* + * Need to find the current trampoline for a rec. + * Now, a trampoline is only attached to a rec if there + * was a single 'ops' attached to it. But this can be called + * when we are adding another op to the rec or removing the + * current one. Thus, if the op is being added, we can + * ignore it because it hasn't attached itself to the rec + * yet. That means we just need to find the op that has a + * trampoline and is not beeing added. + */ do_for_each_ftrace_op(op, ftrace_ops_list) { - if (!op->tramp_hash) + + if (!op->trampoline) + continue; + + /* + * If the ops is being added, it hasn't gotten to + * the point to be removed from this tree yet. + */ + if (op->flags & FTRACE_OPS_FL_ADDING) continue; - if (ftrace_lookup_ip(op->tramp_hash, rec->ip)) + /* + * If the ops is not being added and has a trampoline, + * then it must be the one that we want! + */ + if (hash_contains_ip(ip, op->func_hash)) + return op; + + /* If the ops is being modified, it may be in the old hash. */ + if ((op->flags & FTRACE_OPS_FL_MODIFYING) && + hash_contains_ip(ip, &op->old_hash)) return op; } while_for_each_ftrace_op(op); @@ -1952,10 +1961,11 @@ static struct ftrace_ops * ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; do_for_each_ftrace_op(op, ftrace_ops_list) { /* pass rec in as regs to have non-NULL val */ - if (ftrace_ops_test(op, rec->ip, rec)) + if (hash_contains_ip(ip, op->func_hash)) return op; } while_for_each_ftrace_op(op); @@ -2262,92 +2272,6 @@ void __weak arch_ftrace_update_code(int command) ftrace_run_stop_machine(command); } -static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) -{ - struct ftrace_page *pg; - struct dyn_ftrace *rec; - int size, bits; - int ret; - - size = ops->nr_trampolines; - bits = 0; - /* - * Make the hash size about 1/2 the # found - */ - for (size /= 2; size; size >>= 1) - bits++; - - ops->tramp_hash = alloc_ftrace_hash(bits); - /* - * TODO: a failed allocation is going to screw up - * the accounting of what needs to be modified - * and not. For now, we kill ftrace if we fail - * to allocate here. But there are ways around this, - * but that will take a little more work. - */ - if (!ops->tramp_hash) - return -ENOMEM; - - do_for_each_ftrace_rec(pg, rec) { - if (ftrace_rec_count(rec) == 1 && - ftrace_ops_test(ops, rec->ip, rec)) { - - /* - * If another ops adds to a rec, the rec will - * lose its trampoline and never get it back - * until all ops are off of it. - */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - continue; - - /* This record had better have a trampoline */ - if (FTRACE_WARN_ON(!(rec->flags & FTRACE_FL_TRAMP_EN))) - return -1; - - ret = add_hash_entry(ops->tramp_hash, rec->ip); - if (ret < 0) - return ret; - } - } while_for_each_ftrace_rec(); - - /* The number of recs in the hash must match nr_trampolines */ - if (FTRACE_WARN_ON(ops->tramp_hash->count != ops->nr_trampolines)) - pr_warn("count=%ld trampolines=%d\n", - ops->tramp_hash->count, - ops->nr_trampolines); - - return 0; -} - -static int ftrace_save_tramp_hashes(void) -{ - struct ftrace_ops *op; - int ret; - - /* - * Now that any trampoline is being used, we need to save the - * hashes for the ops that have them. This allows the mapping - * back from the record to the ops that has the trampoline to - * know what code is being replaced. Modifying code must always - * verify what it is changing. - */ - do_for_each_ftrace_op(op, ftrace_ops_list) { - - /* The tramp_hash is recreated each time. */ - free_ftrace_hash(op->tramp_hash); - op->tramp_hash = NULL; - - if (op->nr_trampolines) { - ret = ftrace_save_ops_tramp_hash(op); - if (ret) - return ret; - } - - } while_for_each_ftrace_op(op); - - return 0; -} - static void ftrace_run_update_code(int command) { int ret; @@ -2367,9 +2291,6 @@ static void ftrace_run_update_code(int command) ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); - - ret = ftrace_save_tramp_hashes(); - FTRACE_WARN_ON(ret); } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command) @@ -2489,8 +2410,16 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) ops->flags |= FTRACE_OPS_FL_REMOVING; removed_ops = ops; + /* The trampoline logic checks the old hashes */ + ops->old_hash.filter_hash = ops->func_hash->filter_hash; + ops->old_hash.notrace_hash = ops->func_hash->notrace_hash; + ftrace_run_update_code(command); + ops->old_hash.filter_hash = NULL; + ops->old_hash.notrace_hash = NULL; + + removed_ops = NULL; ops->flags &= ~FTRACE_OPS_FL_REMOVING; /* @@ -3017,7 +2946,7 @@ static int t_show(struct seq_file *m, void *v) struct ftrace_ops *ops; ops = ftrace_find_tramp_ops_any(rec); - if (ops && ops->trampoline) + if (ops) seq_printf(m, "\ttramp: %pS", (void *)ops->trampoline); else -- cgit v1.2.3 From 3a630178fd5f30c285fd7016c5340a176b625913 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Aug 2014 10:52:04 -0700 Subject: tracing: generate RCU warnings even when tracepoints are disabled Dave Jones reported seeing a bug from one of my TLB tracepoints: http://lkml.kernel.org/r/20140806181801.GA4605@redhat.com I've been running these patches for months and never saw this. But, a big chunk of my testing, especially with all the debugging enabled, was in a vm where intel_idle doesn't work. On the systems where I was using intel_idle, I never had lockdep enabled and this tracepoint on at the same time. This patch ensures that whenever we have lockdep available, we do _some_ RCU activity at the site of the tracepoint, despite whether the tracepoint's condition matches or even if the tracepoint itself is completely disabled. This is a bit of a hack, but it is pretty self-contained. I confirmed that with this patch plus lockdep I get the same splat as Dave Jones did, but without enabling the tracepoint explicitly. Link: http://lkml.kernel.org/p/20140807175204.C257CAC5@viggo.jf.intel.com Signed-off-by: Dave Hansen Cc: Dave Hansen Cc: Dave Jones , Cc: paulmck@linux.vnet.ibm.com Cc: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index b1293f15f592..e08e21e5f601 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -157,6 +157,12 @@ extern void syscall_unregfunc(void); * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * When lockdep is enabled, we make sure to always do the RCU portions of + * the tracepoint code, regardless of whether tracing is on or we match the + * condition. This lets us find RCU issues triggered with tracepoints even + * when this tracepoint is off. This code has no purpose other than poking + * RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ @@ -167,6 +173,11 @@ extern void syscall_unregfunc(void); TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond),,); \ + if (IS_ENABLED(CONFIG_LOCKDEP)) { \ + rcu_read_lock_sched_notrace(); \ + rcu_dereference_sched(__tracepoint_##name.funcs);\ + rcu_read_unlock_sched_notrace(); \ + } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ PARAMS(cond), PARAMS(data_proto), PARAMS(data_args)) \ -- cgit v1.2.3 From b440bde74f043c8ec31081cb59c9a53ade954701 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Sep 2014 13:45:01 -0600 Subject: PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold), normally generates a hot-remove event that unbinds the driver. Some drivers expect to remain bound to a device even while they power it off and back on again. This can be dangerous, because if the device is removed or replaced while it is powered off, the driver doesn't know that anything changed. But some drivers accept that risk. Add pci_ignore_hotplug() for use by drivers that know their device cannot be removed. Using pci_ignore_hotplug() tells the PCI core that hot-plug events for the device should be ignored. The radeon and nouveau drivers use this to switch between a low-power, integrated GPU and a higher-power, higher-performance discrete GPU. They power off the unused GPU, but they want to remain bound to it. This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") but extends it to work with both acpiphp and pciehp. This fixes a problem where systems with dual GPUs using the radeon drivers become unusable, freezing every few seconds (see bugzillas below). The resume of the radeon device may also fail, e.g., This fixes problems on dual GPU systems where the radeon driver becomes unusable because of problems while suspending the device, as in bug 79701: [drm] radeon: finishing device. radeon 0000:01:00.0: Userspace still has active objects ! radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free ... WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]() trying to unbind memory from uninitialized GART ! or while resuming it, as in bug 77261: radeon 0000:01:00.0: ring 0 stalled for more than 10158msec radeon 0000:01:00.0: GPU lockup ... radeon 0000:01:00.0: GPU pci config reset pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1) radeon 0000:01:00.0: GPU reset succeeded, trying to resume *ERROR* radeon: dpm resume failed radeon 0000:01:00.0: Wait for MC idle timedout ! Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261 Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701 Reported-by: Shawn Starr Reported-by: Jose P. Signed-off-by: Bjorn Helgaas Acked-by: Alex Deucher Acked-by: Rajat Jain Acked-by: Rafael J. Wysocki Acked-by: Dave Airlie CC: stable@vger.kernel.org # v3.15+ --- drivers/gpu/drm/nouveau/nouveau_drm.c | 1 + drivers/gpu/drm/radeon/radeon_drv.c | 1 + drivers/pci/hotplug/acpiphp_glue.c | 16 ++++++---------- drivers/pci/hotplug/pciehp_hpc.c | 12 ++++++++++++ include/linux/pci.h | 6 ++++++ 5 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 250a5e88c751..9c3af96a7153 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -627,6 +627,7 @@ int nouveau_pmops_suspend(struct device *dev) pci_save_state(pdev); pci_disable_device(pdev); + pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3hot); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 8df888908833..abbd87adfd75 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -440,6 +440,7 @@ static int radeon_pmops_runtime_suspend(struct device *dev) ret = radeon_suspend_kms(drm_dev, false, false); pci_save_state(pdev); pci_disable_device(pdev); + pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 70741c8c46a0..6cd5160fc057 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -560,19 +560,15 @@ static void disable_slot(struct acpiphp_slot *slot) slot->flags &= (~SLOT_ENABLED); } -static bool acpiphp_no_hotplug(struct acpi_device *adev) -{ - return adev && adev->flags.no_hotplug; -} - static bool slot_no_hotplug(struct acpiphp_slot *slot) { - struct acpiphp_func *func; + struct pci_bus *bus = slot->bus; + struct pci_dev *dev; - list_for_each_entry(func, &slot->funcs, sibling) - if (acpiphp_no_hotplug(func_to_acpi_device(func))) + list_for_each_entry(dev, &bus->devices, bus_list) { + if (PCI_SLOT(dev->devfn) == slot->device && dev->ignore_hotplug) return true; - + } return false; } @@ -645,7 +641,7 @@ static void trim_stale_devices(struct pci_dev *dev) status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta); alive = (ACPI_SUCCESS(status) && device_status_valid(sta)) - || acpiphp_no_hotplug(adev); + || dev->ignore_hotplug; } if (!alive) alive = pci_device_is_present(dev); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 9da84b8b27d8..5e01ae39ec46 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -506,6 +506,8 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) { struct controller *ctrl = (struct controller *)dev_id; struct pci_dev *pdev = ctrl_dev(ctrl); + struct pci_bus *subordinate = pdev->subordinate; + struct pci_dev *dev; struct slot *slot = ctrl->slot; u16 detected, intr_loc; @@ -539,6 +541,16 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) wake_up(&ctrl->queue); } + if (subordinate) { + list_for_each_entry(dev, &subordinate->devices, bus_list) { + if (dev->ignore_hotplug) { + ctrl_dbg(ctrl, "ignoring hotplug event %#06x (%s requested no hotplug)\n", + intr_loc, pci_name(dev)); + return IRQ_HANDLED; + } + } + } + if (!(intr_loc & ~PCI_EXP_SLTSTA_CC)) return IRQ_HANDLED; diff --git a/include/linux/pci.h b/include/linux/pci.h index 61978a460841..96453f9bc8ba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -303,6 +303,7 @@ struct pci_dev { D3cold, not set for devices powered on/off by the corresponding bridge */ + unsigned int ignore_hotplug:1; /* Ignore hotplug events */ unsigned int d3_delay; /* D3->D0 transition time in ms */ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ @@ -1021,6 +1022,11 @@ bool pci_dev_run_wake(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); +static inline void pci_ignore_hotplug(struct pci_dev *dev) +{ + dev->ignore_hotplug = 1; +} + static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { -- cgit v1.2.3 From 378520b837cf4da769600b83690d8e825f16a611 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 7 Aug 2014 10:15:02 +0800 Subject: nfs41: add a helper function to set layoutcommit after commit Track lwb in nfs_commit_data so that we can use it to setup layoutcommit in commit_done callback. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 29 +++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 1 + fs/nfs/write.c | 15 +++++++++++++++ include/linux/nfs_xdr.h | 1 + 4 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3851debf8a2..a6586cdee211 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1797,6 +1797,35 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); +void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) +{ + struct inode *inode = data->inode; + struct nfs_inode *nfsi = NFS_I(inode); + bool mark_as_dirty = false; + + spin_lock(&inode->i_lock); + if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { + mark_as_dirty = true; + dprintk("%s: Set layoutcommit for inode %lu ", + __func__, inode->i_ino); + } + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { + /* references matched in nfs4_layoutcommit_release */ + pnfs_get_lseg(data->lseg); + } + if (data->lwb > nfsi->layout->plh_lwb) + nfsi->layout->plh_lwb = data->lwb; + spin_unlock(&inode->i_lock); + dprintk("%s: lseg %p end_pos %llu\n", + __func__, data->lseg, nfsi->layout->plh_lwb); + + /* if pnfs_layoutcommit_inode() runs between inode locks, the next one + * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ + if (mark_as_dirty) + mark_inode_dirty_sync(inode); +} +EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); + void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) { struct nfs_server *nfss = NFS_SERVER(data->args.inode); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aca3dff5dae6..79c63114ce77 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -219,6 +219,7 @@ void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); void pnfs_set_layoutcommit(struct nfs_pgio_header *); +void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 175d5d073ccf..3c5638f381cd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1538,6 +1538,18 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, } EXPORT_SYMBOL_GPL(nfs_initiate_commit); +static loff_t nfs_get_lwb(struct list_head *head) +{ + loff_t lwb = 0; + struct nfs_page *req; + + list_for_each_entry(req, head, wb_list) + if (lwb < (req_offset(req) + req->wb_bytes)) + lwb = req_offset(req) + req->wb_bytes; + + return lwb; +} + /* * Set up the argument/result storage required for the RPC call. */ @@ -1557,6 +1569,9 @@ void nfs_init_commit(struct nfs_commit_data *data, data->inode = inode; data->cred = first->wb_context->cred; data->lseg = lseg; /* reference transferred */ + /* only set lwb for pnfs commit */ + if (lseg) + data->lwb = nfs_get_lwb(&data->pages); data->mds_ops = &nfs_commit_ops; data->completion_ops = cinfo->completion_ops; data->dreq = cinfo->dreq; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0040629894df..e563b2c976ef 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1328,6 +1328,7 @@ struct nfs_commit_data { struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ int ds_commit_index; + loff_t lwb; const struct rpc_call_ops *mds_ops; const struct nfs_commit_completion_ops *completion_ops; int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); -- cgit v1.2.3 From 5f919c9f10c1cf821ee5f414683214a361a1b98c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Aug 2014 11:09:25 -0500 Subject: pnfs: allow splicing pre-encoded pages into the layoutcommit args Currently there is no XDR buffer space allocated for the per-layout driver layoutcommit payload, which leads to server buffer overflows in the blocklayout driver even under simple workloads. As we can't do per-layout sizes for XDR operations we'll have to splice a previously encoded list of pages into the XDR stream, similar to how we handle ACL buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 18 +++++++++++++----- fs/nfs/pnfs.c | 15 +++++++++++++++ fs/nfs/pnfs.h | 4 ++-- include/linux/nfs_xdr.h | 3 +++ 4 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e13b59d8d9aa..f2cd957adb90 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -395,7 +395,10 @@ static int nfs4_stat_to_errno(int); 2 /* last byte written */ + \ 1 /* nt_timechanged (false) */ + \ 1 /* layoutupdate4 layout type */ + \ - 1 /* NULL filelayout layoutupdate4 payload */) + 1 /* layoutupdate4 opaqueue len */) + /* the actual content of layoutupdate4 should + be allocated by drivers and spliced in + using xdr_write_pages */ #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ @@ -1990,7 +1993,7 @@ encode_layoutget(struct xdr_stream *xdr, static int encode_layoutcommit(struct xdr_stream *xdr, struct inode *inode, - const struct nfs4_layoutcommit_args *args, + struct nfs4_layoutcommit_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -2011,11 +2014,16 @@ encode_layoutcommit(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ - if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) + if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) { NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( NFS_I(inode)->layout, xdr, args); - else - encode_uint32(xdr, 0); /* no layout-type payload */ + } else { + encode_uint32(xdr, args->layoutupdate_len); + if (args->layoutupdate_pages) { + xdr_write_pages(xdr, args->layoutupdate_pages, 0, + args->layoutupdate_len); + } + } return 0; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 57b5728e0b8e..8827ab130ed3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1854,6 +1854,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; struct nfs4_layoutcommit_data *data; struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos; @@ -1904,6 +1905,20 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) data->args.lastbytewritten = end_pos - 1; data->res.server = NFS_SERVER(inode); + if (ld->prepare_layoutcommit) { + status = ld->prepare_layoutcommit(&data->args); + if (status) { + spin_lock(&inode->i_lock); + if (end_pos < nfsi->layout->plh_lwb) + nfsi->layout->plh_lwb = end_pos; + spin_unlock(&inode->i_lock); + put_rpccred(data->cred); + set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); + goto clear_layoutcommitting; + } + } + + status = nfs4_proc_layoutcommit(data, sync); out: if (status) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1dd8a5e96c9f..8835b5a320cc 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -128,8 +128,8 @@ struct pnfs_layoutdriver_type { const struct nfs4_layoutreturn_args *args); void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); - - void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, + int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); + void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e563b2c976ef..f4092c6b90fb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -279,6 +279,9 @@ struct nfs4_layoutcommit_args { __u64 lastbytewritten; struct inode *inode; const u32 *bitmask; + size_t layoutupdate_len; + struct page *layoutupdate_page; + struct page **layoutupdate_pages; }; struct nfs4_layoutcommit_res { -- cgit v1.2.3 From b954d83421d51d822c42e5ab7b65069b25ad3005 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Sep 2014 15:01:02 +0200 Subject: net: bpf: only build bpf_jit_binary_{alloc, free}() when jit selected Since BPF JIT depends on the availability of module_alloc() and module_free() helpers (HAVE_BPF_JIT and MODULES), we better build that code only in case we have BPF_JIT in our config enabled, just like with other JIT code. Fixes builds for arm/marzen_defconfig and sh/rsk7269_defconfig. ==================== kernel/built-in.o: In function `bpf_jit_binary_alloc': /home/cwang/linux/kernel/bpf/core.c:144: undefined reference to `module_alloc' kernel/built-in.o: In function `bpf_jit_binary_free': /home/cwang/linux/kernel/bpf/core.c:164: undefined reference to `module_free' make: *** [vmlinux] Error 1 ==================== Reported-by: Fengguang Wu Fixes: 738cbe72adc5 ("net: bpf: consolidate JIT binary allocator") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 78 +++++++++++++++++++++++++------------------------- kernel/bpf/core.c | 2 ++ 2 files changed, 41 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4b59edead908..1a0bc6d134d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -4,12 +4,18 @@ #ifndef __LINUX_FILTER_H__ #define __LINUX_FILTER_H__ +#include + #include #include #include +#include +#include #include -#include + #include + +#include #include struct sk_buff; @@ -363,14 +369,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); -typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); - -struct bpf_binary_header * -bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, - unsigned int alignment, - bpf_jit_fill_hole_t bpf_fill_ill_insns); -void bpf_jit_binary_free(struct bpf_binary_header *hdr); - static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { bpf_prog_unlock_ro(fp); @@ -393,6 +391,38 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_int_jit_compile(struct bpf_prog *fp); +#ifdef CONFIG_BPF_JIT +typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_jit_binary_free(struct bpf_binary_header *hdr); + +void bpf_jit_compile(struct bpf_prog *fp); +void bpf_jit_free(struct bpf_prog *fp); + +static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, + u32 pass, void *image) +{ + pr_err("flen=%u proglen=%u pass=%u image=%pK\n", + flen, proglen, pass, image); + if (image) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, + 16, 1, image, proglen, false); +} +#else +static inline void bpf_jit_compile(struct bpf_prog *fp) +{ +} + +static inline void bpf_jit_free(struct bpf_prog *fp) +{ + bpf_prog_unlock_free(fp); +} +#endif /* CONFIG_BPF_JIT */ + #define BPF_ANC BIT(15) static inline u16 bpf_anc_helper(const struct sock_filter *ftest) @@ -440,36 +470,6 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, return bpf_internal_load_pointer_neg_helper(skb, k, size); } -#ifdef CONFIG_BPF_JIT -#include -#include -#include - -void bpf_jit_compile(struct bpf_prog *fp); -void bpf_jit_free(struct bpf_prog *fp); - -static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, - u32 pass, void *image) -{ - pr_err("flen=%u proglen=%u pass=%u image=%pK\n", - flen, proglen, pass, image); - if (image) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, - 16, 1, image, proglen, false); -} -#else -#include - -static inline void bpf_jit_compile(struct bpf_prog *fp) -{ -} - -static inline void bpf_jit_free(struct bpf_prog *fp) -{ - bpf_prog_unlock_free(fp); -} -#endif /* CONFIG_BPF_JIT */ - static inline int bpf_tell_extensions(void) { return SKF_AD_MAX; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8ee520f0ec70..8b7002488251 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -128,6 +128,7 @@ void __bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(__bpf_prog_free); +#ifdef CONFIG_BPF_JIT struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -163,6 +164,7 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr) { module_free(NULL, hdr); } +#endif /* CONFIG_BPF_JIT */ /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs -- cgit v1.2.3 From 6314b6796e3c070d4c8086b08dfd453a0aeac4cf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 4 Sep 2014 23:37:49 -0700 Subject: clk: Don't hold prepare_lock across debugfs creation Rob Clark reports a lockdep splat that involves the prepare_lock chained with the mmap semaphore. ====================================================== [ INFO: possible circular locking dependency detected ] 3.17.0-rc1-00050-g07a489b #802 Tainted: G W ------------------------------------------------------- Xorg.bin/5413 is trying to acquire lock: (prepare_lock){+.+.+.}, at: [] clk_prepare_lock+0x88/0xfc but task is already holding lock: (qcom_iommu_lock){+.+...}, at: [] qcom_iommu_unmap+0x1c/0x1f0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (qcom_iommu_lock){+.+...}: [] qcom_iommu_map+0x28/0x450 [] iommu_map+0xc8/0x12c [] msm_iommu_map+0xb4/0x130 [] msm_gem_get_iova_locked+0x9c/0xe8 [] msm_gem_get_iova+0x4c/0x64 [] mdp4_kms_init+0x4c4/0x6c0 [] msm_load+0x2ac/0x34c [] drm_dev_register+0xac/0x108 [] drm_platform_init+0x50/0xf0 [] try_to_bring_up_master.part.3+0xc8/0x108 [] component_master_add_with_match+0xa8/0x104 [] msm_pdev_probe+0x64/0x70 [] platform_drv_probe+0x2c/0x60 [] driver_probe_device+0x108/0x234 [] bus_for_each_drv+0x64/0x98 [] device_attach+0x78/0x8c [] bus_probe_device+0x88/0xac [] deferred_probe_work_func+0x68/0x9c [] process_one_work+0x1a0/0x40c [] worker_thread+0x44/0x4d8 [] kthread+0xd8/0xec [] ret_from_fork+0x14/0x2c -> #3 (&dev->struct_mutex){+.+.+.}: [] drm_gem_mmap+0x38/0xd0 [] msm_gem_mmap+0xc/0x5c [] mmap_region+0x35c/0x6c8 [] do_mmap_pgoff+0x314/0x398 [] vm_mmap_pgoff+0x84/0xb4 [] SyS_mmap_pgoff+0x94/0xbc [] ret_fast_syscall+0x0/0x48 -> #2 (&mm->mmap_sem){++++++}: [] filldir64+0x68/0x180 [] dcache_readdir+0x188/0x22c [] iterate_dir+0x9c/0x11c [] SyS_getdents64+0x78/0xe8 [] ret_fast_syscall+0x0/0x48 -> #1 (&sb->s_type->i_mutex_key#3){+.+.+.}: [] __create_file+0x58/0x1dc [] debugfs_create_dir+0x1c/0x24 [] clk_debug_create_subtree+0x20/0x170 [] clk_debug_init+0xec/0x14c [] do_one_initcall+0x8c/0x1c8 [] kernel_init_freeable+0x13c/0x1dc [] kernel_init+0x8/0xe8 [] ret_from_fork+0x14/0x2c -> #0 (prepare_lock){+.+.+.}: [] mutex_lock_nested+0x70/0x3e8 [] clk_prepare_lock+0x88/0xfc [] clk_prepare+0xc/0x24 [] __enable_clocks.isra.4+0x18/0xa4 [] __flush_iotlb_va+0xe0/0x114 [] qcom_iommu_unmap+0xac/0x1f0 [] iommu_unmap+0x9c/0xe8 [] msm_iommu_unmap+0x64/0x84 [] msm_gem_free_object+0x11c/0x338 [] drm_gem_object_handle_unreference_unlocked+0xfc/0x130 [] drm_gem_object_release_handle+0x50/0x68 [] idr_for_each+0xa8/0xdc [] drm_gem_release+0x1c/0x28 [] drm_release+0x370/0x428 [] __fput+0x98/0x1e8 [] task_work_run+0xb0/0xfc [] do_exit+0x2ec/0x948 [] do_group_exit+0x4c/0xb8 [] get_signal+0x28c/0x6ac [] do_signal+0xc4/0x3e4 [] do_work_pending+0xb4/0xc4 [] work_pending+0xc/0x20 other info that might help us debug this: Chain exists of: prepare_lock --> &dev->struct_mutex --> qcom_iommu_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(qcom_iommu_lock); lock(&dev->struct_mutex); lock(qcom_iommu_lock); lock(prepare_lock); *** DEADLOCK *** 3 locks held by Xorg.bin/5413: #0: (drm_global_mutex){+.+.+.}, at: [] drm_release+0x34/0x428 #1: (&dev->struct_mutex){+.+.+.}, at: [] drm_gem_object_handle_unreference_unlocked+0xcc/0x130 #2: (qcom_iommu_lock){+.+...}, at: [] qcom_iommu_unmap+0x1c/0x1f0 stack backtrace: CPU: 1 PID: 5413 Comm: Xorg.bin Tainted: G W 3.17.0-rc1-00050-g07a489b #802 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xb8) [] (dump_stack) from [] (print_circular_bug+0x218/0x340) [] (print_circular_bug) from [] (__lock_acquire+0x1d24/0x20b8) [] (__lock_acquire) from [] (lock_acquire+0x9c/0xbc) [] (lock_acquire) from [] (mutex_lock_nested+0x70/0x3e8) [] (mutex_lock_nested) from [] (clk_prepare_lock+0x88/0xfc) [] (clk_prepare_lock) from [] (clk_prepare+0xc/0x24) [] (clk_prepare) from [] (__enable_clocks.isra.4+0x18/0xa4) [] (__enable_clocks.isra.4) from [] (__flush_iotlb_va+0xe0/0x114) [] (__flush_iotlb_va) from [] (qcom_iommu_unmap+0xac/0x1f0) [] (qcom_iommu_unmap) from [] (iommu_unmap+0x9c/0xe8) [] (iommu_unmap) from [] (msm_iommu_unmap+0x64/0x84) [] (msm_iommu_unmap) from [] (msm_gem_free_object+0x11c/0x338) [] (msm_gem_free_object) from [] (drm_gem_object_handle_unreference_unlocked+0xfc/0x130) [] (drm_gem_object_handle_unreference_unlocked) from [] (drm_gem_object_release_handle+0x50/0x68) [] (drm_gem_object_release_handle) from [] (idr_for_each+0xa8/0xdc) [] (idr_for_each) from [] (drm_gem_release+0x1c/0x28) [] (drm_gem_release) from [] (drm_release+0x370/0x428) [] (drm_release) from [] (__fput+0x98/0x1e8) [] (__fput) from [] (task_work_run+0xb0/0xfc) [] (task_work_run) from [] (do_exit+0x2ec/0x948) [] (do_exit) from [] (do_group_exit+0x4c/0xb8) [] (do_group_exit) from [] (get_signal+0x28c/0x6ac) [] (get_signal) from [] (do_signal+0xc4/0x3e4) [] (do_signal) from [] (do_work_pending+0xb4/0xc4) [] (do_work_pending) from [] (work_pending+0xc/0x20) We can break this chain if we don't hold the prepare_lock while creating debugfs directories. We only hold the prepare_lock right now because we're traversing the clock tree recursively and we don't want the hierarchy to change during the traversal. Replacing this traversal with a simple linked list walk allows us to only grab a list lock instead of the prepare_lock, thus breaking the lock chain. Signed-off-by: Stephen Boyd Signed-off-by: Mike Turquette --- drivers/clk/clk.c | 73 +++++++++++++++++---------------------------- include/linux/clk-private.h | 1 + 2 files changed, 28 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index b76fa69b44cb..8ca28189e4e9 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -100,6 +100,8 @@ static void clk_enable_unlock(unsigned long flags) static struct dentry *rootdir; static int inited = 0; +static DEFINE_MUTEX(clk_debug_lock); +static HLIST_HEAD(clk_debug_list); static struct hlist_head *all_lists[] = { &clk_root_list, @@ -300,28 +302,6 @@ out: return ret; } -/* caller must hold prepare_lock */ -static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) -{ - struct clk *child; - int ret = -EINVAL;; - - if (!clk || !pdentry) - goto out; - - ret = clk_debug_create_one(clk, pdentry); - - if (ret) - goto out; - - hlist_for_each_entry(child, &clk->children, child_node) - clk_debug_create_subtree(child, pdentry); - - ret = 0; -out: - return ret; -} - /** * clk_debug_register - add a clk node to the debugfs clk tree * @clk: the clk being added to the debugfs clk tree @@ -329,20 +309,21 @@ out: * Dynamically adds a clk to the debugfs clk tree if debugfs has been * initialized. Otherwise it bails out early since the debugfs clk tree * will be created lazily by clk_debug_init as part of a late_initcall. - * - * Caller must hold prepare_lock. Only clk_init calls this function (so - * far) so this is taken care. */ static int clk_debug_register(struct clk *clk) { int ret = 0; + mutex_lock(&clk_debug_lock); + hlist_add_head(&clk->debug_node, &clk_debug_list); + if (!inited) - goto out; + goto unlock; - ret = clk_debug_create_subtree(clk, rootdir); + ret = clk_debug_create_one(clk, rootdir); +unlock: + mutex_unlock(&clk_debug_lock); -out: return ret; } @@ -353,12 +334,18 @@ out: * Dynamically removes a clk and all it's children clk nodes from the * debugfs clk tree if clk->dentry points to debugfs created by * clk_debug_register in __clk_init. - * - * Caller must hold prepare_lock. */ static void clk_debug_unregister(struct clk *clk) { + mutex_lock(&clk_debug_lock); + if (!clk->dentry) + goto out; + + hlist_del_init(&clk->debug_node); debugfs_remove_recursive(clk->dentry); + clk->dentry = NULL; +out: + mutex_unlock(&clk_debug_lock); } struct dentry *clk_debugfs_add_file(struct clk *clk, char *name, umode_t mode, @@ -415,17 +402,12 @@ static int __init clk_debug_init(void) if (!d) return -ENOMEM; - clk_prepare_lock(); - - hlist_for_each_entry(clk, &clk_root_list, child_node) - clk_debug_create_subtree(clk, rootdir); - - hlist_for_each_entry(clk, &clk_orphan_list, child_node) - clk_debug_create_subtree(clk, rootdir); + mutex_lock(&clk_debug_lock); + hlist_for_each_entry(clk, &clk_debug_list, debug_node) + clk_debug_create_one(clk, rootdir); inited = 1; - - clk_prepare_unlock(); + mutex_unlock(&clk_debug_lock); return 0; } @@ -2087,14 +2069,16 @@ void clk_unregister(struct clk *clk) { unsigned long flags; - if (!clk || WARN_ON_ONCE(IS_ERR(clk))) - return; + if (!clk || WARN_ON_ONCE(IS_ERR(clk))) + return; + + clk_debug_unregister(clk); clk_prepare_lock(); if (clk->ops == &clk_nodrv_ops) { pr_err("%s: unregistered clock: %s\n", __func__, clk->name); - goto out; + return; } /* * Assign empty clock ops for consumers that might still hold @@ -2113,16 +2097,13 @@ void clk_unregister(struct clk *clk) clk_set_parent(child, NULL); } - clk_debug_unregister(clk); - hlist_del_init(&clk->child_node); if (clk->prepare_count) pr_warn("%s: unregistering prepared clock: %s\n", __func__, clk->name); - kref_put(&clk->ref, __clk_release); -out: + clk_prepare_unlock(); } EXPORT_SYMBOL_GPL(clk_unregister); diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index efbf70b9fd84..4ed34105c371 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -48,6 +48,7 @@ struct clk { unsigned long accuracy; struct hlist_head children; struct hlist_node child_node; + struct hlist_node debug_node; unsigned int notifier_count; #ifdef CONFIG_DEBUG_FS struct dentry *dentry; -- cgit v1.2.3 From 09e05c3f78e9e82bda5958eb95bbf719f7a0ed6b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 10 Sep 2014 16:41:56 +0300 Subject: net/mlx4: Set vlan stripping policy by the right command Changing the vlan stripping policy of the QP isn't supported by older firmware versions for the INIT2RTR command. Nevertheless, we've used it. Fix that by doing this policy change using INIT2RTR only if the firmware supports it, otherwise, we call UPDATE_QP command to do the task. Fixes: 7677fc9 ('net/mlx4: Strengthen VLAN tags/priorities enforcement in VST mode') Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/qp.c | 12 ++++++++++-- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 18 ++++++++++++++++-- include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 12 ++++++++++-- 5 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index af8256353c7d..162b82c1dde4 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1680,7 +1680,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, goto unlock; update_params.smac_index = new_smac_index; - if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC, + if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC, &update_params)) { release_mac = new_smac; goto unlock; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 0dc31d85fc3b..2301365c79c7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -390,13 +390,14 @@ err_icm: EXPORT_SYMBOL_GPL(mlx4_qp_alloc); #define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC -int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp, +int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_update_qp_context *cmd; u64 pri_addr_path_mask = 0; + u64 qp_mask = 0; int err = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -413,9 +414,16 @@ int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp, cmd->qp_context.pri_path.grh_mylmc = params->smac_index; } + if (attr & MLX4_UPDATE_QP_VSD) { + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD; + if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE) + cmd->qp_context.param3 |= cpu_to_be32(MLX4_STRIP_VLAN); + } + cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask); + cmd->qp_mask = cpu_to_be64(qp_mask); - err = mlx4_cmd(dev, mailbox->dma, qp->qpn & 0xffffff, 0, + err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0, MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 2fe61b6a8e37..5d2498dcf536 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -702,11 +702,13 @@ static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_qp_context *qpc = inbox->buf + 8; struct mlx4_vport_oper_state *vp_oper; struct mlx4_priv *priv; + u32 qp_type; int port; port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; priv = mlx4_priv(dev); vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; if (MLX4_VGT != vp_oper->state.default_vlan) { /* the reserved QPs (special, proxy, tunnel) @@ -715,8 +717,20 @@ static int update_vport_qp_param(struct mlx4_dev *dev, if (mlx4_is_qp_reserved(dev, qpn)) return 0; - /* force strip vlan by clear vsd */ - qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN); + /* force strip vlan by clear vsd, MLX QP refers to Raw Ethernet */ + if (qp_type == MLX4_QP_ST_UD || + (qp_type == MLX4_QP_ST_MLX && mlx4_is_eth(dev, port))) { + if (dev->caps.bmme_flags & MLX4_BMME_FLAG_VSD_INIT2RTR) { + *(__be32 *)inbox->buf = + cpu_to_be32(be32_to_cpu(*(__be32 *)inbox->buf) | + MLX4_QP_OPTPAR_VLAN_STRIPPING); + qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN); + } else { + struct mlx4_update_qp_params params = {.flags = 0}; + + mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, ¶ms); + } + } if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE && dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 511c6e0d21a9..a5b7d7cfcedf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -209,6 +209,7 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; enum mlx4_event { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 7040dc98ff8b..5f4e36cf0091 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -56,7 +56,8 @@ enum mlx4_qp_optpar { MLX4_QP_OPTPAR_RNR_RETRY = 1 << 13, MLX4_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16, - MLX4_QP_OPTPAR_COUNTER_INDEX = 1 << 20 + MLX4_QP_OPTPAR_COUNTER_INDEX = 1 << 20, + MLX4_QP_OPTPAR_VLAN_STRIPPING = 1 << 21, }; enum mlx4_qp_state { @@ -423,13 +424,20 @@ struct mlx4_wqe_inline_seg { enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, + MLX4_UPDATE_QP_VSD = 1 << 2, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1 +}; + +enum mlx4_update_qp_params_flags { + MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 0, }; struct mlx4_update_qp_params { u8 smac_index; + u32 flags; }; -int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp, +int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params); int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, -- cgit v1.2.3 From 184c3fc3f52fb75800deb76deffb70907d1f76ea Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 11 Sep 2014 09:47:23 +0930 Subject: moduleparam: Resolve missing-field-initializer warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve a missing-field-initializer warning, that is produced by every reference to module_param_call, by using designated initialization for the first field. That is enough to silence the complaint. The message is only seen when doing a W=2 build. I happened to be using gcc 4.8.3, but I think most versions would produce the warning when it is enabled. It can either be silenced by using even a single designated initializer as I did here, or providing values for all of the fields. Because of the number of references to the macro, this change silences many warnings in W=2 builds. One instance of the full warning message looks like this: /home/share/git/nn-mdr/include/linux/moduleparam.h:198:16: warning: missing initializer for field ‘free’ of ‘struct kernel_param_ops’ [-Wmissing-field-initializers] static struct kernel_param_ops __param_ops_##name = \ ^ /home/share/git/nn-mdr/fs/fuse/inode.c:35:1: note: in expansion of macro ‘module_param_call’ module_param_call(max_user_bgreq, set_global_limit, param_get_uint, ^ /home/share/git/nn-mdr/include/linux/moduleparam.h:56:9: note: ‘free’ declared here void (*free)(void *arg); Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 593501996574..b43f4752304e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -224,7 +224,7 @@ struct kparam_array /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ static struct kernel_param_ops __param_ops_##name = \ - { 0, (void *)set, (void *)get }; \ + { .flags = 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) -- cgit v1.2.3 From 3d598f47e804a77208c6bb0a454123018e2f2281 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:12 +0300 Subject: dmaengine: dw: move dw_dmac.h to where it belongs to There is a common storage for platform data related structures and definitions inside kernel source tree. The patch moves file from include/linux to include/linux/platform_data and renames it acoordingly. The users are also updated. Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar [For the arch/avr32/.* and .*sound/atmel.*] Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- arch/avr32/mach-at32ap/at32ap700x.c | 2 +- arch/avr32/mach-at32ap/include/mach/atmel-mci.h | 2 +- drivers/dma/dw/internal.h | 2 +- drivers/dma/dw/regs.h | 6 +- include/linux/dw_dmac.h | 111 ------------------------ include/linux/platform_data/dma-dw.h | 111 ++++++++++++++++++++++++ include/sound/atmel-abdac.h | 2 +- include/sound/atmel-ac97c.h | 2 +- sound/atmel/abdac.c | 2 +- sound/atmel/ac97c.c | 2 +- 11 files changed, 122 insertions(+), 122 deletions(-) delete mode 100644 include/linux/dw_dmac.h create mode 100644 include/linux/platform_data/dma-dw.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index aefa94841ff3..a10072963889 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7875,7 +7875,7 @@ SYNOPSYS DESIGNWARE DMAC DRIVER M: Viresh Kumar M: Andy Shevchenko S: Maintained -F: include/linux/dw_dmac.h +F: include/linux/platform_data/dma-dw.h F: drivers/dma/dw/ SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index db85b5ec3351..2a1aa712c6e7 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h index 4bba58561d5c..11d7f4b28dc8 100644 --- a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h +++ b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h @@ -1,7 +1,7 @@ #ifndef __MACH_ATMEL_MCI_H #define __MACH_ATMEL_MCI_H -#include +#include /** * struct mci_dma_data - DMA data for MCI interface diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h index 32667f9e0dda..43cc1dfad5c9 100644 --- a/drivers/dma/dw/internal.h +++ b/drivers/dma/dw/internal.h @@ -12,7 +12,7 @@ #define _DW_DMAC_INTERNAL_H #include -#include +#include #include "regs.h" diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index bb98d3e91e8b..af02439155e9 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -11,7 +11,7 @@ #include #include -#include +#include #define DW_DMA_MAX_NR_CHANNELS 8 #define DW_DMA_MAX_NR_REQUESTS 16 @@ -161,7 +161,7 @@ struct dw_dma_regs { #define DWC_CTLH_DONE 0x00001000 #define DWC_CTLH_BLOCK_TS_MASK 0x00000fff -/* Bitfields in CFG_LO. Platform-configurable bits are in */ +/* Bitfields in CFG_LO. Platform-configurable bits are in */ #define DWC_CFGL_CH_PRIOR_MASK (0x7 << 5) /* priority mask */ #define DWC_CFGL_CH_PRIOR(x) ((x) << 5) /* priority */ #define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ @@ -172,7 +172,7 @@ struct dw_dma_regs { #define DWC_CFGL_RELOAD_SAR (1 << 30) #define DWC_CFGL_RELOAD_DAR (1 << 31) -/* Bitfields in CFG_HI. Platform-configurable bits are in */ +/* Bitfields in CFG_HI. Platform-configurable bits are in */ #define DWC_CFGH_DS_UPD_EN (1 << 5) #define DWC_CFGH_SS_UPD_EN (1 << 6) diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h deleted file mode 100644 index 68b4024184de..000000000000 --- a/include/linux/dw_dmac.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Driver for the Synopsys DesignWare DMA Controller - * - * Copyright (C) 2007 Atmel Corporation - * Copyright (C) 2010-2011 ST Microelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef DW_DMAC_H -#define DW_DMAC_H - -#include - -/** - * struct dw_dma_slave - Controller-specific information about a slave - * - * @dma_dev: required DMA master device. Depricated. - * @bus_id: name of this device channel, not just a device name since - * devices may have more than one channel e.g. "foo_tx" - * @cfg_hi: Platform-specific initializer for the CFG_HI register - * @cfg_lo: Platform-specific initializer for the CFG_LO register - * @src_master: src master for transfers on allocated channel. - * @dst_master: dest master for transfers on allocated channel. - */ -struct dw_dma_slave { - struct device *dma_dev; - u32 cfg_hi; - u32 cfg_lo; - u8 src_master; - u8 dst_master; -}; - -/** - * struct dw_dma_platform_data - Controller configuration parameters - * @nr_channels: Number of channels supported by hardware (max 8) - * @is_private: The device channels should be marked as private and not for - * by the general purpose DMA channel allocator. - * @chan_allocation_order: Allocate channels starting from 0 or 7 - * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. - * @block_size: Maximum block size supported by the controller - * @nr_masters: Number of AHB masters supported by the controller - * @data_width: Maximum data width supported by hardware per AHB master - * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) - */ -struct dw_dma_platform_data { - unsigned int nr_channels; - bool is_private; -#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ -#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ - unsigned char chan_allocation_order; -#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ -#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ - unsigned char chan_priority; - unsigned short block_size; - unsigned char nr_masters; - unsigned char data_width[4]; -}; - -/* bursts size */ -enum dw_dma_msize { - DW_DMA_MSIZE_1, - DW_DMA_MSIZE_4, - DW_DMA_MSIZE_8, - DW_DMA_MSIZE_16, - DW_DMA_MSIZE_32, - DW_DMA_MSIZE_64, - DW_DMA_MSIZE_128, - DW_DMA_MSIZE_256, -}; - -/* Platform-configurable bits in CFG_HI */ -#define DWC_CFGH_FCMODE (1 << 0) -#define DWC_CFGH_FIFO_MODE (1 << 1) -#define DWC_CFGH_PROTCTL(x) ((x) << 2) -#define DWC_CFGH_SRC_PER(x) ((x) << 7) -#define DWC_CFGH_DST_PER(x) ((x) << 11) - -/* Platform-configurable bits in CFG_LO */ -#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ -#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) -#define DWC_CFGL_LOCK_CH_XACT (2 << 12) -#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ -#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) -#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) -#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ -#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ -#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ -#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ - -/* DMA API extensions */ -struct dw_cyclic_desc { - struct dw_desc **desc; - unsigned long periods; - void (*period_callback)(void *param); - void *period_callback_param; -}; - -struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, - dma_addr_t buf_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction); -void dw_dma_cyclic_free(struct dma_chan *chan); -int dw_dma_cyclic_start(struct dma_chan *chan); -void dw_dma_cyclic_stop(struct dma_chan *chan); - -dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); - -dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); - -#endif /* DW_DMAC_H */ diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h new file mode 100644 index 000000000000..68b4024184de --- /dev/null +++ b/include/linux/platform_data/dma-dw.h @@ -0,0 +1,111 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef DW_DMAC_H +#define DW_DMAC_H + +#include + +/** + * struct dw_dma_slave - Controller-specific information about a slave + * + * @dma_dev: required DMA master device. Depricated. + * @bus_id: name of this device channel, not just a device name since + * devices may have more than one channel e.g. "foo_tx" + * @cfg_hi: Platform-specific initializer for the CFG_HI register + * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_master: src master for transfers on allocated channel. + * @dst_master: dest master for transfers on allocated channel. + */ +struct dw_dma_slave { + struct device *dma_dev; + u32 cfg_hi; + u32 cfg_lo; + u8 src_master; + u8 dst_master; +}; + +/** + * struct dw_dma_platform_data - Controller configuration parameters + * @nr_channels: Number of channels supported by hardware (max 8) + * @is_private: The device channels should be marked as private and not for + * by the general purpose DMA channel allocator. + * @chan_allocation_order: Allocate channels starting from 0 or 7 + * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. + * @block_size: Maximum block size supported by the controller + * @nr_masters: Number of AHB masters supported by the controller + * @data_width: Maximum data width supported by hardware per AHB master + * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) + */ +struct dw_dma_platform_data { + unsigned int nr_channels; + bool is_private; +#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ +#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ + unsigned char chan_allocation_order; +#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ +#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ + unsigned char chan_priority; + unsigned short block_size; + unsigned char nr_masters; + unsigned char data_width[4]; +}; + +/* bursts size */ +enum dw_dma_msize { + DW_DMA_MSIZE_1, + DW_DMA_MSIZE_4, + DW_DMA_MSIZE_8, + DW_DMA_MSIZE_16, + DW_DMA_MSIZE_32, + DW_DMA_MSIZE_64, + DW_DMA_MSIZE_128, + DW_DMA_MSIZE_256, +}; + +/* Platform-configurable bits in CFG_HI */ +#define DWC_CFGH_FCMODE (1 << 0) +#define DWC_CFGH_FIFO_MODE (1 << 1) +#define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_SRC_PER(x) ((x) << 7) +#define DWC_CFGH_DST_PER(x) ((x) << 11) + +/* Platform-configurable bits in CFG_LO */ +#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ +#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) +#define DWC_CFGL_LOCK_CH_XACT (2 << 12) +#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ +#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) +#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) +#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ +#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ +#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ +#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ + +/* DMA API extensions */ +struct dw_cyclic_desc { + struct dw_desc **desc; + unsigned long periods; + void (*period_callback)(void *param); + void *period_callback_param; +}; + +struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, + dma_addr_t buf_addr, size_t buf_len, size_t period_len, + enum dma_transfer_direction direction); +void dw_dma_cyclic_free(struct dma_chan *chan); +int dw_dma_cyclic_start(struct dma_chan *chan); +void dw_dma_cyclic_stop(struct dma_chan *chan); + +dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); + +dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); + +#endif /* DW_DMAC_H */ diff --git a/include/sound/atmel-abdac.h b/include/sound/atmel-abdac.h index edff6a8ba1b5..a8f735d677fa 100644 --- a/include/sound/atmel-abdac.h +++ b/include/sound/atmel-abdac.h @@ -10,7 +10,7 @@ #ifndef __INCLUDE_SOUND_ATMEL_ABDAC_H #define __INCLUDE_SOUND_ATMEL_ABDAC_H -#include +#include /** * struct atmel_abdac_pdata - board specific ABDAC configuration diff --git a/include/sound/atmel-ac97c.h b/include/sound/atmel-ac97c.h index 00e6c289a936..f2a1cdc37661 100644 --- a/include/sound/atmel-ac97c.h +++ b/include/sound/atmel-ac97c.h @@ -10,7 +10,7 @@ #ifndef __INCLUDE_SOUND_ATMEL_AC97C_H #define __INCLUDE_SOUND_ATMEL_AC97C_H -#include +#include #define AC97C_CAPTURE 0x01 #define AC97C_PLAYBACK 0x02 diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c index edf2ca72d518..154a7c44e38d 100644 --- a/sound/atmel/abdac.c +++ b/sound/atmel/abdac.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c index a04d23174dc2..1dfb35afef8f 100644 --- a/sound/atmel/ac97c.c +++ b/sound/atmel/ac97c.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include -- cgit v1.2.3 From 7e1e2f27c5508518e58e5cbb11e26cbb815f4c56 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:14 +0300 Subject: dmaengine: dw: convert dw_dma_slave to use explicit HS interfaces Instead of exposing the possibility to set DMA registers CFG_HI and CFG_LO strict user to provide handshake interfaces explicitly. Signed-off-by: Andy Shevchenko Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- arch/avr32/mach-at32ap/at32ap700x.c | 15 +++++---------- drivers/dma/dw/core.c | 4 ++-- include/linux/platform_data/dma-dw.h | 10 ++++------ 3 files changed, 11 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index ec7be287a97e..37b75602adf6 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1356,10 +1356,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) goto fail; slave->sdata.dma_dev = &dw_dmac0_device.dev; - slave->sdata.cfg_hi = (DWC_CFGH_SRC_PER(0) - | DWC_CFGH_DST_PER(1)); - slave->sdata.cfg_lo &= ~(DWC_CFGL_HS_DST_POL - | DWC_CFGL_HS_SRC_POL); + slave->sdata.src_id = 0; + slave->sdata.dst_id = 1; slave->sdata.src_master = 1; slave->sdata.dst_master = 0; @@ -2054,8 +2052,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, /* Check if DMA slave interface for capture should be configured. */ if (flags & AC97C_CAPTURE) { rx_dws->dma_dev = &dw_dmac0_device.dev; - rx_dws->cfg_hi = DWC_CFGH_SRC_PER(3); - rx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + rx_dws->src_id = 3; rx_dws->src_master = 0; rx_dws->dst_master = 1; } @@ -2063,8 +2060,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, /* Check if DMA slave interface for playback should be configured. */ if (flags & AC97C_PLAYBACK) { tx_dws->dma_dev = &dw_dmac0_device.dev; - tx_dws->cfg_hi = DWC_CFGH_DST_PER(4); - tx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + tx_dws->dst_id = 4; tx_dws->src_master = 0; tx_dws->dst_master = 1; } @@ -2136,8 +2132,7 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data) dws = &data->dws; dws->dma_dev = &dw_dmac0_device.dev; - dws->cfg_hi = DWC_CFGH_DST_PER(2); - dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + dws->dst_id = 2; dws->src_master = 0; dws->dst_master = 1; diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 1af731b83b3f..0a9c052d437c 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -155,8 +155,8 @@ static void dwc_initialize(struct dw_dma_chan *dwc) */ BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - cfghi = dws->cfg_hi; - cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK; + cfghi |= DWC_CFGH_DST_PER(dws->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dws->src_id); } else { if (dwc->direction == DMA_MEM_TO_DEV) cfghi = DWC_CFGH_DST_PER(dwc->request_line); diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 68b4024184de..bc411a1bf8e7 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -17,17 +17,15 @@ * struct dw_dma_slave - Controller-specific information about a slave * * @dma_dev: required DMA master device. Depricated. - * @bus_id: name of this device channel, not just a device name since - * devices may have more than one channel e.g. "foo_tx" - * @cfg_hi: Platform-specific initializer for the CFG_HI register - * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_id: src request line + * @dst_id: dst request line * @src_master: src master for transfers on allocated channel. * @dst_master: dest master for transfers on allocated channel. */ struct dw_dma_slave { struct device *dma_dev; - u32 cfg_hi; - u32 cfg_lo; + u8 src_id; + u8 dst_id; u8 src_master; u8 dst_master; }; -- cgit v1.2.3 From 960d01acf62747d6518694f92be5b06f67473833 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Sep 2014 22:55:35 +0300 Subject: cfg80211: add WMM traffic stream API Add nl80211 and driver API to validate, add and delete traffic streams with appropriate settings. The API calls for userspace doing the action frame handshake with the peer, and then allows only to set up the parameters in the driver. To avoid setting up a session only to tear it down again, the validate API is provided, but the real usage later can still fail so userspace must be prepared for that. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++ include/net/cfg80211.h | 23 ++++++++- include/uapi/linux/nl80211.h | 28 +++++++++++ net/wireless/nl80211.c | 109 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 31 ++++++++++++ net/wireless/trace.h | 45 ++++++++++++++++++ 6 files changed, 239 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 61a09f0644aa..b1be39c76931 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -166,8 +166,12 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_MAX_MESH_ID_LEN 32 +#define IEEE80211_FIRST_TSPEC_TSID 8 #define IEEE80211_NUM_TIDS 16 +/* number of user priorities 802.11 uses */ +#define IEEE80211_NUM_UPS 8 + #define IEEE80211_QOS_CTL_LEN 2 /* 1d tag mask */ #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c2c710c14a50..a13beab123dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2318,6 +2318,17 @@ struct cfg80211_qos_map { * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the * given interface This is used e.g. for dynamic HT 20/40 MHz channel width * changes during the lifetime of the BSS. + * + * @add_tx_ts: validate (if admitted_time is 0) or add a TX TS to the device + * with the given parameters; action frame exchange has been handled by + * userspace so this just has to modify the TX path to take the TS into + * account. + * If the admitted time is 0 just validate the parameters to make sure + * the session can be created at all; it is valid to just always return + * success for that but that may result in inefficient behaviour (handshake + * with the peer followed by immediate teardown when the addition is later + * rejected) + * @del_tx_ts: remove an existing TX TS */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2558,6 +2569,12 @@ struct cfg80211_ops { int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef); + + int (*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev, + u8 tsid, const u8 *peer, u8 user_prio, + u16 admitted_time); + int (*del_tx_ts)(struct wiphy *wiphy, struct net_device *dev, + u8 tsid, const u8 *peer); }; /* @@ -2604,9 +2621,13 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). + * @WIPHY_FLAG_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM + * TSPEC sessions (TID aka TSID 0-7) with the NL80211_CMD_ADD_TX_TS + * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it + * needs to be able to handle Block-Ack agreements and other things. */ enum wiphy_flags { - /* use hole at 0 */ + WIPHY_FLAG_SUPPORTS_WMM_ADMISSION = BIT(0), /* use hole at 1 */ /* use hole at 2 */ WIPHY_FLAG_NETNS_OK = BIT(3), diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 29c4399e08b9..e5b8caf5e466 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -722,6 +722,22 @@ * QoS mapping is relevant for IP packets, it is only valid during an * association. This is cleared on disassociation and AP restart. * + * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given + * %NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO + * and %NL80211_ATTR_ADMITTED_TIME parameters. + * Note that the action frame handshake with the AP shall be handled by + * userspace via the normal management RX/TX framework, this only sets + * up the TX TS in the driver/device. + * If the admitted time attribute is not added then the request just checks + * if a subsequent setup could be successful, the intent is to use this to + * avoid setting up a session with the AP when local restrictions would + * make that impossible. However, the subsequent "real" setup may still + * fail even if the check was successful. + * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID + * and %NL80211_ATTR_MAC parameters. It isn't necessary to call this + * before removing a station entry entirely, or before disassociating + * or similar, cleanup will happen in the driver/device in this case. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -893,6 +909,9 @@ enum nl80211_commands { NL80211_CMD_SET_QOS_MAP, + NL80211_CMD_ADD_TX_TS, + NL80211_CMD_DEL_TX_TS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1611,6 +1630,11 @@ enum nl80211_commands { * drivers to indicate dynack capability. Dynack is automatically disabled * setting valid value for coverage class. * + * @NL80211_ATTR_TSID: a TSID value (u8 attribute) + * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute) + * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds + * (per second) (u16 attribute) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1957,6 +1981,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_DYN_ACK, + NL80211_ATTR_TSID, + NL80211_ATTR_USER_PRIO, + NL80211_ATTR_ADMITTED_TIME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e9fbd4f4ddb0..ab7ee4893e40 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -391,6 +391,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, [NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG }, + [NL80211_ATTR_TSID] = { .type = NLA_U8 }, + [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 }, + [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -1510,6 +1513,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); CMD(set_qos_map, SET_QOS_MAP); + if (rdev->wiphy.flags & + WIPHY_FLAG_SUPPORTS_WMM_ADMISSION) + CMD(add_tx_ts, ADD_TX_TS); } /* add into the if now */ #undef CMD @@ -9390,6 +9396,93 @@ static int nl80211_set_qos_map(struct sk_buff *skb, return ret; } +static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *peer; + u8 tsid, up; + u16 admitted_time = 0; + int err; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_USER_PRIO]) + return -EINVAL; + + tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); + if (tsid >= IEEE80211_NUM_TIDS) + return -EINVAL; + + up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]); + if (up >= IEEE80211_NUM_UPS) + return -EINVAL; + + /* WMM uses TIDs 0-7 even for TSPEC */ + if (tsid < IEEE80211_FIRST_TSPEC_TSID) { + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)) + return -EINVAL; + } else { + /* TODO: handle 802.11 TSPEC/admission control + * need more attributes for that (e.g. BA session requirement) + */ + return -EINVAL; + } + + peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_ADMITTED_TIME]) { + admitted_time = + nla_get_u16(info->attrs[NL80211_ATTR_ADMITTED_TIME]); + if (!admitted_time) + return -EINVAL; + } + + wdev_lock(wdev); + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + if (wdev->current_bss) + break; + err = -ENOTCONN; + goto out; + default: + err = -EOPNOTSUPP; + goto out; + } + + err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time); + + out: + wdev_unlock(wdev); + return err; +} + +static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *peer; + u8 tsid; + int err; + + if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); + peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + wdev_lock(wdev); + err = rdev_del_tx_ts(rdev, dev, tsid, peer); + wdev_unlock(wdev); + + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -10147,6 +10240,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_ADD_TX_TS, + .doit = nl80211_add_tx_ts, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_DEL_TX_TS, + .doit = nl80211_del_tx_ts, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; /* notification functions */ diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 56c2240c30ce..f6d457d6a558 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -915,4 +915,35 @@ rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_add_tx_ts(struct cfg80211_registered_device *rdev, + struct net_device *dev, u8 tsid, const u8 *peer, + u8 user_prio, u16 admitted_time) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_add_tx_ts(&rdev->wiphy, dev, tsid, peer, + user_prio, admitted_time); + if (rdev->ops->add_tx_ts) + ret = rdev->ops->add_tx_ts(&rdev->wiphy, dev, tsid, peer, + user_prio, admitted_time); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + +static inline int +rdev_del_tx_ts(struct cfg80211_registered_device *rdev, + struct net_device *dev, u8 tsid, const u8 *peer) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_del_tx_ts(&rdev->wiphy, dev, tsid, peer); + if (rdev->ops->del_tx_ts) + ret = rdev->ops->del_tx_ts(&rdev->wiphy, dev, tsid, peer); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 0c524cd76c83..625a6e6d1168 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1896,6 +1896,51 @@ TRACE_EVENT(rdev_set_ap_chanwidth, WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(rdev_add_tx_ts, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time), + TP_ARGS(wiphy, netdev, tsid, peer, user_prio, admitted_time), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(u8, tsid) + __field(u8, user_prio) + __field(u16, admitted_time) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->tsid = tsid; + __entry->user_prio = user_prio; + __entry->admitted_time = admitted_time; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d, UP %d, time %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), + __entry->tsid, __entry->user_prio, __entry->admitted_time) +); + +TRACE_EVENT(rdev_del_tx_ts, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + u8 tsid, const u8 *peer), + TP_ARGS(wiphy, netdev, tsid, peer), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(u8, tsid) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->tsid = tsid; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tsid) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3 From 047133066e6c2549403fe5a2d619f47ba4212ef5 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:22 -0700 Subject: leds: Reorder include directives Reorder include directives so that they are arranged in alphabetical order. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 13 +++++++------ drivers/leds/led-core.c | 3 ++- include/linux/leds.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index aa29198fca3e..4bd4572efe6e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -9,16 +9,17 @@ * published by the Free Software Foundation. */ -#include -#include +#include +#include +#include #include +#include +#include #include +#include +#include #include -#include #include -#include -#include -#include #include "leds.h" static struct class *leds_class; diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 71b40d3bf776..50b579ad948e 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -12,10 +12,11 @@ */ #include +#include #include #include +#include #include -#include #include "leds.h" DECLARE_RWSEM(leds_list_lock); diff --git a/include/linux/leds.h b/include/linux/leds.h index e43686472197..4be2d7623d9e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,8 +13,8 @@ #define __LINUX_LEDS_H_INCLUDED #include -#include #include +#include #include #include -- cgit v1.2.3 From d8082827d8a214343b761f2c4554d2a7d1573d63 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:23 -0700 Subject: leds: make brightness type consistent across whole subsystem Documentations states that brightness units type is enum led_brightness and this is the type used by the led API functions. Adjust the type of brightness variables in the struct led_classdev accordingly. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 4be2d7623d9e..f2e1cbc25705 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -31,8 +31,8 @@ enum led_brightness { struct led_classdev { const char *name; - int brightness; - int max_brightness; + enum led_brightness brightness; + enum led_brightness max_brightness; int flags; /* Lower 16 bits reflect status */ -- cgit v1.2.3 From a3b3ca753cdc92c7d5f57404afed3115b3b79cc6 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Thu, 11 Sep 2014 23:15:01 -0700 Subject: Input: add haptic driver on max77693 This driver to supports the haptic controller on MAX77693 Multifunction device with PMIC, CHARGER, LED, MUIC, HAPTIC. This driver supports external pwm and LRA (Linear Resonant Actuator) motor. User can control the haptic device via force feedback framework. Signed-off-by: Jaewon Kim Acked-by: Chanwoo Choi Signed-off-by: Dmitry Torokhov --- drivers/input/misc/Kconfig | 11 ++ drivers/input/misc/Makefile | 1 + drivers/input/misc/max77693-haptic.c | 357 +++++++++++++++++++++++++++++++++++ include/linux/mfd/max77693-private.h | 9 + 4 files changed, 378 insertions(+) create mode 100644 drivers/input/misc/max77693-haptic.c (limited to 'include/linux') diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 36382e5b7703..23297ab6163f 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -144,6 +144,17 @@ config INPUT_M68K_BEEP tristate "M68k Beeper support" depends on M68K +config INPUT_MAX77693_HAPTIC + tristate "MAXIM MAX77693 haptic controller support" + depends on MFD_MAX77693 && PWM + select INPUT_FF_MEMLESS + help + This option enables support for the haptic controller on + MAXIM MAX77693 chip. + + To compile this driver as module, choose M here: the + module will be called max77693-haptic. + config INPUT_MAX8925_ONKEY tristate "MAX8925 ONKEY support" depends on MFD_MAX8925 diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index e8b84d2c845f..19c760361f80 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o +obj-$(CONFIG_INPUT_MAX77693_HAPTIC) += max77693-haptic.o obj-$(CONFIG_INPUT_MAX8925_ONKEY) += max8925_onkey.o obj-$(CONFIG_INPUT_MAX8997_HAPTIC) += max8997_haptic.o obj-$(CONFIG_INPUT_MC13783_PWRBUTTON) += mc13783-pwrbutton.o diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c new file mode 100644 index 000000000000..d605db4d2f39 --- /dev/null +++ b/drivers/input/misc/max77693-haptic.c @@ -0,0 +1,357 @@ +/* + * MAXIM MAX77693 Haptic device driver + * + * Copyright (C) 2014 Samsung Electronics + * Jaewon Kim + * + * This program is not provided / owned by Maxim Integrated Products. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_MAGNITUDE_SHIFT 16 + +enum max77693_haptic_motor_type { + MAX77693_HAPTIC_ERM = 0, + MAX77693_HAPTIC_LRA, +}; + +enum max77693_haptic_pulse_mode { + MAX77693_HAPTIC_EXTERNAL_MODE = 0, + MAX77693_HAPTIC_INTERNAL_MODE, +}; + +enum max77693_haptic_pwm_divisor { + MAX77693_HAPTIC_PWM_DIVISOR_32 = 0, + MAX77693_HAPTIC_PWM_DIVISOR_64, + MAX77693_HAPTIC_PWM_DIVISOR_128, + MAX77693_HAPTIC_PWM_DIVISOR_256, +}; + +struct max77693_haptic { + struct regmap *regmap_pmic; + struct regmap *regmap_haptic; + struct device *dev; + struct input_dev *input_dev; + struct pwm_device *pwm_dev; + struct regulator *motor_reg; + + bool enabled; + bool suspend_state; + unsigned int magnitude; + unsigned int pwm_duty; + enum max77693_haptic_motor_type type; + enum max77693_haptic_pulse_mode mode; + enum max77693_haptic_pwm_divisor pwm_divisor; + + struct work_struct work; +}; + +static int max77693_haptic_set_duty_cycle(struct max77693_haptic *haptic) +{ + int delta = (haptic->pwm_dev->period + haptic->pwm_duty) / 2; + int error; + + error = pwm_config(haptic->pwm_dev, delta, haptic->pwm_dev->period); + if (error) { + dev_err(haptic->dev, "failed to configure pwm: %d\n", error); + return error; + } + + return 0; +} + +static int max77693_haptic_configure(struct max77693_haptic *haptic, + bool enable) +{ + unsigned int value; + int error; + + value = ((haptic->type << MAX77693_CONFIG2_MODE) | + (enable << MAX77693_CONFIG2_MEN) | + (haptic->mode << MAX77693_CONFIG2_HTYP) | + (haptic->pwm_divisor)); + + error = regmap_write(haptic->regmap_haptic, + MAX77693_HAPTIC_REG_CONFIG2, value); + if (error) { + dev_err(haptic->dev, + "failed to update haptic config: %d\n", error); + return error; + } + + return 0; +} + +static int max77693_haptic_lowsys(struct max77693_haptic *haptic, bool enable) +{ + int error; + + error = regmap_update_bits(haptic->regmap_pmic, + MAX77693_PMIC_REG_LSCNFG, + MAX77693_PMIC_LOW_SYS_MASK, + enable << MAX77693_PMIC_LOW_SYS_SHIFT); + if (error) { + dev_err(haptic->dev, "cannot update pmic regmap: %d\n", error); + return error; + } + + return 0; +} + +static void max77693_haptic_enable(struct max77693_haptic *haptic) +{ + int error; + + if (haptic->enabled) + return; + + error = pwm_enable(haptic->pwm_dev); + if (error) { + dev_err(haptic->dev, + "failed to enable haptic pwm device: %d\n", error); + return; + } + + error = max77693_haptic_lowsys(haptic, true); + if (error) + goto err_enable_lowsys; + + error = max77693_haptic_configure(haptic, true); + if (error) + goto err_enable_config; + + haptic->enabled = true; + + return; + +err_enable_config: + max77693_haptic_lowsys(haptic, false); +err_enable_lowsys: + pwm_disable(haptic->pwm_dev); +} + +static void max77693_haptic_disable(struct max77693_haptic *haptic) +{ + int error; + + if (haptic->enabled) + return; + + error = max77693_haptic_configure(haptic, false); + if (error) + return; + + error = max77693_haptic_lowsys(haptic, false); + if (error) + goto err_disable_lowsys; + + pwm_disable(haptic->pwm_dev); + haptic->enabled = false; + + return; + +err_disable_lowsys: + max77693_haptic_configure(haptic, true); +} + +static void max77693_haptic_play_work(struct work_struct *work) +{ + struct max77693_haptic *haptic = + container_of(work, struct max77693_haptic, work); + int error; + + error = max77693_haptic_set_duty_cycle(haptic); + if (error) { + dev_err(haptic->dev, "failed to set duty cycle: %d\n", error); + return; + } + + if (haptic->magnitude) + max77693_haptic_enable(haptic); + else + max77693_haptic_disable(haptic); +} + +static int max77693_haptic_play_effect(struct input_dev *dev, void *data, + struct ff_effect *effect) +{ + struct max77693_haptic *haptic = input_get_drvdata(dev); + uint64_t period_mag_multi; + + haptic->magnitude = effect->u.rumble.strong_magnitude; + if (!haptic->magnitude) + haptic->magnitude = effect->u.rumble.weak_magnitude; + + /* + * The magnitude comes from force-feedback interface. + * The formula to convert magnitude to pwm_duty as follows: + * - pwm_duty = (magnitude * pwm_period) / MAX_MAGNITUDE(0xFFFF) + */ + period_mag_multi = (int64_t)(haptic->pwm_dev->period * + haptic->magnitude); + haptic->pwm_duty = (unsigned int)(period_mag_multi >> + MAX_MAGNITUDE_SHIFT); + + schedule_work(&haptic->work); + + return 0; +} + +static int max77693_haptic_open(struct input_dev *dev) +{ + struct max77693_haptic *haptic = input_get_drvdata(dev); + int error; + + error = regulator_enable(haptic->motor_reg); + if (error) { + dev_err(haptic->dev, + "failed to enable regulator: %d\n", error); + return error; + } + + return 0; +} + +static void max77693_haptic_close(struct input_dev *dev) +{ + struct max77693_haptic *haptic = input_get_drvdata(dev); + int error; + + cancel_work_sync(&haptic->work); + max77693_haptic_disable(haptic); + + error = regulator_disable(haptic->motor_reg); + if (error) + dev_err(haptic->dev, + "failed to disable regulator: %d\n", error); +} + +static int max77693_haptic_probe(struct platform_device *pdev) +{ + struct max77693_dev *max77693 = dev_get_drvdata(pdev->dev.parent); + struct max77693_haptic *haptic; + int error; + + haptic = devm_kzalloc(&pdev->dev, sizeof(*haptic), GFP_KERNEL); + if (!haptic) + return -ENOMEM; + + haptic->regmap_pmic = max77693->regmap; + haptic->regmap_haptic = max77693->regmap_haptic; + haptic->dev = &pdev->dev; + haptic->type = MAX77693_HAPTIC_LRA; + haptic->mode = MAX77693_HAPTIC_EXTERNAL_MODE; + haptic->pwm_divisor = MAX77693_HAPTIC_PWM_DIVISOR_128; + haptic->suspend_state = false; + + INIT_WORK(&haptic->work, max77693_haptic_play_work); + + /* Get pwm and regulatot for haptic device */ + haptic->pwm_dev = devm_pwm_get(&pdev->dev, NULL); + if (IS_ERR(haptic->pwm_dev)) { + dev_err(&pdev->dev, "failed to get pwm device\n"); + return PTR_ERR(haptic->pwm_dev); + } + + haptic->motor_reg = devm_regulator_get(&pdev->dev, "haptic"); + if (IS_ERR(haptic->motor_reg)) { + dev_err(&pdev->dev, "failed to get regulator\n"); + return PTR_ERR(haptic->motor_reg); + } + + /* Initialize input device for haptic device */ + haptic->input_dev = devm_input_allocate_device(&pdev->dev); + if (!haptic->input_dev) { + dev_err(&pdev->dev, "failed to allocate input device\n"); + return -ENOMEM; + } + + haptic->input_dev->name = "max77693-haptic"; + haptic->input_dev->id.version = 1; + haptic->input_dev->dev.parent = &pdev->dev; + haptic->input_dev->open = max77693_haptic_open; + haptic->input_dev->close = max77693_haptic_close; + input_set_drvdata(haptic->input_dev, haptic); + input_set_capability(haptic->input_dev, EV_FF, FF_RUMBLE); + + error = input_ff_create_memless(haptic->input_dev, NULL, + max77693_haptic_play_effect); + if (error) { + dev_err(&pdev->dev, "failed to create force-feedback\n"); + return error; + } + + error = input_register_device(haptic->input_dev); + if (error) { + dev_err(&pdev->dev, "failed to register input device\n"); + return error; + } + + platform_set_drvdata(pdev, haptic); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int max77693_haptic_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct max77693_haptic *haptic = platform_get_drvdata(pdev); + + if (haptic->enabled) { + max77693_haptic_disable(haptic); + haptic->suspend_state = true; + } + + return 0; +} + +static int max77693_haptic_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct max77693_haptic *haptic = platform_get_drvdata(pdev); + + if (haptic->suspend_state) { + max77693_haptic_enable(haptic); + haptic->suspend_state = false; + } + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(max77693_haptic_pm_ops, + max77693_haptic_suspend, max77693_haptic_resume); + +static struct platform_driver max77693_haptic_driver = { + .driver = { + .name = "max77693-haptic", + .owner = THIS_MODULE, + .pm = &max77693_haptic_pm_ops, + }, + .probe = max77693_haptic_probe, +}; +module_platform_driver(max77693_haptic_driver); + +MODULE_AUTHOR("Jaewon Kim "); +MODULE_DESCRIPTION("MAXIM MAX77693 Haptic driver"); +MODULE_ALIAS("platform:max77693-haptic"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index c466ff3e16b8..d0e578fd7053 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -251,6 +251,15 @@ enum max77693_haptic_reg { MAX77693_HAPTIC_REG_END, }; +/* max77693-pmic LSCNFG configuraton register */ +#define MAX77693_PMIC_LOW_SYS_MASK 0x80 +#define MAX77693_PMIC_LOW_SYS_SHIFT 7 + +/* max77693-haptic configuration register */ +#define MAX77693_CONFIG2_MODE 7 +#define MAX77693_CONFIG2_MEN 6 +#define MAX77693_CONFIG2_HTYP 5 + enum max77693_irq_source { LED_INT = 0, TOPSYS_INT, -- cgit v1.2.3 From 974a70bdecea5296db1b643e4046ef208e99c592 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 12 Sep 2014 09:32:41 +0800 Subject: usb: gadget: udc-core: add utility for bus reset The udc driver can notify the udc core that bus reset occurs by calling this utility, the core will notify gadget driver this information and update gadget state accordingly. Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/udc-core.c | 17 +++++++++++++++++ include/linux/usb/gadget.h | 6 ++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index b0d98172bc07..ba661c6da54a 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -123,6 +123,23 @@ EXPORT_SYMBOL_GPL(usb_gadget_set_state); /* ------------------------------------------------------------------------- */ +/** + * usb_gadget_udc_reset - notifies the udc core that bus reset occurs + * @gadget: The gadget which bus reset occurs + * @driver: The gadget driver we want to notify + * + * If the udc driver has bus reset handler, it needs to call this when the bus + * reset occurs, it notifies the gadget driver that the bus reset occurs as + * well as updates gadget state. + */ +void usb_gadget_udc_reset(struct usb_gadget *gadget, + struct usb_gadget_driver *driver) +{ + driver->reset(gadget); + usb_gadget_set_state(gadget, USB_STATE_DEFAULT); +} +EXPORT_SYMBOL_GPL(usb_gadget_udc_reset); + /** * usb_gadget_udc_start - tells usb device controller to start up * @gadget: The gadget we want to get started diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 598a6e9b2850..d18811433324 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1017,6 +1017,12 @@ extern void usb_gadget_set_state(struct usb_gadget *gadget, /*-------------------------------------------------------------------------*/ +/* utility to tell udc core that the bus reset occurs */ +extern void usb_gadget_udc_reset(struct usb_gadget *gadget, + struct usb_gadget_driver *driver); + +/*-------------------------------------------------------------------------*/ + /* utility wrapping a simple endpoint selection policy */ extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *, -- cgit v1.2.3 From d4b18c3e00b8d18fbd316abe9639b91ad416e1f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Sep 2014 17:36:31 -0700 Subject: pnfs: remove GETDEVICELIST implementation The current GETDEVICELIST implementation is buggy in that it doesn't handle cursors correctly, and in that it returns an error if the server returns NFSERR_NOTSUPP. Given that there is no actual need for GETDEVICELIST, it has various issues and might get removed for NFSv4.2 stop using it in the blocklayout driver, and thus the Linux NFS client as whole. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/nfs4proc.c | 48 --------------- fs/nfs/nfs4xdr.c | 130 --------------------------------------- fs/nfs/pnfs.h | 5 -- fs/nfs/pnfs_dev.c | 30 --------- include/linux/nfs_xdr.h | 11 ---- 6 files changed, 1 insertion(+), 225 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 25ba9e0e6fff..3e1f1afc6db4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -532,7 +532,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) return -EINVAL; } - return nfs4_deviceid_getdevicelist(server, fh); + return 0; } static bool diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 47fa67a3a8cf..0b711227cfa5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7808,54 +7808,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) return status; } -/* - * Retrieve the list of Data Server devices from the MDS. - */ -static int _nfs4_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist) -{ - struct nfs4_getdevicelist_args args = { - .fh = fh, - .layoutclass = server->pnfs_curr_ld->id, - }; - struct nfs4_getdevicelist_res res = { - .devlist = devlist, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], - .rpc_argp = &args, - .rpc_resp = &res, - }; - int status; - - dprintk("--> %s\n", __func__); - status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, - &res.seq_res, 0); - dprintk("<-- %s status=%d\n", __func__, status); - return status; -} - -int nfs4_proc_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist) -{ - struct nfs4_exception exception = { }; - int err; - - do { - err = nfs4_handle_exception(server, - _nfs4_getdevicelist(server, fh, devlist), - &exception); - } while (exception.retry); - - dprintk("%s: err=%d, num_devs=%u\n", __func__, - err, devlist->num_devs); - - return err; -} -EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); - static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f2cd957adb90..b8165eab0a32 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -362,17 +362,6 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) -#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ - encode_verifier_maxsz) -#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ - 2 /* nfs_cookie4 gdlr_cookie */ + \ - decode_verifier_maxsz \ - /* verifier4 gdlr_verifier */ + \ - 1 /* gdlr_deviceid_list count */ + \ - XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ - NFS4_DEVICEID4_SIZE) \ - /* gdlr_deviceid_list */ + \ - 1 /* bool gdlr_eof */) #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ @@ -812,14 +801,6 @@ static int nfs4_stat_to_errno(int); #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_reclaim_complete_maxsz) -#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_getdevicelist_maxsz) -#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_getdevicelist_maxsz) #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz +\ encode_getdeviceinfo_maxsz) @@ -1929,24 +1910,6 @@ static void encode_sequence(struct xdr_stream *xdr, } #ifdef CONFIG_NFS_V4_1 -static void -encode_getdevicelist(struct xdr_stream *xdr, - const struct nfs4_getdevicelist_args *args, - struct compound_hdr *hdr) -{ - __be32 *p; - nfs4_verifier dummy = { - .data = "dummmmmy", - }; - - encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(args->layoutclass); - *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); - xdr_encode_hyper(p, 0ULL); /* cookie */ - encode_nfs4_verifier(xdr, &dummy); -} - static void encode_getdeviceinfo(struct xdr_stream *xdr, const struct nfs4_getdeviceinfo_args *args, @@ -2900,24 +2863,6 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, encode_nops(&hdr); } -/* - * Encode GETDEVICELIST request - */ -static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs4_getdevicelist_args *args) -{ - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - encode_getdevicelist(xdr, args, &hdr); - encode_nops(&hdr); -} - /* * Encode GETDEVICEINFO request */ @@ -5773,54 +5718,6 @@ out_overflow: } #if defined(CONFIG_NFS_V4_1) -/* - * TODO: Need to handle case when EOF != true; - */ -static int decode_getdevicelist(struct xdr_stream *xdr, - struct pnfs_devicelist *res) -{ - __be32 *p; - int status, i; - nfs4_verifier verftemp; - - status = decode_op_hdr(xdr, OP_GETDEVICELIST); - if (status) - return status; - - p = xdr_inline_decode(xdr, 8 + 8 + 4); - if (unlikely(!p)) - goto out_overflow; - - /* TODO: Skip cookie for now */ - p += 2; - - /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); - - res->num_devs = be32_to_cpup(p); - - dprintk("%s: num_dev %d\n", __func__, res->num_devs); - - if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { - printk(KERN_ERR "NFS: %s too many result dev_num %u\n", - __func__, res->num_devs); - return -EIO; - } - - p = xdr_inline_decode(xdr, - res->num_devs * NFS4_DEVICEID4_SIZE + 4); - if (unlikely(!p)) - goto out_overflow; - for (i = 0; i < res->num_devs; i++) - p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, - NFS4_DEVICEID4_SIZE); - res->eof = be32_to_cpup(p); - return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; -} - static int decode_getdeviceinfo(struct xdr_stream *xdr, struct pnfs_device *pdev) { @@ -7104,32 +7001,6 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, return status; } -/* - * Decode GETDEVICELIST response - */ -static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, - struct xdr_stream *xdr, - struct nfs4_getdevicelist_res *res) -{ - struct compound_hdr hdr; - int status; - - dprintk("encoding getdevicelist!\n"); - - status = decode_compound_hdr(xdr, &hdr); - if (status != 0) - goto out; - status = decode_sequence(xdr, &res->seq_res, rqstp); - if (status != 0) - goto out; - status = decode_putfh(xdr); - if (status != 0) - goto out; - status = decode_getdevicelist(xdr, res->devlist); -out: - return status; -} - /* * Decode GETDEVINFO response */ @@ -7498,7 +7369,6 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), - PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), PROC(BIND_CONN_TO_SESSION, enc_bind_conn_to_session, dec_bind_conn_to_session), PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d84fe29fc88b..b10f12fc6818 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,9 +180,6 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); /* nfs4proc.c */ -extern int nfs4_proc_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); @@ -277,8 +274,6 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); -int nfs4_deviceid_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh); static inline struct nfs4_deviceid_node * nfs4_get_deviceid(struct nfs4_deviceid_node *d) diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 82c2836fdb38..aa2ec0015183 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -358,33 +358,3 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) } rcu_read_unlock(); } - -int -nfs4_deviceid_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh) -{ - struct pnfs_devicelist *dlist; - struct nfs4_deviceid_node *d; - int error = 0, i; - - dlist = kzalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); - if (!dlist) - return -ENOMEM; - - while (!dlist->eof) { - error = nfs4_proc_getdevicelist(server, fh, dlist); - if (error) - break; - - for (i = 0; i < dlist->num_devs; i++) { - d = nfs4_find_get_deviceid(server, &dlist->dev_id[i], - NULL, GFP_NOFS); - if (d) - nfs4_put_deviceid_node(d); - } - } - - kfree(dlist); - return error; -} -EXPORT_SYMBOL_GPL(nfs4_deviceid_getdevicelist); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f4092c6b90fb..7ae249ccb78d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -252,17 +252,6 @@ struct nfs4_layoutget { gfp_t gfp_flags; }; -struct nfs4_getdevicelist_args { - struct nfs4_sequence_args seq_args; - const struct nfs_fh *fh; - u32 layoutclass; -}; - -struct nfs4_getdevicelist_res { - struct nfs4_sequence_res seq_res; - struct pnfs_devicelist *devlist; -}; - struct nfs4_getdeviceinfo_args { struct nfs4_sequence_args seq_args; struct pnfs_device *pdev; -- cgit v1.2.3 From f418c64b71590bac8fdebd0969a1eeaffaf036d2 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 3 Sep 2014 12:19:07 -0400 Subject: NFS: Unconditionally enable commit code The goal is to create a generic NFS module with code that does not depend on what versions of NFS are enabled. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 14 ------------ fs/nfs/write.c | 61 -------------------------------------------------- include/linux/nfs_fs.h | 8 ------- 3 files changed, 83 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 65ef6e00deee..dda4b8667c02 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -178,7 +178,6 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /* * nfs_direct_cmp_commit_data_verf - compare verifier for commit data * @dreq - direct request possibly spanning multiple servers @@ -197,7 +196,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, WARN_ON_ONCE(verfp->committed < 0); return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); } -#endif /** * nfs_direct_IO - NFS address space operation for direct I/O @@ -576,7 +574,6 @@ out: return result; } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; @@ -700,17 +697,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } -#else -static void nfs_direct_write_schedule_work(struct work_struct *work) -{ -} - -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) -{ - nfs_direct_complete(dreq, true); -} -#endif - static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 128d97f01d1c..6786873a2901 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -720,7 +720,6 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /* * nfs_page_search_commits_for_head_request_locked * @@ -870,43 +869,6 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr) return hdr->verf.committed != NFS_FILE_SYNC; } -#else -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct page *page) -{ - return NULL; -} - -static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, - struct inode *inode) -{ -} - -void nfs_init_cinfo(struct nfs_commit_info *cinfo, - struct inode *inode, - struct nfs_direct_req *dreq) -{ -} - -void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) -{ -} - -static void -nfs_clear_request_commit(struct nfs_page *req) -{ -} - -int nfs_write_need_commit(struct nfs_pgio_header *hdr) -{ - return 0; -} - -#endif - static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; @@ -942,7 +904,6 @@ out: hdr->release(hdr); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -999,19 +960,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, return ret; } -#else -unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) -{ - return 0; -} - -int nfs_scan_commit(struct inode *inode, struct list_head *dst, - struct nfs_commit_info *cinfo) -{ - return 0; -} -#endif - /* * Search for an existing write request, and attempt to update * it to reflect a new dirty region on a given page. @@ -1404,7 +1352,6 @@ static int nfs_writeback_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) if (hdr->res.verf->committed < hdr->args.stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1426,7 +1373,6 @@ static int nfs_writeback_done(struct rpc_task *task, complain = jiffies + 300 * HZ; } } -#endif /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) @@ -1479,7 +1425,6 @@ static void nfs_writeback_result(struct rpc_task *task, } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; @@ -1803,12 +1748,6 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } -#else -static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) -{ - return 0; -} -#endif int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5180a7ededec..fd334d4b0d41 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -529,17 +529,9 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); -#else -static inline int -nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} -#endif static inline int nfs_have_writebacks(struct inode *inode) -- cgit v1.2.3 From cb8c20fa53ec28602793ee43ddc7e8883be62e69 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 3 Sep 2014 12:19:10 -0400 Subject: NFS: Move NFS v3 acl functions to nfs3_fs.h This code is internal to the v3 module, so other parts of the client shouldn't have any knowledge of it. nfs3_getxattr(), nfs3_setxattr(), and nfs3_removexattr() no longer exist anywhere so I remove the declarations while I'm here. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3_fs.h | 19 +++++++++++++++++++ fs/nfs/nfs3super.c | 1 + include/linux/nfs_fs.h | 33 --------------------------------- 3 files changed, 20 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index 6599e0dcd69f..333ae4068506 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -6,6 +6,25 @@ #ifndef __LINUX_FS_NFS_NFS3_FS_H #define __LINUX_FS_NFS_NFS3_FS_H +/* + * nfs3acl.c + */ +#ifdef CONFIG_NFS_V3_ACL +extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); +extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl); +extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); +extern const struct xattr_handler *nfs3_xattr_handlers[]; +#else +static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + return 0; +} +#define nfs3_listxattr NULL +#endif /* CONFIG_NFS_V3_ACL */ + /* nfs3client.c */ struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index d6a98949af19..6af29c2da352 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -4,6 +4,7 @@ #include #include #include "internal.h" +#include "nfs3_fs.h" #include "nfs.h" static struct nfs_subversion nfs_v3 = { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fd334d4b0d41..28d649054d5f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -442,22 +442,6 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) return NULL; } -/* - * linux/fs/nfs/xattr.c - */ -#ifdef CONFIG_NFS_V3_ACL -extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); -extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t); -extern int nfs3_setxattr(struct dentry *, const char *, - const void *, size_t, int); -extern int nfs3_removexattr (struct dentry *, const char *name); -#else -# define nfs3_listxattr NULL -# define nfs3_getxattr NULL -# define nfs3_setxattr NULL -# define nfs3_removexattr NULL -#endif - /* * linux/fs/nfs/direct.c */ @@ -548,23 +532,6 @@ extern int nfs_readpages(struct file *, struct address_space *, extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); -/* - * linux/fs/nfs3proc.c - */ -#ifdef CONFIG_NFS_V3_ACL -extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); -extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, - struct posix_acl *dfacl); -extern const struct xattr_handler *nfs3_xattr_handlers[]; -#else -static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, - struct posix_acl *dfacl) -{ - return 0; -} -#endif /* CONFIG_NFS_V3_ACL */ - /* * inline functions */ -- cgit v1.2.3 From e09c2c295468476a239d13324ce9042ec4de05eb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Sep 2014 04:14:30 +0900 Subject: workqueue: apply __WQ_ORDERED to create_singlethread_workqueue() create_singlethread_workqueue() is a compat interface for single threaded workqueue which maps to ordered workqueue w/ rescuer in the current implementation. create_singlethread_workqueue() currently implemented by invoking alloc_workqueue() w/ appropriate parameters. 8719dceae2f9 ("workqueue: reject adjusting max_active or applying attrs to ordered workqueues") introduced __WQ_ORDERED to protect ordered workqueues against dynamic attribute changes which can break ordering guarantees but forgot to apply it to create_singlethread_workqueue(). This in itself is okay as nobody currently uses dynamic attribute change on workqueues created with create_singlethread_workqueue(). However, 4c16bd327c ("workqueue: implement NUMA affinity for unbound workqueues") broke singlethreaded guarantee for ordered workqueues through allocating a separate pool_workqueue on each NUMA node by default. A later change 8a2b75384444 ("workqueue: fix ordered workqueues in NUMA setups") fixed it by allocating only one global pool_workqueue if __WQ_ORDERED is set. Combined, the __WQ_ORDERED omission in create_singlethread_workqueue() became critical breaking its single threadedness and ordering guarantee. Let's make create_singlethread_workqueue() wrap alloc_ordered_workqueue() instead so that it inherits __WQ_ORDERED and can implicitly track future ordered_workqueue changes. v2: I missed that __WQ_ORDERED now protects against pwq splitting across NUMA nodes and incorrectly described the patch as a nice-to-have fix to protect against future dynamic attribute usages. Oleg pointed out that this is actually a critical breakage due to 8a2b75384444 ("workqueue: fix ordered workqueues in NUMA setups"). Signed-off-by: Tejun Heo Reported-by: Mike Anderson Cc: Oleg Nesterov Cc: Gustavo Luiz Duarte Cc: Tomas Henzl Cc: stable@vger.kernel.org Fixes: 4c16bd327c ("workqueue: implement NUMA affinity for unbound workqueues") --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a0cc2e95ed1b..b996e6cde6bb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -419,7 +419,7 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \ 1, (name)) #define create_singlethread_workqueue(name) \ - alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, (name)) + alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name) extern void destroy_workqueue(struct workqueue_struct *wq); -- cgit v1.2.3 From d78c9300c51d6ceed9f6d078d4e9366f259de28c Mon Sep 17 00:00:00 2001 From: Andrew Hunter Date: Thu, 4 Sep 2014 14:17:16 -0700 Subject: jiffies: Fix timeval conversion to jiffies timeval_to_jiffies tried to round a timeval up to an integral number of jiffies, but the logic for doing so was incorrect: intervals corresponding to exactly N jiffies would become N+1. This manifested itself particularly repeatedly stopping/starting an itimer: setitimer(ITIMER_PROF, &val, NULL); setitimer(ITIMER_PROF, NULL, &val); would add a full tick to val, _even if it was exactly representable in terms of jiffies_ (say, the result of a previous rounding.) Doing this repeatedly would cause unbounded growth in val. So fix the math. Here's what was wrong with the conversion: we essentially computed (eliding seconds) jiffies = usec * (NSEC_PER_USEC/TICK_NSEC) by using scaling arithmetic, which took the best approximation of NSEC_PER_USEC/TICK_NSEC with denominator of 2^USEC_JIFFIE_SC = x/(2^USEC_JIFFIE_SC), and computed: jiffies = (usec * x) >> USEC_JIFFIE_SC and rounded this calculation up in the intermediate form (since we can't necessarily exactly represent TICK_NSEC in usec.) But the scaling arithmetic is a (very slight) *over*approximation of the true value; that is, instead of dividing by (1 usec/ 1 jiffie), we effectively divided by (1 usec/1 jiffie)-epsilon (rounding down). This would normally be fine, but we want to round timeouts up, and we did so by adding 2^USEC_JIFFIE_SC - 1 before the shift; this would be fine if our division was exact, but dividing this by the slightly smaller factor was equivalent to adding just _over_ 1 to the final result (instead of just _under_ 1, as desired.) In particular, with HZ=1000, we consistently computed that 10000 usec was 11 jiffies; the same was true for any exact multiple of TICK_NSEC. We could possibly still round in the intermediate form, adding something less than 2^USEC_JIFFIE_SC - 1, but easier still is to convert usec->nsec, round in nanoseconds, and then convert using time*spec*_to_jiffies. This adds one constant multiplication, and is not observably slower in microbenchmarks on recent x86 hardware. Tested: the following program: int main() { struct itimerval zero = {{0, 0}, {0, 0}}; /* Initially set to 10 ms. */ struct itimerval initial = zero; initial.it_interval.tv_usec = 10000; setitimer(ITIMER_PROF, &initial, NULL); /* Save and restore several times. */ for (size_t i = 0; i < 10; ++i) { struct itimerval prev; setitimer(ITIMER_PROF, &zero, &prev); /* on old kernels, this goes up by TICK_USEC every iteration */ printf("previous value: %ld %ld %ld %ld\n", prev.it_interval.tv_sec, prev.it_interval.tv_usec, prev.it_value.tv_sec, prev.it_value.tv_usec); setitimer(ITIMER_PROF, &prev, NULL); } return 0; } Cc: stable@vger.kernel.org Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Paul Turner Cc: Richard Cochran Cc: Prarit Bhargava Reviewed-by: Paul Turner Reported-by: Aaron Jacobs Signed-off-by: Andrew Hunter [jstultz: Tweaked to apply to 3.17-rc] Signed-off-by: John Stultz --- include/linux/jiffies.h | 12 ----------- kernel/time/time.c | 56 +++++++++++++++++++++++++++---------------------- 2 files changed, 31 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 1f44466c1e9d..c367cbdf73ab 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -258,23 +258,11 @@ extern unsigned long preset_lpj; #define SEC_JIFFIE_SC (32 - SHIFT_HZ) #endif #define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 29) -#define USEC_JIFFIE_SC (SEC_JIFFIE_SC + 19) #define SEC_CONVERSION ((unsigned long)((((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) +\ TICK_NSEC -1) / (u64)TICK_NSEC)) #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\ TICK_NSEC -1) / (u64)TICK_NSEC)) -#define USEC_CONVERSION \ - ((unsigned long)((((u64)NSEC_PER_USEC << USEC_JIFFIE_SC) +\ - TICK_NSEC -1) / (u64)TICK_NSEC)) -/* - * USEC_ROUND is used in the timeval to jiffie conversion. See there - * for more details. It is the scaled resolution rounding value. Note - * that it is a 64-bit value. Since, when it is applied, we are already - * in jiffies (albit scaled), it is nothing but the bits we will shift - * off. - */ -#define USEC_ROUND (u64)(((u64)1 << USEC_JIFFIE_SC) - 1) /* * The maximum jiffie value is (MAX_INT >> 1). Here we translate that * into seconds. The 64-bit case will overflow if we are not careful, diff --git a/kernel/time/time.c b/kernel/time/time.c index f0294ba14634..a9ae20fb0b11 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -559,17 +559,20 @@ EXPORT_SYMBOL(usecs_to_jiffies); * that a remainder subtract here would not do the right thing as the * resolution values don't fall on second boundries. I.e. the line: * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding. + * Note that due to the small error in the multiplier here, this + * rounding is incorrect for sufficiently large values of tv_nsec, but + * well formed timespecs should have tv_nsec < NSEC_PER_SEC, so we're + * OK. * * Rather, we just shift the bits off the right. * * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec * value to a scaled second value. */ -unsigned long -timespec_to_jiffies(const struct timespec *value) +static unsigned long +__timespec_to_jiffies(unsigned long sec, long nsec) { - unsigned long sec = value->tv_sec; - long nsec = value->tv_nsec + TICK_NSEC - 1; + nsec = nsec + TICK_NSEC - 1; if (sec >= MAX_SEC_IN_JIFFIES){ sec = MAX_SEC_IN_JIFFIES; @@ -580,6 +583,13 @@ timespec_to_jiffies(const struct timespec *value) (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } + +unsigned long +timespec_to_jiffies(const struct timespec *value) +{ + return __timespec_to_jiffies(value->tv_sec, value->tv_nsec); +} + EXPORT_SYMBOL(timespec_to_jiffies); void @@ -596,31 +606,27 @@ jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) } EXPORT_SYMBOL(jiffies_to_timespec); -/* Same for "timeval" - * - * Well, almost. The problem here is that the real system resolution is - * in nanoseconds and the value being converted is in micro seconds. - * Also for some machines (those that use HZ = 1024, in-particular), - * there is a LARGE error in the tick size in microseconds. - - * The solution we use is to do the rounding AFTER we convert the - * microsecond part. Thus the USEC_ROUND, the bits to be shifted off. - * Instruction wise, this should cost only an additional add with carry - * instruction above the way it was done above. +/* + * We could use a similar algorithm to timespec_to_jiffies (with a + * different multiplier for usec instead of nsec). But this has a + * problem with rounding: we can't exactly add TICK_NSEC - 1 to the + * usec value, since it's not necessarily integral. + * + * We could instead round in the intermediate scaled representation + * (i.e. in units of 1/2^(large scale) jiffies) but that's also + * perilous: the scaling introduces a small positive error, which + * combined with a division-rounding-upward (i.e. adding 2^(scale) - 1 + * units to the intermediate before shifting) leads to accidental + * overflow and overestimates. + * + * At the cost of one additional multiplication by a constant, just + * use the timespec implementation. */ unsigned long timeval_to_jiffies(const struct timeval *value) { - unsigned long sec = value->tv_sec; - long usec = value->tv_usec; - - if (sec >= MAX_SEC_IN_JIFFIES){ - sec = MAX_SEC_IN_JIFFIES; - usec = 0; - } - return (((u64)sec * SEC_CONVERSION) + - (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> - (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; + return __timespec_to_jiffies(value->tv_sec, + value->tv_usec * NSEC_PER_USEC); } EXPORT_SYMBOL(timeval_to_jiffies); -- cgit v1.2.3 From 3ef7de5304edf60d0b8674dd7cdacc104e15a93c Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 20 Aug 2014 06:41:55 -0700 Subject: leds: Improve and export led_update_brightness led_update_brightness helper function used to be exploited only locally in the led-class.c module, where its result was being passed to the brightness_show sysfs callback. With the introduction of v4l2-flash subdevice the same functionality becomes required for reading current brightness from a LED device. This patch adds checking of return value of the brightness_get callback and moves the led_update_brightness() function to the LED subsystem public API. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 6 ------ drivers/leds/led-core.c | 16 ++++++++++++++++ include/linux/leds.h | 10 ++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 71666a40b79a..7440c58b8e6f 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -24,12 +24,6 @@ static struct class *leds_class; -static void led_update_brightness(struct led_classdev *led_cdev) -{ - if (led_cdev->brightness_get) - led_cdev->brightness = led_cdev->brightness_get(led_cdev); -} - static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 50b579ad948e..aaa8eba9099f 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -127,3 +127,19 @@ void led_set_brightness(struct led_classdev *led_cdev, __led_set_brightness(led_cdev, brightness); } EXPORT_SYMBOL(led_set_brightness); + +int led_update_brightness(struct led_classdev *led_cdev) +{ + int ret = 0; + + if (led_cdev->brightness_get) { + ret = led_cdev->brightness_get(led_cdev); + if (ret >= 0) { + led_cdev->brightness = ret; + return 0; + } + } + + return ret; +} +EXPORT_SYMBOL(led_update_brightness); diff --git a/include/linux/leds.h b/include/linux/leds.h index f2e1cbc25705..a57611d0c94e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -140,6 +140,16 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, */ extern void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness); +/** + * led_update_brightness - update LED brightness + * @led_cdev: the LED to query + * + * Get an LED's current brightness and update led_cdev->brightness + * member with the obtained value. + * + * Returns: 0 on success or negative error value on failure + */ +extern int led_update_brightness(struct led_classdev *led_cdev); /* * LED Triggers -- cgit v1.2.3 From fbfa398b84a5fc6e085dedba5ec3e94f21815d05 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 28 Aug 2014 12:21:44 -0600 Subject: PCI: Remove unused pci_configure_slot() All pci_configure_slot() uses have been removed, so remove the definition as well. Signed-off-by: Bjorn Helgaas Acked-by: Yinghai Lu --- drivers/pci/probe.c | 28 ---------------------------- include/linux/pci_hotplug.h | 2 -- 2 files changed, 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e9482867555b..4b3b29bc7a82 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1358,34 +1358,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -void pci_configure_slot(struct pci_dev *dev) -{ - struct pci_dev *cdev; - struct hotplug_params hpp; - int ret; - - if (!(dev->hdr_type == PCI_HEADER_TYPE_NORMAL || - (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && - (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) - return; - - pcie_bus_configure_settings(dev->bus); - - memset(&hpp, 0, sizeof(hpp)); - ret = pci_get_hp_params(dev, &hpp); - - program_hpp_type2(dev, hpp.t2); - program_hpp_type1(dev, hpp.t1); - program_hpp_type0(dev, hpp.t0); - - if (dev->subordinate) { - list_for_each_entry(cdev, &dev->subordinate->devices, - bus_list) - pci_configure_slot(cdev); - } -} -EXPORT_SYMBOL_GPL(pci_configure_slot); - static void pci_configure_device(struct pci_dev *dev) { struct hotplug_params hpp; diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 5f2e559af6b0..2706ee9a4327 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -187,6 +187,4 @@ static inline int pci_get_hp_params(struct pci_dev *dev, return -ENODEV; } #endif - -void pci_configure_slot(struct pci_dev *dev); #endif -- cgit v1.2.3 From 46e5da40aec256155cfedee96dd21a75da941f2c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 12 Sep 2014 20:04:52 -0700 Subject: net: qdisc: use rcu prefix and silence sparse warnings Add __rcu notation to qdisc handling by doing this we can make smatch output more legible. And anyways some of the cases should be using rcu_dereference() see qdisc_all_tx_empty(), qdisc_tx_chainging(), and so on. Also *wake_queue() API is commonly called from driver timer routines without rcu lock or rtnl lock. So I added rcu_read_lock() blocks around netif_wake_subqueue and netif_tx_wake_queue. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 29 ++++----------------------- include/net/sch_generic.h | 21 +++++++++++++------ net/core/dev.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-- net/sched/sch_generic.c | 4 ++-- net/sched/sch_mqprio.c | 6 ++++-- net/sched/sch_teql.c | 13 +++++++----- 6 files changed, 82 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba72f6baae1a..ae721f53739e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -543,7 +543,7 @@ struct netdev_queue { * read mostly part */ struct net_device *dev; - struct Qdisc *qdisc; + struct Qdisc __rcu *qdisc; struct Qdisc *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; @@ -2356,12 +2356,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd, DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); void __netif_schedule(struct Qdisc *q); - -static inline void netif_schedule_queue(struct netdev_queue *txq) -{ - if (!(txq->state & QUEUE_STATE_ANY_XOFF)) - __netif_schedule(txq->qdisc); -} +void netif_schedule_queue(struct netdev_queue *txq); static inline void netif_tx_schedule_all(struct net_device *dev) { @@ -2397,11 +2392,7 @@ static inline void netif_tx_start_all_queues(struct net_device *dev) } } -static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) -{ - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) - __netif_schedule(dev_queue->qdisc); -} +void netif_tx_wake_queue(struct netdev_queue *dev_queue); /** * netif_wake_queue - restart transmit @@ -2673,19 +2664,7 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev, return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); } -/** - * netif_wake_subqueue - allow sending packets on subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Resume individual transmit queue of a device with multiple transmit queues. - */ -static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); -} +void netif_wake_subqueue(struct net_device *dev, u16 queue_index); #ifdef CONFIG_XPS int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a3cfb8ebeb53..56838ab29b42 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -259,7 +259,9 @@ static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc) { - return qdisc->dev_queue->qdisc; + struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc); + + return q; } static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) @@ -384,7 +386,7 @@ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) struct Qdisc *qdisc; for (; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); @@ -402,13 +404,18 @@ static inline void qdisc_reset_all_tx(struct net_device *dev) static inline bool qdisc_all_tx_empty(const struct net_device *dev) { unsigned int i; + + rcu_read_lock(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - const struct Qdisc *q = txq->qdisc; + const struct Qdisc *q = rcu_dereference(txq->qdisc); - if (q->q.qlen) + if (q->q.qlen) { + rcu_read_unlock(); return false; + } } + rcu_read_unlock(); return true; } @@ -416,9 +423,10 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev) static inline bool qdisc_tx_changing(const struct net_device *dev) { unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (txq->qdisc != txq->qdisc_sleeping) + if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping) return true; } return false; @@ -428,9 +436,10 @@ static inline bool qdisc_tx_changing(const struct net_device *dev) static inline bool qdisc_tx_is_noop(const struct net_device *dev) { unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (txq->qdisc != &noop_qdisc) + if (rcu_access_pointer(txq->qdisc) != &noop_qdisc) return false; } return true; diff --git a/net/core/dev.c b/net/core/dev.c index 3c6a967e5830..b3d6dbc0c696 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2177,6 +2177,53 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) return (struct dev_kfree_skb_cb *)skb->cb; } +void netif_schedule_queue(struct netdev_queue *txq) +{ + rcu_read_lock(); + if (!(txq->state & QUEUE_STATE_ANY_XOFF)) { + struct Qdisc *q = rcu_dereference(txq->qdisc); + + __netif_schedule(q); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(netif_schedule_queue); + +/** + * netif_wake_subqueue - allow sending packets on subqueue + * @dev: network device + * @queue_index: sub queue index + * + * Resume individual transmit queue of a device with multiple transmit queues. + */ +void netif_wake_subqueue(struct net_device *dev, u16 queue_index) +{ + struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); + + if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) { + struct Qdisc *q; + + rcu_read_lock(); + q = rcu_dereference(txq->qdisc); + __netif_schedule(q); + rcu_read_unlock(); + } +} +EXPORT_SYMBOL(netif_wake_subqueue); + +void netif_tx_wake_queue(struct netdev_queue *dev_queue) +{ + if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) { + struct Qdisc *q; + + rcu_read_lock(); + q = rcu_dereference(dev_queue->qdisc); + __netif_schedule(q); + rcu_read_unlock(); + } +} +EXPORT_SYMBOL(netif_tx_wake_queue); + void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { unsigned long flags; @@ -3432,7 +3479,7 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - q = rxq->qdisc; + q = rcu_dereference(rxq->qdisc); if (q != &noop_qdisc) { spin_lock(qdisc_lock(q)); if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) @@ -3449,7 +3496,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, { struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); - if (!rxq || rxq->qdisc == &noop_qdisc) + if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) goto out; if (*pt_prev) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 19696ebe9ebc..346ef85617d3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -783,7 +783,7 @@ static void dev_deactivate_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; - qdisc = dev_queue->qdisc; + qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); @@ -876,7 +876,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, { struct Qdisc *qdisc = _qdisc; - dev_queue->qdisc = qdisc; + rcu_assign_pointer(dev_queue->qdisc, qdisc); dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 6749e2f540d0..37e7d25d21f1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -231,7 +231,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) memset(&sch->qstats, 0, sizeof(sch->qstats)); for (i = 0; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); spin_lock_bh(qdisc_lock(qdisc)); sch->q.qlen += qdisc->q.qlen; sch->bstats.bytes += qdisc->bstats.bytes; @@ -340,7 +340,9 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_unlock_bh(d->lock); for (i = tc.offset; i < tc.offset + tc.count; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + struct netdev_queue *q = netdev_get_tx_queue(dev, i); + + qdisc = rtnl_dereference(q->qdisc); spin_lock_bh(qdisc_lock(qdisc)); bstats.bytes += qdisc->bstats.bytes; bstats.packets += qdisc->bstats.packets; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index aaa8d03ed054..5cd291bd00e4 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -96,11 +96,14 @@ teql_dequeue(struct Qdisc *sch) struct teql_sched_data *dat = qdisc_priv(sch); struct netdev_queue *dat_queue; struct sk_buff *skb; + struct Qdisc *q; skb = __skb_dequeue(&dat->q); dat_queue = netdev_get_tx_queue(dat->m->dev, 0); + q = rcu_dereference_bh(dat_queue->qdisc); + if (skb == NULL) { - struct net_device *m = qdisc_dev(dat_queue->qdisc); + struct net_device *m = qdisc_dev(q); if (m) { dat->m->slaves = sch; netif_wake_queue(m); @@ -108,7 +111,7 @@ teql_dequeue(struct Qdisc *sch) } else { qdisc_bstats_update(sch, skb); } - sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; + sch->q.qlen = dat->q.qlen + q->q.qlen; return skb; } @@ -157,9 +160,9 @@ teql_destroy(struct Qdisc *sch) txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - root_lock = qdisc_root_sleeping_lock(txq->qdisc); + root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc)); spin_lock_bh(root_lock); - qdisc_reset(txq->qdisc); + qdisc_reset(rtnl_dereference(txq->qdisc)); spin_unlock_bh(root_lock); } } @@ -266,7 +269,7 @@ static inline int teql_resolve(struct sk_buff *skb, struct dst_entry *dst = skb_dst(skb); int res; - if (txq->qdisc == &noop_qdisc) + if (rcu_access_pointer(txq->qdisc) == &noop_qdisc) return -ENODEV; if (!dev->header_ops || !dst) -- cgit v1.2.3 From 331b72922c5f58d48fd5500acadc91777cc31970 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 12 Sep 2014 20:08:20 -0700 Subject: net: sched: RCU cls_tcindex Make cls_tcindex RCU safe. This patch addds a new RCU routine rcu_dereference_bh_rtnl() to check caller either holds the rcu read lock or RTNL. This is needed to handle the case where tcindex_lookup() is being called in both cases. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 10 ++ net/sched/cls_tcindex.c | 248 ++++++++++++++++++++++++++++------------------ 2 files changed, 164 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 167bae7bdfa4..6cacbce1a06c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -46,6 +46,16 @@ static inline int lockdep_rtnl_is_held(void) #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) +/** + * rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereference + * + * Do an rcu_dereference_bh(p), but check caller either holds rcu_read_lock_bh() + * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference_bh() + */ +#define rcu_dereference_bh_rtnl(p) \ + rcu_dereference_bh_check(p, lockdep_rtnl_is_held()) + /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 3e9f76413b3b..a9f4279fbd69 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -32,19 +32,21 @@ struct tcindex_filter_result { struct tcindex_filter { u16 key; struct tcindex_filter_result result; - struct tcindex_filter *next; + struct tcindex_filter __rcu *next; + struct rcu_head rcu; }; struct tcindex_data { struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */ - struct tcindex_filter **h; /* imperfect hash; only used if !perfect; - NULL if unused */ + struct tcindex_filter __rcu **h; /* imperfect hash; */ + struct tcf_proto *tp; u16 mask; /* AND key with mask */ - int shift; /* shift ANDed key to the right */ - int hash; /* hash table size; 0 if undefined */ - int alloc_hash; /* allocated size */ - int fall_through; /* 0: only classify if explicit match */ + u32 shift; /* shift ANDed key to the right */ + u32 hash; /* hash table size; 0 if undefined */ + u32 alloc_hash; /* allocated size */ + u32 fall_through; /* 0: only classify if explicit match */ + struct rcu_head rcu; }; static inline int @@ -56,13 +58,18 @@ tcindex_filter_is_set(struct tcindex_filter_result *r) static struct tcindex_filter_result * tcindex_lookup(struct tcindex_data *p, u16 key) { - struct tcindex_filter *f; + if (p->perfect) { + struct tcindex_filter_result *f = p->perfect + key; + + return tcindex_filter_is_set(f) ? f : NULL; + } else if (p->h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *f; - if (p->perfect) - return tcindex_filter_is_set(p->perfect + key) ? - p->perfect + key : NULL; - else if (p->h) { - for (f = p->h[key % p->hash]; f; f = f->next) + fp = &p->h[key % p->hash]; + for (f = rcu_dereference_bh_rtnl(*fp); + f; + fp = &f->next, f = rcu_dereference_bh_rtnl(*fp)) if (f->key == key) return &f->result; } @@ -74,7 +81,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key) static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rcu_dereference(tp->root); struct tcindex_filter_result *f; int key = (skb->tc_index & p->mask) >> p->shift; @@ -99,7 +106,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); @@ -129,49 +136,59 @@ static int tcindex_init(struct tcf_proto *tp) p->hash = DEFAULT_HASH_SIZE; p->fall_through = 1; - tp->root = p; + rcu_assign_pointer(tp->root, p); return 0; } - static int -__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock) +tcindex_delete(struct tcf_proto *tp, unsigned long arg) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; + struct tcindex_filter __rcu **walk; struct tcindex_filter *f = NULL; - pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f); + pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p); if (p->perfect) { if (!r->res.class) return -ENOENT; } else { int i; - struct tcindex_filter **walk = NULL; - for (i = 0; i < p->hash; i++) - for (walk = p->h+i; *walk; walk = &(*walk)->next) - if (&(*walk)->result == r) + for (i = 0; i < p->hash; i++) { + walk = p->h + i; + for (f = rtnl_dereference(*walk); f; + walk = &f->next, f = rtnl_dereference(*walk)) { + if (&f->result == r) goto found; + } + } return -ENOENT; found: - f = *walk; - if (lock) - tcf_tree_lock(tp); - *walk = f->next; - if (lock) - tcf_tree_unlock(tp); + rcu_assign_pointer(*walk, rtnl_dereference(f->next)); } tcf_unbind_filter(tp, &r->res); tcf_exts_destroy(tp, &r->exts); - kfree(f); + if (f) + kfree_rcu(f, rcu); return 0; } -static int tcindex_delete(struct tcf_proto *tp, unsigned long arg) +static int tcindex_destroy_element(struct tcf_proto *tp, + unsigned long arg, + struct tcf_walker *walker) { - return __tcindex_delete(tp, arg, 1); + return tcindex_delete(tp, arg); +} + +static void __tcindex_destroy(struct rcu_head *head) +{ + struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + + kfree(p->perfect); + kfree(p->h); + kfree(p); } static inline int @@ -194,6 +211,14 @@ static void tcindex_filter_result_init(struct tcindex_filter_result *r) tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } +static void __tcindex_partial_destroy(struct rcu_head *head) +{ + struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + + kfree(p->perfect); + kfree(p); +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, @@ -203,7 +228,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; - struct tcindex_data cp; + struct tcindex_data *cp, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; @@ -212,84 +237,118 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - memcpy(&cp, p, sizeof(cp)); - tcindex_filter_result_init(&new_filter_result); + /* tcindex_data attributes must look atomic to classifier/lookup so + * allocate new tcindex data and RCU assign it onto root. Keeping + * perfect hash and hash pointers from old data. + */ + cp = kzalloc(sizeof(cp), GFP_KERNEL); + if (!cp) + return -ENOMEM; + + cp->mask = p->mask; + cp->shift = p->shift; + cp->hash = p->hash; + cp->alloc_hash = p->alloc_hash; + cp->fall_through = p->fall_through; + cp->tp = tp; + + if (p->perfect) { + cp->perfect = kmemdup(p->perfect, + sizeof(*r) * cp->hash, GFP_KERNEL); + if (!cp->perfect) + goto errout; + } + cp->h = p->h; + + memset(&new_filter_result, 0, sizeof(new_filter_result)); + tcf_exts_init(&new_filter_result.exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); tcindex_filter_result_init(&cr); if (old_r) cr.res = r->res; if (tb[TCA_TCINDEX_HASH]) - cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); if (tb[TCA_TCINDEX_MASK]) - cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); if (tb[TCA_TCINDEX_SHIFT]) - cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); err = -EBUSY; + /* Hash already allocated, make sure that we still meet the * requirements for the allocated hash. */ - if (cp.perfect) { - if (!valid_perfect_hash(&cp) || - cp.hash > cp.alloc_hash) + if (cp->perfect) { + if (!valid_perfect_hash(cp) || + cp->hash > cp->alloc_hash) goto errout; - } else if (cp.h && cp.hash != cp.alloc_hash) + } else if (cp->h && cp->hash != cp->alloc_hash) { goto errout; + } err = -EINVAL; if (tb[TCA_TCINDEX_FALL_THROUGH]) - cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); + cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp.hash) { + if (!cp->hash) { /* Hash not specified, use perfect hash if the upper limit * of the hashing index is below the threshold. */ - if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) - cp.hash = (cp.mask >> cp.shift) + 1; + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; else - cp.hash = DEFAULT_HASH_SIZE; + cp->hash = DEFAULT_HASH_SIZE; } - if (!cp.perfect && !cp.h) - cp.alloc_hash = cp.hash; + if (!cp->perfect && cp->h) + cp->alloc_hash = cp->hash; /* Note: this could be as restrictive as if (handle & ~(mask >> shift)) * but then, we'd fail handles that may become valid after some future * mask change. While this is extremely unlikely to ever matter, * the check below is safer (and also more backwards-compatible). */ - if (cp.perfect || valid_perfect_hash(&cp)) - if (handle >= cp.alloc_hash) + if (cp->perfect || valid_perfect_hash(cp)) + if (handle >= cp->alloc_hash) goto errout; err = -ENOMEM; - if (!cp.perfect && !cp.h) { - if (valid_perfect_hash(&cp)) { + if (!cp->perfect && !cp->h) { + if (valid_perfect_hash(cp)) { int i; - cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL); - if (!cp.perfect) + cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL); + if (!cp->perfect) goto errout; - for (i = 0; i < cp.hash; i++) - tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT, + for (i = 0; i < cp->hash; i++) + tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); balloc = 1; } else { - cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL); - if (!cp.h) + struct tcindex_filter __rcu **hash; + + hash = kcalloc(cp->hash, + sizeof(struct tcindex_filter *), + GFP_KERNEL); + + if (!hash) goto errout; + + cp->h = hash; balloc = 2; } } - if (cp.perfect) - r = cp.perfect + handle; + if (cp->perfect) + r = cp->perfect + handle; else - r = tcindex_lookup(&cp, handle) ? : &new_filter_result; + r = tcindex_lookup(cp, handle) ? : &new_filter_result; if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -307,33 +366,41 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else tcf_exts_change(tp, &cr.exts, &e); - tcf_tree_lock(tp); if (old_r && old_r != r) tcindex_filter_result_init(old_r); - memcpy(p, &cp, sizeof(cp)); + oldp = p; r->res = cr.res; + rcu_assign_pointer(tp->root, cp); if (r == &new_filter_result) { - struct tcindex_filter **fp; + struct tcindex_filter *nfp; + struct tcindex_filter __rcu **fp; f->key = handle; f->result = new_filter_result; f->next = NULL; - for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next) - /* nothing */; - *fp = f; + + fp = p->h + (handle % p->hash); + for (nfp = rtnl_dereference(*fp); + nfp; + fp = &nfp->next, nfp = rtnl_dereference(*fp)) + ; /* nothing */ + + rcu_assign_pointer(*fp, f); } - tcf_tree_unlock(tp); + if (oldp) + call_rcu(&oldp->rcu, __tcindex_partial_destroy); return 0; errout_alloc: if (balloc == 1) - kfree(cp.perfect); + kfree(cp->perfect); else if (balloc == 2) - kfree(cp.h); + kfree(cp->h); errout: + kfree(cp); tcf_exts_destroy(tp, &e); return err; } @@ -345,7 +412,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; int err; @@ -364,10 +431,9 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr); } - static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter *f, *next; int i; @@ -390,8 +456,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) if (!p->h) return; for (i = 0; i < p->hash; i++) { - for (f = p->h[i]; f; f = next) { - next = f->next; + for (f = rtnl_dereference(p->h[i]); f; f = next) { + next = rtnl_dereference(f->next); if (walker->count >= walker->skip) { if (walker->fn(tp, (unsigned long) &f->result, walker) < 0) { @@ -404,17 +470,9 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } - -static int tcindex_destroy_element(struct tcf_proto *tp, - unsigned long arg, struct tcf_walker *walker) -{ - return __tcindex_delete(tp, arg, 0); -} - - static void tcindex_destroy(struct tcf_proto *tp) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); @@ -422,17 +480,16 @@ static void tcindex_destroy(struct tcf_proto *tp) walker.skip = 0; walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - kfree(p->perfect); - kfree(p->h); - kfree(p); - tp->root = NULL; + + RCU_INIT_POINTER(tp->root, NULL); + call_rcu(&p->rcu, __tcindex_destroy); } static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -455,15 +512,18 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, nla_nest_end(skb, nest); } else { if (p->perfect) { - t->tcm_handle = r-p->perfect; + t->tcm_handle = r - p->perfect; } else { struct tcindex_filter *f; + struct tcindex_filter __rcu **fp; int i; t->tcm_handle = 0; for (i = 0; !t->tcm_handle && i < p->hash; i++) { - for (f = p->h[i]; !t->tcm_handle && f; - f = f->next) { + fp = &p->h[i]; + for (f = rtnl_dereference(*fp); + !t->tcm_handle && f; + fp = &f->next, f = rtnl_dereference(*fp)) { if (&f->result == r) t->tcm_handle = f->key; } -- cgit v1.2.3 From a80e49e2cc3145af014a8ae44f575829cc236192 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 17:47:18 +0200 Subject: nohz: Move nohz full init call to tick init This way we unbloat a bit main.c and more importantly we initialize nohz full after init_IRQ(). This dependency will be needed in further patches because nohz full needs irq work to raise its own IRQ. Information about the support for this ability on ARM64 is obtained on init_IRQ() which initialize the pointer to __smp_call_function. Since tick_init() is called right after init_IRQ(), this is a good place to call tick_nohz_init() and prepare for that dependency. Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 2 -- init/main.c | 1 - kernel/time/tick-common.c | 1 + kernel/time/tick-internal.h | 7 +++++++ 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 9a82c7dc3fdd..595ee86f5e0d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -181,14 +181,12 @@ static inline bool tick_nohz_full_cpu(int cpu) return cpumask_test_cpu(cpu, tick_nohz_full_mask); } -extern void tick_nohz_init(void); extern void __tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(struct task_struct *tsk); #else -static inline void tick_nohz_init(void) { } static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void __tick_nohz_full_check(void) { } diff --git a/init/main.c b/init/main.c index bb1aed928f21..8af2f1abfe38 100644 --- a/init/main.c +++ b/init/main.c @@ -577,7 +577,6 @@ asmlinkage __visible void __init start_kernel(void) local_irq_disable(); idr_init_cache(); rcu_init(); - tick_nohz_init(); context_tracking_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 0a0608edeb26..052b4b53c3d6 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -400,4 +400,5 @@ void tick_resume(void) void __init tick_init(void) { tick_broadcast_init(); + tick_nohz_init(); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index c19c1d84b6f3..366aeb4f2c66 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -99,6 +99,13 @@ static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline bool tick_broadcast_oneshot_available(void) { return false; } #endif /* !TICK_ONESHOT */ +/* NO_HZ_FULL internal */ +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +# else +static inline void tick_nohz_init(void) { } +#endif + /* * Broadcasting support */ -- cgit v1.2.3 From c5c38ef3d70377dc504a6a3f611a3ec814bc757b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Sep 2014 15:43:02 +0200 Subject: irq_work: Introduce arch_irq_work_has_interrupt() The nohz full code needs irq work to trigger its own interrupt so that the subsystem can work even when the tick is stopped. Lets introduce arch_irq_work_has_interrupt() that archs can override to tell about their support for this ability. Signed-off-by: Peter Zijlstra Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 3 ++- arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/irq_work.h | 10 ++++++++++ include/linux/irq_work.h | 2 ++ 31 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 include/asm-generic/irq_work.h (limited to 'include/linux') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index e858aa0ad8af..a52cbf178c3a 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index e76fd79f32b0..b8fffc1a2ac2 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 70cd84eb7fda..202905e7ea0c 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 0b3fcf86e6ba..d617789b1ebd 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -9,8 +9,8 @@ generic-y += current.h generic-y += delay.h generic-y += div64.h generic-y += dma.h -generic-y += emergency-restart.h generic-y += early_ioremap.h +generic-y += emergency-restart.h generic-y += errno.h generic-y += ftrace.h generic-y += hash.h @@ -19,6 +19,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 00a0f3ccd6eb..2a71b1cb9848 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += exec.h generic-y += futex.h generic-y += hash.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 0d93b9a79ca9..46ed6bb9c679 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 8dbdce8421b0..e77e0c1dbe75 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 31742dfadff9..802b94c4ca86 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -8,6 +8,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += linkage.h generic-y += mcs_spinlock.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 5b73921b6e9d..3caf05cabfc5 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 0e69796b58c7..5f234a5a2320 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += ioctls.h generic-y += iomap.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index e8317d2d6c8d..747320be9d0e 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += preempt.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index accc10a3dc78..e02448b0648b 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index c67c94a2d672..dbaf9f3065e8 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index c29ead89a317..7b8111c8f937 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 27a3acda6c19..448143b8cabd 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += cputime.h generic-y += device.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 335e5290ec75..57012ef1f51e 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += cputime.h generic-y += current.h generic-y += emergency-restart.h generic-y += hash.h +generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mutex.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index ecbd6676bd33..77eb1a68d13b 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 480af0d9c2f5..89b61d7dc790 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -31,6 +31,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index ecf25e6678ad..ffb024b8423f 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -10,6 +10,7 @@ generic-y += exec.h generic-y += hash.h generic-y += hw_irq.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kvm_para.h generic-y += local.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 7f23f162ce9c..31e8f59aff38 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index b3fea0722ff1..773f86676588 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index aad209199f7e..1909d2a5b82f 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += cputime.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index c19e47dacb31..5a6c9acff0d2 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index cdd1b447bb6c..f5f94ce1692c 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -8,6 +8,7 @@ generic-y += emergency-restart.h generic-y += exec.h generic-y += hash.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += linkage.h generic-y += local.h generic-y += local64.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 0aa5675e7025..e6462b8a6284 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -17,6 +17,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 7bd64aa2e94a..244b12c8cb39 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += hash.h generic-y += hw_irq.h generic-y += io.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mutex.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 1e5fb872a4aa..5a2bb53faa42 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 3bf000fab0ae..8fa909cc6553 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -7,5 +7,6 @@ genhdr-y += unistd_x32.h generic-y += clkdev.h generic-y += cputime.h generic-y += early_ioremap.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += scatterlist.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index c3d20ba6eb86..105d38922c44 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += hardirq.h generic-y += hash.h generic-y += ioctl.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/include/asm-generic/irq_work.h b/include/asm-generic/irq_work.h new file mode 100644 index 000000000000..a44f452c6590 --- /dev/null +++ b/include/asm-generic/irq_work.h @@ -0,0 +1,10 @@ +#ifndef __ASM_IRQ_WORK_H +#define __ASM_IRQ_WORK_H + +static inline bool arch_irq_work_has_interrupt(void) +{ + return false; +} + +#endif /* __ASM_IRQ_WORK_H */ + diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index bf9422c3aefe..6b47b2ede405 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -42,6 +42,8 @@ void irq_work_run(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK +#include + bool irq_work_needs_cpu(void); #else static inline bool irq_work_needs_cpu(void) { return false; } -- cgit v1.2.3 From 76a33061b9323b7fdb220ae5fa116c10833ec22e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 18:37:19 +0200 Subject: irq_work: Force raised irq work to run on irq work interrupt The nohz full kick, which restarts the tick when any resource depend on it, can't be executed anywhere given the operation it does on timers. If it is called from the scheduler or timers code, chances are that we run into a deadlock. This is why we run the nohz full kick from an irq work. That way we make sure that the kick runs on a virgin context. However if that's the case when irq work runs in its own dedicated self-ipi, things are different for the big bunch of archs that don't support the self triggered way. In order to support them, irq works are also handled by the timer interrupt as fallback. Now when irq works run on the timer interrupt, the context isn't blank. More precisely, they can run in the context of the hrtimer that runs the tick. But the nohz kick cancels and restarts this hrtimer and cancelling an hrtimer from itself isn't allowed. This is why we run in an endless loop: Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 2 CPU: 2 PID: 7538 Comm: kworker/u8:8 Not tainted 3.16.0+ #34 Workqueue: btrfs-endio-write normal_work_helper [btrfs] ffff880244c06c88 000000001b486fe1 ffff880244c06bf0 ffffffff8a7f1e37 ffffffff8ac52a18 ffff880244c06c78 ffffffff8a7ef928 0000000000000010 ffff880244c06c88 ffff880244c06c20 000000001b486fe1 0000000000000000 Call Trace: ] dump_stack+0x4e/0x7a [] panic+0xd4/0x207 [] watchdog_overflow_callback+0x118/0x120 [] __perf_event_overflow+0xae/0x350 [] ? perf_event_task_disable+0xa0/0xa0 [] ? x86_perf_event_set_period+0xbf/0x150 [] perf_event_overflow+0x14/0x20 [] intel_pmu_handle_irq+0x206/0x410 [] perf_event_nmi_handler+0x2b/0x50 [] nmi_handle+0xd2/0x390 [] ? nmi_handle+0x5/0x390 [] ? match_held_lock+0x8/0x1b0 [] default_do_nmi+0x72/0x1c0 [] do_nmi+0xb8/0x100 [] end_repeat_nmi+0x1e/0x2e [] ? match_held_lock+0x8/0x1b0 [] ? match_held_lock+0x8/0x1b0 [] ? match_held_lock+0x8/0x1b0 <] lock_acquired+0xaf/0x450 [] ? lock_hrtimer_base.isra.20+0x25/0x50 [] _raw_spin_lock_irqsave+0x78/0x90 [] ? lock_hrtimer_base.isra.20+0x25/0x50 [] lock_hrtimer_base.isra.20+0x25/0x50 [] hrtimer_try_to_cancel+0x33/0x1e0 [] hrtimer_cancel+0x1a/0x30 [] tick_nohz_restart+0x17/0x90 [] __tick_nohz_full_check+0xc3/0x100 [] nohz_full_kick_work_func+0xe/0x10 [] irq_work_run_list+0x44/0x70 [] irq_work_run+0x2a/0x50 [] update_process_times+0x5b/0x70 [] tick_sched_handle.isra.21+0x25/0x60 [] tick_sched_timer+0x41/0x60 [] __run_hrtimer+0x72/0x470 [] ? tick_sched_do_timer+0xb0/0xb0 [] hrtimer_interrupt+0x117/0x270 [] local_apic_timer_interrupt+0x37/0x60 [] smp_apic_timer_interrupt+0x3f/0x50 [] apic_timer_interrupt+0x6f/0x80 To fix this we force non-lazy irq works to run on irq work self-IPIs when available. That ability of the arch to trigger irq work self IPIs is available with arch_irq_work_has_interrupt(). Reported-by: Catalin Iacob Reported-by: Dave Jones Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/irq_work.h | 1 + kernel/irq_work.c | 15 +++++++++++++-- kernel/time/timer.c | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6b47b2ede405..bf3fe719c7ce 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -39,6 +39,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu); #endif void irq_work_run(void); +void irq_work_tick(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK diff --git a/kernel/irq_work.c b/kernel/irq_work.c index e6bcbe756663..385b85aded19 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -115,8 +115,10 @@ bool irq_work_needs_cpu(void) raised = &__get_cpu_var(raised_list); lazy = &__get_cpu_var(lazy_list); - if (llist_empty(raised) && llist_empty(lazy)) - return false; + + if (llist_empty(raised) || arch_irq_work_has_interrupt()) + if (llist_empty(lazy)) + return false; /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); @@ -171,6 +173,15 @@ void irq_work_run(void) } EXPORT_SYMBOL_GPL(irq_work_run); +void irq_work_tick(void) +{ + struct llist_head *raised = &__get_cpu_var(raised_list); + + if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) + irq_work_run_list(raised); + irq_work_run_list(&__get_cpu_var(lazy_list)); +} + /* * Synchronize against the irq_work @entry, ensures the entry is not * currently in use. diff --git a/kernel/time/timer.c b/kernel/time/timer.c index aca5dfe2fa3d..9bbb8344ed3b 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1385,7 +1385,7 @@ void update_process_times(int user_tick) rcu_check_callbacks(cpu, user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) - irq_work_run(); + irq_work_tick(); #endif scheduler_tick(); run_posix_cpu_timers(p); -- cgit v1.2.3 From 23d0db76ffa13ffb95229946e4648568c3c29db5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 13 Sep 2014 11:24:03 -0700 Subject: Make hash_64() use a 64-bit multiply when appropriate The hash_64() function historically does the multiply by the GOLDEN_RATIO_PRIME_64 number with explicit shifts and adds, because unlike the 32-bit case, gcc seems unable to turn the constant multiply into the more appropriate shift and adds when required. However, that means that we generate those shifts and adds even when the architecture has a fast multiplier, and could just do it better in hardware. Use the now-cleaned-up CONFIG_ARCH_HAS_FAST_MULTIPLIER (together with "is it a 64-bit architecture") to decide whether to use an integer multiply or the explicit sequence of shift/add instructions. Signed-off-by: Linus Torvalds --- include/linux/hash.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hash.h b/include/linux/hash.h index bd1754c7ecef..d0494c399392 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -37,6 +37,9 @@ static __always_inline u64 hash_64(u64 val, unsigned int bits) { u64 hash = val; +#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64 + hash = hash * GOLDEN_RATIO_PRIME_64; +#else /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ u64 n = hash; n <<= 18; @@ -51,6 +54,7 @@ static __always_inline u64 hash_64(u64 val, unsigned int bits) hash += n; n <<= 2; hash += n; +#endif /* High bits are more random, so use them. */ return hash >> (64 - bits); -- cgit v1.2.3 From 6c555490e0ce885a9caf0a045db69382a3ccbc9c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 11 Sep 2014 15:35:09 -0700 Subject: ipv6: drop useless rcu_read_lock() in anycast These code is now protected by rtnl lock, rcu read lock is useless now. Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 14 ++++++++------ net/ipv6/anycast.c | 18 ++++++------------ 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae721f53739e..ee38b948d9a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2083,8 +2083,8 @@ void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, - unsigned short mask); +struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, + unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); diff --git a/net/core/dev.c b/net/core/dev.c index b3d6dbc0c696..e916ba8caccf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -897,23 +897,25 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * dev_get_by_flags_rcu - find any device with given flags + * __dev_get_by_flags - find any device with given flags * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * * Search for any interface with the given flags. Returns NULL if a device * is not found or a pointer to the device. Must be called inside - * rcu_read_lock(), and result refcount is unchanged. + * rtnl_lock(), and result refcount is unchanged. */ -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, - unsigned short mask) +struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags, + unsigned short mask) { struct net_device *dev, *ret; + ASSERT_RTNL(); + ret = NULL; - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { ret = dev; break; @@ -921,7 +923,7 @@ struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags } return ret; } -EXPORT_SYMBOL(dev_get_by_flags_rcu); +EXPORT_SYMBOL(__dev_get_by_flags); /** * dev_valid_name - check if name is okay for network device diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index ff2de7d9d8e6..3b0429bc6b5a 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -78,7 +78,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac->acl_addr = *addr; rtnl_lock(); - rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -91,11 +90,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags_rcu(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = __dev_get_by_flags(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } } else - dev = dev_get_by_index_rcu(net, ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -137,7 +136,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } error: - rcu_read_unlock(); rtnl_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); @@ -174,11 +172,9 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) spin_unlock_bh(&ipv6_sk_ac_lock); rtnl_lock(); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - rcu_read_unlock(); rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); @@ -203,12 +199,11 @@ void ipv6_sock_ac_close(struct sock *sk) prev_index = 0; rtnl_lock(); - rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + dev = __dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -216,7 +211,6 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - rcu_read_unlock(); rtnl_unlock(); } @@ -341,7 +335,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } -/* called with rcu_read_lock() */ +/* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); -- cgit v1.2.3 From 233577a22089facf5271ab5e845b2262047c971f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 12 Sep 2014 14:04:43 +0200 Subject: net: filter: constify detection of pkt_type_offset Currently we have 2 pkt_type_offset functions doing the same thing and spread across the architecture files. Remove those and replace them with a PKT_TYPE_OFFSET macro helper which gets the constant value from a zero sized sk_buff member right in front of the bitfield with offsetof. This new offset marker does not change size of struct sk_buff. Cc: Eric Dumazet Cc: Markos Chandras Cc: Martin Schwidefsky Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Denis Kirjanov Signed-off-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/mips/net/bpf_jit.c | 27 +-------------------------- arch/s390/net/bpf_jit_comp.c | 35 +---------------------------------- include/linux/skbuff.h | 10 ++++++++++ net/core/filter.c | 31 ++----------------------------- 4 files changed, 14 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 0e97ccd29fe3..7edc08398c4a 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) return (u64)err << 32 | ntohl(ret); } -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -static int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - u8 *ct = (u8 *)&skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n"); - return -1; -} - static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; @@ -1332,11 +1311,7 @@ jmp_cmp: case BPF_ANC | SKF_AD_PKTTYPE: ctx->flags |= SEEN_SKB; - off = pkt_type_offset(); - - if (off < 0) - return -1; - emit_load_byte(r_tmp, r_skb, off, ctx); + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); /* Keep only the last 3 bits */ emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); #ifdef __BIG_ENDIAN_BITFIELD diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 555f5c7e83ab..c52ac77408ca 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -227,37 +227,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) EMIT2(0x07fe); } -/* Helper to find the offset of pkt_type in sk_buff - * Make sure its still a 3bit field starting at the MSBs within a byte. - */ -#define PKT_TYPE_MAX 0xe0 -static int pkt_type_offset; - -static int __init bpf_pkt_type_offset_init(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - char *ct = (char *)&skb_probe; - int off; - - pkt_type_offset = -1; - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (!ct[off]) - continue; - if (ct[off] == PKT_TYPE_MAX) - pkt_type_offset = off; - else { - /* Found non matching bit pattern, fix needed. */ - WARN_ON_ONCE(1); - pkt_type_offset = -1; - return -1; - } - } - return 0; -} -device_initcall(bpf_pkt_type_offset_init); - /* * make sure we dont leak kernel information to user */ @@ -757,12 +726,10 @@ call_fn: /* lg %r1,(%r13) */ } break; case BPF_ANC | SKF_AD_PKTTYPE: - if (pkt_type_offset < 0) - goto out; /* lhi %r5,0 */ EMIT4(0xa7580000); /* ic %r5,(%r2) */ - EMIT4_DISP(0x43502000, pkt_type_offset); + EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET()); /* srl %r5,5 */ EMIT4_DISP(0x88500000, 5); break; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 07c9fdd0c126..756e3d057e84 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -548,6 +548,16 @@ struct sk_buff { ip_summed:2, nohdr:1, nfctinfo:3; + +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 +#endif +#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + + __u8 __pkt_type_offset[0]; __u8 pkt_type:3, fclone:2, ipvs_property:1, diff --git a/net/core/filter.c b/net/core/filter.c index dfc716ffa44b..601f28de7311 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,30 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sk_filter); -/* Helper to find the offset of pkt_type in sk_buff structure. We want - * to make sure its still a 3bit field starting at a byte boundary; - * taken from arch/x86/net/bpf_jit_comp.c. - */ -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -static unsigned int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { .pkt_type = ~0, }; - u8 *ct = (u8 *) &skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - - pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__); - return -1; -} - static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return skb_get_poff((struct sk_buff *)(unsigned long) ctx); @@ -190,11 +166,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - pkt_type_offset()); - if (insn->off < 0) - return false; - insn++; + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, + PKT_TYPE_OFFSET()); *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); #ifdef __BIG_ENDIAN_BITFIELD insn++; -- cgit v1.2.3 From 3fc8867740b4a0bf56f372c6f5ddd14970962fb1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2014 23:12:46 -0700 Subject: netdevice: Support DSA tagging when DSA is built as a module This change corrects an error seen when DSA tagging is built as a module. Without this change it is not possible to get XDSA tagged frames as the test for tagging is stripped by the #ifdef check. Signed-off-by: Alexander Duyck Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ee38b948d9a0..f9e81d10a3b9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1789,7 +1789,7 @@ void dev_net_set(struct net_device *dev, struct net *net) static inline bool netdev_uses_dsa(struct net_device *dev) { -#ifdef CONFIG_NET_DSA +#if IS_ENABLED(CONFIG_NET_DSA) if (dev->dsa_ptr != NULL) return dsa_uses_tagged_protocol(dev->dsa_ptr); #endif -- cgit v1.2.3 From 9226b5b440f2b4fbb3b797f3cb74a9a627220660 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Sep 2014 17:28:32 -0700 Subject: vfs: avoid non-forwarding large load after small store in path lookup The performance regression that Josef Bacik reported in the pathname lookup (see commit 99d263d4c5b2 "vfs: fix bad hashing of dentries") made me look at performance stability of the dcache code, just to verify that the problem was actually fixed. That turned up a few other problems in this area. There are a few cases where we exit RCU lookup mode and go to the slow serializing case when we shouldn't, Al has fixed those and they'll come in with the next VFS pull. But my performance verification also shows that link_path_walk() turns out to have a very unfortunate 32-bit store of the length and hash of the name we look up, followed by a 64-bit read of the combined hash_len field. That screws up the processor store to load forwarding, causing an unnecessary hickup in this critical routine. It's caused by the ugly calling convention for the "hash_name()" function, and easily fixed by just making hash_name() fill in the whole 'struct qstr' rather than passing it a pointer to just the hash value. With that, the profile for this function looks much smoother. Signed-off-by: Linus Torvalds --- fs/namei.c | 19 ++++++++++--------- include/linux/dcache.h | 1 + 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 229235862e50..2be5120b81b3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1669,13 +1669,14 @@ EXPORT_SYMBOL(full_name_hash); /* * Calculate the length and hash of the path component, and - * return the length of the component; + * fill in the qstr. return the "len" as the result. */ -static inline unsigned long hash_name(const char *name, unsigned int *hashp) +static inline unsigned long hash_name(const char *name, struct qstr *res) { unsigned long a, b, adata, bdata, mask, hash, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + res->name = name; hash = a = 0; len = -sizeof(unsigned long); do { @@ -1691,9 +1692,10 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) mask = create_zero_mask(adata | bdata); hash += a & zero_bytemask(mask); - *hashp = fold_hash(hash); + len += find_zero(mask); + res->hash_len = hashlen_create(fold_hash(hash), len); - return len + find_zero(mask); + return len; } #else @@ -1711,18 +1713,19 @@ EXPORT_SYMBOL(full_name_hash); * We know there's a real path component here of at least * one character. */ -static inline unsigned long hash_name(const char *name, unsigned int *hashp) +static inline long hash_name(const char *name, struct qstr *res) { unsigned long hash = init_name_hash(); unsigned long len = 0, c; + res->name = name; c = (unsigned char)*name; do { len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); - *hashp = end_name_hash(hash); + res->hash_len = hashlen_create(end_name_hash(hash), len); return len; } @@ -1756,9 +1759,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (err) break; - len = hash_name(name, &this.hash); - this.name = name; - this.len = len; + len = hash_name(name, &this); type = LAST_NORM; if (name[0] == '.') switch (len) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e4ae2ad48d07..75a227cc7ce2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -55,6 +55,7 @@ struct qstr { #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } #define hashlen_hash(hashlen) ((u32) (hashlen)) #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) +#define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash)) struct dentry_stat_t { long nr_dentry; -- cgit v1.2.3 From 1d46fea7d091f9dc2d4fd3fcb9f0117ca288f9a5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 27 Aug 2014 00:42:56 +0200 Subject: drm/rcar-du: Use struct videomode in platform data In preparation for DT support where panel timings will be described by a DRM-agnostic video mode, replace the struct drm_mode_modeinfo instance in the panel platform data with a struct videomode. Signed-off-by: Laurent Pinchart --- arch/arm/mach-shmobile/board-koelsch-reference.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-koelsch.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-lager-reference.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-lager.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-marzen.c | 19 +++++++++---------- drivers/gpu/drm/rcar-du/Kconfig | 1 + drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c | 15 +++------------ include/linux/platform_data/rcar-du.h | 4 ++-- 8 files changed, 51 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/board-koelsch-reference.c b/arch/arm/mach-shmobile/board-koelsch-reference.c index 3ff88c138896..364e69bf85d4 100644 --- a/arch/arm/mach-shmobile/board-koelsch-reference.c +++ b/arch/arm/mach-shmobile/board-koelsch-reference.c @@ -41,16 +41,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-koelsch.c b/arch/arm/mach-shmobile/board-koelsch.c index b7d5bc7659cd..ad10ddb6a321 100644 --- a/arch/arm/mach-shmobile/board-koelsch.c +++ b/arch/arm/mach-shmobile/board-koelsch.c @@ -63,16 +63,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-lager-reference.c b/arch/arm/mach-shmobile/board-lager-reference.c index 41c808e56005..12a53a1c3d02 100644 --- a/arch/arm/mach-shmobile/board-lager-reference.c +++ b/arch/arm/mach-shmobile/board-lager-reference.c @@ -43,16 +43,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index e1d8215da0b0..80576c2ee668 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -99,16 +99,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index e5cf4201e769..ce33d7825c49 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -192,16 +192,15 @@ static struct rcar_du_encoder_data du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index 2e3d7b5b0ad7..c96f6089f8bf 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -6,6 +6,7 @@ config DRM_RCAR_DU select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_KMS_FB_HELPER + select VIDEOMODE_HELPERS help Choose this option if you have an R-Car chipset. If M is selected the module will be called rcar-du-drm. diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c index cfcf6e74ad0a..d29544121658 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c @@ -40,18 +40,9 @@ static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector) return 0; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; - mode->clock = lvdscon->panel->mode.clock; - mode->hdisplay = lvdscon->panel->mode.hdisplay; - mode->hsync_start = lvdscon->panel->mode.hsync_start; - mode->hsync_end = lvdscon->panel->mode.hsync_end; - mode->htotal = lvdscon->panel->mode.htotal; - mode->vdisplay = lvdscon->panel->mode.vdisplay; - mode->vsync_start = lvdscon->panel->mode.vsync_start; - mode->vsync_end = lvdscon->panel->mode.vsync_end; - mode->vtotal = lvdscon->panel->mode.vtotal; - mode->flags = lvdscon->panel->mode.flags; - - drm_mode_set_name(mode); + + drm_display_mode_from_videomode(&lvdscon->panel->mode, mode); + drm_mode_probed_add(connector, mode); return 1; diff --git a/include/linux/platform_data/rcar-du.h b/include/linux/platform_data/rcar-du.h index 1a2e9901a22e..a5f045e1d8fe 100644 --- a/include/linux/platform_data/rcar-du.h +++ b/include/linux/platform_data/rcar-du.h @@ -14,7 +14,7 @@ #ifndef __RCAR_DU_H__ #define __RCAR_DU_H__ -#include +#include