From 9e6e70f8d8b6698e0017c56b86525aabe9c7cd4c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:10:24 -0400 Subject: NFSv4: Support NFSv4 optional attributes in the struct nfs_fattr Currently, filling struct nfs_fattr is more or less an all or nothing operation, since NFSv2 and NFSv3 have only mandatory attributes. In NFSv4, some attributes are optional, and so we may simply not be able to fill in those fields. Furthermore, NFSv4 allows you to specify which attributes you are interested in retrieving, thus permitting you to optimise away retrieval of attributes that you know will no change... Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2e5f00066afd..b99295e07cdf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -27,7 +27,7 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid } struct nfs_fattr { - unsigned short valid; /* which fields are valid */ + unsigned int valid; /* which fields are valid */ __u64 pre_size; /* pre_op_attr.size */ struct timespec pre_mtime; /* pre_op_attr.mtime */ struct timespec pre_ctime; /* pre_op_attr.ctime */ @@ -59,12 +59,46 @@ struct nfs_fattr { unsigned long gencount; }; -#define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ -#define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ -#define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ -#define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */ -#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */ -#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */ +#define NFS_ATTR_FATTR_TYPE (1U << 0) +#define NFS_ATTR_FATTR_MODE (1U << 1) +#define NFS_ATTR_FATTR_NLINK (1U << 2) +#define NFS_ATTR_FATTR_OWNER (1U << 3) +#define NFS_ATTR_FATTR_GROUP (1U << 4) +#define NFS_ATTR_FATTR_RDEV (1U << 5) +#define NFS_ATTR_FATTR_SIZE (1U << 6) +#define NFS_ATTR_FATTR_PRESIZE (1U << 7) +#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8) +#define NFS_ATTR_FATTR_SPACE_USED (1U << 9) +#define NFS_ATTR_FATTR_FSID (1U << 10) +#define NFS_ATTR_FATTR_FILEID (1U << 11) +#define NFS_ATTR_FATTR_ATIME (1U << 12) +#define NFS_ATTR_FATTR_MTIME (1U << 13) +#define NFS_ATTR_FATTR_CTIME (1U << 14) +#define NFS_ATTR_FATTR_PREMTIME (1U << 15) +#define NFS_ATTR_FATTR_PRECTIME (1U << 16) +#define NFS_ATTR_FATTR_CHANGE (1U << 17) +#define NFS_ATTR_FATTR_PRECHANGE (1U << 18) +#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ + +#define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ + | NFS_ATTR_FATTR_MODE \ + | NFS_ATTR_FATTR_NLINK \ + | NFS_ATTR_FATTR_OWNER \ + | NFS_ATTR_FATTR_GROUP \ + | NFS_ATTR_FATTR_RDEV \ + | NFS_ATTR_FATTR_SIZE \ + | NFS_ATTR_FATTR_FSID \ + | NFS_ATTR_FATTR_FILEID \ + | NFS_ATTR_FATTR_ATIME \ + | NFS_ATTR_FATTR_MTIME \ + | NFS_ATTR_FATTR_CTIME) +#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_BLOCKS_USED) +#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_SPACE_USED) +#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_SPACE_USED \ + | NFS_ATTR_FATTR_CHANGE) /* * Info on the file system -- cgit v1.2.3 From 1ca277d88dafdbc3c5a69d32590e7184b9af6371 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:10:25 -0400 Subject: NFS: Shrink the struct nfs_fattr We don't need the bitmap[] field anymore, since the 'valid' field tells us all we need to know about which attributes were filled in... Also move the pre-op attributes in order to improve the structure packing. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 3 --- include/linux/nfs_xdr.h | 7 +++---- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7d220da3db36..9f1df8361974 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3002,9 +3002,6 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto xdr_error; - fattr->bitmap[0] = bitmap[0]; - fattr->bitmap[1] = bitmap[1]; - if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) goto xdr_error; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b99295e07cdf..6013acb0131f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -28,9 +28,6 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid struct nfs_fattr { unsigned int valid; /* which fields are valid */ - __u64 pre_size; /* pre_op_attr.size */ - struct timespec pre_mtime; /* pre_op_attr.mtime */ - struct timespec pre_ctime; /* pre_op_attr.ctime */ enum nfs_ftype type; /* always use NFSv2 types */ __u32 mode; __u32 nlink; @@ -52,9 +49,11 @@ struct nfs_fattr { struct timespec atime; struct timespec mtime; struct timespec ctime; - __u32 bitmap[2]; /* NFSv4 returned attribute bitmap */ __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ + __u64 pre_size; /* pre_op_attr.size */ + struct timespec pre_mtime; /* pre_op_attr.mtime */ + struct timespec pre_ctime; /* pre_op_attr.ctime */ unsigned long time_start; unsigned long gencount; }; -- cgit v1.2.3 From bca794785c2c12ecddeb09e70165b8ff80baa6ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:10:26 -0400 Subject: NFS: Fix the type of struct nfs_fattr->mode There is no point in using anything other than umode_t, since we copy the content pretty much directly into inode->i_mode. Signed-off-by: Trond Myklebust --- fs/nfs/getroot.c | 4 ++-- fs/nfs/nfs2xdr.c | 7 +++---- fs/nfs/nfs3xdr.c | 31 +++++++++++++------------------ fs/nfs/nfs4xdr.c | 40 +++++++++++++++++++--------------------- include/linux/nfs_xdr.h | 3 +-- 5 files changed, 38 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b7c9b2df1f29..46177cb87064 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -156,7 +156,7 @@ int nfs4_path_walk(struct nfs_server *server, return ret; } - if (fattr.type != NFDIR) { + if (!S_ISDIR(fattr.mode)) { printk(KERN_ERR "nfs4_get_root:" " getroot encountered non-directory\n"); return -ENOTDIR; @@ -213,7 +213,7 @@ eat_dot_dir: return ret; } - if (fattr.type != NFDIR) { + if (!S_ISDIR(fattr.mode)) { printk(KERN_ERR "nfs4_get_root:" " lookupfh encountered non-directory\n"); return -ENOTDIR; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index bea99992c302..c862c9340f9a 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -120,8 +120,8 @@ xdr_decode_time(__be32 *p, struct timespec *timep) static __be32 * xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) { - u32 rdev; - fattr->type = (enum nfs_ftype) ntohl(*p++); + u32 rdev, type; + type = ntohl(*p++); fattr->mode = ntohl(*p++); fattr->nlink = ntohl(*p++); fattr->uid = ntohl(*p++); @@ -138,8 +138,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) p = xdr_decode_time(p, &fattr->ctime); fattr->valid |= NFS_ATTR_FATTR_V2; fattr->rdev = new_decode_dev(rdev); - if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) { - fattr->type = NFFIFO; + if (type == NFCHR && rdev == NFS2_FIFO_DEV) { fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; fattr->rdev = 0; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index c0f7d02aced9..e6a1932c7110 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -91,19 +91,15 @@ /* * Map file type to S_IFMT bits */ -static struct { - unsigned int mode; - unsigned int nfs2type; -} nfs_type2fmt[] = { - { 0, NFNON }, - { S_IFREG, NFREG }, - { S_IFDIR, NFDIR }, - { S_IFBLK, NFBLK }, - { S_IFCHR, NFCHR }, - { S_IFLNK, NFLNK }, - { S_IFSOCK, NFSOCK }, - { S_IFIFO, NFFIFO }, - { 0, NFBAD } +static const umode_t nfs_type2fmt[] = { + [NF3BAD] = 0, + [NF3REG] = S_IFREG, + [NF3DIR] = S_IFDIR, + [NF3BLK] = S_IFBLK, + [NF3CHR] = S_IFCHR, + [NF3LNK] = S_IFLNK, + [NF3SOCK] = S_IFSOCK, + [NF3FIFO] = S_IFIFO, }; /* @@ -148,13 +144,12 @@ static __be32 * xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) { unsigned int type, major, minor; - int fmode; + umode_t fmode; type = ntohl(*p++); - if (type >= NF3BAD) - type = NF3BAD; - fmode = nfs_type2fmt[type].mode; - fattr->type = nfs_type2fmt[type].nfs2type; + if (type > NF3FIFO) + type = NF3NON; + fmode = nfs_type2fmt[type]; fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; fattr->nlink = ntohl(*p++); fattr->uid = ntohl(*p++); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9f1df8361974..c1906d2a226b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -522,20 +522,17 @@ static int nfs4_stat_to_errno(int); decode_lookup_maxsz + \ decode_fs_locations_maxsz) -static struct { - unsigned int mode; - unsigned int nfs2type; -} nfs_type2fmt[] = { - { 0, NFNON }, - { S_IFREG, NFREG }, - { S_IFDIR, NFDIR }, - { S_IFBLK, NFBLK }, - { S_IFCHR, NFCHR }, - { S_IFLNK, NFLNK }, - { S_IFSOCK, NFSOCK }, - { S_IFIFO, NFFIFO }, - { 0, NFNON }, - { 0, NFNON }, +static const umode_t nfs_type2fmt[] = { + [NF4BAD] = 0, + [NF4REG] = S_IFREG, + [NF4DIR] = S_IFDIR, + [NF4BLK] = S_IFBLK, + [NF4CHR] = S_IFCHR, + [NF4LNK] = S_IFLNK, + [NF4SOCK] = S_IFSOCK, + [NF4FIFO] = S_IFIFO, + [NF4ATTRDIR] = 0, + [NF4NAMEDATTR] = 0, }; struct compound_hdr { @@ -2173,7 +2170,7 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * } bitmap[0] &= ~FATTR4_WORD0_TYPE; } - dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type].nfs2type); + dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); return 0; } @@ -2580,8 +2577,9 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32 return status; } -static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode) +static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) { + uint32_t tmp; __be32 *p; *mode = 0; @@ -2589,8 +2587,8 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * return -EIO; if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { READ_BUF(4); - READ32(*mode); - *mode &= ~S_IFMT; + READ32(tmp); + *mode = tmp & ~S_IFMT; bitmap[1] &= ~FATTR4_WORD1_MODE; } dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); @@ -2994,7 +2992,8 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons uint32_t attrlen, bitmap[2] = {0}, type; - int status, fmode = 0; + int status; + umode_t fmode = 0; uint64_t fileid; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -3008,8 +3007,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons if ((status = decode_attr_type(xdr, bitmap, &type)) != 0) goto xdr_error; - fattr->type = nfs_type2fmt[type].nfs2type; - fattr->mode = nfs_type2fmt[type].mode; + fattr->mode = nfs_type2fmt[type]; if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0) goto xdr_error; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6013acb0131f..0691b9c188d9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -28,8 +28,7 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid struct nfs_fattr { unsigned int valid; /* which fields are valid */ - enum nfs_ftype type; /* always use NFSv2 types */ - __u32 mode; + umode_t mode; __u32 nlink; __u32 uid; __u32 gid; -- cgit v1.2.3 From a65318bf3afc93ce49227e849d213799b072c5fd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:10:28 -0400 Subject: NFSv4: Simplify some cache consistency post-op GETATTRs Certain asynchronous operations such as write() do not expect (or care) that other metadata such as the file owner, mode, acls, ... change. All they want to do is update and/or check the change attribute, ctime, and mtime. By skipping the file owner and group update, we also avoid having to do a potential idmapper upcall for these asynchronous RPC calls. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 13 ++++++++----- include/linux/nfs_fs_sb.h | 5 +++++ 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aa433d077945..101f5f4c304f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1439,7 +1439,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) if (calldata->arg.seqid == NULL) goto out_free_calldata; calldata->arg.fmode = 0; - calldata->arg.bitmask = server->attr_bitmask; + calldata->arg.bitmask = server->cache_consistency_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; @@ -1600,6 +1600,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); + server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; + server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->acl_bitmask = res.acl_bitmask; } return status; @@ -2079,7 +2082,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; - args->bitmask = server->attr_bitmask; + args->bitmask = server->cache_consistency_bitmask; res->server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; } @@ -2323,7 +2326,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .pages = &page, .pgbase = 0, .count = count, - .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, + .bitmask = NFS_SERVER(dentry->d_inode)->cache_consistency_bitmask, }; struct nfs4_readdir_res res; struct rpc_message msg = { @@ -2552,7 +2555,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag { struct nfs_server *server = NFS_SERVER(data->inode); - data->args.bitmask = server->attr_bitmask; + data->args.bitmask = server->cache_consistency_bitmask; data->res.server = server; data->timestamp = jiffies; @@ -2575,7 +2578,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa { struct nfs_server *server = NFS_SERVER(data->inode); - data->args.bitmask = server->attr_bitmask; + data->args.bitmask = server->cache_consistency_bitmask; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 9bb81aec91cf..29b1e40dce99 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -106,6 +106,11 @@ struct nfs_server { u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this filesystem */ + u32 cache_consistency_bitmask[2]; + /* V4 bitmask representing the subset + of change attribute, size, ctime + and mtime attributes supported by + the server */ u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ -- cgit v1.2.3 From fb8a1f11b64e213d94dfa1cebb2a42a7b8c115c4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:10:29 -0400 Subject: NFS: cleanup - remove struct nfs_inode->ncommit Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 - fs/nfs/write.c | 25 ++++++++++++++++--------- include/linux/nfs_fs.h | 3 +-- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b7656bd3706f..00f116cdadc6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1345,7 +1345,6 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); - nfsi->ncommit = 0; nfsi->npages = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9f9845859fc1..1a999939fedf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -404,7 +404,6 @@ nfs_mark_request_commit(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - nfsi->ncommit++; set_bit(PG_CLEAN, &(req)->wb_flags); radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, @@ -523,6 +522,12 @@ static void nfs_cancel_commit_list(struct list_head *head) } } +static int +nfs_need_commit(struct nfs_inode *nfsi) +{ + return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); +} + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) /* * nfs_scan_commit - Scan an inode for commit requests @@ -538,16 +543,18 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res = 0; - if (nfsi->ncommit != 0) { - res = nfs_scan_list(nfsi, dst, idx_start, npages, - NFS_PAGE_TAG_COMMIT); - nfsi->ncommit -= res; - } - return res; + if (!nfs_need_commit(nfsi)) + return 0; + + return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); } #else +static inline int nfs_need_commit(struct nfs_inode *nfsi) +{ + return 0; +} + static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { return 0; @@ -820,7 +827,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { data->args.stable = NFS_DATA_SYNC; - if (!NFS_I(inode)->ncommit) + if (!nfs_need_commit(NFS_I(inode))) data->args.stable = NFS_FILE_SYNC; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index db867b04ac3c..c9fecd3e8f0f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -166,8 +166,7 @@ struct nfs_inode { */ struct radix_tree_root nfs_page_tree; - unsigned long ncommit, - npages; + unsigned long npages; /* Open contexts for shared mmap writes */ struct list_head open_files; -- cgit v1.2.3 From 72cb77f4a5ace37b12dcb47a0e8637a2c28ad881 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:10:30 -0400 Subject: NFS: Throttle page dirtying while we're flushing to disk The following patch is a combination of a patch by myself and Peter Staubach. Trond: If we allow other processes to dirty pages while a process is doing a consistency sync to disk, we can end up never making progress. Peter: Attached is a patch which addresses a continuing problem with the NFS client generating out of order WRITE requests. While this is compliant with all of the current protocol specifications, there are servers in the market which can not handle out of order WRITE requests very well. Also, this may lead to sub-optimal block allocations in the underlying file system on the server. This may cause the read throughputs to be reduced when reading the file from the server. Peter: There has been a lot of work recently done to address out of order issues on a systemic level. However, the NFS client is still susceptible to the problem. Out of order WRITE requests can occur when pdflush is in the middle of writing out pages while the process dirtying the pages calls generic_file_buffered_write which calls generic_perform_write which calls balance_dirty_pages_rate_limited which ends up calling writeback_inodes which ends up calling back into the NFS client to writes out dirty pages for the same file that pdflush happens to be working with. Signed-off-by: Peter Staubach [modification by Trond to merge the two similar patches] Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 9 +++++++++ fs/nfs/inode.c | 12 ++++++++++++ fs/nfs/internal.h | 1 + fs/nfs/nfs4proc.c | 10 +--------- fs/nfs/pagelist.c | 11 ----------- fs/nfs/write.c | 28 +++++++++++++++++++--------- include/linux/nfs_fs.h | 1 + 7 files changed, 43 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 90f292b520d2..404c19c866a7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -354,6 +354,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); + /* + * Prevent starvation issues if someone is doing a consistency + * sync-to-disk + */ + ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (ret) + return ret; + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00f116cdadc6..c40adc5dd609 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -65,6 +65,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } +/** + * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks + * @word: long word containing the bit lock + */ +int nfs_wait_bit_killable(void *word) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + /** * nfs_compat_user_ino64 - returns the user-visible inode number * @fileid: 64-bit fileid diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 340ede8f608f..a55e69aa52e5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -165,6 +165,7 @@ extern void nfs_clear_inode(struct inode *); extern void nfs4_clear_inode(struct inode *); #endif void nfs_zap_acl_cache(struct inode *inode); +extern int nfs_wait_bit_killable(void *word); /* super.c */ void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 101f5f4c304f..95f171e7e05a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -193,14 +193,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start, KM_USER0); } -static int nfs4_wait_bit_killable(void *word) -{ - if (fatal_signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; -} - static int nfs4_wait_clnt_recover(struct nfs_client *clp) { int res; @@ -208,7 +200,7 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, - nfs4_wait_bit_killable, TASK_KILLABLE); + nfs_wait_bit_killable, TASK_KILLABLE); return res; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7f079209d70a..e2975939126a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -176,17 +176,6 @@ void nfs_release_request(struct nfs_page *req) kref_put(&req->wb_kref, nfs_free_request); } -static int nfs_wait_bit_killable(void *word) -{ - int ret = 0; - - if (fatal_signal_pending(current)) - ret = -ERESTARTSYS; - else - schedule(); - return ret; -} - /** * nfs_wait_on_request - Wait for a request to complete. * @req: request to wait upon. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1a999939fedf..36fd35e0de83 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -313,19 +313,34 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; + unsigned long *bitlock = &NFS_I(inode)->flags; struct nfs_pageio_descriptor pgio; int err; + /* Stop dirtying of new pages while we sync */ + err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (err) + goto out_err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); + + clear_bit_unlock(NFS_INO_FLUSHING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_FLUSHING); + if (err < 0) - return err; - if (pgio.pg_error < 0) - return pgio.pg_error; + goto out_err; + err = pgio.pg_error; + if (err < 0) + goto out_err; return 0; +out_err: + return err; } /* @@ -1432,18 +1447,13 @@ static int nfs_write_mapping(struct address_space *mapping, int how) { struct writeback_control wbc = { .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_NONE, + .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, .for_writepages = 1, }; - int ret; - ret = __nfs_write_mapping(mapping, &wbc, how); - if (ret < 0) - return ret; - wbc.sync_mode = WB_SYNC_ALL; return __nfs_write_mapping(mapping, &wbc, how); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c9fecd3e8f0f..933bc261c0df 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -206,6 +206,7 @@ struct nfs_inode { #define NFS_INO_STALE (1) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ +#define NFS_INO_FLUSHING (4) /* inode is flushing out data */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3 From 441e3e242903f9b190d5764bed73edb58f977413 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Wed, 11 Mar 2009 14:37:56 -0400 Subject: SUNRPC: dynamically load RPC transport modules on-demand Provide an api to attempt to load any necessary kernel RPC client transport module automatically. By convention, the desired module name is "xprt"+"transport name". For example, when NFS mounting with "-o proto=rdma", attempt to load the "xprtrdma" module. Signed-off-by: Tom Talpey Cc: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 11fc71d50c1e..2b0d960603b9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -235,6 +235,7 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * */ int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); +int xprt_load_transport(const char *); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 62098d101a1f..d1afec640394 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -151,6 +151,37 @@ out: } EXPORT_SYMBOL_GPL(xprt_unregister_transport); +/** + * xprt_load_transport - load a transport implementation + * @transport_name: transport to load + * + * Returns: + * 0: transport successfully loaded + * -ENOENT: transport module not available + */ +int xprt_load_transport(const char *transport_name) +{ + struct xprt_class *t; + char module_name[sizeof t->name + 5]; + int result; + + result = 0; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (strcmp(t->name, transport_name) == 0) { + spin_unlock(&xprt_list_lock); + goto out; + } + } + spin_unlock(&xprt_list_lock); + strcpy(module_name, "xprt"); + strncat(module_name, transport_name, sizeof t->name); + result = request_module(module_name); +out: + return result; +} +EXPORT_SYMBOL_GPL(xprt_load_transport); + /** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport -- cgit v1.2.3 From 7d1e8255cf959fba7ee2317550dfde39f0b936ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:03 -0400 Subject: SUNRPC: Add the equivalent of the linger and linger2 timeouts to RPC sockets This fixes a regression against FreeBSD servers as reported by Tomas Kasparek. Apparently when using RPC over a TCP socket, the FreeBSD servers don't ever react to the client closing the socket, and so commit e06799f958bf7f9f8fae15f0c6f519953fb0257c (SUNRPC: Use shutdown() instead of close() when disconnecting a TCP socket) causes the setup to hang forever whenever the client attempts to close and then reconnect. We break the deadlock by adding a 'linger2' style timeout to the socket, after which, the client will abort the connection using a TCP 'RST'. The default timeout is set to 15 seconds. A subsequent patch will put it under user control by means of a systctl. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprtsock.c | 98 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 82 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 2b0d960603b9..1758d9f5b5c3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -260,6 +260,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) +#define XPRT_CONNECTION_ABORT (7) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2e070679ab4a..b51f58b95c39 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -49,6 +49,8 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +#define XS_TCP_LINGER_TO (15U * HZ) + /* * We can register our own files under /proc/sys/sunrpc by * calling register_sysctl_table() again. The files in that @@ -806,6 +808,7 @@ static void xs_close(struct rpc_xprt *xprt) xs_reset_transport(transport); smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); @@ -1133,6 +1136,47 @@ out: read_unlock(&sk->sk_callback_lock); } +/* + * Do the equivalent of linger/linger2 handling for dealing with + * broken servers that don't close the socket in a timely + * fashion + */ +static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, + unsigned long timeout) +{ + struct sock_xprt *transport; + + if (xprt_test_and_set_connecting(xprt)) + return; + set_bit(XPRT_CONNECTION_ABORT, &xprt->state); + transport = container_of(xprt, struct sock_xprt, xprt); + queue_delayed_work(rpciod_workqueue, &transport->connect_worker, + timeout); +} + +static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport; + + transport = container_of(xprt, struct sock_xprt, xprt); + + if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || + !cancel_delayed_work(&transport->connect_worker)) + return; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + xprt_clear_connecting(xprt); +} + +static void xs_sock_mark_closed(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); +} + /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -1178,6 +1222,7 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_clear_bit(); + xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ @@ -1194,17 +1239,14 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_LAST_ACK: set_bit(XPRT_CLOSING, &xprt->state); + xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); break; case TCP_CLOSE: - smp_mb__before_clear_bit(); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_clear_bit(); - /* Mark transport as closed and wake up all pending tasks */ - xprt_disconnect_done(xprt); + xs_tcp_cancel_linger_timeout(xprt); + xs_sock_mark_closed(xprt); } out: read_unlock(&sk->sk_callback_lock); @@ -1562,8 +1604,8 @@ static void xs_udp_connect_worker4(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** @@ -1604,8 +1646,8 @@ static void xs_udp_connect_worker6(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /* @@ -1626,7 +1668,9 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); - if (result) + if (!result) + xs_sock_mark_closed(xprt); + else dprintk("RPC: AF_UNSPEC connect return code %d\n", result); } @@ -1702,6 +1746,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) goto out; if (!sock) { + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); @@ -1713,10 +1758,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) sock_release(sock); goto out; } - } else + } else { + int abort_and_exit; + + abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, + &xprt->state); /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt, transport); + if (abort_and_exit) + goto out_eagain; + } + dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1732,17 +1785,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) case 0: case -EINPROGRESS: case -EALREADY: - goto out_clear; + xprt_clear_connecting(xprt); + return; } /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); printk("%s: connect returned unhandled error %d\n", __func__, status); +out_eagain: status = -EAGAIN; out: - xprt_wake_pending_tasks(xprt, status); -out_clear: xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** @@ -1763,6 +1817,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) goto out; if (!sock) { + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); /* start from scratch */ if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); @@ -1774,10 +1829,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) sock_release(sock); goto out; } - } else + } else { + int abort_and_exit; + + abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, + &xprt->state); /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt, transport); + if (abort_and_exit) + goto out_eagain; + } + dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1792,17 +1855,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) case 0: case -EINPROGRESS: case -EALREADY: - goto out_clear; + xprt_clear_connecting(xprt); + return; } /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); printk("%s: connect returned unhandled error %d\n", __func__, status); +out_eagain: status = -EAGAIN; out: - xprt_wake_pending_tasks(xprt, status); -out_clear: xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** -- cgit v1.2.3 From 7fe5c398fc2186ed586db11106a6692d871d0d58 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Mar 2009 15:35:50 -0400 Subject: NFS: Optimise NFS close() Close-to-open cache consistency rules really only require us to flush out writes on calls to close(), and require us to revalidate attributes on the very last close of the file. Currently we appear to be doing a lot of extra attribute revalidation and cache flushes. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 11 ++--------- fs/nfs/inode.c | 41 +++++++++++++++++++++++++++++------------ fs/nfs/internal.h | 3 +++ fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4proc.c | 10 ++++++++++ fs/nfs/proc.c | 1 + include/linux/nfs_xdr.h | 1 + 7 files changed, 47 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1eab9c9ad242..d451073c4947 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -137,9 +137,6 @@ nfs_file_release(struct inode *inode, struct file *filp) dentry->d_parent->d_name.name, dentry->d_name.name); - /* Ensure that dirty pages are flushed out with the right creds */ - if (filp->f_mode & FMODE_WRITE) - nfs_wb_all(dentry->d_inode); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); } @@ -231,7 +228,6 @@ nfs_file_flush(struct file *file, fl_owner_t id) struct nfs_open_context *ctx = nfs_file_open_context(file); struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - int status; dprintk("NFS: flush(%s/%s)\n", dentry->d_parent->d_name.name, @@ -241,11 +237,8 @@ nfs_file_flush(struct file *file, fl_owner_t id) return 0; nfs_inc_stats(inode, NFSIOS_VFSFLUSH); - /* Ensure that data+attribute caches are up to date after close() */ - status = nfs_do_fsync(ctx, inode); - if (!status) - nfs_revalidate_inode(NFS_SERVER(inode), inode); - return status; + /* Flush writes to the server and return any errors */ + return nfs_do_fsync(ctx, inode); } static ssize_t diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c40adc5dd609..a834d1d850b7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -541,6 +541,32 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) return err; } +/** + * nfs_close_context - Common close_context() routine NFSv2/v3 + * @ctx: pointer to context + * @is_sync: is this a synchronous close + * + * always ensure that the attributes are up to date if we're mounted + * with close-to-open semantics + */ +void nfs_close_context(struct nfs_open_context *ctx, int is_sync) +{ + struct inode *inode; + struct nfs_server *server; + + if (!(ctx->mode & FMODE_WRITE)) + return; + if (!is_sync) + return; + inode = ctx->path.dentry->d_inode; + if (!list_empty(&NFS_I(inode)->open_files)) + return; + server = NFS_SERVER(inode); + if (server->flags & NFS_MOUNT_NOCTO) + return; + nfs_revalidate_inode(server, inode); +} + static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) { struct nfs_open_context *ctx; @@ -567,24 +593,15 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) return ctx; } -static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) +static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { - struct inode *inode; + struct inode *inode = ctx->path.dentry->d_inode; - if (ctx == NULL) - return; - - inode = ctx->path.dentry->d_inode; if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) return; list_del(&ctx->list); spin_unlock(&inode->i_lock); - if (ctx->state != NULL) { - if (wait) - nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); - else - nfs4_close_state(&ctx->path, ctx->state, ctx->mode); - } + NFS_PROTO(inode)->close_context(ctx, is_sync); if (ctx->cred != NULL) put_rpccred(ctx->cred); path_put(&ctx->path); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a55e69aa52e5..2041f68ff1cc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -152,6 +152,9 @@ extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; #endif +/* proc.c */ +void nfs_close_context(struct nfs_open_context *ctx, int is_sync); + /* dir.c */ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c55be7a7679e..b82fe6847f14 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -834,4 +834,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .commit_done = nfs3_commit_done, .lock = nfs3_proc_lock, .clear_acl_cache = nfs3_forget_cached_acls, + .close_context = nfs_close_context, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 95f171e7e05a..97bacccff579 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1572,6 +1572,15 @@ out_drop: return 0; } +void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) +{ + if (ctx->state == NULL) + return; + if (is_sync) + nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); + else + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); +} static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { @@ -3776,6 +3785,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .commit_done = nfs4_commit_done, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, + .close_context = nfs4_close_context, }; /* diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 193465210d7c..7be72d90d49d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -663,4 +663,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .commit_setup = nfs_proc_commit_setup, .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, + .close_context = nfs_close_context, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0691b9c188d9..9708e78a4d49 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -868,6 +868,7 @@ struct nfs_rpc_ops { int (*lock)(struct file *, int, struct file_lock *); int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); + void (*close_context)(struct nfs_open_context *ctx, int); }; /* -- cgit v1.2.3 From 1bf83e558cb29d163f4bc6decbc3800ecf4db195 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:38:34 +0100 Subject: PCI: PCIe portdrv: Use driver data to simplify code PCI Express port driver extension, as defined by struct pcie_port_device_ext in portdrv.h, is allocated and initialized, but never used (it also is never freed). Extend it to hold the PCI Express port type as well as the port interrupt mode, change its name and use it to simplify the code in portdrv_core.c . Additionally, remove the redundant interrupt_mode member of struct pcie_device defined in include/linux/pcieport_if.h . Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv.h | 5 ++- drivers/pci/pcie/portdrv_core.c | 95 ++++++++++++++++------------------------- include/linux/pcieport_if.h | 1 - 3 files changed, 39 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 2529f3f2ea5a..b0dcbc73415e 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -28,8 +28,9 @@ #define get_descriptor_id(type, service) (((type - 4) << 4) | service) -struct pcie_port_device_ext { - int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ +struct pcie_port_data { + int port_type; /* Type of the port */ + int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ }; extern struct bus_type pcie_port_bus_type; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 8b3f8c18032f..273e97619bce 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -15,10 +15,9 @@ #include #include +#include "../pci.h" #include "portdrv.h" -extern int pcie_mch_quirk; /* MSI-quirk Indicator */ - /** * release_pcie_device - free PCI Express port service device structure * @dev: Port service device to release @@ -31,28 +30,6 @@ static void release_pcie_device(struct device *dev) kfree(to_pcie_device(dev)); } -static int is_msi_quirked(struct pci_dev *dev) -{ - int port_type, quirk = 0; - u16 reg16; - - pci_read_config_word(dev, - pci_find_capability(dev, PCI_CAP_ID_EXP) + - PCIE_CAPABILITIES_REG, ®16); - port_type = (reg16 >> 4) & PORT_TYPE_MASK; - switch(port_type) { - case PCIE_RC_PORT: - if (pcie_mch_quirk == 1) - quirk = 1; - break; - case PCIE_SW_UPSTREAM_PORT: - case PCIE_SW_DOWNSTREAM_PORT: - default: - break; - } - return quirk; -} - /** * assign_interrupt_mode - choose interrupt mode for PCI Express port services * (INTx, MSI-X, MSI) and set up vectors @@ -64,6 +41,7 @@ static int is_msi_quirked(struct pci_dev *dev) */ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { + struct pcie_port_data *port_data = pci_get_drvdata(dev); int i, pos, nvec, status = -EINVAL; int interrupt_mode = PCIE_PORT_INTx_MODE; @@ -75,7 +53,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) } /* Check MSI quirk */ - if (is_msi_quirked(dev)) + if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk) return interrupt_mode; /* Select MSI-X over MSI if supported */ @@ -132,13 +110,11 @@ static int get_port_device_capability(struct pci_dev *dev) pos + PCIE_SLOT_CAPABILITIES_REG, ®32); if (reg32 & SLOT_HP_CAPABLE_MASK) services |= PCIE_PORT_SERVICE_HP; - } - /* PME Capable - root port capability */ - if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT) - services |= PCIE_PORT_SERVICE_PME; - + } + /* AER capable */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)) services |= PCIE_PORT_SERVICE_AER; + /* VC support */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC)) services |= PCIE_PORT_SERVICE_VC; @@ -152,15 +128,15 @@ static int get_port_device_capability(struct pci_dev *dev) * @port_type: Type of the port * @service_type: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device - * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) */ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, - int port_type, int service_type, int irq, int irq_mode) + int service_type, int irq) { + struct pcie_port_data *port_data = pci_get_drvdata(parent); struct device *device; + int port_type = port_data->port_type; dev->port = parent; - dev->interrupt_mode = irq_mode; dev->irq = irq; dev->id.vendor = parent->vendor; dev->id.device = parent->device; @@ -185,10 +161,9 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, * @port_type: Type of the port * @service_type: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device - * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) */ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, - int port_type, int service_type, int irq, int irq_mode) + int service_type, int irq) { struct pcie_device *device; @@ -196,7 +171,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, if (!device) return NULL; - pcie_device_init(parent, device, port_type, service_type, irq,irq_mode); + pcie_device_init(parent, device, service_type, irq); return device; } @@ -230,39 +205,36 @@ int pcie_port_device_probe(struct pci_dev *dev) */ int pcie_port_device_register(struct pci_dev *dev) { - struct pcie_port_device_ext *p_ext; - int status, type, capabilities, irq_mode, i; + struct pcie_port_data *port_data; + int status, capabilities, irq_mode, i; int vectors[PCIE_PORT_DEVICE_MAXSERVICES]; u16 reg16; - /* Allocate port device extension */ - if (!(p_ext = kmalloc(sizeof(struct pcie_port_device_ext), GFP_KERNEL))) + port_data = kzalloc(sizeof(*port_data), GFP_KERNEL); + if (!port_data) return -ENOMEM; - - pci_set_drvdata(dev, p_ext); + pci_set_drvdata(dev, port_data); /* Get port type */ pci_read_config_word(dev, pci_find_capability(dev, PCI_CAP_ID_EXP) + PCIE_CAPABILITIES_REG, ®16); - type = (reg16 >> 4) & PORT_TYPE_MASK; + port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK; - /* Now get port services */ capabilities = get_port_device_capability(dev); + /* Root ports are capable of generating PME too */ + if (port_data->port_type == PCIE_RC_PORT) + capabilities |= PCIE_PORT_SERVICE_PME; + irq_mode = assign_interrupt_mode(dev, vectors, capabilities); - p_ext->interrupt_mode = irq_mode; + port_data->port_irq_mode = irq_mode; /* Allocate child services if any */ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { struct pcie_device *child; if (capabilities & (1 << i)) { - child = alloc_pcie_device( - dev, /* parent */ - type, /* port type */ - i, /* service type */ - vectors[i], /* irq */ - irq_mode /* interrupt mode */); + child = alloc_pcie_device(dev, i, vectors[i]); if (child) { status = device_register(&child->device); if (status) { @@ -349,25 +321,30 @@ static int remove_iter(struct device *dev, void *data) */ void pcie_port_device_remove(struct pci_dev *dev) { - struct device *device; - unsigned long device_addr; - int interrupt_mode = PCIE_PORT_INTx_MODE; + struct pcie_port_data *port_data = pci_get_drvdata(dev); int status; do { + unsigned long device_addr; + status = device_for_each_child(&dev->dev, &device_addr, remove_iter); if (status) { - device = (struct device*)device_addr; - interrupt_mode = (to_pcie_device(device))->interrupt_mode; + struct device *device = (struct device*)device_addr; put_device(device); device_unregister(device); } } while (status); - /* Switch to INTx by default if MSI enabled */ - if (interrupt_mode == PCIE_PORT_MSIX_MODE) + + switch (port_data->port_irq_mode) { + case PCIE_PORT_MSIX_MODE: pci_disable_msix(dev); - else if (interrupt_mode == PCIE_PORT_MSI_MODE) + break; + case PCIE_PORT_MSI_MODE: pci_disable_msi(dev); + break; + } + + kfree(port_data); } /** diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 6cd91e3f9820..194409af1037 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -36,7 +36,6 @@ struct pcie_port_service_id { struct pcie_device { int irq; /* Service IRQ/MSI/MSI-X Vector */ - int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ struct pcie_port_service_id id; /* Service ID */ struct pci_dev *port; /* Root/Upstream/Downstream Port */ void *priv_data; /* Service Private Data */ -- cgit v1.2.3 From 90e9cd50f7feeddc911325c8a8c1b7e1fccc6599 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:39:39 +0100 Subject: PCI: PCIe portdrv: Aviod using service devices with wrong interrupts The PCI Express port driver should not attempt to register service devices that require the ability to generate interrupts if generating interrupts is not possible. Namely, if the port has no interrupt pin configured and we cannot set up MSI or MSI-X for it, there is no way it can generate interrupts and in such a case the port services that rely on interrupts (PME, PCIe HP, AER) should not be enabled for it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_core.c | 41 ++++++++++++++++++++++++++++------------- include/linux/pcieport_if.h | 1 + 2 files changed, 29 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 273e97619bce..265eba033a4a 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -43,7 +43,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { struct pcie_port_data *port_data = pci_get_drvdata(dev); int i, pos, nvec, status = -EINVAL; - int interrupt_mode = PCIE_PORT_INTx_MODE; + int interrupt_mode = PCIE_PORT_NO_IRQ; /* Set INTx as default */ for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { @@ -51,7 +51,9 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) nvec++; vectors[i] = dev->irq; } - + if (dev->pin) + interrupt_mode = PCIE_PORT_INTx_MODE; + /* Check MSI quirk */ if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk) return interrupt_mode; @@ -141,7 +143,7 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, dev->id.vendor = parent->vendor; dev->id.device = parent->device; dev->id.port_type = port_type; - dev->id.service_type = (1 << service_type); + dev->id.service_type = service_type; /* Initialize generic device interface */ device = &dev->device; @@ -232,19 +234,32 @@ int pcie_port_device_register(struct pci_dev *dev) /* Allocate child services if any */ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { struct pcie_device *child; + int service = 1 << i; - if (capabilities & (1 << i)) { - child = alloc_pcie_device(dev, i, vectors[i]); - if (child) { - status = device_register(&child->device); - if (status) { - kfree(child); - continue; - } - get_device(&child->device); - } + if (!(capabilities & service)) + continue; + + /* + * Don't use service devices that require interrupts if there is + * no way to generate them. + */ + if (irq_mode == PCIE_PORT_NO_IRQ + && service != PCIE_PORT_SERVICE_VC) + continue; + + child = alloc_pcie_device(dev, service, vectors[i]); + if (!child) + continue; + + status = device_register(&child->device); + if (status) { + kfree(child); + continue; } + + get_device(&child->device); } + return 0; } diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 194409af1037..8e1ae1fd92f6 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -22,6 +22,7 @@ #define PCIE_PORT_SERVICE_VC 8 /* Virtual Channel */ /* Root/Upstream/Downstream Port's Interrupt Mode */ +#define PCIE_PORT_NO_IRQ (-1) #define PCIE_PORT_INTx_MODE 0 #define PCIE_PORT_MSI_MODE 1 #define PCIE_PORT_MSIX_MODE 2 -- cgit v1.2.3 From 0516c8bcd25293f438573101c439ce25a18916ad Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:44:19 +0100 Subject: PCI: PCIe portdrv: Simplily probe callback of service drivers The second argument of the ->probe() callback in struct pcie_port_service_driver is unnecessary and never used. Remove it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_acpi.c | 3 +-- drivers/pci/hotplug/pciehp_core.c | 2 +- drivers/pci/pcie/aer/aerdrv.c | 6 ++---- drivers/pci/pcie/portdrv_core.c | 2 +- include/linux/pcieport_if.h | 3 +-- 5 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 438d795f9fe3..ad8835758a17 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -82,8 +82,7 @@ static int __initdata acpi_slot_detected; static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); /* Dummy driver for dumplicate name detection */ -static int __init dummy_probe(struct pcie_device *dev, - const struct pcie_port_service_id *id) +static int __init dummy_probe(struct pcie_device *dev) { int pos; u32 slot_cap; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 681e3912b821..3429b21dbb53 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -401,7 +401,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe return 0; } -static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_id *id) +static int pciehp_probe(struct pcie_device *dev) { int rc; struct controller *ctrl; diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index e390707661dd..57c41204c549 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -38,8 +38,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -static int __devinit aer_probe (struct pcie_device *dev, - const struct pcie_port_service_id *id ); +static int __devinit aer_probe (struct pcie_device *dev); static void aer_remove(struct pcie_device *dev); static int aer_suspend(struct pcie_device *dev, pm_message_t state) {return 0;} @@ -207,8 +206,7 @@ static void aer_remove(struct pcie_device *dev) * * Invoked when PCI Express bus loads AER service driver. **/ -static int __devinit aer_probe (struct pcie_device *dev, - const struct pcie_port_service_id *id ) +static int __devinit aer_probe (struct pcie_device *dev) { int status; struct aer_rpc *rpc; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 91ecbc43155f..682524b0c93a 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -402,7 +402,7 @@ static int pcie_port_probe_service(struct device *dev) return -ENODEV; pciedev = to_pcie_device(dev); - status = driver->probe(pciedev, driver->id_table); + status = driver->probe(pciedev); if (!status) { dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n", driver->name); diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 8e1ae1fd92f6..59e90b8a7839 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -56,8 +56,7 @@ static inline void* get_service_data(struct pcie_device *dev) struct pcie_port_service_driver { const char *name; - int (*probe) (struct pcie_device *dev, - const struct pcie_port_service_id *id); + int (*probe) (struct pcie_device *dev); void (*remove) (struct pcie_device *dev); int (*suspend) (struct pcie_device *dev, pm_message_t state); int (*resume) (struct pcie_device *dev); -- cgit v1.2.3 From 22106368c999246c414610dcaacd485e741605b1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Jan 2009 14:46:46 +0100 Subject: PCI: PCIe portdrv: Remove struct pcie_port_service_id The PCI Express port driver uses 'struct pcie_port_service_id' for matching port service devices and drivers, but this structure contains fields that duplicate information from the port device itself (vendor, device, subvendor, subdevice) and fields that are not used by any existing port service driver (class, class_mask, drvier_data). Also, both existing port service drivers (AER and PCIe HP) don't even use the vendor and device fields for device matching. Therefore 'struct pcie_port_service_id' can be removed altogether and the only useful members of it (port_type, service) can be introduced directly into the port service device and port service driver structures. That simplifies the code quite a bit and reduces its size. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_acpi.c | 13 ++----------- drivers/pci/hotplug/pciehp_core.c | 12 ++---------- drivers/pci/pcie/aer/aerdrv.c | 16 ++-------------- drivers/pci/pcie/aer/aerdrv_core.c | 10 +++++----- drivers/pci/pcie/portdrv.h | 5 ----- drivers/pci/pcie/portdrv_bus.c | 18 ++++++++++-------- drivers/pci/pcie/portdrv_core.c | 5 +---- include/linux/pcieport_if.h | 17 ++++++++--------- 8 files changed, 30 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index ad8835758a17..21734c311529 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -67,16 +67,6 @@ static int __init parse_detect_mode(void) return PCIEHP_DETECT_DEFAULT; } -static struct pcie_port_service_id __initdata port_pci_ids[] = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_ANY_PORT, - .service_type = PCIE_PORT_SERVICE_HP, - .driver_data = 0, - }, { /* end: all zeroes */ } -}; - static int __initdata dup_slot_id; static int __initdata acpi_slot_detected; static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); @@ -110,7 +100,8 @@ static int __init dummy_probe(struct pcie_device *dev) static struct pcie_port_service_driver __initdata dummy_driver = { .name = "pciehp_dummy", - .id_table = port_pci_ids, + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_HP, .probe = dummy_probe, }; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 3429b21dbb53..3d21bbba3308 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -505,18 +505,10 @@ static int pciehp_resume (struct pcie_device *dev) } #endif -static struct pcie_port_service_id port_pci_ids[] = { { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_ANY_PORT, - .service_type = PCIE_PORT_SERVICE_HP, - .driver_data = 0, - }, { /* end: all zeroes */ } -}; - static struct pcie_port_service_driver hpdriver_portdrv = { .name = PCIE_MODULE_NAME, - .id_table = &port_pci_ids[0], + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_HP, .probe = pciehp_probe, .remove = pciehp_remove, diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 57c41204c549..e11c03194063 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -48,19 +48,6 @@ static pci_ers_result_t aer_error_detected(struct pci_dev *dev, static void aer_error_resume(struct pci_dev *dev); static pci_ers_result_t aer_root_reset(struct pci_dev *dev); -/* - * PCI Express bus's AER Root service driver data structure - */ -static struct pcie_port_service_id aer_id[] = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_RC_PORT, - .service_type = PCIE_PORT_SERVICE_AER, - }, - { /* end: all zeroes */ } -}; - static struct pci_error_handlers aer_error_handlers = { .error_detected = aer_error_detected, .resume = aer_error_resume, @@ -68,7 +55,8 @@ static struct pci_error_handlers aer_error_handlers = { static struct pcie_port_service_driver aerdriver = { .name = "aer", - .id_table = &aer_id[0], + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, .remove = aer_remove, diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 382575007382..307452f30035 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -351,21 +351,21 @@ static int find_aer_service_iter(struct device *device, void *data) { struct device_driver *driver; struct pcie_port_service_driver *service_driver; - struct pcie_device *pcie_dev; struct find_aer_service_data *result; result = (struct find_aer_service_data *) data; if (device->bus == &pcie_port_bus_type) { - pcie_dev = to_pcie_device(device); - if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT) + struct pcie_port_data *port_data; + + port_data = pci_get_drvdata(to_pcie_device(device)->port); + if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT) result->is_downstream = 1; driver = device->driver; if (driver) { service_driver = to_service_driver(driver); - if (service_driver->id_table->service_type == - PCIE_PORT_SERVICE_AER) { + if (service_driver->service == PCIE_PORT_SERVICE_AER) { result->aer_driver = service_driver; return 1; } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index b0dcbc73415e..ad4d082a0344 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -28,11 +28,6 @@ #define get_descriptor_id(type, service) (((type - 4) << 4) | service) -struct pcie_port_data { - int port_type; /* Type of the port */ - int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ -}; - extern struct bus_type pcie_port_bus_type; extern int pcie_port_device_probe(struct pci_dev *dev); extern int pcie_port_device_register(struct pci_dev *dev); diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index eec89b767f9f..ef3a4eeaebb4 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -26,20 +26,22 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type); static int pcie_port_bus_match(struct device *dev, struct device_driver *drv) { struct pcie_device *pciedev; + struct pcie_port_data *port_data; struct pcie_port_service_driver *driver; if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type) return 0; - + pciedev = to_pcie_device(dev); driver = to_service_driver(drv); - if ( (driver->id_table->vendor != PCI_ANY_ID && - driver->id_table->vendor != pciedev->id.vendor) || - (driver->id_table->device != PCI_ANY_ID && - driver->id_table->device != pciedev->id.device) || - (driver->id_table->port_type != PCIE_ANY_PORT && - driver->id_table->port_type != pciedev->id.port_type) || - driver->id_table->service_type != pciedev->id.service_type ) + + if (driver->service != pciedev->service) + return 0; + + port_data = pci_get_drvdata(pciedev->port); + + if (driver->port_type != PCIE_ANY_PORT + && driver->port_type != port_data->port_type) return 0; return 1; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 682524b0c93a..843d9e30dd3b 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -140,10 +140,7 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, dev->port = parent; dev->irq = irq; - dev->id.vendor = parent->vendor; - dev->id.device = parent->device; - dev->id.port_type = port_type; - dev->id.service_type = service_type; + dev->service = service_type; /* Initialize generic device interface */ device = &dev->device; diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 59e90b8a7839..a3832079508e 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -27,18 +27,15 @@ #define PCIE_PORT_MSI_MODE 1 #define PCIE_PORT_MSIX_MODE 2 -struct pcie_port_service_id { - __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ - __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ - __u32 class, class_mask; /* (class,subclass,prog-if) triplet */ - __u32 port_type, service_type; /* Port Entity */ - kernel_ulong_t driver_data; +struct pcie_port_data { + int port_type; /* Type of the port */ + int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ }; struct pcie_device { int irq; /* Service IRQ/MSI/MSI-X Vector */ - struct pcie_port_service_id id; /* Service ID */ - struct pci_dev *port; /* Root/Upstream/Downstream Port */ + struct pci_dev *port; /* Root/Upstream/Downstream Port */ + u32 service; /* Port service this device represents */ void *priv_data; /* Service Private Data */ struct device device; /* Generic Device Interface */ }; @@ -67,7 +64,9 @@ struct pcie_port_service_driver { /* Link Reset Capability - AER service driver specific */ pci_ers_result_t (*reset_link) (struct pci_dev *dev); - const struct pcie_port_service_id *id_table; + int port_type; /* Type of the port this driver can handle */ + u32 service; /* Port service this device represents */ + struct device_driver driver; }; #define to_service_driver(d) \ -- cgit v1.2.3 From a52e2e3513d4beafe8fe8699f1519b021c2d05ba Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 24 Jan 2009 00:21:14 +0100 Subject: PCI/MSI: Introduce pci_msix_table_size() Introduce new function pci_msix_table_size() returning the size of the MSI-X table of given PCI device or 0 if the device doesn't support MSI-X. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 24 +++++++++++++++++++----- include/linux/pci.h | 5 +++++ 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index baba2eb5367d..08aedd5875b0 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -674,6 +674,23 @@ static int msi_free_irqs(struct pci_dev* dev) return 0; } +/** + * pci_msix_table_size - return the number of device's MSI-X table entries + * @dev: pointer to the pci_dev data structure of MSI-X device function + */ +int pci_msix_table_size(struct pci_dev *dev) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + if (!pos) + return 0; + + pci_read_config_word(dev, msi_control_reg(pos), &control); + return multi_msix_capable(control); +} + /** * pci_enable_msix - configure device's MSI-X capability structure * @dev: pointer to the pci_dev data structure of MSI-X device function @@ -691,9 +708,8 @@ static int msi_free_irqs(struct pci_dev* dev) **/ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) { - int status, pos, nr_entries; + int status, nr_entries; int i, j; - u16 control; if (!entries) return -EINVAL; @@ -702,9 +718,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) if (status) return status; - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - pci_read_config_word(dev, msi_control_reg(pos), &control); - nr_entries = multi_msix_capable(control); + nr_entries = pci_msix_table_size(dev); if (nvec > nr_entries) return -EINVAL; diff --git a/include/linux/pci.h b/include/linux/pci.h index 7bd624bfdcfd..b5d6d0e0f1cb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -799,6 +799,10 @@ static inline void pci_msi_shutdown(struct pci_dev *dev) static inline void pci_disable_msi(struct pci_dev *dev) { } +static inline int pci_msix_table_size(struct pci_dev *dev) +{ + return 0; +} static inline int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) { @@ -823,6 +827,7 @@ static inline int pci_msi_enabled(void) extern int pci_enable_msi(struct pci_dev *dev); extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); +extern int pci_msix_table_size(struct pci_dev *dev); extern int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); extern void pci_msix_shutdown(struct pci_dev *dev); -- cgit v1.2.3 From b43d451385ef833e0696032aac2629da04d46c59 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 24 Jan 2009 00:23:22 +0100 Subject: PCI/PCIe portdrv: Fix allocation of interrupts If MSI-X interrupt mode is used by the PCI Express port driver, too many vectors are allocated and it is not ensured that the right vectors will be used for the right services. Namely, the PCI Express specification states that both PCI Express native PME and PCI Express hotplug will always use the same MSI or MSI-X message for signalling interrupts, which implies that the same vector will be used by both of them. Also, the VC service does not use interrupts at all. Moreover, is not clear which of the vectors allocated by pci_enable_msix() in the current code will be used for PME and hotplug and which of them will be used for AER if all of these services are configured. For these reasons, rework the allocation of interrupts for PCI Express ports so that if MSI-X are enabled, the right vectors will be used for the right purposes. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv.h | 6 ++ drivers/pci/pcie/portdrv_core.c | 206 ++++++++++++++++++++++++++++++++-------- include/linux/pcieport_if.h | 12 ++- 3 files changed, 181 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index ad4d082a0344..5b818bd835ef 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -25,6 +25,12 @@ #define PCIE_CAPABILITIES_REG 0x2 #define PCIE_SLOT_CAPABILITIES_REG 0x14 #define PCIE_PORT_DEVICE_MAXSERVICES 4 +#define PCIE_PORT_MSI_VECTOR_MASK 0x1f +/* + * According to the PCI Express Base Specification 2.0, the indices of the MSI-X + * table entires used by port services must not exceed 31 + */ +#define PCIE_PORT_MAX_MSIX_ENTRIES 32 #define get_descriptor_id(type, service) (((type - 4) << 4) | service) diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 843d9e30dd3b..3aea92a92928 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -30,6 +30,152 @@ static void release_pcie_device(struct device *dev) kfree(to_pcie_device(dev)); } +/** + * pcie_port_msix_add_entry - add entry to given array of MSI-X entries + * @entries: Array of MSI-X entries + * @new_entry: Index of the entry to add to the array + * @nr_entries: Number of entries aleady in the array + * + * Return value: Position of the added entry in the array + */ +static int pcie_port_msix_add_entry( + struct msix_entry *entries, int new_entry, int nr_entries) +{ + int j; + + for (j = 0; j < nr_entries; j++) + if (entries[j].entry == new_entry) + return j; + + entries[j].entry = new_entry; + return j; +} + +/** + * pcie_port_enable_msix - try to set up MSI-X as interrupt mode for given port + * @dev: PCI Express port to handle + * @vectors: Array of interrupt vectors to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: 0 on success, error code on failure + */ +static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask) +{ + struct msix_entry *msix_entries; + int idx[PCIE_PORT_DEVICE_MAXSERVICES]; + int nr_entries, status, pos, i, nvec; + u16 reg16; + u32 reg32; + + nr_entries = pci_msix_table_size(dev); + if (!nr_entries) + return -EINVAL; + if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES) + nr_entries = PCIE_PORT_MAX_MSIX_ENTRIES; + + msix_entries = kzalloc(sizeof(*msix_entries) * nr_entries, GFP_KERNEL); + if (!msix_entries) + return -ENOMEM; + + /* + * Allocate as many entries as the port wants, so that we can check + * which of them will be useful. Moreover, if nr_entries is correctly + * equal to the number of entries this port actually uses, we'll happily + * go through without any tricks. + */ + for (i = 0; i < nr_entries; i++) + msix_entries[i].entry = i; + + status = pci_enable_msix(dev, msix_entries, nr_entries); + if (status) + goto Exit; + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + idx[i] = -1; + status = -EIO; + nvec = 0; + + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + int entry; + + /* + * The code below follows the PCI Express Base Specification 2.0 + * stating in Section 6.1.6 that "PME and Hot-Plug Event + * interrupts (when both are implemented) always share the same + * MSI or MSI-X vector, as indicated by the Interrupt Message + * Number field in the PCI Express Capabilities register", where + * according to Section 7.8.2 of the specification "For MSI-X, + * the value in this field indicates which MSI-X Table entry is + * used to generate the interrupt message." + */ + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, ®16); + entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK; + if (entry >= nr_entries) + goto Error; + + i = pcie_port_msix_add_entry(msix_entries, entry, nvec); + if (i == nvec) + nvec++; + + idx[PCIE_PORT_SERVICE_PME_SHIFT] = i; + idx[PCIE_PORT_SERVICE_HP_SHIFT] = i; + } + + if (mask & PCIE_PORT_SERVICE_AER) { + int entry; + + /* + * The code below follows Section 7.10.10 of the PCI Express + * Base Specification 2.0 stating that bits 31-27 of the Root + * Error Status Register contain a value indicating which of the + * MSI/MSI-X vectors assigned to the port is going to be used + * for AER, where "For MSI-X, the value in this register + * indicates which MSI-X Table entry is used to generate the + * interrupt message." + */ + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); + entry = reg32 >> 27; + if (entry >= nr_entries) + goto Error; + + i = pcie_port_msix_add_entry(msix_entries, entry, nvec); + if (i == nvec) + nvec++; + + idx[PCIE_PORT_SERVICE_AER_SHIFT] = i; + } + + /* + * If nvec is equal to the allocated number of entries, we can just use + * what we have. Otherwise, the port has some extra entries not for the + * services we know and we need to work around that. + */ + if (nvec == nr_entries) { + status = 0; + } else { + /* Drop the temporary MSI-X setup */ + pci_disable_msix(dev); + + /* Now allocate the MSI-X vectors for real */ + status = pci_enable_msix(dev, msix_entries, nvec); + if (status) + goto Exit; + } + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + vectors[i] = idx[i] >= 0 ? msix_entries[idx[i]].vector : -1; + + Exit: + kfree(msix_entries); + return status; + + Error: + pci_disable_msix(dev); + goto Exit; +} + /** * assign_interrupt_mode - choose interrupt mode for PCI Express port services * (INTx, MSI-X, MSI) and set up vectors @@ -42,49 +188,31 @@ static void release_pcie_device(struct device *dev) static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { struct pcie_port_data *port_data = pci_get_drvdata(dev); - int i, pos, nvec, status = -EINVAL; - int interrupt_mode = PCIE_PORT_NO_IRQ; - - /* Set INTx as default */ - for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - if (mask & (1 << i)) - nvec++; - vectors[i] = dev->irq; - } - if (dev->pin) - interrupt_mode = PCIE_PORT_INTx_MODE; + int irq, interrupt_mode = PCIE_PORT_NO_IRQ; + int i; /* Check MSI quirk */ if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk) - return interrupt_mode; + goto Fallback; + + /* Try to use MSI-X if supported */ + if (!pcie_port_enable_msix(dev, vectors, mask)) + return PCIE_PORT_MSIX_MODE; + + /* We're not going to use MSI-X, so try MSI and fall back to INTx */ + if (!pci_enable_msi(dev)) + interrupt_mode = PCIE_PORT_MSI_MODE; + + Fallback: + if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin) + interrupt_mode = PCIE_PORT_INTx_MODE; + + irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1; + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + vectors[i] = irq; + + vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1; - /* Select MSI-X over MSI if supported */ - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - if (pos) { - struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = - {{0, 0}, {0, 1}, {0, 2}, {0, 3}}; - status = pci_enable_msix(dev, msix_entries, nvec); - if (!status) { - int j = 0; - - interrupt_mode = PCIE_PORT_MSIX_MODE; - for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - if (mask & (1 << i)) - vectors[i] = msix_entries[j++].vector; - } - } - } - if (status) { - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); - if (pos) { - status = pci_enable_msi(dev); - if (!status) { - interrupt_mode = PCIE_PORT_MSI_MODE; - for (i = 0;i < PCIE_PORT_DEVICE_MAXSERVICES;i++) - vectors[i] = dev->irq; - } - } - } return interrupt_mode; } diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index a3832079508e..5d2afcfa6bc1 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -16,10 +16,14 @@ #define PCIE_ANY_PORT 7 /* Service Type */ -#define PCIE_PORT_SERVICE_PME 1 /* Power Management Event */ -#define PCIE_PORT_SERVICE_AER 2 /* Advanced Error Reporting */ -#define PCIE_PORT_SERVICE_HP 4 /* Native Hotplug */ -#define PCIE_PORT_SERVICE_VC 8 /* Virtual Channel */ +#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ +#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT) +#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */ +#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT) +#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */ +#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) +#define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */ +#define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) /* Root/Upstream/Downstream Port's Interrupt Mode */ #define PCIE_PORT_NO_IRQ (-1) -- cgit v1.2.3 From 63f10f0f6df4e4e860b790d64bebfde85b540b0a Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 9 Feb 2009 15:59:29 +0900 Subject: PCI/ACPI: move _OSC code to pci_root.c Move PCI _OSC management code from drivers/pci/pci-acpi.c to drivers/acpi/pci_root.c. The benefits are - We no longer need struct osc_data and its management code (contents are moved to struct acpi_pci_root). This simplify the code, and we no longer care about kmalloc() failure. - We can make pci_acpi_osc_support() be a static function, which is called only from drivers/acpi/pci_root.c. Signed-off-by: Kenji Kaneshige Reviewed-by: Andrew Patterson Tested-by: Andrew Patterson Acked-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 180 ++++++++++++++++++++++++++++++++++++++- drivers/pci/pci-acpi.c | 215 ----------------------------------------------- include/linux/pci-acpi.h | 1 - 3 files changed, 178 insertions(+), 218 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 5b38a026d122..979eccc82c5b 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -66,11 +66,18 @@ struct acpi_pci_root { struct acpi_device * device; struct acpi_pci_id id; struct pci_bus *bus; + + u32 osc_support_set; /* _OSC state of support bits */ + u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_control_qry; /* the latest _OSC query result */ + + u32 osc_queried:1; /* has _OSC control been queried? */ }; static LIST_HEAD(acpi_pci_roots); static struct acpi_pci_driver *sub_driver; +static DEFINE_MUTEX(osc_lock); int acpi_pci_register_driver(struct acpi_pci_driver *driver) { @@ -185,6 +192,175 @@ static void acpi_pci_bridge_scan(struct acpi_device *device) } } +static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, + 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; + +static acpi_status acpi_pci_run_osc(acpi_handle handle, + const u32 *capbuf, u32 *retval) +{ + acpi_status status; + struct acpi_object_list input; + union acpi_object in_params[4]; + struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *out_obj; + u32 errors; + + /* Setting up input parameters */ + input.count = 4; + input.pointer = in_params; + in_params[0].type = ACPI_TYPE_BUFFER; + in_params[0].buffer.length = 16; + in_params[0].buffer.pointer = OSC_UUID; + in_params[1].type = ACPI_TYPE_INTEGER; + in_params[1].integer.value = 1; + in_params[2].type = ACPI_TYPE_INTEGER; + in_params[2].integer.value = 3; + in_params[3].type = ACPI_TYPE_BUFFER; + in_params[3].buffer.length = 12; + in_params[3].buffer.pointer = (u8 *)capbuf; + + status = acpi_evaluate_object(handle, "_OSC", &input, &output); + if (ACPI_FAILURE(status)) + return status; + + if (!output.length) + return AE_NULL_OBJECT; + + out_obj = output.pointer; + if (out_obj->type != ACPI_TYPE_BUFFER) { + printk(KERN_DEBUG "_OSC evaluation returned wrong type\n"); + status = AE_TYPE; + goto out_kfree; + } + /* Need to ignore the bit0 in result code */ + errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); + if (errors) { + if (errors & OSC_REQUEST_ERROR) + printk(KERN_DEBUG "_OSC request failed\n"); + if (errors & OSC_INVALID_UUID_ERROR) + printk(KERN_DEBUG "_OSC invalid UUID\n"); + if (errors & OSC_INVALID_REVISION_ERROR) + printk(KERN_DEBUG "_OSC invalid revision\n"); + if (errors & OSC_CAPABILITIES_MASK_ERROR) { + if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE) + goto out_success; + printk(KERN_DEBUG + "Firmware did not grant requested _OSC control\n"); + status = AE_SUPPORT; + goto out_kfree; + } + status = AE_ERROR; + goto out_kfree; + } +out_success: + *retval = *((u32 *)(out_obj->buffer.pointer + 8)); + status = AE_OK; + +out_kfree: + kfree(output.pointer); + return status; +} + +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags) +{ + acpi_status status; + u32 support_set, result, capbuf[3]; + + /* do _OSC query for all possible controls */ + support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS); + capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_TYPE] = support_set; + capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + + status = acpi_pci_run_osc(root->device->handle, capbuf, &result); + if (ACPI_SUCCESS(status)) { + root->osc_support_set = support_set; + root->osc_control_qry = result; + root->osc_queried = 1; + } + return status; +} + +static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags) +{ + acpi_status status; + acpi_handle tmp; + + status = acpi_get_handle(root->device->handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) + return status; + mutex_lock(&osc_lock); + status = acpi_pci_query_osc(root, flags); + mutex_unlock(&osc_lock); + return status; +} + +static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle) +{ + struct acpi_pci_root *root; + list_for_each_entry(root, &acpi_pci_roots, node) { + if (root->device->handle == handle) + return root; + } + return NULL; +} + +/** + * pci_osc_control_set - commit requested control to Firmware + * @handle: acpi_handle for the target ACPI object + * @flags: driver's requested control bits + * + * Attempt to take control from Firmware on requested control bits. + **/ +acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) +{ + acpi_status status; + u32 control_req, result, capbuf[3]; + acpi_handle tmp; + struct acpi_pci_root *root; + + status = acpi_get_handle(handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) + return status; + + control_req = (flags & OSC_CONTROL_MASKS); + if (!control_req) + return AE_TYPE; + + root = acpi_pci_find_root(handle); + if (!root) + return AE_NOT_EXIST; + + mutex_lock(&osc_lock); + /* No need to evaluate _OSC if the control was already granted. */ + if ((root->osc_control_set & control_req) == control_req) + goto out; + + /* Need to query controls first before requesting them */ + if (!root->osc_queried) { + status = acpi_pci_query_osc(root, root->osc_support_set); + if (ACPI_FAILURE(status)) + goto out; + } + if ((root->osc_control_qry & control_req) != control_req) { + printk(KERN_DEBUG + "Firmware did not grant requested _OSC control\n"); + status = AE_SUPPORT; + goto out; + } + + capbuf[OSC_QUERY_TYPE] = 0; + capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set; + capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req; + status = acpi_pci_run_osc(handle, capbuf, &result); + if (ACPI_SUCCESS(status)) + root->osc_control_set = result; +out: + mutex_unlock(&osc_lock); + return status; +} +EXPORT_SYMBOL(pci_osc_control_set); + static int __devinit acpi_pci_root_add(struct acpi_device *device) { int result = 0; @@ -217,7 +393,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) * PCI domains, so we indicate this in _OSC support capabilities. */ flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; - pci_acpi_osc_support(device->handle, flags); + acpi_pci_osc_support(root, flags); /* * Segment @@ -353,7 +529,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (pci_msi_enabled()) flags |= OSC_MSI_SUPPORT; if (flags != base_flags) - pci_acpi_osc_support(device->handle, flags); + acpi_pci_osc_support(root, flags); end: if (result) { diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index deea8a187eb8..fac5eddcefd2 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -18,221 +18,6 @@ #include #include "pci.h" -struct acpi_osc_data { - acpi_handle handle; - u32 support_set; - u32 control_set; - u32 control_query; - int is_queried; - struct list_head sibiling; -}; -static LIST_HEAD(acpi_osc_data_list); - -struct acpi_osc_args { - u32 capbuf[3]; -}; - -static DEFINE_MUTEX(pci_acpi_lock); - -static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) -{ - struct acpi_osc_data *data; - - list_for_each_entry(data, &acpi_osc_data_list, sibiling) { - if (data->handle == handle) - return data; - } - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return NULL; - INIT_LIST_HEAD(&data->sibiling); - data->handle = handle; - list_add_tail(&data->sibiling, &acpi_osc_data_list); - return data; -} - -static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, - 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; - -static acpi_status acpi_run_osc(acpi_handle handle, - struct acpi_osc_args *osc_args, u32 *retval) -{ - acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 errors, flags = osc_args->capbuf[OSC_QUERY_TYPE]; - - /* Setting up input parameters */ - input.count = 4; - input.pointer = in_params; - in_params[0].type = ACPI_TYPE_BUFFER; - in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = OSC_UUID; - in_params[1].type = ACPI_TYPE_INTEGER; - in_params[1].integer.value = 1; - in_params[2].type = ACPI_TYPE_INTEGER; - in_params[2].integer.value = 3; - in_params[3].type = ACPI_TYPE_BUFFER; - in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)osc_args->capbuf; - - status = acpi_evaluate_object(handle, "_OSC", &input, &output); - if (ACPI_FAILURE(status)) - return status; - - if (!output.length) - return AE_NULL_OBJECT; - - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n"); - status = AE_TYPE; - goto out_kfree; - } - /* Need to ignore the bit0 in result code */ - errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - if (errors & OSC_REQUEST_ERROR) - printk(KERN_DEBUG "_OSC request fails\n"); - if (errors & OSC_INVALID_UUID_ERROR) - printk(KERN_DEBUG "_OSC invalid UUID\n"); - if (errors & OSC_INVALID_REVISION_ERROR) - printk(KERN_DEBUG "_OSC invalid revision\n"); - if (errors & OSC_CAPABILITIES_MASK_ERROR) { - if (flags & OSC_QUERY_ENABLE) - goto out_success; - printk(KERN_DEBUG "_OSC FW not grant req. control\n"); - status = AE_SUPPORT; - goto out_kfree; - } - status = AE_ERROR; - goto out_kfree; - } -out_success: - *retval = *((u32 *)(out_obj->buffer.pointer + 8)); - status = AE_OK; - -out_kfree: - kfree(output.pointer); - return status; -} - -static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data) -{ - acpi_status status; - u32 support_set, result; - struct acpi_osc_args osc_args; - - /* do _OSC query for all possible controls */ - support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS); - osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set; - osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; - - status = acpi_run_osc(osc_data->handle, &osc_args, &result); - if (ACPI_SUCCESS(status)) { - osc_data->support_set = support_set; - osc_data->control_query = result; - osc_data->is_queried = 1; - } - - return status; -} - -/* - * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature - * @flags: Bitmask of flags to support - * - * See the ACPI spec for the definition of the flags - */ -int pci_acpi_osc_support(acpi_handle handle, u32 flags) -{ - acpi_status status; - acpi_handle tmp; - struct acpi_osc_data *osc_data; - int rc = 0; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return -ENOTTY; - - mutex_lock(&pci_acpi_lock); - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - rc = -ENOMEM; - goto out; - } - - __acpi_query_osc(flags, osc_data); -out: - mutex_unlock(&pci_acpi_lock); - return rc; -} - -/** - * pci_osc_control_set - commit requested control to Firmware - * @handle: acpi_handle for the target ACPI object - * @flags: driver's requested control bits - * - * Attempt to take control from Firmware on requested control bits. - **/ -acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) -{ - acpi_status status; - u32 control_req, control_set, result; - acpi_handle tmp; - struct acpi_osc_data *osc_data; - struct acpi_osc_args osc_args; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return status; - - mutex_lock(&pci_acpi_lock); - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - status = AE_ERROR; - goto out; - } - - control_req = (flags & OSC_CONTROL_MASKS); - if (!control_req) { - status = AE_TYPE; - goto out; - } - - /* No need to evaluate _OSC if the control was already granted. */ - if ((osc_data->control_set & control_req) == control_req) - goto out; - - if (!osc_data->is_queried) { - status = __acpi_query_osc(osc_data->support_set, osc_data); - if (ACPI_FAILURE(status)) - goto out; - } - - if ((osc_data->control_query & control_req) != control_req) { - status = AE_SUPPORT; - goto out; - } - - control_set = osc_data->control_set | control_req; - osc_args.capbuf[OSC_QUERY_TYPE] = 0; - osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set; - osc_args.capbuf[OSC_CONTROL_TYPE] = control_set; - status = acpi_run_osc(handle, &osc_args, &result); - if (ACPI_SUCCESS(status)) - osc_data->control_set = result; -out: - mutex_unlock(&pci_acpi_lock); - return status; -} -EXPORT_SYMBOL(pci_osc_control_set); - /* * _SxD returns the D-state with the highest power * (lowest D-state number) supported in the S-state "x". diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 042c166f65d5..65cb103b21db 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -50,7 +50,6 @@ #ifdef CONFIG_ACPI extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); -int pci_acpi_osc_support(acpi_handle handle, u32 flags); static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { /* Find root host bridge */ -- cgit v1.2.3 From 9f5404d8ea90bfa4d58a3936e5a3d0d28cecf60f Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 9 Feb 2009 16:00:04 +0900 Subject: PCI/ACPI: rename pci_osc_control_set() - Rename pci_osc_control_set() to acpi_pci_osc_control_set() according to the other API names in drivers/acpi/pci_root.c. - Move _OSC related definitions to include/linux/acpi.h because _OSC related API is implemented in drivers/acpi/pci_root.c now. Signed-off-by: Kenji Kaneshige Reviewed-by: Andrew Patterson Tested-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 6 ++--- drivers/pci/hotplug/acpi_pcihp.c | 5 ++--- drivers/pci/pcie/aer/aerdrv_acpi.c | 2 +- include/linux/acpi.h | 34 ++++++++++++++++++++++++++++ include/linux/pci-acpi.h | 45 -------------------------------------- 5 files changed, 40 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 979eccc82c5b..196f97d00956 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -306,13 +306,13 @@ static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle) } /** - * pci_osc_control_set - commit requested control to Firmware + * acpi_pci_osc_control_set - commit requested control to Firmware * @handle: acpi_handle for the target ACPI object * @flags: driver's requested control bits * * Attempt to take control from Firmware on requested control bits. **/ -acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) +acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) { acpi_status status; u32 control_req, result, capbuf[3]; @@ -359,7 +359,7 @@ out: mutex_unlock(&osc_lock); return status; } -EXPORT_SYMBOL(pci_osc_control_set); +EXPORT_SYMBOL(acpi_pci_osc_control_set); static int __devinit acpi_pci_root_add(struct acpi_device *device) { diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 1c1141801060..f47bc74be567 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -30,9 +30,8 @@ #include #include #include +#include #include -#include -#include #define MY_NAME "acpi_pcihp" @@ -408,7 +407,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); dbg("Trying to get hotplug control for %s\n", (char *)string.pointer); - status = pci_osc_control_set(handle, flags); + status = acpi_pci_osc_control_set(handle, flags); if (ACPI_SUCCESS(status)) goto got_one; kfree(string.pointer); diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index ebce26c37049..8edb2f300e8f 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -38,7 +38,7 @@ int aer_osc_setup(struct pcie_device *pciedev) handle = acpi_find_root_bridge_handle(pdev); if (handle) { - status = pci_osc_control_set(handle, + status = acpi_pci_osc_control_set(handle, OSC_PCI_EXPRESS_AER_CONTROL | OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6fce2fc2d124..2a3b189e3e26 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -256,6 +256,40 @@ void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ + +#define OSC_QUERY_TYPE 0 +#define OSC_SUPPORT_TYPE 1 +#define OSC_CONTROL_TYPE 2 +#define OSC_SUPPORT_MASKS 0x1f + +/* _OSC DW0 Definition */ +#define OSC_QUERY_ENABLE 1 +#define OSC_REQUEST_ERROR 2 +#define OSC_INVALID_UUID_ERROR 4 +#define OSC_INVALID_REVISION_ERROR 8 +#define OSC_CAPABILITIES_MASK_ERROR 16 + +/* _OSC DW1 Definition (OS Support Fields) */ +#define OSC_EXT_PCI_CONFIG_SUPPORT 1 +#define OSC_ACTIVE_STATE_PWR_SUPPORT 2 +#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 +#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 +#define OSC_MSI_SUPPORT 16 + +/* _OSC DW1 Definition (OS Control Fields) */ +#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 +#define OSC_SHPC_NATIVE_HP_CONTROL 2 +#define OSC_PCI_EXPRESS_PME_CONTROL 4 +#define OSC_PCI_EXPRESS_AER_CONTROL 8 +#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 + +#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ + OSC_SHPC_NATIVE_HP_CONTROL | \ + OSC_PCI_EXPRESS_PME_CONTROL | \ + OSC_PCI_EXPRESS_AER_CONTROL | \ + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) + +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); #else /* CONFIG_ACPI */ static inline int early_acpi_boot_init(void) diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 65cb103b21db..20480b9f10c8 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -10,46 +10,7 @@ #include -#define OSC_QUERY_TYPE 0 -#define OSC_SUPPORT_TYPE 1 -#define OSC_CONTROL_TYPE 2 -#define OSC_SUPPORT_MASKS 0x1f - -/* - * _OSC DW0 Definition - */ -#define OSC_QUERY_ENABLE 1 -#define OSC_REQUEST_ERROR 2 -#define OSC_INVALID_UUID_ERROR 4 -#define OSC_INVALID_REVISION_ERROR 8 -#define OSC_CAPABILITIES_MASK_ERROR 16 - -/* - * _OSC DW1 Definition (OS Support Fields) - */ -#define OSC_EXT_PCI_CONFIG_SUPPORT 1 -#define OSC_ACTIVE_STATE_PWR_SUPPORT 2 -#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 -#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 -#define OSC_MSI_SUPPORT 16 - -/* - * _OSC DW1 Definition (OS Control Fields) - */ -#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 -#define OSC_SHPC_NATIVE_HP_CONTROL 2 -#define OSC_PCI_EXPRESS_PME_CONTROL 4 -#define OSC_PCI_EXPRESS_AER_CONTROL 8 -#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 - -#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ - OSC_SHPC_NATIVE_HP_CONTROL | \ - OSC_PCI_EXPRESS_PME_CONTROL | \ - OSC_PCI_EXPRESS_AER_CONTROL | \ - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - #ifdef CONFIG_ACPI -extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { /* Find root host bridge */ @@ -69,12 +30,6 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) return acpi_get_pci_rootbridge_handle(seg, busnr); } #else -#if !defined(AE_ERROR) -typedef u32 acpi_status; -#define AE_ERROR (acpi_status) (0x0001) -#endif -static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) -{return AE_ERROR;} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { return NULL; } #endif -- cgit v1.2.3 From c48f1670f42b71f39f4a3bfba01ffb691cc9206c Mon Sep 17 00:00:00 2001 From: "akpm@linux-foundation.org" Date: Tue, 3 Feb 2009 15:45:26 -0800 Subject: PCI: constify pci_bus_add_devices() drivers/pci/hotplug/fakephp.c:283: warning: passing argument 1 of 'pci_bus_add_devices' discards qualifiers from pointer target type Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 2 +- include/linux/pci.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 52b54f053be0..118c77778d29 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -133,7 +133,7 @@ int pci_bus_add_child(struct pci_bus *bus) * * Call hotplug for each new devices. */ -void pci_bus_add_devices(struct pci_bus *bus) +void pci_bus_add_devices(const struct pci_bus *bus) { struct pci_dev *dev; struct pci_bus *child; diff --git a/include/linux/pci.h b/include/linux/pci.h index b5d6d0e0f1cb..a1af2fe00639 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -528,7 +528,7 @@ void pcibios_update_irq(struct pci_dev *, int irq); /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -void pci_bus_add_devices(struct pci_bus *bus); +void pci_bus_add_devices(const struct pci_bus *bus); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, -- cgit v1.2.3 From ea7415512a07add2b09c070c9a5d1950833cf9b3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 18 Feb 2009 10:44:29 -0800 Subject: PCI: constify pci_bus_assign_resources() drivers/pci/hotplug/fakephp.c: In function 'pci_rescan_bus': drivers/pci/hotplug/fakephp.c:271: warning: passing argument 1 of 'pci_bus_assign_resources' discards qualifiers from pointer target type Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 4 ++-- include/linux/pci.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 704608945780..170a3eda9dd3 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -27,7 +27,7 @@ #include -static void pbus_assign_resources_sorted(struct pci_bus *bus) +static void pbus_assign_resources_sorted(const struct pci_bus *bus) { struct pci_dev *dev; struct resource *res; @@ -495,7 +495,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_size_bridges); -void __ref pci_bus_assign_resources(struct pci_bus *bus) +void __ref pci_bus_assign_resources(const struct pci_bus *bus) { struct pci_bus *b; struct pci_dev *dev; diff --git a/include/linux/pci.h b/include/linux/pci.h index a1af2fe00639..7baf2a5db12a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -708,7 +708,7 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ -void pci_bus_assign_resources(struct pci_bus *bus); +void pci_bus_assign_resources(const struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); int pci_claim_resource(struct pci_dev *, int); void pci_assign_unassigned_resources(void); -- cgit v1.2.3 From 3a3c244c9a355105bc193fde873c73727bf87192 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 15 Feb 2009 22:32:48 +0100 Subject: PCI: PCIe portdrv: Implement pm object Implement pm object for the PCI Express port driver in order to use the new power management framework and reduce the code size. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_core.c | 4 ++-- drivers/pci/pcie/aer/aerdrv.c | 6 ------ drivers/pci/pcie/portdrv.h | 4 ++-- drivers/pci/pcie/portdrv_core.c | 14 ++++++-------- drivers/pci/pcie/portdrv_pci.c | 31 +++++++++++++++---------------- include/linux/pcieport_if.h | 2 +- 6 files changed, 26 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 3d21bbba3308..fb254b2454de 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -475,7 +475,7 @@ static void pciehp_remove (struct pcie_device *dev) } #ifdef CONFIG_PM -static int pciehp_suspend (struct pcie_device *dev, pm_message_t state) +static int pciehp_suspend (struct pcie_device *dev) { dev_info(&dev->device, "%s ENTRY\n", __func__); return 0; @@ -503,7 +503,7 @@ static int pciehp_resume (struct pcie_device *dev) } return 0; } -#endif +#endif /* PM */ static struct pcie_port_service_driver hpdriver_portdrv = { .name = PCIE_MODULE_NAME, diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index e11c03194063..32ade5af927e 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -40,9 +40,6 @@ MODULE_LICENSE("GPL"); static int __devinit aer_probe (struct pcie_device *dev); static void aer_remove(struct pcie_device *dev); -static int aer_suspend(struct pcie_device *dev, pm_message_t state) -{return 0;} -static int aer_resume(struct pcie_device *dev) {return 0;} static pci_ers_result_t aer_error_detected(struct pci_dev *dev, enum pci_channel_state error); static void aer_error_resume(struct pci_dev *dev); @@ -61,9 +58,6 @@ static struct pcie_port_service_driver aerdriver = { .probe = aer_probe, .remove = aer_remove, - .suspend = aer_suspend, - .resume = aer_resume, - .err_handler = &aer_error_handlers, .reset_link = aer_root_reset, diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 5b818bd835ef..17ad53868f9f 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -38,8 +38,8 @@ extern struct bus_type pcie_port_bus_type; extern int pcie_port_device_probe(struct pci_dev *dev); extern int pcie_port_device_register(struct pci_dev *dev); #ifdef CONFIG_PM -extern int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state); -extern int pcie_port_device_resume(struct pci_dev *dev); +extern int pcie_port_device_suspend(struct device *dev); +extern int pcie_port_device_resume(struct device *dev); #endif extern void pcie_port_device_remove(struct pci_dev *dev); extern int __must_check pcie_port_bus_register(void); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 569af0015fce..5a5bfe7cdf5f 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -410,13 +410,12 @@ int pcie_port_device_register(struct pci_dev *dev) static int suspend_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; - pm_message_t state = * (pm_message_t *) data; if ((dev->bus == &pcie_port_bus_type) && (dev->driver)) { service_driver = to_service_driver(dev->driver); if (service_driver->suspend) - service_driver->suspend(to_pcie_device(dev), state); + service_driver->suspend(to_pcie_device(dev)); } return 0; } @@ -424,11 +423,10 @@ static int suspend_iter(struct device *dev, void *data) /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle - * @state: Representation of system power management transition in progress */ -int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state) +int pcie_port_device_suspend(struct device *dev) { - return device_for_each_child(&dev->dev, &state, suspend_iter); + return device_for_each_child(dev, NULL, suspend_iter); } static int resume_iter(struct device *dev, void *data) @@ -448,11 +446,11 @@ static int resume_iter(struct device *dev, void *data) * pcie_port_device_suspend - resume port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_resume(struct pci_dev *dev) +int pcie_port_device_resume(struct device *dev) { - return device_for_each_child(&dev->dev, NULL, resume_iter); + return device_for_each_child(dev, NULL, resume_iter); } -#endif +#endif /* PM */ static int remove_iter(struct device *dev, void *data) { diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 94d0e2af9bad..a61f4930d676 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -44,21 +44,21 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev) } #ifdef CONFIG_PM -static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state) -{ - return pcie_port_device_suspend(dev, state); +static struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore = pcie_port_device_resume, +}; -} +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) -static int pcie_portdrv_resume(struct pci_dev *dev) -{ - pci_set_master(dev); - return pcie_port_device_resume(dev); -} -#else -#define pcie_portdrv_suspend NULL -#define pcie_portdrv_resume NULL -#endif +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ /* * pcie_portdrv_probe - Probe PCI-Express port devices @@ -268,10 +268,9 @@ static struct pci_driver pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, - .suspend = pcie_portdrv_suspend, - .resume = pcie_portdrv_resume, - .err_handler = &pcie_portdrv_err_handler, + + .driver.pm = PCIE_PORTDRV_PM_OPS, }; static int __init pcie_portdrv_init(void) diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 5d2afcfa6bc1..b4c79545330b 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -59,7 +59,7 @@ struct pcie_port_service_driver { const char *name; int (*probe) (struct pcie_device *dev); void (*remove) (struct pcie_device *dev); - int (*suspend) (struct pcie_device *dev, pm_message_t state); + int (*suspend) (struct pcie_device *dev); int (*resume) (struct pcie_device *dev); /* Service Error Recovery Handler */ -- cgit v1.2.3 From 0747aaf42d78d26684c6f6b34a4103ff81f571f8 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 17 Feb 2009 14:11:56 +0900 Subject: PCI/ACPI: fix wrong assumption in acpi_pci_get_bridge_handle Current acpi_pci_get_bridge_handle() has an assumption that pci_bus->self is NULL on the root pci bus. But it might not true on some platforms. Because of this wrong assumption, current acpi_pci_get_bridge_handle() might return improper ACPI handle. We must check pci_bus->parent instead. This bug is the root cause of the following kernel panic reported by James Bottomley. This problem was introduced by the commit e8c331e963c58b83db24b7d0e39e8c07f687dbc6. The immediate cause was acpi_pci_get_bridge_handle() returned NULL unexpectedly and it was passed as the second argument of acpi_walk_namespace(). pci_hotplug: PCI Hot Plug PCI Core version: 0.5 acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5 BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] acpi_ns_get_next_node+0xb/0x3c PGD 0 Oops: 0000 [#1] SMP last sysfs file: CPU 0 Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.28 #1 RIP: 0010:[] [] acpi_ns_get_next_node+0xb/0x3c RSP: 0018:ffff88007f87fd30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffffffff8037d260 R09: ffff88007f87fdfc R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffffffff80742040(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000010 CR3: 0000000000201000 CR4: 00000000000006a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 1, threadinfo ffff88007f87e000, task ffff88007f875040) Stack: 0000000000000000 ffffffff803964f5 ffff88007f81b728 0000000000001001 ffff88007f87fdfc ffffffff8037d260 0000000600000001 0000000000000000 ffffffff8037d260 0000000000000000 0000000000000001 ffff88007f87fdfc Call Trace: [] acpi_ns_walk_namespace+0x55/0x138 [] is_pci_dock_device+0x0/0x20 [] is_pci_dock_device+0x0/0x20 [] acpi_walk_namespace+0x5f/0x83 [] detect_ejectable_slots+0x53/0x70 [] add_bridge+0xe8/0x200 [] acpi_walk_namespace+0x6b/0x83 [] acpi_pci_register_driver+0x48/0x61 [] acpiphp_init+0x0/0x58 [] acpiphp_glue_init+0x4c/0x5a [] acpiphp_init+0x37/0x58 [] _stext+0x3b/0x180 [] create_proc_entry+0x58/0xa0 [] register_irq_proc+0xc1/0xe0 [] kernel_init+0x152/0x1ac [] finish_task_switch+0x0/0x110 [] child_rip+0xa/0x20 [] restore_args+0x0/0x30 [] kernel_init+0x0/0x1ac [] child_rip+0x0/0x20 Code: 89 c2 48 8b 00 48 85 c0 75 f5 48 8b 45 00 48 89 02 44 88 65 09 48 89 5d 00 31 c0 5b 5d 41 5c c3 53 48 85 d2 89 fb 48 89 d7 75 06 <48> 8b 56 10 eb 08 e8 73 f1 ff ff 48 89 c2 85 db 74 1a eb 13 0f RIP [] acpi_ns_get_next_node+0xb/0x3c RSP CR2: 0000000000000010 ---[ end trace a7919e7f17c0a725 ]--- Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 20480b9f10c8..3cee2367459f 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -23,11 +23,10 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) { - int seg = pci_domain_nr(pbus), busnr = pbus->number; - struct pci_dev *bridge = pbus->self; - if (bridge) - return DEVICE_ACPI_HANDLE(&(bridge->dev)); - return acpi_get_pci_rootbridge_handle(seg, busnr); + if (pbus->parent) + return DEVICE_ACPI_HANDLE(&(pbus->self->dev)); + return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus), + pbus->number); } #else static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) -- cgit v1.2.3 From d18690af626b83fef1d1953b9f70e09497060586 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 17 Feb 2009 14:12:36 +0900 Subject: PCI/ACPI: fix wrong assumption in acpi_find_root_bridge_handle Current acpi_find_root_bridge_handle() has a assumption that pci_bus->self is NULL on the root pci bus. But it might not be true on some platforms. Because of this wrong assumption, current acpi_find_root_bridge_handle() might cause endless loop. We must check pci_bus->parent instead. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci-acpi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 3cee2367459f..092e82e0048c 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -13,12 +13,12 @@ #ifdef CONFIG_ACPI static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { - /* Find root host bridge */ - while (pdev->bus->self) - pdev = pdev->bus->self; - - return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus), - pdev->bus->number); + struct pci_bus *pbus = pdev->bus; + /* Find a PCI root bus */ + while (pbus->parent) + pbus = pbus->parent; + return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus), + pbus->number); } static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) -- cgit v1.2.3 From 998dd7c719f62dcfa91d7bf7f4eb9c160e03d817 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 25 Feb 2009 13:15:52 +0800 Subject: PCI: fix incorrect mask of PM No_Soft_Reset bit Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- include/linux/pci_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 027815b4635e..b647a4df59fc 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -235,7 +235,7 @@ #define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */ #define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ #define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ #define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ -- cgit v1.2.3 From 24d27553390c69d11cdbd930d635193956fc295f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:06 -0400 Subject: PCI MSI: Replace 'type' with 'is_msix' By changing from a 5-bit field to a 1-bit field, we free up some bits that can be used by a later patch. Also rearrange the fields for better packing on 64-bit platforms (reducing the size of msi_desc from 72 bytes to 64 bytes). Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 115 ++++++++++++++++++---------------------------------- include/linux/msi.h | 4 +- 2 files changed, 41 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index dceea56f7342..b3db4388f974 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -111,20 +111,10 @@ static void msix_flush_writes(struct irq_desc *desc) entry = get_irq_desc_msi(desc); BUG_ON(!entry || !entry->dev); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - /* nothing to do */ - break; - case PCI_CAP_ID_MSIX: - { + if (entry->msi_attrib.is_msix) { int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; readl(entry->mask_base + offset); - break; - } - default: - BUG(); - break; } } @@ -143,32 +133,23 @@ static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) entry = get_irq_desc_msi(desc); BUG_ON(!entry || !entry->dev); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - if (entry->msi_attrib.maskbit) { - int pos; - u32 mask_bits; - - pos = (long)entry->mask_base; - pci_read_config_dword(entry->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(entry->dev, pos, mask_bits); - } else { - return 0; - } - break; - case PCI_CAP_ID_MSIX: - { + if (entry->msi_attrib.is_msix) { int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; writel(flag, entry->mask_base + offset); readl(entry->mask_base + offset); - break; - } - default: - BUG(); - break; + } else { + int pos; + u32 mask_bits; + + if (!entry->msi_attrib.maskbit) + return 0; + + pos = (long)entry->mask_base; + pci_read_config_dword(entry->dev, pos, &mask_bits); + mask_bits &= ~mask; + mask_bits |= flag & mask; + pci_write_config_dword(entry->dev, pos, mask_bits); } entry->msi_attrib.masked = !!flag; return 1; @@ -177,9 +158,14 @@ static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - switch(entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - { + if (entry->msi_attrib.is_msix) { + void __iomem *base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); + } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; u16 data; @@ -195,21 +181,6 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) pci_read_config_word(dev, msi_data_reg(pos, 0), &data); } msg->data = data; - break; - } - case PCI_CAP_ID_MSIX: - { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - - msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); - break; - } - default: - BUG(); } } @@ -223,9 +194,17 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg) void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - { + if (entry->msi_attrib.is_msix) { + void __iomem *base; + base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + writel(msg->address_lo, + base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + writel(msg->address_hi, + base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); + } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; @@ -240,23 +219,6 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) pci_write_config_word(dev, msi_data_reg(pos, 0), msg->data); } - break; - } - case PCI_CAP_ID_MSIX: - { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - - writel(msg->address_lo, - base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - writel(msg->address_hi, - base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); - break; - } - default: - BUG(); } entry->msg = *msg; } @@ -393,7 +355,7 @@ static int msi_capability_init(struct pci_dev *dev) if (!entry) return -ENOMEM; - entry->msi_attrib.type = PCI_CAP_ID_MSI; + entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = is_64bit_address(control); entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = is_mask_bit_support(control); @@ -475,7 +437,7 @@ static int msix_capability_init(struct pci_dev *dev, break; j = entries[i].entry; - entry->msi_attrib.type = PCI_CAP_ID_MSIX; + entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; entry->msi_attrib.entry_nr = j; entry->msi_attrib.maskbit = 1; @@ -619,12 +581,13 @@ void pci_msi_shutdown(struct pci_dev* dev) struct irq_desc *desc = irq_to_desc(dev->irq); msi_set_mask_bits(desc, mask, ~mask); } - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) + if (!entry->dev || entry->msi_attrib.is_msix) return; /* Restore dev->irq to its default pin-assertion irq */ dev->irq = entry->msi_attrib.default_irq; } + void pci_disable_msi(struct pci_dev* dev) { struct msi_desc *entry; @@ -635,7 +598,7 @@ void pci_disable_msi(struct pci_dev* dev) pci_msi_shutdown(dev); entry = list_entry(dev->msi_list.next, struct msi_desc, list); - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) + if (!entry->dev || entry->msi_attrib.is_msix) return; msi_free_irqs(dev); @@ -654,7 +617,7 @@ static int msi_free_irqs(struct pci_dev* dev) arch_teardown_msi_irqs(dev); list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) { - if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) { + if (entry->msi_attrib.is_msix) { writel(1, entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); diff --git a/include/linux/msi.h b/include/linux/msi.h index d2b8a1e8ca11..9c5ce214fbf4 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -20,13 +20,13 @@ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); struct msi_desc { struct { - __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */ + __u8 is_msix : 1; __u8 maskbit : 1; /* mask-pending bit supported ? */ __u8 masked : 1; __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ __u8 pos; /* Location of the msi capability */ - __u32 maskbits_mask; /* mask bits mask */ __u16 entry_nr; /* specific enabled entry */ + __u32 maskbits_mask; /* mask bits mask */ unsigned default_irq; /* default pre-assigned irq */ }msi_attrib; -- cgit v1.2.3 From 264d9caaa1c574c0274b019a810abfe957391005 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:08 -0400 Subject: PCI MSI: Use mask_pos instead of mask_base when appropriate MSI interrupts have a mask_pos where MSI-X have a mask_base. Use a transparent union to get rid of some ugly casts. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 5 ++--- include/linux/msi.h | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a658c0f34e16..fcde04df6dfe 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -145,7 +145,7 @@ static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) if (!entry->msi_attrib.maskbit) return 0; - pos = (long)entry->mask_base; + pos = entry->mask_pos; pci_read_config_dword(entry->dev, pos, &mask_bits); mask_bits &= ~mask; mask_bits |= flag & mask; @@ -363,8 +363,7 @@ static int msi_capability_init(struct pci_dev *dev) unsigned int base, maskbits, temp; base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); - entry->mask_base = (void __iomem *)(long)base; - + entry->mask_pos = base; /* All MSIs are unmasked by default, Mask them all */ pci_read_config_dword(dev, base, &maskbits); temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1); diff --git a/include/linux/msi.h b/include/linux/msi.h index 9c5ce214fbf4..5025ca4d91e4 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -33,7 +33,10 @@ struct msi_desc { unsigned int irq; struct list_head list; - void __iomem *mask_base; + union { + void __iomem *mask_base; + u8 mask_pos; + }; struct pci_dev *dev; /* Last set MSI message */ -- cgit v1.2.3 From f2440d9acbe866b917b16cc0f927366341ce9215 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:09 -0400 Subject: PCI MSI: Refactor interrupt masking code Since most of the callers already know whether they have an MSI or an MSI-X capability, split msi_set_mask_bits() into msi_mask_irq() and msix_mask_irq(). The only callers which don't (mask_msi_irq() and unmask_msi_irq()) can share code in msi_set_mask_bit(). This then becomes the only caller of msix_flush_writes(), so we can inline it. The flushing read can be to any address that belongs to the device, so we can eliminate the calculation too. We can also get rid of maskbits_mask from struct msi_desc and simply recalculate it on the rare occasion that we need it. The single-bit 'masked' element is replaced by a copy of the 32-bit 'masked' register, so this patch does not affect the size of msi_desc. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 155 +++++++++++++++++++++++++--------------------------- include/linux/msi.h | 5 +- 2 files changed, 77 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index fcde04df6dfe..adcc78242571 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -105,17 +105,14 @@ static inline __attribute_const__ u32 msi_mask(unsigned x) return (1 << (1 << x)) - 1; } -static void msix_flush_writes(struct irq_desc *desc) +static inline __attribute_const__ u32 msi_capable_mask(u16 control) { - struct msi_desc *entry; + return msi_mask((control >> 1) & 7); +} - entry = get_irq_desc_msi(desc); - BUG_ON(!entry); - if (entry->msi_attrib.is_msix) { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - readl(entry->mask_base + offset); - } +static inline __attribute_const__ u32 msi_enabled_mask(u16 control) +{ + return msi_mask((control >> 4) & 7); } /* @@ -127,32 +124,57 @@ static void msix_flush_writes(struct irq_desc *desc) * Returns 1 if it succeeded in masking the interrupt and 0 if the device * doesn't support MSI masking. */ -static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) +static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { - struct msi_desc *entry; + u32 mask_bits = desc->masked; - entry = get_irq_desc_msi(desc); - BUG_ON(!entry); - if (entry->msi_attrib.is_msix) { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - writel(flag, entry->mask_base + offset); - readl(entry->mask_base + offset); - } else { - int pos; - u32 mask_bits; + if (!desc->msi_attrib.maskbit) + return; + + mask_bits &= ~mask; + mask_bits |= flag; + pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits); + desc->masked = mask_bits; +} + +/* + * This internal function does not flush PCI writes to the device. + * All users must ensure that they read from the device before either + * assuming that the device state is up to date, or returning out of this + * file. This saves a few milliseconds when initialising devices with lots + * of MSI-X interrupts. + */ +static void msix_mask_irq(struct msi_desc *desc, u32 flag) +{ + u32 mask_bits = desc->masked; + unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + mask_bits &= ~1; + mask_bits |= flag; + writel(mask_bits, desc->mask_base + offset); + desc->masked = mask_bits; +} - if (!entry->msi_attrib.maskbit) - return 0; +static void msi_set_mask_bit(unsigned irq, u32 flag) +{ + struct msi_desc *desc = get_irq_msi(irq); - pos = entry->mask_pos; - pci_read_config_dword(entry->dev, pos, &mask_bits); - mask_bits &= ~mask; - mask_bits |= flag & mask; - pci_write_config_dword(entry->dev, pos, mask_bits); + if (desc->msi_attrib.is_msix) { + msix_mask_irq(desc, flag); + readl(desc->mask_base); /* Flush write to device */ + } else { + msi_mask_irq(desc, 1, flag); } - entry->msi_attrib.masked = !!flag; - return 1; +} + +void mask_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 1); +} + +void unmask_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 0); } void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) @@ -230,22 +252,6 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) write_msi_msg_desc(desc, msg); } -void mask_msi_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 1); - msix_flush_writes(desc); -} - -void unmask_msi_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 0); - msix_flush_writes(desc); -} - static int msi_free_irqs(struct pci_dev* dev); static struct msi_desc *alloc_msi_entry(struct pci_dev *dev) @@ -281,13 +287,9 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_intx_for_msi(dev, 0); msi_set_enable(dev, 0); write_msi_msg(dev->irq, &entry->msg); - if (entry->msi_attrib.maskbit) { - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask, - entry->msi_attrib.masked); - } pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); + msi_mask_irq(entry, msi_capable_mask(control), entry->masked); control &= ~PCI_MSI_FLAGS_QSIZE; control |= PCI_MSI_FLAGS_ENABLE; pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); @@ -307,9 +309,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev) msix_set_enable(dev, 0); list_for_each_entry(entry, &dev->msi_list, list) { - struct irq_desc *desc = irq_to_desc(entry->irq); write_msi_msg(entry->irq, &entry->msg); - msi_set_mask_bits(desc, 1, entry->msi_attrib.masked); + msix_mask_irq(entry, entry->masked); } BUG_ON(list_empty(&dev->msi_list)); @@ -342,6 +343,7 @@ static int msi_capability_init(struct pci_dev *dev) struct msi_desc *entry; int pos, ret; u16 control; + unsigned mask; msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */ @@ -356,21 +358,16 @@ static int msi_capability_init(struct pci_dev *dev) entry->msi_attrib.is_64 = is_64bit_address(control); entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = is_mask_bit_support(control); - entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.pos = pos; - if (entry->msi_attrib.maskbit) { - unsigned int base, maskbits, temp; - - base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); - entry->mask_pos = base; - /* All MSIs are unmasked by default, Mask them all */ - pci_read_config_dword(dev, base, &maskbits); - temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1); - maskbits |= temp; - pci_write_config_dword(dev, base, maskbits); - entry->msi_attrib.maskbits_mask = temp; - } + + entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); + /* All MSIs are unmasked by default, Mask them all */ + if (entry->msi_attrib.maskbit) + pci_read_config_dword(dev, entry->mask_pos, &entry->masked); + mask = msi_capable_mask(control); + msi_mask_irq(entry, mask, mask); + list_add_tail(&entry->list, &dev->msi_list); /* Configure MSI capability structure */ @@ -435,11 +432,12 @@ static int msix_capability_init(struct pci_dev *dev, entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; entry->msi_attrib.entry_nr = j; - entry->msi_attrib.maskbit = 1; - entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; entry->msi_attrib.pos = pos; entry->mask_base = base; + entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + msix_mask_irq(entry, 1); list_add_tail(&entry->list, &dev->msi_list); } @@ -556,9 +554,11 @@ int pci_enable_msi(struct pci_dev* dev) } EXPORT_SYMBOL(pci_enable_msi); -void pci_msi_shutdown(struct pci_dev* dev) +void pci_msi_shutdown(struct pci_dev *dev) { - struct msi_desc *entry; + struct msi_desc *desc; + u32 mask; + u16 ctrl; if (!pci_msi_enable || !dev || !dev->msi_enabled) return; @@ -568,18 +568,13 @@ void pci_msi_shutdown(struct pci_dev* dev) dev->msi_enabled = 0; BUG_ON(list_empty(&dev->msi_list)); - entry = list_entry(dev->msi_list.next, struct msi_desc, list); - /* Return the the pci reset with msi irqs unmasked */ - if (entry->msi_attrib.maskbit) { - u32 mask = entry->msi_attrib.maskbits_mask; - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, mask, ~mask); - } - if (entry->msi_attrib.is_msix) - return; + desc = list_first_entry(&dev->msi_list, struct msi_desc, list); + pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS, &ctrl); + mask = msi_capable_mask(ctrl); + msi_mask_irq(desc, mask, ~mask); /* Restore dev->irq to its default pin-assertion irq */ - dev->irq = entry->msi_attrib.default_irq; + dev->irq = desc->msi_attrib.default_irq; } void pci_disable_msi(struct pci_dev* dev) diff --git a/include/linux/msi.h b/include/linux/msi.h index 5025ca4d91e4..37c1bbe546e5 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -22,14 +22,13 @@ struct msi_desc { struct { __u8 is_msix : 1; __u8 maskbit : 1; /* mask-pending bit supported ? */ - __u8 masked : 1; __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ __u8 pos; /* Location of the msi capability */ __u16 entry_nr; /* specific enabled entry */ - __u32 maskbits_mask; /* mask bits mask */ unsigned default_irq; /* default pre-assigned irq */ - }msi_attrib; + } msi_attrib; + u32 masked; /* mask bits */ unsigned int irq; struct list_head list; -- cgit v1.2.3 From 1c8d7b0a562da06d3ebe83f01b1ed553205d1ae4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 17 Mar 2009 08:54:10 -0400 Subject: PCI MSI: Add support for multiple MSI Add the new API pci_enable_msi_block() to allow drivers to request multiple MSI and reimplement pci_enable_msi in terms of pci_enable_msi_block. Ensure that the architecture back ends don't have to know about multiple MSI. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- Documentation/PCI/MSI-HOWTO.txt | 45 +++++++++++++++++--- arch/powerpc/kernel/msi.c | 4 ++ arch/x86/kernel/io_apic.c | 4 ++ drivers/pci/msi.c | 91 +++++++++++++++++++++++++++++------------ drivers/pci/msi.h | 6 --- include/linux/msi.h | 1 + include/linux/pci.h | 6 ++- 7 files changed, 116 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 1c02431f1d1a..9494f6dc38eb 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -94,15 +94,48 @@ This function should be called before the driver calls request_irq() since enabling MSIs disables the pin-based IRQ and the driver will not receive interrupts on the old interrupt. -4.2.2 pci_disable_msi +4.2.2 pci_enable_msi_block + +int pci_enable_msi_block(struct pci_dev *dev, int count) + +This variation on the above call allows a device driver to request multiple +MSIs. The MSI specification only allows interrupts to be allocated in +powers of two, up to a maximum of 2^5 (32). + +If this function returns 0, it has succeeded in allocating at least as many +interrupts as the driver requested (it may have allocated more in order +to satisfy the power-of-two requirement). In this case, the function +enables MSI on this device and updates dev->irq to be the lowest of +the new interrupts assigned to it. The other interrupts assigned to +the device are in the range dev->irq to dev->irq + count - 1. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to request any more MSI interrupts for +this device. If this function returns a positive number, it will be +less than 'count' and indicate the number of interrupts that could have +been allocated. In neither case will the irq value have been +updated, nor will the device have been switched into MSI mode. + +The device driver must decide what action to take if +pci_enable_msi_block() returns a value less than the number asked for. +Some devices can make use of fewer interrupts than the maximum they +request; in this case the driver should call pci_enable_msi_block() +again. Note that it is not guaranteed to succeed, even when the +'count' has been reduced to the value returned from a previous call to +pci_enable_msi_block(). This is because there are multiple constraints +on the number of vectors that can be allocated; pci_enable_msi_block() +will return as soon as it finds any constraint that doesn't allow the +call to succeed. + +4.2.3 pci_disable_msi void pci_disable_msi(struct pci_dev *dev) -This function should be used to undo the effect of pci_enable_msi(). -Calling it restores dev->irq to the pin-based interrupt number and frees -the previously allocated message signaled interrupt(s). The interrupt -may subsequently be assigned to another device, so drivers should not -cache the value of dev->irq. +This function should be used to undo the effect of pci_enable_msi() or +pci_enable_msi_block(). Calling it restores dev->irq to the pin-based +interrupt number and frees the previously allocated message signaled +interrupt(s). The interrupt may subsequently be assigned to another +device, so drivers should not cache the value of dev->irq. A device driver must always call free_irq() on the interrupt(s) for which it has called request_irq() before calling this function. diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 3bb7d3dd28be..0c16e2a854e5 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -19,6 +19,10 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type) return -ENOSYS; } + /* PowerPC doesn't support multiple MSI yet */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + if (ppc_md.msi_check_device) { pr_debug("msi: Using platform check routine.\n"); return ppc_md.msi_check_device(dev, nvec, type); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index bc7ac4da90d7..a09549a6321b 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3510,6 +3510,10 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int index = 0; #endif + /* x86 doesn't support multiple MSI yet */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index adcc78242571..6f2e6295e773 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -40,6 +40,13 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_desc *entry; int ret; + /* + * If an architecture wants to support multiple MSI, it needs to + * override arch_setup_msi_irqs() + */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + list_for_each_entry(entry, &dev->msi_list, list) { ret = arch_setup_msi_irq(dev, entry); if (ret < 0) @@ -58,8 +65,12 @@ void arch_teardown_msi_irqs(struct pci_dev *dev) struct msi_desc *entry; list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq != 0) - arch_teardown_msi_irq(entry->irq); + int i, nvec; + if (entry->irq == 0) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + arch_teardown_msi_irq(entry->irq + i); } } #endif @@ -163,7 +174,8 @@ static void msi_set_mask_bit(unsigned irq, u32 flag) msix_mask_irq(desc, flag); readl(desc->mask_base); /* Flush write to device */ } else { - msi_mask_irq(desc, 1, flag); + unsigned offset = irq - desc->dev->irq; + msi_mask_irq(desc, 1 << offset, flag << offset); } } @@ -229,6 +241,12 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; + u16 msgctl; + + pci_read_config_word(dev, msi_control_reg(pos), &msgctl); + msgctl &= ~PCI_MSI_FLAGS_QSIZE; + msgctl |= entry->msi_attrib.multiple << 4; + pci_write_config_word(dev, msi_control_reg(pos), msgctl); pci_write_config_dword(dev, msi_lower_address_reg(pos), msg->address_lo); @@ -291,7 +309,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); msi_mask_irq(entry, msi_capable_mask(control), entry->masked); control &= ~PCI_MSI_FLAGS_QSIZE; - control |= PCI_MSI_FLAGS_ENABLE; + control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); } @@ -332,13 +350,15 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state); /** * msi_capability_init - configure device's MSI capability structure * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: number of interrupts to allocate * - * Setup the MSI capability structure of device function with a single - * MSI irq, regardless of device function is capable of handling - * multiple messages. A return of zero indicates the successful setup - * of an entry zero with the new MSI irq or non-zero for otherwise. - **/ -static int msi_capability_init(struct pci_dev *dev) + * Setup the MSI capability structure of the device with the requested + * number of interrupts. A return value of zero indicates the successful + * setup of an entry with the new MSI irq. A negative return value indicates + * an error, and a positive return value indicates the number of interrupts + * which could have been allocated. + */ +static int msi_capability_init(struct pci_dev *dev, int nvec) { struct msi_desc *entry; int pos, ret; @@ -371,7 +391,7 @@ static int msi_capability_init(struct pci_dev *dev) list_add_tail(&entry->list, &dev->msi_list); /* Configure MSI capability structure */ - ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI); + ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); if (ret) { msi_free_irqs(dev); return ret; @@ -524,35 +544,48 @@ static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type) } /** - * pci_enable_msi - configure device's MSI capability structure - * @dev: pointer to the pci_dev data structure of MSI device function + * pci_enable_msi_block - configure device's MSI capability structure + * @dev: device to configure + * @nvec: number of interrupts to configure * - * Setup the MSI capability structure of device function with - * a single MSI irq upon its software driver call to request for - * MSI mode enabled on its hardware device function. A return of zero - * indicates the successful setup of an entry zero with the new MSI - * irq or non-zero for otherwise. - **/ -int pci_enable_msi(struct pci_dev* dev) + * Allocate IRQs for a device with the MSI capability. + * This function returns a negative errno if an error occurs. If it + * is unable to allocate the number of interrupts requested, it returns + * the number of interrupts it might be able to allocate. If it successfully + * allocates at least the number of interrupts requested, it returns 0 and + * updates the @dev's irq member to the lowest new interrupt number; the + * other interrupt numbers allocated to this device are consecutive. + */ +int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) { - int status; + int status, pos, maxvec; + u16 msgctl; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (!pos) + return -EINVAL; + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + if (nvec > maxvec) + return maxvec; - status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI); + status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI); if (status) return status; WARN_ON(!!dev->msi_enabled); - /* Check whether driver already requested for MSI-X irqs */ + /* Check whether driver already requested MSI-X irqs */ if (dev->msix_enabled) { dev_info(&dev->dev, "can't enable MSI " "(MSI-X already enabled)\n"); return -EINVAL; } - status = msi_capability_init(dev); + + status = msi_capability_init(dev, nvec); return status; } -EXPORT_SYMBOL(pci_enable_msi); +EXPORT_SYMBOL(pci_enable_msi_block); void pci_msi_shutdown(struct pci_dev *dev) { @@ -599,8 +632,12 @@ static int msi_free_irqs(struct pci_dev* dev) struct msi_desc *entry, *tmp; list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq) - BUG_ON(irq_has_action(entry->irq)); + int i, nvec; + if (!entry->irq) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + BUG_ON(irq_has_action(entry->irq + i)); } arch_teardown_msi_irqs(dev); diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 3898f5237144..71f4df2ef654 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -20,14 +20,8 @@ #define msi_mask_bits_reg(base, is64bit) \ ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) #define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE -#define multi_msi_capable(control) \ - (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1)) -#define multi_msi_enable(control, num) \ - control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE); #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) -#define msi_enable(control, num) multi_msi_enable(control, num); \ - control |= PCI_MSI_FLAGS_ENABLE #define msix_table_offset_reg(base) (base + 0x04) #define msix_pba_offset_reg(base) (base + 0x08) diff --git a/include/linux/msi.h b/include/linux/msi.h index 37c1bbe546e5..6991ab5b24d1 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -21,6 +21,7 @@ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); struct msi_desc { struct { __u8 is_msix : 1; + __u8 multiple: 3; /* log2 number of messages */ __u8 maskbit : 1; /* mask-pending bit supported ? */ __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ __u8 pos; /* Location of the msi capability */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 7baf2a5db12a..1f6c5ddaae36 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -789,7 +789,7 @@ struct msix_entry { #ifndef CONFIG_PCI_MSI -static inline int pci_enable_msi(struct pci_dev *dev) +static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) { return -1; } @@ -824,7 +824,7 @@ static inline int pci_msi_enabled(void) return 0; } #else -extern int pci_enable_msi(struct pci_dev *dev); +extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); extern int pci_msix_table_size(struct pci_dev *dev); @@ -846,6 +846,8 @@ static inline int pcie_aspm_enabled(void) extern int pcie_aspm_enabled(void); #endif +#define pci_enable_msi(pdev) pci_enable_msi_block(pdev, 1) + #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); -- cgit v1.2.3 From 8293b0f629095efbe7c7e3f9b437f8c040c19eb5 Mon Sep 17 00:00:00 2001 From: David O'Shea Date: Mon, 2 Mar 2009 09:51:13 +0100 Subject: PCI: Compaq Evo D510 SMBus quirk using USB instead of VGA On the Compaq Evo D510 SFF/CMT, a PCI quirk activated the SMBus device based on detection of the on-board VGA controller, but the on-board VGA is disabled if an AGP card is inserted, so look for one of the USB controllers instead. Signed-off-by: David O'Shea Signed-off-by: Jean Delvare Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 9 +++++++-- include/linux/pci_ids.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 50233818a763..7ddcfc65e790 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1186,10 +1186,15 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev) * its on-board VGA controller */ asus_hides_smbus = 1; } - else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG) + else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2) switch(dev->subsystem_device) { case 0x00b8: /* Compaq Evo D510 CMT */ case 0x00b9: /* Compaq Evo D510 SFF */ + /* Motherboard doesn't have Host bridge + * subvendor/subdevice IDs and on-board VGA + * controller is disabled if an AGP card is + * inserted, therefore checking USB UHCI + * Controller #1 */ asus_hides_smbus = 1; } else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC) @@ -1214,7 +1219,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_2, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge); static void asus_hides_smbus_lpc(struct pci_dev *dev) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index aca8c458aa8a..3ddf8beabdf8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2373,6 +2373,7 @@ #define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c #define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0 #define PCI_DEVICE_ID_INTEL_82801DB_1 0x24c1 +#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2 #define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3 #define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5 #define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6 -- cgit v1.2.3 From d1b054da8f599905f3c18a218961dcf17f9d5f13 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:11 +0800 Subject: PCI: initialize and release SR-IOV capability If a device has the SR-IOV capability, initialize it (set the ARI Capable Hierarchy in the lowest numbered PF if necessary; calculate the System Page Size for the VF MMIO, probe the VF Offset, Stride and BARs). A lock for the VF bus allocation is also initialized if a PF is the lowest numbered PF. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/Kconfig | 10 +++ drivers/pci/Makefile | 2 + drivers/pci/iov.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.c | 7 ++ drivers/pci/pci.h | 37 ++++++++++ drivers/pci/probe.c | 4 ++ include/linux/pci.h | 11 +++ include/linux/pci_regs.h | 33 +++++++++ 8 files changed, 286 insertions(+) create mode 100644 drivers/pci/iov.c (limited to 'include/linux') diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 2a4501dd2515..fdc864f9cf23 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -59,3 +59,13 @@ config HT_IRQ This allows native hypertransport devices to use interrupts. If unsure say Y. + +config PCI_IOV + bool "PCI IOV support" + depends on PCI + help + I/O Virtualization is a PCI feature supported by some devices + which allows them to create virtual devices which share their + physical resources. + + If unsure, say N. diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 3d07ce24f6a8..ba6af162fd39 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o +obj-$(CONFIG_PCI_IOV) += iov.o + # # Some architectures use the generic PCI setup functions # diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c new file mode 100644 index 000000000000..66cc414ed15f --- /dev/null +++ b/drivers/pci/iov.c @@ -0,0 +1,182 @@ +/* + * drivers/pci/iov.c + * + * Copyright (C) 2009 Intel Corporation, Yu Zhao + * + * PCI Express I/O Virtualization (IOV) support. + * Single Root IOV 1.0 + */ + +#include +#include +#include +#include +#include "pci.h" + + +static int sriov_init(struct pci_dev *dev, int pos) +{ + int i; + int rc; + int nres; + u32 pgsz; + u16 ctrl, total, offset, stride; + struct pci_sriov *iov; + struct resource *res; + struct pci_dev *pdev; + + if (dev->pcie_type != PCI_EXP_TYPE_RC_END && + dev->pcie_type != PCI_EXP_TYPE_ENDPOINT) + return -ENODEV; + + pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) { + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0); + ssleep(1); + } + + pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); + if (!total) + return 0; + + ctrl = 0; + list_for_each_entry(pdev, &dev->bus->devices, bus_list) + if (pdev->is_physfn) + goto found; + + pdev = NULL; + if (pci_ari_enabled(dev->bus)) + ctrl |= PCI_SRIOV_CTRL_ARI; + +found: + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl); + pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (total > 1 && !stride)) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz); + i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0; + pgsz &= ~((1 << i) - 1); + if (!pgsz) + return -EIO; + + pgsz &= ~(pgsz - 1); + pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz); + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + i += __pci_read_base(dev, pci_bar_unknown, res, + pos + PCI_SRIOV_BAR + i * 4); + if (!res->flags) + continue; + if (resource_size(res) & (PAGE_SIZE - 1)) { + rc = -EIO; + goto failed; + } + res->end = res->start + resource_size(res) * total - 1; + nres++; + } + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) { + rc = -ENOMEM; + goto failed; + } + + iov->pos = pos; + iov->nres = nres; + iov->ctrl = ctrl; + iov->total = total; + iov->offset = offset; + iov->stride = stride; + iov->pgsz = pgsz; + iov->self = dev; + pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); + pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); + + if (pdev) + iov->dev = pci_dev_get(pdev); + else { + iov->dev = dev; + mutex_init(&iov->lock); + } + + dev->sriov = iov; + dev->is_physfn = 1; + + return 0; + +failed: + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + res->flags = 0; + } + + return rc; +} + +static void sriov_release(struct pci_dev *dev) +{ + if (dev == dev->sriov->dev) + mutex_destroy(&dev->sriov->lock); + else + pci_dev_put(dev->sriov->dev); + + kfree(dev->sriov); + dev->sriov = NULL; +} + +/** + * pci_iov_init - initialize the IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_iov_init(struct pci_dev *dev) +{ + int pos; + + if (!dev->is_pcie) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (pos) + return sriov_init(dev, pos); + + return -ENODEV; +} + +/** + * pci_iov_release - release resources used by the IOV capability + * @dev: the PCI device + */ +void pci_iov_release(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_release(dev); +} + +/** + * pci_iov_resource_bar - get position of the SR-IOV BAR + * @dev: the PCI device + * @resno: the resource number + * @type: the BAR type to be filled in + * + * Returns position of the BAR encapsulated in the SR-IOV capability. + */ +int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END) + return 0; + + BUG_ON(!dev->is_physfn); + + *type = pci_bar_unknown; + + return dev->sriov->pos + PCI_SRIOV_BAR + + 4 * (resno - PCI_IOV_RESOURCES); +} diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a35a8b2ba631..2b3201ec2b05 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2360,12 +2360,19 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags) */ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) { + int reg; + if (resno < PCI_ROM_RESOURCE) { *type = pci_bar_unknown; return PCI_BASE_ADDRESS_0 + 4 * resno; } else if (resno == PCI_ROM_RESOURCE) { *type = pci_bar_mem32; return dev->rom_base_reg; + } else if (resno < PCI_BRIDGE_RESOURCES) { + /* device specific resource */ + reg = pci_iov_resource_bar(dev, resno, type); + if (reg) + return reg; } dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 2cd1cba7236f..7d5327c986f5 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -201,4 +201,41 @@ resource_size_t pci_specified_resource_alignment(struct pci_dev *dev); extern void pci_disable_bridge_window(struct pci_dev *dev); #endif +/* Single Root I/O Virtualization */ +struct pci_sriov { + int pos; /* capability position */ + int nres; /* number of resources */ + u32 cap; /* SR-IOV Capabilities */ + u16 ctrl; /* SR-IOV Control */ + u16 total; /* total VFs associated with the PF */ + u16 offset; /* first VF Routing ID offset */ + u16 stride; /* following VF stride */ + u32 pgsz; /* page size for BAR alignment */ + u8 link; /* Function Dependency Link */ + struct pci_dev *dev; /* lowest numbered PF */ + struct pci_dev *self; /* this PF */ + struct mutex lock; /* lock for VF bus */ +}; + +#ifdef CONFIG_PCI_IOV +extern int pci_iov_init(struct pci_dev *dev); +extern void pci_iov_release(struct pci_dev *dev); +extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type); +#else +static inline int pci_iov_init(struct pci_dev *dev) +{ + return -ENODEV; +} +static inline void pci_iov_release(struct pci_dev *dev) + +{ +} +static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + return 0; +} +#endif /* CONFIG_PCI_IOV */ + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 579a56c8181f..0471f6ea1466 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -785,6 +785,7 @@ static int pci_setup_device(struct pci_dev * dev) static void pci_release_capabilities(struct pci_dev *dev) { pci_vpd_release(dev); + pci_iov_release(dev); } /** @@ -979,6 +980,9 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Alternative Routing-ID Forwarding */ pci_enable_ari(dev); + + /* Single Root I/O Virtualization */ + pci_iov_init(dev); } void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) diff --git a/include/linux/pci.h b/include/linux/pci.h index 1f6c5ddaae36..8ce2f2d9ab63 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -93,6 +93,12 @@ enum { /* #6: expansion ROM resource */ PCI_ROM_RESOURCE, + /* device specific resources */ +#ifdef CONFIG_PCI_IOV + PCI_IOV_RESOURCES, + PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1, +#endif + /* resources assigned to buses behind the bridge */ #define PCI_BRIDGE_RESOURCE_NUM 4 @@ -180,6 +186,7 @@ struct pci_cap_saved_state { struct pcie_link_state; struct pci_vpd; +struct pci_sriov; /* * The pci_dev structure is used to describe PCI devices. @@ -257,6 +264,7 @@ struct pci_dev { unsigned int is_managed:1; unsigned int is_pcie:1; unsigned int state_saved:1; + unsigned int is_physfn:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -270,6 +278,9 @@ struct pci_dev { struct list_head msi_list; #endif struct pci_vpd *vpd; +#ifdef CONFIG_PCI_IOV + struct pci_sriov *sriov; /* SR-IOV capability related */ +#endif }; extern struct pci_dev *alloc_pci_dev(void); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index b647a4df59fc..d4e663877f45 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -375,6 +375,7 @@ #define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ @@ -498,6 +499,7 @@ #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 #define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_SRIOV 16 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -615,4 +617,35 @@ #define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ #define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ + #endif /* LINUX_PCI_REGS_H */ -- cgit v1.2.3 From dd7cc44d0bcec5e9c42fe52e88dc254ae62eac8d Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:15 +0800 Subject: PCI: add SR-IOV API for Physical Function driver Add or remove the Virtual Function when the SR-IOV is enabled or disabled by the device driver. This can happen anytime rather than only at the device probe stage. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 314 ++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 2 + include/linux/pci.h | 19 +++- 3 files changed, 334 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 5ddfc09a8d3f..d0ff8ad8f7ba 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -13,6 +13,7 @@ #include #include "pci.h" +#define VIRTFN_ID_LEN 16 static inline u8 virtfn_bus(struct pci_dev *dev, int id) { @@ -26,6 +27,284 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) dev->sriov->stride * id) & 0xff; } +static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) +{ + int rc; + struct pci_bus *child; + + if (bus->number == busnr) + return bus; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (child) + return child; + + child = pci_add_new_bus(bus, NULL, busnr); + if (!child) + return NULL; + + child->subordinate = busnr; + child->dev.parent = bus->bridge; + rc = pci_bus_add_child(child); + if (rc) { + pci_remove_bus(child); + return NULL; + } + + return child; +} + +static void virtfn_remove_bus(struct pci_bus *bus, int busnr) +{ + struct pci_bus *child; + + if (bus->number == busnr) + return; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + BUG_ON(!child); + + if (list_empty(&child->devices)) + pci_remove_bus(child); +} + +static int virtfn_add(struct pci_dev *dev, int id, int reset) +{ + int i; + int rc; + u64 size; + char buf[VIRTFN_ID_LEN]; + struct pci_dev *virtfn; + struct resource *res; + struct pci_sriov *iov = dev->sriov; + + virtfn = alloc_pci_dev(); + if (!virtfn) + return -ENOMEM; + + mutex_lock(&iov->dev->sriov->lock); + virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); + if (!virtfn->bus) { + kfree(virtfn); + mutex_unlock(&iov->dev->sriov->lock); + return -ENOMEM; + } + virtfn->devfn = virtfn_devfn(dev, id); + virtfn->vendor = dev->vendor; + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); + pci_setup_device(virtfn); + virtfn->dev.parent = dev->dev.parent; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (!res->parent) + continue; + virtfn->resource[i].name = pci_name(virtfn); + virtfn->resource[i].flags = res->flags; + size = resource_size(res); + do_div(size, iov->total); + virtfn->resource[i].start = res->start + size * id; + virtfn->resource[i].end = virtfn->resource[i].start + size - 1; + rc = request_resource(res, &virtfn->resource[i]); + BUG_ON(rc); + } + + if (reset) + pci_execute_reset_function(virtfn); + + pci_device_add(virtfn, virtfn->bus); + mutex_unlock(&iov->dev->sriov->lock); + + virtfn->physfn = pci_dev_get(dev); + virtfn->is_virtfn = 1; + + rc = pci_bus_add_device(virtfn); + if (rc) + goto failed1; + sprintf(buf, "virtfn%u", id); + rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); + if (rc) + goto failed1; + rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn"); + if (rc) + goto failed2; + + kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); + + return 0; + +failed2: + sysfs_remove_link(&dev->dev.kobj, buf); +failed1: + pci_dev_put(dev); + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + return rc; +} + +static void virtfn_remove(struct pci_dev *dev, int id, int reset) +{ + char buf[VIRTFN_ID_LEN]; + struct pci_bus *bus; + struct pci_dev *virtfn; + struct pci_sriov *iov = dev->sriov; + + bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id)); + if (!bus) + return; + + virtfn = pci_get_slot(bus, virtfn_devfn(dev, id)); + if (!virtfn) + return; + + pci_dev_put(virtfn); + + if (reset) { + device_release_driver(&virtfn->dev); + pci_execute_reset_function(virtfn); + } + + sprintf(buf, "virtfn%u", id); + sysfs_remove_link(&dev->dev.kobj, buf); + sysfs_remove_link(&virtfn->dev.kobj, "physfn"); + + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + pci_dev_put(dev); +} + +static int sriov_enable(struct pci_dev *dev, int nr_virtfn) +{ + int rc; + int i, j; + int nres; + u16 offset, stride, initial; + struct resource *res; + struct pci_dev *pdev; + struct pci_sriov *iov = dev->sriov; + + if (!nr_virtfn) + return 0; + + if (iov->nr_virtfn) + return -EINVAL; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial); + if (initial > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total))) + return -EIO; + + if (nr_virtfn < 0 || nr_virtfn > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial))) + return -EINVAL; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (nr_virtfn > 1 && !stride)) + return -EIO; + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (res->parent) + nres++; + } + if (nres != iov->nres) { + dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n"); + return -ENOMEM; + } + + iov->offset = offset; + iov->stride = stride; + + if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) { + dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); + return -ENOMEM; + } + + if (iov->link != dev->devfn) { + pdev = pci_get_slot(dev->bus, iov->link); + if (!pdev) + return -ENODEV; + + pci_dev_put(pdev); + + if (!pdev->is_physfn) + return -ENODEV; + + rc = sysfs_create_link(&dev->dev.kobj, + &pdev->dev.kobj, "dep_link"); + if (rc) + return rc; + } + + iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + msleep(100); + pci_unblock_user_cfg_access(dev); + + iov->initial = initial; + if (nr_virtfn < initial) + initial = nr_virtfn; + + for (i = 0; i < initial; i++) { + rc = virtfn_add(dev, i, 0); + if (rc) + goto failed; + } + + kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); + iov->nr_virtfn = nr_virtfn; + + return 0; + +failed: + for (j = 0; j < i; j++) + virtfn_remove(dev, j, 0); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + return rc; +} + +static void sriov_disable(struct pci_dev *dev) +{ + int i; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return; + + for (i = 0; i < iov->nr_virtfn; i++) + virtfn_remove(dev, i, 0); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + iov->nr_virtfn = 0; +} + static int sriov_init(struct pci_dev *dev, int pos) { int i; @@ -132,6 +411,8 @@ failed: static void sriov_release(struct pci_dev *dev) { + BUG_ON(dev->sriov->nr_virtfn); + if (dev == dev->sriov->dev) mutex_destroy(&dev->sriov->lock); else @@ -155,6 +436,7 @@ static void sriov_restore_state(struct pci_dev *dev) pci_update_resource(dev, i); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); if (iov->ctrl & PCI_SRIOV_CTRL_VFE) msleep(100); @@ -245,3 +527,35 @@ int pci_iov_bus_range(struct pci_bus *bus) return max ? max - bus->number : 0; } + +/** + * pci_enable_sriov - enable the SR-IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + might_sleep(); + + if (!dev->is_physfn) + return -ENODEV; + + return sriov_enable(dev, nr_virtfn); +} +EXPORT_SYMBOL_GPL(pci_enable_sriov); + +/** + * pci_disable_sriov - disable the SR-IOV capability + * @dev: the PCI device + */ +void pci_disable_sriov(struct pci_dev *dev) +{ + might_sleep(); + + if (!dev->is_physfn) + return; + + sriov_disable(dev); +} +EXPORT_SYMBOL_GPL(pci_disable_sriov); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f4fc10fc5872..0f1c7d103509 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -209,6 +209,8 @@ struct pci_sriov { u32 cap; /* SR-IOV Capabilities */ u16 ctrl; /* SR-IOV Control */ u16 total; /* total VFs associated with the PF */ + u16 initial; /* initial VFs associated with the PF */ + u16 nr_virtfn; /* number of VFs available */ u16 offset; /* first VF Routing ID offset */ u16 stride; /* following VF stride */ u32 pgsz; /* page size for BAR alignment */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 8ce2f2d9ab63..c2e491e04063 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -265,6 +265,7 @@ struct pci_dev { unsigned int is_pcie:1; unsigned int state_saved:1; unsigned int is_physfn:1; + unsigned int is_virtfn:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -279,7 +280,10 @@ struct pci_dev { #endif struct pci_vpd *vpd; #ifdef CONFIG_PCI_IOV - struct pci_sriov *sriov; /* SR-IOV capability related */ + union { + struct pci_sriov *sriov; /* SR-IOV capability related */ + struct pci_dev *physfn; /* the PF this VF is associated with */ + }; #endif }; @@ -1212,5 +1216,18 @@ int pci_ext_cfg_avail(struct pci_dev *dev); void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); +#ifdef CONFIG_PCI_IOV +extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); +extern void pci_disable_sriov(struct pci_dev *dev); +#else +static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + return -ENODEV; +} +static inline void pci_disable_sriov(struct pci_dev *dev) +{ +} +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From 74bb1bcc7dbbc9ddef773bf3395d7ff92aaaad2e Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 Mar 2009 11:25:16 +0800 Subject: PCI: handle SR-IOV Virtual Function Migration Add or remove a Virtual Function after receiving a Migrate In or Out Request. Reviewed-by: Matthew Wilcox Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 4 ++ include/linux/pci.h | 6 +++ 3 files changed, 129 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index d0ff8ad8f7ba..7227efc760db 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -179,6 +179,97 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) pci_dev_put(dev); } +static int sriov_migration(struct pci_dev *dev) +{ + u16 status; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return 0; + + if (!(iov->cap & PCI_SRIOV_CAP_VFM)) + return 0; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status); + if (!(status & PCI_SRIOV_STATUS_VFM)) + return 0; + + schedule_work(&iov->mtask); + + return 1; +} + +static void sriov_migration_task(struct work_struct *work) +{ + int i; + u8 state; + u16 status; + struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask); + + for (i = iov->initial; i < iov->nr_virtfn; i++) { + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_MI) { + writeb(PCI_SRIOV_VFM_AV, iov->mstate + i); + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_AV) + virtfn_add(iov->self, i, 1); + } else if (state == PCI_SRIOV_VFM_MO) { + virtfn_remove(iov->self, i, 1); + writeb(PCI_SRIOV_VFM_UA, iov->mstate + i); + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_AV) + virtfn_add(iov->self, i, 0); + } + } + + pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status); + status &= ~PCI_SRIOV_STATUS_VFM; + pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status); +} + +static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn) +{ + int bir; + u32 table; + resource_size_t pa; + struct pci_sriov *iov = dev->sriov; + + if (nr_virtfn <= iov->initial) + return 0; + + pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table); + bir = PCI_SRIOV_VFM_BIR(table); + if (bir > PCI_STD_RESOURCE_END) + return -EIO; + + table = PCI_SRIOV_VFM_OFFSET(table); + if (table + nr_virtfn > pci_resource_len(dev, bir)) + return -EIO; + + pa = pci_resource_start(dev, bir) + table; + iov->mstate = ioremap(pa, nr_virtfn); + if (!iov->mstate) + return -ENOMEM; + + INIT_WORK(&iov->mtask, sriov_migration_task); + + iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR; + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + + return 0; +} + +static void sriov_disable_migration(struct pci_dev *dev) +{ + struct pci_sriov *iov = dev->sriov; + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + + cancel_work_sync(&iov->mtask); + iounmap(iov->mstate); +} + static int sriov_enable(struct pci_dev *dev, int nr_virtfn) { int rc; @@ -261,6 +352,12 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) goto failed; } + if (iov->cap & PCI_SRIOV_CAP_VFM) { + rc = sriov_enable_migration(dev, nr_virtfn); + if (rc) + goto failed; + } + kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); iov->nr_virtfn = nr_virtfn; @@ -290,6 +387,9 @@ static void sriov_disable(struct pci_dev *dev) if (!iov->nr_virtfn) return; + if (iov->cap & PCI_SRIOV_CAP_VFM) + sriov_disable_migration(dev); + for (i = 0; i < iov->nr_virtfn; i++) virtfn_remove(dev, i, 0); @@ -559,3 +659,22 @@ void pci_disable_sriov(struct pci_dev *dev) sriov_disable(dev); } EXPORT_SYMBOL_GPL(pci_disable_sriov); + +/** + * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration + * @dev: the PCI device + * + * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not. + * + * Physical Function driver is responsible to register IRQ handler using + * VF Migration Interrupt Message Number, and call this function when the + * interrupt is generated by the hardware. + */ +irqreturn_t pci_sriov_migration(struct pci_dev *dev) +{ + if (!dev->is_physfn) + return IRQ_NONE; + + return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE; +} +EXPORT_SYMBOL_GPL(pci_sriov_migration); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0f1c7d103509..22dcfdb75d91 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1,6 +1,8 @@ #ifndef DRIVERS_PCI_H #define DRIVERS_PCI_H +#include + #define PCI_CFG_SPACE_SIZE 256 #define PCI_CFG_SPACE_EXP_SIZE 4096 @@ -218,6 +220,8 @@ struct pci_sriov { struct pci_dev *dev; /* lowest numbered PF */ struct pci_dev *self; /* this PF */ struct mutex lock; /* lock for VF bus */ + struct work_struct mtask; /* VF Migration task */ + u8 __iomem *mstate; /* VF Migration State Array */ }; #ifdef CONFIG_PCI_IOV diff --git a/include/linux/pci.h b/include/linux/pci.h index c2e491e04063..1216843412da 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -52,6 +52,7 @@ #include #include #include +#include /* Include the ID list */ #include @@ -1219,6 +1220,7 @@ void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); #ifdef CONFIG_PCI_IOV extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); extern void pci_disable_sriov(struct pci_dev *dev); +extern irqreturn_t pci_sriov_migration(struct pci_dev *dev); #else static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { @@ -1227,6 +1229,10 @@ static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) static inline void pci_disable_sriov(struct pci_dev *dev) { } +static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev) +{ + return IRQ_NONE; +} #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From 79af72d716cf1bb13b175429cf181a6c4d063ee8 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 20 Mar 2009 14:55:55 -0600 Subject: PCI: pci_is_root_bus helper Introduce pci_is_root_bus helper function. This will help make code more consistent, as well as prevent incorrect assumptions (such as pci_bus->self == NULL on a root bus, which is not always true). Signed-off-by: Kenji Kaneshige Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 1216843412da..50d94388e87c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -357,6 +357,15 @@ struct pci_bus { #define pci_bus_b(n) list_entry(n, struct pci_bus, node) #define to_pci_bus(n) container_of(n, struct pci_bus, dev) +/* + * Returns true if the pci bus is root (behind host-pci bridge), + * false otherwise + */ +static inline bool pci_is_root_bus(struct pci_bus *pbus) +{ + return !(pbus->parent); +} + #ifdef CONFIG_PCI_MSI static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { -- cgit v1.2.3 From 3ed4fd96b3188406ac5357d9290bcffa08c65cf6 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 20 Mar 2009 14:56:25 -0600 Subject: PCI: Introduce pci_rescan_bus() This API is used by the PCI core to rescan a bus and rediscover newly added devices. Over time, it is expected that the various PCI hotplug drivers will migrate to this interface and away from the old pci_do_scan_bus() interface. Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/fakephp.c | 6 +++--- drivers/pci/probe.c | 32 ++++++++++++++++++++++++++++++++ include/linux/pci.h | 3 +++ 3 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index d8649e127298..16063745766e 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -245,12 +245,12 @@ static int pci_rescan_slot(struct pci_dev *temp) /** - * pci_rescan_bus - Rescan PCI bus + * pci_rescan_bus_local - fakephp version of rescan PCI bus * @bus: the PCI bus to rescan * * Call pci_rescan_slot for each possible function of the bus. */ -static void pci_rescan_bus(const struct pci_bus *bus) +static void pci_rescan_bus_local(const struct pci_bus *bus) { unsigned int devfn; struct pci_dev *dev; @@ -291,7 +291,7 @@ static void pci_rescan_buses(const struct list_head *list) const struct list_head *l; list_for_each(l,list) { const struct pci_bus *b = pci_bus_b(l); - pci_rescan_bus(b); + pci_rescan_bus_local(b); pci_rescan_buses(&b->children); } } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f69256c63b2b..60a8e5fec6c5 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1212,6 +1212,38 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, EXPORT_SYMBOL(pci_scan_bus_parented); #ifdef CONFIG_HOTPLUG +/** + * pci_rescan_bus - scan a PCI bus for devices. + * @bus: PCI bus to scan + * + * Scan a PCI bus and child buses for new devices, adds them, + * and enables them. + * + * Returns the max number of subordinate bus discovered. + */ +unsigned int __devinit pci_rescan_bus(struct pci_bus *bus) +{ + unsigned int max; + struct pci_dev *dev; + + max = pci_scan_child_bus(bus); + + up_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + if (dev->subordinate) + pci_bus_size_bridges(dev->subordinate); + down_read(&pci_bus_sem); + + pci_bus_assign_resources(bus); + pci_enable_bridges(bus); + pci_bus_add_devices(bus); + + return max; +} +EXPORT_SYMBOL_GPL(pci_rescan_bus); + EXPORT_SYMBOL(pci_add_new_bus); EXPORT_SYMBOL(pci_scan_slot); EXPORT_SYMBOL(pci_scan_bridge); diff --git a/include/linux/pci.h b/include/linux/pci.h index 50d94388e87c..6fb335b0d74f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -726,6 +726,9 @@ int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +#ifdef CONFIG_HOTPLUG +unsigned int pci_rescan_bus(struct pci_bus *bus); +#endif /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); -- cgit v1.2.3 From 7058548cd50e5bda8db086bb2e5c1d82f746d047 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 25 Mar 2009 23:35:46 -0400 Subject: ext4: Use WRITE_SYNC for commits which are caused by fsync() If a commit is triggered by fsync(), set a flag indicating the journal blocks associated with the transaction should be flushed out using WRITE_SYNC. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 5 ++++- fs/jbd2/transaction.c | 2 ++ include/linux/jbd2.h | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 62804e57a44c..4ea72377c7a2 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -367,6 +367,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; + int write_op = WRITE; /* * First job: lock down the current transaction and wait for @@ -401,6 +402,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; + if (commit_transaction->t_synchronous_commit) + write_op = WRITE_SYNC; stats.u.run.rs_wait = commit_transaction->t_max_wait; stats.u.run.rs_locked = jiffies; stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, @@ -680,7 +683,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(WRITE, bh); + submit_bh(write_op, bh); } cond_resched(); stats.u.run.rs_blocks_logged += bufs; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 28ce21d8598e..996ffda06bf3 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1315,6 +1315,8 @@ int jbd2_journal_stop(handle_t *handle) } } + if (handle->h_sync) + transaction->t_synchronous_commit = 1; current->journal_info = NULL; spin_lock(&journal->j_state_lock); spin_lock(&transaction->t_handle_lock); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4d248b3f1323..8815a3456b3b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -648,6 +648,12 @@ struct transaction_s */ int t_handle_count; + /* + * This transaction is being forced and some process is + * waiting for it to finish. + */ + int t_synchronous_commit:1; + /* * For use by the filesystem to store fs-specific data * structures associated with the transaction -- cgit v1.2.3 From 898585172fa729513d8636257b44bd1cfd279096 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 16 Feb 2009 02:55:47 +0800 Subject: PCI: save and restore PCIe 2.0 registers PCIe 2.0 defines several new registers (Device Control 2, Link Control 2, and Slot Control 2). Save and retore them in pci_save_pcie_state() and pci_restore_pcie_state(). Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 11 ++++++++++- include/linux/pci_regs.h | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 676bbcbc272b..59569b8cf1d5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -647,6 +647,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) EXPORT_SYMBOL(pci_choose_state); +#define PCI_EXP_SAVE_REGS 7 + static int pci_save_pcie_state(struct pci_dev *dev) { int pos, i = 0; @@ -668,6 +670,9 @@ static int pci_save_pcie_state(struct pci_dev *dev) pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); return 0; } @@ -688,6 +693,9 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); } @@ -1372,7 +1380,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) { int error; - error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16)); + error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, + PCI_EXP_SAVE_REGS * sizeof(u16)); if (error) dev_err(&dev->dev, "unable to preallocate PCI Express save buffer\n"); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index d4e663877f45..e4d08c1b2e0b 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -488,6 +488,8 @@ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ /* Extended Capabilities (PCI-X 2.0 and Express) */ #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) -- cgit v1.2.3 From efb3288b423d7e3533a68dccecaa05a56a281a4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:45:43 -0400 Subject: SUNRPC: Clean up static inline functions in svc_xprt.h Clean up: Enable the use of const arguments in higher level svc_ APIs by adding const to the arguments of the helper functions in svc_xprt.h Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svc_xprt.h | 46 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0127daca4354..959b931b6053 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -88,29 +88,32 @@ static inline void svc_xprt_get(struct svc_xprt *xprt) kref_get(&xprt->xpt_ref); } static inline void svc_xprt_set_local(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_local, sa, salen); xprt->xpt_locallen = salen; } static inline void svc_xprt_set_remote(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; } -static inline unsigned short svc_addr_port(struct sockaddr *sa) +static inline unsigned short svc_addr_port(const struct sockaddr *sa) { - unsigned short ret = 0; + const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa; + switch (sa->sa_family) { case AF_INET: - ret = ntohs(((struct sockaddr_in *)sa)->sin_port); - break; + return ntohs(sin->sin_port); case AF_INET6: - ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); - break; + return ntohs(sin6->sin6_port); } - return ret; + + return 0; } static inline size_t svc_addr_len(struct sockaddr *sa) @@ -124,36 +127,39 @@ static inline size_t svc_addr_len(struct sockaddr *sa) return -EAFNOSUPPORT; } -static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_local); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_local); } -static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_remote); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_remote); } -static inline char *__svc_print_addr(struct sockaddr *addr, - char *buf, size_t len) +static inline char *__svc_print_addr(const struct sockaddr *addr, + char *buf, const size_t len) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)addr; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr; + switch (addr->sa_family) { case AF_INET: - snprintf(buf, len, "%pI4, port=%u", - &((struct sockaddr_in *)addr)->sin_addr, - ntohs(((struct sockaddr_in *) addr)->sin_port)); + snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr, + ntohs(sin->sin_port)); break; case AF_INET6: snprintf(buf, len, "%pI6, port=%u", - &((struct sockaddr_in6 *)addr)->sin6_addr, - ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); + &sin6->sin6_addr, + ntohs(sin6->sin6_port)); break; default: snprintf(buf, len, "unknown address type: %d", addr->sa_family); break; } + return buf; } #endif /* SUNRPC_SVC_XPRT_H */ -- cgit v1.2.3 From 156e62094a74cf43f02f56ef96b6cda567501357 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:45:58 -0400 Subject: SUNRPC: Clean up svc_find_xprt() calling sequence Clean up: add documentating comment and use appropriate data types for svc_find_xprt()'s arguments. This also eliminates a mixed sign comparison: @port was an int, while the return value of svc_xprt_local_port() is an unsigned short. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svc_xprt.h | 3 ++- net/sunrpc/svc_xprt.c | 16 +++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 959b931b6053..55b68582c5d9 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -80,7 +80,8 @@ void svc_close_xprt(struct svc_xprt *xprt); void svc_delete_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); -struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int); +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, + const sa_family_t af, const unsigned short port); int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen); static inline void svc_xprt_get(struct svc_xprt *xprt) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e588df5d6b34..c947c93dbc24 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1033,7 +1033,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) return dr; } -/* +/** + * svc_find_xprt - find an RPC transport instance + * @serv: pointer to svc_serv to search + * @xcl_name: C string containing transport's class name + * @af: Address family of transport's local address + * @port: transport's IP port number + * * Return the transport instance pointer for the endpoint accepting * connections/peer traffic from the specified transport class, * address family and port. @@ -1042,14 +1048,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * wild-card, and will result in matching the first transport in the * service's list that has a matching class name. */ -struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, - int af, int port) +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, + const sa_family_t af, const unsigned short port) { struct svc_xprt *xprt; struct svc_xprt *found = NULL; /* Sanity check the args */ - if (!serv || !xcl_name) + if (serv == NULL || xcl_name == NULL) return found; spin_lock_bh(&serv->sv_lock); @@ -1058,7 +1064,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, continue; if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) continue; - if (port && port != svc_xprt_local_port(xprt)) + if (port != 0 && port != svc_xprt_local_port(xprt)) continue; found = xprt; svc_xprt_get(xprt); -- cgit v1.2.3 From 4b62e58cccff9c5e7ffc7023f7ec24c75fbd549b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:06 -0400 Subject: SUNRPC: Pass a family argument to svc_register() The sv_family field is going away. Instead of using sv_family, have the svc_register() function take a protocol family argument. Since this argument represents a protocol family, and not an address family, this argument takes an int, as this is what is passed to sock_create_kern(). Also make sure svc_register's helpers are checking for PF_FOO instead of AF_FOO. The value of [AP]F_FOO are equivalent; this is simply a symbolic change to reflect the semantics of the value stored in that variable. sock_create_kern() should return EPFNOSUPPORT if the passed-in protocol family isn't supported, but it uses EAFNOSUPPORT for this case. We will stick with that tradition here, as svc_register() is called by the RPC server in the same path as sock_create_kern(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svc.h | 4 ++-- net/sunrpc/svc.c | 21 +++++++++++---------- net/sunrpc/svcsock.c | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3435d24bfe55..1f18fc728cba 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -396,8 +396,8 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); -int svc_register(const struct svc_serv *, const unsigned short, - const unsigned short); +int svc_register(const struct svc_serv *, const int, + const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c51fed4d1af1..41bc36ea2224 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -800,17 +800,17 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, * if any error occurs. */ static int __svc_register(const u32 program, const u32 version, - const sa_family_t family, + const int family, const unsigned short protocol, const unsigned short port) { int error; switch (family) { - case AF_INET: + case PF_INET: return __svc_rpcb_register4(program, version, protocol, port); - case AF_INET6: + case PF_INET6: error = __svc_rpcb_register6(program, version, protocol, port); if (error < 0) @@ -840,11 +840,11 @@ static int __svc_register(const u32 program, const u32 version, * if any error occurs. */ static int __svc_register(const u32 program, const u32 version, - sa_family_t family, + const int family, const unsigned short protocol, const unsigned short port) { - if (family != AF_INET) + if (family != PF_INET) return -EAFNOSUPPORT; return rpcb_register(program, version, protocol, port); @@ -855,13 +855,14 @@ static int __svc_register(const u32 program, const u32 version, /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register + * @family: protocol family of service's listener socket * @proto: transport protocol number to advertise * @port: port to advertise * - * Service is registered for any address in serv's address family + * Service is registered for any address in the passed-in protocol family */ -int svc_register(const struct svc_serv *serv, const unsigned short proto, - const unsigned short port) +int svc_register(const struct svc_serv *serv, const int family, + const unsigned short proto, const unsigned short port) { struct svc_program *progp; unsigned int i; @@ -879,7 +880,7 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto, i, proto == IPPROTO_UDP? "udp" : "tcp", port, - serv->sv_family, + family, progp->pg_vers[i]->vs_hidden? " (but not telling portmap)" : ""); @@ -887,7 +888,7 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto, continue; error = __svc_register(progp->pg_prog, i, - serv->sv_family, proto, port); + family, proto, port); if (error < 0) break; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5763e6460fea..d00583c1cd04 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1122,7 +1122,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, inet->sk_protocol, + *errp = svc_register(serv, serv->sv_family, inet->sk_protocol, ntohs(inet_sk(inet)->sport)); if (*errp < 0) { -- cgit v1.2.3 From 9652ada3fb5914a67d8422114e8a76388330fa79 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:21 -0400 Subject: SUNRPC: Change svc_create_xprt() to take a @family argument The sv_family field is going away. Pass a protocol family argument to svc_create_xprt() instead of extracting the family from the passed-in svc_serv struct. Again, as this is a listener socket and not an address, we make this new argument an "int" protocol family, instead of an "sa_family_t." Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/svc.c | 3 ++- fs/nfs/callback.c | 4 ++-- fs/nfsd/nfsctl.c | 2 +- fs/nfsd/nfssvc.c | 4 ++-- include/linux/sunrpc/svc_xprt.h | 3 ++- net/sunrpc/svc_xprt.c | 15 +++++++++------ 6 files changed, 18 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 64f1c31b5853..390c5593655c 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -211,7 +211,8 @@ static int create_lockd_listener(struct svc_serv *serv, char *name, xprt = svc_find_xprt(serv, name, 0, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, port, SVC_SOCK_DEFAULTS); + return svc_create_xprt(serv, name, nlmsvc_family, + port, SVC_SOCK_DEFAULTS); svc_xprt_put(xprt); return 0; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 3e634f2a1083..fb35cab63c8a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -122,8 +122,8 @@ int nfs_callback_up(void) if (!serv) goto out_err; - ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport, - SVC_SOCK_ANONYMOUS); + ret = svc_create_xprt(serv, "tcp", nfs_callback_family, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; nfs_callback_tcpport = ret; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 5a936c14f6ff..a4ed8644d69c 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -943,7 +943,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) err = nfsd_create_serv(); if (!err) { err = svc_create_xprt(nfsd_serv, - transport, port, + transport, PF_INET, port, SVC_SOCK_ANONYMOUS); if (err == -ENOENT) /* Give a reasonable perror msg for diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07e4f5d7baa8..ab7f249055b5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -244,7 +244,7 @@ static int nfsd_init_socks(int port) if (!list_empty(&nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nfsd_serv, "udp", port, + error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; @@ -253,7 +253,7 @@ static int nfsd_init_socks(int port) if (error < 0) return error; - error = svc_create_xprt(nfsd_serv, "tcp", port, + error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 55b68582c5d9..0d9cb6ef28b0 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -71,7 +71,8 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, char *, unsigned short, int); +int svc_create_xprt(struct svc_serv *, const char *, const int, + const unsigned short, int); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_received(struct svc_xprt *); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c947c93dbc24..2819ee093f36 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init); static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct svc_serv *serv, - unsigned short port, int flags) + const int family, + const unsigned short port, + int flags) { struct sockaddr_in sin = { .sin_family = AF_INET, @@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct sockaddr *sap; size_t len; - switch (serv->sv_family) { - case AF_INET: + switch (family) { + case PF_INET: sap = (struct sockaddr *)&sin; len = sizeof(sin); break; - case AF_INET6: + case PF_INET6: sap = (struct sockaddr *)&sin6; len = sizeof(sin6); break; @@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return xcl->xcl_ops->xpo_create(serv, sap, len, flags); } -int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, +int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, + const int family, const unsigned short port, int flags) { struct svc_xprt_class *xcl; @@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = __svc_xpo_create(xcl, serv, port, flags); + newxprt = __svc_xpo_create(xcl, serv, family, port, flags); if (IS_ERR(newxprt)) { module_put(xcl->xcl_owner); return PTR_ERR(newxprt); -- cgit v1.2.3 From 49a9072f29a1039f142ec98b44a72d7173651c02 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:29 -0400 Subject: SUNRPC: Remove @family argument from svc_create() and svc_create_pooled() Since an RPC service listener's protocol family is specified now via svc_create_xprt(), it no longer needs to be passed to svc_create() or svc_create_pooled(). Remove that argument from the synopsis of those functions, and remove the sv_family field from the svc_serv struct. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/svc.c | 2 +- fs/nfs/callback.c | 3 +-- fs/nfsd/nfssvc.c | 1 - include/linux/sunrpc/svc.h | 5 ++--- net/sunrpc/svc.c | 11 +++++------ 5 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 390c5593655c..d30920038cb6 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -275,7 +275,7 @@ int lockd_up(void) "lockd_up: no pid, %d users??\n", nlmsvc_users); error = -ENOMEM; - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, nlmsvc_family, NULL); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); goto out; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index fb35cab63c8a..ddf4b4ae6967 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -116,8 +116,7 @@ int nfs_callback_up(void) mutex_lock(&nfs_callback_mutex); if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) goto out; - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, - nfs_callback_family, NULL); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); ret = -ENOMEM; if (!serv) goto out_err; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ab7f249055b5..bc3567bab8c4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -229,7 +229,6 @@ int nfsd_create_serv(void) atomic_set(&nfsd_busy, 0); nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - AF_INET, nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1f18fc728cba..d3a4c0231933 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -69,7 +69,6 @@ struct svc_serv { struct list_head sv_tempsocks; /* all temporary sockets */ int sv_tmpcnt; /* count of temporary sockets */ struct timer_list sv_temptimer; /* timer for aging temporary sockets */ - sa_family_t sv_family; /* listener's address family */ char * sv_name; /* service name */ @@ -385,13 +384,13 @@ struct svc_procedure { /* * Function prototypes. */ -struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t, +struct svc_serv *svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv *)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - sa_family_t, void (*shutdown)(struct svc_serv *), + void (*shutdown)(struct svc_serv *), svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); void svc_destroy(struct svc_serv *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 41bc36ea2224..d72ff44826d8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - sa_family_t family, void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv)) { struct svc_serv *serv; unsigned int vers; @@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; - serv->sv_family = family; serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; @@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - sa_family_t family, void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv)) { - return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, shutdown); } EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - sa_family_t family, void (*shutdown)(struct svc_serv *serv), + void (*shutdown)(struct svc_serv *serv), svc_thread_fn func, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, family, shutdown); + serv = __svc_create(prog, bufsize, npools, shutdown); if (serv != NULL) { serv->sv_function = func; -- cgit v1.2.3 From a08915ba594da66145f33a972db578a58b9135f1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:13 +0200 Subject: ide-cd: use scatterlists for PIO transfers (fs requests) * Export ide_pio_bytes(). * Add ->last_xfer_len field to struct ide_cmd. * Add ide_cd_error_cmd() helper to ide-cd. * Convert ide-cd to use scatterlists also for PIO transfers (fs requests only for now) and get rid of partial completions (except when the error happens -- which is still subject to change later because looking at ATAPI spec it seems that the device is free to error the whole transfer with setting the Error bit only on the last transfer chunk). * Update ide_cd_{prepare_rw,restore_request,do_request}() accordingly. * Inline ide_cd_restore_request() into cdrom_start_rw(). Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 146 +++++++++++++-------------------------------- drivers/ide/ide-taskfile.c | 5 +- include/linux/ide.h | 4 ++ 3 files changed, 50 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 30113e69c8bb..5f15859c2c73 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -539,64 +539,12 @@ static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive, { ide_debug_log(IDE_DBG_RQ, "rq->cmd_flags: 0x%x", rq->cmd_flags); - if (rq_data_dir(rq) == READ) { - unsigned short sectors_per_frame = - queue_hardsect_size(drive->queue) >> SECTOR_BITS; - int nskip = rq->sector & (sectors_per_frame - 1); - - /* - * If the requested sector doesn't start on a frame boundary, - * we must adjust the start of the transfer so that it does, - * and remember to skip the first few sectors. - * - * If the rq->current_nr_sectors field is larger than the size - * of the buffer, it will mean that we're to skip a number of - * sectors equal to the amount by which rq->current_nr_sectors - * is larger than the buffer size. - */ - if (nskip > 0) { - /* sanity check... */ - if (rq->current_nr_sectors != - bio_cur_sectors(rq->bio)) { - printk(KERN_ERR PFX "%s: %s: buffer botch (%u)\n", - drive->name, __func__, - rq->current_nr_sectors); - return ide_stopped; - } - rq->current_nr_sectors += nskip; - } - } - /* set up the command */ rq->timeout = ATAPI_WAIT_PC; return ide_started; } -/* - * Fix up a possibly partially-processed request so that we can start it over - * entirely, or even put it back on the request queue. - */ -static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq) -{ - - ide_debug_log(IDE_DBG_FUNC, "enter"); - - if (rq->buffer != bio_data(rq->bio)) { - sector_t n = - (rq->buffer - (char *)bio_data(rq->bio)) / SECTOR_SIZE; - - rq->buffer = bio_data(rq->bio); - rq->nr_sectors += n; - rq->sector -= n; - } - rq->current_nr_sectors = bio_cur_sectors(rq->bio); - rq->hard_cur_sectors = rq->current_nr_sectors; - rq->hard_nr_sectors = rq->nr_sectors; - rq->hard_sector = rq->sector; - rq->q->prep_rq_fn(rq->q, rq); -} - static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq) { ide_debug_log(IDE_DBG_FUNC, "rq->cmd[0]: 0x%x", rq->cmd[0]); @@ -690,6 +638,17 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, return (flags & REQ_FAILED) ? -EIO : 0; } +static void ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) +{ + unsigned int nr_bytes = cmd->nbytes - cmd->nleft; + + if (cmd->tf_flags & IDE_TFLAG_WRITE) + nr_bytes -= cmd->last_xfer_len; + + if (nr_bytes > 0) + ide_complete_rq(drive, 0, nr_bytes); +} + /* * Called from blk_end_request_callback() after the data of the request is * completed and before the request itself is completed. By returning value '1', @@ -703,6 +662,7 @@ static int cdrom_newpc_intr_dummy_cb(struct request *rq) static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; + struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; xfer_func_t *xferfunc; ide_expiry_t *expiry = NULL; @@ -769,11 +729,10 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) * Otherwise, complete the command normally. */ uptodate = 1; - if (rq->current_nr_sectors > 0) { + if (cmd->nleft > 0) { printk(KERN_ERR PFX "%s: %s: data underrun " - "(%d blocks)\n", - drive->name, __func__, - rq->current_nr_sectors); + "(%u bytes)\n", drive->name, __func__, + cmd->nleft); if (!write) rq->cmd_flags |= REQ_FAILED; uptodate = 0; @@ -795,24 +754,10 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) if (blk_fs_request(rq)) { if (write == 0) { - int nskip; - if (ide_cd_check_transfer_size(drive, len)) goto out_end; - - /* - * First, figure out if we need to bit-bucket - * any of the leading sectors. - */ - nskip = min_t(int, rq->current_nr_sectors - - bio_cur_sectors(rq->bio), - thislen >> 9); - if (nskip > 0) { - ide_pad_transfer(drive, write, nskip << 9); - rq->current_nr_sectors -= nskip; - thislen -= (nskip << 9); - } } + cmd->last_xfer_len = 0; } if (ireason == 0) { @@ -835,15 +780,15 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) /* bio backed? */ if (rq->bio) { if (blk_fs_request(rq)) { - ptr = rq->buffer; - blen = rq->current_nr_sectors << 9; + blen = min_t(int, thislen, cmd->nleft); } else { ptr = bio_data(rq->bio); blen = bio_iovec(rq->bio)->bv_len; } } - if (!ptr) { + if ((blk_fs_request(rq) && cmd->nleft == 0) || + (blk_fs_request(rq) == 0 && ptr == NULL)) { if (blk_fs_request(rq) && !write) /* * If the buffers are full, pipe the rest into @@ -863,26 +808,16 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) if (blen > thislen) blen = thislen; - xferfunc(drive, NULL, ptr, blen); + if (blk_fs_request(rq)) { + ide_pio_bytes(drive, cmd, write, blen); + cmd->last_xfer_len += blen; + } else + xferfunc(drive, NULL, ptr, blen); thislen -= blen; len -= blen; - if (blk_fs_request(rq)) { - rq->buffer += blen; - rq->nr_sectors -= (blen >> 9); - rq->current_nr_sectors -= (blen >> 9); - rq->sector += (blen >> 9); - - if (rq->current_nr_sectors == 0 && rq->nr_sectors) { - nsectors = rq->hard_cur_sectors; - - if (nsectors == 0) - nsectors = 1; - - ide_complete_rq(drive, 0, nsectors << 9); - } - } else { + if (blk_fs_request(rq) == 0) { rq->data_len -= blen; /* @@ -933,8 +868,10 @@ out_end: ide_cd_complete_failed_rq(drive, rq); if (blk_fs_request(rq)) { - if (rq->current_nr_sectors == 0) + if (cmd->nleft == 0) uptodate = 1; + if (uptodate == 0) + ide_cd_error_cmd(drive, cmd); } else { if (uptodate <= 0 && rq->errors == 0) rq->errors = -EIO; @@ -944,7 +881,7 @@ out_end: if (blk_pc_request(rq)) nsectors = (rq->data_len + 511) >> 9; else - nsectors = rq->hard_cur_sectors; + nsectors = rq->hard_nr_sectors; if (nsectors == 0) nsectors = 1; @@ -960,9 +897,10 @@ out_end: static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) { struct cdrom_info *cd = drive->driver_data; + struct request_queue *q = drive->queue; int write = rq_data_dir(rq) == WRITE; unsigned short sectors_per_frame = - queue_hardsect_size(drive->queue) >> SECTOR_BITS; + queue_hardsect_size(q) >> SECTOR_BITS; ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, write: 0x%x, " "secs_per_frame: %u", @@ -977,17 +915,16 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) * We may be retrying this request after an error. Fix up any * weirdness which might be present in the request packet. */ - ide_cd_restore_request(drive, rq); + q->prep_rq_fn(q, rq); } - /* use DMA, if possible / writes *must* be hardware frame aligned */ + /* fs requests *must* be hardware frame aligned */ if ((rq->nr_sectors & (sectors_per_frame - 1)) || - (rq->sector & (sectors_per_frame - 1))) { - if (write) - return ide_stopped; - drive->dma = 0; - } else - drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); + (rq->sector & (sectors_per_frame - 1))) + return ide_stopped; + + /* use DMA, if possible */ + drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); if (write) cd->devinfo.media_written = 1; @@ -1050,8 +987,6 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, if (blk_fs_request(rq)) { if (cdrom_start_rw(drive, rq) == ide_stopped || ide_cd_prepare_rw_request(drive, rq) == ide_stopped) { - if (rq->current_nr_sectors == 0) - uptodate = 1; goto out_end; } } else if (blk_sense_request(rq) || blk_pc_request(rq) || @@ -1078,6 +1013,11 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, cmd.rq = rq; + if (blk_fs_request(rq)) { + ide_init_sg_cmd(&cmd, rq->nr_sectors << 9); + ide_map_sg(drive, &cmd); + } + return ide_issue_pc(drive, &cmd); out_end: nsectors = rq->hard_nr_sectors; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 0e333ecf2ad6..a3b7a50562b2 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -188,8 +188,8 @@ static u8 wait_drive_not_busy(ide_drive_t *drive) return stat; } -static void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, - unsigned int write, unsigned int len) +void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, + unsigned int write, unsigned int len) { ide_hwif_t *hwif = drive->hwif; struct scatterlist *sg = hwif->sg_table; @@ -243,6 +243,7 @@ static void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, len -= nr_bytes; } } +EXPORT_SYMBOL_GPL(ide_pio_bytes); static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd, unsigned int write) diff --git a/include/linux/ide.h b/include/linux/ide.h index d5d832271f44..c2841c0c36c8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -352,6 +352,8 @@ struct ide_cmd { unsigned int nbytes; unsigned int nleft; + unsigned int last_xfer_len; + struct scatterlist *cursg; unsigned int cursg_ofs; @@ -1226,6 +1228,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_cmd *); ide_startstop_t do_rw_taskfile(ide_drive_t *, struct ide_cmd *); +void ide_pio_bytes(ide_drive_t *, struct ide_cmd *, unsigned int, unsigned int); + void ide_finish_cmd(ide_drive_t *, struct ide_cmd *, u8); int ide_raw_taskfile(ide_drive_t *, struct ide_cmd *, u8 *, u16); -- cgit v1.2.3 From 06a449e30135aabb6686c95bf0c42b46d169a3b3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:13 +0200 Subject: ide-cd: fix non-SECTOR_SIZE-multiples PIO transfers for fs requests We now support arbitrary number of bytes per-IRQ also for fs requests so remove ide_cd_check_transfer_size() and IDE_AFLAG_LIMIT_NFRAMES. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 36 +----------------------------------- include/linux/ide.h | 5 ----- 2 files changed, 1 insertion(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 5f15859c2c73..c0cefe5becf3 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -509,31 +509,6 @@ static int ide_cd_check_ireason(ide_drive_t *drive, struct request *rq, return -1; } -/* - * Assume that the drive will always provide data in multiples of at least - * SECTOR_SIZE, as it gets hairy to keep track of the transfers otherwise. - */ -static int ide_cd_check_transfer_size(ide_drive_t *drive, int len) -{ - ide_debug_log(IDE_DBG_FUNC, "len: %d", len); - - if ((len % SECTOR_SIZE) == 0) - return 0; - - printk(KERN_ERR PFX "%s: %s: Bad transfer size %d\n", drive->name, - __func__, len); - - if (drive->atapi_flags & IDE_AFLAG_LIMIT_NFRAMES) - printk(KERN_ERR PFX "This drive is not supported by this " - "version of the driver\n"); - else { - printk(KERN_ERR PFX "Trying to limit transfer sizes\n"); - drive->atapi_flags |= IDE_AFLAG_LIMIT_NFRAMES; - } - - return 1; -} - static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive, struct request *rq) { @@ -752,13 +727,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) if (rc) goto out_end; - if (blk_fs_request(rq)) { - if (write == 0) { - if (ide_cd_check_transfer_size(drive, len)) - goto out_end; - } - cmd->last_xfer_len = 0; - } + cmd->last_xfer_len = 0; if (ireason == 0) { write = 1; @@ -1619,9 +1588,6 @@ static const struct ide_proc_devset *ide_cd_proc_devsets(ide_drive_t *drive) #endif static const struct cd_list_entry ide_cd_quirks_list[] = { - /* Limit transfer size per interrupt. */ - { "SAMSUNG CD-ROM SCR-2430", NULL, IDE_AFLAG_LIMIT_NFRAMES }, - { "SAMSUNG CD-ROM SCR-2432", NULL, IDE_AFLAG_LIMIT_NFRAMES }, /* SCR-3231 doesn't support the SET_CD_SPEED command. */ { "SAMSUNG CD-ROM SCR-3231", NULL, IDE_AFLAG_NO_SPEED_SELECT }, /* Old NEC260 (not R) was released before ATAPI 1.2 spec. */ diff --git a/include/linux/ide.h b/include/linux/ide.h index c2841c0c36c8..cb501bf78f7d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -458,11 +458,6 @@ enum { IDE_AFLAG_TOCADDR_AS_BCD = (1 << 3), /* TOC track numbers are in BCD. */ IDE_AFLAG_TOCTRACKS_AS_BCD = (1 << 4), - /* - * Drive does not provide data in multiples of SECTOR_SIZE - * when more than one interrupt is needed. - */ - IDE_AFLAG_LIMIT_NFRAMES = (1 << 5), /* Saved TOC information is current. */ IDE_AFLAG_TOC_VALID = (1 << 6), /* We think that the drive door is locked. */ -- cgit v1.2.3 From 35c9b4daf4c94b30e5cede597d98016ebf31b5ad Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:19 +0200 Subject: ide: add ->dma_clear method and remove ->dma_timeout one All custom ->dma_timeout implementations call the generic one thus it is possible to have only an optional method for resetting DMA engine instead: * Add ->dma_clear method and convert hpt366, pdc202xx_old and sl82c105 host drivers to use it. * Always use ide_dma_timeout() in ide_dma_timeout_retry() and remove ->dma_timeout method. * Make ide_dma_timeout() static. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/alim15x3.c | 1 - drivers/ide/au1xxx-ide.c | 1 - drivers/ide/cmd64x.c | 3 --- drivers/ide/cs5536.c | 1 - drivers/ide/hpt366.c | 10 +--------- drivers/ide/icside.c | 1 - drivers/ide/ide-dma-sff.c | 3 +-- drivers/ide/ide-dma.c | 10 ++++++---- drivers/ide/it821x.c | 3 +-- drivers/ide/ns87415.c | 1 - drivers/ide/pdc202xx_old.c | 10 ++-------- drivers/ide/pmac.c | 1 - drivers/ide/sc1200.c | 1 - drivers/ide/scc_pata.c | 1 - drivers/ide/sgiioc4.c | 1 - drivers/ide/siimage.c | 1 - drivers/ide/sl82c105.c | 7 +++---- drivers/ide/tc86c001.c | 1 - drivers/ide/trm290.c | 1 - drivers/ide/tx4939ide.c | 1 - include/linux/ide.h | 4 ++-- 21 files changed, 16 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index d516168464fc..d3faf0b97f42 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -509,7 +509,6 @@ static const struct ide_dma_ops ali_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index d3a9d6c15328..0c08c5e01f2a 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -353,7 +353,6 @@ static const struct ide_dma_ops au1xxx_dma_ops = { .dma_end = auide_dma_end, .dma_test_irq = auide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, - .dma_timeout = ide_dma_timeout, }; static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index bf0e3f470824..f0a49d2ff711 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -384,7 +384,6 @@ static const struct ide_dma_ops cmd64x_dma_ops = { .dma_test_irq = cmd64x_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -396,7 +395,6 @@ static const struct ide_dma_ops cmd646_rev1_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -408,7 +406,6 @@ static const struct ide_dma_ops cmd648_dma_ops = { .dma_test_irq = cmd648_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c index d5dcf4899607..353a35bbba63 100644 --- a/drivers/ide/cs5536.c +++ b/drivers/ide/cs5536.c @@ -236,7 +236,6 @@ static const struct ide_dma_ops cs5536_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, }; static const struct ide_port_info cs5536_info = { diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index dbaf184ed9c5..a0eb87f59134 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -835,12 +835,6 @@ static int hpt370_dma_end(ide_drive_t *drive) return ide_dma_end(drive); } -static void hpt370_dma_timeout(ide_drive_t *drive) -{ - hpt370_irq_timeout(drive); - ide_dma_timeout(drive); -} - /* returns 1 if DMA IRQ issued, 0 otherwise */ static int hpt374_dma_test_irq(ide_drive_t *drive) { @@ -1423,7 +1417,6 @@ static const struct ide_dma_ops hpt37x_dma_ops = { .dma_test_irq = hpt374_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -1435,7 +1428,7 @@ static const struct ide_dma_ops hpt370_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = hpt370_dma_timeout, + .dma_clear = hpt370_irq_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -1447,7 +1440,6 @@ static const struct ide_dma_ops hpt36x_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = hpt366_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 51ce404fe532..f069f122ee6e 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -377,7 +377,6 @@ static const struct ide_dma_ops icside_v6_dma_ops = { .dma_start = icside_dma_start, .dma_end = icside_dma_end, .dma_test_irq = icside_dma_test_irq, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, }; #else diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c index 75a9ea2e4c82..7836d7e03fff 100644 --- a/drivers/ide/ide-dma-sff.c +++ b/drivers/ide/ide-dma-sff.c @@ -338,9 +338,8 @@ const struct ide_dma_ops sff_dma_ops = { .dma_start = ide_dma_start, .dma_end = ide_dma_end, .dma_test_irq = ide_dma_test_irq, - .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, + .dma_timer_expiry = ide_dma_sff_timer_expiry, .dma_sff_read_status = ide_dma_sff_read_status, }; EXPORT_SYMBOL_GPL(sff_dma_ops); diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 3dbf80c15491..dc5d9bc4ced0 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -460,7 +460,7 @@ void ide_dma_lost_irq(ide_drive_t *drive) } EXPORT_SYMBOL_GPL(ide_dma_lost_irq); -void ide_dma_timeout(ide_drive_t *drive) +static void ide_dma_timeout(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -473,7 +473,6 @@ void ide_dma_timeout(ide_drive_t *drive) hwif->dma_ops->dma_end(drive); } -EXPORT_SYMBOL_GPL(ide_dma_timeout); /* * un-busy the port etc, and clear any pending DMA status. we want to @@ -483,6 +482,7 @@ EXPORT_SYMBOL_GPL(ide_dma_timeout); ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) { ide_hwif_t *hwif = drive->hwif; + const struct ide_dma_ops *dma_ops = hwif->dma_ops; struct request *rq; ide_startstop_t ret = ide_stopped; @@ -492,12 +492,14 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) if (error < 0) { printk(KERN_WARNING "%s: DMA timeout error\n", drive->name); - (void)hwif->dma_ops->dma_end(drive); + (void)dma_ops->dma_end(drive); ret = ide_error(drive, "dma timeout error", hwif->tp_ops->read_status(hwif)); } else { printk(KERN_WARNING "%s: DMA timeout retry\n", drive->name); - hwif->dma_ops->dma_timeout(drive); + if (dma_ops->dma_clear) + dma_ops->dma_clear(drive); + ide_dma_timeout(drive); } /* diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 0d4ac65cf949..51aa745246dc 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -511,9 +511,8 @@ static struct ide_dma_ops it821x_pass_through_dma_ops = { .dma_start = it821x_dma_start, .dma_end = it821x_dma_end, .dma_test_irq = ide_dma_test_irq, - .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, + .dma_timer_expiry = ide_dma_sff_timer_expiry, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c index 7b65fe5bf449..9039a373020f 100644 --- a/drivers/ide/ns87415.c +++ b/drivers/ide/ns87415.c @@ -306,7 +306,6 @@ static const struct ide_dma_ops ns87415_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = superio_dma_sff_read_status, }; diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index f7536d1943f7..248a54bd2386 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -258,12 +258,6 @@ static void pdc202xx_dma_lost_irq(ide_drive_t *drive) ide_dma_lost_irq(drive); } -static void pdc202xx_dma_timeout(ide_drive_t *drive) -{ - pdc202xx_reset(drive); - ide_dma_timeout(drive); -} - static int init_chipset_pdc202xx(struct pci_dev *dev) { unsigned long dmabase = pci_resource_start(dev, 4); @@ -336,7 +330,7 @@ static const struct ide_dma_ops pdc20246_dma_ops = { .dma_test_irq = pdc202xx_dma_test_irq, .dma_lost_irq = pdc202xx_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = pdc202xx_dma_timeout, + .dma_clear = pdc202xx_reset, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -348,7 +342,7 @@ static const struct ide_dma_ops pdc2026x_dma_ops = { .dma_test_irq = pdc202xx_dma_test_irq, .dma_lost_irq = pdc202xx_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = pdc202xx_dma_timeout, + .dma_clear = pdc202xx_reset, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 2bfcfedaa076..d15cc46a66e3 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -1650,7 +1650,6 @@ static const struct ide_dma_ops pmac_dma_ops = { .dma_start = pmac_ide_dma_start, .dma_end = pmac_ide_dma_end, .dma_test_irq = pmac_ide_dma_test_irq, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = pmac_ide_dma_lost_irq, }; diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c index 1c3a82914999..371549d18a01 100644 --- a/drivers/ide/sc1200.c +++ b/drivers/ide/sc1200.c @@ -291,7 +291,6 @@ static const struct ide_dma_ops sc1200_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 0cc137cfe76d..64534d150b0c 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -872,7 +872,6 @@ static const struct ide_dma_ops scc_dma_ops = { .dma_end = scc_dma_end, .dma_test_irq = scc_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, - .dma_timeout = ide_dma_timeout, .dma_timer_expiry = ide_dma_sff_timer_expiry, .dma_sff_read_status = scc_dma_sff_read_status, }; diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index b12de8346c73..44df0c750bab 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -533,7 +533,6 @@ static const struct ide_dma_ops sgiioc4_dma_ops = { .dma_end = sgiioc4_dma_end, .dma_test_irq = sgiioc4_dma_test_irq, .dma_lost_irq = sgiioc4_dma_lost_irq, - .dma_timeout = ide_dma_timeout, }; static const struct ide_port_info sgiioc4_port_info __devinitconst = { diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c index 075cb1243b2a..e4973cd1fba9 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -715,7 +715,6 @@ static const struct ide_dma_ops sil_dma_ops = { .dma_end = ide_dma_end, .dma_test_irq = siimage_dma_test_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c index d25137b04e7a..d6f8977191c8 100644 --- a/drivers/ide/sl82c105.c +++ b/drivers/ide/sl82c105.c @@ -189,14 +189,13 @@ static void sl82c105_dma_start(ide_drive_t *drive) ide_dma_start(drive); } -static void sl82c105_dma_timeout(ide_drive_t *drive) +static void sl82c105_dma_clear(ide_drive_t *drive) { struct pci_dev *dev = to_pci_dev(drive->hwif->dev); - DBG(("sl82c105_dma_timeout(drive:%s)\n", drive->name)); + DBG(("sl82c105_dma_clear(drive:%s)\n", drive->name)); sl82c105_reset_host(dev); - ide_dma_timeout(drive); } static int sl82c105_dma_end(ide_drive_t *drive) @@ -298,7 +297,7 @@ static const struct ide_dma_ops sl82c105_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = sl82c105_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = sl82c105_dma_timeout, + .dma_clear = sl82c105_dma_clear, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c index 427d4b3c2c63..b4cf42dc8a6f 100644 --- a/drivers/ide/tc86c001.c +++ b/drivers/ide/tc86c001.c @@ -187,7 +187,6 @@ static const struct ide_dma_ops tc86c001_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c index ed1496845a93..d6a950828e9f 100644 --- a/drivers/ide/trm290.c +++ b/drivers/ide/trm290.c @@ -314,7 +314,6 @@ static struct ide_dma_ops trm290_dma_ops = { .dma_end = trm290_dma_end, .dma_test_irq = trm290_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, - .dma_timeout = ide_dma_timeout, }; static const struct ide_port_info trm290_chipset __devinitdata = { diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index e0e0a803dde3..53f99853b065 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -632,7 +632,6 @@ static const struct ide_dma_ops tx4939ide_dma_ops = { .dma_test_irq = tx4939ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = tx4939ide_dma_sff_read_status, }; diff --git a/include/linux/ide.h b/include/linux/ide.h index cb501bf78f7d..d3035f2f1250 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -716,8 +716,9 @@ struct ide_dma_ops { int (*dma_end)(struct ide_drive_s *); int (*dma_test_irq)(struct ide_drive_s *); void (*dma_lost_irq)(struct ide_drive_s *); + /* below ones are optional */ int (*dma_timer_expiry)(struct ide_drive_s *); - void (*dma_timeout)(struct ide_drive_s *); + void (*dma_clear)(struct ide_drive_s *); /* * The following method is optional and only required to be * implemented for the SFF-8038i compatible controllers. @@ -1461,7 +1462,6 @@ static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } #endif /* CONFIG_BLK_DEV_IDEDMA_SFF */ void ide_dma_lost_irq(ide_drive_t *); -void ide_dma_timeout(ide_drive_t *); ide_startstop_t ide_dma_timeout_retry(ide_drive_t *, int); #else -- cgit v1.2.3 From 4453011f959a5f5c6c7a33aea54fe17f5e43a867 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:20 +0200 Subject: ide: destroy DMA mappings after ending DMA (v2) Move ide_destroy_dmatable() call out from ->dma_end method to {ide_pc,cdrom_newpc,ide_dma}_intr(), ide_dma_timeout_retry() and sgiioc4_resetproc(). This causes minor/safe behavior changes w.r.t.: * cmd64x.c::cmd64{8,x}_dma_end() * cs5536.c::cs5536_dma_end() * icside.c::icside_dma_end() * it821x.c::it821x_dma_end() * scc_pata.c::__scc_dma_end() * sl82c105.c::sl82c105_dma_end() * tx4939ide.c::tx4939ide_dma_end() v2: * Fix build for CONFIG_BLK_DEV_IDEDMA=n (reported by Randy Dunlap). Cc: Randy Dunlap Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/au1xxx-ide.c | 2 -- drivers/ide/cmd64x.c | 2 -- drivers/ide/icside.c | 3 --- drivers/ide/ide-atapi.c | 7 +++++-- drivers/ide/ide-cd.c | 1 + drivers/ide/ide-dma-sff.c | 2 -- drivers/ide/ide-dma.c | 3 +++ drivers/ide/ns87415.c | 2 -- drivers/ide/pmac.c | 2 -- drivers/ide/sc1200.c | 1 - drivers/ide/scc_pata.c | 2 -- drivers/ide/sgiioc4.c | 2 +- drivers/ide/trm290.c | 3 +-- drivers/ide/tx4939ide.c | 4 +--- include/linux/ide.h | 1 + 15 files changed, 13 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 0c08c5e01f2a..ba2a211758a9 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -280,8 +280,6 @@ static int auide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) static int auide_dma_end(ide_drive_t *drive) { - ide_destroy_dmatable(drive); - return 0; } diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index f0a49d2ff711..f2edf280ef8b 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -327,8 +327,6 @@ static int cmd646_1_dma_end(ide_drive_t *drive) outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD); /* clear the INTR & ERROR bits */ outb(dma_stat | 6, hwif->dma_base + ATA_DMA_STATUS); - /* and free any DMA resources */ - ide_destroy_dmatable(drive); /* verify good DMA status */ return (dma_stat & 7) != 4; } diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index f069f122ee6e..9bf57d7c8e57 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -291,9 +291,6 @@ static int icside_dma_end(ide_drive_t *drive) disable_dma(ec->dma); - /* Teardown mappings after DMA has completed. */ - ide_destroy_dmatable(drive); - return get_dma_residue(ec->dma) != 0; } diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index f591166d2c93..1481f71f8173 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -342,8 +342,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) stat = tp_ops->read_status(hwif); if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) { - if (hwif->dma_ops->dma_end(drive) || - (drive->media == ide_tape && (stat & ATA_ERR))) { + int rc = hwif->dma_ops->dma_end(drive); + + ide_destroy_dmatable(drive); + + if (rc || (drive->media == ide_tape && (stat & ATA_ERR))) { if (drive->media == ide_floppy) printk(KERN_ERR "%s: DMA %s error\n", drive->name, rq_data_dir(pc->rq) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 5319e7a73708..4a0d66ee9547 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -639,6 +639,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) if (dma) { drive->dma = 0; dma_error = hwif->dma_ops->dma_end(drive); + ide_destroy_dmatable(drive); if (dma_error) { printk(KERN_ERR PFX "%s: DMA %s error\n", drive->name, write ? "write" : "read"); diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c index 7836d7e03fff..f8adbb5eb339 100644 --- a/drivers/ide/ide-dma-sff.c +++ b/drivers/ide/ide-dma-sff.c @@ -310,8 +310,6 @@ int ide_dma_end(ide_drive_t *drive) /* clear INTR & ERROR bits */ ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR); - /* purge DMA mappings */ - ide_destroy_dmatable(drive); wmb(); /* verify good DMA status */ diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 4e2005071113..b430898bbcd6 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -92,6 +92,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive) u8 stat = 0, dma_stat = 0; dma_stat = hwif->dma_ops->dma_end(drive); + ide_destroy_dmatable(drive); stat = hwif->tp_ops->read_status(hwif); if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) { @@ -479,6 +480,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) if (error < 0) { printk(KERN_WARNING "%s: DMA timeout error\n", drive->name); (void)dma_ops->dma_end(drive); + ide_destroy_dmatable(drive); ret = ide_error(drive, "dma timeout error", hwif->tp_ops->read_status(hwif)); } else { @@ -490,6 +492,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) ide_dump_status(drive, "DMA timeout", hwif->tp_ops->read_status(hwif)); (void)dma_ops->dma_end(drive); + ide_destroy_dmatable(drive); } } diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c index 9039a373020f..9ad71a74f93f 100644 --- a/drivers/ide/ns87415.c +++ b/drivers/ide/ns87415.c @@ -210,8 +210,6 @@ static int ns87415_dma_end(ide_drive_t *drive) /* from ERRATA: clear the INTR & ERROR bits */ dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD); outb(dma_cmd | 6, hwif->dma_base + ATA_DMA_CMD); - /* and free any DMA resources */ - ide_destroy_dmatable(drive); /* verify good DMA status */ return (dma_stat & 7) != 4; } diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index d15cc46a66e3..5643a8b957bf 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -1562,8 +1562,6 @@ pmac_ide_dma_end (ide_drive_t *drive) dstat = readl(&dma->status); writel(((RUN|WAKE|DEAD) << 16), &dma->control); - ide_destroy_dmatable(drive); - /* verify good dma status. we don't check for ACTIVE beeing 0. We should... * in theory, but with ATAPI decices doing buffer underruns, that would * cause us to disable DMA, which isn't what we want diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c index 371549d18a01..d9c47034bedd 100644 --- a/drivers/ide/sc1200.c +++ b/drivers/ide/sc1200.c @@ -184,7 +184,6 @@ static int sc1200_dma_end(ide_drive_t *drive) outb(inb(dma_base)&~1, dma_base); /* !! DO THIS HERE !! stop DMA */ drive->waiting_for_dma = 0; - ide_destroy_dmatable(drive); /* purge DMA mappings */ return (dma_stat & 7) != 4; /* verify good DMA status */ } diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 64534d150b0c..693536ebe331 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -365,8 +365,6 @@ static int __scc_dma_end(ide_drive_t *drive) dma_stat = scc_dma_sff_read_status(hwif); /* clear the INTR & ERROR bits */ scc_ide_outb(dma_stat | 6, hwif->dma_base + 4); - /* purge DMA mappings */ - ide_destroy_dmatable(drive); /* verify good DMA status */ wmb(); return (dma_stat & 7) != 4 ? (0x10 | dma_stat) : 0; diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index 44df0c750bab..457a762a1f29 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -259,7 +259,6 @@ static int sgiioc4_dma_end(ide_drive_t *drive) } drive->waiting_for_dma = 0; - ide_destroy_dmatable(drive); return dma_stat; } @@ -284,6 +283,7 @@ static void sgiioc4_resetproc(ide_drive_t * drive) { sgiioc4_dma_end(drive); + ide_destroy_dmatable(drive); sgiioc4_clearirq(drive); } diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c index d6a950828e9f..8dd3d8226870 100644 --- a/drivers/ide/trm290.c +++ b/drivers/ide/trm290.c @@ -216,8 +216,7 @@ static int trm290_dma_end(ide_drive_t *drive) u16 status; drive->waiting_for_dma = 0; - /* purge DMA mappings */ - ide_destroy_dmatable(drive); + status = inw(drive->hwif->dma_base + 2); return status != 0x00ff; diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 53f99853b065..f62ced855cf3 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -335,11 +335,9 @@ static int tx4939ide_dma_end(ide_drive_t *drive) /* read and clear the INTR & ERROR bits */ dma_stat = tx4939ide_clear_dma_status(base); - /* purge DMA mappings */ - ide_destroy_dmatable(drive); - /* verify good DMA status */ wmb(); + /* verify good DMA status */ if ((dma_stat & (ATA_DMA_INTR | ATA_DMA_ERR | ATA_DMA_ACTIVE)) == 0 && (ctl & (TX4939IDE_INT_XFEREND | TX4939IDE_INT_HOST)) == (TX4939IDE_INT_XFEREND | TX4939IDE_INT_HOST)) diff --git a/include/linux/ide.h b/include/linux/ide.h index d3035f2f1250..b6c4942fde11 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1479,6 +1479,7 @@ static inline ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int erro static inline void ide_release_dma_engine(ide_hwif_t *hwif) { ; } static inline int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) { return 0; } +static inline void ide_destroy_dmatable(ide_drive_t *drive) { ; } #endif /* CONFIG_BLK_DEV_IDEDMA */ #ifdef CONFIG_BLK_DEV_IDEACPI -- cgit v1.2.3 From 5ae5412d9a23b05ab08461b202bad21ad8f6b66d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:20 +0200 Subject: ide: add ide_dma_prepare() helper * Add ide_dma_prepare() helper. * Convert ide_issue_pc() and do_rw_taskfile() to use it. * Make ide_build_sglist() static. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-atapi.c | 18 ++++-------------- drivers/ide/ide-dma.c | 11 ++++++++++- drivers/ide/ide-taskfile.c | 4 +--- include/linux/ide.h | 7 ++++--- 4 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 1481f71f8173..89d2339bdef3 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -638,7 +638,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) { struct ide_atapi_pc *pc; ide_hwif_t *hwif = drive->hwif; - const struct ide_dma_ops *dma_ops = hwif->dma_ops; ide_expiry_t *expiry = NULL; struct request *rq = hwif->rq; unsigned int timeout; @@ -652,12 +651,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) expiry = ide_cd_expiry; timeout = ATAPI_WAIT_PC; - if (drive->dma) { - if (ide_build_sglist(drive, cmd)) - drive->dma = !dma_ops->dma_setup(drive, cmd); - else - drive->dma = 0; - } + if (drive->dma) + drive->dma = !ide_dma_prepare(drive, cmd); } else { pc = drive->pc; @@ -675,13 +670,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) ide_dma_off(drive); } - if ((pc->flags & PC_FLAG_DMA_OK) && - (drive->dev_flags & IDE_DFLAG_USING_DMA)) { - if (ide_build_sglist(drive, cmd)) - drive->dma = !dma_ops->dma_setup(drive, cmd); - else - drive->dma = 0; - } + if (pc->flags & PC_FLAG_DMA_OK) + drive->dma = !ide_dma_prepare(drive, cmd); if (!drive->dma) pc->flags &= ~PC_FLAG_DMA_OK; diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index b430898bbcd6..cf5897f5533f 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -128,7 +128,7 @@ int ide_dma_good_drive(ide_drive_t *drive) * operate in a portable fashion. */ -int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) +static int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) { ide_hwif_t *hwif = drive->hwif; struct scatterlist *sg = hwif->sg_table; @@ -563,3 +563,12 @@ int ide_allocate_dma_engine(ide_hwif_t *hwif) return 0; } EXPORT_SYMBOL_GPL(ide_allocate_dma_engine); + +int ide_dma_prepare(ide_drive_t *drive, struct ide_cmd *cmd) +{ + if ((drive->dev_flags & IDE_DFLAG_USING_DMA) == 0 || + ide_build_sglist(drive, cmd) == 0 || + drive->hwif->dma_ops->dma_setup(drive, cmd)) + return 1; + return 0; +} diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index a3b7a50562b2..dba68db629bf 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -100,9 +100,7 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd) ide_execute_command(drive, cmd, handler, WAIT_WORSTCASE); return ide_started; case ATA_PROT_DMA: - if ((drive->dev_flags & IDE_DFLAG_USING_DMA) == 0 || - ide_build_sglist(drive, cmd) == 0 || - dma_ops->dma_setup(drive, cmd)) + if (ide_dma_prepare(drive, cmd)) return ide_stopped; hwif->expiry = dma_ops->dma_timer_expiry; ide_execute_command(drive, cmd, ide_dma_intr, 2 * WAIT_CMD); diff --git a/include/linux/ide.h b/include/linux/ide.h index b6c4942fde11..78892e2a432c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1443,7 +1443,8 @@ ide_startstop_t ide_dma_intr(ide_drive_t *); int ide_allocate_dma_engine(ide_hwif_t *); void ide_release_dma_engine(ide_hwif_t *); -int ide_build_sglist(ide_drive_t *, struct ide_cmd *); +int ide_dma_prepare(ide_drive_t *, struct ide_cmd *); + void ide_destroy_dmatable(ide_drive_t *); #ifdef CONFIG_BLK_DEV_IDEDMA_SFF @@ -1477,8 +1478,8 @@ static inline void ide_check_dma_crc(ide_drive_t *drive) { ; } static inline ide_startstop_t ide_dma_intr(ide_drive_t *drive) { return ide_stopped; } static inline ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) { return ide_stopped; } static inline void ide_release_dma_engine(ide_hwif_t *hwif) { ; } -static inline int ide_build_sglist(ide_drive_t *drive, - struct ide_cmd *cmd) { return 0; } +static inline int ide_dma_prepare(ide_drive_t *drive, + struct ide_cmd *cmd) { return 1; } static inline void ide_destroy_dmatable(ide_drive_t *drive) { ; } #endif /* CONFIG_BLK_DEV_IDEDMA */ -- cgit v1.2.3 From 8a4a5738ba499083cf4c5668895efe220b1946d3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:21 +0200 Subject: ide: add ->dma_check method * Add (an optional) ->dma_check method for checking if DMA can be used for a given command and fail DMA setup in ide_dma_prepare() if necessary. * Convert alim15x3 and trm290 host drivers to use ->dma_check. * Rename ali15x3_dma_setup() to ali_dma_check() while at it. There should be no functional changes caused by this patch. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/alim15x3.c | 9 +++++---- drivers/ide/ide-dma.c | 5 ++++- drivers/ide/trm290.c | 17 ++++++++++------- include/linux/ide.h | 1 + 4 files changed, 20 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index d3faf0b97f42..537da1cde16d 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -189,20 +189,20 @@ static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed) } /** - * ali15x3_dma_setup - begin a DMA phase + * ali_dma_check - DMA check * @drive: target device * @cmd: command * * Returns 1 if the DMA cannot be performed, zero on success. */ -static int ali15x3_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) +static int ali_dma_check(ide_drive_t *drive, struct ide_cmd *cmd) { if (m5229_revision < 0xC2 && drive->media != ide_disk) { if (cmd->tf_flags & IDE_TFLAG_WRITE) return 1; /* try PIO instead of DMA */ } - return ide_dma_setup(drive, cmd); + return 0; } /** @@ -503,11 +503,12 @@ static const struct ide_port_ops ali_port_ops = { static const struct ide_dma_ops ali_dma_ops = { .dma_host_set = ide_dma_host_set, - .dma_setup = ali15x3_dma_setup, + .dma_setup = ide_dma_setup, .dma_start = ide_dma_start, .dma_end = ide_dma_end, .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, + .dma_check = ali_dma_check, .dma_timer_expiry = ide_dma_sff_timer_expiry, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index cf5897f5533f..c0505e2dfc2e 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -566,9 +566,12 @@ EXPORT_SYMBOL_GPL(ide_allocate_dma_engine); int ide_dma_prepare(ide_drive_t *drive, struct ide_cmd *cmd) { + const struct ide_dma_ops *dma_ops = drive->hwif->dma_ops; + if ((drive->dev_flags & IDE_DFLAG_USING_DMA) == 0 || + (dma_ops->dma_check && dma_ops->dma_check(drive, cmd)) || ide_build_sglist(drive, cmd) == 0 || - drive->hwif->dma_ops->dma_setup(drive, cmd)) + dma_ops->dma_setup(drive, cmd)) return 1; return 0; } diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c index b91bb709af40..1076efd050dc 100644 --- a/drivers/ide/trm290.c +++ b/drivers/ide/trm290.c @@ -176,19 +176,21 @@ static void trm290_selectproc (ide_drive_t *drive) trm290_prepare_drive(drive, !!(drive->dev_flags & IDE_DFLAG_USING_DMA)); } -static int trm290_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) +static int trm290_dma_check(ide_drive_t *drive, struct ide_cmd *cmd) { - ide_hwif_t *hwif = drive->hwif; - unsigned int count, rw; - if (cmd->tf_flags & IDE_TFLAG_WRITE) { #ifdef TRM290_NO_DMA_WRITES /* always use PIO for writes */ return 1; #endif - rw = 1; - } else - rw = 2; + } + return 0; +} + +static int trm290_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) +{ + ide_hwif_t *hwif = drive->hwif; + unsigned int count, rw = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 1 : 2; count = ide_build_dmatable(drive, cmd); if (count == 0) { @@ -312,6 +314,7 @@ static struct ide_dma_ops trm290_dma_ops = { .dma_end = trm290_dma_end, .dma_test_irq = trm290_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, + .dma_check = trm290_dma_check, }; static const struct ide_port_info trm290_chipset __devinitdata = { diff --git a/include/linux/ide.h b/include/linux/ide.h index 78892e2a432c..b350667b83ad 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -717,6 +717,7 @@ struct ide_dma_ops { int (*dma_test_irq)(struct ide_drive_s *); void (*dma_lost_irq)(struct ide_drive_s *); /* below ones are optional */ + int (*dma_check)(struct ide_drive_s *, struct ide_cmd *); int (*dma_timer_expiry)(struct ide_drive_s *); void (*dma_clear)(struct ide_drive_s *); /* -- cgit v1.2.3 From f094d4d83bccee9277ddb6aadccf35747889426b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:24 +0200 Subject: ide: sanitize ide_build_sglist() and ide_destroy_dmatable() * Move ide_map_sg() calls out from ide_build_sglist() to ide_dma_prepare(). * Pass command to ide_destroy_dmatable(). * Rename ide_build_sglist() to ide_dma_map_sg() and ide_destroy_dmatable() to ide_dma_unmap_sg(). There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-atapi.c | 3 ++- drivers/ide/ide-cd.c | 2 +- drivers/ide/ide-dma.c | 50 ++++++++++++++++++++++++------------------------- drivers/ide/sgiioc4.c | 7 ++++--- include/linux/ide.h | 6 +++--- 5 files changed, 35 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index db6e617790bd..3f3fc7c7b2fe 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -326,6 +326,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) { struct ide_atapi_pc *pc = drive->pc; ide_hwif_t *hwif = drive->hwif; + struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; xfer_func_t *xferfunc; @@ -346,7 +347,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->waiting_for_dma = 0; rc = hwif->dma_ops->dma_end(drive); - ide_destroy_dmatable(drive); + ide_dma_unmap_sg(drive, cmd); if (rc || (drive->media == ide_tape && (stat & ATA_ERR))) { if (drive->media == ide_floppy) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index f5c7bb739f45..35729a47f797 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -640,7 +640,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) drive->dma = 0; drive->waiting_for_dma = 0; dma_error = hwif->dma_ops->dma_end(drive); - ide_destroy_dmatable(drive); + ide_dma_unmap_sg(drive, cmd); if (dma_error) { printk(KERN_ERR PFX "%s: DMA %s error\n", drive->name, write ? "write" : "read"); diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 4d3102887d9c..a5612eadc306 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -89,17 +89,16 @@ static const struct drive_list_entry drive_blacklist[] = { ide_startstop_t ide_dma_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; + struct ide_cmd *cmd = &hwif->cmd; u8 stat = 0, dma_stat = 0; drive->waiting_for_dma = 0; dma_stat = hwif->dma_ops->dma_end(drive); - ide_destroy_dmatable(drive); + ide_dma_unmap_sg(drive, cmd); stat = hwif->tp_ops->read_status(hwif); if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) { if (!dma_stat) { - struct ide_cmd *cmd = &hwif->cmd; - if ((cmd->tf_flags & IDE_TFLAG_FS) == 0) ide_finish_cmd(drive, cmd, stat); else @@ -119,8 +118,8 @@ int ide_dma_good_drive(ide_drive_t *drive) } /** - * ide_build_sglist - map IDE scatter gather for DMA I/O - * @drive: the drive to build the DMA table for + * ide_dma_map_sg - map IDE scatter gather for DMA I/O + * @drive: the drive to map the DMA table for * @cmd: command * * Perform the DMA mapping magic necessary to access the source or @@ -129,23 +128,19 @@ int ide_dma_good_drive(ide_drive_t *drive) * operate in a portable fashion. */ -static int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) +static int ide_dma_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) { ide_hwif_t *hwif = drive->hwif; struct scatterlist *sg = hwif->sg_table; int i; - ide_map_sg(drive, cmd); - if (cmd->tf_flags & IDE_TFLAG_WRITE) cmd->sg_dma_direction = DMA_TO_DEVICE; else cmd->sg_dma_direction = DMA_FROM_DEVICE; i = dma_map_sg(hwif->dev, sg, cmd->sg_nents, cmd->sg_dma_direction); - if (i == 0) - ide_map_sg(drive, cmd); - else { + if (i) { cmd->orig_sg_nents = cmd->sg_nents; cmd->sg_nents = i; } @@ -154,7 +149,7 @@ static int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) } /** - * ide_destroy_dmatable - clean up DMA mapping + * ide_dma_unmap_sg - clean up DMA mapping * @drive: The drive to unmap * * Teardown mappings after DMA has completed. This must be called @@ -164,15 +159,14 @@ static int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) * time. */ -void ide_destroy_dmatable(ide_drive_t *drive) +void ide_dma_unmap_sg(ide_drive_t *drive, struct ide_cmd *cmd) { ide_hwif_t *hwif = drive->hwif; - struct ide_cmd *cmd = &hwif->cmd; dma_unmap_sg(hwif->dev, hwif->sg_table, cmd->orig_sg_nents, cmd->sg_dma_direction); } -EXPORT_SYMBOL_GPL(ide_destroy_dmatable); +EXPORT_SYMBOL_GPL(ide_dma_unmap_sg); /** * ide_dma_off_quietly - Generic DMA kill @@ -471,6 +465,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) { ide_hwif_t *hwif = drive->hwif; const struct ide_dma_ops *dma_ops = hwif->dma_ops; + struct ide_cmd *cmd = &hwif->cmd; struct request *rq; ide_startstop_t ret = ide_stopped; @@ -482,7 +477,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) printk(KERN_WARNING "%s: DMA timeout error\n", drive->name); drive->waiting_for_dma = 0; (void)dma_ops->dma_end(drive); - ide_destroy_dmatable(drive); + ide_dma_unmap_sg(drive, cmd); ret = ide_error(drive, "dma timeout error", hwif->tp_ops->read_status(hwif)); } else { @@ -495,7 +490,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) hwif->tp_ops->read_status(hwif)); drive->waiting_for_dma = 0; (void)dma_ops->dma_end(drive); - ide_destroy_dmatable(drive); + ide_dma_unmap_sg(drive, cmd); } } @@ -572,14 +567,19 @@ int ide_dma_prepare(ide_drive_t *drive, struct ide_cmd *cmd) const struct ide_dma_ops *dma_ops = drive->hwif->dma_ops; if ((drive->dev_flags & IDE_DFLAG_USING_DMA) == 0 || - (dma_ops->dma_check && dma_ops->dma_check(drive, cmd)) || - ide_build_sglist(drive, cmd) == 0) - return 1; - if (dma_ops->dma_setup(drive, cmd)) { - ide_destroy_dmatable(drive); - ide_map_sg(drive, cmd); - return 1; - } + (dma_ops->dma_check && dma_ops->dma_check(drive, cmd))) + goto out; + ide_map_sg(drive, cmd); + if (ide_dma_map_sg(drive, cmd) == 0) + goto out_map; + if (dma_ops->dma_setup(drive, cmd)) + goto out_dma_unmap; drive->waiting_for_dma = 1; return 0; +out_dma_unmap: + ide_dma_unmap_sg(drive, cmd); +out_map: + ide_map_sg(drive, cmd); +out: + return 1; } diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index cb2657c4c976..6ef5a567d377 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -277,11 +277,12 @@ static void sgiioc4_dma_host_set(ide_drive_t *drive, int on) sgiioc4_clearirq(drive); } -static void -sgiioc4_resetproc(ide_drive_t * drive) +static void sgiioc4_resetproc(ide_drive_t *drive) { + struct ide_cmd *cmd = &drive->hwif->cmd; + sgiioc4_dma_end(drive); - ide_destroy_dmatable(drive); + ide_dma_unmap_sg(drive, cmd); sgiioc4_clearirq(drive); } diff --git a/include/linux/ide.h b/include/linux/ide.h index b350667b83ad..03c520917b7a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1445,8 +1445,7 @@ int ide_allocate_dma_engine(ide_hwif_t *); void ide_release_dma_engine(ide_hwif_t *); int ide_dma_prepare(ide_drive_t *, struct ide_cmd *); - -void ide_destroy_dmatable(ide_drive_t *); +void ide_dma_unmap_sg(ide_drive_t *, struct ide_cmd *); #ifdef CONFIG_BLK_DEV_IDEDMA_SFF int config_drive_for_dma(ide_drive_t *); @@ -1481,7 +1480,8 @@ static inline ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int erro static inline void ide_release_dma_engine(ide_hwif_t *hwif) { ; } static inline int ide_dma_prepare(ide_drive_t *drive, struct ide_cmd *cmd) { return 1; } -static inline void ide_destroy_dmatable(ide_drive_t *drive) { ; } +static inline void ide_dma_unmap_sg(ide_drive_t *drive, + struct ide_cmd *cmd) { ; } #endif /* CONFIG_BLK_DEV_IDEDMA */ #ifdef CONFIG_BLK_DEV_IDEACPI -- cgit v1.2.3 From 41fa9f863baacd32dd049daf8050d55a0c9e6f1a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:25 +0200 Subject: ide: decrease size of ->pc_buf field in struct ide_atapi_pc struct ide_atapi_pc is often allocated on the stack and size of ->pc_buf size is 256 bytes. However since only ide_floppy_create_read_capacity_cmd() and idetape_create_inquiry_cmd() require such size allocate buffers for these pc-s explicitely and decrease ->pc_buf size to 64 bytes. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-floppy.c | 5 ++++- drivers/ide/ide-floppy_ioctl.c | 5 ++++- drivers/ide/ide-tape.c | 4 ++++ include/linux/ide.h | 2 +- 4 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 7ae662334835..0faae3098295 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -385,7 +385,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) struct gendisk *disk = floppy->disk; struct ide_atapi_pc pc; u8 *cap_desc; - u8 header_len, desc_cnt; + u8 pc_buf[256], header_len, desc_cnt; int i, rc = 1, blocks, length; drive->bios_cyl = 0; @@ -395,6 +395,9 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) drive->capacity64 = 0; ide_floppy_create_read_capacity_cmd(&pc); + pc.buf = &pc_buf[0]; + pc.buf_size = sizeof(pc_buf); + if (ide_queue_pc_tail(drive, disk, &pc)) { printk(KERN_ERR PFX "Can't get floppy parameters\n"); return 1; diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 8f8be8546038..cd8a42027ede 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -36,9 +36,9 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) { struct ide_disk_obj *floppy = drive->driver_data; - u8 header_len, desc_cnt; int i, blocks, length, u_array_size, u_index; int __user *argp; + u8 pc_buf[256], header_len, desc_cnt; if (get_user(u_array_size, arg)) return -EFAULT; @@ -47,6 +47,9 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, return -EINVAL; ide_floppy_create_read_capacity_cmd(pc); + pc->buf = &pc_buf[0]; + pc->buf_size = sizeof(pc_buf); + if (ide_queue_pc_tail(drive, floppy->disk, pc)) { printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); return -EIO; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 64dfa7458f8d..cafc67d9e2e8 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -2014,9 +2014,13 @@ static void idetape_get_inquiry_results(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; struct ide_atapi_pc pc; + u8 pc_buf[256]; char fw_rev[4], vendor_id[8], product_id[16]; idetape_create_inquiry_cmd(&pc); + pc.buf = &pc_buf[0]; + pc.buf_size = sizeof(pc_buf); + if (ide_queue_pc_tail(drive, tape->disk, &pc)) { printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n", tape->name); diff --git a/include/linux/ide.h b/include/linux/ide.h index 03c520917b7a..0f48fbd46028 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -377,7 +377,7 @@ enum { * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes. * This is used for several packet commands (not for READ/WRITE commands). */ -#define IDE_PC_BUFFER_SIZE 256 +#define IDE_PC_BUFFER_SIZE 64 #define ATAPI_WAIT_PC (60 * HZ) struct ide_atapi_pc { -- cgit v1.2.3 From 349d12a1fe57d49287a539909cf14f362634342d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2009 20:15:26 +0200 Subject: ide-floppy: use ide_pio_bytes() * Fix ide_init_sg_cmd() setup for non-fs requests. * Convert ide_pc_intr() to use ide_pio_bytes() for floppy media. * Remove no longer needed ide_io_buffers() and sg/sg_cnt fields from struct ide_atapi_pc. * Remove partial completions; kill idefloppy_update_buffers(), as a result. * Add some more debugging statements. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-atapi.c | 68 ++++++++++++------------------------------------ drivers/ide/ide-floppy.c | 24 ++++------------- include/linux/ide.h | 5 ---- 3 files changed, 21 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 40f413b20bd8..100e6f94b4f0 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -71,49 +71,6 @@ int ide_check_atapi_device(ide_drive_t *drive, const char *s) } EXPORT_SYMBOL_GPL(ide_check_atapi_device); -/* PIO data transfer routine using the scatter gather table. */ -int ide_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int write) -{ - ide_hwif_t *hwif = drive->hwif; - const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xf = write ? tp_ops->output_data : tp_ops->input_data; - struct scatterlist *sg = pc->sg; - char *buf; - int count, done = 0; - - while (bcount) { - count = min(sg->length - pc->b_count, bcount); - - if (PageHighMem(sg_page(sg))) { - unsigned long flags; - - local_irq_save(flags); - buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; - xf(drive, NULL, buf + pc->b_count, count); - kunmap_atomic(buf - sg->offset, KM_IRQ0); - local_irq_restore(flags); - } else { - buf = sg_virt(sg); - xf(drive, NULL, buf + pc->b_count, count); - } - - bcount -= count; - pc->b_count += count; - done += count; - - if (pc->b_count == sg->length) { - if (!--pc->sg_cnt) - break; - pc->sg = sg = sg_next(sg); - pc->b_count = 0; - } - } - - return done; -} -EXPORT_SYMBOL_GPL(ide_io_buffers); - void ide_init_pc(struct ide_atapi_pc *pc) { memset(pc, 0, sizeof(*pc)); @@ -353,6 +310,9 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) pc->xferred = pc->req_xfer; if (drive->pc_update_buffers) drive->pc_update_buffers(drive, pc); + + if (drive->media == ide_floppy) + ide_complete_rq(drive, 0, blk_rq_bytes(rq)); } debug_log("%s: DMA finished\n", drive->name); } @@ -408,12 +368,19 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = 0; ide_complete_rq(drive, 0, blk_rq_bytes(rq)); } else { + unsigned int done; + if (blk_fs_request(rq) == 0 && uptodate <= 0) { if (rq->errors == 0) rq->errors = -EIO; } - ide_complete_rq(drive, uptodate ? 0 : -EIO, - ide_rq_bytes(rq)); + + if (drive->media == ide_tape) + done = ide_rq_bytes(rq); /* FIXME */ + else + done = blk_rq_bytes(rq); + + ide_complete_rq(drive, uptodate ? 0 : -EIO, done); } return ide_stopped; @@ -446,14 +413,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) xferfunc = write ? tp_ops->output_data : tp_ops->input_data; - if ((drive->media == ide_floppy && !pc->buf) || - (drive->media == ide_tape && pc->bh)) { + if (drive->media == ide_floppy && pc->buf == NULL) { + done = min_t(unsigned int, bcount, cmd->nleft); + ide_pio_bytes(drive, cmd, write, done); + } else if (drive->media == ide_tape && pc->bh) { done = drive->pc_io_buffers(drive, pc, bcount, write); - - /* FIXME: don't do partial completions */ - if (drive->media == ide_floppy) - ide_complete_rq(drive, 0, - done ? done : ide_rq_bytes(rq)); } else { done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred); xferfunc(drive, NULL, pc->cur_pos, done); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 0faae3098295..2b4868d95f8b 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -61,16 +61,6 @@ */ #define IDEFLOPPY_PC_DELAY (HZ/20) /* default delay for ZIP 100 (50ms) */ -static void idefloppy_update_buffers(ide_drive_t *drive, - struct ide_atapi_pc *pc) -{ - struct request *rq = pc->rq; - struct bio *bio = rq->bio; - - while ((bio = rq->bio) != NULL) - ide_complete_rq(drive, 0, ide_rq_bytes(rq)); -} - static int ide_floppy_callback(ide_drive_t *drive, int dsc) { struct ide_disk_obj *floppy = drive->driver_data; @@ -213,7 +203,6 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive, memcpy(rq->cmd, pc->c, 12); pc->rq = rq; - pc->b_count = 0; if (rq->cmd_flags & REQ_RW) pc->flags |= PC_FLAG_WRITING; pc->buf = NULL; @@ -227,7 +216,6 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy, ide_init_pc(pc); memcpy(pc->c, rq->cmd, sizeof(pc->c)); pc->rq = rq; - pc->b_count = 0; if (rq->data_len && rq_data_dir(rq) == WRITE) pc->flags |= PC_FLAG_WRITING; pc->buf = rq->data; @@ -244,10 +232,11 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, struct request *rq, sector_t block) { struct ide_disk_obj *floppy = drive->driver_data; - ide_hwif_t *hwif = drive->hwif; struct ide_cmd cmd; struct ide_atapi_pc *pc; + ide_debug_log(IDE_DBG_FUNC, "enter, cmd: 0x%x\n", rq->cmd[0]); + if (drive->debug_mask & IDE_DBG_RQ) blk_dump_rq_flags(rq, (rq->rq_disk ? rq->rq_disk->disk_name @@ -294,13 +283,10 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, cmd.rq = rq; if (blk_fs_request(rq) || pc->req_xfer) { - ide_init_sg_cmd(&cmd, rq->nr_sectors << 9); + ide_init_sg_cmd(&cmd, pc->req_xfer); ide_map_sg(drive, &cmd); } - pc->sg = hwif->sg_table; - pc->sg_cnt = cmd.sg_nents; - pc->rq = rq; return ide_floppy_issue_pc(drive, &cmd, pc); @@ -388,6 +374,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) u8 pc_buf[256], header_len, desc_cnt; int i, rc = 1, blocks, length; + ide_debug_log(IDE_DBG_FUNC, "enter"); + drive->bios_cyl = 0; drive->bios_head = drive->bios_sect = 0; floppy->blocks = 0; @@ -488,8 +476,6 @@ static void ide_floppy_setup(ide_drive_t *drive) u16 *id = drive->id; drive->pc_callback = ide_floppy_callback; - drive->pc_update_buffers = idefloppy_update_buffers; - drive->pc_io_buffers = ide_io_buffers; /* * We used to check revisions here. At this point however I'm giving up. diff --git a/include/linux/ide.h b/include/linux/ide.h index 0f48fbd46028..836c4c6cb7e3 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -415,9 +415,6 @@ struct ide_atapi_pc { struct idetape_bh *bh; char *b_data; - struct scatterlist *sg; - unsigned int sg_cnt; - unsigned long timeout; }; @@ -1177,8 +1174,6 @@ void ide_tf_read(ide_drive_t *, struct ide_cmd *); void ide_input_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int); void ide_output_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int); -int ide_io_buffers(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int); - extern void SELECT_DRIVE(ide_drive_t *); void SELECT_MASK(ide_drive_t *, int); -- cgit v1.2.3 From ecf3a31d2a08a419bdf919456f1724f5b72bde2c Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 31 Mar 2009 20:15:30 +0200 Subject: ide: turn set_irq() method into write_devctl() method Turn set_irq() method with its software reset hack into write_devctl() method (for just writing a value into the device control register) at last... Signed-off-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/at91_ide.c | 2 +- drivers/ide/au1xxx-ide.c | 3 +-- drivers/ide/falconide.c | 3 +-- drivers/ide/ide-eh.c | 7 +++---- drivers/ide/ide-h8300.c | 3 +-- drivers/ide/ide-io-std.c | 16 +++------------- drivers/ide/ide-io.c | 4 +++- drivers/ide/ide-iops.c | 4 ++-- drivers/ide/ide-pm.c | 2 +- drivers/ide/ide-probe.c | 6 +++--- drivers/ide/ide-taskfile.c | 2 +- drivers/ide/ns87415.c | 3 +-- drivers/ide/pmac.c | 14 ++------------ drivers/ide/q40ide.c | 3 +-- drivers/ide/scc_pata.c | 14 ++------------ drivers/ide/sgiioc4.c | 3 +-- drivers/ide/tx4938ide.c | 3 +-- drivers/ide/tx4939ide.c | 6 ++---- include/linux/ide.h | 6 ++---- 19 files changed, 32 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index 8fc6ae958b0b..e6e96743aa7b 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -295,7 +295,7 @@ static const struct ide_tp_ops at91_ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = at91_ide_tf_load, .tf_read = at91_ide_tf_read, diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 1bfb43d0d3a8..2ca10d533dad 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -467,8 +467,7 @@ static const struct ide_tp_ops au1xxx_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index b368a5effc3a..5063be85dc33 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -89,8 +89,7 @@ static const struct ide_tp_ops falconide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 11664976eea3..de4b7f1c9c9f 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -401,15 +401,14 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi) * immediate interrupt due to the edge transition it produces. * This single interrupt gives us a "fast poll" for drives that * recover from reset very quickly, saving us the first 50ms wait time. - * - * TODO: add ->softreset method and stop abusing ->set_irq */ /* set SRST and nIEN */ - tp_ops->set_irq(hwif, 4); + tp_ops->write_devctl(hwif, ATA_SRST | ATA_NIEN | ATA_DEVCTL_OBS); /* more than enough time */ udelay(10); /* clear SRST, leave nIEN (unless device is on the quirk list) */ - tp_ops->set_irq(hwif, drive->quirk_list == 2); + tp_ops->write_devctl(hwif, (drive->quirk_list == 2 ? 0 : ATA_NIEN) | + ATA_DEVCTL_OBS); /* more than enough time */ udelay(10); hwif->poll_timeout = jiffies + WAIT_WORSTCASE; diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c index 7492f28d1290..a57ccad61acf 100644 --- a/drivers/ide/ide-h8300.c +++ b/drivers/ide/ide-h8300.c @@ -159,8 +159,7 @@ static const struct ide_tp_ops h8300_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = h8300_tf_load, .tf_read = h8300_tf_read, diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c index 3a867e49a0af..bbeedce6b17d 100644 --- a/drivers/ide/ide-io-std.c +++ b/drivers/ide/ide-io-std.c @@ -64,23 +64,14 @@ u8 ide_read_altstatus(ide_hwif_t *hwif) } EXPORT_SYMBOL_GPL(ide_read_altstatus); -void ide_set_irq(ide_hwif_t *hwif, int on) +void ide_write_devctl(ide_hwif_t *hwif, u8 ctl) { - u8 ctl = ATA_DEVCTL_OBS; - - if (on == 4) { /* hack for SRST */ - ctl |= 4; - on &= ~4; - } - - ctl |= on ? 0 : 2; - if (hwif->host_flags & IDE_HFLAG_MMIO) writeb(ctl, (void __iomem *)hwif->io_ports.ctl_addr); else outb(ctl, hwif->io_ports.ctl_addr); } -EXPORT_SYMBOL_GPL(ide_set_irq); +EXPORT_SYMBOL_GPL(ide_write_devctl); void ide_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) { @@ -312,8 +303,7 @@ const struct ide_tp_ops default_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 3c52317d8524..5b57905a7d71 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -494,7 +494,9 @@ repeat: * quirk_list may not like intr setups/cleanups */ if (prev_port && prev_port->cur_dev->quirk_list == 0) - prev_port->tp_ops->set_irq(prev_port, 0); + prev_port->tp_ops->write_devctl(prev_port, + ATA_NIEN | + ATA_DEVCTL_OBS); hwif->host->cur_port = hwif; } diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 0caca342802d..ae227dd8466f 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -360,7 +360,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) SELECT_DRIVE(drive); SELECT_MASK(drive, 1); udelay(1); - tp_ops->set_irq(hwif, 0); + tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); memset(&cmd, 0, sizeof(cmd)); cmd.tf_flags = IDE_TFLAG_OUT_FEATURE | IDE_TFLAG_OUT_NSECT; @@ -372,7 +372,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES); if (drive->quirk_list == 2) - tp_ops->set_irq(hwif, 1); + tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); error = __ide_wait_stat(drive, drive->ready_stat, ATA_BUSY | ATA_DRQ | ATA_ERR, diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index ebf2d21ebdcb..20553d4c42a2 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -233,7 +233,7 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) if (rc) printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name); SELECT_DRIVE(drive); - hwif->tp_ops->set_irq(hwif, 1); + hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); rc = ide_wait_not_busy(hwif, 100000); if (rc) printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 7c1f1bf81836..d240f76b0da6 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -260,7 +260,7 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) * during the identify phase that the IRQ handler isn't expecting. */ if (io_ports->ctl_addr) - tp_ops->set_irq(hwif, 0); + tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); /* take a deep breath */ msleep(50); @@ -628,7 +628,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif) if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 || (drive->dev_flags & IDE_DFLAG_PRESENT)) { SELECT_DRIVE(drive); - hwif->tp_ops->set_irq(hwif, 1); + hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); mdelay(2); rc = ide_wait_not_busy(hwif, 35000); if (rc) @@ -845,7 +845,7 @@ static int init_irq (ide_hwif_t *hwif) irq_handler = ide_intr; if (io_ports->ctl_addr) - hwif->tp_ops->set_irq(hwif, 1); + hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); if (request_irq(hwif->irq, irq_handler, sa, hwif->name, hwif)) goto out_up; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index dba68db629bf..47f13cd11031 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -80,7 +80,7 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd) if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) { ide_tf_dump(drive->name, tf); - tp_ops->set_irq(hwif, 1); + tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); SELECT_MASK(drive, 0); tp_ops->tf_load(drive, cmd); } diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c index 13a9e00efa13..00ab0be7335a 100644 --- a/drivers/ide/ns87415.c +++ b/drivers/ide/ns87415.c @@ -109,8 +109,7 @@ static const struct ide_tp_ops superio_tp_ops = { .exec_command = ide_exec_command, .read_status = superio_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = ide_tf_load, .tf_read = superio_tf_read, diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 879c3d8d9f36..7aa45ea37eeb 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -476,17 +476,8 @@ static void pmac_exec_command(ide_hwif_t *hwif, u8 cmd) + IDE_TIMING_CONFIG)); } -static void pmac_set_irq(ide_hwif_t *hwif, int on) +static void pmac_write_devctl(ide_hwif_t *hwif, u8 ctl) { - u8 ctl = ATA_DEVCTL_OBS; - - if (on == 4) { /* hack for SRST */ - ctl |= 4; - on &= ~4; - } - - ctl |= on ? 0 : 2; - writeb(ctl, (void __iomem *)hwif->io_ports.ctl_addr); (void)readl((void __iomem *)(hwif->io_ports.data_addr + IDE_TIMING_CONFIG)); @@ -954,8 +945,7 @@ static const struct ide_tp_ops pmac_tp_ops = { .exec_command = pmac_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = pmac_set_irq, + .write_devctl = pmac_write_devctl, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c index 2a43a2f49633..7fddfd34fcce 100644 --- a/drivers/ide/q40ide.c +++ b/drivers/ide/q40ide.c @@ -99,8 +99,7 @@ static const struct ide_tp_ops q40ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 6e47eac1cd7f..6ba4983d831c 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -148,17 +148,8 @@ static u8 scc_dma_sff_read_status(ide_hwif_t *hwif) return (u8)in_be32((void *)(hwif->dma_base + 4)); } -static void scc_set_irq(ide_hwif_t *hwif, int on) +static void scc_write_devctl(ide_hwif_t *hwif, u8 ctl) { - u8 ctl = ATA_DEVCTL_OBS; - - if (on == 4) { /* hack for SRST */ - ctl |= 4; - on &= ~4; - } - - ctl |= on ? 0 : 2; - out_be32((void *)hwif->io_ports.ctl_addr, ctl); eieio(); in_be32((void *)(hwif->dma_base + 0x01c)); @@ -843,8 +834,7 @@ static const struct ide_tp_ops scc_tp_ops = { .exec_command = scc_exec_command, .read_status = scc_read_status, .read_altstatus = scc_read_altstatus, - - .set_irq = scc_set_irq, + .write_devctl = scc_write_devctl, .tf_load = scc_tf_load, .tf_read = scc_tf_read, diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index 6ef5a567d377..58980fcafc3b 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -503,8 +503,7 @@ static const struct ide_tp_ops sgiioc4_tp_ops = { .exec_command = ide_exec_command, .read_status = sgiioc4_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index 1c4a78ac1a20..ec3aa32fbbe0 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -204,8 +204,7 @@ static const struct ide_tp_ops tx4938ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = tx4938ide_tf_load, .tf_read = tx4938ide_tf_read, diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 77aee5b2ce95..43bc0372413a 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -571,8 +571,7 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = tx4939ide_tf_load, .tf_read = tx4939ide_tf_read, @@ -595,8 +594,7 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, .tf_load = tx4939ide_tf_load, .tf_read = ide_tf_read, diff --git a/include/linux/ide.h b/include/linux/ide.h index 836c4c6cb7e3..ccb70abe991b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -655,8 +655,7 @@ struct ide_tp_ops { void (*exec_command)(struct hwif_s *, u8); u8 (*read_status)(struct hwif_s *); u8 (*read_altstatus)(struct hwif_s *); - - void (*set_irq)(struct hwif_s *, int); + void (*write_devctl)(struct hwif_s *, u8); void (*tf_load)(ide_drive_t *, struct ide_cmd *); void (*tf_read)(ide_drive_t *, struct ide_cmd *); @@ -1165,8 +1164,7 @@ void ide_tf_dump(const char *, struct ide_taskfile *); void ide_exec_command(ide_hwif_t *, u8); u8 ide_read_status(ide_hwif_t *); u8 ide_read_altstatus(ide_hwif_t *); - -void ide_set_irq(ide_hwif_t *, int); +void ide_write_devctl(ide_hwif_t *, u8); void ide_tf_load(ide_drive_t *, struct ide_cmd *); void ide_tf_read(ide_drive_t *, struct ide_cmd *); -- cgit v1.2.3 From 6762511934e6e7287ce3c8baac0d52ef64e3787b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 31 Mar 2009 20:15:30 +0200 Subject: ide: rename IDE_TFLAG_IN_[HOB_]FEATURE The feature register has never been readable -- when its location is read, one gets the error register value; hence rename IDE_TFLAG_IN_[HOB_]FEATURE into IDE_TFLAG_IN_[HOB_]ERROR and introduce the 'hob_error' field into the 'struct ide_taskfile' (despite the error register not really depending on the HOB bit). Signed-off-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/at91_ide.c | 16 ++++++++-------- drivers/ide/ide-h8300.c | 16 ++++++++-------- drivers/ide/ide-io-std.c | 16 ++++++++-------- drivers/ide/ide-iops.c | 2 +- drivers/ide/ns87415.c | 16 ++++++++-------- drivers/ide/scc_pata.c | 16 ++++++++-------- drivers/ide/tx4938ide.c | 17 ++++++++--------- drivers/ide/tx4939ide.c | 17 ++++++++--------- include/linux/ide.h | 12 ++++++++---- 9 files changed, 65 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index e6e96743aa7b..9dce793d93b4 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -244,8 +244,8 @@ static void at91_ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) /* be sure we're looking at the low order bits */ ide_mm_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = ide_mm_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = ide_mm_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = ide_mm_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -260,16 +260,16 @@ static void at91_ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->tf_flags & IDE_TFLAG_LBA48) { ide_mm_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = ide_mm_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = ide_mm_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = ide_mm_inb(io_ports->nsect_addr); + tf->hob_nsect = ide_mm_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = ide_mm_inb(io_ports->lbal_addr); + tf->hob_lbal = ide_mm_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = ide_mm_inb(io_ports->lbam_addr); + tf->hob_lbam = ide_mm_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = ide_mm_inb(io_ports->lbah_addr); + tf->hob_lbah = ide_mm_inb(io_ports->lbah_addr); } } diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c index a57ccad61acf..1d45cd5b6a1c 100644 --- a/drivers/ide/ide-h8300.c +++ b/drivers/ide/ide-h8300.c @@ -100,8 +100,8 @@ static void h8300_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) /* be sure we're looking at the low order bits */ outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -116,16 +116,16 @@ static void h8300_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->tf_flags & IDE_TFLAG_LBA48) { outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = inb(io_ports->nsect_addr); + tf->hob_nsect = inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = inb(io_ports->lbal_addr); + tf->hob_lbal = inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = inb(io_ports->lbam_addr); + tf->hob_lbam = inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = inb(io_ports->lbah_addr); + tf->hob_lbah = inb(io_ports->lbah_addr); } } diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c index bbeedce6b17d..31f5c5f4c093 100644 --- a/drivers/ide/ide-io-std.c +++ b/drivers/ide/ide-io-std.c @@ -159,8 +159,8 @@ void ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) /* be sure we're looking at the low order bits */ tf_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = tf_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = tf_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = tf_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -175,16 +175,16 @@ void ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->tf_flags & IDE_TFLAG_LBA48) { tf_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = tf_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = tf_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = tf_inb(io_ports->nsect_addr); + tf->hob_nsect = tf_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = tf_inb(io_ports->lbal_addr); + tf->hob_lbal = tf_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = tf_inb(io_ports->lbam_addr); + tf->hob_lbam = tf_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = tf_inb(io_ports->lbah_addr); + tf->hob_lbah = tf_inb(io_ports->lbah_addr); } } EXPORT_SYMBOL_GPL(ide_tf_read); diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index ae227dd8466f..6f363a26700d 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -55,7 +55,7 @@ u8 ide_read_error(ide_drive_t *drive) struct ide_cmd cmd; memset(&cmd, 0, sizeof(cmd)); - cmd.tf_flags = IDE_TFLAG_IN_FEATURE; + cmd.tf_flags = IDE_TFLAG_IN_ERROR; drive->hwif->tp_ops->tf_read(drive, &cmd); diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c index 00ab0be7335a..0a6cf74c3265 100644 --- a/drivers/ide/ns87415.c +++ b/drivers/ide/ns87415.c @@ -76,8 +76,8 @@ static void superio_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) /* be sure we're looking at the low order bits */ outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -92,16 +92,16 @@ static void superio_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->tf_flags & IDE_TFLAG_LBA48) { outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = inb(io_ports->nsect_addr); + tf->hob_nsect = inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = inb(io_ports->lbal_addr); + tf->hob_lbal = inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = inb(io_ports->lbam_addr); + tf->hob_lbam = inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = inb(io_ports->lbah_addr); + tf->hob_lbah = inb(io_ports->lbah_addr); } } diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 6ba4983d831c..ea0a9752c6f9 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -702,8 +702,8 @@ static void scc_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) /* be sure we're looking at the low order bits */ scc_ide_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = scc_ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = scc_ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = scc_ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -718,16 +718,16 @@ static void scc_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->tf_flags & IDE_TFLAG_LBA48) { scc_ide_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = scc_ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = scc_ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = scc_ide_inb(io_ports->nsect_addr); + tf->hob_nsect = scc_ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = scc_ide_inb(io_ports->lbal_addr); + tf->hob_lbal = scc_ide_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = scc_ide_inb(io_ports->lbam_addr); + tf->hob_lbam = scc_ide_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = scc_ide_inb(io_ports->lbah_addr); + tf->hob_lbah = scc_ide_inb(io_ports->lbah_addr); } } diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index ec3aa32fbbe0..606c37f5267d 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -144,8 +144,8 @@ static void tx4938ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) /* be sure we're looking at the low order bits */ tx4938ide_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = tx4938ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = tx4938ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = tx4938ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -160,17 +160,16 @@ static void tx4938ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->tf_flags & IDE_TFLAG_LBA48) { tx4938ide_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = - tx4938ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = tx4938ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = tx4938ide_inb(io_ports->nsect_addr); + tf->hob_nsect = tx4938ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = tx4938ide_inb(io_ports->lbal_addr); + tf->hob_lbal = tx4938ide_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = tx4938ide_inb(io_ports->lbam_addr); + tf->hob_lbam = tx4938ide_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = tx4938ide_inb(io_ports->lbah_addr); + tf->hob_lbah = tx4938ide_inb(io_ports->lbah_addr); } } diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 43bc0372413a..f1e9da71110c 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -511,8 +511,8 @@ static void tx4939ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) /* be sure we're looking at the low order bits */ tx4939ide_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = tx4939ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = tx4939ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = tx4939ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -527,17 +527,16 @@ static void tx4939ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->tf_flags & IDE_TFLAG_LBA48) { tx4939ide_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = - tx4939ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = tx4939ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = tx4939ide_inb(io_ports->nsect_addr); + tf->hob_nsect = tx4939ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = tx4939ide_inb(io_ports->lbal_addr); + tf->hob_lbal = tx4939ide_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = tx4939ide_inb(io_ports->lbam_addr); + tf->hob_lbam = tx4939ide_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = tx4939ide_inb(io_ports->lbah_addr); + tf->hob_lbah = tx4939ide_inb(io_ports->lbah_addr); } } diff --git a/include/linux/ide.h b/include/linux/ide.h index ccb70abe991b..e919c865f0c7 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -265,7 +265,7 @@ enum { IDE_TFLAG_WRITE = (1 << 12), IDE_TFLAG_CUSTOM_HANDLER = (1 << 13), IDE_TFLAG_DMA_PIO_FALLBACK = (1 << 14), - IDE_TFLAG_IN_HOB_FEATURE = (1 << 15), + IDE_TFLAG_IN_HOB_ERROR = (1 << 15), IDE_TFLAG_IN_HOB_NSECT = (1 << 16), IDE_TFLAG_IN_HOB_LBAL = (1 << 17), IDE_TFLAG_IN_HOB_LBAM = (1 << 18), @@ -273,10 +273,10 @@ enum { IDE_TFLAG_IN_HOB_LBA = IDE_TFLAG_IN_HOB_LBAL | IDE_TFLAG_IN_HOB_LBAM | IDE_TFLAG_IN_HOB_LBAH, - IDE_TFLAG_IN_HOB = IDE_TFLAG_IN_HOB_FEATURE | + IDE_TFLAG_IN_HOB = IDE_TFLAG_IN_HOB_ERROR | IDE_TFLAG_IN_HOB_NSECT | IDE_TFLAG_IN_HOB_LBA, - IDE_TFLAG_IN_FEATURE = (1 << 20), + IDE_TFLAG_IN_ERROR = (1 << 20), IDE_TFLAG_IN_NSECT = (1 << 21), IDE_TFLAG_IN_LBAL = (1 << 22), IDE_TFLAG_IN_LBAM = (1 << 23), @@ -310,8 +310,12 @@ enum { struct ide_taskfile { u8 hob_data; /* 0: high data byte (for TASKFILE IOCTL) */ + /* 1-5: additional data to support LBA48 */ + union { + u8 hob_error; /* read: error */ + u8 hob_feature; /* write: feature */ + }; - u8 hob_feature; /* 1-5: additional data to support LBA48 */ u8 hob_nsect; u8 hob_lbal; u8 hob_lbam; -- cgit v1.2.3 From abb596b25edac1ec1acc4ef53df190771661c3d2 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 31 Mar 2009 20:15:32 +0200 Subject: ide: turn selectproc() method into dev_select() method (take 5) Turn selectproc() method into dev_select() method by teaching it to write to the device register and moving it from 'struct ide_port_ops' to 'struct ide_tp_ops'. Signed-off-by: Sergei Shtylyov Cc: benh@kernel.crashing.org Cc: petkovbb@gmail.com [bart: add ->dev_select to at91_ide.c and tx4939.c (__BIG_ENDIAN case)] Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/at91_ide.c | 1 + drivers/ide/au1xxx-ide.c | 1 + drivers/ide/falconide.c | 1 + drivers/ide/ht6560b.c | 20 +++++++++++++++-- drivers/ide/ide-h8300.c | 1 + drivers/ide/ide-io-std.c | 13 +++++++++++ drivers/ide/ide-iops.c | 12 +--------- drivers/ide/ns87415.c | 25 ++++++++++++++++----- drivers/ide/pmac.c | 58 ++++++++++++++++++++++++++++++++---------------- drivers/ide/q40ide.c | 1 + drivers/ide/qd65xx.c | 21 +++++++++++++++--- drivers/ide/scc_pata.c | 1 + drivers/ide/sgiioc4.c | 1 + drivers/ide/trm290.c | 20 +++++++++++++---- drivers/ide/tx4938ide.c | 1 + drivers/ide/tx4939ide.c | 4 +++- include/linux/ide.h | 6 ++--- 17 files changed, 139 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index 04b39ff02d76..8eda552326e9 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -283,6 +283,7 @@ static const struct ide_tp_ops at91_ide_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = at91_ide_tf_load, .tf_read = at91_ide_tf_read, diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 2ca10d533dad..46013644c965 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -469,6 +469,7 @@ static const struct ide_tp_ops au1xxx_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index 5063be85dc33..afa2af9a362b 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -91,6 +91,7 @@ static const struct ide_tp_ops falconide_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c index c7e5c2246b79..2fb0f2965009 100644 --- a/drivers/ide/ht6560b.c +++ b/drivers/ide/ht6560b.c @@ -103,7 +103,7 @@ /* * This routine is invoked from ide.c to prepare for access to a given drive. */ -static void ht6560b_selectproc (ide_drive_t *drive) +static void ht6560b_dev_select(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; unsigned long flags; @@ -143,6 +143,8 @@ static void ht6560b_selectproc (ide_drive_t *drive) #endif } local_irq_restore(flags); + + outb(drive->select | ATA_DEVICE_OBS, hwif->io_ports.device_addr); } /* @@ -305,15 +307,29 @@ static int probe_ht6560b; module_param_named(probe, probe_ht6560b, bool, 0); MODULE_PARM_DESC(probe, "probe for HT6560B chipset"); +static const struct ide_tp_ops ht6560b_tp_ops = { + .exec_command = ide_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, + + .dev_select = ht6560b_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, +}; + static const struct ide_port_ops ht6560b_port_ops = { .init_dev = ht6560b_init_dev, .set_pio_mode = ht6560b_set_pio_mode, - .selectproc = ht6560b_selectproc, }; static const struct ide_port_info ht6560b_port_info __initdata = { .name = DRV_NAME, .chipset = ide_ht6560b, + .tp_ops = &ht6560b_tp_ops, .port_ops = &ht6560b_port_ops, .host_flags = IDE_HFLAG_SERIALIZE | /* is this needed? */ IDE_HFLAG_NO_DMA | diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c index 8541a9abd7ac..dac9a6d44963 100644 --- a/drivers/ide/ide-h8300.c +++ b/drivers/ide/ide-h8300.c @@ -151,6 +151,7 @@ static const struct ide_tp_ops h8300_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = h8300_tf_load, .tf_read = h8300_tf_read, diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c index 7f77bb7db488..9cac281d82c4 100644 --- a/drivers/ide/ide-io-std.c +++ b/drivers/ide/ide-io-std.c @@ -73,6 +73,18 @@ void ide_write_devctl(ide_hwif_t *hwif, u8 ctl) } EXPORT_SYMBOL_GPL(ide_write_devctl); +void ide_dev_select(ide_drive_t *drive) +{ + ide_hwif_t *hwif = drive->hwif; + u8 select = drive->select | ATA_DEVICE_OBS; + + if (hwif->host_flags & IDE_HFLAG_MMIO) + writeb(select, (void __iomem *)hwif->io_ports.device_addr); + else + outb(select, hwif->io_ports.device_addr); +} +EXPORT_SYMBOL_GPL(ide_dev_select); + void ide_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) { ide_hwif_t *hwif = drive->hwif; @@ -280,6 +292,7 @@ const struct ide_tp_ops default_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 6f363a26700d..dfb0ec317fa3 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -29,17 +29,7 @@ void SELECT_DRIVE(ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; - const struct ide_port_ops *port_ops = hwif->port_ops; - struct ide_cmd cmd; - - if (port_ops && port_ops->selectproc) - port_ops->selectproc(drive); - - memset(&cmd, 0, sizeof(cmd)); - cmd.tf_flags = IDE_TFLAG_OUT_DEVICE; - - drive->hwif->tp_ops->tf_load(drive, &cmd); + drive->hwif->tp_ops->dev_select(drive); } void SELECT_MASK(ide_drive_t *drive, int mask) diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c index 9f6dff83b141..af1b421eb450 100644 --- a/drivers/ide/ns87415.c +++ b/drivers/ide/ns87415.c @@ -98,12 +98,15 @@ static void superio_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) } } +static void ns87415_dev_select(ide_drive_t *drive); + static const struct ide_tp_ops superio_tp_ops = { .exec_command = ide_exec_command, .read_status = superio_read_status, .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ns87415_dev_select, .tf_load = ide_tf_load, .tf_read = superio_tf_read, @@ -182,10 +185,12 @@ static void ns87415_prepare_drive (ide_drive_t *drive, unsigned int use_dma) local_irq_restore(flags); } -static void ns87415_selectproc (ide_drive_t *drive) +static void ns87415_dev_select(ide_drive_t *drive) { ns87415_prepare_drive(drive, !!(drive->dev_flags & IDE_DFLAG_USING_DMA)); + + outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr); } static void ns87415_dma_start(ide_drive_t *drive) @@ -229,7 +234,7 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) * Also, leave IRQ masked during drive probing, to prevent infinite * interrupts from a potentially floating INTA.. * - * IRQs get unmasked in selectproc when drive is first used. + * IRQs get unmasked in dev_select() when drive is first used. */ (void) pci_read_config_dword(dev, 0x40, &ctrl); (void) pci_read_config_byte(dev, 0x09, &progif); @@ -281,8 +286,18 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) outb(0x60, hwif->dma_base + ATA_DMA_STATUS); } -static const struct ide_port_ops ns87415_port_ops = { - .selectproc = ns87415_selectproc, +static const struct ide_tp_ops ns87415_tp_ops = { + .exec_command = ide_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, + + .dev_select = ns87415_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, }; static const struct ide_dma_ops ns87415_dma_ops = { @@ -299,7 +314,7 @@ static const struct ide_dma_ops ns87415_dma_ops = { static const struct ide_port_info ns87415_chipset __devinitdata = { .name = DRV_NAME, .init_hwif = init_hwif_ns87415, - .port_ops = &ns87415_port_ops, + .tp_ops = &ns87415_tp_ops, .dma_ops = &ns87415_dma_ops, .host_flags = IDE_HFLAG_TRUST_BIOS_FOR_DMA | IDE_HFLAG_NO_ATAPI_DMA, diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 7aa45ea37eeb..24ce1f805cd7 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -404,8 +404,6 @@ kauai_lookup_timing(struct kauai_timing* table, int cycle_time) #define IDE_WAKEUP_DELAY (1*HZ) static int pmac_ide_init_dma(ide_hwif_t *, const struct ide_port_info *); -static void pmac_ide_selectproc(ide_drive_t *drive); -static void pmac_ide_kauai_selectproc(ide_drive_t *drive); #define PMAC_IDE_REG(x) \ ((void __iomem *)((drive)->hwif->io_ports.data_addr + (x))) @@ -415,8 +413,7 @@ static void pmac_ide_kauai_selectproc(ide_drive_t *drive); * timing register when selecting that unit. This version is for * ASICs with a single timing register */ -static void -pmac_ide_selectproc(ide_drive_t *drive) +static void pmac_ide_apply_timings(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = @@ -434,8 +431,7 @@ pmac_ide_selectproc(ide_drive_t *drive) * timing register when selecting that unit. This version is for * ASICs with a dual timing register (Kauai) */ -static void -pmac_ide_kauai_selectproc(ide_drive_t *drive) +static void pmac_ide_kauai_apply_timings(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = @@ -464,9 +460,25 @@ pmac_ide_do_update_timings(ide_drive_t *drive) if (pmif->kind == controller_sh_ata6 || pmif->kind == controller_un_ata6 || pmif->kind == controller_k2_ata6) - pmac_ide_kauai_selectproc(drive); + pmac_ide_kauai_apply_timings(drive); else - pmac_ide_selectproc(drive); + pmac_ide_apply_timings(drive); +} + +static void pmac_dev_select(ide_drive_t *drive) +{ + pmac_ide_apply_timings(drive); + + writeb(drive->select | ATA_DEVICE_OBS, + (void __iomem *)drive->hwif->io_ports.device_addr); +} + +static void pmac_kauai_dev_select(ide_drive_t *drive) +{ + pmac_ide_kauai_apply_timings(drive); + + writeb(drive->select | ATA_DEVICE_OBS, + (void __iomem *)drive->hwif->io_ports.device_addr); } static void pmac_exec_command(ide_hwif_t *hwif, u8 cmd) @@ -947,6 +959,7 @@ static const struct ide_tp_ops pmac_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = pmac_write_devctl, + .dev_select = pmac_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, @@ -954,19 +967,24 @@ static const struct ide_tp_ops pmac_tp_ops = { .output_data = ide_output_data, }; -static const struct ide_port_ops pmac_ide_ata6_port_ops = { - .init_dev = pmac_ide_init_dev, - .set_pio_mode = pmac_ide_set_pio_mode, - .set_dma_mode = pmac_ide_set_dma_mode, - .selectproc = pmac_ide_kauai_selectproc, - .cable_detect = pmac_ide_cable_detect, +static const struct ide_tp_ops pmac_ata6_tp_ops = { + .exec_command = pmac_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = pmac_write_devctl, + + .dev_select = pmac_kauai_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, }; static const struct ide_port_ops pmac_ide_ata4_port_ops = { .init_dev = pmac_ide_init_dev, .set_pio_mode = pmac_ide_set_pio_mode, .set_dma_mode = pmac_ide_set_dma_mode, - .selectproc = pmac_ide_selectproc, .cable_detect = pmac_ide_cable_detect, }; @@ -974,7 +992,6 @@ static const struct ide_port_ops pmac_ide_port_ops = { .init_dev = pmac_ide_init_dev, .set_pio_mode = pmac_ide_set_pio_mode, .set_dma_mode = pmac_ide_set_dma_mode, - .selectproc = pmac_ide_selectproc, }; static const struct ide_dma_ops pmac_dma_ops; @@ -1011,15 +1028,18 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) pmif->broken_dma = pmif->broken_dma_warn = 0; if (of_device_is_compatible(np, "shasta-ata")) { pmif->kind = controller_sh_ata6; - d.port_ops = &pmac_ide_ata6_port_ops; + d.tp_ops = &pmac_ata6_tp_ops; + d.port_ops = &pmac_ide_ata4_port_ops; d.udma_mask = ATA_UDMA6; } else if (of_device_is_compatible(np, "kauai-ata")) { pmif->kind = controller_un_ata6; - d.port_ops = &pmac_ide_ata6_port_ops; + d.tp_ops = &pmac_ata6_tp_ops; + d.port_ops = &pmac_ide_ata4_port_ops; d.udma_mask = ATA_UDMA5; } else if (of_device_is_compatible(np, "K2-UATA")) { pmif->kind = controller_k2_ata6; - d.port_ops = &pmac_ide_ata6_port_ops; + d.tp_ops = &pmac_ata6_tp_ops; + d.port_ops = &pmac_ide_ata4_port_ops; d.udma_mask = ATA_UDMA5; } else if (of_device_is_compatible(np, "keylargo-ata")) { if (strcmp(np->name, "ata-4") == 0) { diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c index 7fddfd34fcce..d007e7f66598 100644 --- a/drivers/ide/q40ide.c +++ b/drivers/ide/q40ide.c @@ -101,6 +101,7 @@ static const struct ide_tp_ops q40ide_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c index 08c4fa35e9b1..c9a134986891 100644 --- a/drivers/ide/qd65xx.c +++ b/drivers/ide/qd65xx.c @@ -90,13 +90,15 @@ static int timings[4]={-1,-1,-1,-1}; /* stores current timing for each timer */ * This routine is invoked to prepare for access to a given drive. */ -static void qd65xx_select(ide_drive_t *drive) +static void qd65xx_dev_select(ide_drive_t *drive) { u8 index = (( (QD_TIMREG(drive)) & 0x80 ) >> 7) | (QD_TIMREG(drive) & 0x02); if (timings[index] != QD_TIMING(drive)) outb(timings[index] = QD_TIMING(drive), QD_TIMREG(drive)); + + outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr); } /* @@ -309,20 +311,33 @@ static void __init qd6580_init_dev(ide_drive_t *drive) drive->drive_data = (drive->dn & 1) ? t2 : t1; } +static const struct ide_tp_ops qd65xx_tp_ops = { + .exec_command = ide_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, + + .dev_select = qd65xx_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, +}; + static const struct ide_port_ops qd6500_port_ops = { .init_dev = qd6500_init_dev, .set_pio_mode = qd6500_set_pio_mode, - .selectproc = qd65xx_select, }; static const struct ide_port_ops qd6580_port_ops = { .init_dev = qd6580_init_dev, .set_pio_mode = qd6580_set_pio_mode, - .selectproc = qd65xx_select, }; static const struct ide_port_info qd65xx_port_info __initdata = { .name = DRV_NAME, + .tp_ops = &qd65xx_tp_ops, .chipset = ide_qd65xx, .host_flags = IDE_HFLAG_IO_32BIT | IDE_HFLAG_NO_DMA, diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 97f8e0ef21b1..6d8dbd9c10bc 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -825,6 +825,7 @@ static const struct ide_tp_ops scc_tp_ops = { .read_altstatus = scc_read_altstatus, .write_devctl = scc_write_devctl, + .dev_select = ide_dev_select, .tf_load = scc_tf_load, .tf_read = scc_tf_read, diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index 58980fcafc3b..e5d2a48a84de 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -505,6 +505,7 @@ static const struct ide_tp_ops sgiioc4_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c index c0528f27fcae..4b42ca091534 100644 --- a/drivers/ide/trm290.c +++ b/drivers/ide/trm290.c @@ -171,9 +171,11 @@ static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma) local_irq_restore(flags); } -static void trm290_selectproc (ide_drive_t *drive) +static void trm290_dev_select(ide_drive_t *drive) { trm290_prepare_drive(drive, !!(drive->dev_flags & IDE_DFLAG_USING_DMA)); + + outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr); } static int trm290_dma_check(ide_drive_t *drive, struct ide_cmd *cmd) @@ -298,8 +300,18 @@ static void __devinit init_hwif_trm290(ide_hwif_t *hwif) #endif } -static const struct ide_port_ops trm290_port_ops = { - .selectproc = trm290_selectproc, +static const struct ide_tp_ops trm290_tp_ops = { + .exec_command = ide_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, + + .dev_select = trm290_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, }; static struct ide_dma_ops trm290_dma_ops = { @@ -315,7 +327,7 @@ static struct ide_dma_ops trm290_dma_ops = { static const struct ide_port_info trm290_chipset __devinitdata = { .name = DRV_NAME, .init_hwif = init_hwif_trm290, - .port_ops = &trm290_port_ops, + .tp_ops = &trm290_tp_ops, .dma_ops = &trm290_dma_ops, .host_flags = IDE_HFLAG_TRM290 | IDE_HFLAG_NO_ATAPI_DMA | diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index be391b615963..4cb79c4c2604 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -189,6 +189,7 @@ static const struct ide_tp_ops tx4938ide_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = tx4938ide_tf_load, .tf_read = tx4938ide_tf_read, diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 5a614d1c94f1..0040a9a3e26e 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -429,7 +429,7 @@ static void tx4939ide_tf_load_fixup(ide_drive_t *drive) * Fix ATA100 CORE System Control Register. (The write to the * Device/Head register may write wrong data to the System * Control Register) - * While Sys_Ctl is written here, selectproc is not needed. + * While Sys_Ctl is written here, dev_select() is not needed. */ tx4939ide_writew(sysctl, base, TX4939IDE_Sys_Ctl); } @@ -556,6 +556,7 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = tx4939ide_tf_load, .tf_read = tx4939ide_tf_read, @@ -579,6 +580,7 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { .read_altstatus = ide_read_altstatus, .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = tx4939ide_tf_load, .tf_read = ide_tf_read, diff --git a/include/linux/ide.h b/include/linux/ide.h index e919c865f0c7..c69181c61fd8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -603,7 +603,7 @@ struct ide_drive_s { unsigned int bios_cyl; /* BIOS/fdisk/LILO number of cyls */ unsigned int cyl; /* "real" number of cyls */ - unsigned int drive_data; /* used by set_pio_mode/selectproc */ + unsigned int drive_data; /* used by set_pio_mode/dev_select() */ unsigned int failures; /* current failure count */ unsigned int max_failures; /* maximum allowed failure count */ u64 probed_capacity;/* initial reported media capacity (ide-cd only currently) */ @@ -661,6 +661,7 @@ struct ide_tp_ops { u8 (*read_altstatus)(struct hwif_s *); void (*write_devctl)(struct hwif_s *, u8); + void (*dev_select)(ide_drive_t *); void (*tf_load)(ide_drive_t *, struct ide_cmd *); void (*tf_read)(ide_drive_t *, struct ide_cmd *); @@ -678,7 +679,6 @@ extern const struct ide_tp_ops default_tp_ops; * @init_dev: host specific initialization of a device * @set_pio_mode: routine to program host for PIO mode * @set_dma_mode: routine to program host for DMA mode - * @selectproc: tweaks hardware to select drive * @reset_poll: chipset polling based on hba specifics * @pre_reset: chipset specific changes to default for device-hba resets * @resetproc: routine to reset controller after a disk reset @@ -695,7 +695,6 @@ struct ide_port_ops { void (*init_dev)(ide_drive_t *); void (*set_pio_mode)(ide_drive_t *, const u8); void (*set_dma_mode)(ide_drive_t *, const u8); - void (*selectproc)(ide_drive_t *); int (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); void (*resetproc)(ide_drive_t *); @@ -1170,6 +1169,7 @@ u8 ide_read_status(ide_hwif_t *); u8 ide_read_altstatus(ide_hwif_t *); void ide_write_devctl(ide_hwif_t *, u8); +void ide_dev_select(ide_drive_t *); void ide_tf_load(ide_drive_t *, struct ide_cmd *); void ide_tf_read(ide_drive_t *, struct ide_cmd *); -- cgit v1.2.3 From fdd88f0af616db59a6a36bdf0185181d2b779f53 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 31 Mar 2009 20:15:33 +0200 Subject: ide: inline SELECT_DRIVE() Since SELECT_DRIVE() has boiled down to a mere dev_select() method call, it now makes sense to just inline it... Signed-off-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-eh.c | 7 ++++--- drivers/ide/ide-io.c | 2 +- drivers/ide/ide-iops.c | 7 +------ drivers/ide/ide-pm.c | 5 +++-- drivers/ide/ide-probe.c | 15 ++++++++------- drivers/ide/ns87415.c | 2 +- include/linux/ide.h | 1 - 7 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index de4b7f1c9c9f..5d5fb961b5ce 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -165,11 +165,12 @@ static ide_startstop_t do_reset1(ide_drive_t *, int); static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; + const struct ide_tp_ops *tp_ops = hwif->tp_ops; u8 stat; - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); udelay(10); - stat = hwif->tp_ops->read_status(hwif); + stat = tp_ops->read_status(hwif); if (OK_STAT(stat, 0, ATA_BUSY)) printk(KERN_INFO "%s: ATAPI reset complete\n", drive->name); @@ -348,7 +349,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi) /* For an ATAPI device, first try an ATAPI SRST. */ if (drive->media != ide_disk && !do_not_try_atapi) { pre_reset(drive); - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); udelay(20); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); ndelay(400); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 5589dce88674..1deb6d29b186 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -348,7 +348,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (blk_pm_request(rq)) ide_check_pm_state(drive, rq); - SELECT_DRIVE(drive); + drive->hwif->tp_ops->dev_select(drive); if (ide_wait_stat(&startstop, drive, drive->ready_stat, ATA_BUSY | ATA_DRQ, WAIT_READY)) { printk(KERN_ERR "%s: drive not ready for command\n", drive->name); diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index dfb0ec317fa3..27bb70ddd459 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -27,11 +27,6 @@ #include #include -void SELECT_DRIVE(ide_drive_t *drive) -{ - drive->hwif->tp_ops->dev_select(drive); -} - void SELECT_MASK(ide_drive_t *drive, int mask) { const struct ide_port_ops *port_ops = drive->hwif->port_ops; @@ -347,7 +342,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) disable_irq_nosync(hwif->irq); udelay(1); - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); SELECT_MASK(drive, 1); udelay(1); tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 20553d4c42a2..bb7858ebb7d1 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -223,6 +223,7 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) * point. */ ide_hwif_t *hwif = drive->hwif; + const struct ide_tp_ops *tp_ops = hwif->tp_ops; struct request_queue *q = drive->queue; unsigned long flags; int rc; @@ -232,8 +233,8 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) rc = ide_wait_not_busy(hwif, 35000); if (rc) printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name); - SELECT_DRIVE(drive); - hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); + tp_ops->dev_select(drive); + tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); rc = ide_wait_not_busy(hwif, 100000); if (rc) printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index d240f76b0da6..d8c1c3e735bb 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -390,13 +390,13 @@ static int do_probe (ide_drive_t *drive, u8 cmd) * (e.g. crw9624 as drive0 with disk as slave) */ msleep(50); - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); msleep(50); if (ide_read_device(drive) != drive->select && present == 0) { if (drive->dn & 1) { /* exit with drive0 selected */ - SELECT_DRIVE(hwif->devices[0]); + tp_ops->dev_select(hwif->devices[0]); /* allow ATA_BUSY to assert & clear */ msleep(50); } @@ -422,7 +422,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) printk(KERN_ERR "%s: no response (status = 0x%02x), " "resetting drive\n", drive->name, stat); msleep(50); - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); msleep(50); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); (void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0); @@ -441,7 +441,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) } if (drive->dn & 1) { /* exit with drive0 selected */ - SELECT_DRIVE(hwif->devices[0]); + tp_ops->dev_select(hwif->devices[0]); msleep(50); /* ensure drive irq is clear */ (void)tp_ops->read_status(hwif); @@ -605,6 +605,7 @@ out: static int ide_port_wait_ready(ide_hwif_t *hwif) { + const struct ide_tp_ops *tp_ops = hwif->tp_ops; ide_drive_t *drive; int i, rc; @@ -627,8 +628,8 @@ static int ide_port_wait_ready(ide_hwif_t *hwif) /* Ignore disks that we will not probe for later. */ if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 || (drive->dev_flags & IDE_DFLAG_PRESENT)) { - SELECT_DRIVE(drive); - hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); + tp_ops->dev_select(drive); + tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); mdelay(2); rc = ide_wait_not_busy(hwif, 35000); if (rc) @@ -640,7 +641,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif) out: /* Exit function with master reselected (let's be sane) */ if (i) - SELECT_DRIVE(hwif->devices[0]); + tp_ops->dev_select(hwif->devices[0]); return rc; } diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c index af1b421eb450..71a39fb3856f 100644 --- a/drivers/ide/ns87415.c +++ b/drivers/ide/ns87415.c @@ -262,7 +262,7 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) #ifdef __sparc_v9__ /* * XXX: Reset the device, if we don't it will not respond to - * SELECT_DRIVE() properly during first ide_probe_port(). + * dev_select() properly during first ide_probe_port(). */ timeout = 10000; outb(12, hwif->io_ports.ctl_addr); diff --git a/include/linux/ide.h b/include/linux/ide.h index c69181c61fd8..a5d26f66ef78 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1176,7 +1176,6 @@ void ide_tf_read(ide_drive_t *, struct ide_cmd *); void ide_input_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int); void ide_output_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int); -extern void SELECT_DRIVE(ide_drive_t *); void SELECT_MASK(ide_drive_t *, int); u8 ide_read_error(ide_drive_t *); -- cgit v1.2.3 From ae149b6bec64a09373ba20fce75f8aa6b14b78fd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 31 Mar 2009 15:19:15 -0700 Subject: proc tty: add struct tty_operations::proc_fops Used for gradual switch of TTY drivers from using ->read_proc which helps with gradual switch from ->read_proc for the whole tree. As side effect, fix possible race condition when ->data initialized after PDE is hooked into proc tree. ->proc_fops takes precedence over ->read_proc. Signed-off-by: Alexey Dobriyan Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_tty.c | 20 +++++++++++++------- include/linux/tty_driver.h | 1 + 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 4a9e0f65ae60..854827b1d463 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -144,16 +144,22 @@ void proc_tty_register_driver(struct tty_driver *driver) { struct proc_dir_entry *ent; - if (!driver->ops->read_proc || !driver->driver_name || - driver->proc_entry) + if (!driver->driver_name || driver->proc_entry) return; - ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); - if (!ent) + if (driver->ops->proc_fops) { + ent = proc_create_data(driver->driver_name, 0, proc_tty_driver, + driver->ops->proc_fops, driver); + if (!ent) + return; + } else if (driver->ops->read_proc) { + ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); + if (!ent) + return; + ent->read_proc = driver->ops->read_proc; + ent->data = driver; + } else return; - ent->read_proc = driver->ops->read_proc; - ent->data = driver; - driver->proc_entry = ent; } diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 08e088334dba..c9a69575ded6 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -264,6 +264,7 @@ struct tty_operations { int (*poll_get_char)(struct tty_driver *driver, int line); void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif + const struct file_operations *proc_fops; }; struct tty_driver { -- cgit v1.2.3 From 0f043a81ebe84be3576667f04fdda481609e3816 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 31 Mar 2009 15:19:25 -0700 Subject: proc tty: remove struct tty_operations::read_proc struct tty_operations::proc_fops took it's place and there is one less create_proc_read_entry() user now! Signed-off-by: Alexey Dobriyan Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/isdn/capi/capi.c | 7 ------- fs/proc/proc_tty.c | 18 ++++-------------- include/linux/tty_driver.h | 2 -- net/bluetooth/rfcomm/tty.c | 6 ------ 4 files changed, 4 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 3e468d2cf730..2d8352419c0d 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -1331,12 +1331,6 @@ static void capinc_tty_send_xchar(struct tty_struct *tty, char ch) #endif } -static int capinc_tty_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - return 0; -} - static struct tty_driver *capinc_tty_driver; static const struct tty_operations capinc_ops = { @@ -1358,7 +1352,6 @@ static const struct tty_operations capinc_ops = { .flush_buffer = capinc_tty_flush_buffer, .set_ldisc = capinc_tty_set_ldisc, .send_xchar = capinc_tty_send_xchar, - .read_proc = capinc_tty_read_proc, }; static int capinc_tty_init(void) diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 854827b1d463..83adcc869437 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -144,22 +144,12 @@ void proc_tty_register_driver(struct tty_driver *driver) { struct proc_dir_entry *ent; - if (!driver->driver_name || driver->proc_entry) + if (!driver->driver_name || driver->proc_entry || + !driver->ops->proc_fops) return; - if (driver->ops->proc_fops) { - ent = proc_create_data(driver->driver_name, 0, proc_tty_driver, - driver->ops->proc_fops, driver); - if (!ent) - return; - } else if (driver->ops->read_proc) { - ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); - if (!ent) - return; - ent->read_proc = driver->ops->read_proc; - ent->data = driver; - } else - return; + ent = proc_create_data(driver->driver_name, 0, proc_tty_driver, + driver->ops->proc_fops, driver); driver->proc_entry = ent; } diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index c9a69575ded6..8615d661ab60 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -252,8 +252,6 @@ struct tty_operations { void (*set_ldisc)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, int timeout); void (*send_xchar)(struct tty_struct *tty, char ch); - int (*read_proc)(char *page, char **start, off_t off, - int count, int *eof, void *data); int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index abdc703a11d2..cab71ea2796d 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -1093,11 +1093,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) } } -static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused) -{ - return 0; -} - static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; @@ -1156,7 +1151,6 @@ static const struct tty_operations rfcomm_ops = { .send_xchar = rfcomm_tty_send_xchar, .hangup = rfcomm_tty_hangup, .wait_until_sent = rfcomm_tty_wait_until_sent, - .read_proc = rfcomm_tty_read_proc, .tiocmget = rfcomm_tty_tiocmget, .tiocmset = rfcomm_tty_tiocmset, }; -- cgit v1.2.3 From 9de1581e75ba9d7979766d4ab6d56f0f2d87f7c6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Mar 2009 15:19:29 -0700 Subject: get_mm_hiwater_xxx: trivial, s/define/inline/ Andrew pointed out get_mm_hiwater_xxx() evaluate "mm" argument thrice/twice, make them inline. Signed-off-by: Oleg Nesterov Cc: Hugh Dickins Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 29df6374d2de..481fad3a9b42 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -391,8 +391,15 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) -#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm)) -#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm) +static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) +{ + return max(mm->hiwater_rss, get_mm_rss(mm)); +} + +static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) +{ + return max(mm->hiwater_vm, mm->total_vm); +} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -- cgit v1.2.3 From ee99c71c59f897436ec65debb99372b3146f9985 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 31 Mar 2009 15:19:31 -0700 Subject: mm: introduce for_each_populated_zone() macro Impact: cleanup In almost cases, for_each_zone() is used with populated_zone(). It's because almost function doesn't need memoryless node information. Therefore, for_each_populated_zone() can help to make code simplify. This patch has no functional change. [akpm@linux-foundation.org: small cleanup] Signed-off-by: KOSAKI Motohiro Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Reviewed-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 8 ++++++++ kernel/power/snapshot.c | 9 +++------ kernel/power/swsusp.c | 17 ++++++++--------- mm/page_alloc.c | 26 +++++--------------------- mm/vmscan.c | 4 +--- mm/vmstat.c | 11 ++--------- 6 files changed, 27 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1aca6cebbb78..26ef24076b76 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -806,6 +806,14 @@ extern struct zone *next_zone(struct zone *zone); zone; \ zone = next_zone(zone)) +#define for_each_populated_zone(zone) \ + for (zone = (first_online_pgdat())->node_zones; \ + zone; \ + zone = next_zone(zone)) \ + if (!populated_zone(zone)) \ + ; /* do nothing */ \ + else + static inline struct zone *zonelist_zone(struct zoneref *zoneref) { return zoneref->zone; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f5fc2d7680f2..33e2e4a819f9 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -321,13 +321,10 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) INIT_LIST_HEAD(list); - for_each_zone(zone) { + for_each_populated_zone(zone) { unsigned long zone_start, zone_end; struct mem_extent *ext, *cur, *aux; - if (!populated_zone(zone)) - continue; - zone_start = zone->zone_start_pfn; zone_end = zone->zone_start_pfn + zone->spanned_pages; @@ -804,8 +801,8 @@ static unsigned int count_free_highmem_pages(void) struct zone *zone; unsigned int cnt = 0; - for_each_zone(zone) - if (populated_zone(zone) && is_highmem(zone)) + for_each_populated_zone(zone) + if (is_highmem(zone)) cnt += zone_page_state(zone, NR_FREE_PAGES); return cnt; diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index a92c91451559..1ee6636414b2 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -229,17 +229,16 @@ int swsusp_shrink_memory(void) size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; tmp = size; size += highmem_size; - for_each_zone (zone) - if (populated_zone(zone)) { - tmp += snapshot_additional_pages(zone); - if (is_highmem(zone)) { - highmem_size -= + for_each_populated_zone(zone) { + tmp += snapshot_additional_pages(zone); + if (is_highmem(zone)) { + highmem_size -= zone_page_state(zone, NR_FREE_PAGES); - } else { - tmp -= zone_page_state(zone, NR_FREE_PAGES); - tmp += zone->lowmem_reserve[ZONE_NORMAL]; - } + } else { + tmp -= zone_page_state(zone, NR_FREE_PAGES); + tmp += zone->lowmem_reserve[ZONE_NORMAL]; } + } if (highmem_size < 0) highmem_size = 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a3803ea8c27d..cbd532161f68 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -922,13 +922,10 @@ static void drain_pages(unsigned int cpu) unsigned long flags; struct zone *zone; - for_each_zone(zone) { + for_each_populated_zone(zone) { struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - if (!populated_zone(zone)) - continue; - pset = zone_pcp(zone, cpu); pcp = &pset->pcp; @@ -1879,10 +1876,7 @@ void show_free_areas(void) int cpu; struct zone *zone; - for_each_zone(zone) { - if (!populated_zone(zone)) - continue; - + for_each_populated_zone(zone) { show_node(zone); printk("%s per-cpu:\n", zone->name); @@ -1922,12 +1916,9 @@ void show_free_areas(void) global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE)); - for_each_zone(zone) { + for_each_populated_zone(zone) { int i; - if (!populated_zone(zone)) - continue; - show_node(zone); printk("%s" " free:%lukB" @@ -1967,12 +1958,9 @@ void show_free_areas(void) printk("\n"); } - for_each_zone(zone) { + for_each_populated_zone(zone) { unsigned long nr[MAX_ORDER], flags, order, total = 0; - if (!populated_zone(zone)) - continue; - show_node(zone); printk("%s: ", zone->name); @@ -2784,11 +2772,7 @@ static int __cpuinit process_zones(int cpu) node_set_state(node, N_CPU); /* this node has a cpu */ - for_each_zone(zone) { - - if (!populated_zone(zone)) - continue; - + for_each_populated_zone(zone) { zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), GFP_KERNEL, node); if (!zone_pcp(zone, cpu)) diff --git a/mm/vmscan.c b/mm/vmscan.c index 1bca60f0c527..301f057fd115 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2061,11 +2061,9 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, struct zone *zone; unsigned long ret = 0; - for_each_zone(zone) { + for_each_populated_zone(zone) { enum lru_list l; - if (!populated_zone(zone)) - continue; if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) continue; diff --git a/mm/vmstat.c b/mm/vmstat.c index 8cd81ea1ddc1..9826766f1274 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -135,11 +135,7 @@ static void refresh_zone_stat_thresholds(void) int cpu; int threshold; - for_each_zone(zone) { - - if (!zone->present_pages) - continue; - + for_each_populated_zone(zone) { threshold = calculate_threshold(zone); for_each_online_cpu(cpu) @@ -301,12 +297,9 @@ void refresh_cpu_vm_stats(int cpu) int i; int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; - for_each_zone(zone) { + for_each_populated_zone(zone) { struct per_cpu_pageset *p; - if (!populated_zone(zone)) - continue; - p = zone_pcp(zone, cpu); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) -- cgit v1.2.3 From e3a7cca1ef4c1af9b0acef9bd66eff6582a737b5 Mon Sep 17 00:00:00 2001 From: Edward Shishkin Date: Tue, 31 Mar 2009 15:19:39 -0700 Subject: vfs: add/use account_page_dirtied() Add a helper function account_page_dirtied(). Use that from two callsites. reiser4 adds a function which adds a third callsite. Signed-off-by: Edward Shishkin Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 9 +-------- include/linux/mm.h | 1 + mm/page-writeback.c | 22 +++++++++++++++------- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index a2fd743d97cb..73abe6d8218c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -621,14 +621,7 @@ static void __set_page_dirty(struct page *page, spin_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); - - if (mapping_cap_account_dirty(mapping)) { - __inc_zone_page_state(page, NR_FILE_DIRTY); - __inc_bdi_stat(mapping->backing_dev_info, - BDI_RECLAIMABLE); - task_dirty_inc(current); - task_io_account_write(PAGE_CACHE_SIZE); - } + account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } diff --git a/include/linux/mm.h b/include/linux/mm.h index b1ea37fc7a24..2223f8dfa568 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -834,6 +834,7 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); +void account_page_dirtied(struct page *page, struct address_space *mapping); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 40ca7cdb653e..6aa92b03c747 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1197,6 +1197,20 @@ int __set_page_dirty_no_writeback(struct page *page) return 0; } +/* + * Helper function for set_page_dirty family. + * NOTE: This relies on being atomic wrt interrupts. + */ +void account_page_dirtied(struct page *page, struct address_space *mapping) +{ + if (mapping_cap_account_dirty(mapping)) { + __inc_zone_page_state(page, NR_FILE_DIRTY); + __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + task_dirty_inc(current); + task_io_account_write(PAGE_CACHE_SIZE); + } +} + /* * For address_spaces which do not use buffers. Just tag the page as dirty in * its radix tree. @@ -1226,13 +1240,7 @@ int __set_page_dirty_nobuffers(struct page *page) if (mapping2) { /* Race with truncate? */ BUG_ON(mapping2 != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); - if (mapping_cap_account_dirty(mapping)) { - __inc_zone_page_state(page, NR_FILE_DIRTY); - __inc_bdi_stat(mapping->backing_dev_info, - BDI_RECLAIMABLE); - task_dirty_inc(current); - task_io_account_write(PAGE_CACHE_SIZE); - } + account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } -- cgit v1.2.3 From d1d7487173eab8352125cf6cc271940f24254bd4 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 31 Mar 2009 15:23:14 -0700 Subject: mm: remove pagevec_swap_free() pagevec_swap_free() is now unused. Signed-off-by: KOSAKI Motohiro Cc: Johannes Weiner Cc: Rik van Riel Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 1 - mm/swap.c | 23 ----------------------- 2 files changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 7b2886fa7fdc..bab82f4c571c 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -24,7 +24,6 @@ void __pagevec_release(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); -void pagevec_swap_free(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); unsigned pagevec_lookup_tag(struct pagevec *pvec, diff --git a/mm/swap.c b/mm/swap.c index 8adb9feb61e1..6e83084c1f6c 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -456,29 +456,6 @@ void pagevec_strip(struct pagevec *pvec) } } -/** - * pagevec_swap_free - try to free swap space from the pages in a pagevec - * @pvec: pagevec with swapcache pages to free the swap space of - * - * The caller needs to hold an extra reference to each page and - * not hold the page lock on the pages. This function uses a - * trylock on the page lock so it may not always free the swap - * space associated with a page. - */ -void pagevec_swap_free(struct pagevec *pvec) -{ - int i; - - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - - if (PageSwapCache(page) && trylock_page(page)) { - try_to_free_swap(page); - unlock_page(page); - } - } -} - /** * pagevec_lookup - gang pagecache lookup * @pvec: Where the resulting pages are placed -- cgit v1.2.3 From 610a77e04a8d9fe8764dc484e2182fa251ce1cc2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 31 Mar 2009 15:23:16 -0700 Subject: memdup_user(): introduce I notice there are many places doing copy_from_user() which follows kmalloc(): dst = kmalloc(len, GFP_KERNEL); if (!dst) return -ENOMEM; if (copy_from_user(dst, src, len)) { kfree(dst); return -EFAULT } memdup_user() is a wrapper of the above code. With this new function, we don't have to write 'len' twice, which can lead to typos/mistakes. It also produces smaller code and kernel text. A quick grep shows 250+ places where memdup_user() *may* be used. I'll prepare a patchset to do this conversion. Signed-off-by: Li Zefan Cc: KOSAKI Motohiro Cc: Americo Wang Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 1 + mm/util.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index d18fc198aa2f..8852739f36df 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -12,6 +12,7 @@ #include /* for NULL */ extern char *strndup_user(const char __user *, long); +extern void *memdup_user(const void __user *, size_t); /* * Include machine specific inline routines diff --git a/mm/util.c b/mm/util.c index 37eaccdf3054..7c122e49f769 100644 --- a/mm/util.c +++ b/mm/util.c @@ -69,6 +69,36 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp) } EXPORT_SYMBOL(kmemdup); +/** + * memdup_user - duplicate memory region from user space + * + * @src: source address in user space + * @len: number of bytes to copy + * + * Returns an ERR_PTR() on failure. + */ +void *memdup_user(const void __user *src, size_t len) +{ + void *p; + + /* + * Always use GFP_KERNEL, since copy_from_user() can sleep and + * cause pagefault, which makes it pointless to use GFP_NOFS + * or GFP_ATOMIC. + */ + p = kmalloc_track_caller(len, GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(p, src, len)) { + kfree(p); + return ERR_PTR(-EFAULT); + } + + return p; +} +EXPORT_SYMBOL(memdup_user); + /** * __krealloc - like krealloc() but don't free @p. * @p: object to reallocate memory for. -- cgit v1.2.3 From 6a11f75b6a17b5d9ac5025f8d048382fd1f47377 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 31 Mar 2009 15:23:17 -0700 Subject: generic debug pagealloc CONFIG_DEBUG_PAGEALLOC is now supported by x86, powerpc, sparc64, and s390. This patch implements it for the rest of the architectures by filling the pages with poison byte patterns after free_pages() and verifying the poison patterns before alloc_pages(). This generic one cannot detect invalid page accesses immediately but invalid read access may cause invalid dereference by poisoned memory and invalid write access can be detected after a long delay. Signed-off-by: Akinobu Mita Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/avr32/mm/fault.c | 18 ------ arch/powerpc/Kconfig | 3 + arch/powerpc/Kconfig.debug | 1 + arch/s390/Kconfig | 3 + arch/s390/Kconfig.debug | 1 + arch/sparc/Kconfig | 3 + arch/sparc/Kconfig.debug | 3 +- arch/x86/Kconfig | 3 + arch/x86/Kconfig.debug | 1 + include/linux/mm_types.h | 4 ++ include/linux/page-debug-flags.h | 30 +++++++++ include/linux/poison.h | 3 + lib/Kconfig.debug | 1 + mm/Kconfig.debug | 17 ++++++ mm/Makefile | 1 + mm/debug-pagealloc.c | 129 +++++++++++++++++++++++++++++++++++++++ 16 files changed, 202 insertions(+), 19 deletions(-) create mode 100644 include/linux/page-debug-flags.h create mode 100644 mm/Kconfig.debug create mode 100644 mm/debug-pagealloc.c (limited to 'include/linux') diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index ce4e4296b954..62d4abbaa654 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -250,21 +250,3 @@ asmlinkage void do_bus_error(unsigned long addr, int write_access, dump_dtlb(); die("Bus Error", regs, SIGKILL); } - -/* - * This functionality is currently not possible to implement because - * we're using segmentation to ensure a fixed mapping of the kernel - * virtual address space. - * - * It would be possible to implement this, but it would require us to - * disable segmentation at startup and load the kernel mappings into - * the TLB like any other pages. There will be lots of trickery to - * avoid recursive invocation of the TLB miss handler, though... - */ -#ifdef CONFIG_DEBUG_PAGEALLOC -void kernel_map_pages(struct page *page, int numpages, int enable) -{ - -} -EXPORT_SYMBOL(kernel_map_pages); -#endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ad6b1c084fe3..45192dce65c4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -228,6 +228,9 @@ config PPC_OF_PLATFORM_PCI depends on PPC64 # not supported on 32 bits yet default n +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 22091bbfdc9b..6aa0b5e087cd 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -30,6 +30,7 @@ config DEBUG_STACK_USAGE config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL && !HIBERNATION + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2a8af5e16345..dcb667c4375a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -72,6 +72,9 @@ config PGSTE config VIRT_CPU_ACCOUNTING def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + mainmenu "Linux Kernel Configuration" config S390 diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 4599fa06bd82..7e297a3cde34 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -9,6 +9,7 @@ source "lib/Kconfig.debug" config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a slowdown, but helps to find certain types of diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c3ea215334f6..cc12cd48bbc5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -124,6 +124,9 @@ config ARCH_NO_VIRT_TO_BUS config OF def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y if SPARC64 + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index b8a15e271bfa..d001b42041a5 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -24,7 +24,8 @@ config STACK_DEBUG config DEBUG_PAGEALLOC bool "Debug page memory allocations" - depends on SPARC64 && DEBUG_KERNEL && !HIBERNATION + depends on DEBUG_KERNEL && !HIBERNATION + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 45161b816313..748e50a1a152 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -165,6 +165,9 @@ config AUDIT_ARCH config ARCH_SUPPORTS_OPTIMIZED_INLINING def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index fdb45df608b6..a345cb5447a8 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -75,6 +75,7 @@ config DEBUG_STACK_USAGE config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC ---help--- Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d84feb7bdbf0..ddadb4defe00 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -174,6 +175,9 @@ struct vm_area_struct { #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif +#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS + unsigned long debug_flags; /* Use atomic bitops on this */ +#endif }; struct core_thread { diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h new file mode 100644 index 000000000000..b0638fd91e92 --- /dev/null +++ b/include/linux/page-debug-flags.h @@ -0,0 +1,30 @@ +#ifndef LINUX_PAGE_DEBUG_FLAGS_H +#define LINUX_PAGE_DEBUG_FLAGS_H + +/* + * page->debug_flags bits: + * + * PAGE_DEBUG_FLAG_POISON is set for poisoned pages. This is used to + * implement generic debug pagealloc feature. The pages are filled with + * poison patterns and set this flag after free_pages(). The poisoned + * pages are verified whether the patterns are not corrupted and clear + * the flag before alloc_pages(). + */ + +enum page_debug_flags { + PAGE_DEBUG_FLAG_POISON, /* Page is poisoned */ +}; + +/* + * Ensure that CONFIG_WANT_PAGE_DEBUG_FLAGS reliably + * gets turned off when no debug features are enabling it! + */ + +#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS +#if !defined(CONFIG_PAGE_POISONING) \ +/* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */ +#error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features! +#endif +#endif /* CONFIG_WANT_PAGE_DEBUG_FLAGS */ + +#endif /* LINUX_PAGE_DEBUG_FLAGS_H */ diff --git a/include/linux/poison.h b/include/linux/poison.h index 9f31683728fd..6729f7dcd60e 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -17,6 +17,9 @@ */ #define TIMER_ENTRY_STATIC ((void *) 0x74737461) +/********** mm/debug-pagealloc.c **********/ +#define PAGE_POISON 0xaa + /********** mm/slab.c **********/ /* * Magic nums for obj red zoning. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 58bfe7e8faba..9638d99644af 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -796,6 +796,7 @@ config SYSCTL_SYSCALL_CHECK to properly maintain and use. This enables checks that help you to keep things correct. +source mm/Kconfig.debug source kernel/trace/Kconfig config PROVIDE_OHCI1394_DMA_INIT diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug new file mode 100644 index 000000000000..c8d62d49a44e --- /dev/null +++ b/mm/Kconfig.debug @@ -0,0 +1,17 @@ +config WANT_PAGE_DEBUG_FLAGS + bool + +config PAGE_POISONING + bool "Debug page memory allocations" + depends on DEBUG_KERNEL && !ARCH_SUPPORTS_DEBUG_PAGEALLOC + depends on !HIBERNATION + select DEBUG_PAGEALLOC + select WANT_PAGE_DEBUG_FLAGS + help + Fill the pages with poison patterns after free_pages() and verify + the patterns before alloc_pages(). This results in a large slowdown, + but helps to find certain types of memory corruptions. + + This option cannot enalbe with hibernation. Otherwise, it will get + wrong messages for memory corruption because the free pages are not + saved to the suspend image. diff --git a/mm/Makefile b/mm/Makefile index 818569b68f46..ec73c68b6015 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o +obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_FAILSLAB) += failslab.o diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c new file mode 100644 index 000000000000..a1e3324de2b5 --- /dev/null +++ b/mm/debug-pagealloc.c @@ -0,0 +1,129 @@ +#include +#include +#include +#include + +static inline void set_page_poison(struct page *page) +{ + __set_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static inline void clear_page_poison(struct page *page) +{ + __clear_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static inline bool page_poison(struct page *page) +{ + return test_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static void poison_highpage(struct page *page) +{ + /* + * Page poisoning for highmem pages is not implemented. + * + * This can be called from interrupt contexts. + * So we need to create a new kmap_atomic slot for this + * application and it will need interrupt protection. + */ +} + +static void poison_page(struct page *page) +{ + void *addr; + + if (PageHighMem(page)) { + poison_highpage(page); + return; + } + set_page_poison(page); + addr = page_address(page); + memset(addr, PAGE_POISON, PAGE_SIZE); +} + +static void poison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + poison_page(page + i); +} + +static bool single_bit_flip(unsigned char a, unsigned char b) +{ + unsigned char error = a ^ b; + + return error && !(error & (error - 1)); +} + +static void check_poison_mem(unsigned char *mem, size_t bytes) +{ + unsigned char *start; + unsigned char *end; + + for (start = mem; start < mem + bytes; start++) { + if (*start != PAGE_POISON) + break; + } + if (start == mem + bytes) + return; + + for (end = mem + bytes - 1; end > start; end--) { + if (*end != PAGE_POISON) + break; + } + + if (!printk_ratelimit()) + return; + else if (start == end && single_bit_flip(*start, PAGE_POISON)) + printk(KERN_ERR "pagealloc: single bit error\n"); + else + printk(KERN_ERR "pagealloc: memory corruption\n"); + + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, + end - start + 1, 1); + dump_stack(); +} + +static void unpoison_highpage(struct page *page) +{ + /* + * See comment in poison_highpage(). + * Highmem pages should not be poisoned for now + */ + BUG_ON(page_poison(page)); +} + +static void unpoison_page(struct page *page) +{ + if (PageHighMem(page)) { + unpoison_highpage(page); + return; + } + if (page_poison(page)) { + void *addr = page_address(page); + + check_poison_mem(addr, PAGE_SIZE); + clear_page_poison(page); + } +} + +static void unpoison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + unpoison_page(page + i); +} + +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (!debug_pagealloc_enabled) + return; + + if (enable) + unpoison_pages(page, numpages); + else + poison_pages(page, numpages); +} -- cgit v1.2.3 From 704503d836042d4a4c7685b7036e7de0418fbc0f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 31 Mar 2009 15:23:18 -0700 Subject: mm: fix proc_dointvec_userhz_jiffies "breakage" Addresses http://bugzilla.kernel.org/show_bug.cgi?id=9838 On i386, HZ=1000, jiffies_to_clock_t() converts time in a somewhat strange way from the user's point of view: # echo 500 >/proc/sys/vm/dirty_writeback_centisecs # cat /proc/sys/vm/dirty_writeback_centisecs 499 So, we have 5000 jiffies converted to only 499 clock ticks and reported back. TICK_NSEC = 999848 ACTHZ = 256039 Keeping in-kernel variable in units passed from userspace will fix issue of course, but this probably won't be right for every sysctl. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Alexey Dobriyan Cc: Peter Zijlstra Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 4 ++-- kernel/sysctl.c | 2 +- mm/page-writeback.c | 20 +++++++++++--------- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 7300ecdc480c..93445477f86a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -109,8 +109,8 @@ extern int dirty_background_ratio; extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; extern unsigned long vm_dirty_bytes; -extern int dirty_writeback_interval; -extern int dirty_expire_interval; +extern unsigned int dirty_writeback_interval; +extern unsigned int dirty_expire_interval; extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c5ef44ff850f..2e490a389dd2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1010,7 +1010,7 @@ static struct ctl_table vm_table[] = { .data = &dirty_expire_interval, .maxlen = sizeof(dirty_expire_interval), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = &proc_dointvec, }, { .ctl_name = VM_NR_PDFLUSH_THREADS, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6aa92b03c747..30351f0063ac 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -92,14 +92,14 @@ int vm_dirty_ratio = 20; unsigned long vm_dirty_bytes; /* - * The interval between `kupdate'-style writebacks, in jiffies + * The interval between `kupdate'-style writebacks */ -int dirty_writeback_interval = 5 * HZ; +unsigned int dirty_writeback_interval = 5 * 100; /* sentiseconds */ /* - * The longest number of jiffies for which data is allowed to remain dirty + * The longest time for which data is allowed to remain dirty */ -int dirty_expire_interval = 30 * HZ; +unsigned int dirty_expire_interval = 30 * 100; /* sentiseconds */ /* * Flag that makes the machine dump writes/reads and block dirtyings. @@ -770,9 +770,9 @@ static void wb_kupdate(unsigned long arg) sync_supers(); - oldest_jif = jiffies - dirty_expire_interval; + oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval); start_jif = jiffies; - next_jif = start_jif + dirty_writeback_interval; + next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10); nr_to_write = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) + (inodes_stat.nr_inodes - inodes_stat.nr_unused); @@ -801,9 +801,10 @@ static void wb_kupdate(unsigned long arg) int dirty_writeback_centisecs_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_userhz_jiffies(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, file, buffer, length, ppos); if (dirty_writeback_interval) - mod_timer(&wb_timer, jiffies + dirty_writeback_interval); + mod_timer(&wb_timer, jiffies + + msecs_to_jiffies(dirty_writeback_interval * 10)); else del_timer(&wb_timer); return 0; @@ -905,7 +906,8 @@ void __init page_writeback_init(void) { int shift; - mod_timer(&wb_timer, jiffies + dirty_writeback_interval); + mod_timer(&wb_timer, + jiffies + msecs_to_jiffies(dirty_writeback_interval * 10)); writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); -- cgit v1.2.3 From c2fdf3a9b2d52842808a8e551b53b55dd9b45030 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 31 Mar 2009 15:23:19 -0700 Subject: mm: enable hashdist by default on 64bit NUMA On PowerPC we allocate large boot time hashes on node 0. This leads to an imbalance in the free memory, for example on a 64GB box (4 x 16GB nodes): Free memory: Node 0: 97.03% Node 1: 98.54% Node 2: 98.42% Node 3: 98.53% If we switch to using vmalloc (like ia64 and x86-64) things are more balanced: Free memory: Node 0: 97.53% Node 1: 98.35% Node 2: 98.33% Node 3: 98.33% For many HPC applications we are limited by the free available memory on the smallest node, so even though the same amount of memory is used the better balancing helps. Since all 64bit NUMA capable architectures should have sufficient vmalloc space, it makes sense to enable it via CONFIG_64BIT. Signed-off-by: Anton Blanchard Acked-by: David S. Miller Acked-by: Benjamin Herrenschmidt Acked-by: Ralf Baechle Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 455d83219fae..bc3ab7073695 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -146,10 +146,10 @@ extern void *alloc_large_system_hash(const char *tablename, #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ -/* Only NUMA needs hash distribution. - * IA64 and x86_64 have sufficient vmalloc space. +/* Only NUMA needs hash distribution. 64bit NUMA architectures have + * sufficient vmalloc space. */ -#if defined(CONFIG_NUMA) && (defined(CONFIG_IA64) || defined(CONFIG_X86_64)) +#if defined(CONFIG_NUMA) && defined(CONFIG_64BIT) #define HASHDIST_DEFAULT 1 #else #define HASHDIST_DEFAULT 0 -- cgit v1.2.3 From c2ec175c39f62949438354f603f4aa170846aabb Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 31 Mar 2009 15:23:21 -0700 Subject: mm: page_mkwrite change prototype to match fault Change the page_mkwrite prototype to take a struct vm_fault, and return VM_FAULT_xxx flags. There should be no functional change. This makes it possible to return much more detailed error information to the VM (and also can provide more information eg. virtual_address to the driver, which might be important in some special cases). This is required for a subsequent fix. And will also make it easier to merge page_mkwrite() with fault() in future. Signed-off-by: Nick Piggin Cc: Chris Mason Cc: Trond Myklebust Cc: Miklos Szeredi Cc: Steven Whitehouse Cc: Mark Fasheh Cc: Joel Becker Cc: Artem Bityutskiy Cc: Felix Blyakher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 2 +- drivers/video/fb_defio.c | 3 ++- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 5 ++++- fs/buffer.c | 6 +++++- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 5 ++++- fs/fuse/file.c | 3 ++- fs/gfs2/ops_file.c | 5 ++++- fs/nfs/file.c | 5 ++++- fs/ocfs2/mmap.c | 6 ++++-- fs/ubifs/file.c | 9 ++++++--- fs/xfs/linux-2.6/xfs_file.c | 4 ++-- include/linux/buffer_head.h | 2 +- include/linux/mm.h | 3 ++- mm/memory.c | 26 ++++++++++++++++++++++---- 16 files changed, 65 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4e78ce677843..76efe5b71d7d 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -505,7 +505,7 @@ prototypes: void (*open)(struct vm_area_struct*); void (*close)(struct vm_area_struct*); int (*fault)(struct vm_area_struct*, struct vm_fault *); - int (*page_mkwrite)(struct vm_area_struct *, struct page *); + int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); locking rules: diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 082026546aee..0a7a6679ee6e 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -85,8 +85,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); /* vm_ops->page_mkwrite handler */ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, - struct page *page) + struct vm_fault *vmf) { + struct page *page = vmf->page; struct fb_info *info = vma->vm_private_data; struct fb_deferred_io *fbdefio = info->fbdefio; struct page *cur; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e1d4e30e9d8..7dd1b6d0bf32 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2060,7 +2060,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d4f948bc22a..ec5423790bbb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4292,8 +4292,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. */ -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = fdentry(vma->vm_file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -4362,6 +4363,8 @@ again: out_unlock: unlock_page(page); out: + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index 73abe6d8218c..6d51a3da362c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2313,9 +2313,10 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) * unlock the page. */ int -block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; loff_t size; @@ -2340,6 +2341,9 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page, ret = block_commit_write(page, 0, end); out_unlock: + if (ret) + ret = VM_FAULT_SIGBUS; + unlock_page(page); return ret; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6083bb38057b..990c94000924 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1098,7 +1098,7 @@ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t ext4_get_reserved_space(struct inode *inode); /* ioctl.c */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 71d3ecd5db79..dd82ff390067 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5146,8 +5146,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) return !buffer_mapped(bh); } -int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; loff_t size; unsigned long len; int ret = -EINVAL; @@ -5199,6 +5200,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) goto out_unlock; ret = 0; out_unlock: + if (ret) + ret = VM_FAULT_SIGBUS; up_read(&inode->i_alloc_sem); return ret; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 821d10f719bd..4e340fedf768 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1234,8 +1234,9 @@ static void fuse_vma_close(struct vm_area_struct *vma) * - sync(2) * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER */ -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; /* * Don't use page->mapping as it may become NULL from a * concurrent truncate. diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 3b9e8de3500b..70b9b8548945 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -337,8 +337,9 @@ static int gfs2_allocate_page_backing(struct page *page) * blocks allocated on disk to back that page. */ -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -412,6 +413,8 @@ out_unlock: gfs2_glock_dq(&gh); out: gfs2_holder_uninit(&gh); + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 90f292b520d2..cec79392e4ba 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -451,8 +451,9 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, }; -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct file *filp = vma->vm_file; struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; @@ -483,6 +484,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = pagelen; out_unlock: unlock_page(page); + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index eea1d24713ea..b606496b72ec 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -154,8 +154,9 @@ out: return ret; } -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct buffer_head *di_bh = NULL; sigset_t blocked, oldset; @@ -196,7 +197,8 @@ out: ret2 = ocfs2_vm_op_unblock_sigs(&oldset); if (ret2 < 0) mlog_errno(ret2); - + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 93b6de51f261..0ff89fe71e51 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1434,8 +1434,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) * mmap()d file has taken write protection fault and is being made * writable. UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); @@ -1447,7 +1448,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); if (unlikely(c->ro_media)) - return -EROFS; + return VM_FAULT_SIGBUS; /* -EROFS */ /* * We have not locked @page so far so we may budget for changing the @@ -1480,7 +1481,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) if (err == -ENOSPC) ubifs_warn("out of space for mmapped file " "(inode number %lu)", inode->i_ino); - return err; + return VM_FAULT_SIGBUS; } lock_page(page); @@ -1520,6 +1521,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) out_unlock: unlock_page(page); ubifs_release_budget(c, &req); + if (err) + err = VM_FAULT_SIGBUS; return err; } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index e14c4e3aea0c..f4e255441574 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -234,9 +234,9 @@ xfs_file_mmap( STATIC int xfs_vm_page_mkwrite( struct vm_area_struct *vma, - struct page *page) + struct vm_fault *vmf) { - return block_page_mkwrite(vma, page, xfs_get_blocks); + return block_page_mkwrite(vma, vmf, xfs_get_blocks); } const struct file_operations xfs_file_operations = { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f19fd9045ea0..3d7bcde2e332 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -216,7 +216,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); diff --git a/include/linux/mm.h b/include/linux/mm.h index 2223f8dfa568..aeabe953ba4f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -135,6 +135,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -187,7 +188,7 @@ struct vm_operations_struct { /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware diff --git a/mm/memory.c b/mm/memory.c index 5b4ad5e4f98d..cf6873e91c6a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1945,6 +1945,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, * get_user_pages(.write=1, .force=1). */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { + struct vm_fault vmf; + int tmp; + + vmf.virtual_address = (void __user *)(address & + PAGE_MASK); + vmf.pgoff = old_page->index; + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + vmf.page = old_page; + /* * Notify the address space that the page is about to * become writable so that it can prohibit this or wait @@ -1956,8 +1965,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, page_cache_get(old_page); pte_unmap_unlock(page_table, ptl); - if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); + if (unlikely(tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { + ret = tmp; goto unwritable_page; + } /* * Since we dropped the lock we need to revalidate @@ -2106,7 +2119,7 @@ oom: unwritable_page: page_cache_release(old_page); - return VM_FAULT_SIGBUS; + return ret; } /* @@ -2648,9 +2661,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, * to become writable */ if (vma->vm_ops->page_mkwrite) { + int tmp; + unlock_page(page); - if (vma->vm_ops->page_mkwrite(vma, page) < 0) { - ret = VM_FAULT_SIGBUS; + vmf.flags |= FAULT_FLAG_MKWRITE; + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); + if (unlikely(tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { + ret = tmp; anon = 1; /* no anon but release vmf.page */ goto out_unlocked; } -- cgit v1.2.3 From f4112de6b679d84bd9b9681c7504be7bdfb7c7d5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 31 Mar 2009 15:23:25 -0700 Subject: mm: introduce debug_kmap_atomic x86 has debug_kmap_atomic_prot() which is error checking function for kmap_atomic. It is usefull for the other architectures, although it needs CONFIG_TRACE_IRQFLAGS_SUPPORT. This patch exposes it to the other architectures. Signed-off-by: Akinobu Mita Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/highmem_32.c | 45 +-------------------------------------------- include/linux/highmem.h | 12 ++++++++++++ mm/highmem.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 522db5e3d0bf..8126e8d1a2a4 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -19,49 +19,6 @@ void kunmap(struct page *page) kunmap_high(page); } -static void debug_kmap_atomic_prot(enum km_type type) -{ -#ifdef CONFIG_DEBUG_HIGHMEM - static unsigned warn_count = 10; - - if (unlikely(warn_count == 0)) - return; - - if (unlikely(in_interrupt())) { - if (in_irq()) { - if (type != KM_IRQ0 && type != KM_IRQ1 && - type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && - type != KM_BOUNCE_READ) { - WARN_ON(1); - warn_count--; - } - } else if (!irqs_disabled()) { /* softirq */ - if (type != KM_IRQ0 && type != KM_IRQ1 && - type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && - type != KM_SKB_SUNRPC_DATA && - type != KM_SKB_DATA_SOFTIRQ && - type != KM_BOUNCE_READ) { - WARN_ON(1); - warn_count--; - } - } - } - - if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || - type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { - if (!irqs_disabled()) { - WARN_ON(1); - warn_count--; - } - } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { - if (irq_count() == 0 && !irqs_disabled()) { - WARN_ON(1); - warn_count--; - } - } -#endif -} - /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -81,7 +38,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) if (!PageHighMem(page)) return page_address(page); - debug_kmap_atomic_prot(type); + debug_kmap_atomic(type); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 13875ce9112a..7ff5c55f9b55 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -187,4 +187,16 @@ static inline void copy_highpage(struct page *to, struct page *from) kunmap_atomic(vto, KM_USER1); } +#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT) + +void debug_kmap_atomic(enum km_type type); + +#else + +static inline void debug_kmap_atomic(enum km_type type) +{ +} + +#endif + #endif /* _LINUX_HIGHMEM_H */ diff --git a/mm/highmem.c b/mm/highmem.c index 910198037bf5..68eb1d9b63fa 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -422,3 +422,48 @@ void __init page_address_init(void) } #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */ + +#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT) + +void debug_kmap_atomic(enum km_type type) +{ + static unsigned warn_count = 10; + + if (unlikely(warn_count == 0)) + return; + + if (unlikely(in_interrupt())) { + if (in_irq()) { + if (type != KM_IRQ0 && type != KM_IRQ1 && + type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && + type != KM_BOUNCE_READ) { + WARN_ON(1); + warn_count--; + } + } else if (!irqs_disabled()) { /* softirq */ + if (type != KM_IRQ0 && type != KM_IRQ1 && + type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && + type != KM_SKB_SUNRPC_DATA && + type != KM_SKB_DATA_SOFTIRQ && + type != KM_BOUNCE_READ) { + WARN_ON(1); + warn_count--; + } + } + } + + if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || + type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { + if (!irqs_disabled()) { + WARN_ON(1); + warn_count--; + } + } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { + if (irq_count() == 0 && !irqs_disabled()) { + WARN_ON(1); + warn_count--; + } + } +} + +#endif -- cgit v1.2.3 From 33925b25d2c00a29664f1994ab350a9bff70f7a2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Mar 2009 15:23:26 -0700 Subject: nommu: there is no mlock() for NOMMU, so don't provide the bits The mlock() facility does not exist for NOMMU since all mappings are effectively locked anyway, so we don't make the bits available when they're not useful. Signed-off-by: David Howells Reviewed-by: KOSAKI Motohiro Cc: Peter Zijlstra Cc: Greg Ungerer Cc: Johannes Weiner Cc: Rik van Riel Cc: Lee Schermerhorn Cc: Enrik Berkhan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 20 +++++++++++++------- mm/Kconfig | 8 ++++++++ mm/internal.h | 8 +++++--- 3 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 219a523ecdb0..61df1779b2a5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -96,6 +96,8 @@ enum pageflags { PG_swapbacked, /* Page is backed by RAM/swap */ #ifdef CONFIG_UNEVICTABLE_LRU PG_unevictable, /* Page is "unevictable" */ +#endif +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT PG_mlocked, /* Page is vma mlocked */ #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR @@ -234,20 +236,20 @@ PAGEFLAG_FALSE(SwapCache) #ifdef CONFIG_UNEVICTABLE_LRU PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) TESTCLEARFLAG(Unevictable, unevictable) +#else +PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) + SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) + __CLEARPAGEFLAG_NOOP(Unevictable) +#endif +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT #define MLOCK_PAGES 1 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) TESTSCFLAG(Mlocked, mlocked) - #else - #define MLOCK_PAGES 0 PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) - -PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) - SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) - __CLEARPAGEFLAG_NOOP(Unevictable) #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR @@ -367,9 +369,13 @@ static inline void __ClearPageTail(struct page *page) #ifdef CONFIG_UNEVICTABLE_LRU #define __PG_UNEVICTABLE (1 << PG_unevictable) -#define __PG_MLOCKED (1 << PG_mlocked) #else #define __PG_UNEVICTABLE 0 +#endif + +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT +#define __PG_MLOCKED (1 << PG_mlocked) +#else #define __PG_MLOCKED 0 #endif diff --git a/mm/Kconfig b/mm/Kconfig index a5b77811fdf2..8c895973dfba 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -214,5 +214,13 @@ config UNEVICTABLE_LRU will use one page flag and increase the code size a little, say Y unless you know what you are doing. +config HAVE_MLOCK + bool + default y if MMU=y + +config HAVE_MLOCKED_PAGE_BIT + bool + default y if HAVE_MLOCK=y && UNEVICTABLE_LRU=y + config MMU_NOTIFIER bool diff --git a/mm/internal.h b/mm/internal.h index 478223b73a2a..987bb03fbdd8 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -63,6 +63,7 @@ static inline unsigned long page_order(struct page *page) return page_private(page); } +#ifdef CONFIG_HAVE_MLOCK extern long mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void munlock_vma_pages_range(struct vm_area_struct *vma, @@ -71,6 +72,7 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) { munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end); } +#endif #ifdef CONFIG_UNEVICTABLE_LRU /* @@ -90,7 +92,7 @@ static inline void unevictable_migrate_page(struct page *new, struct page *old) } #endif -#ifdef CONFIG_UNEVICTABLE_LRU +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT /* * Called only in fault path via page_evictable() for a new page * to determine if it's being mapped into a LOCKED vma. @@ -165,7 +167,7 @@ static inline void free_page_mlock(struct page *page) } } -#else /* CONFIG_UNEVICTABLE_LRU */ +#else /* CONFIG_HAVE_MLOCKED_PAGE_BIT */ static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p) { return 0; @@ -175,7 +177,7 @@ static inline void mlock_vma_page(struct page *page) { } static inline void mlock_migrate_page(struct page *new, struct page *old) { } static inline void free_page_mlock(struct page *page) { } -#endif /* CONFIG_UNEVICTABLE_LRU */ +#endif /* CONFIG_HAVE_MLOCKED_PAGE_BIT */ /* * Return the mem_map entry representing the 'offset' subpage within -- cgit v1.2.3 From 327c0e968645f2601a43f5ea7c19c7b3a5fa0a34 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 31 Mar 2009 15:23:31 -0700 Subject: vmscan: fix it to take care of nodemask try_to_free_pages() is used for the direct reclaim of up to SWAP_CLUSTER_MAX pages when watermarks are low. The caller to alloc_pages_nodemask() can specify a nodemask of nodes that are allowed to be used but this is not passed to try_to_free_pages(). This can lead to unnecessary reclaim of pages that are unusable by the caller and int the worst case lead to allocation failure as progress was not been make where it is needed. This patch passes the nodemask used for alloc_pages_nodemask() to try_to_free_pages(). Reviewed-by: KOSAKI Motohiro Acked-by: Mel Gorman Signed-off-by: KAMEZAWA Hiroyuki Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- include/linux/swap.h | 2 +- mm/page_alloc.c | 3 ++- mm/vmscan.c | 13 +++++++++++-- 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 0c14f8d52ee5..c77b848c3d43 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -290,7 +290,7 @@ static void free_more_memory(void) &zone); if (zone) try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0, - GFP_NOFS); + GFP_NOFS, NULL); } } diff --git a/include/linux/swap.h b/include/linux/swap.h index d30215578877..b8b0c4ce83e6 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -212,7 +212,7 @@ static inline void lru_cache_add_active_file(struct page *page) /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, - gfp_t gfp_mask); + gfp_t gfp_mask, nodemask_t *mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cbd532161f68..0284e528748d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1582,7 +1582,8 @@ nofail_alloc: reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - did_some_progress = try_to_free_pages(zonelist, order, gfp_mask); + did_some_progress = try_to_free_pages(zonelist, order, + gfp_mask, nodemask); p->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); diff --git a/mm/vmscan.c b/mm/vmscan.c index f4619c6cd59e..06e72693b458 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -78,6 +78,12 @@ struct scan_control { /* Which cgroup do we reclaim from */ struct mem_cgroup *mem_cgroup; + /* + * Nodemask of nodes allowed by the caller. If NULL, all nodes + * are scanned. + */ + nodemask_t *nodemask; + /* Pluggable isolate pages callback */ unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst, unsigned long *scanned, int order, int mode, @@ -1538,7 +1544,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist, struct zone *zone; sc->all_unreclaimable = 1; - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { + for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, + sc->nodemask) { if (!populated_zone(zone)) continue; /* @@ -1683,7 +1690,7 @@ out: } unsigned long try_to_free_pages(struct zonelist *zonelist, int order, - gfp_t gfp_mask) + gfp_t gfp_mask, nodemask_t *nodemask) { struct scan_control sc = { .gfp_mask = gfp_mask, @@ -1694,6 +1701,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .order = order, .mem_cgroup = NULL, .isolate_pages = isolate_pages_global, + .nodemask = nodemask, }; return do_try_to_free_pages(zonelist, &sc); @@ -1714,6 +1722,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .order = 0, .mem_cgroup = mem_cont, .isolate_pages = mem_cgroup_isolate_pages, + .nodemask = NULL, /* we don't care the placement */ }; struct zonelist *zonelist; -- cgit v1.2.3 From 9fab5619bdd7f84cdd22cc760778f759f9819a33 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 31 Mar 2009 15:23:33 -0700 Subject: shmem: writepage directly to swap Synopsis: if shmem_writepage calls swap_writepage directly, most shmem swap loads benefit, and a catastrophic interaction between SLUB and some flash storage is avoided. shmem_writepage() has always been peculiar in making no attempt to write: it has just transferred a shmem page from file cache to swap cache, then let that page make its way around the LRU again before being written and freed. The idea was that people use tmpfs because they want those pages to stay in RAM; so although we give it an overflow to swap, we should resist writing too soon, giving those pages a second chance before they can be reclaimed. That was always questionable, and I've toyed with this patch for years; but never had a clear justification to depart from the original design. It became more questionable in 2.6.28, when the split LRU patches classed shmem and tmpfs pages as SwapBacked rather than as file_cache: that in itself gives them more resistance to reclaim than normal file pages. I prepared this patch for 2.6.29, but the merge window arrived before I'd completed gathering statistics to justify sending it in. Then while comparing SLQB against SLUB, running SLUB on a laptop I'd habitually used with SLAB, I found SLUB to run my tmpfs kbuild swapping tests five times slower than SLAB or SLQB - other machines slower too, but nowhere near so bad. Simpler "cp -a" swapping tests showed the same. slub_max_order=0 brings sanity to all, but heavy swapping is too far from normal to justify such a tuning. The crucial factor on that laptop turns out to be that I'm using an SD card for swap. What happens is this: By default, SLUB uses order-2 pages for shmem_inode_cache (and many other fs inodes), so creating tmpfs files under memory pressure brings lumpy reclaim into play. One subpage of the order is chosen from the bottom of the LRU as usual, then the other three picked out from their random positions on the LRUs. In a tmpfs load, many of these pages will be ones which already passed through shmem_writepage, so already have swap allocated. And though their offsets on swap were probably allocated sequentially, now that the pages are picked off at random, their swap offsets are scattered. But the flash storage on the SD card is very sensitive to having its writes merged: once swap is written at scattered offsets, performance falls apart. Rotating disk seeks increase too, but less disastrously. So: stop giving shmem/tmpfs pages a second pass around the LRU, write them out to swap as soon as their swap has been allocated. It's surely possible to devise an artificial load which runs faster the old way, one whose sizing is such that the tmpfs pages on their second pass are the ones that are wanted again, and other pages not. But I've not yet found such a load: on all machines, under the loads I've tried, immediate swap_writepage speeds up shmem swapping: especially when using the SLUB allocator (and more effectively than slub_max_order=0), but also with the others; and it also reduces the variance between runs. How much faster varies widely: a factor of five is rare, 5% is common. One load which might have suffered: imagine a swapping shmem load in a limited mem_cgroup on a machine with plenty of memory. Before 2.6.29 the swapcache was not charged, and such a load would have run quickest with the shmem swapcache never written to swap. But now swapcache is charged, so even this load benefits from shmem_writepage directly to swap. Apologies for the #ifndef CONFIG_SWAP swap_writepage() stub in swap.h: it's silly because that will never get called; but refactoring shmem.c sensibly according to CONFIG_SWAP will be a separate task. Signed-off-by: Hugh Dickins Acked-by: Pekka Enberg Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++++ mm/shmem.c | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index b8b0c4ce83e6..62d81435347a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -382,6 +382,11 @@ static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, return NULL; } +static inline int swap_writepage(struct page *p, struct writeback_control *wbc) +{ + return 0; +} + static inline struct page *lookup_swap_cache(swp_entry_t swp) { return NULL; diff --git a/mm/shmem.c b/mm/shmem.c index 7ec78e24a30d..d94d2e9146bc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1068,8 +1068,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) swap_duplicate(swap); BUG_ON(page_mapped(page)); page_cache_release(page); /* pagecache ref */ - set_page_dirty(page); - unlock_page(page); + swap_writepage(page, wbc); if (inode) { mutex_lock(&shmem_swaplist_mutex); /* move instead of add in case we're racing */ -- cgit v1.2.3 From a8af78982ff4c0b3731527b0217d286a343a3089 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 31 Mar 2009 15:23:37 -0700 Subject: pm: rework includes, remove arch ifdefs Make the following header file changes: - remove arch ifdefs and asm/suspend.h from linux/suspend.h - add asm/suspend.h to disk.c (for arch_prepare_suspend()) - add linux/io.h to swsusp.c (for ioremap()) - x86 32/64 bit compile fixes Signed-off-by: Magnus Damm Cc: Paul Mundt Acked-by: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/asm-offsets_32.c | 1 + arch/x86/kernel/asm-offsets_64.c | 1 + arch/x86/power/cpu_32.c | 1 + arch/x86/power/cpu_64.c | 1 + arch/x86/power/hibernate_64.c | 1 + include/linux/suspend.h | 3 --- kernel/power/disk.c | 1 + kernel/power/swsusp.c | 1 + 8 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index fbf2f33e3080..5a6aa1c1162f 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -18,6 +18,7 @@ #include #include #include +#include #include diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 8793ab33e2c1..e72f062fb4b5 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 274d06082f48..ce702c5b3a2c 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -12,6 +12,7 @@ #include #include #include +#include static struct saved_context saved_context; diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index e3b6cf70d62c..5343540f2607 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -15,6 +15,7 @@ #include #include #include +#include static void fix_processor_context(void); diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 6dd000dd7933..65fdc86e923f 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -14,6 +14,7 @@ #include #include #include +#include /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index c7d9bb1832ba..3e3a4364cbff 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -1,9 +1,6 @@ #ifndef _LINUX_SUSPEND_H #define _LINUX_SUSPEND_H -#if defined(CONFIG_X86) || defined(CONFIG_FRV) || defined(CONFIG_PPC32) || defined(CONFIG_PPC64) -#include -#endif #include #include #include diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e886d1332a10..f3db382c2b2d 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "power.h" diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 1ee6636414b2..78c35047586d 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "power.h" -- cgit v1.2.3 From 53d6660836f233df66490707365ab177e5fb2bb4 Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Tue, 31 Mar 2009 15:23:43 -0700 Subject: loop: add ioctl to resize a loop device Add the ability to 'resize' the loop device on the fly. One practical application is a loop file with XFS filesystem, already mounted: You can easily enlarge the file (append some bytes) and then call ioctl(fd, LOOP_SET_CAPACITY, new); The loop driver will learn about the new size and you can use xfs_growfs later on, which will allow you to use full capacity of the loop file without the need to unmount. Test app: #include #include #include #include #include #include #include #include #include #include #include #define _GNU_SOURCE #include char *me; void usage(FILE *f) { fprintf(f, "%s [options] loop_dev [backend_file]\n" "-s, --set new_size_in_bytes\n" "\twhen backend_file is given, " "it will be expanded too while keeping the original contents\n", me); } struct option opts[] = { { .name = "set", .has_arg = 1, .flag = NULL, .val = 's' }, { .name = "help", .has_arg = 0, .flag = NULL, .val = 'h' } }; void err_size(char *name, __u64 old) { fprintf(stderr, "size must be larger than current %s (%llu)\n", name, old); } int main(int argc, char *argv[]) { int fd, err, c, i, bfd; ssize_t ssz; size_t sz; __u64 old, new, append; char a[BUFSIZ]; struct stat st; FILE *out; char *backend, *dev; err = EINVAL; out = stderr; me = argv[0]; new = 0; while ((c = getopt_long(argc, argv, "s:h", opts, &i)) != -1) { switch (c) { case 's': errno = 0; new = strtoull(optarg, NULL, 0); if (errno) { err = errno; perror(argv[i]); goto out; } break; case 'h': err = 0; out = stdout; goto err; default: perror(argv[i]); goto err; } } if (optind < argc) dev = argv[optind++]; else goto err; fd = open(dev, O_RDONLY); if (fd < 0) { err = errno; perror(dev); goto out; } err = ioctl(fd, BLKGETSIZE64, &old); if (err) { err = errno; perror("ioctl BLKGETSIZE64"); goto out; } if (!new) { printf("%llu\n", old); goto out; } if (new < old) { err = EINVAL; err_size(dev, old); goto out; } if (optind < argc) { backend = argv[optind++]; bfd = open(backend, O_WRONLY|O_APPEND); if (bfd < 0) { err = errno; perror(backend); goto out; } err = fstat(bfd, &st); if (err) { err = errno; perror(backend); goto out; } if (new < st.st_size) { err = EINVAL; err_size(backend, st.st_size); goto out; } append = new - st.st_size; sz = sizeof(a); while (append > 0) { if (append < sz) sz = append; ssz = write(bfd, a, sz); if (ssz != sz) { err = errno; perror(backend); goto out; } append -= sz; } err = fsync(bfd); if (err) { err = errno; perror(backend); goto out; } } err = ioctl(fd, LOOP_SET_CAPACITY, new); if (err) { err = errno; perror("ioctl LOOP_SET_CAPACITY"); } goto out; err: usage(out); out: return err; } Signed-off-by: J. R. Okajima Signed-off-by: Tomas Matejicek Cc: Cc: Karel Zak Cc: Jens Axboe Cc: Al Viro Cc: Christoph Hellwig Cc: Akinobu Mita Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/loop.c | 30 ++++++++++++++++++++++++++++++ include/linux/loop.h | 1 + 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 2621ed2ce6d2..40b17d3b55a1 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1192,6 +1192,30 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { return err; } +static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) +{ + int err; + sector_t sec; + loff_t sz; + + err = -ENXIO; + if (unlikely(lo->lo_state != Lo_bound)) + goto out; + err = figure_loop_size(lo); + if (unlikely(err)) + goto out; + sec = get_capacity(lo->lo_disk); + /* the width of sector_t may be narrow for bit-shift */ + sz = sec; + sz <<= 9; + mutex_lock(&bdev->bd_mutex); + bd_set_size(bdev, sz); + mutex_unlock(&bdev->bd_mutex); + + out: + return err; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1224,6 +1248,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_GET_STATUS64: err = loop_get_status64(lo, (struct loop_info64 __user *) arg); break; + case LOOP_SET_CAPACITY: + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_capacity(lo, bdev); + break; default: err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } @@ -1371,6 +1400,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, lo, (struct compat_loop_info __user *) arg); mutex_unlock(&lo->lo_ctl_mutex); break; + case LOOP_SET_CAPACITY: case LOOP_CLR_FD: case LOOP_GET_STATUS64: case LOOP_SET_STATUS64: diff --git a/include/linux/loop.h b/include/linux/loop.h index 6ffd6db5bb0d..40725447f5e0 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -160,5 +160,6 @@ int loop_unregister_transfer(int number); #define LOOP_SET_STATUS64 0x4C04 #define LOOP_GET_STATUS64 0x4C05 #define LOOP_CHANGE_FD 0x4C06 +#define LOOP_SET_CAPACITY 0x4C07 #endif -- cgit v1.2.3 From c2d7543851849a6923680cdd7e1047ed1a84a1c5 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 31 Mar 2009 15:23:46 -0700 Subject: filesystem freeze: allow SysRq emergency thaw to thaw frozen filesystems Now that the filesystem freeze operation has been elevated to the VFS, and is just an ioctl away, some sort of safety net for unintentionally frozen root filesystems may be in order. The timeout thaw originally proposed did not get merged, but perhaps something like this would be useful in emergencies. For example, freeze /path/to/mountpoint may freeze your root filesystem if you forgot that you had that unmounted. I chose 'j' as the last remaining character other than 'h' which is sort of reserved for help (because help is generated on any unknown character). I've tested this on a non-root fs with multiple (nested) freezers, as well as on a system rendered unresponsive due to a frozen root fs. [randy.dunlap@oracle.com: emergency thaw only if CONFIG_BLOCK enabled] Signed-off-by: Eric Sandeen Cc: Takashi Sato Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysrq.txt | 5 +++++ drivers/char/sysrq.c | 19 ++++++++++++++++++- fs/buffer.c | 33 +++++++++++++++++++++++++++++++++ include/linux/fs.h | 1 + 4 files changed, 57 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 9e592c718afb..afa2946892da 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -81,6 +81,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.: 'i' - Send a SIGKILL to all processes, except for init. +'j' - Forcibly "Just thaw it" - filesystems frozen by the FIFREEZE ioctl. + 'k' - Secure Access Key (SAK) Kills all programs on the current virtual console. NOTE: See important comments below in SAK section. @@ -160,6 +162,9 @@ t'E'rm and k'I'll are useful if you have some sort of runaway process you are unable to kill any other way, especially if it's spawning other processes. +"'J'ust thaw it" is useful if your system becomes unresponsive due to a frozen +(probably root) filesystem via the FIFREEZE ioctl. + * Sometimes SysRq seems to get 'stuck' after using it, what can I do? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That happens to me, also. I've found that tapping shift, alt, and control diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 33a9351c896d..5afe7316c72e 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -346,6 +346,19 @@ static struct sysrq_key_op sysrq_moom_op = { .enable_mask = SYSRQ_ENABLE_SIGNAL, }; +#ifdef CONFIG_BLOCK +static void sysrq_handle_thaw(int key, struct tty_struct *tty) +{ + emergency_thaw_all(); +} +static struct sysrq_key_op sysrq_thaw_op = { + .handler = sysrq_handle_thaw, + .help_msg = "thaw-filesystems(J)", + .action_msg = "Emergency Thaw of all frozen filesystems", + .enable_mask = SYSRQ_ENABLE_SIGNAL, +}; +#endif + static void sysrq_handle_kill(int key, struct tty_struct *tty) { send_sig_all(SIGKILL); @@ -396,9 +409,13 @@ static struct sysrq_key_op *sysrq_key_table[36] = { &sysrq_moom_op, /* f */ /* g: May be registered by ppc for kgdb */ NULL, /* g */ - NULL, /* h */ + NULL, /* h - reserved for help */ &sysrq_kill_op, /* i */ +#ifdef CONFIG_BLOCK + &sysrq_thaw_op, /* j */ +#else NULL, /* j */ +#endif &sysrq_SAK_op, /* k */ #ifdef CONFIG_SMP &sysrq_showallcpus_op, /* l */ diff --git a/fs/buffer.c b/fs/buffer.c index c77b848c3d43..f5f8b15a6e40 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -547,6 +547,39 @@ repeat: return err; } +void do_thaw_all(unsigned long unused) +{ + struct super_block *sb; + char b[BDEVNAME_SIZE]; + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) + printk(KERN_WARNING "Emergency Thaw on %s\n", + bdevname(sb->s_bdev, b)); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); + printk(KERN_WARNING "Emergency Thaw complete\n"); +} + +/** + * emergency_thaw_all -- forcibly thaw every frozen filesystem + * + * Used for emergency unfreeze of all filesystems via SysRq + */ +void emergency_thaw_all(void) +{ + pdflush_operation(do_thaw_all, 0); +} + /** * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written diff --git a/include/linux/fs.h b/include/linux/fs.h index 87e7bfc5ebd7..61211ad823fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1878,6 +1878,7 @@ extern struct block_device *open_by_devnum(dev_t, fmode_t); extern void invalidate_bdev(struct block_device *); extern int sync_blockdev(struct block_device *bdev); extern struct super_block *freeze_bdev(struct block_device *); +extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); extern int fsync_super(struct super_block *); -- cgit v1.2.3 From 311d07611e8b354cc1ee6546e4c574c01111adc8 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 31 Mar 2009 15:23:51 -0700 Subject: introduce pr_cont() macro We cover all log-levels by pr_... macros except KERN_CONT one. Add it for convenience. Signed-off-by: Cyrill Gorcunov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f81d80f47dcb..e720b0da7751 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -353,6 +353,8 @@ static inline char *pack_hex_byte(char *buf, u8 byte) printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) -- cgit v1.2.3 From bcd0b235bf3808dec5115c381cd55568f63b85f0 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 31 Mar 2009 15:24:18 -0700 Subject: eventfd: improve support for semaphore-like behavior People started using eventfd in a semaphore-like way where before they were using pipes. That is, counter-based resource access. Where a "wait()" returns immediately by decrementing the counter by one, if counter is greater than zero. Otherwise will wait. And where a "post(count)" will add count to the counter releasing the appropriate amount of waiters. If eventfd the "post" (write) part is fine, while the "wait" (read) does not dequeue 1, but the whole counter value. The problem with eventfd is that a read() on the fd returns and wipes the whole counter, making the use of it as semaphore a little bit more cumbersome. You can do a read() followed by a write() of COUNTER-1, but IMO it's pretty easy and cheap to make this work w/out extra steps. This patch introduces a new eventfd flag that tells eventfd to only dequeue 1 from the counter, allowing simple read/write to make it behave like a semaphore. Simple test here: http://www.xmailserver.org/eventfd-sem.c To be back-compatible with earlier kernels, userspace applications should probe for the availability of this feature via #ifdef EFD_SEMAPHORE fd = eventfd2 (CNT, EFD_SEMAPHORE); if (fd == -1 && errno == EINVAL) #else #endif Signed-off-by: Davide Libenzi Cc: Tested-by: Michael Kerrisk Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventfd.c | 20 +++++++++++--------- include/linux/eventfd.h | 12 +++++++++++- 2 files changed, 22 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/eventfd.c b/fs/eventfd.c index 5de2c2db3aa2..91c0829a7035 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -28,6 +28,7 @@ struct eventfd_ctx { * issue a wakeup. */ __u64 count; + unsigned int flags; }; /* @@ -87,22 +88,20 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, { struct eventfd_ctx *ctx = file->private_data; ssize_t res; - __u64 ucnt; + __u64 ucnt = 0; DECLARE_WAITQUEUE(wait, current); if (count < sizeof(ucnt)) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; - ucnt = ctx->count; - if (ucnt > 0) + if (ctx->count > 0) res = sizeof(ucnt); else if (!(file->f_flags & O_NONBLOCK)) { __add_wait_queue(&ctx->wqh, &wait); for (res = 0;;) { set_current_state(TASK_INTERRUPTIBLE); if (ctx->count > 0) { - ucnt = ctx->count; res = sizeof(ucnt); break; } @@ -117,8 +116,9 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (res > 0) { - ctx->count = 0; + if (likely(res > 0)) { + ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; + ctx->count -= ucnt; if (waitqueue_active(&ctx->wqh)) wake_up_locked(&ctx->wqh); } @@ -166,7 +166,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (res > 0) { + if (likely(res > 0)) { ctx->count += ucnt; if (waitqueue_active(&ctx->wqh)) wake_up_locked(&ctx->wqh); @@ -207,7 +207,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); - if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK)) + if (flags & ~EFD_FLAGS_SET) return -EINVAL; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); @@ -216,13 +216,14 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) init_waitqueue_head(&ctx->wqh); ctx->count = count; + ctx->flags = flags; /* * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, - flags & (O_CLOEXEC | O_NONBLOCK)); + flags & EFD_SHARED_FCNTL_FLAGS); if (fd < 0) kfree(ctx); return fd; @@ -232,3 +233,4 @@ SYSCALL_DEFINE1(eventfd, unsigned int, count) { return sys_eventfd2(count, 0); } + diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index a667637b54e3..f45a8ae5f828 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -13,10 +13,20 @@ /* For O_CLOEXEC and O_NONBLOCK */ #include -/* Flags for eventfd2. */ +/* + * CAREFUL: Check include/asm-generic/fcntl.h when defining + * new flags, since they might collide with O_* ones. We want + * to re-use O_* flags that couldn't possibly have a meaning + * from eventfd, in order to leave a free define-space for + * shared O_* flags. + */ +#define EFD_SEMAPHORE (1 << 0) #define EFD_CLOEXEC O_CLOEXEC #define EFD_NONBLOCK O_NONBLOCK +#define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) +#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) + struct file *eventfd_fget(int fd); int eventfd_signal(struct file *file, int n); -- cgit v1.2.3 From 4ede816ac36e027db5fe0051ad9c73f76db63772 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 31 Mar 2009 15:24:20 -0700 Subject: epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key() This patchset introduces wakeup hints for some of the most popular (from epoll POV) devices, so that epoll code can avoid spurious wakeups on its waiters. The problem with epoll is that the callback-based wakeups do not, ATM, carry any information about the events the wakeup is related to. So the only choice epoll has (not being able to call f_op->poll() from inside the callback), is to add the file* to a ready-list and resolve the real events later on, at epoll_wait() (or its own f_op->poll()) time. This can cause spurious wakeups, since the wake_up() itself might be for an event the caller is not interested into. The rate of these spurious wakeup can be pretty high in case of many network sockets being monitored. By allowing devices to report the events the wakeups refer to (at least the two major classes - POLLIN/POLLOUT), we are able to spare useless wakeups by proper handling inside the epoll's poll callback. Epoll will have in any case to call f_op->poll() on the file* later on, since the change to be done in order to have the full event set sent via wakeup, is too invasive for the way our f_op->poll() system works (the full event set is calculated inside the poll function - there are too many of them to even start thinking the change - also poll/select would need change too). Epoll is changed in a way that both devices which send event hints, and the ones that don't, are correctly handled. The former will gain some efficiency though. As a general rule for devices, would be to add an event mask by using key-aware wakeup macros, when making up poll wait queues. I tested it (together with the epoll's poll fix patch Andrew has in -mm) and wakeups for the supported devices are correctly filtered. Test program available here: http://www.xmailserver.org/epoll_test.c This patch: Nothing revolutionary here. Just using the available "key" that our wakeup core already support. The __wake_up_locked_key() was no brainer, since both __wake_up_locked() and __wake_up_locked_key() are thin wrappers around __wake_up_common(). The __wake_up_sync() function had a body, so the choice was between borrowing the body for __wake_up_sync_key() and calling it from __wake_up_sync(), or make an inline and calling it from both. I chose the former since in most archs it all resolves to "mov $0, REG; jmp ADDR". Signed-off-by: Davide Libenzi Cc: Alan Cox Cc: Ingo Molnar Cc: David Miller Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 7 +++++-- kernel/sched.c | 23 +++++++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index a210ede73b56..0d2eeb03a718 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -135,8 +135,11 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); -extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, + void *key); +void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_bit(wait_queue_head_t *, void *, int); int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); diff --git a/kernel/sched.c b/kernel/sched.c index 196d48babbef..73513f4e19df 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5196,11 +5196,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) __wake_up_common(q, mode, 1, 0, NULL); } +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) +{ + __wake_up_common(q, mode, 1, 0, key); +} + /** - * __wake_up_sync - wake up threads blocked on a waitqueue. + * __wake_up_sync_key - wake up threads blocked on a waitqueue. * @q: the waitqueue * @mode: which threads * @nr_exclusive: how many wake-one or wake-many threads to wake up + * @key: opaque value to be passed to wakeup targets * * The sync wakeup differs that the waker knows that it will schedule * away soon, so while the target thread will be woken up, it will not @@ -5209,8 +5215,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) * * On UP it can prevent extra preemption. */ -void -__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, void *key) { unsigned long flags; int sync = 1; @@ -5222,9 +5228,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) sync = 0; spin_lock_irqsave(&q->lock, flags); - __wake_up_common(q, mode, nr_exclusive, sync, NULL); + __wake_up_common(q, mode, nr_exclusive, sync, key); spin_unlock_irqrestore(&q->lock, flags); } +EXPORT_SYMBOL_GPL(__wake_up_sync_key); + +/* + * __wake_up_sync - see __wake_up_sync_key() + */ +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) +{ + __wake_up_sync_key(q, mode, nr_exclusive, NULL); +} EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ /** -- cgit v1.2.3 From c0da37753695e010776ccf2200a5731e0f88a9f3 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 31 Mar 2009 15:24:20 -0700 Subject: epoll keyed wakeups: introduce new *_poll() wakeup macros Introduce new wakeup macros that allow passing an event mask to the wakeup targets. They exactly mimic their non-_poll() counterpart, with the added event mask passing capability. I did add only the ones currently requested, avoiding the _nr() and _all() for the moment. Signed-off-by: Davide Libenzi Cc: Alan Cox Cc: Ingo Molnar Cc: David Miller Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 0d2eeb03a718..5d631c17eaee 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -158,21 +158,17 @@ wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1) -#ifdef CONFIG_DEBUG_LOCK_ALLOC /* - * macro to avoid include hell + * Wakeup macros to be used to report events to the targets. */ -#define wake_up_nested(x, s) \ -do { \ - unsigned long flags; \ - \ - spin_lock_irqsave_nested(&(x)->lock, flags, (s)); \ - wake_up_locked(x); \ - spin_unlock_irqrestore(&(x)->lock, flags); \ -} while (0) -#else -#define wake_up_nested(x, s) wake_up(x) -#endif +#define wake_up_poll(x, m) \ + __wake_up(x, TASK_NORMAL, 1, (void *) (m)) +#define wake_up_locked_poll(x, m) \ + __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) +#define wake_up_interruptible_poll(x, m) \ + __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) +#define wake_up_interruptible_sync_poll(x, m) \ + __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) #define __wait_event(wq, condition) \ do { \ -- cgit v1.2.3 From 364fdbc00fbdd409ade63500710123fe323aa164 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 31 Mar 2009 15:24:36 -0700 Subject: spi_mpc83xx: rework chip selects handling The main purpose of this patch is to pass 'struct spi_device' to the chip select handling routines. This is needed so that we could implement full-fledged OpenFirmware support for this driver. While at it, also: - Replace two {de,activate}_cs routines by single cs_contol(). - Don't duplicate platform data callbacks in mpc83xx_spi struct. Signed-off-by: Anton Vorontsov Cc: David Brownell Cc: Benjamin Herrenschmidt Cc: Kumar Gala Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/83xx/mpc832x_rdb.c | 16 ++++------------ arch/powerpc/sysdev/fsl_soc.c | 14 ++++++-------- arch/powerpc/sysdev/fsl_soc.h | 5 +++-- drivers/spi/spi_mpc83xx.c | 20 +++++++------------- include/linux/fsl_devices.h | 5 +++-- 5 files changed, 23 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index 2a1295f19832..28e23cde64a1 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -39,16 +39,10 @@ #endif #ifdef CONFIG_QUICC_ENGINE -static void mpc83xx_spi_activate_cs(u8 cs, u8 polarity) +static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on) { - pr_debug("%s %d %d\n", __func__, cs, polarity); - par_io_data_set(3, 13, polarity); -} - -static void mpc83xx_spi_deactivate_cs(u8 cs, u8 polarity) -{ - pr_debug("%s %d %d\n", __func__, cs, polarity); - par_io_data_set(3, 13, !polarity); + pr_debug("%s %d %d\n", __func__, spi->chip_select, on); + par_io_data_set(3, 13, on); } static struct mmc_spi_platform_data mpc832x_mmc_pdata = { @@ -74,9 +68,7 @@ static int __init mpc832x_spi_init(void) par_io_config_pin(3, 14, 2, 0, 0, 0); /* SD_INSERT, I */ par_io_config_pin(3, 15, 2, 0, 0, 0); /* SD_PROTECT,I */ - return fsl_spi_init(&mpc832x_spi_boardinfo, 1, - mpc83xx_spi_activate_cs, - mpc83xx_spi_deactivate_cs); + return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control); } machine_device_initcall(mpc832x_rdb, mpc832x_spi_init); #endif /* CONFIG_QUICC_ENGINE */ diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index a01c89d3f9bd..a46c1c867930 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -420,8 +420,8 @@ arch_initcall(fsl_usb_of_init); static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, struct spi_board_info *board_infos, unsigned int num_board_infos, - void (*activate_cs)(u8 cs, u8 polarity), - void (*deactivate_cs)(u8 cs, u8 polarity)) + void (*cs_control)(struct spi_device *dev, + bool on)) { struct device_node *np; unsigned int i = 0; @@ -433,8 +433,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, struct resource res[2]; struct platform_device *pdev; struct fsl_spi_platform_data pdata = { - .activate_cs = activate_cs, - .deactivate_cs = deactivate_cs, + .cs_control = cs_control, }; memset(res, 0, sizeof(res)); @@ -501,8 +500,7 @@ next: int __init fsl_spi_init(struct spi_board_info *board_infos, unsigned int num_board_infos, - void (*activate_cs)(u8 cs, u8 polarity), - void (*deactivate_cs)(u8 cs, u8 polarity)) + void (*cs_control)(struct spi_device *spi, bool on)) { u32 sysclk = -1; int ret; @@ -518,10 +516,10 @@ int __init fsl_spi_init(struct spi_board_info *board_infos, } ret = of_fsl_spi_probe(NULL, "fsl,spi", sysclk, board_infos, - num_board_infos, activate_cs, deactivate_cs); + num_board_infos, cs_control); if (!ret) of_fsl_spi_probe("spi", "fsl_spi", sysclk, board_infos, - num_board_infos, activate_cs, deactivate_cs); + num_board_infos, cs_control); return spi_register_board_info(board_infos, num_board_infos); } diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h index 9c744e4285a0..b5f3456780b8 100644 --- a/arch/powerpc/sysdev/fsl_soc.h +++ b/arch/powerpc/sysdev/fsl_soc.h @@ -4,6 +4,8 @@ #include +struct spi_device; + extern phys_addr_t get_immrbase(void); #if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx) extern u32 get_brgfreq(void); @@ -19,8 +21,7 @@ struct device_node; extern int fsl_spi_init(struct spi_board_info *board_infos, unsigned int num_board_infos, - void (*activate_cs)(u8 cs, u8 polarity), - void (*deactivate_cs)(u8 cs, u8 polarity)); + void (*cs_control)(struct spi_device *spi, bool on)); extern void fsl_rstcr_restart(char *cmd); diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c index df6420029004..b95085a46f90 100644 --- a/drivers/spi/spi_mpc83xx.c +++ b/drivers/spi/spi_mpc83xx.c @@ -89,9 +89,6 @@ struct mpc83xx_spi { bool qe_mode; - void (*activate_cs) (u8 cs, u8 polarity); - void (*deactivate_cs) (u8 cs, u8 polarity); - u8 busy; struct workqueue_struct *workqueue; @@ -153,15 +150,14 @@ MPC83XX_SPI_TX_BUF(u32) static void mpc83xx_spi_chipselect(struct spi_device *spi, int value) { - struct mpc83xx_spi *mpc83xx_spi; - u8 pol = spi->mode & SPI_CS_HIGH ? 1 : 0; + struct mpc83xx_spi *mpc83xx_spi = spi_master_get_devdata(spi->master); + struct fsl_spi_platform_data *pdata = spi->dev.parent->platform_data; + bool pol = spi->mode & SPI_CS_HIGH; struct spi_mpc83xx_cs *cs = spi->controller_state; - mpc83xx_spi = spi_master_get_devdata(spi->master); - if (value == BITBANG_CS_INACTIVE) { - if (mpc83xx_spi->deactivate_cs) - mpc83xx_spi->deactivate_cs(spi->chip_select, pol); + if (pdata->cs_control) + pdata->cs_control(spi, !pol); } if (value == BITBANG_CS_ACTIVE) { @@ -186,8 +182,8 @@ static void mpc83xx_spi_chipselect(struct spi_device *spi, int value) mpc83xx_spi_write_reg(mode, regval); local_irq_restore(flags); } - if (mpc83xx_spi->activate_cs) - mpc83xx_spi->activate_cs(spi->chip_select, pol); + if (pdata->cs_control) + pdata->cs_control(spi, pol); } } @@ -582,8 +578,6 @@ static int __init mpc83xx_spi_probe(struct platform_device *dev) master->cleanup = mpc83xx_spi_cleanup; mpc83xx_spi = spi_master_get_devdata(master); - mpc83xx_spi->activate_cs = pdata->activate_cs; - mpc83xx_spi->deactivate_cs = pdata->deactivate_cs; mpc83xx_spi->qe_mode = pdata->qe_mode; mpc83xx_spi->get_rx = mpc83xx_spi_rx_buf_u8; mpc83xx_spi->get_tx = mpc83xx_spi_tx_buf_u8; diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index d9051d717d27..7bc1b643d370 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -95,14 +95,15 @@ struct fsl_usb2_platform_data { #define FSL_USB2_PORT0_ENABLED 0x00000001 #define FSL_USB2_PORT1_ENABLED 0x00000002 +struct spi_device; + struct fsl_spi_platform_data { u32 initial_spmode; /* initial SPMODE value */ u16 bus_num; bool qe_mode; /* board specific information */ u16 max_chipselect; - void (*activate_cs)(u8 cs, u8 polarity); - void (*deactivate_cs)(u8 cs, u8 polarity); + void (*cs_control)(struct spi_device *spi, bool on); u32 sysclk; }; -- cgit v1.2.3 From 35b4b3c0c1265f1a7342574be393c157601401f0 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 31 Mar 2009 15:24:37 -0700 Subject: spi_mpc83xx: add OF platform driver bindings Implement full support for OF SPI bindings. Now the driver can manage its own chip selects without any help from the board files and/or fsl_soc constructors. The "legacy" code is well isolated and could be removed as time goes by. Signed-off-by: Anton Vorontsov Cc: David Brownell Cc: Benjamin Herrenschmidt Cc: Kumar Gala Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/spi_mpc83xx.c | 330 +++++++++++++++++++++++++++++++++++++++----- include/linux/fsl_devices.h | 2 +- 2 files changed, 295 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c index b95085a46f90..f4573a96af24 100644 --- a/drivers/spi/spi_mpc83xx.c +++ b/drivers/spi/spi_mpc83xx.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -23,7 +25,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include @@ -79,7 +87,7 @@ struct mpc83xx_spi { u32(*get_tx) (struct mpc83xx_spi *); unsigned int count; - int irq; + unsigned int irq; unsigned nsecs; /* (clock cycle time)/2 */ @@ -543,36 +551,23 @@ static void mpc83xx_spi_cleanup(struct spi_device *spi) kfree(spi->controller_state); } -static int __init mpc83xx_spi_probe(struct platform_device *dev) +static struct spi_master * __devinit +mpc83xx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq) { + struct fsl_spi_platform_data *pdata = dev->platform_data; struct spi_master *master; struct mpc83xx_spi *mpc83xx_spi; - struct fsl_spi_platform_data *pdata; - struct resource *r; u32 regval; int ret = 0; - /* Get resources(memory, IRQ) associated with the device */ - master = spi_alloc_master(&dev->dev, sizeof(struct mpc83xx_spi)); - + master = spi_alloc_master(dev, sizeof(struct mpc83xx_spi)); if (master == NULL) { ret = -ENOMEM; goto err; } - platform_set_drvdata(dev, master); - pdata = dev->dev.platform_data; - - if (pdata == NULL) { - ret = -ENODEV; - goto free_master; - } + dev_set_drvdata(dev, master); - r = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (r == NULL) { - ret = -ENODEV; - goto free_master; - } master->setup = mpc83xx_spi_setup; master->transfer = mpc83xx_spi_transfer; master->cleanup = mpc83xx_spi_cleanup; @@ -592,18 +587,13 @@ static int __init mpc83xx_spi_probe(struct platform_device *dev) init_completion(&mpc83xx_spi->done); - mpc83xx_spi->base = ioremap(r->start, r->end - r->start + 1); + mpc83xx_spi->base = ioremap(mem->start, mem->end - mem->start + 1); if (mpc83xx_spi->base == NULL) { ret = -ENOMEM; goto put_master; } - mpc83xx_spi->irq = platform_get_irq(dev, 0); - - if (mpc83xx_spi->irq < 0) { - ret = -ENXIO; - goto unmap_io; - } + mpc83xx_spi->irq = irq; /* Register for SPI Interrupt */ ret = request_irq(mpc83xx_spi->irq, mpc83xx_spi_irq, @@ -645,9 +635,9 @@ static int __init mpc83xx_spi_probe(struct platform_device *dev) printk(KERN_INFO "%s: MPC83xx SPI Controller driver at 0x%p (irq = %d)\n", - dev_name(&dev->dev), mpc83xx_spi->base, mpc83xx_spi->irq); + dev_name(dev), mpc83xx_spi->base, mpc83xx_spi->irq); - return ret; + return master; unreg_master: destroy_workqueue(mpc83xx_spi->workqueue); @@ -657,18 +647,16 @@ unmap_io: iounmap(mpc83xx_spi->base); put_master: spi_master_put(master); -free_master: - kfree(master); err: - return ret; + return ERR_PTR(ret); } -static int __exit mpc83xx_spi_remove(struct platform_device *dev) +static int __devexit mpc83xx_spi_remove(struct device *dev) { struct mpc83xx_spi *mpc83xx_spi; struct spi_master *master; - master = platform_get_drvdata(dev); + master = dev_get_drvdata(dev); mpc83xx_spi = spi_master_get_devdata(master); flush_workqueue(mpc83xx_spi->workqueue); @@ -681,23 +669,293 @@ static int __exit mpc83xx_spi_remove(struct platform_device *dev) return 0; } +struct mpc83xx_spi_probe_info { + struct fsl_spi_platform_data pdata; + int *gpios; + bool *alow_flags; +}; + +static struct mpc83xx_spi_probe_info * +to_of_pinfo(struct fsl_spi_platform_data *pdata) +{ + return container_of(pdata, struct mpc83xx_spi_probe_info, pdata); +} + +static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on) +{ + struct device *dev = spi->dev.parent; + struct mpc83xx_spi_probe_info *pinfo = to_of_pinfo(dev->platform_data); + u16 cs = spi->chip_select; + int gpio = pinfo->gpios[cs]; + bool alow = pinfo->alow_flags[cs]; + + gpio_set_value(gpio, on ^ alow); +} + +static int of_mpc83xx_spi_get_chipselects(struct device *dev) +{ + struct device_node *np = dev_archdata_get_node(&dev->archdata); + struct fsl_spi_platform_data *pdata = dev->platform_data; + struct mpc83xx_spi_probe_info *pinfo = to_of_pinfo(pdata); + unsigned int ngpios; + int i = 0; + int ret; + + ngpios = of_gpio_count(np); + if (!ngpios) { + /* + * SPI w/o chip-select line. One SPI device is still permitted + * though. + */ + pdata->max_chipselect = 1; + return 0; + } + + pinfo->gpios = kmalloc(ngpios * sizeof(pinfo->gpios), GFP_KERNEL); + if (!pinfo->gpios) + return -ENOMEM; + memset(pinfo->gpios, -1, ngpios * sizeof(pinfo->gpios)); + + pinfo->alow_flags = kzalloc(ngpios * sizeof(pinfo->alow_flags), + GFP_KERNEL); + if (!pinfo->alow_flags) { + ret = -ENOMEM; + goto err_alloc_flags; + } + + for (; i < ngpios; i++) { + int gpio; + enum of_gpio_flags flags; + + gpio = of_get_gpio_flags(np, i, &flags); + if (!gpio_is_valid(gpio)) { + dev_err(dev, "invalid gpio #%d: %d\n", i, gpio); + goto err_loop; + } + + ret = gpio_request(gpio, dev_name(dev)); + if (ret) { + dev_err(dev, "can't request gpio #%d: %d\n", i, ret); + goto err_loop; + } + + pinfo->gpios[i] = gpio; + pinfo->alow_flags[i] = flags & OF_GPIO_ACTIVE_LOW; + + ret = gpio_direction_output(pinfo->gpios[i], + pinfo->alow_flags[i]); + if (ret) { + dev_err(dev, "can't set output direction for gpio " + "#%d: %d\n", i, ret); + goto err_loop; + } + } + + pdata->max_chipselect = ngpios; + pdata->cs_control = mpc83xx_spi_cs_control; + + return 0; + +err_loop: + while (i >= 0) { + if (gpio_is_valid(pinfo->gpios[i])) + gpio_free(pinfo->gpios[i]); + i--; + } + + kfree(pinfo->alow_flags); + pinfo->alow_flags = NULL; +err_alloc_flags: + kfree(pinfo->gpios); + pinfo->gpios = NULL; + return ret; +} + +static int of_mpc83xx_spi_free_chipselects(struct device *dev) +{ + struct fsl_spi_platform_data *pdata = dev->platform_data; + struct mpc83xx_spi_probe_info *pinfo = to_of_pinfo(pdata); + int i; + + if (!pinfo->gpios) + return 0; + + for (i = 0; i < pdata->max_chipselect; i++) { + if (gpio_is_valid(pinfo->gpios[i])) + gpio_free(pinfo->gpios[i]); + } + + kfree(pinfo->gpios); + kfree(pinfo->alow_flags); + return 0; +} + +static int __devinit of_mpc83xx_spi_probe(struct of_device *ofdev, + const struct of_device_id *ofid) +{ + struct device *dev = &ofdev->dev; + struct device_node *np = ofdev->node; + struct mpc83xx_spi_probe_info *pinfo; + struct fsl_spi_platform_data *pdata; + struct spi_master *master; + struct resource mem; + struct resource irq; + const void *prop; + int ret = -ENOMEM; + + pinfo = kzalloc(sizeof(*pinfo), GFP_KERNEL); + if (!pinfo) + return -ENOMEM; + + pdata = &pinfo->pdata; + dev->platform_data = pdata; + + /* Allocate bus num dynamically. */ + pdata->bus_num = -1; + + /* SPI controller is either clocked from QE or SoC clock. */ + pdata->sysclk = get_brgfreq(); + if (pdata->sysclk == -1) { + pdata->sysclk = fsl_get_sys_freq(); + if (pdata->sysclk == -1) { + ret = -ENODEV; + goto err_clk; + } + } + + prop = of_get_property(np, "mode", NULL); + if (prop && !strcmp(prop, "cpu-qe")) + pdata->qe_mode = 1; + + ret = of_mpc83xx_spi_get_chipselects(dev); + if (ret) + goto err; + + ret = of_address_to_resource(np, 0, &mem); + if (ret) + goto err; + + ret = of_irq_to_resource(np, 0, &irq); + if (!ret) { + ret = -EINVAL; + goto err; + } + + master = mpc83xx_spi_probe(dev, &mem, irq.start); + if (IS_ERR(master)) { + ret = PTR_ERR(master); + goto err; + } + + of_register_spi_devices(master, np); + + return 0; + +err: + of_mpc83xx_spi_free_chipselects(dev); +err_clk: + kfree(pinfo); + return ret; +} + +static int __devexit of_mpc83xx_spi_remove(struct of_device *ofdev) +{ + int ret; + + ret = mpc83xx_spi_remove(&ofdev->dev); + if (ret) + return ret; + of_mpc83xx_spi_free_chipselects(&ofdev->dev); + return 0; +} + +static const struct of_device_id of_mpc83xx_spi_match[] = { + { .compatible = "fsl,spi" }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_mpc83xx_spi_match); + +static struct of_platform_driver of_mpc83xx_spi_driver = { + .name = "mpc83xx_spi", + .match_table = of_mpc83xx_spi_match, + .probe = of_mpc83xx_spi_probe, + .remove = __devexit_p(of_mpc83xx_spi_remove), +}; + +#ifdef CONFIG_MPC832x_RDB +/* + * XXX XXX XXX + * This is "legacy" platform driver, was used by the MPC8323E-RDB boards + * only. The driver should go away soon, since newer MPC8323E-RDB's device + * tree can work with OpenFirmware driver. But for now we support old trees + * as well. + */ +static int __devinit plat_mpc83xx_spi_probe(struct platform_device *pdev) +{ + struct resource *mem; + unsigned int irq; + struct spi_master *master; + + if (!pdev->dev.platform_data) + return -EINVAL; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (!irq) + return -EINVAL; + + master = mpc83xx_spi_probe(&pdev->dev, mem, irq); + if (IS_ERR(master)) + return PTR_ERR(master); + return 0; +} + +static int __devexit plat_mpc83xx_spi_remove(struct platform_device *pdev) +{ + return mpc83xx_spi_remove(&pdev->dev); +} + MODULE_ALIAS("platform:mpc83xx_spi"); static struct platform_driver mpc83xx_spi_driver = { - .remove = __exit_p(mpc83xx_spi_remove), + .probe = plat_mpc83xx_spi_probe, + .remove = __exit_p(plat_mpc83xx_spi_remove), .driver = { .name = "mpc83xx_spi", .owner = THIS_MODULE, }, }; +static bool legacy_driver_failed; + +static void __init legacy_driver_register(void) +{ + legacy_driver_failed = platform_driver_register(&mpc83xx_spi_driver); +} + +static void __exit legacy_driver_unregister(void) +{ + if (legacy_driver_failed) + return; + platform_driver_unregister(&mpc83xx_spi_driver); +} +#else +static void __init legacy_driver_register(void) {} +static void __exit legacy_driver_unregister(void) {} +#endif /* CONFIG_MPC832x_RDB */ + static int __init mpc83xx_spi_init(void) { - return platform_driver_probe(&mpc83xx_spi_driver, mpc83xx_spi_probe); + legacy_driver_register(); + return of_register_platform_driver(&of_mpc83xx_spi_driver); } static void __exit mpc83xx_spi_exit(void) { - platform_driver_unregister(&mpc83xx_spi_driver); + of_unregister_platform_driver(&of_mpc83xx_spi_driver); + legacy_driver_unregister(); } module_init(mpc83xx_spi_init); diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 7bc1b643d370..7ef1caf50269 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -99,7 +99,7 @@ struct spi_device; struct fsl_spi_platform_data { u32 initial_spmode; /* initial SPMODE value */ - u16 bus_num; + s16 bus_num; bool qe_mode; /* board specific information */ u16 max_chipselect; -- cgit v1.2.3 From 79955898f961a870cbcc58f6ae13f3741a909da5 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 31 Mar 2009 15:24:45 -0700 Subject: autofs4: fix kernel includes autofs_dev-ioctl.h is included by both the kernel module and user space tools and it includes two kernel header files. Compiles work if the kernel headers are installed but fail otherwise. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 7 ++++++- include/linux/auto_fs.h | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 91a773993a5c..850f39b33e74 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -10,8 +10,13 @@ #ifndef _LINUX_AUTO_DEV_IOCTL_H #define _LINUX_AUTO_DEV_IOCTL_H +#include + +#ifdef __KERNEL__ #include -#include +#else +#include +#endif /* __KERNEL__ */ #define AUTOFS_DEVICE_NAME "autofs" diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index c21e5972a3e8..63265852b7d1 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -17,11 +17,13 @@ #ifdef __KERNEL__ #include #include +#include +#include +#else #include +#include #endif /* __KERNEL__ */ -#include - /* This file describes autofs v3 */ #define AUTOFS_PROTO_VERSION 3 -- cgit v1.2.3 From 78d89ef40c2ff7265df077e20c4d76be7d415204 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 31 Mar 2009 15:24:48 -0700 Subject: rtc: convert LEAP_YEAR into an inline - the LEAP_YEAR macro is buggy - it references its arg multiple times. Fix this by turning it into a C function. - give it a more approriate name - Move it to rtc.h so that other .c files can use it, instead of copying it. Cc: dann frazier Acked-by: Alessandro Zummo Cc: stephane eranian Cc: "Luck, Tony" Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-lib.c | 7 +++---- include/linux/rtc.h | 6 ++++++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c index dd70bf73ce9d..773851f338b8 100644 --- a/drivers/rtc/rtc-lib.c +++ b/drivers/rtc/rtc-lib.c @@ -26,14 +26,13 @@ static const unsigned short rtc_ydays[2][13] = { }; #define LEAPS_THRU_END_OF(y) ((y)/4 - (y)/100 + (y)/400) -#define LEAP_YEAR(year) ((!(year % 4) && (year % 100)) || !(year % 400)) /* * The number of days in the month. */ int rtc_month_days(unsigned int month, unsigned int year) { - return rtc_days_in_month[month] + (LEAP_YEAR(year) && month == 1); + return rtc_days_in_month[month] + (is_leap_year(year) && month == 1); } EXPORT_SYMBOL(rtc_month_days); @@ -42,7 +41,7 @@ EXPORT_SYMBOL(rtc_month_days); */ int rtc_year_days(unsigned int day, unsigned int month, unsigned int year) { - return rtc_ydays[LEAP_YEAR(year)][month] + day-1; + return rtc_ydays[is_leap_year(year)][month] + day-1; } EXPORT_SYMBOL(rtc_year_days); @@ -66,7 +65,7 @@ void rtc_time_to_tm(unsigned long time, struct rtc_time *tm) - LEAPS_THRU_END_OF(1970 - 1); if (days < 0) { year -= 1; - days += 365 + LEAP_YEAR(year); + days += 365 + is_leap_year(year); } tm->tm_year = year - 1900; tm->tm_yday = days + 1; diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 4046b75563c1..60f88a7fb13d 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -99,6 +99,7 @@ struct rtc_pll_info { #ifdef __KERNEL__ +#include #include extern int rtc_month_days(unsigned int month, unsigned int year); @@ -232,6 +233,11 @@ int rtc_register(rtc_task_t *task); int rtc_unregister(rtc_task_t *task); int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); +static inline bool is_leap_year(unsigned int year) +{ + return (!(year % 4) && (year % 100)) || !(year % 400); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_RTC_H_ */ -- cgit v1.2.3 From 614c0dc93284404be2a4d5750c79bb95f2b6c980 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Tue, 31 Mar 2009 15:25:15 -0700 Subject: cirrusfb: add accelerator constant Add an accelerator constant so almost all Cirrus are recognized as accelerators by the fbset command. Signed-off-by: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/cirrusfb.c | 11 +++++++++-- include/linux/fb.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c index 15e2e6bfcbff..e9a2661669eb 100644 --- a/drivers/video/cirrusfb.c +++ b/drivers/video/cirrusfb.c @@ -519,6 +519,7 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, int yres; /* memory size in pixels */ unsigned pixels = info->screen_size * 8 / var->bits_per_pixel; + struct cirrusfb_info *cinfo = info->par; switch (var->bits_per_pixel) { case 1: @@ -627,6 +628,9 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, if (cirrusfb_check_pixclock(var, info)) return -EINVAL; + if (!is_laguna(cinfo)) + var->accel_flags = FB_ACCELF_TEXT; + return 0; } @@ -2029,8 +2033,12 @@ static int __devinit cirrusfb_set_fbinfo(struct fb_info *info) | FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_IMAGEBLIT | FBINFO_HWACCEL_COPYAREA; - if (noaccel || is_laguna(cinfo)) + if (noaccel || is_laguna(cinfo)) { info->flags |= FBINFO_HWACCEL_DISABLED; + info->fix.accel = FB_ACCEL_NONE; + } else + info->fix.accel = FB_ACCEL_CIRRUS_ALPINE; + info->fbops = &cirrusfb_ops; if (cinfo->btype == BT_GD5480) { @@ -2056,7 +2064,6 @@ static int __devinit cirrusfb_set_fbinfo(struct fb_info *info) /* FIXME: map region at 0xB8000 if available, fill in here */ info->fix.mmio_len = 0; - info->fix.accel = FB_ACCEL_NONE; fb_alloc_cmap(&info->cmap, 256, 0); diff --git a/include/linux/fb.h b/include/linux/fb.h index 31527e17076b..fe7d0d7907ab 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -123,6 +123,7 @@ struct dentry; #define FB_ACCEL_TRIDENT_3DIMAGE 51 /* Trident 3DImage */ #define FB_ACCEL_TRIDENT_BLADE3D 52 /* Trident Blade3D */ #define FB_ACCEL_TRIDENT_BLADEXP 53 /* Trident BladeXP */ +#define FB_ACCEL_CIRRUS_ALPINE 53 /* Cirrus Logic 543x/544x/5480 */ #define FB_ACCEL_NEOMAGIC_NM2070 90 /* NeoMagic NM2070 */ #define FB_ACCEL_NEOMAGIC_NM2090 91 /* NeoMagic NM2090 */ #define FB_ACCEL_NEOMAGIC_NM2093 92 /* NeoMagic NM2093 */ -- cgit v1.2.3 From 6a7f2829b5f8be124e168265f176dbbbea8861a0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 31 Mar 2009 15:25:19 -0700 Subject: fbdev: uninline lock_fb_info() Before: text data bss dec hex filename 3648 2910 32 6590 19be drivers/video/backlight/backlight.o 3226 2812 32 6070 17b6 drivers/video/backlight/lcd.o 30990 16688 8480 56158 db5e drivers/video/console/fbcon.o 15488 8400 24 23912 5d68 drivers/video/fbmem.o After: text data bss dec hex filename 3537 2870 32 6439 1927 drivers/video/backlight/backlight.o 3131 2772 32 5935 172f drivers/video/backlight/lcd.o 30876 16648 8480 56004 dac4 drivers/video/console/fbcon.o 15506 8400 24 23930 5d7a drivers/video/fbmem.o Cc: Andrea Righi Cc: Geert Uytterhoeven Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbmem.c | 11 +++++++++++ include/linux/fb.h | 10 +--------- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index b64f061dd447..2ac32e6b5953 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -46,6 +46,17 @@ struct fb_info *registered_fb[FB_MAX] __read_mostly; int num_registered_fb __read_mostly; +int lock_fb_info(struct fb_info *info) +{ + mutex_lock(&info->lock); + if (!info->fbops) { + mutex_unlock(&info->lock); + return 0; + } + return 1; +} +EXPORT_SYMBOL(lock_fb_info); + /* * Helpers */ diff --git a/include/linux/fb.h b/include/linux/fb.h index fe7d0d7907ab..f563c5013932 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -961,15 +961,7 @@ extern struct fb_info *registered_fb[FB_MAX]; extern int num_registered_fb; extern struct class *fb_class; -static inline int lock_fb_info(struct fb_info *info) -{ - mutex_lock(&info->lock); - if (!info->fbops) { - mutex_unlock(&info->lock); - return 0; - } - return 1; -} +extern int lock_fb_info(struct fb_info *info); static inline void unlock_fb_info(struct fb_info *info) { -- cgit v1.2.3