From d1c338a509cea5378df59629ad47382810c38623 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 19 Aug 2012 12:29:16 -0700 Subject: libceph: delay debugfs initialization until we learn global_id The debugfs directory includes the cluster fsid and our unique global_id. We need to delay the initialization of the debug entry until we have learned both the fsid and our global_id from the monitor or else the second client can't create its debugfs entry and will fail (and multiple client instances aren't properly reflected in debugfs). Reported by: Yan, Zheng Signed-off-by: Sage Weil Reviewed-by: Yehuda Sadeh --- fs/ceph/debugfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ceph') diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fb962efdacee..6d59006bfa27 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) int err = -ENOMEM; dout("ceph_fs_debugfs_init\n"); + BUG_ON(!fsc->client->debugfs_dir); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, -- cgit v1.2.3 From 6c5e50fa614fea5325a2973be06f7ec6f1055316 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Aug 2012 15:55:25 -0700 Subject: ceph: tolerate (and warn on) extraneous dentry from mds If the MDS gives us a dentry and we weren't prepared to handle it, WARN_ON_ONCE instead of crashing. Reported-by: Yan, Zheng Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/inode.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9fff9f3b17e4..4b5762ef7c2b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, if (rinfo->head->is_dentry) { struct inode *dir = req->r_locked_dir; - err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, - session, req->r_request_started, -1, - &req->r_caps_reservation); - if (err < 0) - return err; + if (dir) { + err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, + session, req->r_request_started, -1, + &req->r_caps_reservation); + if (err < 0) + return err; + } else { + WARN_ON_ONCE(1); + } } /* @@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, * will have trouble splicing in the virtual snapdir later */ if (rinfo->head->is_dentry && !req->r_aborted && + req->r_locked_dir && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, fsc->mount_options->snapdir_name, req->r_dentry->d_name.len))) { -- cgit v1.2.3 From 45f2e081f573526977abfa781a12728f83e9641f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Aug 2012 12:11:51 -0700 Subject: ceph: avoid divide by zero in __validate_layout() If "l->stripe_unit" is zero the the mod on the next line will cause a divide by zero bug. This comes from the copy_from_user() in ceph_ioctl_set_layout_policy(). Passing 0 is valid, though (it means "do not change") so avoid the % check in that case. Reported-by: Dan Carpenter Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8e3fb69fbe62..1396ceb46797 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc, /* validate striping parameters */ if ((l->object_size & ~PAGE_MASK) || (l->stripe_unit & ~PAGE_MASK) || - ((unsigned)l->object_size % (unsigned)l->stripe_unit)) + (l->stripe_unit != 0 && + ((unsigned)l->object_size % (unsigned)l->stripe_unit))) return -EINVAL; /* make sure it's a valid data pool */ -- cgit v1.2.3 From 2744c171dbaa0b1ec7639e7d0ff817fba9461a38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Sep 2012 21:41:05 -0400 Subject: ceph: don't abuse d_delete() on failure exits Signed-off-by: Al Viro --- fs/ceph/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4b5762ef7c2b..ba95eea201bf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1104,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, pr_err("fill_trace bad get_inode " "%llx.%llx\n", vino.ino, vino.snap); err = PTR_ERR(in); - d_delete(dn); + d_drop(dn); goto done; } dn = splice_dentry(dn, in, &have_lease, true); @@ -1277,7 +1277,7 @@ retry_lookup: in = ceph_get_inode(parent->d_sb, vino); if (IS_ERR(in)) { dout("new_inode badness\n"); - d_delete(dn); + d_drop(dn); dput(dn); err = PTR_ERR(in); goto out; -- cgit v1.2.3 From c98f533c9497e285109a047bfb955d683f33f7e4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 9 Aug 2012 10:33:26 -0700 Subject: ceph: let path portion of mount "device" be optional A recent change to /sbin/mountall causes any trailing '/' character in the "device" (or fs_spec) field in /etc/fstab to be stripped. As a result, an entry for a ceph mount that intends to mount the root of the name space ends up with now path portion, and the ceph mount option processing code rejects this. That is, an entry in /etc/fstab like: cephserver:port:/ /mnt ceph defaults 0 0 provides to the ceph code just "cephserver:port:" as the "device," and that gets rejected. Although this is a bug in /sbin/mountall, we can have the ceph mount code support an empty/nonexistent path, interpreting it to mean the root of the name space. RFC 5952 offers recommendations for how to express IPv6 addresses, and recommends the usage found in RFC 3986 (which specifies the format for URI's) for representing both IPv4 and IPv6 addresses that include port numbers. (See in particular the definition of "authority" found in the Appendix of RFC 3986.) According to those standards, no host specification will ever contain a '/' character. As a result, it is sufficient to scan a provided "device" from an /etc/fstab entry for the first '/' character, and if it's found, treat that as the beginning of the path. If no '/' character is present, we can treat the entire string as the monitor host specification(s), and assume the path to be the root of the name space. We'll still require a ':' to separate the host portion from the (possibly empty) path portion. This means that we can more formally define how ceph will interpret the "device" it's provided when processing a mount request: "device" will look like: [,...]:[] where is [:] is optional, but if present must begin with '/' This addresses http://tracker.newdream.net/issues/2919 Signed-off-by: Alex Elder Reviewed-by: Dan Mick --- fs/ceph/super.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f9..2f586b0e5e0f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -307,7 +307,10 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, { struct ceph_mount_options *fsopt; const char *dev_name_end; - int err = -ENOMEM; + int err; + + if (!dev_name || !*dev_name) + return -EINVAL; fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); if (!fsopt) @@ -328,21 +331,33 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; fsopt->congestion_kb = default_congestion_kb(); - /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ + /* + * Distinguish the server list from the path in "dev_name". + * Internally we do not include the leading '/' in the path. + * + * "dev_name" will look like: + * [,...]:[] + * where + * is [:] + * is optional, but if present must begin with '/' + */ + dev_name_end = strchr(dev_name, '/'); + if (dev_name_end) { + /* skip over leading '/' for path */ + *path = dev_name_end + 1; + } else { + /* path is empty */ + dev_name_end = dev_name + strlen(dev_name); + *path = dev_name_end; + } err = -EINVAL; - if (!dev_name) - goto out; - *path = strstr(dev_name, ":/"); - if (*path == NULL) { - pr_err("device name is missing path (no :/ in %s)\n", + dev_name_end--; /* back up to ':' separator */ + if (*dev_name_end != ':') { + pr_err("device name is missing path (no : separator in %s)\n", dev_name); goto out; } - dev_name_end = *path; dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); - - /* path on server */ - *path += 2; dout("server path '%s'\n", *path); *popt = ceph_parse_options(options, dev_name, dev_name_end, -- cgit v1.2.3 From 3e8f43a089f06279c5f76a9ccd42578eebf7bfa5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 20 Sep 2012 17:42:25 +0800 Subject: ceph: Fix oops when handling mdsmap that decreases max_mds When i >= newmap->m_max_mds, ceph_mdsmap_get_addr(newmap, i) return NULL. Passing NULL to memcmp() triggers oops. Signed-off-by: Yan, Zheng Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a5a735422aa7..1bcf712655d9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2625,7 +2625,8 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", session_state_name(s->s_state)); - if (memcmp(ceph_mdsmap_get_addr(oldmap, i), + if (i >= newmap->m_max_mds || + memcmp(ceph_mdsmap_get_addr(oldmap, i), ceph_mdsmap_get_addr(newmap, i), sizeof(struct ceph_entity_addr))) { if (s->s_state == CEPH_MDS_SESSION_OPENING) { -- cgit v1.2.3 From b905a7f8b7a61c192927d0324f2ea6c998f451ba Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 28 Sep 2012 12:59:16 +0800 Subject: ceph: convert to use le32_add_cpu() Convert cpu_to_le32(le32_to_cpu(E1) + E2) to use le32_add_cpu(). dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 620daad201db..3251e9cc6401 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1005,7 +1005,7 @@ static void __queue_cap_release(struct ceph_mds_session *session, BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); head = msg->front.iov_base; - head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); + le32_add_cpu(&head->num, 1); item = msg->front.iov_base + msg->front.iov_len; item->ino = cpu_to_le64(ino); item->cap_id = cpu_to_le64(cap_id); -- cgit v1.2.3 From 6816282dab3a72efe8c0d182c1bc2960d87f4322 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 21:01:02 -0700 Subject: ceph: propagate layout error on osd request creation If we are creating an osd request and get an invalid layout, return an EINVAL to the caller. We switch up the return to have an error code instead of NULL implying -ENOMEM. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/addr.c | 8 ++++---- fs/ceph/file.c | 4 ++-- net/ceph/osd_client.c | 15 +++++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 452e71a1b753..4469b63c9b7b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -308,8 +308,8 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, NULL, false, 1, 0); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* build page vector */ nr_pages = len >> PAGE_CACHE_SHIFT; @@ -832,8 +832,8 @@ get_more_pages: ci->i_truncate_size, &inode->i_mtime, true, 1, 0); - if (!req) { - rc = -ENOMEM; + if (IS_ERR(req)) { + rc = PTR_ERR(req); unlock_page(page); break; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ecebbc09bfc7..5840d2aaed15 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -536,8 +536,8 @@ more: do_sync, ci->i_truncate_seq, ci->i_truncate_size, &mtime, false, 2, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); if (file->f_flags & O_DIRECT) { pages = ceph_get_direct_page_vector(data, num_pages, false); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f7b56e23988c..ccbdfbba9e53 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -464,6 +464,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, { struct ceph_osd_req_op ops[3]; struct ceph_osd_request *req; + int r; ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; @@ -482,10 +483,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, use_mempool, GFP_NOFS, NULL, NULL); if (!req) - return NULL; + return ERR_PTR(-ENOMEM); /* calculate max write size */ - calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(osdc, vino, layout, off, plen, req, ops); + if (r < 0) + return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ /* in case it differs from natural (file) alignment that @@ -1928,8 +1931,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, false, 1, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* it may be a short read due to an object boundary */ req->r_pages = pages; @@ -1971,8 +1974,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, snapc, do_sync, truncate_seq, truncate_size, mtime, nofail, 1, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* it may be a short write due to an object boundary */ req->r_pages = pages; -- cgit v1.2.3 From 8c0a85377048b64c880e76ec7368904fe46d0b94 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 26 Sep 2012 11:33:07 +1000 Subject: fs: push rcu_barrier() from deactivate_locked_super() to filesystems There's no reason to call rcu_barrier() on every deactivate_locked_super(). We only need to make sure that all delayed rcu free inodes are flushed before we destroy related cache. Removing rcu_barrier() from deactivate_locked_super() affects some fast paths. E.g. on my machine exit_group() of a last process in IPC namespace takes 0.07538s. rcu_barrier() takes 0.05188s of that time. Signed-off-by: Kirill A. Shutemov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/9p/v9fs.c | 5 +++++ fs/adfs/super.c | 5 +++++ fs/affs/super.c | 5 +++++ fs/afs/super.c | 5 +++++ fs/befs/linuxvfs.c | 5 +++++ fs/bfs/inode.c | 5 +++++ fs/btrfs/extent_io.c | 6 ++++++ fs/btrfs/inode.c | 5 +++++ fs/ceph/super.c | 5 +++++ fs/cifs/cifsfs.c | 5 +++++ fs/coda/inode.c | 5 +++++ fs/ecryptfs/main.c | 6 ++++++ fs/efs/super.c | 5 +++++ fs/exofs/super.c | 5 +++++ fs/ext2/super.c | 5 +++++ fs/ext3/super.c | 5 +++++ fs/ext4/super.c | 5 +++++ fs/fat/inode.c | 5 +++++ fs/freevxfs/vxfs_super.c | 5 +++++ fs/fuse/inode.c | 6 ++++++ fs/hfs/super.c | 6 ++++++ fs/hfsplus/super.c | 6 ++++++ fs/hpfs/super.c | 5 +++++ fs/hugetlbfs/inode.c | 5 +++++ fs/isofs/inode.c | 5 +++++ fs/jffs2/super.c | 6 ++++++ fs/jfs/super.c | 6 ++++++ fs/logfs/inode.c | 5 +++++ fs/minix/inode.c | 5 +++++ fs/ncpfs/inode.c | 5 +++++ fs/nfs/inode.c | 5 +++++ fs/nilfs2/super.c | 6 ++++++ fs/ntfs/super.c | 6 ++++++ fs/ocfs2/dlmfs/dlmfs.c | 5 +++++ fs/ocfs2/super.c | 5 +++++ fs/openpromfs/inode.c | 5 +++++ fs/qnx4/inode.c | 5 +++++ fs/qnx6/inode.c | 5 +++++ fs/reiserfs/super.c | 5 +++++ fs/romfs/super.c | 5 +++++ fs/squashfs/super.c | 5 +++++ fs/super.c | 6 ------ fs/sysv/inode.c | 5 +++++ fs/ubifs/super.c | 6 ++++++ fs/udf/super.c | 5 +++++ fs/ufs/super.c | 5 +++++ fs/xfs/xfs_super.c | 5 +++++ 47 files changed, 240 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index b85efa773949..392c5dac1981 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void) */ static void v9fs_destroy_inode_cache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(v9fs_inode_cache); } diff --git a/fs/adfs/super.c b/fs/adfs/super.c index bdaec92353c2..c830c857c663 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -275,6 +275,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(adfs_inode_cachep); } diff --git a/fs/affs/super.c b/fs/affs/super.c index c70f1e5fc024..2f57053bf26c 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -147,6 +147,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(affs_inode_cachep); } diff --git a/fs/afs/super.c b/fs/afs/super.c index df8c6047c2a1..43165009428d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -123,6 +123,11 @@ void __exit afs_fs_exit(void) BUG(); } + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(afs_inode_cachep); _leave(""); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index cf7f3c67c8b7..962b4f8f7994 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -454,6 +454,11 @@ befs_init_inodecache(void) static void befs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(befs_inode_cachep); } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9870417c26e7..d5fc598d6e4a 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -280,6 +280,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(bfs_inode_cachep); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4c878476bb91..b08ea4717e9d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -107,6 +107,12 @@ void extent_io_exit(void) list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } + + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); if (extent_state_cache) kmem_cache_destroy(extent_state_cache); if (extent_buffer_cache) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ec154f954646..cf03a91d806f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7076,6 +7076,11 @@ static void init_once(void *foo) void btrfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (btrfs_inode_cachep) kmem_cache_destroy(btrfs_inode_cachep); if (btrfs_trans_handle_cachep) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f9..3a42d9326378 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -603,6 +603,11 @@ bad_cap: static void destroy_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); kmem_cache_destroy(ceph_dentry_cachep); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index db8a404a51dd..d4ce77a02327 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -977,6 +977,11 @@ cifs_init_inodecache(void) static void cifs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(cifs_inode_cachep); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index d315c6c5891a..be2aa4909487 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -85,6 +85,11 @@ int coda_init_inodecache(void) void coda_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(coda_inode_cachep); } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9b627c15010a..34fcde765d24 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -710,6 +710,12 @@ static void ecryptfs_free_kmem_caches(void) { int i; + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { struct ecryptfs_cache_info *info; diff --git a/fs/efs/super.c b/fs/efs/super.c index e755ec746c69..2002431ef9a0 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -96,6 +96,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(efs_inode_cachep); } diff --git a/fs/exofs/super.c b/fs/exofs/super.c index dde41a75c7c8..59e3bbfac0b1 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -206,6 +206,11 @@ static int init_inodecache(void) */ static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(exofs_inode_cachep); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index af74d9e27b71..6c205d0c565b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -206,6 +206,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext2_inode_cachep); } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c892e93d8e7..8d41c8889eee 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -532,6 +532,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext3_inode_cachep); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c6e0cb3d1f4a..455b7d8c6d62 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1019,6 +1019,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext4_inode_cachep); } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 05e897fe9866..fd8e47cd898b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void) static void __exit fat_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(fat_inode_cachep); } diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index d4fabd26084e..fed2c8afb3a9 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -279,6 +279,11 @@ static void __exit vxfs_cleanup(void) { unregister_filesystem(&vxfs_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(vxfs_inode_cachep); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fca222dabe3c..f0eda124cffb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void) { unregister_filesystem(&fuse_fs_type); unregister_fuseblk(); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(fuse_inode_cachep); } diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4eb873e0c07b..941d7a8c2197 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -482,6 +482,12 @@ static int __init init_hfs_fs(void) static void __exit exit_hfs_fs(void) { unregister_filesystem(&hfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hfs_inode_cachep); } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index fdafb2d71654..811a84d2d964 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void) static void __exit exit_hfsplus_fs(void) { unregister_filesystem(&hfsplus_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hfsplus_inode_cachep); } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 706a12c083ea..3cb1da56eb73 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -210,6 +210,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hpfs_inode_cachep); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8349a899912e..c4b85d064e6b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1042,6 +1042,11 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hugetlbfs_inode_cachep); kern_unmount(hugetlbfs_vfsmount); unregister_filesystem(&hugetlbfs_fs_type); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 29037c365ba4..f94cde4527e8 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -114,6 +114,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(isofs_inode_cachep); } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 61ea41389f90..ff487954cd96 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void) unregister_filesystem(&jffs2_fs_type); jffs2_destroy_slab_caches(); jffs2_compressors_exit(); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(jffs2_inode_cachep); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c55c7452d285..3735347fd5f6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -903,6 +903,12 @@ static void __exit exit_jfs_fs(void) jfs_proc_clean(); #endif unregister_filesystem(&jfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(jfs_inode_cachep); } diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 6984562738d3..121bba2cf6f2 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -417,5 +417,10 @@ int logfs_init_inode_cache(void) void logfs_destroy_inode_cache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(logfs_inode_cache); } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2a503ad020d5..dc8d3629c20a 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -100,6 +100,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(minix_inode_cachep); } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 333df07ae3bd..0c62c55b25d7 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -89,6 +89,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ncp_inode_cachep); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9b47610338f5..e4c716d374a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void) static void nfs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(nfs_inode_cachep); } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6a10812711c1..3c991dc84f2f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj) static void nilfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + if (nilfs_inode_cachep) kmem_cache_destroy(nilfs_inode_cachep); if (nilfs_transaction_cachep) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 2bc149d6a784..fe08d4afa106 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3168,6 +3168,12 @@ static void __exit exit_ntfs_fs(void) ntfs_debug("Unregistering NTFS driver."); unregister_filesystem(&ntfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ntfs_big_inode_cache); kmem_cache_destroy(ntfs_inode_cache); kmem_cache_destroy(ntfs_name_cache); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 83b6f98e0665..16b712d260d4 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void) flush_workqueue(user_dlm_worker); destroy_workqueue(user_dlm_worker); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(dlmfs_inode_cache); bdi_destroy(&dlmfs_backing_dev_info); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 68f4541c2db9..0e91ec22a940 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void) static void ocfs2_free_mem_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (ocfs2_inode_cachep) kmem_cache_destroy(ocfs2_inode_cachep); ocfs2_inode_cachep = NULL; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 4a3477949bca..2ad080faca34 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -463,6 +463,11 @@ static int __init init_openprom_fs(void) static void __exit exit_openprom_fs(void) { unregister_filesystem(&openprom_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(op_inode_cachep); } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 552e994e3aa1..9534b4f76579 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -391,6 +391,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(qnx4_inode_cachep); } diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 2049c814bda4..1b37fff7b5ff 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -651,6 +651,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(qnx6_inode_cachep); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7a37dabf5a96..1078ae179993 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -608,6 +608,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(reiserfs_inode_cachep); } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 77c5f2173983..fd7c5f60b46b 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -648,6 +648,11 @@ error_register: static void __exit exit_romfs_fs(void) { unregister_filesystem(&romfs_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(romfs_inode_cachep); } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 29cd014ed3a1..260e3928d4f5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -425,6 +425,11 @@ static int __init init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(squashfs_inode_cachep); } diff --git a/fs/super.c b/fs/super.c index 0902cfa6a12e..5fdf7ff32c4e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s) /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); - - /* - * We need to call rcu_barrier so all the delayed rcu free - * inodes are flushed before we release the fs module. - */ - rcu_barrier(); put_filesystem(fs); put_super(s); } else { diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 80e1e2b18df1..0d0c50bd3321 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -360,5 +360,10 @@ int __init sysv_init_icache(void) void sysv_destroy_icache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(sysv_inode_cachep); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 71a197f0f93d..36e09ca9130b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void) dbg_debugfs_exit(); ubifs_compressors_exit(); unregister_shrinker(&ubifs_shrinker_info); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ubifs_inode_slab); unregister_filesystem(&ubifs_fs_type); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 18fc038a438d..b8d27642ab06 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -171,6 +171,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(udf_inode_cachep); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 444927e5706b..f7cfecfe1cab 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1466,6 +1466,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ufs_inode_cachep); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19e2380fb867..83d36e473d2f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1506,6 +1506,11 @@ xfs_init_zones(void) STATIC void xfs_destroy_zones(void) { + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); -- cgit v1.2.3 From 457712a0bc5389b75d2c93840a684fd77df2aabb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 21:04:57 -0700 Subject: ceph: return EIO on invalid layout on GET_DATALOC ioctl If the user calls GET_DATALOC on a file with an invalid (e.g., zeroed) layout, return EIO to userland. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 1396ceb46797..36549a46e311 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -187,14 +187,18 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) u64 tmp; struct ceph_object_layout ol; struct ceph_pg pgid; + int r; /* copy and validate */ if (copy_from_user(&dl, arg, sizeof(dl))) return -EFAULT; down_read(&osdc->map_sem); - ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, - &dl.object_no, &dl.object_offset, &olen); + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + &dl.object_no, &dl.object_offset, + &olen); + if (r < 0) + return -EIO; dl.file_offset -= dl.object_offset; dl.object_size = ceph_file_layout_object_size(ci->i_layout); dl.block_size = ceph_file_layout_su(ci->i_layout); -- cgit v1.2.3 From 6285bc231277419255f3498d3eb5ddc9f8e7fe79 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 2 Oct 2012 10:25:51 -0500 Subject: ceph: avoid 32-bit page index overflow A pgoff_t is defined (by default) to have type (unsigned long). On architectures such as i686 that's a 32-bit type. The ceph address space code was attempting to produce 64 bit offsets by shifting a page's index by PAGE_CACHE_SHIFT, but the result was not what was desired because the shift occurred before the result got promoted to 64 bits. Fix this by converting all uses of page->index used in this way to use the page_offset() macro, which ensures the 64-bit result has the intended value. This fixes http://tracker.newdream.net/issues/3112 Reported-by: Mohamed Pakkeer Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/addr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4469b63c9b7b..22b6e4583faa 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -205,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, - page->index << PAGE_CACHE_SHIFT, &len, + (u64) page_offset(page), &len, ci->i_truncate_seq, ci->i_truncate_size, &page, 1, 0); if (err == -ENOENT) @@ -286,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) int nr_pages = 0; int ret; - off = page->index << PAGE_CACHE_SHIFT; + off = (u64) page_offset(page); /* count pages */ next_index = page->index; @@ -426,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_inode_info *ci; struct ceph_fs_client *fsc; struct ceph_osd_client *osdc; - loff_t page_off = page->index << PAGE_CACHE_SHIFT; + loff_t page_off = page_offset(page); int len = PAGE_CACHE_SIZE; loff_t i_size; int err = 0; @@ -817,8 +817,7 @@ get_more_pages: /* ok */ if (locked_pages == 0) { /* prepare async write request */ - offset = (unsigned long long)page->index - << PAGE_CACHE_SHIFT; + offset = (u64) page_offset(page); len = wsize; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, @@ -1180,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = vma->vm_file->f_dentry->d_inode; struct page *page = vmf->page; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; - loff_t off = page->index << PAGE_CACHE_SHIFT; + loff_t off = page_offset(page); loff_t size, len; int ret; -- cgit v1.2.3 From 0b173bc4daa8f8ec03a85abf5e47b23502ff80af Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 8 Oct 2012 16:28:46 -0700 Subject: mm: kill vma flag VM_CAN_NONLINEAR Move actual pte filling for non-linear file mappings into the new special vma operation: ->remap_pages(). Filesystems must implement this method to get non-linear mapping support, if it uses filemap_fault() then generic_file_remap_pages() can be used. Now device drivers can implement this method and obtain nonlinear vma support. Signed-off-by: Konstantin Khlebnikov Cc: Alexander Viro Cc: Carsten Otte Cc: Chris Metcalf #arch/tile Cc: Cyrill Gorcunov Cc: Eric Paris Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Morris Cc: Jason Baron Cc: Kentaro Takeda Cc: Matt Helsley Cc: Nick Piggin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Robert Richter Cc: Suresh Siddha Cc: Tetsuo Handa Cc: Venkatesh Pallipadi Acked-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/android/ashmem.c | 1 - fs/9p/vfs_file.c | 1 + fs/btrfs/file.c | 2 +- fs/ceph/addr.c | 2 +- fs/cifs/file.c | 1 + fs/ext4/file.c | 2 +- fs/fuse/file.c | 1 + fs/gfs2/file.c | 2 +- fs/nfs/file.c | 1 + fs/nilfs2/file.c | 2 +- fs/ocfs2/mmap.c | 2 +- fs/ubifs/file.c | 1 + fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 ++ include/linux/mm.h | 7 ++++--- mm/filemap.c | 2 +- mm/filemap_xip.c | 3 ++- mm/fremap.c | 14 ++++++++------ mm/mmap.c | 3 +-- mm/nommu.c | 8 ++++++++ mm/shmem.c | 3 +-- 21 files changed, 39 insertions(+), 23 deletions(-) (limited to 'fs/ceph') diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 94a740d2883d..634b9ae713e0 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -332,7 +332,6 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); vma->vm_file = asma->file; - vma->vm_flags |= VM_CAN_NONLINEAR; out: mutex_unlock(&ashmem_mutex); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index dd6f7ee1e312..c2483e97beee 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -738,6 +738,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data, static const struct vm_operations_struct v9fs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = v9fs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5caf285c6e4d..f6b40e86121b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1599,6 +1599,7 @@ out: static const struct vm_operations_struct btrfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = btrfs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) @@ -1610,7 +1611,6 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) file_accessed(filp); vma->vm_ops = &btrfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 22b6e4583faa..6690269f5dde 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1224,6 +1224,7 @@ out: static struct vm_operations_struct ceph_vmops = { .fault = filemap_fault, .page_mkwrite = ceph_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; int ceph_mmap(struct file *file, struct vm_area_struct *vma) @@ -1234,6 +1235,5 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma) return -ENOEXEC; file_accessed(file); vma->vm_ops = &ceph_vmops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7d7bbdc4c8e7..edb25b4bbb95 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3003,6 +3003,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static struct vm_operations_struct cifs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = cifs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ca6f07afe601..bf3966bccd34 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -207,6 +207,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ext4_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) @@ -217,7 +218,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) return -ENOEXEC; file_accessed(file); vma->vm_ops = &ext4_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index aba15f1b7ad2..78d2837bc940 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1379,6 +1379,7 @@ static const struct vm_operations_struct fuse_file_vm_ops = { .close = fuse_vma_close, .fault = filemap_fault, .page_mkwrite = fuse_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 30e21997a1a1..0def0504afc1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -492,6 +492,7 @@ out: static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, .page_mkwrite = gfs2_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; /** @@ -526,7 +527,6 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) return error; } vma->vm_ops = &gfs2_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6a7fcab7ecb3..f692be97676d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -578,6 +578,7 @@ out: static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = nfs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int nfs_need_sync_write(struct file *filp, struct inode *inode) diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 5b387a4c293e..16f35f7423c5 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -135,13 +135,13 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct nilfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = nilfs_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &nilfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index d150372fd81d..47a87dda54ce 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -173,6 +173,7 @@ out: static const struct vm_operations_struct ocfs2_file_vm_ops = { .fault = ocfs2_fault, .page_mkwrite = ocfs2_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) @@ -188,7 +189,6 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); out: vma->vm_ops = &ocfs2_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index ff48c5a85309..5bc77817f382 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1536,6 +1536,7 @@ out_unlock: static const struct vm_operations_struct ubifs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ubifs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1eaeb8be3aae..aa473fa640a2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -940,7 +940,6 @@ xfs_file_mmap( struct vm_area_struct *vma) { vma->vm_ops = &xfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; file_accessed(filp); return 0; @@ -1443,4 +1442,5 @@ const struct file_operations xfs_dir_file_operations = { static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = xfs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index ca6d8c806f47..5a8a273d5b2f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2552,6 +2552,8 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); +extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, + unsigned long size, pgoff_t pgoff); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); diff --git a/include/linux/mm.h b/include/linux/mm.h index fb0685b17914..44d3fc25f556 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -105,7 +105,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ #define VM_NODUMP 0x04000000 /* Do not include in the core dump */ -#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ @@ -171,8 +170,7 @@ extern pgprot_t protection_map[16]; * of VM_FAULT_xxx flags that give details about how the fault was handled. * * pgoff should be used in favour of virtual_address, if possible. If pgoff - * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear - * mapping support. + * is used, one may implement ->remap_pages to get nonlinear mapping support. */ struct vm_fault { unsigned int flags; /* FAULT_FLAG_xxx flags */ @@ -230,6 +228,9 @@ struct vm_operations_struct { int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from, const nodemask_t *to, unsigned long flags); #endif + /* called by sys_remap_file_pages() to populate non-linear mapping */ + int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, + unsigned long size, pgoff_t pgoff); }; struct mmu_gather; diff --git a/mm/filemap.c b/mm/filemap.c index 384344575c37..a9827b42556e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1737,6 +1737,7 @@ EXPORT_SYMBOL(filemap_page_mkwrite); const struct vm_operations_struct generic_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = filemap_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; /* This is used for a general mmap of a disk file */ @@ -1749,7 +1750,6 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma) return -ENOEXEC; file_accessed(file); vma->vm_ops = &generic_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 13e013b1270c..91750227a191 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -305,6 +305,7 @@ out: static const struct vm_operations_struct xip_file_vm_ops = { .fault = xip_file_fault, .page_mkwrite = filemap_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; int xip_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -313,7 +314,7 @@ int xip_file_mmap(struct file * file, struct vm_area_struct * vma) file_accessed(file); vma->vm_ops = &xip_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR | VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP; return 0; } EXPORT_SYMBOL_GPL(xip_file_mmap); diff --git a/mm/fremap.c b/mm/fremap.c index 048659c0c03d..3d731a498788 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -5,6 +5,7 @@ * * started by Ingo Molnar, Copyright (C) 2002, 2003 */ +#include #include #include #include @@ -80,9 +81,10 @@ out: return err; } -static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long size, pgoff_t pgoff) +int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, + unsigned long size, pgoff_t pgoff) { + struct mm_struct *mm = vma->vm_mm; int err; do { @@ -95,9 +97,9 @@ static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, pgoff++; } while (size); - return 0; - + return 0; } +EXPORT_SYMBOL(generic_file_remap_pages); /** * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma @@ -167,7 +169,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) goto out; - if (!(vma->vm_flags & VM_CAN_NONLINEAR)) + if (!vma->vm_ops->remap_pages) goto out; if (start < vma->vm_start || start + size > vma->vm_end) @@ -228,7 +230,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, } mmu_notifier_invalidate_range_start(mm, start, start + size); - err = populate_range(mm, vma, start, size, pgoff); + err = vma->vm_ops->remap_pages(vma, start, size, pgoff); mmu_notifier_invalidate_range_end(mm, start, start + size); if (!err && !(flags & MAP_NONBLOCK)) { if (vma->vm_flags & VM_LOCKED) { diff --git a/mm/mmap.c b/mm/mmap.c index b0989f4d4f09..d0686d355113 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -669,8 +669,7 @@ again: remove_next = 1 + (end > next->vm_end); static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags) { - /* VM_CAN_NONLINEAR may get set later by f_op->mmap() */ - if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR) + if (vma->vm_flags ^ vm_flags) return 0; if (vma->vm_file != file) return 0; diff --git a/mm/nommu.c b/mm/nommu.c index dee2ff89fd58..98318dcff742 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1961,6 +1961,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } EXPORT_SYMBOL(filemap_fault); +int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, + unsigned long size, pgoff_t pgoff) +{ + BUG(); + return 0; +} +EXPORT_SYMBOL(generic_file_remap_pages); + static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { diff --git a/mm/shmem.c b/mm/shmem.c index d3752110c8c7..cc12072f8787 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1339,7 +1339,6 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &shmem_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } @@ -2643,6 +2642,7 @@ static const struct vm_operations_struct shmem_vm_ops = { .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, #endif + .remap_pages = generic_file_remap_pages, }; static struct dentry *shmem_mount(struct file_system_type *fs_type, @@ -2836,7 +2836,6 @@ int shmem_zero_setup(struct vm_area_struct *vma) fput(vma->vm_file); vma->vm_file = file; vma->vm_ops = &shmem_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } -- cgit v1.2.3