From 2ae409dc6a907e80f4cd32ad4482ef52441e3147 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Jul 2017 16:20:05 +0800 Subject: ceph: remove unused cap_release_safety mount option Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 8a79587e1317..dca30ac9bd34 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -93,7 +93,6 @@ struct ceph_options { #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ -#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) /* mount state */ enum { -- cgit v1.2.3 From 4214fb158cc423ac31b841000e219855be055388 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Jul 2017 18:49:44 +0800 Subject: ceph: validate correctness of some mount options Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 21 ++++++++++++++------- fs/ceph/super.h | 9 +++++++++ include/linux/ceph/libceph.h | 10 ---------- 3 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index caf9801712ca..1deb8810d7c7 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -243,21 +243,33 @@ static int parse_fsopt_token(char *c, void *private) fsopt->rsize = ALIGN(intval, PAGE_SIZE); break; case Opt_rasize: - fsopt->rasize = intval; + if (intval < 0) + return -EINVAL; + fsopt->rasize = ALIGN(intval, PAGE_SIZE); break; case Opt_caps_wanted_delay_min: + if (intval < 1) + return -EINVAL; fsopt->caps_wanted_delay_min = intval; break; case Opt_caps_wanted_delay_max: + if (intval < 1) + return -EINVAL; fsopt->caps_wanted_delay_max = intval; break; case Opt_readdir_max_entries: + if (intval < 1) + return -EINVAL; fsopt->max_readdir = intval; break; case
Opt_readdir_max_bytes: + if (intval < (int)PAGE_SIZE && intval != 0) + return -EINVAL; fsopt->max_readdir_bytes = intval; break; case Opt_congestion_kb: + if (intval < 1024) /* at least 1M */ + return -EINVAL; fsopt->congestion_kb = intval; break; case Opt_dirstat: @@ -946,12 +958,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) return err; /* set ra_pages based on rasize mount option? */ - if (fsc->mount_options->rasize >= PAGE_SIZE) - sb->s_bdi->ra_pages = - (fsc->mount_options->rasize + PAGE_SIZE - 1) - >> PAGE_SHIFT; - else - sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; /* set io_pages based on max osd read size */ sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index eed2a67d8e52..279a2f401cf5 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -56,6 +56,15 @@ #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) #define CEPH_SNAPDIRNAME_DEFAULT ".snap" +/* + * Delay telling the MDS we no longer want caps, in case we reopen + * the file. Delay a minimum amount of time, even if we send a cap + * message for some other reason. Otherwise, take the opportunity to + * update the mds to avoid sending another message later. + */ +#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ +#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ + struct ceph_mount_options { int flags; int sb_flags; diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index dca30ac9bd34..4c846aabd9f6 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -84,16 +84,6 @@ struct ceph_options { #define CEPH_AUTH_NAME_DEFAULT "guest" -/* - * Delay telling the MDS we no longer want caps, in case we reopen - * the file. Delay a minimum amount of time, even if we send a cap - * message for some other reason.
Otherwise, take the oppotunity to - * update the mds to avoid sending another message later. - */ -#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ -#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ - - /* mount state */ enum { CEPH_MOUNT_MOUNTING, -- cgit v1.2.3 From 3fb99d483e614bc3834784c7a686572c7970bb92 Mon Sep 17 00:00:00 2001 From: Yanhu Cao Date: Fri, 21 Jul 2017 17:20:10 +0800 Subject: ceph: nuke startsync op startsync is a no-op, has been for years. Remove it. Link: http://tracker.ceph.com/issues/20604 Signed-off-by: Yanhu Cao Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 21 +++------------------ fs/ceph/file.c | 5 +---- include/linux/ceph/rados.h | 1 - net/ceph/osd_client.c | 5 ----- 4 files changed, 4 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 149b10063be8..825931516623 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -752,21 +752,11 @@ static int ceph_writepages_start(struct address_space *mapping, int rc = 0; unsigned int wsize = i_blocksize(inode); struct ceph_osd_request *req = NULL; - int do_sync = 0; loff_t snap_size, i_size; u64 truncate_size; u32 truncate_seq; - /* - * Include a 'sync' in the OSD request if this is a data - * integrity write (e.g., O_SYNC write or fsync()), or if our - * cap is being revoked. - */ - if ((wbc->sync_mode == WB_SYNC_ALL) || - ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER)) - do_sync = 1; - dout("writepages_start %p dosync=%d (mode=%s)\n", - inode, do_sync, + dout("writepages_start %p (mode=%s)\n", inode, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : (wbc->sync_mode == WB_SYNC_ALL ? 
"ALL" : "HOLD")); @@ -936,7 +926,7 @@ get_more_pages: break; } - num_ops = 1 + do_sync; + num_ops = 1; strip_unit_end = page->index + ((len - 1) >> PAGE_SHIFT); @@ -1042,7 +1032,7 @@ new_request: for (i = 0; i < locked_pages; i++) { u64 cur_offset = page_offset(pages[i]); if (offset + len != cur_offset) { - if (op_idx + do_sync + 1 == req->r_num_ops) + if (op_idx + 1 == req->r_num_ops) break; osd_req_op_extent_dup_last(req, op_idx, cur_offset - offset); @@ -1079,17 +1069,12 @@ new_request: 0, !!pool, false); osd_req_op_extent_update(req, op_idx, len); - if (do_sync) { - op_idx++; - osd_req_op_init(req, op_idx, CEPH_OSD_OP_STARTSYNC, 0); - } BUG_ON(op_idx + 1 != req->r_num_ops); pool = NULL; if (i < locked_pages) { BUG_ON(num_ops <= req->r_num_ops); num_ops -= req->r_num_ops; - num_ops += do_sync; locked_pages -= i; /* allocate new pages array for next request */ diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a39ff54cb372..0e8986c69639 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -800,7 +800,6 @@ static void ceph_aio_retry_work(struct work_struct *work) } req->r_ops[0] = orig_req->r_ops[0]; - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); req->r_mtime = aio_req->mtime; req->r_data_offset = req->r_ops[0].extent.offset; @@ -874,8 +873,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &size, 0, - /*include a 'startsync' command*/ - write ? 2 : 1, + 1, write ? 
CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ, flags, snapc, @@ -927,7 +925,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, truncate_inode_pages_range(inode->i_mapping, pos, (pos+len) | (PAGE_SIZE - 1)); - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); req->r_mtime = mtime; } diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index b8281feda9c7..01408841c9c4 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -230,7 +230,6 @@ extern const char *ceph_osd_state_name(int s); \ /* fancy write */ \ f(APPEND, __CEPH_OSD_OP(WR, DATA, 6), "append") \ - f(STARTSYNC, __CEPH_OSD_OP(WR, DATA, 7), "startsync") \ f(SETTRUNC, __CEPH_OSD_OP(WR, DATA, 8), "settrunc") \ f(TRIMTRUNC, __CEPH_OSD_OP(WR, DATA, 9), "trimtrunc") \ \ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index dcfbdd74dfd1..e02f01f534e2 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -863,8 +863,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, dst->cls.method_len = src->cls.method_len; dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); break; - case CEPH_OSD_OP_STARTSYNC: - break; case CEPH_OSD_OP_WATCH: dst->watch.cookie = cpu_to_le64(src->watch.cookie); dst->watch.ver = cpu_to_le64(0); @@ -916,9 +914,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, * if the file was recently truncated, we include information about its * old and new size so that the object can be updated appropriately. (we * avoid synchronously deleting truncated objects because it's slow.) - * - * if @do_sync, include a 'startsync' command so that the osd will flush - * data quickly. 
*/ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, -- cgit v1.2.3 From 95569713afc0b53ded1bba67834e0be24529a8c9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 24 Jul 2017 17:59:39 +0800 Subject: ceph: new cap message flags indicate if there is pending capsnap These flags tell mds if there is pending capsnap explicitly. Without this explicit notification, mds can only conclude if client has pending capsnap. The method mds use is inefficient and error-prone. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 5 ++++- include/linux/ceph/ceph_fs.h | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7007ae2a5ad2..b675c004f6a7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1248,7 +1248,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, arg.mode = inode->i_mode; arg.inline_data = ci->i_inline_version != CEPH_INLINE_NONE; - arg.flags = 0; + if (list_empty(&ci->i_cap_snaps)) + arg.flags = CEPH_CLIENT_CAPS_NO_CAPSNAP; + else + arg.flags = CEPH_CLIENT_CAPS_PENDING_CAPSNAP; if (sync) arg.flags |= CEPH_CLIENT_CAPS_SYNC; diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index edf5b04b918a..d1642a4b4c5e 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -669,7 +669,9 @@ enum { extern const char *ceph_cap_op_name(int op); /* flags field in client cap messages (version >= 10) */ -#define CEPH_CLIENT_CAPS_SYNC (0x1) +#define CEPH_CLIENT_CAPS_SYNC (1<<0) +#define CEPH_CLIENT_CAPS_NO_CAPSNAP (1<<1) +#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1<<2) /* * caps message, used for capability callbacks, acks, requests, etc.
-- cgit v1.2.3 From 06d74376c8af32f5b8d777a943aa4dc99165088b Mon Sep 17 00:00:00 2001 From: Douglas Fuller Date: Wed, 16 Aug 2017 10:19:27 -0400 Subject: ceph: more accurate statfs Improve accuracy of statfs reporting for Ceph filesystems comprising exactly one data pool. In this case, the Ceph monitor can now report the space usage for the single data pool instead of the global data for the entire Ceph cluster. Include support for this message in mon_client and leverage it in ceph/super. Signed-off-by: Douglas Fuller Reviewed-by: Yan, Zheng Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 9 ++++++++- include/linux/ceph/ceph_fs.h | 2 ++ include/linux/ceph/mon_client.h | 4 ++-- net/ceph/mon_client.c | 6 +++++- 4 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 1deb8810d7c7..324d29ecbe0b 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -49,9 +49,16 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) struct ceph_statfs st; u64 fsid; int err; + u64 data_pool; + + if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { + data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; + } else { + data_pool = CEPH_NOPOOL; + } dout("statfs\n"); - err = ceph_monc_do_statfs(&fsc->client->monc, &st); + err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st); if (err < 0) return err; diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d1642a4b4c5e..b422170b791a 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -167,6 +167,8 @@ struct ceph_mon_request_header { struct ceph_mon_statfs { struct ceph_mon_request_header monhdr; struct ceph_fsid fsid; + __u8 contains_data_pool; + __le64 data_pool; } __attribute__ ((packed)); struct ceph_statfs { diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index d5a3ecea578d..0fa990bf867a 100644 --- a/include/linux/ceph/mon_client.h +++ 
b/include/linux/ceph/mon_client.h @@ -133,8 +133,8 @@ void ceph_monc_renew_subs(struct ceph_mon_client *monc); extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, unsigned long timeout); -extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, - struct ceph_statfs *buf); +int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool, + struct ceph_statfs *buf); int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what, u64 *newest); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 875675765531..63edc6e5f026 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -676,7 +676,8 @@ bad: /* * Do a synchronous statfs(). */ -int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) +int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool, + struct ceph_statfs *buf) { struct ceph_mon_generic_request *req; struct ceph_mon_statfs *h; @@ -696,6 +697,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) goto out; req->u.st = buf; + req->request->hdr.version = cpu_to_le16(2); mutex_lock(&monc->mutex); register_generic_request(req); @@ -705,6 +707,8 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) h->monhdr.session_mon = cpu_to_le16(-1); h->monhdr.session_mon_tid = 0; h->fsid = monc->monmap->fsid; + h->contains_data_pool = (data_pool != CEPH_NOPOOL); + h->data_pool = cpu_to_le64(data_pool); send_generic_request(monc, req); mutex_unlock(&monc->mutex); -- cgit v1.2.3