From 2b777c9dd9ebbb2f8b6818d454cc5e6d7c1e3c8b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 22:31:22 -0400 Subject: ceph_sync_read: stop poking into iov_iter guts Signed-off-by: Al Viro --- include/linux/ceph/libceph.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 2f49aa4c4f7f..279b0afac1c1 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -222,8 +222,6 @@ extern void ceph_copy_to_page_vector(struct page **pages, extern void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, - loff_t off, size_t len); extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); -- cgit v1.2.3 From f98a128a55ff85d0087de89f304f10bd75e792aa Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 17 Apr 2014 08:55:50 +0800 Subject: ceph: update inode fields according to issued caps Cap message and request reply from non-auth MDS may carry stale information (corresponding locks are in LOCK states) even they have the newest inode version. So client should update inode fields according to issued caps. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 58 ++++++++++++++++++++---------------- fs/ceph/inode.c | 70 ++++++++++++++++++++++++-------------------- include/linux/ceph/ceph_fs.h | 2 ++ 3 files changed, 73 insertions(+), 57 deletions(-) (limited to 'include/linux/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index de39a03f5b71..5f6d24ede794 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2476,7 +2476,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, __check_cap_issue(ci, cap, newcaps); - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(grant->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); @@ -2485,7 +2486,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_LINK_EXCL) == 0) { set_nlink(inode, le32_to_cpu(grant->nlink)); if (inode->i_nlink == 0 && (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) @@ -2512,31 +2514,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) queue_revalidate = 1; - /* size/ctime/mtime/atime? */ - queue_trunc = ceph_fill_file_size(inode, issued, - le32_to_cpu(grant->truncate_seq), - le64_to_cpu(grant->truncate_size), - size); - ceph_decode_timespec(&mtime, &grant->mtime); - ceph_decode_timespec(&atime, &grant->atime); - ceph_decode_timespec(&ctime, &grant->ctime); - ceph_fill_file_time(inode, issued, - le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, - &atime); - - - /* file layout may have changed */ - ci->i_layout = grant->layout; - - /* max size increase? */ - if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { - dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); - ci->i_max_size = max_size; - if (max_size >= ci->i_wanted_max_size) { - ci->i_wanted_max_size = 0; /* reset */ - ci->i_requested_max_size = 0; + if (newcaps & CEPH_CAP_ANY_RD) { + /* ctime/mtime/atime? */ + ceph_decode_timespec(&mtime, &grant->mtime); + ceph_decode_timespec(&atime, &grant->atime); + ceph_decode_timespec(&ctime, &grant->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(grant->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { + /* file layout may have changed */ + ci->i_layout = grant->layout; + /* size/truncate_seq? */ + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(grant->truncate_seq), + le64_to_cpu(grant->truncate_size), + size); + /* max size increase? */ + if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { + dout("max_size %lld -> %llu\n", + ci->i_max_size, max_size); + ci->i_max_size = max_size; + if (max_size >= ci->i_wanted_max_size) { + ci->i_wanted_max_size = 0; /* reset */ + ci->i_requested_max_size = 0; + } + wake = 1; } - wake = 1; } /* check cap bits */ diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 233c6f96910a..f9e7399877d6 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -585,14 +585,15 @@ static int fill_inode(struct inode *inode, struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int i; - int issued = 0, implemented; + int issued = 0, implemented, new_issued; struct timespec mtime, atime, ctime; u32 nsplits; struct ceph_inode_frag *frag; struct rb_node *rb_node; struct ceph_buffer *xattr_blob = NULL; int err = 0; - int queue_trunc = 0; + bool queue_trunc = false; + bool new_version = false; dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), le64_to_cpu(info->version), @@ -623,19 +624,23 @@ static int fill_inode(struct inode *inode, * 3 2 skip * 3 3 update */ - if (le64_to_cpu(info->version) > 0 && - (ci->i_version & ~1) >= le64_to_cpu(info->version)) - goto no_change; - + if (ci->i_version == 0 || + ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + le64_to_cpu(info->version) > (ci->i_version & ~1))) + new_version = true; + issued = __ceph_caps_issued(ci, &implemented); issued |= implemented | __ceph_caps_dirty(ci); + new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ ci->i_version = le64_to_cpu(info->version); inode->i_version++; inode->i_rdev = le32_to_cpu(info->rdev); + inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); @@ -644,23 +649,35 @@ static int fill_inode(struct inode *inode, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) + if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && + (issued & CEPH_CAP_LINK_EXCL) == 0) set_nlink(inode, le32_to_cpu(info->nlink)); - /* be careful with mtime, atime, size */ - ceph_decode_timespec(&atime, &info->atime); - ceph_decode_timespec(&mtime, &info->mtime); - ceph_decode_timespec(&ctime, &info->ctime); - queue_trunc = ceph_fill_file_size(inode, issued, - le32_to_cpu(info->truncate_seq), - le64_to_cpu(info->truncate_size), - le64_to_cpu(info->size)); - ceph_fill_file_time(inode, issued, - le32_to_cpu(info->time_warp_seq), - &ctime, &mtime, &atime); - - ci->i_layout = info->layout; - inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { + /* be careful with mtime, atime, size */ + ceph_decode_timespec(&atime, &info->atime); + ceph_decode_timespec(&mtime, &info->mtime); + ceph_decode_timespec(&ctime, &info->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(info->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (new_version || + (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { + ci->i_layout = info->layout; + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(info->truncate_seq), + le64_to_cpu(info->truncate_size), + le64_to_cpu(info->size)); + /* only update max_size on auth cap */ + if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + ci->i_max_size != le64_to_cpu(info->max_size)) { + dout("max_size %lld -> %llu\n", ci->i_max_size, + le64_to_cpu(info->max_size)); + ci->i_max_size = le64_to_cpu(info->max_size); + } + } /* xattrs */ /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ @@ -745,15 +762,6 @@ static int fill_inode(struct inode *inode, dout(" marking %p complete (empty)\n", inode); __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); } -no_change: - /* only update max_size on auth cap */ - if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && - ci->i_max_size != le64_to_cpu(info->max_size)) { - dout("max_size %lld -> %llu\n", ci->i_max_size, - le64_to_cpu(info->max_size)); - ci->i_max_size = le64_to_cpu(info->max_size); - } - spin_unlock(&ci->i_ceph_lock); /* queue truncate if we saw i_size decrease */ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 5f6db18d72e8..3c97d5e9b951 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -625,6 +625,8 @@ int ceph_flags_to_mode(int flags); CEPH_CAP_LINK_EXCL | \ CEPH_CAP_XATTR_EXCL | \ CEPH_CAP_FILE_EXCL) +#define CEPH_CAP_ANY_FILE_RD (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | \ + CEPH_CAP_FILE_SHARED) #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \ CEPH_CAP_FILE_EXCL) #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) -- cgit v1.2.3 From 513a8243d67f8e8d27f2883bd2f18bc87c7ca376 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 13 May 2014 11:19:26 +0400 Subject: libceph: mon_get_version request infrastructure Add support for mon_get_version requests to libceph. This reuses much of the ceph_mon_generic_request infrastructure, with one exception. Older OSDs don't set mon_get_version reply hdr->tid even if the original request had a non-zero tid, which makes it impossible to lookup ceph_mon_generic_request contexts by tid in get_generic_reply() for such replies. As a workaround, we allocate a reply message on the reply path. This can probably interfere with revoke, but I don't see a better way. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/mon_client.h | 9 ++- net/ceph/ceph_common.c | 2 + net/ceph/debugfs.c | 2 + net/ceph/mon_client.c | 123 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 128 insertions(+), 8 deletions(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index a486f390dfbe..585ef9450e9d 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -40,9 +40,9 @@ struct ceph_mon_request { }; /* - * ceph_mon_generic_request is being used for the statfs and poolop requests - * which are bening done a bit differently because we need to get data back - * to the caller + * ceph_mon_generic_request is being used for the statfs, poolop and + * mon_get_version requests which are being done a bit differently + * because we need to get data back to the caller */ struct ceph_mon_generic_request { struct kref kref; @@ -108,6 +108,9 @@ extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf); +extern int ceph_monc_do_get_version(struct ceph_mon_client *monc, + const char *what, u64 *newest); + extern int ceph_monc_open_session(struct ceph_mon_client *monc); extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 67d7721d237e..1675021d8c12 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -72,6 +72,8 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; case CEPH_MSG_STATFS: return "statfs"; case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; + case CEPH_MSG_MON_GET_VERSION: return "mon_get_version"; + case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply"; case CEPH_MSG_MDS_MAP: return "mds_map"; case CEPH_MSG_CLIENT_SESSION: return "client_session"; case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 8903dcee8d8e..d1a62c69a9f4 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -129,6 +129,8 @@ static int monc_show(struct seq_file *s, void *p) seq_printf(s, "%llu statfs\n", req->tid); else if (op == CEPH_MSG_POOLOP) seq_printf(s, "%llu poolop\n", req->tid); + else if (op == CEPH_MSG_MON_GET_VERSION) + seq_printf(s, "%llu mon_get_version", req->tid); else seq_printf(s, "%llu unknown\n", req->tid); } diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2ac9ef35110b..11d8d2f2708a 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -477,14 +477,13 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, return m; } -static int do_generic_request(struct ceph_mon_client *monc, - struct ceph_mon_generic_request *req) +static int __do_generic_request(struct ceph_mon_client *monc, u64 tid, + struct ceph_mon_generic_request *req) { int err; /* register request */ - mutex_lock(&monc->mutex); - req->tid = ++monc->last_tid; + req->tid = tid != 0 ? tid : ++monc->last_tid; req->request->hdr.tid = cpu_to_le64(req->tid); __insert_generic_request(monc, req); monc->num_generic_requests++; @@ -496,13 +495,24 @@ static int do_generic_request(struct ceph_mon_client *monc, mutex_lock(&monc->mutex); rb_erase(&req->node, &monc->generic_request_tree); monc->num_generic_requests--; - mutex_unlock(&monc->mutex); if (!err) err = req->result; return err; } +static int do_generic_request(struct ceph_mon_client *monc, + struct ceph_mon_generic_request *req) +{ + int err; + + mutex_lock(&monc->mutex); + err = __do_generic_request(monc, 0, req); + mutex_unlock(&monc->mutex); + + return err; +} + /* * statfs */ @@ -579,6 +589,96 @@ out: } EXPORT_SYMBOL(ceph_monc_do_statfs); +static void handle_get_version_reply(struct ceph_mon_client *monc, + struct ceph_msg *msg) +{ + struct ceph_mon_generic_request *req; + u64 tid = le64_to_cpu(msg->hdr.tid); + void *p = msg->front.iov_base; + void *end = p + msg->front_alloc_len; + u64 handle; + + dout("%s %p tid %llu\n", __func__, msg, tid); + + ceph_decode_need(&p, end, 2*sizeof(u64), bad); + handle = ceph_decode_64(&p); + if (tid != 0 && tid != handle) + goto bad; + + mutex_lock(&monc->mutex); + req = __lookup_generic_req(monc, handle); + if (req) { + *(u64 *)req->buf = ceph_decode_64(&p); + req->result = 0; + get_generic_request(req); + } + mutex_unlock(&monc->mutex); + if (req) { + complete_all(&req->completion); + put_generic_request(req); + } + + return; +bad: + pr_err("corrupt mon_get_version reply\n"); + ceph_msg_dump(msg); +} + +/* + * Send MMonGetVersion and wait for the reply. + * + * @what: one of "mdsmap", "osdmap" or "monmap" + */ +int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what, + u64 *newest) +{ + struct ceph_mon_generic_request *req; + void *p, *end; + u64 tid; + int err; + + req = kzalloc(sizeof(*req), GFP_NOFS); + if (!req) + return -ENOMEM; + + kref_init(&req->kref); + req->buf = newest; + req->buf_len = sizeof(*newest); + init_completion(&req->completion); + + req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION, + sizeof(u64) + sizeof(u32) + strlen(what), + GFP_NOFS, true); + if (!req->request) { + err = -ENOMEM; + goto out; + } + + req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024, + GFP_NOFS, true); + if (!req->reply) { + err = -ENOMEM; + goto out; + } + + p = req->request->front.iov_base; + end = p + req->request->front_alloc_len; + + /* fill out request */ + mutex_lock(&monc->mutex); + tid = ++monc->last_tid; + ceph_encode_64(&p, tid); /* handle */ + ceph_encode_string(&p, end, what, strlen(what)); + + err = __do_generic_request(monc, tid, req); + + mutex_unlock(&monc->mutex); +out: + kref_put(&req->kref, release_generic_request); + return err; +} +EXPORT_SYMBOL(ceph_monc_do_get_version); + /* * pool ops */ @@ -981,6 +1081,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) handle_statfs_reply(monc, msg); break; + case CEPH_MSG_MON_GET_VERSION_REPLY: + handle_get_version_reply(monc, msg); + break; + case CEPH_MSG_POOLOP_REPLY: handle_poolop_reply(monc, msg); break; @@ -1029,6 +1133,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_AUTH_REPLY: m = ceph_msg_get(monc->m_auth_reply); break; + case CEPH_MSG_MON_GET_VERSION_REPLY: + if (le64_to_cpu(hdr->tid) != 0) + return get_generic_reply(con, hdr, skip); + + /* + * Older OSDs don't set reply tid even if the orignal + * request had a non-zero tid. Workaround this weirdness + * by falling through to the allocate case. + */ case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: -- cgit v1.2.3 From 6044cde6f2a94d88142d4401624152a741866338 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 13 May 2014 11:19:27 +0400 Subject: libceph: add ceph_monc_wait_osdmap() Add ceph_monc_wait_osdmap(), which will block until the osdmap with the specified epoch is received or timeout occurs. Export both of these as they are going to be needed by rbd. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/mon_client.h | 2 ++ net/ceph/mon_client.c | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 585ef9450e9d..deb47e45ac7c 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -104,6 +104,8 @@ extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have); extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have); extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); +extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, + unsigned long timeout); extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 11d8d2f2708a..067d3af2eaf6 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -296,6 +296,33 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) __send_subscribe(monc); mutex_unlock(&monc->mutex); } +EXPORT_SYMBOL(ceph_monc_request_next_osdmap); + +int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, + unsigned long timeout) +{ + unsigned long started = jiffies; + int ret; + + mutex_lock(&monc->mutex); + while (monc->have_osdmap < epoch) { + mutex_unlock(&monc->mutex); + + if (timeout != 0 && time_after_eq(jiffies, started + timeout)) + return -ETIMEDOUT; + + ret = wait_event_interruptible_timeout(monc->client->auth_wq, + monc->have_osdmap >= epoch, timeout); + if (ret < 0) + return ret; + + mutex_lock(&monc->mutex); + } + + mutex_unlock(&monc->mutex); + return 0; +} +EXPORT_SYMBOL(ceph_monc_wait_osdmap); /* * -- cgit v1.2.3