From c6bcda6f525129b1df169f77d96a4b0972e1ecb1 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 11 Apr 2014 10:18:07 +0800 Subject: ceph: queue vmtruncate if necessary when handling cap grant/revoke cap grant/revoke message from non-auth MDS can update inode's size and truncate_seq/truncate_size. (the message arrives before auth MDS's cap trunc message) Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'fs/ceph/caps.c') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c561b628ebce..de39a03f5b71 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2418,11 +2418,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, u64 max_size = le64_to_cpu(grant->max_size); struct timespec mtime, atime, ctime; int check_caps = 0; - int wake = 0; - int writeback = 0; - int queue_invalidate = 0; - int deleted_inode = 0; - int queue_revalidate = 0; + bool wake = 0; + bool writeback = 0; + bool queue_trunc = 0; + bool queue_invalidate = 0; + bool queue_revalidate = 0; + bool deleted_inode = 0; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2512,9 +2513,10 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, queue_revalidate = 1; /* size/ctime/mtime/atime? 
*/ - ceph_fill_file_size(inode, issued, - le32_to_cpu(grant->truncate_seq), - le64_to_cpu(grant->truncate_size), size); + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(grant->truncate_seq), + le64_to_cpu(grant->truncate_size), + size); ceph_decode_timespec(&mtime, &grant->mtime); ceph_decode_timespec(&atime, &grant->atime); ceph_decode_timespec(&ctime, &grant->ctime); @@ -2595,6 +2597,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, spin_unlock(&ci->i_ceph_lock); + if (queue_trunc) { + ceph_queue_vmtruncate(inode); + ceph_queue_revalidate(inode); + } else if (queue_revalidate) + ceph_queue_revalidate(inode); + if (writeback) /* * queue inode for writeback: we can't actually call @@ -2606,8 +2614,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ceph_queue_invalidate(inode); if (deleted_inode) invalidate_aliases(inode); - if (queue_revalidate) - ceph_queue_revalidate(inode); if (wake) wake_up_all(&ci->i_cap_wq); -- cgit v1.2.3 From f98a128a55ff85d0087de89f304f10bd75e792aa Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 17 Apr 2014 08:55:50 +0800 Subject: ceph: update inode fields according to issued caps Cap message and request reply from non-auth MDS may carry stale information (corresponding locks are in LOCK states) even if they have the newest inode version. So client should update inode fields according to issued caps. 
Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 58 ++++++++++++++++++++---------------- fs/ceph/inode.c | 70 ++++++++++++++++++++++++-------------------- include/linux/ceph/ceph_fs.h | 2 ++ 3 files changed, 73 insertions(+), 57 deletions(-) (limited to 'fs/ceph/caps.c') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index de39a03f5b71..5f6d24ede794 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2476,7 +2476,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, __check_cap_issue(ci, cap, newcaps); - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(grant->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); @@ -2485,7 +2486,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_LINK_EXCL) == 0) { set_nlink(inode, le32_to_cpu(grant->nlink)); if (inode->i_nlink == 0 && (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) @@ -2512,31 +2514,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) queue_revalidate = 1; - /* size/ctime/mtime/atime? */ - queue_trunc = ceph_fill_file_size(inode, issued, - le32_to_cpu(grant->truncate_seq), - le64_to_cpu(grant->truncate_size), - size); - ceph_decode_timespec(&mtime, &grant->mtime); - ceph_decode_timespec(&atime, &grant->atime); - ceph_decode_timespec(&ctime, &grant->ctime); - ceph_fill_file_time(inode, issued, - le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, - &atime); - - - /* file layout may have changed */ - ci->i_layout = grant->layout; - - /* max size increase? 
*/ - if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { - dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); - ci->i_max_size = max_size; - if (max_size >= ci->i_wanted_max_size) { - ci->i_wanted_max_size = 0; /* reset */ - ci->i_requested_max_size = 0; + if (newcaps & CEPH_CAP_ANY_RD) { + /* ctime/mtime/atime? */ + ceph_decode_timespec(&mtime, &grant->mtime); + ceph_decode_timespec(&atime, &grant->atime); + ceph_decode_timespec(&ctime, &grant->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(grant->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { + /* file layout may have changed */ + ci->i_layout = grant->layout; + /* size/truncate_seq? */ + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(grant->truncate_seq), + le64_to_cpu(grant->truncate_size), + size); + /* max size increase? */ + if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { + dout("max_size %lld -> %llu\n", + ci->i_max_size, max_size); + ci->i_max_size = max_size; + if (max_size >= ci->i_wanted_max_size) { + ci->i_wanted_max_size = 0; /* reset */ + ci->i_requested_max_size = 0; + } + wake = 1; } - wake = 1; } /* check cap bits */ diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 233c6f96910a..f9e7399877d6 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -585,14 +585,15 @@ static int fill_inode(struct inode *inode, struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int i; - int issued = 0, implemented; + int issued = 0, implemented, new_issued; struct timespec mtime, atime, ctime; u32 nsplits; struct ceph_inode_frag *frag; struct rb_node *rb_node; struct ceph_buffer *xattr_blob = NULL; int err = 0; - int queue_trunc = 0; + bool queue_trunc = false; + bool new_version = false; dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), le64_to_cpu(info->version), @@ -623,19 +624,23 @@ static int fill_inode(struct inode 
*inode, * 3 2 skip * 3 3 update */ - if (le64_to_cpu(info->version) > 0 && - (ci->i_version & ~1) >= le64_to_cpu(info->version)) - goto no_change; - + if (ci->i_version == 0 || + ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + le64_to_cpu(info->version) > (ci->i_version & ~1))) + new_version = true; + issued = __ceph_caps_issued(ci, &implemented); issued |= implemented | __ceph_caps_dirty(ci); + new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ ci->i_version = le64_to_cpu(info->version); inode->i_version++; inode->i_rdev = le32_to_cpu(info->rdev); + inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); @@ -644,23 +649,35 @@ static int fill_inode(struct inode *inode, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) + if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && + (issued & CEPH_CAP_LINK_EXCL) == 0) set_nlink(inode, le32_to_cpu(info->nlink)); - /* be careful with mtime, atime, size */ - ceph_decode_timespec(&atime, &info->atime); - ceph_decode_timespec(&mtime, &info->mtime); - ceph_decode_timespec(&ctime, &info->ctime); - queue_trunc = ceph_fill_file_size(inode, issued, - le32_to_cpu(info->truncate_seq), - le64_to_cpu(info->truncate_size), - le64_to_cpu(info->size)); - ceph_fill_file_time(inode, issued, - le32_to_cpu(info->time_warp_seq), - &ctime, &mtime, &atime); - - ci->i_layout = info->layout; - inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { + /* be careful with mtime, atime, size */ + ceph_decode_timespec(&atime, &info->atime); + ceph_decode_timespec(&mtime, &info->mtime); + 
ceph_decode_timespec(&ctime, &info->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(info->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (new_version || + (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { + ci->i_layout = info->layout; + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(info->truncate_seq), + le64_to_cpu(info->truncate_size), + le64_to_cpu(info->size)); + /* only update max_size on auth cap */ + if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + ci->i_max_size != le64_to_cpu(info->max_size)) { + dout("max_size %lld -> %llu\n", ci->i_max_size, + le64_to_cpu(info->max_size)); + ci->i_max_size = le64_to_cpu(info->max_size); + } + } /* xattrs */ /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ @@ -745,15 +762,6 @@ static int fill_inode(struct inode *inode, dout(" marking %p complete (empty)\n", inode); __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); } -no_change: - /* only update max_size on auth cap */ - if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && - ci->i_max_size != le64_to_cpu(info->max_size)) { - dout("max_size %lld -> %llu\n", ci->i_max_size, - le64_to_cpu(info->max_size)); - ci->i_max_size = le64_to_cpu(info->max_size); - } - spin_unlock(&ci->i_ceph_lock); /* queue truncate if we saw i_size decrease */ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 5f6db18d72e8..3c97d5e9b951 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -625,6 +625,8 @@ int ceph_flags_to_mode(int flags); CEPH_CAP_LINK_EXCL | \ CEPH_CAP_XATTR_EXCL | \ CEPH_CAP_FILE_EXCL) +#define CEPH_CAP_ANY_FILE_RD (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | \ + CEPH_CAP_FILE_SHARED) #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \ CEPH_CAP_FILE_EXCL) #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) -- cgit v1.2.3 From d9df2783507943316b305e177e5b1c157200c76f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: 
Fri, 18 Apr 2014 09:57:11 +0800 Subject: ceph: pre-allocate ceph_cap struct for ceph_add_cap() So that ceph_add_cap() can be used while i_ceph_lock is locked. This simplifies the code that handles cap import/export. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 81 +++++++++++++++++++++++++++------------------------------ fs/ceph/inode.c | 70 +++++++++++++++++++++++++++---------------------- fs/ceph/super.h | 13 ++++----- 3 files changed, 85 insertions(+), 79 deletions(-) (limited to 'fs/ceph/caps.c') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 5f6d24ede794..73a42f504357 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc, return 0; } -static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, - struct ceph_cap_reservation *ctx) +struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx) { struct ceph_cap *cap = NULL; @@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, * it is < 0. (This is so we can atomically add the cap and add an * open file reference to it.) 
*/ -int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, u64 cap_id, - int fmode, unsigned issued, unsigned wanted, - unsigned seq, unsigned mseq, u64 realmino, int flags, - struct ceph_cap_reservation *caps_reservation) +void ceph_add_cap(struct inode *inode, + struct ceph_mds_session *session, u64 cap_id, + int fmode, unsigned issued, unsigned wanted, + unsigned seq, unsigned mseq, u64 realmino, int flags, + struct ceph_cap **new_cap) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *new_cap = NULL; struct ceph_cap *cap; int mds = session->s_mds; int actual_wanted; @@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode, if (fmode >= 0) wanted |= ceph_caps_for_mode(fmode); -retry: - spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (!cap) { - if (new_cap) { - cap = new_cap; - new_cap = NULL; - } else { - spin_unlock(&ci->i_ceph_lock); - new_cap = get_cap(mdsc, caps_reservation); - if (new_cap == NULL) - return -ENOMEM; - goto retry; - } + cap = *new_cap; + *new_cap = NULL; cap->issued = 0; cap->implemented = 0; @@ -562,9 +551,6 @@ retry: session->s_nr_caps++; spin_unlock(&session->s_cap_lock); } else { - if (new_cap) - ceph_put_cap(mdsc, new_cap); - /* * auth mds of the inode changed. we received the cap export * message, but still haven't received the cap import message. 
@@ -626,7 +612,6 @@ retry: ci->i_auth_cap = cap; cap->mds_wanted = wanted; } - ci->i_cap_exporting_issued = 0; } else { WARN_ON(ci->i_auth_cap == cap); } @@ -648,9 +633,6 @@ retry: if (fmode >= 0) __ceph_get_fmode(ci, fmode); - spin_unlock(&ci->i_ceph_lock); - wake_up_all(&ci->i_cap_wq); - return 0; } /* @@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap) */ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { - int have = ci->i_snap_caps | ci->i_cap_exporting_issued; + int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; @@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) */ static int __ceph_is_any_caps(struct ceph_inode_info *ci) { - return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; + return !RB_EMPTY_ROOT(&ci->i_caps); } int ceph_is_any_caps(struct inode *inode) @@ -2796,7 +2778,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_session *tsession = NULL; - struct ceph_cap *cap, *tcap; + struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); u64 t_cap_id; unsigned mseq = le32_to_cpu(ex->migrate_seq); @@ -2858,15 +2840,14 @@ retry: } __ceph_remove_cap(cap, false); goto out_unlock; - } - - if (tsession) { - int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; - spin_unlock(&ci->i_ceph_lock); + } else if (tsession) { /* add placeholder for the export tagert */ + int flag = (cap == ci->i_auth_cap) ? 
CEPH_CAP_FLAG_AUTH : 0; ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, - t_seq - 1, t_mseq, (u64)-1, flag, NULL); - goto retry; + t_seq - 1, t_mseq, (u64)-1, flag, &new_cap); + + __ceph_remove_cap(cap, false); + goto out_unlock; } spin_unlock(&ci->i_ceph_lock); @@ -2885,6 +2866,7 @@ retry: SINGLE_DEPTH_NESTING); } ceph_add_cap_releases(mdsc, tsession); + new_cap = ceph_get_cap(mdsc, NULL); } else { WARN_ON(1); tsession = NULL; @@ -2899,6 +2881,8 @@ out_unlock: mutex_unlock(&tsession->s_mutex); ceph_put_mds_session(tsession); } + if (new_cap) + ceph_put_cap(mdsc, new_cap); } /* @@ -2914,7 +2898,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, void *snaptrace, int snaptrace_len) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *cap; + struct ceph_cap *cap, *new_cap = NULL; int mds = session->s_mds; unsigned issued = le32_to_cpu(im->caps); unsigned wanted = le32_to_cpu(im->wanted); @@ -2936,7 +2920,20 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", inode, ci, mds, mseq, peer); +retry: spin_lock(&ci->i_ceph_lock); + cap = __get_cap_for_mds(ci, mds); + if (!cap) { + if (!new_cap) { + spin_unlock(&ci->i_ceph_lock); + new_cap = ceph_get_cap(mdsc, NULL); + goto retry; + } + } + + ceph_add_cap(inode, session, cap_id, -1, issued, wanted, seq, mseq, + realmino, CEPH_CAP_FLAG_AUTH, &new_cap); + cap = peer >= 0 ? 
__get_cap_for_mds(ci, peer) : NULL; if (cap && cap->cap_id == p_cap_id) { dout(" remove export cap %p mds%d flags %d\n", @@ -2951,7 +2948,6 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, cap->mseq, mds, le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - ci->i_cap_exporting_issued = cap->issued; __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } @@ -2960,16 +2956,17 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ci->i_requested_max_size = 0; spin_unlock(&ci->i_ceph_lock); + wake_up_all(&ci->i_cap_wq); + down_write(&mdsc->snap_rwsem); ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, false); downgrade_write(&mdsc->snap_rwsem); - ceph_add_cap(inode, session, cap_id, -1, - issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, - NULL /* no caps context */); kick_flushing_inode_caps(mdsc, session, inode); up_read(&mdsc->snap_rwsem); + if (new_cap) + ceph_put_cap(mdsc, new_cap); } /* diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f9e7399877d6..8ad50a30808e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -341,7 +341,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_cap_snaps); ci->i_head_snapc = NULL; ci->i_snap_caps = 0; - ci->i_cap_exporting_issued = 0; for (i = 0; i < CEPH_FILE_MODE_NUM; i++) ci->i_nr_by_mode[i] = 0; @@ -407,7 +406,7 @@ void ceph_destroy_inode(struct inode *inode) /* * we may still have a snap_realm reference if there are stray - * caps in i_cap_exporting_issued or i_snap_caps. + * caps in i_snap_caps. 
*/ if (ci->i_snap_realm) { struct ceph_mds_client *mdsc = @@ -582,6 +581,7 @@ static int fill_inode(struct inode *inode, unsigned long ttl_from, int cap_fmode, struct ceph_cap_reservation *caps_reservation) { + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int i; @@ -591,7 +591,9 @@ static int fill_inode(struct inode *inode, struct ceph_inode_frag *frag; struct rb_node *rb_node; struct ceph_buffer *xattr_blob = NULL; + struct ceph_cap *new_cap = NULL; int err = 0; + bool wake = false; bool queue_trunc = false; bool new_version = false; @@ -599,6 +601,10 @@ static int fill_inode(struct inode *inode, inode, ceph_vinop(inode), le64_to_cpu(info->version), ci->i_version); + /* prealloc new cap struct */ + if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP) + new_cap = ceph_get_cap(mdsc, caps_reservation); + /* * prealloc xattr data, if it looks like we'll need it. only * if len > 4 (meaning there are actually xattrs; the first 4 @@ -762,8 +768,37 @@ static int fill_inode(struct inode *inode, dout(" marking %p complete (empty)\n", inode); __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); } + + /* were we issued a capability? 
*/ + if (info->cap.caps) { + if (ceph_snap(inode) == CEPH_NOSNAP) { + ceph_add_cap(inode, session, + le64_to_cpu(info->cap.cap_id), + cap_fmode, + le32_to_cpu(info->cap.caps), + le32_to_cpu(info->cap.wanted), + le32_to_cpu(info->cap.seq), + le32_to_cpu(info->cap.mseq), + le64_to_cpu(info->cap.realm), + info->cap.flags, &new_cap); + wake = true; + } else { + dout(" %p got snap_caps %s\n", inode, + ceph_cap_string(le32_to_cpu(info->cap.caps))); + ci->i_snap_caps |= le32_to_cpu(info->cap.caps); + if (cap_fmode >= 0) + __ceph_get_fmode(ci, cap_fmode); + } + } else if (cap_fmode >= 0) { + pr_warning("mds issued no caps on %llx.%llx\n", + ceph_vinop(inode)); + __ceph_get_fmode(ci, cap_fmode); + } spin_unlock(&ci->i_ceph_lock); + if (wake) + wake_up_all(&ci->i_cap_wq); + /* queue truncate if we saw i_size decrease */ if (queue_trunc) ceph_queue_vmtruncate(inode); @@ -806,41 +841,14 @@ static int fill_inode(struct inode *inode, } mutex_unlock(&ci->i_fragtree_mutex); - /* were we issued a capability? */ - if (info->cap.caps) { - if (ceph_snap(inode) == CEPH_NOSNAP) { - ceph_add_cap(inode, session, - le64_to_cpu(info->cap.cap_id), - cap_fmode, - le32_to_cpu(info->cap.caps), - le32_to_cpu(info->cap.wanted), - le32_to_cpu(info->cap.seq), - le32_to_cpu(info->cap.mseq), - le64_to_cpu(info->cap.realm), - info->cap.flags, - caps_reservation); - } else { - spin_lock(&ci->i_ceph_lock); - dout(" %p got snap_caps %s\n", inode, - ceph_cap_string(le32_to_cpu(info->cap.caps))); - ci->i_snap_caps |= le32_to_cpu(info->cap.caps); - if (cap_fmode >= 0) - __ceph_get_fmode(ci, cap_fmode); - spin_unlock(&ci->i_ceph_lock); - } - } else if (cap_fmode >= 0) { - pr_warning("mds issued no caps on %llx.%llx\n", - ceph_vinop(inode)); - __ceph_get_fmode(ci, cap_fmode); - } - /* update delegation info? 
*/ if (dirinfo) ceph_fill_dirfrag(inode, dirinfo); err = 0; - out: + if (new_cap) + ceph_put_cap(mdsc, new_cap); if (xattr_blob) ceph_buffer_put(xattr_blob); return err; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ead05cc1f447..12b20744e386 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -292,7 +292,6 @@ struct ceph_inode_info { struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or dirty|flushing caps */ unsigned i_snap_caps; /* cap bits for snapped files */ - unsigned i_cap_exporting_issued; int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ @@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) extern const char *ceph_cap_string(int c); extern void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg); -extern int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, u64 cap_id, - int fmode, unsigned issued, unsigned wanted, - unsigned cap, unsigned seq, u64 realmino, int flags, - struct ceph_cap_reservation *caps_reservation); +extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx); +extern void ceph_add_cap(struct inode *inode, + struct ceph_mds_session *session, u64 cap_id, + int fmode, unsigned issued, unsigned wanted, + unsigned cap, unsigned seq, u64 realmino, int flags, + struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); -- cgit v1.2.3 From 2cd698be9a3d3a0f8f3c66814eac34144c31954c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 18 Apr 2014 13:20:27 +0800 Subject: ceph: handle cap import atomically cap import messages are processed by both handle_cap_import() and handle_cap_grant(). These two functions are not executed in the same atomic context, so they can race with cap release. The fix is to make handle_cap_import() not release the i_ceph_lock when it returns. 
Let handle_cap_grant() release the lock after it finishes its job. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 99 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 45 deletions(-) (limited to 'fs/ceph/caps.c') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 73a42f504357..9f2c99c34e92 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2379,23 +2379,20 @@ static void invalidate_aliases(struct inode *inode) * actually be a revocation if it specifies a smaller cap set.) * * caller holds s_mutex and i_ceph_lock, we drop both. - * - * return value: - * 0 - ok - * 1 - check_caps on auth cap only (writeback) - * 2 - check_caps (ack revoke) */ -static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, +static void handle_cap_grant(struct ceph_mds_client *mdsc, + struct inode *inode, struct ceph_mds_caps *grant, + void *snaptrace, int snaptrace_len, + struct ceph_buffer *xattr_buf, struct ceph_mds_session *session, - struct ceph_cap *cap, - struct ceph_buffer *xattr_buf) - __releases(ci->i_ceph_lock) + struct ceph_cap *cap, int issued) + __releases(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); - int issued, implemented, used, wanted, dirty; + int used, wanted, dirty; u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); struct timespec mtime, atime, ctime; @@ -2449,10 +2446,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, } /* side effects now are allowed */ - - issued = __ceph_caps_issued(ci, &implemented); - issued |= implemented | __ceph_caps_dirty(ci); - cap->cap_gen = session->s_cap_gen; cap->seq = seq; @@ -2585,6 +2578,17 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, spin_unlock(&ci->i_ceph_lock); + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { + 
down_write(&mdsc->snap_rwsem); + ceph_update_snap_trace(mdsc, snaptrace, + snaptrace + snaptrace_len, false); + downgrade_write(&mdsc->snap_rwsem); + kick_flushing_inode_caps(mdsc, session, inode); + up_read(&mdsc->snap_rwsem); + if (newcaps & ~issued) + wake = 1; + } + if (queue_trunc) { ceph_queue_vmtruncate(inode); ceph_queue_revalidate(inode); @@ -2886,21 +2890,22 @@ out_unlock: } /* - * Handle cap IMPORT. If there are temp bits from an older EXPORT, - * clean them up. + * Handle cap IMPORT. * - * caller holds s_mutex. + * caller holds s_mutex. acquires i_ceph_lock */ static void handle_cap_import(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *im, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session, - void *snaptrace, int snaptrace_len) + struct ceph_cap **target_cap, int *old_issued) + __acquires(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *cap, *new_cap = NULL; + struct ceph_cap *cap, *ocap, *new_cap = NULL; int mds = session->s_mds; - unsigned issued = le32_to_cpu(im->caps); + int issued; + unsigned caps = le32_to_cpu(im->caps); unsigned wanted = le32_to_cpu(im->wanted); unsigned seq = le32_to_cpu(im->seq); unsigned mseq = le32_to_cpu(im->migrate_seq); @@ -2929,44 +2934,43 @@ retry: new_cap = ceph_get_cap(mdsc, NULL); goto retry; } + cap = new_cap; + } else { + if (new_cap) { + ceph_put_cap(mdsc, new_cap); + new_cap = NULL; + } } - ceph_add_cap(inode, session, cap_id, -1, issued, wanted, seq, mseq, + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + + ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, &new_cap); - cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; - if (cap && cap->cap_id == p_cap_id) { + ocap = peer >= 0 ? 
__get_cap_for_mds(ci, peer) : NULL; + if (ocap && ocap->cap_id == p_cap_id) { dout(" remove export cap %p mds%d flags %d\n", - cap, peer, ph->flags); + ocap, peer, ph->flags); if ((ph->flags & CEPH_CAP_FLAG_AUTH) && - (cap->seq != le32_to_cpu(ph->seq) || - cap->mseq != le32_to_cpu(ph->mseq))) { + (ocap->seq != le32_to_cpu(ph->seq) || + ocap->mseq != le32_to_cpu(ph->mseq))) { pr_err("handle_cap_import: mismatched seq/mseq: " "ino (%llx.%llx) mds%d seq %d mseq %d " "importer mds%d has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, cap->seq, - cap->mseq, mds, le32_to_cpu(ph->seq), + ceph_vinop(inode), peer, ocap->seq, + ocap->mseq, mds, le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); + __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } /* make sure we re-request max_size, if necessary */ ci->i_wanted_max_size = 0; ci->i_requested_max_size = 0; - spin_unlock(&ci->i_ceph_lock); - - wake_up_all(&ci->i_cap_wq); - down_write(&mdsc->snap_rwsem); - ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, - false); - downgrade_write(&mdsc->snap_rwsem); - kick_flushing_inode_caps(mdsc, session, inode); - up_read(&mdsc->snap_rwsem); - - if (new_cap) - ceph_put_cap(mdsc, new_cap); + *old_issued = issued; + *target_cap = cap; } /* @@ -2986,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_mds_caps *h; struct ceph_mds_cap_peer *peer = NULL; int mds = session->s_mds; - int op; + int op, issued; u32 seq, mseq; struct ceph_vino vino; u64 cap_id; @@ -3078,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, peer, session, - snaptrace, snaptrace_len); + &cap, &issued); + handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, + msg->middle, session, cap, issued); + goto done_unlocked; } /* the rest require a cap */ @@ -3095,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session 
*session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: - case CEPH_CAP_OP_IMPORT: - handle_cap_grant(inode, h, session, cap, msg->middle); + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, + session, cap, issued); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: -- cgit v1.2.3 From ca665e0282ece4f8121ab4de474351f291fa8c2d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 21 Apr 2014 15:46:37 +0800 Subject: mds: check cap ID when handling cap export message handle following sequence of events: - mds0 exports an inode to mds1. client receives the cap import message from mds1. caps from mds0 are removed while handling the cap import message. - mds1 exports an inode to mds0. client receives the cap export message from mds1. handle_cap_export() adds placeholder caps for mds0 - client receives the first cap export message (for exporting inode from mds0 to mds1) Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph/caps.c') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 9f2c99c34e92..1fde164b74b5 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2805,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, retry: spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); - if (!cap) + if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id)) goto out_unlock; if (target < 0) { -- cgit v1.2.3