diff options
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- | fs/ceph/caps.c | 132 |
1 files changed, 102 insertions, 30 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cefca661464b..b93c631c6c87 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session, kuid_t uid, kgid_t gid, umode_t mode, u64 xattr_version, struct ceph_buffer *xattrs_buf, - u64 follows) + u64 follows, bool inline_data) { struct ceph_mds_caps *fc; struct ceph_msg *msg; + void *p; + size_t extra_len; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" " seq %u/%u mseq %u follows %lld size %llu/%llu" @@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session, seq, issue_seq, mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false); + /* flock buffer size + inline version + inline data size */ + extra_len = 4 + 8 + 4; + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, + GFP_NOFS, false); if (!msg) return -ENOMEM; @@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session, fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid)); fc->mode = cpu_to_le32(mode); + p = fc + 1; + /* flock buffer size */ + ceph_encode_32(&p, 0); + /* inline version */ + ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE); + /* inline data size */ + ceph_encode_32(&p, 0); + fc->xattr_version = cpu_to_le64(xattr_version); if (xattrs_buf) { msg->middle = ceph_buffer_get(xattrs_buf); @@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, u64 flush_tid = 0; int i; int ret; + bool inline_data; held = cap->issued | cap->implemented; revoking = cap->implemented & ~cap->issued; @@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, xattr_version = ci->i_xattrs.version; } + inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + spin_unlock(&ci->i_ceph_lock); ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, xattr_version, xattr_blob, - follows); + follows, inline_data); if (ret < 0) { dout("error sending cap msg, must requeue %p\n", inode); delayed = 1; @@ -1336,7 +1352,7 @@ retry: capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, capsnap->xattr_version, capsnap->xattr_blob, - capsnap->follows); + capsnap->follows, capsnap->inline_data); next_follows = capsnap->follows + 1; ceph_put_cap_snap(capsnap); @@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) * requested from the MDS. */ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, - int *got, loff_t endoff, int *check_max, int *err) + loff_t endoff, int *got, struct page **pinned_page, + int *check_max, int *err) { struct inode *inode = &ci->vfs_inode; int ret = 0; - int have, implemented; + int have, implemented, _got = 0; int file_wanted; dout("get_cap_refs %p need %s want %s\n", inode, ceph_cap_string(need), ceph_cap_string(want)); +again: spin_lock(&ci->i_ceph_lock); /* make sure file is actually open */ @@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ceph_cap_string(need), ceph_cap_string(file_wanted)); *err = -EBADF; ret = 1; - goto out; + goto out_unlock; } /* finish pending truncate */ @@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, *check_max = 1; ret = 1; } - goto out; + goto out_unlock; } /* * If a sync write is in progress, we must wait, so that we @@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, */ if (__ceph_have_pending_cap_snap(ci)) { dout("get_cap_refs %p cap_snap_pending\n", inode); - goto out; + goto out_unlock; } } @@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, inode, ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); if ((revoking & not) == 0) { - *got = need | (have & want); - __take_cap_refs(ci, *got); + _got = need | (have & want); + __take_cap_refs(ci, _got); ret = 1; } } else { dout("get_cap_refs %p have %s needed %s\n", inode, ceph_cap_string(have), ceph_cap_string(need)); } -out: +out_unlock: spin_unlock(&ci->i_ceph_lock); + + if (ci->i_inline_version != CEPH_INLINE_NONE && + (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && + i_size_read(inode) > 0) { + int ret1; + struct page *page = find_get_page(inode->i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + *pinned_page = page; + goto out; + } + page_cache_release(page); + } + /* + * drop cap refs first because getattr while holding + * caps refs can cause deadlock. + */ + ceph_put_cap_refs(ci, _got); + _got = 0; + + /* getattr request will bring inline data into page cache */ + ret1 = __ceph_do_getattr(inode, NULL, + CEPH_STAT_CAP_INLINE_DATA, true); + if (ret1 >= 0) { + ret = 0; + goto again; + } + *err = ret1; + ret = 1; + } +out: dout("get_cap_refs %p ret %d got %s\n", inode, - ret, ceph_cap_string(*got)); + ret, ceph_cap_string(_got)); + *got = _got; return ret; } @@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff) * due to a small max_size, make sure we check_max_size (and possibly * ask the mds) so we don't get hung up indefinitely. */ -int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got, - loff_t endoff) +int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, + loff_t endoff, int *got, struct page **pinned_page) { int check_max, ret, err; @@ -2179,8 +2229,8 @@ retry: check_max = 0; err = 0; ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, - got, endoff, + try_get_cap_refs(ci, need, want, endoff, + got, pinned_page, &check_max, &err)); if (err) ret = err; @@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode) static void handle_cap_grant(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *grant, void *snaptrace, int snaptrace_len, + u64 inline_version, + void *inline_data, int inline_len, struct ceph_buffer *xattr_buf, struct ceph_mds_session *session, struct ceph_cap *cap, int issued) @@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, bool queue_invalidate = false; bool queue_revalidate = false; bool deleted_inode = false; + bool fill_inline = false; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } BUG_ON(cap->issued & ~cap->implemented); + if (inline_version > 0 && inline_version >= ci->i_inline_version) { + ci->i_inline_version = inline_version; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO))) + fill_inline = true; + } + spin_unlock(&ci->i_ceph_lock); if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { @@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, wake = true; } + if (fill_inline) + ceph_fill_inline_data(inode, NULL, inline_data, inline_len); + if (queue_trunc) { ceph_queue_vmtruncate(inode); ceph_queue_revalidate(inode); @@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, u64 cap_id; u64 size, max_size; u64 tid; + u64 inline_version = 0; + void *inline_data = NULL; + u32 inline_len = 0; void *snaptrace; size_t snaptrace_len; - void *flock; - void *end; - u32 flock_len; + void *p, *end; dout("handle_caps from mds%d\n", mds); @@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session, snaptrace = h + 1; snaptrace_len = le32_to_cpu(h->snap_trace_len); + p = snaptrace + snaptrace_len; if (le16_to_cpu(msg->hdr.version) >= 2) { - void *p = snaptrace + snaptrace_len; + u32 flock_len; ceph_decode_32_safe(&p, end, flock_len, bad); if (p + flock_len > end) goto bad; - flock = p; - } else { - flock = NULL; - flock_len = 0; + p += flock_len; } if (le16_to_cpu(msg->hdr.version) >= 3) { if (op == CEPH_CAP_OP_IMPORT) { - void *p = flock + flock_len; if (p + sizeof(*peer) > end) goto bad; peer = p; + p += sizeof(*peer); } else if (op == CEPH_CAP_OP_EXPORT) { /* recorded in unused fields */ peer = (void *)&h->size; } } + if (le16_to_cpu(msg->hdr.version) >= 4) { + ceph_decode_64_safe(&p, end, inline_version, bad); + ceph_decode_32_safe(&p, end, inline_len, bad); + if (p + inline_len > end) + goto bad; + inline_data = p; + p += inline_len; + } + /* lookup ino */ inode = ceph_find_inode(sb, vino); ci = ceph_inode(inode); @@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, handle_cap_import(mdsc, inode, h, peer, session, &cap, &issued); handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, + inline_version, inline_data, inline_len, msg->middle, session, cap, issued); goto done_unlocked; } @@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_GRANT: __ceph_caps_issued(ci, &issued); issued |= __ceph_caps_dirty(ci); - handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, - session, cap, issued); + handle_cap_grant(mdsc, inode, h, NULL, 0, + inline_version, inline_data, inline_len, + msg->middle, session, cap, issued); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: @@ -3137,8 +3210,7 @@ flush_cap_releases: done: mutex_unlock(&session->s_mutex); done_unlocked: - if (inode) - iput(inode); + iput(inode); return; bad: |