diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-21 04:26:55 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-21 04:26:55 +0300 |
commit | 0a78ac4b9bb15b2a00dc5a5aba22b0e48834e1ad (patch) | |
tree | f010b008554ceb5f649c735684bdab2f84f894c2 /fs | |
parent | bfebeb16722d93caf7870b63aa7d094b6843479a (diff) | |
parent | 0fcf6c02b205f80f24eb548b236543ec151cb01c (diff) | |
download | linux-0a78ac4b9bb15b2a00dc5a5aba22b0e48834e1ad.tar.xz |
Merge tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"The main things are support for cephx v2 authentication protocol and
basic support for rbd images within namespaces (myself).
Also included are y2038 conversion patches from Arnd, a pile of
miscellaneous fixes from Chengguang and Zheng's feature bit
infrastructure for the filesystem"
* tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client: (40 commits)
ceph: don't drop message if it contains more data than expected
ceph: support cephfs' own feature bits
crush: fix using plain integer as NULL warning
libceph: remove unnecessary non NULL check for request_key
ceph: refactor error handling code in ceph_reserve_caps()
ceph: refactor ceph_unreserve_caps()
ceph: change to void return type for __do_request()
ceph: compare fsc->max_file_size and inode->i_size for max file size limit
ceph: add additional size check in ceph_setattr()
ceph: add additional offset check in ceph_write_iter()
ceph: add additional range check in ceph_fallocate()
ceph: add new field max_file_size in ceph_fs_client
libceph: weaken sizeof check in ceph_x_verify_authorizer_reply()
libceph: check authorizer reply/challenge length before reading
libceph: implement CEPHX_V2 calculation mode
libceph: add authorizer challenge
libceph: factor out encrypt_authorizer()
libceph: factor out __ceph_x_decrypt()
libceph: factor out __prepare_write_connect()
libceph: store ceph_auth_handshake pointer in ceph_connection
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/acl.c | 30 | ||||
-rw-r--r-- | fs/ceph/addr.c | 74 | ||||
-rw-r--r-- | fs/ceph/cache.c | 11 | ||||
-rw-r--r-- | fs/ceph/caps.c | 138 | ||||
-rw-r--r-- | fs/ceph/dir.c | 20 | ||||
-rw-r--r-- | fs/ceph/file.c | 34 | ||||
-rw-r--r-- | fs/ceph/inode.c | 83 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 98 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 14 | ||||
-rw-r--r-- | fs/ceph/quota.c | 2 | ||||
-rw-r--r-- | fs/ceph/snap.c | 6 | ||||
-rw-r--r-- | fs/ceph/super.c | 6 | ||||
-rw-r--r-- | fs/ceph/super.h | 12 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 4 |
14 files changed, 302 insertions, 230 deletions
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 59cb307b15fb..027408d55aee 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -45,6 +45,7 @@ static inline void ceph_set_cached_acl(struct inode *inode, struct posix_acl *ceph_get_acl(struct inode *inode, int type) { int size; + unsigned int retry_cnt = 0; const char *name; char *value = NULL; struct posix_acl *acl; @@ -60,6 +61,7 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type) BUG(); } +retry: size = __ceph_getxattr(inode, name, "", 0); if (size > 0) { value = kzalloc(size, GFP_NOFS); @@ -68,12 +70,22 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type) size = __ceph_getxattr(inode, name, value, size); } - if (size > 0) + if (size == -ERANGE && retry_cnt < 10) { + retry_cnt++; + kfree(value); + value = NULL; + goto retry; + } + + if (size > 0) { acl = posix_acl_from_xattr(&init_user_ns, value, size); - else if (size == -ERANGE || size == -ENODATA || size == 0) + } else if (size == -ENODATA || size == 0) { acl = NULL; - else + } else { + pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n", + ceph_vinop(inode), size); acl = ERR_PTR(-EIO); + } kfree(value); @@ -89,6 +101,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name = NULL; char *value = NULL; struct iattr newattrs; + struct timespec64 old_ctime = inode->i_ctime; umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; switch (type) { @@ -133,7 +146,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (new_mode != old_mode) { newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; - newattrs.ia_valid = ATTR_MODE; + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ret = __ceph_setattr(inode, &newattrs); if (ret) goto out_free; @@ -142,8 +155,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = __ceph_setxattr(inode, name, value, size, 0); if (ret) { if (new_mode != old_mode) { + newattrs.ia_ctime = old_ctime; newattrs.ia_mode = old_mode; - newattrs.ia_valid = ATTR_MODE; + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; __ceph_setattr(inode, &newattrs); } goto out_free; @@ -171,10 +185,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, return err; if (acl) { - int ret = posix_acl_equiv_mode(acl, mode); - if (ret < 0) + err = posix_acl_equiv_mode(acl, mode); + if (err < 0) goto out_err; - if (ret == 0) { + if (err == 0) { posix_acl_release(acl); acl = NULL; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 292b3d72d725..9c332a6f6667 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode, */ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) { - struct timespec ts; struct inode *inode; struct ceph_inode_info *ci; struct ceph_fs_client *fsc; @@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_page_writeback(page); - ts = timespec64_to_timespec(inode->i_mtime); err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, snapc, page_off, len, ceph_wbc.truncate_seq, ceph_wbc.truncate_size, - &ts, &page, 1); + &inode->i_mtime, &page, 1); if (err < 0) { struct writeback_control tmp_wbc; if (!wbc) @@ -1134,7 +1132,7 @@ new_request: pages = NULL; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); BUG_ON(rc); req = NULL; @@ -1431,7 +1429,7 @@ static void ceph_restore_sigs(sigset_t *oldset) /* * vm ops */ -static int ceph_filemap_fault(struct vm_fault *vmf) +static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); @@ -1439,8 +1437,9 @@ static int ceph_filemap_fault(struct vm_fault *vmf) struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; loff_t off = vmf->pgoff << PAGE_SHIFT; - int want, got, ret; + int want, got, err; sigset_t oldset; + vm_fault_t ret = VM_FAULT_SIGBUS; ceph_block_sigs(&oldset); @@ -1452,8 +1451,8 @@ static int ceph_filemap_fault(struct vm_fault *vmf) want = CEPH_CAP_FILE_CACHE; got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); - if (ret < 0) + err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); + if (err < 0) goto out_restore; dout("filemap_fault %p %llu~%zd got cap refs on %s\n", @@ -1465,16 +1464,17 @@ static int ceph_filemap_fault(struct vm_fault *vmf) ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); ceph_del_rw_context(fi, &rw_ctx); + dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n", + inode, off, (size_t)PAGE_SIZE, + ceph_cap_string(got), ret); } else - ret = -EAGAIN; + err = -EAGAIN; - dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", - inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret); if (pinned_page) put_page(pinned_page); ceph_put_cap_refs(ci, got); - if (ret != -EAGAIN) + if (err != -EAGAIN) goto out_restore; /* read inline data */ @@ -1482,7 +1482,6 @@ static int ceph_filemap_fault(struct vm_fault *vmf) /* does not support inline data > PAGE_SIZE */ ret = VM_FAULT_SIGBUS; } else { - int ret1; struct address_space *mapping = inode->i_mapping; struct page *page = find_or_create_page(mapping, 0, mapping_gfp_constraint(mapping, @@ -1491,32 +1490,32 @@ static int ceph_filemap_fault(struct vm_fault *vmf) ret = VM_FAULT_OOM; goto out_inline; } - ret1 = __ceph_do_getattr(inode, page, + err = __ceph_do_getattr(inode, page, CEPH_STAT_CAP_INLINE_DATA, true); - if (ret1 < 0 || off >= i_size_read(inode)) { + if (err < 0 || off >= i_size_read(inode)) { unlock_page(page); put_page(page); - if (ret1 < 0) - ret = ret1; + if (err == -ENOMEM) + ret = VM_FAULT_OOM; else ret = VM_FAULT_SIGBUS; goto out_inline; } - if (ret1 < PAGE_SIZE) - zero_user_segment(page, ret1, PAGE_SIZE); + if (err < PAGE_SIZE) + zero_user_segment(page, err, PAGE_SIZE); else flush_dcache_page(page); SetPageUptodate(page); vmf->page = page; ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; out_inline: - dout("filemap_fault %p %llu~%zd read inline data ret %d\n", + dout("filemap_fault %p %llu~%zd read inline data ret %x\n", inode, off, (size_t)PAGE_SIZE, ret); } out_restore: ceph_restore_sigs(&oldset); - if (ret < 0) - ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + if (err < 0) + ret = vmf_error(err); return ret; } @@ -1524,7 +1523,7 @@ out_restore: /* * Reuse write_begin here for simplicity. */ -static int ceph_page_mkwrite(struct vm_fault *vmf) +static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); @@ -1535,8 +1534,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) loff_t off = page_offset(page); loff_t size = i_size_read(inode); size_t len; - int want, got, ret; + int want, got, err; sigset_t oldset; + vm_fault_t ret = VM_FAULT_SIGBUS; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) @@ -1550,10 +1550,10 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) lock_page(page); locked_page = page; } - ret = ceph_uninline_data(vma->vm_file, locked_page); + err = ceph_uninline_data(vma->vm_file, locked_page); if (locked_page) unlock_page(locked_page); - if (ret < 0) + if (err < 0) goto out_free; } @@ -1570,9 +1570,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) want = CEPH_CAP_FILE_BUFFER; got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, + err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, &got, NULL); - if (ret < 0) + if (err < 0) goto out_free; dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", @@ -1590,13 +1590,13 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) break; } - ret = ceph_update_writeable_page(vma->vm_file, off, len, page); - if (ret >= 0) { + err = ceph_update_writeable_page(vma->vm_file, off, len, page); + if (err >= 0) { /* success. we'll keep the page locked. */ set_page_dirty(page); ret = VM_FAULT_LOCKED; } - } while (ret == -EAGAIN); + } while (err == -EAGAIN); if (ret == VM_FAULT_LOCKED || ci->i_inline_version != CEPH_INLINE_NONE) { @@ -1610,14 +1610,14 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) __mark_inode_dirty(inode, dirty); } - dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", + dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n", inode, off, len, ceph_cap_string(got), ret); ceph_put_cap_refs(ci, got); out_free: ceph_restore_sigs(&oldset); ceph_free_cap_flush(prealloc_cf); - if (ret < 0) - ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + if (err < 0) + ret = vmf_error(err); return ret; } @@ -1734,7 +1734,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) goto out; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; err = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!err) err = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1776,7 +1776,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) goto out_put; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; err = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!err) err = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1937,7 +1937,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, 0, false, true); err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false); - wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime); + wr_req->r_mtime = ci->vfs_inode.i_mtime; err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false); if (!err) diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 362900e42424..1bf3502bdd6f 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -25,8 +25,9 @@ #include "cache.h" struct ceph_aux_inode { - u64 version; - struct timespec mtime; + u64 version; + u64 mtime_sec; + u64 mtime_nsec; }; struct fscache_netfs ceph_cache_netfs = { @@ -130,7 +131,8 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; - aux.mtime = timespec64_to_timespec(inode->i_mtime); + aux.mtime_sec = inode->i_mtime.tv_sec; + aux.mtime_nsec = inode->i_mtime.tv_nsec; if (memcmp(data, &aux, sizeof(aux)) != 0) return FSCACHE_CHECKAUX_OBSOLETE; @@ -163,7 +165,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) if (!ci->fscache) { memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; - aux.mtime = timespec64_to_timespec(inode->i_mtime); + aux.mtime_sec = inode->i_mtime.tv_sec; + aux.mtime_nsec = inode->i_mtime.tv_nsec; ci->fscache = fscache_acquire_cookie(fsc->fscache, &ceph_fscache_inode_object_def, &ci->i_vino, sizeof(ci->i_vino), diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 990258cbd836..dd7dfdd2ba13 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -156,6 +156,37 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) spin_unlock(&mdsc->caps_list_lock); } +static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) +{ + struct ceph_cap *cap; + int i; + + if (nr_caps) { + BUG_ON(mdsc->caps_reserve_count < nr_caps); + mdsc->caps_reserve_count -= nr_caps; + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + mdsc->caps_total_count -= nr_caps; + for (i = 0; i < nr_caps; i++) { + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + kmem_cache_free(ceph_cap_cachep, cap); + } + } else { + mdsc->caps_avail_count += nr_caps; + } + + dout("%s: caps %d = %d used + %d resv + %d avail\n", + __func__, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + + mdsc->caps_avail_count); + } +} + /* * Called under mdsc->mutex. */ @@ -167,6 +198,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, int have; int alloc = 0; int max_caps; + int err = 0; bool trimmed = false; struct ceph_mds_session *s; LIST_HEAD(newcaps); @@ -233,9 +265,14 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", ctx, need, have + alloc); - goto out_nomem; + err = -ENOMEM; + break; + } + + if (!err) { + BUG_ON(have + alloc != need); + ctx->count = need; } - BUG_ON(have + alloc != need); spin_lock(&mdsc->caps_list_lock); mdsc->caps_total_count += alloc; @@ -245,77 +282,26 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); + + if (err) + __ceph_unreserve_caps(mdsc, have + alloc); + spin_unlock(&mdsc->caps_list_lock); - ctx->count = need; dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", ctx, mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); - return 0; - -out_nomem: - - spin_lock(&mdsc->caps_list_lock); - mdsc->caps_avail_count += have; - mdsc->caps_reserve_count -= have; - - while (!list_empty(&newcaps)) { - cap = list_first_entry(&newcaps, - struct ceph_cap, caps_item); - list_del(&cap->caps_item); - - /* Keep some preallocated caps around (ceph_min_count), to - * avoid lots of free/alloc churn. */ - if (mdsc->caps_avail_count >= - mdsc->caps_reserve_count + mdsc->caps_min_count) { - kmem_cache_free(ceph_cap_cachep, cap); - } else { - mdsc->caps_avail_count++; - mdsc->caps_total_count++; - list_add(&cap->caps_item, &mdsc->caps_list); - } - } - - BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + - mdsc->caps_reserve_count + - mdsc->caps_avail_count); - spin_unlock(&mdsc->caps_list_lock); - return -ENOMEM; + return err; } -int ceph_unreserve_caps(struct ceph_mds_client *mdsc, +void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { - int i; - struct ceph_cap *cap; - dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); - if (ctx->count) { - spin_lock(&mdsc->caps_list_lock); - BUG_ON(mdsc->caps_reserve_count < ctx->count); - mdsc->caps_reserve_count -= ctx->count; - if (mdsc->caps_avail_count >= - mdsc->caps_reserve_count + mdsc->caps_min_count) { - mdsc->caps_total_count -= ctx->count; - for (i = 0; i < ctx->count; i++) { - cap = list_first_entry(&mdsc->caps_list, - struct ceph_cap, caps_item); - list_del(&cap->caps_item); - kmem_cache_free(ceph_cap_cachep, cap); - } - } else { - mdsc->caps_avail_count += ctx->count; - } - ctx->count = 0; - dout("unreserve caps %d = %d used + %d resv + %d avail\n", - mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); - BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + - mdsc->caps_reserve_count + - mdsc->caps_avail_count); - spin_unlock(&mdsc->caps_list_lock); - } - return 0; + spin_lock(&mdsc->caps_list_lock); + __ceph_unreserve_caps(mdsc, ctx->count); + ctx->count = 0; + spin_unlock(&mdsc->caps_list_lock); } struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, @@ -1125,7 +1111,7 @@ struct cap_msg_args { u64 flush_tid, oldest_flush_tid, size, max_size; u64 xattr_version; struct ceph_buffer *xattr_buf; - struct timespec atime, mtime, ctime; + struct timespec64 atime, mtime, ctime; int op, caps, wanted, dirty; u32 seq, issue_seq, mseq, time_warp_seq; u32 flags; @@ -1146,7 +1132,7 @@ static int send_cap_msg(struct cap_msg_args *arg) struct ceph_msg *msg; void *p; size_t extra_len; - struct timespec zerotime = {0}; + struct timespec64 zerotime = {0}; struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" @@ -1186,9 +1172,9 @@ static int send_cap_msg(struct cap_msg_args *arg) fc->size = cpu_to_le64(arg->size); fc->max_size = cpu_to_le64(arg->max_size); - ceph_encode_timespec(&fc->mtime, &arg->mtime); - ceph_encode_timespec(&fc->atime, &arg->atime); - ceph_encode_timespec(&fc->ctime, &arg->ctime); + ceph_encode_timespec64(&fc->mtime, &arg->mtime); + ceph_encode_timespec64(&fc->atime, &arg->atime); + ceph_encode_timespec64(&fc->ctime, &arg->ctime); fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq); fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid)); @@ -1237,7 +1223,7 @@ static int send_cap_msg(struct cap_msg_args *arg) * We just zero these out for now, as the MDS ignores them unless * the requisite feature flags are set (which we don't do yet). */ - ceph_encode_timespec(p, &zerotime); + ceph_encode_timespec64(p, &zerotime); p += sizeof(struct ceph_timespec); ceph_encode_64(&p, 0); @@ -1360,9 +1346,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, arg.xattr_buf = NULL; } - arg.mtime = timespec64_to_timespec(inode->i_mtime); - arg.atime = timespec64_to_timespec(inode->i_atime); - arg.ctime = timespec64_to_timespec(inode->i_ctime); + arg.mtime = inode->i_mtime; + arg.atime = inode->i_atime; + arg.ctime = inode->i_ctime; arg.op = op; arg.caps = cap->implemented; @@ -3148,11 +3134,11 @@ static void handle_cap_grant(struct inode *inode, } if (newcaps & CEPH_CAP_ANY_RD) { - struct timespec mtime, atime, ctime; + struct timespec64 mtime, atime, ctime; /* ctime/mtime/atime? */ - ceph_decode_timespec(&mtime, &grant->mtime); - ceph_decode_timespec(&atime, &grant->atime); - ceph_decode_timespec(&ctime, &grant->ctime); + ceph_decode_timespec64(&mtime, &grant->mtime); + ceph_decode_timespec64(&atime, &grant->atime); + ceph_decode_timespec64(&ctime, &grant->ctime); ceph_fill_file_time(inode, extra_info->issued, le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, &atime); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 036ac0f3a393..82928cea0209 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -827,12 +827,14 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; - if (ceph_quota_is_max_files_exceeded(dir)) - return -EDQUOT; + if (ceph_quota_is_max_files_exceeded(dir)) { + err = -EDQUOT; + goto out; + } err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) - return err; + goto out; dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", dir, dentry, mode, rdev); @@ -883,8 +885,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; - if (ceph_quota_is_max_files_exceeded(dir)) - return -EDQUOT; + if (ceph_quota_is_max_files_exceeded(dir)) { + err = -EDQUOT; + goto out; + } dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); @@ -1393,7 +1397,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, " rfiles: %20lld\n" " rsubdirs: %20lld\n" "rbytes: %20lld\n" - "rctime: %10ld.%09ld\n", + "rctime: %10lld.%09ld\n", ci->i_files + ci->i_subdirs, ci->i_files, ci->i_subdirs, @@ -1401,8 +1405,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, ci->i_rfiles, ci->i_rsubdirs, ci->i_rbytes, - (long)ci->i_rctime.tv_sec, - (long)ci->i_rctime.tv_nsec); + ci->i_rctime.tv_sec, + ci->i_rctime.tv_nsec); } if (*ppos >= dfi->dir_info_len) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e2679e8a2535..92ab20433682 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -720,7 +720,7 @@ struct ceph_aio_request { struct list_head osd_reqs; unsigned num_reqs; atomic_t pending_reqs; - struct timespec mtime; + struct timespec64 mtime; struct ceph_cap_flush *prealloc_cf; }; @@ -922,7 +922,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, int num_pages = 0; int flags; int ret; - struct timespec mtime = timespec64_to_timespec(current_time(inode)); + struct timespec64 mtime = current_time(inode); size_t count = iov_iter_count(iter); loff_t pos = iocb->ki_pos; bool write = iov_iter_rw(iter) == WRITE; @@ -1130,7 +1130,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, int flags; int ret; bool check_caps = false; - struct timespec mtime = timespec64_to_timespec(current_time(inode)); + struct timespec64 mtime = current_time(inode); size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) @@ -1383,12 +1383,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_sb_to_client(inode->i_sb)->client->osdc; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; int err, want, got; loff_t pos; + loff_t limit = max(i_size_read(inode), fsc->max_file_size); if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -1414,6 +1414,13 @@ retry_snap: goto out; pos = iocb->ki_pos; + if (unlikely(pos >= limit)) { + err = -EFBIG; + goto out; + } else { + iov_iter_truncate(from, limit - pos); + } + count = iov_iter_count(from); if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) { err = -EDQUOT; @@ -1435,7 +1442,7 @@ retry_snap: } /* FIXME: not complete since it doesn't account for being at quota */ - if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) { + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) { err = -ENOSPC; goto out; } @@ -1525,7 +1532,7 @@ retry_snap: } if (written >= 0) { - if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL)) + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL)) iocb->ki_flags |= IOCB_DSYNC; written = generic_write_sync(iocb, written); } @@ -1546,6 +1553,7 @@ out_unlocked: static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); loff_t i_size; loff_t ret; @@ -1590,7 +1598,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) break; } - ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); + ret = vfs_setpos(file, offset, max(i_size, fsc->max_file_size)); out: inode_unlock(inode); @@ -1662,7 +1670,7 @@ static int ceph_zero_partial_object(struct inode *inode, goto out; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!ret) { ret = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1727,8 +1735,7 @@ static long ceph_fallocate(struct file *file, int mode, struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_inode_to_client(inode)->client->osdc; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; int want, got = 0; int dirty; @@ -1736,6 +1743,9 @@ static long ceph_fallocate(struct file *file, int mode, loff_t endoff = 0; loff_t size; + if ((offset + length) > max(i_size_read(inode), fsc->max_file_size)) + return -EFBIG; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -1759,7 +1769,7 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } - if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) && !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; goto unlock; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a866be999216..ebc7bdaed2d0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -658,13 +658,10 @@ int ceph_fill_file_size(struct inode *inode, int issued, } void ceph_fill_file_time(struct inode *inode, int issued, - u64 time_warp_seq, struct timespec *ctime, - struct timespec *mtime, struct timespec *atime) + u64 time_warp_seq, struct timespec64 *ctime, + struct timespec64 *mtime, struct timespec64 *atime) { struct ceph_inode_info *ci = ceph_inode(inode); - struct timespec64 ctime64 = timespec_to_timespec64(*ctime); - struct timespec64 mtime64 = timespec_to_timespec64(*mtime); - struct timespec64 atime64 = timespec_to_timespec64(*atime); int warn = 0; if (issued & (CEPH_CAP_FILE_EXCL| @@ -673,39 +670,39 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { if (ci->i_version == 0 || - timespec64_compare(&ctime64, &inode->i_ctime) > 0) { + timespec64_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", - (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, - (long long)ctime->tv_sec, ctime->tv_nsec); - inode->i_ctime = ctime64; + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + ctime->tv_sec, ctime->tv_nsec); + inode->i_ctime = *ctime; } if (ci->i_version == 0 || ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ dout("mtime %lld.%09ld -> %lld.%09ld " "tw %d -> %d\n", - (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)mtime->tv_sec, mtime->tv_nsec, + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + mtime->tv_sec, mtime->tv_nsec, ci->i_time_warp_seq, (int)time_warp_seq); - inode->i_mtime = mtime64; - inode->i_atime = atime64; + inode->i_mtime = *mtime; + inode->i_atime = *atime; ci->i_time_warp_seq = time_warp_seq; } else if (time_warp_seq == ci->i_time_warp_seq) { /* nobody did utimes(); take the max */ - if (timespec64_compare(&mtime64, &inode->i_mtime) > 0) { + if (timespec64_compare(mtime, &inode->i_mtime) > 0) { dout("mtime %lld.%09ld -> %lld.%09ld inc\n", - (long long)inode->i_mtime.tv_sec, + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)mtime->tv_sec, mtime->tv_nsec); - inode->i_mtime = mtime64; + mtime->tv_sec, mtime->tv_nsec); + inode->i_mtime = *mtime; } - if (timespec64_compare(&atime64, &inode->i_atime) > 0) { + if (timespec64_compare(atime, &inode->i_atime) > 0) { dout("atime %lld.%09ld -> %lld.%09ld inc\n", - (long long)inode->i_atime.tv_sec, + inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - (long long)atime->tv_sec, atime->tv_nsec); - inode->i_atime = atime64; + atime->tv_sec, atime->tv_nsec); + inode->i_atime = *atime; } } else if (issued & CEPH_CAP_FILE_EXCL) { /* we did a utimes(); ignore mds values */ @@ -715,9 +712,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, } else { /* we have no write|excl caps; whatever the MDS says is true */ if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { - inode->i_ctime = ctime64; - inode->i_mtime = mtime64; - inode->i_atime = atime64; + inode->i_ctime = *ctime; + inode->i_mtime = *mtime; + inode->i_atime = *atime; ci->i_time_warp_seq = time_warp_seq; } else { warn = 1; @@ -743,7 +740,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int issued, new_issued, info_caps; - struct timespec mtime, atime, ctime; + struct timespec64 mtime, atime, ctime; struct ceph_buffer *xattr_blob = NULL; struct ceph_string *pool_ns = NULL; struct ceph_cap *new_cap = NULL; @@ -823,9 +820,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { /* be careful with mtime, atime, size */ - ceph_decode_timespec(&atime, &info->atime); - ceph_decode_timespec(&mtime, &info->mtime); - ceph_decode_timespec(&ctime, &info->ctime); + ceph_decode_timespec64(&atime, &info->atime); + ceph_decode_timespec64(&mtime, &info->mtime); + ceph_decode_timespec64(&ctime, &info->ctime); ceph_fill_file_time(inode, issued, le32_to_cpu(info->time_warp_seq), &ctime, &mtime, &atime); @@ -872,7 +869,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ci->i_rbytes = le64_to_cpu(info->rbytes); ci->i_rfiles = le64_to_cpu(info->rfiles); ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); - ceph_decode_timespec(&ci->i_rctime, &info->rctime); + ceph_decode_timespec64(&ci->i_rctime, &info->rctime); } } @@ -1954,7 +1951,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) int err = 0; int inode_dirty_flags = 0; bool lock_snap_rwsem = false; - struct timespec ts; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) @@ -2030,8 +2026,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (ia_valid & ATTR_ATIME) { dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode, - (long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - (long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); + inode->i_atime.tv_sec, inode->i_atime.tv_nsec, + attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_atime = attr->ia_atime; @@ -2043,8 +2039,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) dirtied |= CEPH_CAP_FILE_WR; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || !timespec64_equal(&inode->i_atime, &attr->ia_atime)) { - ts = timespec64_to_timespec(attr->ia_atime); - ceph_encode_timespec(&req->r_args.setattr.atime, &ts); + ceph_encode_timespec64(&req->r_args.setattr.atime, + &attr->ia_atime); mask |= CEPH_SETATTR_ATIME; release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; @@ -2052,8 +2048,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) } if (ia_valid & ATTR_MTIME) { dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode, - (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_mtime = attr->ia_mtime; @@ -2065,8 +2061,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) dirtied |= CEPH_CAP_FILE_WR; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || !timespec64_equal(&inode->i_mtime, &attr->ia_mtime)) { - ts = timespec64_to_timespec(attr->ia_mtime); - ceph_encode_timespec(&req->r_args.setattr.mtime, &ts); + ceph_encode_timespec64(&req->r_args.setattr.mtime, + &attr->ia_mtime); mask |= CEPH_SETATTR_MTIME; release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; @@ -2097,8 +2093,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME| ATTR_MODE|ATTR_UID|ATTR_GID)) == 0; dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode, - (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, - (long long)attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, only ? "ctime only" : "ignored"); if (only) { /* @@ -2140,7 +2136,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_inode_drop = release; req->r_args.setattr.mask = cpu_to_le32(mask); req->r_num_caps = 1; - req->r_stamp = timespec64_to_timespec(attr->ia_ctime); + req->r_stamp = attr->ia_ctime; err = ceph_mdsc_do_request(mdsc, NULL, req); } dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, @@ -2161,6 +2157,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) int ceph_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); int err; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -2171,6 +2168,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) return err; if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size > max(inode->i_size, fsc->max_file_size)) + return -EFBIG; + + if ((attr->ia_valid & ATTR_SIZE) && ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) return -EDQUOT; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index dc8bc664a871..bc43c822426a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -902,6 +902,27 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) return msg; } +static void encode_supported_features(void **p, void *end) +{ + static const unsigned char bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; + static const size_t count = ARRAY_SIZE(bits); + + if (count > 0) { + size_t i; + size_t size = ((size_t)bits[count - 1] + 64) / 64 * 8; + + BUG_ON(*p + 4 + size > end); + ceph_encode_32(p, size); + memset(*p, 0, size); + for (i = 0; i < count; i++) + ((unsigned char*)(*p))[i / 8] |= 1 << (bits[i] % 8); + *p += size; + } else { + BUG_ON(*p + 4 > end); + ceph_encode_32(p, 0); + } +} + /* * session message, specialization for CEPH_SESSION_REQUEST_OPEN * to include additional client metadata fields. @@ -911,11 +932,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 struct ceph_msg *msg; struct ceph_mds_session_head *h; int i = -1; - int metadata_bytes = 0; + int extra_bytes = 0; int metadata_key_count = 0; struct ceph_options *opt = mdsc->fsc->client->options; struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; - void *p; + void *p, *end; const char* metadata[][2] = { {"hostname", mdsc->nodename}, @@ -926,21 +947,26 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 }; /* Calculate serialized length of metadata */ - metadata_bytes = 4; /* map length */ + extra_bytes = 4; /* map length */ for (i = 0; metadata[i][0]; ++i) { - metadata_bytes += 8 + strlen(metadata[i][0]) + + extra_bytes += 8 + strlen(metadata[i][0]) + strlen(metadata[i][1]); metadata_key_count++; } + /* supported feature */ + extra_bytes += 4 + 8; /* Allocate the message */ - msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes, + msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); return NULL; } - h = msg->front.iov_base; + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + h = p; h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); h->seq = cpu_to_le64(seq); @@ -950,11 +976,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 * * ClientSession messages with metadata are v2 */ - msg->hdr.version = cpu_to_le16(2); + msg->hdr.version = cpu_to_le16(3); msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ - p = msg->front.iov_base + sizeof(*h); + p += sizeof(*h); /* Number of entries in the map */ ceph_encode_32(&p, metadata_key_count); @@ -972,6 +998,10 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 p += val_len; } + encode_supported_features(&p, end); + msg->front.iov_len = p - msg->front.iov_base; + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + return msg; } @@ -1779,6 +1809,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); + struct timespec64 ts; if (!req) return ERR_PTR(-ENOMEM); @@ -1797,7 +1828,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); - req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran); + ktime_get_coarse_real_ts64(&ts); + req->r_stamp = timespec64_trunc(ts, mdsc->fsc->sb->s_time_gran); req->r_op = op; req->r_direct_mode = mode; @@ -2094,7 +2126,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, /* time stamp */ { struct ceph_timespec ts; - ceph_encode_timespec(&ts, &req->r_stamp); + ceph_encode_timespec64(&ts, &req->r_stamp); ceph_encode_copy(&p, &ts, sizeof(ts)); } @@ -2187,7 +2219,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, p = msg->front.iov_base + req->r_request_release_offset; { struct ceph_timespec ts; - ceph_encode_timespec(&ts, &req->r_stamp); + ceph_encode_timespec64(&ts, &req->r_stamp); ceph_encode_copy(&p, &ts, sizeof(ts)); } @@ -2225,7 +2257,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, /* * send request, or put it on the appropriate wait list. */ -static int __do_request(struct ceph_mds_client *mdsc, +static void __do_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { struct ceph_mds_session *session = NULL; @@ -2235,7 +2267,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) __unregister_request(mdsc, req); - goto out; + return; } if (req->r_timeout && @@ -2258,7 +2290,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (mdsc->mdsmap->m_epoch == 0) { dout("do_request no mdsmap, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); - goto finish; + return; } if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && @@ -2276,7 +2308,7 @@ static int __do_request(struct ceph_mds_client *mdsc, ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { dout("do_request no mds or not active, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); - goto out; + return; } /* get, open session */ @@ -2326,8 +2358,7 @@ finish: complete_request(mdsc, req); __unregister_request(mdsc, req); } -out: - return err; + return; } /* @@ -2748,7 +2779,7 @@ static void handle_session(struct ceph_mds_session *session, int wake = 0; /* decode */ - if (msg->front.iov_len != sizeof(*h)) + if (msg->front.iov_len < sizeof(*h)) goto bad; op = le32_to_cpu(h->op); seq = le64_to_cpu(h->seq); @@ -2958,15 +2989,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { - struct timespec ts; rec.v1.cap_id = cpu_to_le64(cap->cap_id); rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v1.issued = cpu_to_le32(cap->issued); rec.v1.size = cpu_to_le64(inode->i_size); - ts = timespec64_to_timespec(inode->i_mtime); - ceph_encode_timespec(&rec.v1.mtime, &ts); - ts = timespec64_to_timespec(inode->i_atime); - ceph_encode_timespec(&rec.v1.atime, &ts); + ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); + ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v1.pathbase = cpu_to_le64(pathbase); } @@ -3378,10 +3406,10 @@ static void handle_lease(struct ceph_mds_client *mdsc, vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; seq = le32_to_cpu(h->seq); - dname.name = (void *)h + sizeof(*h) + sizeof(u32); - dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); - if (dname.len != get_unaligned_le32(h+1)) + dname.len = get_unaligned_le32(h + 1); + if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len) goto bad; + dname.name = (void *)(h + 1) + sizeof(u32); /* lookup inode */ inode = ceph_find_inode(sb, vino); @@ -3644,8 +3672,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) init_rwsem(&mdsc->pool_perm_rwsem); mdsc->pool_perm_tree = RB_ROOT; - strncpy(mdsc->nodename, utsname()->nodename, - sizeof(mdsc->nodename) - 1); + strscpy(mdsc->nodename, utsname()->nodename, + sizeof(mdsc->nodename)); return 0; } @@ -4019,7 +4047,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) } else { mdsc->mdsmap = newmap; /* first mds map */ } - mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; + mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size, + MAX_LFS_FILESIZE); __wake_requests(mdsc, &mdsc->waiting_for_map); ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP, @@ -4155,6 +4184,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -4218,6 +4257,7 @@ static const struct ceph_connection_operations mds_con_ops = { .put = con_put, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 2ec3b5b35067..32fcce0d4d3c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -16,6 +16,18 @@ #include <linux/ceph/mdsmap.h> #include <linux/ceph/auth.h> +/* The first 8 bits are reserved for old ceph releases */ +#define CEPHFS_FEATURE_MIMIC 8 + +#define CEPHFS_FEATURES_ALL { \ + 0, 1, 2, 3, 4, 5, 6, 7, \ + CEPHFS_FEATURE_MIMIC, \ +} + +#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL +#define CEPHFS_FEATURES_CLIENT_REQUIRED {} + + /* * Some lock dependencies: * @@ -229,7 +241,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ kuid_t r_uid; kgid_t r_gid; - struct timespec r_stamp; + struct timespec64 r_stamp; /* for choosing which mds to send this request to */ int r_direct_mode; diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 242bfa5c0539..32d4f13784ba 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -48,7 +48,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, struct inode *inode; struct ceph_inode_info *ci; - if (msg->front.iov_len != sizeof(*h)) { + if (msg->front.iov_len < sizeof(*h)) { pr_err("%s corrupt message mds%d len %d\n", __func__, session->s_mds, (int)msg->front.iov_len); ceph_msg_dump(msg); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index af81555c14fd..041c27ea8de1 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -594,9 +594,9 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, BUG_ON(capsnap->writing); capsnap->size = inode->i_size; - capsnap->mtime = timespec64_to_timespec(inode->i_mtime); - capsnap->atime = timespec64_to_timespec(inode->i_atime); - capsnap->ctime = timespec64_to_timespec(inode->i_ctime); + capsnap->mtime = inode->i_mtime; + capsnap->atime = inode->i_atime; + capsnap->ctime = inode->i_ctime; capsnap->time_warp_seq = ci->i_time_warp_seq; capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 95a3b3ac9b6e..43ca3b763875 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -219,8 +219,7 @@ static int parse_fsopt_token(char *c, void *private) if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); + pr_err("bad option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); @@ -941,11 +940,12 @@ static int ceph_set_super(struct super_block *s, void *data) dout("set_super %p data %p\n", s, data); s->s_flags = fsc->mount_options->sb_flags; - s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ + s->s_maxbytes = MAX_LFS_FILESIZE; s->s_xattr = ceph_xattr_handlers; s->s_fs_info = fsc; fsc->sb = s; + fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ s->s_op = &ceph_super_ops; s->s_d_op = &ceph_dentry_ops; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 971328b99ede..582e28fd1b7b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -98,6 +98,7 @@ struct ceph_fs_client { unsigned long mount_state; int min_caps; /* min caps i added */ + loff_t max_file_size; struct ceph_mds_client *mdsc; @@ -193,7 +194,7 @@ struct ceph_cap_snap { u64 xattr_version; u64 size; - struct timespec mtime, atime, ctime; + struct timespec64 mtime, atime, ctime; u64 time_warp_seq; u64 truncate_size; u32 truncate_seq; @@ -307,7 +308,7 @@ struct ceph_inode_info { char *i_symlink; /* for dirs */ - struct timespec i_rctime; + struct timespec64 i_rctime; u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; @@ -655,7 +656,7 @@ extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need); -extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, +extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx); extern void ceph_reservation_status(struct ceph_fs_client *client, int *total, int *avail, int *used, @@ -857,8 +858,9 @@ extern struct inode *ceph_get_snapdir(struct inode *parent); extern int ceph_fill_file_size(struct inode *inode, int issued, u32 truncate_seq, u64 truncate_size, u64 size); extern void ceph_fill_file_time(struct inode *inode, int issued, - u64 time_warp_seq, struct timespec *ctime, - struct timespec *mtime, struct timespec *atime); + u64 time_warp_seq, struct timespec64 *ctime, + struct timespec64 *mtime, + struct timespec64 *atime); extern int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req); extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5bc8edb4c2a6..5cc8b94f8206 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -224,8 +224,8 @@ static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, size_t size) { - return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec, - (long)ci->i_rctime.tv_nsec); + return snprintf(val, size, "%lld.09%ld", ci->i_rctime.tv_sec, + ci->i_rctime.tv_nsec); } /* quotas */ |