diff options
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- | fs/ceph/file.c | 388 |
1 files changed, 251 insertions, 137 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 813974244a9d..a7254cab44cc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -12,6 +12,7 @@ #include <linux/falloc.h> #include <linux/iversion.h> #include <linux/ktime.h> +#include <linux/splice.h> #include "super.h" #include "mds_client.h" @@ -19,8 +20,9 @@ #include "io.h" #include "metric.h" -static __le32 ceph_flags_sys2wire(u32 flags) +static __le32 ceph_flags_sys2wire(struct ceph_mds_client *mdsc, u32 flags) { + struct ceph_client *cl = mdsc->fsc->client; u32 wire_flags = 0; switch (flags & O_ACCMODE) { @@ -48,7 +50,7 @@ static __le32 ceph_flags_sys2wire(u32 flags) #undef ceph_sys2wire if (flags) - dout("unused open flags: %x\n", flags); + doutc(cl, "unused open flags: %x\n", flags); return cpu_to_le32(wire_flags); } @@ -189,7 +191,7 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode) if (IS_ERR(req)) goto out; req->r_fmode = ceph_flags_to_mode(flags); - req->r_args.open.flags = ceph_flags_sys2wire(flags); + req->r_args.open.flags = ceph_flags_sys2wire(mdsc, flags); req->r_args.open.mode = cpu_to_le32(create_mode); out: return req; @@ -201,11 +203,12 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mount_options *opt = ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_file_info *fi; int ret; - dout("%s %p %p 0%o (%s)\n", __func__, inode, file, - inode->i_mode, isdir ? "dir" : "regular"); + doutc(cl, "%p %llx.%llx %p 0%o (%s)\n", inode, ceph_vinop(inode), + file, inode->i_mode, isdir ? "dir" : "regular"); BUG_ON(inode->i_fop->release != ceph_release); if (isdir) { @@ -259,6 +262,7 @@ error: */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { + struct ceph_client *cl = ceph_inode_to_client(inode); int ret = 0; switch (inode->i_mode & S_IFMT) { @@ -271,13 +275,13 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) break; case S_IFLNK: - dout("init_file %p %p 0%o (symlink)\n", inode, file, - inode->i_mode); + doutc(cl, "%p %llx.%llx %p 0%o (symlink)\n", inode, + ceph_vinop(inode), file, inode->i_mode); break; default: - dout("init_file %p %p 0%o (special)\n", inode, file, - inode->i_mode); + doutc(cl, "%p %llx.%llx %p 0%o (special)\n", inode, + ceph_vinop(inode), file, inode->i_mode); /* * we need to drop the open ref now, since we don't * have .release set to ceph_release. @@ -296,6 +300,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) int ceph_renew_caps(struct inode *inode, int fmode) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; int err, flags, wanted; @@ -307,8 +312,9 @@ int ceph_renew_caps(struct inode *inode, int fmode) (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) { int issued = __ceph_caps_issued(ci, NULL); spin_unlock(&ci->i_ceph_lock); - dout("renew caps %p want %s issued %s updating mds_wanted\n", - inode, ceph_cap_string(wanted), ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx want %s issued %s updating mds_wanted\n", + inode, ceph_vinop(inode), ceph_cap_string(wanted), + ceph_cap_string(issued)); ceph_check_caps(ci, 0); return 0; } @@ -339,7 +345,8 @@ int ceph_renew_caps(struct inode *inode, int fmode) err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); out: - dout("renew caps %p open result=%d\n", inode, err); + doutc(cl, "%p %llx.%llx open result=%d\n", inode, ceph_vinop(inode), + err); return err < 0 ? err : 0; } @@ -353,14 +360,21 @@ int ceph_open(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct ceph_file_info *fi = file->private_data; int err; int flags, fmode, wanted; + struct dentry *dentry; + char *path; + int pathlen; + u64 pathbase; + bool do_sync = false; + int mask = MAY_READ; if (fi) { - dout("open file %p is already opened\n", file); + doutc(cl, "file %p is already opened\n", file); return 0; } @@ -374,11 +388,36 @@ int ceph_open(struct inode *inode, struct file *file) return err; } - dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode, - ceph_vinop(inode), file, flags, file->f_flags); + doutc(cl, "%p %llx.%llx file %p flags %d (%d)\n", inode, + ceph_vinop(inode), file, flags, file->f_flags); fmode = ceph_flags_to_mode(flags); wanted = ceph_caps_for_mode(fmode); + if (fmode & CEPH_FILE_MODE_WR) + mask |= MAY_WRITE; + dentry = d_find_alias(inode); + if (!dentry) { + do_sync = true; + } else { + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + do_sync = true; + err = 0; + } else { + err = ceph_mds_check_access(mdsc, path, mask); + } + ceph_mdsc_free_path(path, pathlen); + dput(dentry); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + do_sync = true; + err = 0; + } + } + /* snapped files are read-only */ if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE)) return -EROFS; @@ -394,14 +433,14 @@ int ceph_open(struct inode *inode, struct file *file) * asynchronously. */ spin_lock(&ci->i_ceph_lock); - if (__ceph_is_any_real_caps(ci) && + if (!do_sync && __ceph_is_any_real_caps(ci) && (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); - dout("open %p fmode %d want %s issued %s using existing\n", - inode, fmode, ceph_cap_string(wanted), - ceph_cap_string(issued)); + doutc(cl, "open %p fmode %d want %s issued %s using existing\n", + inode, fmode, ceph_cap_string(wanted), + ceph_cap_string(issued)); __ceph_touch_fmode(ci, mdsc, fmode); spin_unlock(&ci->i_ceph_lock); @@ -412,7 +451,7 @@ int ceph_open(struct inode *inode, struct file *file) ceph_check_caps(ci, 0); return ceph_init_file(inode, file, fmode); - } else if (ceph_snap(inode) != CEPH_NOSNAP && + } else if (!do_sync && ceph_snap(inode) != CEPH_NOSNAP && (ci->i_snap_caps & wanted) == wanted) { __ceph_touch_fmode(ci, mdsc, fmode); spin_unlock(&ci->i_ceph_lock); @@ -421,7 +460,7 @@ int ceph_open(struct inode *inode, struct file *file) spin_unlock(&ci->i_ceph_lock); - dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); + doutc(cl, "open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); req = prepare_open_request(inode->i_sb, flags, 0); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -435,7 +474,7 @@ int ceph_open(struct inode *inode, struct file *file) if (!err) err = ceph_init_file(inode, file, req->r_fmode); ceph_mdsc_put_request(req); - dout("open result=%d on %llx.%llx\n", err, ceph_vinop(inode)); + doutc(cl, "open result=%d on %llx.%llx\n", err, ceph_vinop(inode)); out: return err; } @@ -515,6 +554,7 @@ no_async: static void restore_deleg_ino(struct inode *dir, u64 ino) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_mds_session *s = NULL; @@ -525,7 +565,8 @@ static void restore_deleg_ino(struct inode *dir, u64 ino) if (s) { int err = ceph_restore_deleg_ino(s, ino); if (err) - pr_warn("ceph: unable to restore delegated ino 0x%llx to session: %d\n", + pr_warn_client(cl, + "unable to restore delegated ino 0x%llx to session: %d\n", ino, err); ceph_put_mds_session(s); } @@ -557,6 +598,7 @@ static void wake_async_create_waiters(struct inode *inode, static void ceph_async_create_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct dentry *dentry = req->r_dentry; struct inode *dinode = d_inode(dentry); struct inode *tinode = req->r_target_inode; @@ -577,7 +619,8 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &base, 0); - pr_warn("async create failure path=(%llx)%s result=%d!\n", + pr_warn_client(cl, + "async create failure path=(%llx)%s result=%d!\n", base, IS_ERR(path) ? "<<bad>>" : path, result); ceph_mdsc_free_path(path, pathlen); @@ -596,14 +639,15 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, u64 ino = ceph_vino(tinode).ino; if (req->r_deleg_ino != ino) - pr_warn("%s: inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", - __func__, req->r_err, req->r_deleg_ino, ino); + pr_warn_client(cl, + "inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", + req->r_err, req->r_deleg_ino, ino); mapping_set_error(tinode->i_mapping, result); wake_async_create_waiters(tinode, req->r_session); } else if (!result) { - pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__, - req->r_deleg_ino); + pr_warn_client(cl, "no req->r_target_inode for 0x%llx\n", + req->r_deleg_ino); } out: ceph_mdsc_release_dir_caps(req); @@ -625,6 +669,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, struct timespec64 now; struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -655,7 +700,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, in.truncate_seq = cpu_to_le32(1); in.truncate_size = cpu_to_le64(-1ULL); in.xattr_version = cpu_to_le64(1); - in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid())); + in.uid = cpu_to_le32(from_kuid(&init_user_ns, + mapped_fsuid(req->r_mnt_idmap, + &init_user_ns))); if (dir->i_mode & S_ISGID) { in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid)); @@ -663,7 +710,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, if (S_ISDIR(mode)) mode |= S_ISGID; } else { - in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid())); + in.gid = cpu_to_le32(from_kgid(&init_user_ns, + mapped_fsgid(req->r_mnt_idmap, + &init_user_ns))); } in.mode = cpu_to_le32((u32)mode); @@ -683,7 +732,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, req->r_fmode, NULL); up_read(&mdsc->snap_rwsem); if (ret) { - dout("%s failed to fill inode: %d\n", __func__, ret); + doutc(cl, "failed to fill inode: %d\n", ret); ceph_dir_clear_complete(dir); if (!d_unhashed(dentry)) d_drop(dentry); @@ -691,8 +740,8 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, } else { struct dentry *dn; - dout("%s d_adding new inode 0x%llx to 0x%llx/%s\n", __func__, - vino.ino, ceph_ino(dir), dentry->d_name.name); + doutc(cl, "d_adding new inode 0x%llx to 0x%llx/%s\n", + vino.ino, ceph_ino(dir), dentry->d_name.name); ceph_dir_clear_ordered(dir); ceph_init_inode_acls(inode, as_ctx); if (inode->i_state & I_NEW) { @@ -730,7 +779,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode) { + struct mnt_idmap *idmap = file_mnt_idmap(file); struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct inode *new_inode = NULL; @@ -739,10 +790,13 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); int mask; int err; + char *path; + int pathlen; + u64 pathbase; - dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", - dir, dentry, dentry, - d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); + doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", + dir, ceph_vinop(dir), dentry, dentry, + d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); if (dentry->d_name.len > NAME_MAX) return -ENAMETOOLONG; @@ -756,6 +810,34 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, */ flags &= ~O_TRUNC; + dn = d_find_alias(dir); + if (!dn) { + try_async = false; + } else { + path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + try_async = false; + err = 0; + } else { + int fmode = ceph_flags_to_mode(flags); + + mask = MAY_READ; + if (fmode & CEPH_FILE_MODE_WR) + mask |= MAY_WRITE; + err = ceph_mds_check_access(mdsc, path, mask); + } + ceph_mdsc_free_path(path, pathlen); + dput(dn); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + try_async = false; + err = 0; + } + } + retry: if (flags & O_CREAT) { if (ceph_quota_is_max_files_exceeded(dir)) @@ -788,6 +870,8 @@ retry: mask |= CEPH_CAP_XATTR_SHARED; req->r_args.open.mask = cpu_to_le32(mask); req->r_parent = dir; + if (req->r_op == CEPH_MDS_OP_CREATE) + req->r_mnt_idmap = mnt_idmap_get(idmap); ihold(dir); if (IS_ENCRYPTED(dir)) { set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags); @@ -880,17 +964,18 @@ retry: goto out_req; if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) { /* make vfs retry on splice, ENOENT, or symlink */ - dout("atomic_open finish_no_open on dn %p\n", dn); + doutc(cl, "finish_no_open on dn %p\n", dn); err = finish_no_open(file, dn); } else { if (IS_ENCRYPTED(dir) && !fscrypt_has_permitted_context(dir, d_inode(dentry))) { - pr_warn("Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n", + pr_warn_client(cl, + "Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n", ceph_vinop(dir), ceph_vinop(d_inode(dentry))); goto out_req; } - dout("atomic_open finish_open on dn %p\n", dn); + doutc(cl, "finish_open on dn %p\n", dn); if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { struct inode *newino = d_inode(dentry); @@ -905,17 +990,19 @@ out_req: iput(new_inode); out_ctx: ceph_release_acl_sec_ctx(&as_ctx); - dout("atomic_open result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } int ceph_release(struct inode *inode, struct file *file) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (S_ISDIR(inode->i_mode)) { struct ceph_dir_file_info *dfi = file->private_data; - dout("release inode %p dir file %p\n", inode, file); + doutc(cl, "%p %llx.%llx dir file %p\n", inode, + ceph_vinop(inode), file); WARN_ON(!list_empty(&dfi->file_info.rw_contexts)); ceph_put_fmode(ci, dfi->file_info.fmode, 1); @@ -927,7 +1014,8 @@ int ceph_release(struct inode *inode, struct file *file) kmem_cache_free(ceph_dir_file_cachep, dfi); } else { struct ceph_file_info *fi = file->private_data; - dout("release inode %p regular file %p\n", inode, file); + doutc(cl, "%p %llx.%llx regular file %p\n", inode, + ceph_vinop(inode), file); WARN_ON(!list_empty(&fi->rw_contexts)); ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); @@ -963,6 +1051,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; ssize_t ret; u64 off = *ki_pos; @@ -971,7 +1060,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 objver = 0; - dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len); + doutc(cl, "on inode %p %llx.%llx %llx~%llx\n", inode, + ceph_vinop(inode), *ki_pos, len); if (ceph_inode_is_shutdown(inode)) return -EIO; @@ -1001,12 +1091,13 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, struct ceph_osd_req_op *op; u64 read_off = off; u64 read_len = len; + int extent_cnt; /* determine new offset/length if encrypted */ ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); - dout("sync_read orig %llu~%llu reading %llu~%llu", - off, len, read_off, read_len); + doutc(cl, "orig %llu~%llu reading %llu~%llu", off, len, + read_off, read_len); req = ceph_osdc_new_request(osdc, &ci->i_layout, ci->i_vino, read_off, &read_len, 0, 1, @@ -1025,6 +1116,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, len = read_off + read_len - off; more = len < iov_iter_count(to); + op = &req->r_ops[0]; + if (sparse) { + extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); + if (ret) { + ceph_osdc_put_request(req); + break; + } + } + num_pages = calc_pages_for(read_off, read_len); page_off = offset_in_page(off); pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); @@ -1038,15 +1139,6 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, offset_in_page(read_off), false, true); - op = &req->r_ops[0]; - if (sparse) { - ret = ceph_alloc_sparse_ext_map(op); - if (ret) { - ceph_osdc_put_request(req); - break; - } - } - ceph_osdc_start_request(osdc, req); ret = ceph_osdc_wait_request(osdc, req); @@ -1059,8 +1151,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, objver = req->r_version; i_size = i_size_read(inode); - dout("sync_read %llu~%llu got %zd i_size %llu%s\n", - off, len, ret, i_size, (more ? " MORE" : "")); + doutc(cl, "%llu~%llu got %zd i_size %llu%s\n", off, len, + ret, i_size, (more ? " MORE" : "")); /* Fix it to go to end of extent map */ if (sparse && ret >= 0) @@ -1106,8 +1198,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, int zlen = min(len - ret, i_size - off - ret); int zoff = page_off + ret; - dout("sync_read zero gap %llu~%llu\n", - off + ret, off + ret + zlen); + doutc(cl, "zero gap %llu~%llu\n", off + ret, + off + ret + zlen); ceph_zero_page_vector_range(zoff, zlen, pages); ret += zlen; } @@ -1152,7 +1244,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, if (last_objver) *last_objver = objver; } - dout("sync_read result %zd retry_op %d\n", ret, *retry_op); + doutc(cl, "result %zd retry_op %d\n", ret, *retry_op); return ret; } @@ -1161,9 +1253,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct ceph_client *cl = ceph_inode_to_client(inode); - dout("sync_read on file %p %llx~%zx %s\n", file, iocb->ki_pos, - iov_iter_count(to), (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); + doutc(cl, "on file %p %llx~%zx %s\n", file, iocb->ki_pos, + iov_iter_count(to), + (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL); } @@ -1191,6 +1285,7 @@ static void ceph_aio_retry_work(struct work_struct *work); static void ceph_aio_complete(struct inode *inode, struct ceph_aio_request *aio_req) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int ret; @@ -1204,7 +1299,7 @@ static void ceph_aio_complete(struct inode *inode, if (!ret) ret = aio_req->total_len; - dout("ceph_aio_complete %p rc %d\n", inode, ret); + doutc(cl, "%p %llx.%llx rc %d\n", inode, ceph_vinop(inode), ret); if (ret >= 0 && aio_req->write) { int dirty; @@ -1243,11 +1338,13 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric; unsigned int len = osd_data->bvec_pos.iter.bi_size; bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ); + struct ceph_client *cl = ceph_inode_to_client(inode); BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS); BUG_ON(!osd_data->num_bvecs); - dout("ceph_aio_complete_req %p rc %d bytes %u\n", inode, rc, len); + doutc(cl, "req %p inode %p %llx.%llx, rc %d bytes %u\n", req, + inode, ceph_vinop(inode), rc, len); if (rc == -EOLDSNAPC) { struct ceph_aio_work *aio_work; @@ -1388,6 +1485,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_client_metric *metric = &fsc->mdsc->metric; struct ceph_vino vino; struct ceph_osd_request *req; @@ -1406,9 +1504,9 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", - (write ? "write" : "read"), file, pos, (unsigned)count, - snapc, snapc ? snapc->seq : 0); + doutc(cl, "sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", + (write ? "write" : "read"), file, pos, (unsigned)count, + snapc, snapc ? snapc->seq : 0); if (write) { int ret2; @@ -1419,7 +1517,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, pos >> PAGE_SHIFT, (pos + count - 1) >> PAGE_SHIFT); if (ret2 < 0) - dout("invalidate_inode_pages2_range returned %d\n", ret2); + doutc(cl, "invalidate_inode_pages2_range returned %d\n", + ret2); flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE; } else { @@ -1431,6 +1530,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, ssize_t len; struct ceph_osd_req_op *op; int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ; + int extent_cnt; if (write) size = min_t(u64, size, fsc->mount_options->wsize); @@ -1451,6 +1551,16 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, break; } + op = &req->r_ops[0]; + if (!write && sparse) { + extent_cnt = __ceph_sparse_read_ext_count(inode, size); + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); + if (ret) { + ceph_osdc_put_request(req); + break; + } + } + len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages); if (len < 0) { ceph_osdc_put_request(req); @@ -1460,6 +1570,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (len != size) osd_req_op_extent_update(req, 0, len); + osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); + /* * To simplify error handling, allow AIO when IO within i_size * or IO can be satisfied by single OSD request. @@ -1491,16 +1603,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, req->r_mtime = mtime; } - osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); - op = &req->r_ops[0]; - if (sparse) { - ret = ceph_alloc_sparse_ext_map(op); - if (ret) { - ceph_osdc_put_request(req); - break; - } - } - if (aio_req) { aio_req->total_len += len; aio_req->num_reqs++; @@ -1612,6 +1714,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_osd_request *req; struct page **pages; @@ -1626,8 +1729,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_write on file %p %lld~%u snapc %p seq %lld\n", - file, pos, (unsigned)count, snapc, snapc->seq); + doutc(cl, "on file %p %lld~%u snapc %p seq %lld\n", file, pos, + (unsigned)count, snapc, snapc->seq); ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count - 1); @@ -1671,9 +1774,9 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, last = (pos + len) != (write_pos + write_len); rmw = first || last; - dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n", - ci->i_vino.ino, pos, len, write_pos, write_len, - rmw ? "" : "no "); + doutc(cl, "ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n", + ci->i_vino.ino, pos, len, write_pos, write_len, + rmw ? "" : "no "); /* * The data is emplaced into the page as it would be if it were @@ -1882,7 +1985,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, left -= ret; } if (ret < 0) { - dout("sync_write write failed with %d\n", ret); + doutc(cl, "write failed with %d\n", ret); ceph_release_page_vector(pages, num_pages); break; } @@ -1892,7 +1995,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, write_pos, write_len, GFP_KERNEL); if (ret < 0) { - dout("encryption failed with %d\n", ret); + doutc(cl, "encryption failed with %d\n", ret); ceph_release_page_vector(pages, num_pages); break; } @@ -1911,7 +2014,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, break; } - dout("sync_write write op %lld~%llu\n", write_pos, write_len); + doutc(cl, "write op %lld~%llu\n", write_pos, write_len); osd_req_op_extent_osd_data_pages(req, rmw ? 1 : 0, pages, write_len, offset_in_page(write_pos), false, true); @@ -1942,7 +2045,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, req->r_end_latency, len, ret); ceph_osdc_put_request(req); if (ret != 0) { - dout("sync_write osd write returned %d\n", ret); + doutc(cl, "osd write returned %d\n", ret); /* Version changed! Must re-do the rmw cycle */ if ((assert_ver && (ret == -ERANGE || ret == -EOVERFLOW)) || (!assert_ver && ret == -EEXIST)) { @@ -1972,13 +2075,13 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, pos >> PAGE_SHIFT, (pos + len - 1) >> PAGE_SHIFT); if (ret < 0) { - dout("invalidate_inode_pages2_range returned %d\n", - ret); + doutc(cl, "invalidate_inode_pages2_range returned %d\n", + ret); ret = 0; } pos += len; written += len; - dout("sync_write written %d\n", written); + doutc(cl, "written %d\n", written); if (pos > i_size_read(inode)) { check_caps = ceph_inode_set_size(inode, pos); if (check_caps) @@ -1992,7 +2095,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, ret = written; iocb->ki_pos = pos; } - dout("sync_write returning %d\n", ret); + doutc(cl, "returning %d\n", ret); return ret; } @@ -2011,13 +2114,14 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); bool direct_lock = iocb->ki_flags & IOCB_DIRECT; + struct ceph_client *cl = ceph_inode_to_client(inode); ssize_t ret; int want = 0, got = 0; int retry_op = 0, read = 0; again: - dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode); + doutc(cl, "%llu~%u trying to get caps on %p %llx.%llx\n", + iocb->ki_pos, (unsigned)len, inode, ceph_vinop(inode)); if (ceph_inode_is_shutdown(inode)) return -ESTALE; @@ -2045,9 +2149,9 @@ again: (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { - dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, - ceph_cap_string(got)); + doutc(cl, "sync %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, + ceph_cap_string(got)); if (!ceph_has_inline_data(ci)) { if (!retry_op && @@ -2065,16 +2169,16 @@ again: } } else { CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); - dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, - ceph_cap_string(got)); + doutc(cl, "async %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, + ceph_cap_string(got)); ceph_add_rw_context(fi, &rw_ctx); ret = generic_file_read_iter(iocb, to); ceph_del_rw_context(fi, &rw_ctx); } - dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", - inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); + doutc(cl, "%p %llx.%llx dropping cap refs on %s = %d\n", + inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); ceph_put_cap_refs(ci, got); if (direct_lock) @@ -2086,14 +2190,16 @@ again: int statret; struct page *page = NULL; loff_t i_size; + int mask = CEPH_STAT_CAP_SIZE; if (retry_op == READ_INLINE) { page = __page_cache_alloc(GFP_KERNEL); if (!page) return -ENOMEM; + + mask = CEPH_STAT_CAP_INLINE_DATA; } - statret = __ceph_do_getattr(inode, page, - CEPH_STAT_CAP_INLINE_DATA, !!page); + statret = __ceph_do_getattr(inode, page, mask, !!page); if (statret < 0) { if (page) __free_page(page); @@ -2134,8 +2240,8 @@ again: /* hit EOF or hole? */ if (retry_op == CHECK_EOF && iocb->ki_pos < i_size && ret < len) { - dout("sync_read hit hole, ppos %lld < size %lld" - ", reading more\n", iocb->ki_pos, i_size); + doutc(cl, "may hit hole, ppos %lld < size %lld, reading more\n", + iocb->ki_pos, i_size); read += ret; len -= ret; @@ -2230,6 +2336,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; @@ -2297,8 +2404,9 @@ retry_snap: if (err) goto out; - dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", - inode, ceph_vinop(inode), pos, count, i_size_read(inode)); + doutc(cl, "%p %llx.%llx %llu~%zd getting caps. i_size %llu\n", + inode, ceph_vinop(inode), pos, count, + i_size_read(inode)); if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) want |= CEPH_CAP_FILE_BUFFER; if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -2314,8 +2422,8 @@ retry_snap: inode_inc_iversion_raw(inode); - dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n", - inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx %llu~%zd got cap refs on %s\n", + inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) || @@ -2375,14 +2483,14 @@ retry_snap: ceph_check_caps(ci, CHECK_CAPS_FLUSH); } - dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)count, - ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx %llu~%u dropping cap refs on %s\n", + inode, ceph_vinop(inode), pos, (unsigned)count, + ceph_cap_string(got)); ceph_put_cap_refs(ci, got); if (written == -EOLDSNAPC) { - dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", - inode, ceph_vinop(inode), pos, (unsigned)count); + doutc(cl, "%p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", + inode, ceph_vinop(inode), pos, (unsigned)count); goto retry_snap; } @@ -2490,7 +2598,7 @@ static int ceph_zero_partial_object(struct inode *inode, goto out; } - req->r_mtime = inode->i_mtime; + req->r_mtime = inode_get_mtime(inode); ceph_osdc_start_request(&fsc->client->osdc, req); ret = ceph_osdc_wait_request(&fsc->client->osdc, req); if (ret == -ENOENT) @@ -2508,7 +2616,7 @@ static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length) s32 stripe_unit = ci->i_layout.stripe_unit; s32 stripe_count = ci->i_layout.stripe_count; s32 object_size = ci->i_layout.object_size; - u64 object_set_size = object_size * stripe_count; + u64 object_set_size = (u64) object_size * stripe_count; u64 nearly, t; /* round offset up to next period boundary */ @@ -2554,14 +2662,15 @@ static long ceph_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *prealloc_cf; + struct ceph_client *cl = ceph_inode_to_client(inode); int want, got = 0; int dirty; int ret = 0; loff_t endoff = 0; loff_t size; - dout("%s %p %llx.%llx mode %x, offset %llu length %llu\n", __func__, - inode, ceph_vinop(inode), mode, offset, length); + doutc(cl, "%p %llx.%llx mode %x, offset %llu length %llu\n", + inode, ceph_vinop(inode), mode, offset, length); if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -2690,6 +2799,7 @@ static void put_rd_wr_caps(struct ceph_inode_info *src_ci, int src_got, static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, loff_t src_off, loff_t dst_off, size_t len) { + struct ceph_client *cl = ceph_inode_to_client(src_inode); loff_t size, endoff; size = i_size_read(src_inode); @@ -2700,8 +2810,8 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, * inode. */ if (src_off + len > size) { - dout("Copy beyond EOF (%llu + %zu > %llu)\n", - src_off, len, size); + doutc(cl, "Copy beyond EOF (%llu + %zu > %llu)\n", src_off, + len, size); return -EOPNOTSUPP; } size = i_size_read(dst_inode); @@ -2777,6 +2887,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off u64 src_objnum, src_objoff, dst_objnum, dst_objoff; u32 src_objlen, dst_objlen; u32 object_size = src_ci->i_layout.object_size; + struct ceph_client *cl = fsc->client; int ret; src_oloc.pool = src_ci->i_layout.pool_id; @@ -2818,9 +2929,10 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off if (ret) { if (ret == -EOPNOTSUPP) { fsc->have_copy_from2 = false; - pr_notice("OSDs don't support copy-from2; disabling copy offload\n"); + pr_notice_client(cl, + "OSDs don't support copy-from2; disabling copy offload\n"); } - dout("ceph_osdc_copy_from returned %d\n", ret); + doutc(cl, "returned %d\n", ret); if (!bytes) bytes = ret; goto out; @@ -2847,6 +2959,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, struct ceph_inode_info *dst_ci = ceph_inode(dst_inode); struct ceph_cap_flush *prealloc_cf; struct ceph_fs_client *src_fsc = ceph_inode_to_fs_client(src_inode); + struct ceph_client *cl = src_fsc->client; loff_t size; ssize_t ret = -EIO, bytes; u64 src_objnum, dst_objnum, src_objoff, dst_objoff; @@ -2889,7 +3002,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, (src_ci->i_layout.stripe_count != 1) || (dst_ci->i_layout.stripe_count != 1) || (src_ci->i_layout.object_size != dst_ci->i_layout.object_size)) { - dout("Invalid src/dst files layout\n"); + doutc(cl, "Invalid src/dst files layout\n"); return -EOPNOTSUPP; } @@ -2907,12 +3020,12 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, /* Start by sync'ing the source and destination files */ ret = file_write_and_wait_range(src_file, src_off, (src_off + len)); if (ret < 0) { - dout("failed to write src file (%zd)\n", ret); + doutc(cl, "failed to write src file (%zd)\n", ret); goto out; } ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len)); if (ret < 0) { - dout("failed to write dst file (%zd)\n", ret); + doutc(cl, "failed to write dst file (%zd)\n", ret); goto out; } @@ -2924,7 +3037,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, err = get_rd_wr_caps(src_file, &src_got, dst_file, (dst_off + len), &dst_got); if (err < 0) { - dout("get_rd_wr_caps returned %d\n", err); + doutc(cl, "get_rd_wr_caps returned %d\n", err); ret = -EOPNOTSUPP; goto out; } @@ -2939,7 +3052,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, dst_off >> PAGE_SHIFT, (dst_off + len) >> PAGE_SHIFT); if (ret < 0) { - dout("Failed to invalidate inode pages (%zd)\n", ret); + doutc(cl, "Failed to invalidate inode pages (%zd)\n", + ret); ret = 0; /* XXX */ } ceph_calc_file_object_mapping(&src_ci->i_layout, src_off, @@ -2960,18 +3074,18 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, * starting at the src_off */ if (src_objoff) { - dout("Initial partial copy of %u bytes\n", src_objlen); + doutc(cl, "Initial partial copy of %u bytes\n", src_objlen); /* * we need to temporarily drop all caps as we'll be calling * {read,write}_iter, which will get caps again. */ put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got); - ret = do_splice_direct(src_file, &src_off, dst_file, - &dst_off, src_objlen, flags); + ret = splice_file_range(src_file, &src_off, dst_file, &dst_off, + src_objlen); /* Abort on short copies or on error */ if (ret < (long)src_objlen) { - dout("Failed partial copy (%zd)\n", ret); + doutc(cl, "Failed partial copy (%zd)\n", ret); goto out; } len -= ret; @@ -2993,7 +3107,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ret = bytes; goto out_caps; } - dout("Copied %zu bytes out of %zu\n", bytes, len); + doutc(cl, "Copied %zu bytes out of %zu\n", bytes, len); len -= bytes; ret += bytes; @@ -3021,13 +3135,13 @@ out_caps: * there were errors in remote object copies (len >= object_size). */ if (len && (len < src_ci->i_layout.object_size)) { - dout("Final partial copy of %zu bytes\n", len); - bytes = do_splice_direct(src_file, &src_off, dst_file, - &dst_off, len, flags); + doutc(cl, "Final partial copy of %zu bytes\n", len); + bytes = splice_file_range(src_file, &src_off, dst_file, + &dst_off, len); if (bytes > 0) ret += bytes; else - dout("Failed partial copy (%zd)\n", bytes); + doutc(cl, "Failed partial copy (%zd)\n", bytes); } out: @@ -3046,8 +3160,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, len, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_file, src_off, dst_file, - dst_off, len, flags); + ret = splice_copy_file_range(src_file, src_off, dst_file, + dst_off, len); return ret; } |