summaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c16
-rw-r--r--fs/ceph/caps.c93
-rw-r--r--fs/ceph/debugfs.c6
-rw-r--r--fs/ceph/dir.c56
-rw-r--r--fs/ceph/export.c25
-rw-r--r--fs/ceph/file.c15
-rw-r--r--fs/ceph/inode.c34
-rw-r--r--fs/ceph/mds_client.c15
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/snap.c22
-rw-r--r--fs/ceph/super.c11
-rw-r--r--fs/ceph/super.h70
-rw-r--r--fs/ceph/xattr.c12
13 files changed, 227 insertions, 149 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 561438b6a50c..33da49dc3cc6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -24,7 +24,7 @@
* context needs to be associated with the osd write during writeback.
*
* Similarly, struct ceph_inode_info maintains a set of counters to
- * count dirty pages on the inode. In the absense of snapshots,
+ * count dirty pages on the inode. In the absence of snapshots,
* i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
*
* When a snapshot is taken (that is, when the client receives
@@ -92,7 +92,7 @@ static int ceph_set_page_dirty(struct page *page)
ci->i_head_snapc = ceph_get_snap_context(snapc);
++ci->i_wrbuffer_ref_head;
if (ci->i_wrbuffer_ref == 0)
- igrab(inode);
+ ihold(inode);
++ci->i_wrbuffer_ref;
dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
"snapc %p seq %lld (%d snaps)\n",
@@ -775,6 +775,13 @@ get_more_pages:
ci->i_truncate_seq,
ci->i_truncate_size,
&inode->i_mtime, true, 1, 0);
+
+ if (!req) {
+ rc = -ENOMEM;
+ unlock_page(page);
+ break;
+ }
+
max_pages = req->r_num_pages;
alloc_page_vec(fsc, req);
@@ -841,7 +848,8 @@ get_more_pages:
op->payload_len = cpu_to_le32(len);
req->r_request->hdr.data_len = cpu_to_le32(len);
- ceph_osdc_start_request(&fsc->client->osdc, req, true);
+ rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
+ BUG_ON(rc);
req = NULL;
/* continue? */
@@ -873,8 +881,6 @@ release_pvec_pages:
out:
if (req)
ceph_osdc_put_request(req);
- if (rc > 0)
- rc = 0; /* vfs expects us to return 0 */
ceph_put_snap_context(snapc);
dout("writepages done, rc = %d\n", rc);
return rc;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6b61ded701e1..1f72b00447c4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -569,7 +569,8 @@ retry:
list_add_tail(&cap->session_caps, &session->s_caps);
session->s_nr_caps++;
spin_unlock(&session->s_cap_lock);
- }
+ } else if (new_cap)
+ ceph_put_cap(mdsc, new_cap);
if (!ci->i_snap_realm) {
/*
@@ -765,7 +766,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
if (touch) {
struct rb_node *q;
- /* touch this + preceeding caps */
+ /* touch this + preceding caps */
__touch_cap(cap);
for (q = rb_first(&ci->i_caps); q != p;
q = rb_next(q)) {
@@ -819,7 +820,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
used |= CEPH_CAP_FILE_CACHE;
if (ci->i_wr_ref)
used |= CEPH_CAP_FILE_WR;
- if (ci->i_wrbuffer_ref)
+ if (ci->i_wb_ref || ci->i_wrbuffer_ref)
used |= CEPH_CAP_FILE_BUFFER;
return used;
}
@@ -1331,10 +1332,11 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
}
/*
- * Mark caps dirty. If inode is newly dirty, add to the global dirty
- * list.
+ * Mark caps dirty. If inode is newly dirty, return the dirty flags.
+ * Caller is then responsible for calling __mark_inode_dirty with the
+ * returned flags value.
*/
-void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
+int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
{
struct ceph_mds_client *mdsc =
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1357,7 +1359,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
spin_unlock(&mdsc->cap_dirty_lock);
if (ci->i_flushing_caps == 0) {
- igrab(inode);
+ ihold(inode);
dirty |= I_DIRTY_SYNC;
}
}
@@ -1365,9 +1367,8 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
(mask & CEPH_CAP_FILE_BUFFER))
dirty |= I_DIRTY_DATASYNC;
- if (dirty)
- __mark_inode_dirty(inode, dirty);
__cap_delay_requeue(mdsc, ci);
+ return dirty;
}
/*
@@ -1990,11 +1991,11 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
if (got & CEPH_CAP_FILE_WR)
ci->i_wr_ref++;
if (got & CEPH_CAP_FILE_BUFFER) {
- if (ci->i_wrbuffer_ref == 0)
- igrab(&ci->vfs_inode);
- ci->i_wrbuffer_ref++;
- dout("__take_cap_refs %p wrbuffer %d -> %d (?)\n",
- &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref);
+ if (ci->i_wb_ref == 0)
+ ihold(&ci->vfs_inode);
+ ci->i_wb_ref++;
+ dout("__take_cap_refs %p wb %d -> %d (?)\n",
+ &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
}
}
@@ -2169,12 +2170,12 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
if (--ci->i_rdcache_ref == 0)
last++;
if (had & CEPH_CAP_FILE_BUFFER) {
- if (--ci->i_wrbuffer_ref == 0) {
+ if (--ci->i_wb_ref == 0) {
last++;
put++;
}
- dout("put_cap_refs %p wrbuffer %d -> %d (?)\n",
- inode, ci->i_wrbuffer_ref+1, ci->i_wrbuffer_ref);
+ dout("put_cap_refs %p wb %d -> %d (?)\n",
+ inode, ci->i_wb_ref+1, ci->i_wb_ref);
}
if (had & CEPH_CAP_FILE_WR)
if (--ci->i_wr_ref == 0) {
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
struct ceph_mds_session *session,
int *open_target_sessions)
{
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
* export targets, so that we get the matching IMPORT
*/
*open_target_sessions = 1;
+
+ /*
+ * we can't flush dirty caps that we've seen the
+ * EXPORT but no IMPORT for
+ */
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (!list_empty(&ci->i_dirty_item)) {
+ dout(" moving %p to cap_dirty_migrating\n",
+ inode);
+ list_move(&ci->i_dirty_item,
+ &mdsc->cap_dirty_migrating);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
}
__ceph_remove_cap(cap);
}
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
ci->i_cap_exporting_issued = 0;
ci->i_cap_exporting_mseq = 0;
ci->i_cap_exporting_mds = -1;
+
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (!list_empty(&ci->i_dirty_item)) {
+ dout(" moving %p back to cap_dirty\n", inode);
+ list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
} else {
dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
inode, ci, mds, mseq);
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
*/
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{
- struct ceph_inode_info *ci, *nci = NULL;
- struct inode *inode, *ninode = NULL;
- struct list_head *p, *n;
+ struct ceph_inode_info *ci;
+ struct inode *inode;
dout("flush_dirty_caps\n");
spin_lock(&mdsc->cap_dirty_lock);
- list_for_each_safe(p, n, &mdsc->cap_dirty) {
- if (nci) {
- ci = nci;
- inode = ninode;
- ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
- dout("flush_dirty_caps inode %p (was next inode)\n",
- inode);
- } else {
- ci = list_entry(p, struct ceph_inode_info,
- i_dirty_item);
- inode = igrab(&ci->vfs_inode);
- BUG_ON(!inode);
- dout("flush_dirty_caps inode %p\n", inode);
- }
- if (n != &mdsc->cap_dirty) {
- nci = list_entry(n, struct ceph_inode_info,
- i_dirty_item);
- ninode = igrab(&nci->vfs_inode);
- BUG_ON(!ninode);
- nci->i_ceph_flags |= CEPH_I_NOFLUSH;
- dout("flush_dirty_caps next inode %p, noflush\n",
- ninode);
- } else {
- nci = NULL;
- ninode = NULL;
- }
+ while (!list_empty(&mdsc->cap_dirty)) {
+ ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
+ i_dirty_item);
+ inode = igrab(&ci->vfs_inode);
+ dout("flush_dirty_caps %p\n", inode);
spin_unlock(&mdsc->cap_dirty_lock);
if (inode) {
ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
spin_lock(&mdsc->cap_dirty_lock);
}
spin_unlock(&mdsc->cap_dirty_lock);
+ dout("flush_dirty_caps done\n");
}
/*
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 08f65faac112..0dba6915712b 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -210,8 +210,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
if (!fsc->debugfs_congestion_kb)
goto out;
- dout("a\n");
-
snprintf(name, sizeof(name), "../../bdi/%s",
dev_name(fsc->backing_dev_info.dev));
fsc->debugfs_bdi =
@@ -221,7 +219,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
if (!fsc->debugfs_bdi)
goto out;
- dout("b\n");
fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
0600,
fsc->client->debugfs_dir,
@@ -230,7 +227,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
if (!fsc->debugfs_mdsmap)
goto out;
- dout("ca\n");
fsc->debugfs_mdsc = debugfs_create_file("mdsc",
0600,
fsc->client->debugfs_dir,
@@ -239,7 +235,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
if (!fsc->debugfs_mdsc)
goto out;
- dout("da\n");
fsc->debugfs_caps = debugfs_create_file("caps",
0400,
fsc->client->debugfs_dir,
@@ -248,7 +243,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
if (!fsc->debugfs_caps)
goto out;
- dout("ea\n");
fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
0600,
fsc->client->debugfs_dir,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bc68de8edd7..33729e822bb9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -161,7 +161,7 @@ more:
filp->f_pos = di->offset;
err = filldir(dirent, dentry->d_name.name,
dentry->d_name.len, di->offset,
- dentry->d_inode->i_ino,
+ ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
dentry->d_inode->i_mode >> 12);
if (last) {
@@ -245,15 +245,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
dout("readdir off 0 -> '.'\n");
if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0),
- inode->i_ino, inode->i_mode >> 12) < 0)
+ ceph_translate_ino(inode->i_sb, inode->i_ino),
+ inode->i_mode >> 12) < 0)
return 0;
filp->f_pos = 1;
off = 1;
}
if (filp->f_pos == 1) {
+ ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino;
dout("readdir off 1 -> '..'\n");
if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
- filp->f_dentry->d_parent->d_inode->i_ino,
+ ceph_translate_ino(inode->i_sb, ino),
inode->i_mode >> 12) < 0)
return 0;
filp->f_pos = 2;
@@ -358,7 +360,7 @@ more:
rinfo = &fi->last_readdir->r_reply_info;
dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
rinfo->dir_nr, off, fi->offset);
- while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) {
+ while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
u64 pos = ceph_make_fpos(frag, off);
struct ceph_mds_reply_inode *in =
rinfo->dir_in[off - fi->offset].in;
@@ -377,7 +379,8 @@ more:
if (filldir(dirent,
rinfo->dir_dname[off - fi->offset],
rinfo->dir_dname_len[off - fi->offset],
- pos, ino, ftype) < 0) {
+ pos,
+ ceph_translate_ino(inode->i_sb, ino), ftype) < 0) {
dout("filldir stopping us...\n");
return 0;
}
@@ -409,7 +412,7 @@ more:
spin_lock(&inode->i_lock);
if (ci->i_release_count == fi->dir_release_count) {
dout(" marking %p complete\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = filp->f_pos;
}
spin_unlock(&inode->i_lock);
@@ -496,6 +499,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
/* .snap dir? */
if (err == -ENOENT &&
+ ceph_snap(parent) == CEPH_NOSNAP &&
strcmp(dentry->d_name.name,
fsc->mount_options->snapdir_name) == 0) {
struct inode *inode = ceph_get_snapdir(parent);
@@ -992,7 +996,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
dir = dentry->d_parent->d_inode;
@@ -1023,34 +1027,13 @@ out_touch:
}
/*
- * When a dentry is released, clear the dir I_COMPLETE if it was part
- * of the current dir gen or if this is in the snapshot namespace.
+ * Release our ceph_dentry_info.
*/
-static void ceph_dentry_release(struct dentry *dentry)
+static void ceph_d_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
- struct inode *parent_inode = NULL;
- u64 snapid = CEPH_NOSNAP;
- if (!IS_ROOT(dentry)) {
- parent_inode = dentry->d_parent->d_inode;
- if (parent_inode)
- snapid = ceph_snap(parent_inode);
- }
- dout("dentry_release %p parent %p\n", dentry, parent_inode);
- if (parent_inode && snapid != CEPH_SNAPDIR) {
- struct ceph_inode_info *ci = ceph_inode(parent_inode);
-
- spin_lock(&parent_inode->i_lock);
- if (ci->i_shared_gen == di->lease_shared_gen ||
- snapid <= CEPH_MAXSNAP) {
- dout(" clearing %p complete (d_release)\n",
- parent_inode);
- ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
- ci->i_release_count++;
- }
- spin_unlock(&parent_inode->i_lock);
- }
+ dout("d_release %p\n", dentry);
if (di) {
ceph_dentry_lru_del(dentry);
if (di->lease_session)
@@ -1083,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
int left;
+ const int bufsize = 1024;
if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
return -EISDIR;
if (!cf->dir_info) {
- cf->dir_info = kmalloc(1024, GFP_NOFS);
+ cf->dir_info = kmalloc(bufsize, GFP_NOFS);
if (!cf->dir_info)
return -ENOMEM;
cf->dir_info_len =
- sprintf(cf->dir_info,
+ snprintf(cf->dir_info, bufsize,
"entries: %20lld\n"
" files: %20lld\n"
" subdirs: %20lld\n"
@@ -1275,14 +1259,14 @@ const struct inode_operations ceph_dir_iops = {
const struct dentry_operations ceph_dentry_ops = {
.d_revalidate = ceph_d_revalidate,
- .d_release = ceph_dentry_release,
+ .d_release = ceph_d_release,
};
const struct dentry_operations ceph_snapdir_dentry_ops = {
.d_revalidate = ceph_snapdir_d_revalidate,
- .d_release = ceph_dentry_release,
+ .d_release = ceph_d_release,
};
const struct dentry_operations ceph_snap_dentry_ops = {
- .d_release = ceph_dentry_release,
+ .d_release = ceph_d_release,
};
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e41056174bf8..a610d3d67488 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
static struct dentry *__fh_to_dentry(struct super_block *sb,
struct ceph_nfs_fh *fh)
{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
struct dentry *dentry;
struct ceph_vino vino;
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
vino.ino = fh->ino;
vino.snap = CEPH_NOSNAP;
inode = ceph_find_inode(sb, vino);
- if (!inode)
- return ERR_PTR(-ESTALE);
+ if (!inode) {
+ struct ceph_mds_request *req;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ req->r_ino1 = vino;
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ igrab(inode);
+ ceph_mdsc_put_request(req);
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+ }
dentry = d_obtain_alias(inode);
if (IS_ERR(dentry)) {
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ igrab(inode);
ceph_mdsc_put_request(req);
- inode = ceph_find_inode(sb, vino);
if (!inode)
return ERR_PTR(err ? err : -ESTALE);
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7d0e4a82d898..203252d88d9f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -564,11 +564,19 @@ more:
* start_request so that a tid has been assigned.
*/
spin_lock(&ci->i_unsafe_lock);
- list_add(&req->r_unsafe_item, &ci->i_unsafe_writes);
+ list_add_tail(&req->r_unsafe_item,
+ &ci->i_unsafe_writes);
spin_unlock(&ci->i_unsafe_lock);
ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
}
+
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if (ret < 0 && req->r_safe_callback) {
+ spin_lock(&ci->i_unsafe_lock);
+ list_del_init(&req->r_unsafe_item);
+ spin_unlock(&ci->i_unsafe_lock);
+ ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
+ }
}
if (file->f_flags & O_DIRECT)
@@ -726,9 +734,12 @@ retry_snap:
}
}
if (ret >= 0) {
+ int dirty;
spin_lock(&inode->i_lock);
- __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&inode->i_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
}
out:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa479..70b6a4839c38 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -36,6 +36,13 @@ static void ceph_vmtruncate_work(struct work_struct *work);
/*
* find or create an inode, given the ceph ino number
*/
+static int ceph_set_ino_cb(struct inode *inode, void *data)
+{
+ ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
+ inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
+ return 0;
+}
+
struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
{
struct inode *inode;
@@ -348,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_rd_ref = 0;
ci->i_rdcache_ref = 0;
ci->i_wr_ref = 0;
+ ci->i_wb_ref = 0;
ci->i_wrbuffer_ref = 0;
ci->i_wrbuffer_ref_head = 0;
ci->i_shared_gen = 0;
@@ -707,7 +715,7 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = 2;
}
break;
@@ -1030,9 +1038,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
dout("fill_trace doing d_move %p -> %p\n",
req->r_old_dentry, dn);
- /* d_move screws up d_subdirs order */
- ceph_i_clear(dir, CEPH_I_COMPLETE);
-
d_move(req->r_old_dentry, dn);
dout(" src %p '%.*s' dst %p '%.*s'\n",
req->r_old_dentry,
@@ -1044,12 +1049,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
rehashing bug in vfs_rename_dir */
ceph_invalidate_dentry_lease(dn);
- /* take overwritten dentry's readdir offset */
- dout("dn %p gets %p offset %lld (old offset %lld)\n",
- req->r_old_dentry, dn, ceph_dentry(dn)->offset,
+ /*
+ * d_move() puts the renamed dentry at the end of
+ * d_subdirs. We need to assign it an appropriate
+ * directory offset so we can behave when holding
+ * I_COMPLETE.
+ */
+ ceph_set_dentry_offset(req->r_old_dentry);
+ dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);
- ceph_dentry(req->r_old_dentry)->offset =
- ceph_dentry(dn)->offset;
dn = req->r_old_dentry; /* use old_dentry */
in = dn->d_inode;
@@ -1560,6 +1568,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
int release = 0, dirtied = 0;
int mask = 0;
int err = 0;
+ int inode_dirty_flags = 0;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -1718,13 +1727,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
dout("setattr %p ATTR_FILE ... hrm!\n", inode);
if (dirtied) {
- __ceph_mark_dirty_caps(ci, dirtied);
+ inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
inode->i_ctime = CURRENT_TIME;
}
release &= issued;
spin_unlock(&inode->i_lock);
+ if (inode_dirty_flags)
+ __mark_inode_dirty(inode, inode_dirty_flags);
+
if (mask) {
req->r_inode = igrab(inode);
req->r_inode_drop = release;
@@ -1809,7 +1821,7 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL);
if (!err) {
generic_fillattr(inode, stat);
- stat->ino = inode->i_ino;
+ stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
if (ceph_snap(inode) != CEPH_NOSNAP)
stat->dev = ceph_snap(inode);
else
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a1ee8fa3a8e7..79743d146be6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
if (dir) {
struct ceph_inode_info *ci = ceph_inode(dir);
+ ihold(dir);
spin_lock(&ci->i_unsafe_lock);
req->r_unsafe_dir = dir;
list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_dir_item);
spin_unlock(&ci->i_unsafe_lock);
+
+ iput(req->r_unsafe_dir);
+ req->r_unsafe_dir = NULL;
}
ceph_mdsc_put_request(req);
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
{
struct super_block *sb = mdsc->fsc->sb;
struct inode *inode;
- struct ceph_inode_info *ci;
struct dentry *parent, *dentry;
struct ceph_dentry_info *di;
int mds = session->s_mds;
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
}
- ci = ceph_inode(inode);
/* dentry */
parent = d_find_alias(inode);
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
spin_lock_init(&mdsc->snap_flush_lock);
mdsc->cap_flush_seq = 0;
INIT_LIST_HEAD(&mdsc->cap_dirty);
+ INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
mdsc->num_cap_flushing = 0;
spin_lock_init(&mdsc->cap_dirty_lock);
init_waitqueue_head(&mdsc->cap_flushing_wq);
@@ -3215,9 +3218,15 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
{
struct ceph_mds_client *mdsc = fsc->mdsc;
+ dout("mdsc_destroy %p\n", mdsc);
ceph_mdsc_stop(mdsc);
+
+ /* flush out any connection work with references to us */
+ ceph_msgr_flush();
+
fsc->mdsc = NULL;
kfree(mdsc);
+ dout("mdsc_destroy %p done\n", mdsc);
}
@@ -3298,8 +3307,8 @@ static void con_put(struct ceph_connection *con)
{
struct ceph_mds_session *s = con->private;
+ dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
ceph_put_mds_session(s);
- dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref));
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc0bba6..7d8a0d662d56 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@ struct ceph_mds_client {
u64 cap_flush_seq;
struct list_head cap_dirty; /* inodes with dirty caps */
+ struct list_head cap_dirty_migrating; /* ...that are migration... */
int num_cap_flushing; /* # caps we are flushing */
spinlock_t cap_dirty_lock; /* protects above items */
wait_queue_head_t cap_flushing_wq;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 39c243acd062..24067d68a554 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -206,7 +206,7 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
up_write(&mdsc->snap_rwsem);
} else {
spin_lock(&mdsc->snap_empty_lock);
- list_add(&mdsc->snap_empty, &realm->empty_item);
+ list_add(&realm->empty_item, &mdsc->snap_empty);
spin_unlock(&mdsc->snap_empty_lock);
}
}
@@ -342,7 +342,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
num = 0;
snapc->seq = realm->seq;
if (parent) {
- /* include any of parent's snaps occuring _after_ my
+ /* include any of parent's snaps occurring _after_ my
parent became my parent */
for (i = 0; i < parent->cached_context->num_snaps; i++)
if (parent->cached_context->snaps[i] >=
@@ -463,8 +463,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
capsnap, snapc);
- igrab(inode);
-
+ ihold(inode);
+
atomic_set(&capsnap->nref, 1);
capsnap->ci = ci;
INIT_LIST_HEAD(&capsnap->ci_item);
@@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
if (lastinode)
iput(lastinode);
- dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino);
- list_for_each_entry(child, &realm->children, child_item)
- queue_realm_cap_snaps(child);
+ list_for_each_entry(child, &realm->children, child_item) {
+ dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
+ realm, realm->ino, child, child->ino);
+ list_del_init(&child->dirty_item);
+ list_add(&child->dirty_item, &realm->dirty_item);
+ }
+ list_del_init(&realm->dirty_item);
dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
}
@@ -683,7 +687,9 @@ more:
* queue cap snaps _after_ we've built the new snap contexts,
* so that i_head_snapc can be set appropriately.
*/
- list_for_each_entry(realm, &dirty_realms, dirty_item) {
+ while (!list_empty(&dirty_realms)) {
+ realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
+ dirty_item);
queue_realm_cap_snaps(realm);
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9c5085465a63..f2f77fd3c14c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -131,6 +131,7 @@ enum {
Opt_rbytes,
Opt_norbytes,
Opt_noasyncreaddir,
+ Opt_ino32,
};
static match_table_t fsopt_tokens = {
@@ -150,6 +151,7 @@ static match_table_t fsopt_tokens = {
{Opt_rbytes, "rbytes"},
{Opt_norbytes, "norbytes"},
{Opt_noasyncreaddir, "noasyncreaddir"},
+ {Opt_ino32, "ino32"},
{-1, NULL}
};
@@ -225,6 +227,9 @@ static int parse_fsopt_token(char *c, void *private)
case Opt_noasyncreaddir:
fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
break;
+ case Opt_ino32:
+ fsopt->flags |= CEPH_MOUNT_OPT_INO32;
+ break;
default:
BUG_ON(token);
}
@@ -288,7 +293,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
fsopt->sb_flags = flags;
fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
- fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
+ fsopt->rsize = CEPH_RSIZE_DEFAULT;
fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
@@ -348,7 +353,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
if (opt->name)
seq_printf(m, ",name=%s", opt->name);
- if (opt->secret)
+ if (opt->key)
seq_puts(m, ",secret=<hidden>");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
@@ -370,7 +375,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
if (fsopt->wsize)
seq_printf(m, ",wsize=%d", fsopt->wsize);
- if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
+ if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
seq_printf(m, ",rsize=%d", fsopt->rsize);
if (fsopt->congestion_kb != default_congestion_kb())
seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 20b907d76ae2..f5cabefa98dc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -27,6 +27,7 @@
#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
+#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */
#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
@@ -35,6 +36,7 @@
#define ceph_test_mount_opt(fsc, opt) \
(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
+#define CEPH_RSIZE_DEFAULT (512*1024) /* readahead */
#define CEPH_MAX_READDIR_DEFAULT 1024
#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
@@ -291,7 +293,7 @@ struct ceph_inode_info {
/* held references to caps */
int i_pin_ref;
- int i_rd_ref, i_rdcache_ref, i_wr_ref;
+ int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
int i_wrbuffer_ref, i_wrbuffer_ref_head;
u32 i_shared_gen; /* increment each time we get FILE_SHARED */
u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
@@ -319,6 +321,16 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
return container_of(inode, struct ceph_inode_info, vfs_inode);
}
+static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
+{
+ return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
+}
+
+static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
+{
+ return (struct ceph_fs_client *)sb->s_fs_info;
+}
+
static inline struct ceph_vino ceph_vino(struct inode *inode)
{
return ceph_inode(inode)->i_vino;
@@ -327,19 +339,49 @@ static inline struct ceph_vino ceph_vino(struct inode *inode)
/*
* ino_t is <64 bits on many architectures, blech.
*
- * don't include snap in ino hash, at least for now.
+ * i_ino (kernel inode) st_ino (userspace)
+ * i386 32 32
+ * x86_64+ino32 64 32
+ * x86_64 64 64
+ */
+static inline u32 ceph_ino_to_ino32(ino_t ino)
+{
+ ino ^= ino >> (sizeof(ino) * 8 - 32);
+ if (!ino)
+ ino = 1;
+ return ino;
+}
+
+/*
+ * kernel i_ino value
*/
static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
{
ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */
#if BITS_PER_LONG == 32
- ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
- if (!ino)
- ino = 1;
+ ino = ceph_ino_to_ino32(ino);
#endif
return ino;
}
+/*
+ * user-visible ino (stat, filldir)
+ */
+#if BITS_PER_LONG == 32
+static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
+{
+ return ino;
+}
+#else
+static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
+{
+ if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
+ ino = ceph_ino_to_ino32(ino);
+ return ino;
+}
+#endif
+
+
/* for printf-style formatting */
#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
@@ -428,13 +470,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
return ((loff_t)frag << 32) | (loff_t)off;
}
-static inline int ceph_set_ino_cb(struct inode *inode, void *data)
-{
- ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
- inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
- return 0;
-}
-
/*
* caps helpers
*/
@@ -471,7 +506,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
{
return ci->i_dirty_caps | ci->i_flushing_caps;
}
-extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
+extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
extern int __ceph_caps_used(struct ceph_inode_info *ci);
@@ -503,15 +538,6 @@ extern void ceph_reservation_status(struct ceph_fs_client *client,
int *total, int *avail, int *used,
int *reserved, int *min);
-static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
-{
- return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
-}
-
-static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
-{
- return (struct ceph_fs_client *)sb->s_fs_info;
-}
/*
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c9eba6ef9df..f2b628696180 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -703,6 +703,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
struct ceph_inode_xattr *xattr = NULL;
int issued;
int required_blob_size;
+ int dirty;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -763,11 +764,12 @@ retry:
dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
err = __set_xattr(ci, newname, name_len, newval,
val_len, 1, 1, 1, &xattr);
- __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
ci->i_xattrs.dirty = true;
inode->i_ctime = CURRENT_TIME;
spin_unlock(&inode->i_lock);
-
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
return err;
do_sync:
@@ -810,6 +812,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
int issued;
int err;
+ int dirty;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -833,12 +836,13 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
goto do_sync;
err = __remove_xattr_by_name(ceph_inode(inode), name);
- __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
ci->i_xattrs.dirty = true;
inode->i_ctime = CURRENT_TIME;
spin_unlock(&inode->i_lock);
-
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
return err;
do_sync:
spin_unlock(&inode->i_lock);