summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/gfs2/acl.c30
-rw-r--r--fs/gfs2/aops.c14
-rw-r--r--fs/gfs2/bmap.c24
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/file.c3
-rw-r--r--fs/gfs2/glock.c137
-rw-r--r--fs/gfs2/glock.h36
-rw-r--r--fs/gfs2/glops.c30
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c17
-rw-r--r--fs/gfs2/lock_dlm.c5
-rw-r--r--fs/gfs2/log.c13
-rw-r--r--fs/gfs2/lops.c7
-rw-r--r--fs/gfs2/meta_io.c9
-rw-r--r--fs/gfs2/ops_fstype.c7
-rw-r--r--fs/gfs2/quota.c7
-rw-r--r--fs/gfs2/rgrp.c3
-rw-r--r--fs/gfs2/super.c71
-rw-r--r--fs/gfs2/util.h1
-rw-r--r--fs/gfs2/xattr.c9
20 files changed, 321 insertions, 110 deletions
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 2524807ee070..9d5eecb123de 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -86,19 +86,6 @@ int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
char *data;
const char *name = gfs2_acl_name(type);
- if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
- return -E2BIG;
-
- if (type == ACL_TYPE_ACCESS) {
- umode_t mode = inode->i_mode;
-
- error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- if (error)
- return error;
- if (mode != inode->i_mode)
- mark_inode_dirty(inode);
- }
-
if (acl) {
len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
if (len == 0)
@@ -129,6 +116,10 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
struct gfs2_holder gh;
bool need_unlock = false;
int ret;
+ umode_t mode;
+
+ if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
+ return -E2BIG;
ret = gfs2_rsqa_alloc(ip);
if (ret)
@@ -140,7 +131,20 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return ret;
need_unlock = true;
}
+
+ mode = inode->i_mode;
+ if (type == ACL_TYPE_ACCESS && acl) {
+ ret = posix_acl_update_mode(inode, &mode, &acl);
+ if (ret)
+ goto unlock;
+ }
+
ret = __gfs2_set_acl(inode, acl, type);
+ if (!ret && mode != inode->i_mode) {
+ inode->i_mode = mode;
+ mark_inode_dirty(inode);
+ }
+unlock:
if (need_unlock)
gfs2_glock_dq_uninit(&gh);
return ret;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ed7a2e252ad8..68ed06962537 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -234,7 +234,19 @@ out:
static int gfs2_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+ struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
+ int ret = mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+
+ /*
+ * Even if we didn't write any pages here, we might still be holding
+ * dirty pages in the ail. We forcibly flush the ail because we don't
+ * want balance_dirty_pages() to loop indefinitely trying to write out
+ * pages held in the ail that it can't find.
+ */
+ if (ret == 0)
+ set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
+
+ return ret;
}
/**
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 9fa3aef9a5b3..3dd0cceefa43 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -291,8 +291,9 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl,
if (trylock_buffer(rabh)) {
if (!buffer_uptodate(rabh)) {
rabh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META,
- rabh);
+ submit_bh(REQ_OP_READ,
+ REQ_RAHEAD | REQ_META | REQ_PRIO,
+ rabh);
continue;
}
unlock_buffer(rabh);
@@ -1103,8 +1104,15 @@ static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
while (true) {
ptr = metapointer(h, mp);
- if (*ptr) /* if we have a non-null pointer */
+ if (*ptr) { /* if we have a non-null pointer */
+ /* Now zero the metapath after the current height. */
+ h++;
+ if (h < GFS2_MAX_META_HEIGHT)
+ memset(&mp->mp_list[h], 0,
+ (GFS2_MAX_META_HEIGHT - h) *
+ sizeof(mp->mp_list[0]));
return true;
+ }
if (mp->mp_list[h] < ptrs)
mp->mp_list[h]++;
@@ -1120,6 +1128,13 @@ enum dealloc_states {
DEALLOC_DONE = 3, /* process complete */
};
+static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h)
+{
+ if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0])))
+ return false;
+ return true;
+}
+
/**
* trunc_dealloc - truncate a file down to a desired size
* @ip: inode to truncate
@@ -1197,8 +1212,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
/* If we're truncating to a non-zero size and the mp is
at the beginning of file for the strip height, we
need to preserve the first metadata pointer. */
- preserve1 = (newsize &&
- (mp.mp_list[mp_h] == nbof[mp_h]));
+ preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h));
bh = mp.mp_bh[mp_h];
gfs2_assert_withdraw(sdp, bh);
if (gfs2_assert_withdraw(sdp,
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5ee2e2f8576c..06a0d1947c77 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1513,7 +1513,9 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
continue;
}
bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, bh);
+ submit_bh(REQ_OP_READ,
+ REQ_RAHEAD | REQ_META | REQ_PRIO,
+ bh);
continue;
}
brelse(bh);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c2062a108d19..bb48074be019 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1030,8 +1030,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
mutex_lock(&fp->f_fl_mutex);
- gl = fl_gh->gh_gl;
- if (gl) {
+ if (gfs2_holder_initialized(fl_gh)) {
if (fl_gh->gh_state == state)
goto out;
locks_lock_file_wait(file,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c38ab6c81898..98e845b7841b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -15,6 +15,7 @@
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
+#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
@@ -71,7 +72,7 @@ static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT)
-static struct rhashtable_params ht_parms = {
+static const struct rhashtable_params ht_parms = {
.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
.key_len = offsetofend(struct lm_lockname, ln_type),
.key_offset = offsetof(struct gfs2_glock, gl_name),
@@ -80,6 +81,49 @@ static struct rhashtable_params ht_parms = {
static struct rhashtable gl_hash_table;
+#define GLOCK_WAIT_TABLE_BITS 12
+#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
+static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;
+
+struct wait_glock_queue {
+ struct lm_lockname *name;
+ wait_queue_entry_t wait;
+};
+
+static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
+ int sync, void *key)
+{
+ struct wait_glock_queue *wait_glock =
+ container_of(wait, struct wait_glock_queue, wait);
+ struct lm_lockname *wait_name = wait_glock->name;
+ struct lm_lockname *wake_name = key;
+
+ if (wake_name->ln_sbd != wait_name->ln_sbd ||
+ wake_name->ln_number != wait_name->ln_number ||
+ wake_name->ln_type != wait_name->ln_type)
+ return 0;
+ return autoremove_wake_function(wait, mode, sync, key);
+}
+
+/* Hash the key bytes only: tail padding may differ between equal names. */
+static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
+{
+	u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);
+	return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
+}
+
+/**
+ * wake_up_glock - Wake up waiters on a glock
+ * @gl: the glock
+ */
+static void wake_up_glock(struct gfs2_glock *gl)
+{
+ wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);
+
+ if (waitqueue_active(wq))
+ __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
+}
+
static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
@@ -96,6 +140,9 @@ void gfs2_glock_free(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
+ smp_mb();
+ wake_up_glock(gl);
call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait);
@@ -107,7 +154,7 @@ void gfs2_glock_free(struct gfs2_glock *gl)
*
*/
-static void gfs2_glock_hold(struct gfs2_glock *gl)
+void gfs2_glock_hold(struct gfs2_glock *gl)
{
GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
lockref_get(&gl->gl_lockref);
@@ -150,6 +197,9 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
+ if (!(gl->gl_ops->go_flags & GLOF_LRU))
+ return;
+
spin_lock(&lru_lock);
if (!list_empty(&gl->gl_lru)) {
list_del_init(&gl->gl_lru);
@@ -191,13 +241,20 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
- rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
+/*
+ * Cause the glock to be put in work queue context.
+ */
+void gfs2_glock_queue_put(struct gfs2_glock *gl)
+{
+ gfs2_glock_queue_work(gl, 0);
+}
+
/**
* gfs2_glock_put() - Decrement reference count on glock
* @gl: The glock to put
@@ -676,6 +733,40 @@ static void glock_work_func(struct work_struct *work)
spin_unlock(&gl->gl_lockref.lock);
}
+static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
+ struct gfs2_glock *new)
+{
+ struct wait_glock_queue wait;
+ wait_queue_head_t *wq = glock_waitqueue(name);
+ struct gfs2_glock *gl;
+
+ wait.name = name;
+ init_wait(&wait.wait);
+ wait.wait.func = glock_wake_function;
+
+again:
+ prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ rcu_read_lock();
+ if (new) {
+ gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
+ &new->gl_node, ht_parms);
+ if (IS_ERR(gl))
+ goto out;
+ } else {
+ gl = rhashtable_lookup_fast(&gl_hash_table,
+ name, ht_parms);
+ }
+ if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
+ rcu_read_unlock();
+ schedule();
+ goto again;
+ }
+out:
+ rcu_read_unlock();
+ finish_wait(wq, &wait.wait);
+ return gl;
+}
+
/**
* gfs2_glock_get() - Get a glock, or create one if one doesn't exist
* @sdp: The GFS2 superblock
@@ -702,15 +793,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct kmem_cache *cachep;
int ret = 0;
- rcu_read_lock();
- gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
- if (gl && !lockref_get_not_dead(&gl->gl_lockref))
- gl = NULL;
- rcu_read_unlock();
-
- *glp = gl;
- if (gl)
+ gl = find_insert_glock(&name, NULL);
+ if (gl) {
+ *glp = gl;
return 0;
+ }
if (!create)
return -ENOENT;
@@ -764,10 +851,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->writeback_index = 0;
}
-again:
- rcu_read_lock();
- tmp = rhashtable_lookup_get_insert_fast(&gl_hash_table, &gl->gl_node,
- ht_parms);
+ tmp = find_insert_glock(&name, gl);
if (!tmp) {
*glp = gl;
goto out;
@@ -776,13 +860,7 @@ again:
ret = PTR_ERR(tmp);
goto out_free;
}
- if (lockref_get_not_dead(&tmp->gl_lockref)) {
- *glp = tmp;
- goto out_free;
- }
- rcu_read_unlock();
- cond_resched();
- goto again;
+ *glp = tmp;
out_free:
kfree(gl->gl_lksb.sb_lvbptr);
@@ -790,7 +868,6 @@ out_free:
atomic_dec(&sdp->sd_glock_disposal);
out:
- rcu_read_unlock();
return ret;
}
@@ -1473,14 +1550,15 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
do {
gl = ERR_PTR(rhashtable_walk_start(&iter));
- if (gl)
- continue;
+ if (IS_ERR(gl))
+ goto walk_stop;
while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
- if ((gl->gl_name.ln_sbd == sdp) &&
+ if (gl->gl_name.ln_sbd == sdp &&
lockref_get_not_dead(&gl->gl_lockref))
examiner(gl);
+walk_stop:
rhashtable_walk_stop(&iter);
} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
@@ -1803,7 +1881,7 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
int __init gfs2_glock_init(void)
{
- int ret;
+ int i, ret;
ret = rhashtable_init(&gl_hash_table, &ht_parms);
if (ret < 0)
@@ -1832,6 +1910,9 @@ int __init gfs2_glock_init(void)
return ret;
}
+ for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
+ init_waitqueue_head(glock_wait_table + i);
+
return 0;
}
@@ -1860,6 +1941,7 @@ static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
}
static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
{
struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos;
@@ -1892,6 +1974,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
}
static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
+ __releases(RCU)
{
struct gfs2_glock_iter *gi = seq->private;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 9ad4a6ac6c84..5e12220cc0c2 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/parser.h>
#include "incore.h"
+#include "util.h"
/* Options for hostdata parser */
@@ -181,7 +182,9 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
int create, struct gfs2_glock **glp);
+extern void gfs2_glock_hold(struct gfs2_glock *gl);
extern void gfs2_glock_put(struct gfs2_glock *gl);
+extern void gfs2_glock_queue_put(struct gfs2_glock *gl);
extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
u16 flags, struct gfs2_holder *gh);
extern void gfs2_holder_reinit(unsigned int state, u16 flags,
@@ -257,11 +260,44 @@ static inline bool gfs2_holder_initialized(struct gfs2_holder *gh)
return gh->gh_gl;
}
+/**
+ * glock_set_object - set the gl_object field of a glock
+ * @gl: the glock
+ * @object: the object
+ */
static inline void glock_set_object(struct gfs2_glock *gl, void *object)
{
spin_lock(&gl->gl_lockref.lock);
+ if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
+ gfs2_dump_glock(NULL, gl);
gl->gl_object = object;
spin_unlock(&gl->gl_lockref.lock);
}
+/**
+ * glock_clear_object - clear the gl_object field of a glock
+ * @gl: the glock
+ * @object: the object
+ *
+ * I'd love to similarly add this:
+ * else if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == object))
+ * gfs2_dump_glock(NULL, gl);
+ * Unfortunately, that's not possible because as soon as gfs2_delete_inode
+ * frees the block in the rgrp, another process can reassign it for an I_NEW
+ * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
+ * That means gfs2_delete_inode may subsequently try to call this function
+ * for a glock that's already pointing to a brand new inode. If we clear the
+ * new inode's gl_object, we'll introduce metadata corruption. Function
+ * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also
+ * tries to clear gl_object, so it's more than just gfs2_delete_inode.
+ *
+ */
+static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
+{
+ spin_lock(&gl->gl_lockref.lock);
+ if (gl->gl_object == object)
+ gl->gl_object = NULL;
+ spin_unlock(&gl->gl_lockref.lock);
+}
+
#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5e69636d4dd3..dac6559e2195 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -329,32 +329,6 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
return 1;
}
-/**
- * gfs2_set_nlink - Set the inode's link count based on on-disk info
- * @inode: The inode in question
- * @nlink: The link count
- *
- * If the link count has hit zero, it must never be raised, whatever the
- * on-disk inode might say. When new struct inodes are created the link
- * count is set to 1, so that we can safely use this test even when reading
- * in on disk information for the first time.
- */
-
-static void gfs2_set_nlink(struct inode *inode, u32 nlink)
-{
- /*
- * We will need to review setting the nlink count here in the
- * light of the forthcoming ro bind mount work. This is a reminder
- * to do that.
- */
- if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
- if (nlink == 0)
- clear_nlink(inode);
- else
- set_nlink(inode, nlink);
- }
-}
-
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
const struct gfs2_dinode *str = buf;
@@ -376,7 +350,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
- gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
+ set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
atime.tv_sec = be64_to_cpu(str->di_atime);
@@ -470,7 +444,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
(gh->gh_state == LM_ST_EXCLUSIVE)) {
spin_lock(&sdp->sd_trunc_lock);
if (list_empty(&ip->i_trunc_list))
- list_add(&sdp->sd_trunc_list, &ip->i_trunc_list);
+ list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
spin_unlock(&sdp->sd_trunc_lock);
wake_up(&sdp->sd_quota_wait);
return 1;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 73fce76e67ee..6e18e9793ec4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -606,6 +606,7 @@ enum {
SDF_NOJOURNALID = 6,
SDF_RORECOVERY = 7, /* read only recovery */
SDF_SKIP_DLM_UNLOCK = 8,
+ SDF_FORCE_AIL_FLUSH = 9,
};
enum gfs2_freeze_state {
@@ -816,6 +817,7 @@ struct gfs2_sbd {
atomic_t sd_log_in_flight;
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
+ int sd_log_error;
atomic_t sd_reserving_log;
wait_queue_head_t sd_reserving_log_wait;
@@ -831,7 +833,7 @@ struct gfs2_sbd {
atomic_t sd_freeze_state;
struct mutex sd_freeze_mutex;
- char sd_fsname[GFS2_FSNAME_LEN];
+ char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
char sd_table_name[GFS2_FSNAME_LEN];
char sd_proto_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index acca501f8110..863749e29bf9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -109,7 +109,7 @@ static void gfs2_set_iop(struct inode *inode)
* @no_addr: The inode number
* @no_formal_ino: The inode generation number
* @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED;
- * GFS2_BLKST_FREE do indicate not to verify)
+ * GFS2_BLKST_FREE to indicate not to verify)
*
* If @type is DT_UNKNOWN, the inode type is fetched from disk.
*
@@ -145,7 +145,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (unlikely(error))
goto fail;
flush_delayed_work(&ip->i_gl->gl_work);
- glock_set_object(ip->i_gl, ip);
error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (unlikely(error))
@@ -170,11 +169,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
}
}
+ glock_set_object(ip->i_gl, ip);
set_bit(GIF_INVALID, &ip->i_flags);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail_put;
- flush_delayed_work(&ip->i_iopen_gh.gh_gl->gl_work);
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_put(io_gl);
io_gl = NULL;
@@ -202,14 +201,14 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
fail_refresh:
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- glock_set_object(ip->i_iopen_gh.gh_gl, NULL);
+ glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_put:
if (io_gl)
gfs2_glock_put(io_gl);
+ glock_clear_object(ip->i_gl, ip);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
- glock_set_object(ip->i_gl, NULL);
fail:
iget_failed(inode);
return ERR_PTR(error);
@@ -706,8 +705,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (error)
goto fail_free_inode;
-
+ flush_delayed_work(&ip->i_gl->gl_work);
glock_set_object(ip->i_gl, ip);
+
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
if (error)
goto fail_free_inode;
@@ -775,14 +775,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
return error;
fail_gunlock3:
+ glock_clear_object(io_gl, ip);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
gfs2_glock_put(io_gl);
fail_gunlock2:
if (io_gl)
clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
fail_free_inode:
- if (ip->i_gl)
+ if (ip->i_gl) {
+ glock_clear_object(ip->i_gl, ip);
gfs2_glock_put(ip->i_gl);
+ }
gfs2_rsqa_delete(ip, NULL);
fail_free_acls:
if (default_acl)
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0515f0a68637..65f33a0ac190 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -23,8 +23,6 @@
#include "sys.h"
#include "trace_gfs2.h"
-extern struct workqueue_struct *gfs2_control_wq;
-
/**
* gfs2_update_stats - Update time based stats
* @mv: Pointer to mean/variance structure to update
@@ -1059,6 +1057,7 @@ static void free_recover_size(struct lm_lockstruct *ls)
ls->ls_recover_submit = NULL;
ls->ls_recover_result = NULL;
ls->ls_recover_size = 0;
+ ls->ls_lvb_bits = NULL;
}
/* dlm calls before it does lock recovery */
@@ -1175,7 +1174,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
spin_unlock(&ls->ls_recover_spin);
}
-const struct dlm_lockspace_ops gdlm_lockspace_ops = {
+static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
.recover_prep = gdlm_recover_prep,
.recover_slot = gdlm_recover_slot,
.recover_done = gdlm_recover_done,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 9a624f694400..f72c44231406 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -898,6 +898,10 @@ static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
+
+ if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
+ return 1;
+
return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
atomic_read(&sdp->sd_log_thresh2);
}
@@ -919,6 +923,15 @@ int gfs2_logd(void *data)
while (!kthread_should_stop()) {
+ /* Check for errors writing to the journal */
+ if (sdp->sd_log_error) {
+ gfs2_lm_withdraw(sdp,
+ "GFS2: fsid=%s: error %d: "
+ "withdrawing the file system to "
+ "prevent further damage.\n",
+ sdp->sd_fsname, sdp->sd_log_error);
+ }
+
did_flush = false;
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
gfs2_ail1_empty(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 3010f9edd177..7dabbe721dba 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -207,8 +207,11 @@ static void gfs2_end_log_write(struct bio *bio)
struct page *page;
int i;
- if (bio->bi_status)
- fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
+ if (bio->bi_status) {
+ fs_err(sdp, "Error %d writing to journal, jid=%u\n",
+ bio->bi_status, sdp->sd_jdesc->jd_jid);
+ wake_up(&sdp->sd_logd_waitq);
+ }
bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index fabe1614f879..61ef6c9be816 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -419,8 +419,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
brelse(bh);
ret = -EIO;
+ } else {
+ *bhp = bh;
}
- *bhp = bh;
return ret;
}
@@ -452,7 +453,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (buffer_uptodate(first_bh))
goto out;
if (!buffer_locked(first_bh))
- ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh);
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &first_bh);
dblock++;
extlen--;
@@ -461,7 +462,9 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
bh = gfs2_getbuf(gl, dblock, CREATE);
if (!buffer_uptodate(bh) && !buffer_locked(bh))
- ll_rw_block(REQ_OP_READ, REQ_RAHEAD | REQ_META, 1, &bh);
+ ll_rw_block(REQ_OP_READ,
+ REQ_RAHEAD | REQ_META | REQ_PRIO,
+ 1, &bh);
brelse(bh);
dblock++;
extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e76058d34b74..c0a4b3778f3f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1113,7 +1113,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
return error;
}
- snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
+ snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
error = gfs2_sys_fs_add(sdp);
/*
@@ -1159,10 +1159,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
}
if (sdp->sd_args.ar_spectator)
- snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
+ snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.s",
sdp->sd_table_name);
else
- snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
+ snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.%u",
sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
error = init_inodes(sdp, DO);
@@ -1388,7 +1388,6 @@ static void gfs2_kill_sb(struct super_block *sb)
sdp->sd_root_dir = NULL;
sdp->sd_master_dir = NULL;
shrink_dcache_sb(sb);
- gfs2_delete_debugfs_file(sdp);
free_percpu(sdp->sd_lkstats);
kill_block_super(sb);
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c2ca9566b764..e647938432bd 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -730,7 +730,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
if (PageUptodate(page))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh)) {
- ll_rw_block(REQ_OP_READ, REQ_META, 1, &bh);
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
goto unlock_out;
@@ -1474,8 +1474,11 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
{
if (error == 0 || error == -EROFS)
return;
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
+ sdp->sd_log_error = error;
+ wake_up(&sdp->sd_logd_waitq);
+ }
}
static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 836e38ba5d0a..95b2a57ded33 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -705,8 +705,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
rb_erase(n, &sdp->sd_rindex_tree);
if (gl) {
- glock_set_object(gl, NULL);
- gfs2_glock_add_to_lru(gl);
+ glock_clear_object(gl, rgd);
gfs2_glock_put(gl);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fdedec379b78..769841185ce5 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -924,6 +924,7 @@ restart:
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
+ gfs2_delete_debugfs_file(sdp);
/* Unmount the locking protocol */
gfs2_lm_unmount(sdp);
@@ -943,9 +944,9 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
struct gfs2_sbd *sdp = sb->s_fs_info;
gfs2_quota_sync(sb, -1);
- if (wait && sdp)
+ if (wait)
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
- return 0;
+ return sdp->sd_log_error;
}
void gfs2_freeze_func(struct work_struct *work)
@@ -1295,7 +1296,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
* gfs2_drop_inode - Drop an inode (test for remote unlink)
* @inode: The inode to drop
*
- * If we've received a callback on an iopen lock then its because a
+ * If we've received a callback on an iopen lock then it's because a
* remote node tried to deallocate the inode but failed due to this node
* still having the inode open. Here we mark the link count zero
* since we know that it must have reached zero if the GLF_DEMOTE flag
@@ -1317,6 +1318,23 @@ static int gfs2_drop_inode(struct inode *inode)
if (test_bit(GLF_DEMOTE, &gl->gl_flags))
clear_nlink(inode);
}
+
+ /*
+ * When under memory pressure and an inode's link count has dropped to
+ * zero, defer deleting the inode to the delete workqueue. This avoids
+ * calling into DLM under memory pressure, which can deadlock.
+ */
+ if (!inode->i_nlink &&
+ unlikely(current->flags & PF_MEMALLOC) &&
+ gfs2_holder_initialized(&ip->i_iopen_gh)) {
+ struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+
+ gfs2_glock_hold(gl);
+ if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+ gfs2_glock_queue_put(gl);
+ return false;
+ }
+
return generic_drop_inode(inode);
}
@@ -1501,6 +1519,22 @@ out_qs:
}
/**
+ * gfs2_glock_put_eventually - Put a glock, deferring if under memory pressure
+ * @gl: The glock to put
+ *
+ * When under memory pressure, trigger a deferred glock put to make sure we
+ * won't call into DLM and deadlock. Otherwise, put the glock directly.
+ */
+
+static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
+{
+ if (current->flags & PF_MEMALLOC)
+ gfs2_glock_queue_put(gl);
+ else
+ gfs2_glock_put(gl);
+}
+
+/**
* gfs2_evict_inode - Remove an inode from cache
* @inode: The inode to evict
*
@@ -1544,9 +1578,14 @@ static void gfs2_evict_inode(struct inode *inode)
goto alloc_failed;
}
+ /* Deletes should never happen under memory pressure anymore. */
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
+ goto out;
+
/* Must not read inode block until block type has been verified */
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (unlikely(error)) {
+ glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out;
@@ -1562,6 +1601,12 @@ static void gfs2_evict_inode(struct inode *inode)
goto out_truncate;
}
+ /*
+ * The inode may have been recreated in the meantime.
+ */
+ if (inode->i_nlink)
+ goto out_truncate;
+
alloc_failed:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
@@ -1595,6 +1640,11 @@ alloc_failed:
goto out_unlock;
}
+ /* We're about to clear the bitmap for the dinode, but as soon as we
+ do, gfs2_create_inode can create another inode at the same block
+ location and try to set gl_object again. We clear gl_object here so
+ that subsequent inode creates don't see an old gl_object. */
+ glock_clear_object(ip->i_gl, ip);
error = gfs2_dinode_dealloc(ip);
goto out_unlock;
@@ -1623,14 +1673,17 @@ out_unlock:
gfs2_rs_deltree(&ip->i_res);
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
+ glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq(&ip->i_iopen_gh);
}
gfs2_holder_uninit(&ip->i_iopen_gh);
}
- if (gfs2_holder_initialized(&gh))
+ if (gfs2_holder_initialized(&gh)) {
+ glock_clear_object(ip->i_gl, ip);
gfs2_glock_dq_uninit(&gh);
+ }
if (error && error != GLR_TRYFAILED && error != -EROFS)
fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
out:
@@ -1640,15 +1693,19 @@ out:
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
- glock_set_object(ip->i_gl, NULL);
+ glock_clear_object(ip->i_gl, ip);
wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
gfs2_glock_add_to_lru(ip->i_gl);
- gfs2_glock_put(ip->i_gl);
+ gfs2_glock_put_eventually(ip->i_gl);
ip->i_gl = NULL;
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
- glock_set_object(ip->i_iopen_gh.gh_gl, NULL);
+ struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+
+ glock_clear_object(gl, ip);
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_hold(gl);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+ gfs2_glock_put_eventually(gl);
}
}
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index c81295f407f6..3926f95a6eb7 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -151,6 +151,7 @@ extern struct kmem_cache *gfs2_rgrpd_cachep;
extern struct kmem_cache *gfs2_quotad_cachep;
extern struct kmem_cache *gfs2_qadata_cachep;
extern mempool_t *gfs2_page_pool;
+extern struct workqueue_struct *gfs2_control_wq;
static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
unsigned int *p)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 54179554c7d2..ea09e41dbb49 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -25,6 +25,7 @@
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
+#include "super.h"
#include "trans.h"
#include "util.h"
@@ -1209,8 +1210,12 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
if (namel > GFS2_EA_MAX_NAME_LEN)
return -ERANGE;
- if (value == NULL)
- return gfs2_xattr_remove(ip, type, name);
+ if (value == NULL) {
+ error = gfs2_xattr_remove(ip, type, name);
+ if (error == -ENODATA && !(flags & XATTR_REPLACE))
+ error = 0;
+ return error;
+ }
if (ea_check_size(sdp, namel, size))
return -ERANGE;