diff options
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/acl.c | 29 | ||||
-rw-r--r-- | fs/ocfs2/alloc.c | 1 | ||||
-rw-r--r-- | fs/ocfs2/aops.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 1 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/dlmfs/userdlm.c | 1 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 106 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.h | 18 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 71 | ||||
-rw-r--r-- | fs/ocfs2/file.h | 4 | ||||
-rw-r--r-- | fs/ocfs2/mmap.c | 15 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 1 |
14 files changed, 204 insertions, 51 deletions
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index bed1fcb63088..dc22ba8c710f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -283,16 +283,14 @@ int ocfs2_set_acl(handle_t *handle, int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type) { struct buffer_head *bh = NULL; - int status = 0; + int status, had_lock; + struct ocfs2_lock_holder oh; - status = ocfs2_inode_lock(inode, &bh, 1); - if (status < 0) { - if (status != -ENOENT) - mlog_errno(status); - return status; - } + had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); + if (had_lock < 0) + return had_lock; status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL); - ocfs2_inode_unlock(inode, 1); + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); brelse(bh); return status; } @@ -302,21 +300,20 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; struct posix_acl *acl; - int ret; + int had_lock; + struct ocfs2_lock_holder oh; osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return NULL; - ret = ocfs2_inode_lock(inode, &di_bh, 0); - if (ret < 0) { - if (ret != -ENOENT) - mlog_errno(ret); - return ERR_PTR(ret); - } + + had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); + if (had_lock < 0) + return ERR_PTR(had_lock); acl = ocfs2_get_acl_nolock(inode, type, di_bh); - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); brelse(di_bh); return acl; } diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index d4ec0d8961a6..fb15a96df0b6 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -30,6 +30,7 @@ #include <linux/swap.h> #include <linux/quotaops.h> #include <linux/blkdev.h> +#include <linux/sched/signal.h> #include <cluster/masklog.h> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 11556b7d93ec..88a31e9340a0 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -608,7 +608,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, int ret = 0; struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; unsigned int block_end, block_start; - unsigned int bsize = 1 << inode->i_blkbits; + unsigned int bsize = i_blocksize(inode); if (!page_has_buffers(page)) create_empty_buffers(page, bsize, 0); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ec000575e863..d0ab7e56d0b4 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -54,6 +54,7 @@ */ #include <linux/kernel.h> +#include <linux/sched/mm.h> #include <linux/jiffies.h> #include <linux/slab.h> #include <linux/idr.h> @@ -1862,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); if (ret < 0) goto out; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 32fd261ae13d..a2b19fbdcf46 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -33,6 +33,7 @@ #include <linux/delay.h> #include <linux/err.h> #include <linux/debugfs.h> +#include <linux/sched/signal.h> #include "cluster/heartbeat.h" #include "cluster/nodemanager.h" diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 7025d8c27999..3e04279446e8 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2924,7 +2924,7 @@ again: /* * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for * another try; otherwise, we are sure the MIGRATING state is there, - * drop the unneded state which blocked threads trying to DIRTY + * drop the unneeded state which blocked threads trying to DIRTY */ spin_lock(&res->spinlock); BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index f70cda2f090d..9cecf4857195 100644 --- a/fs/ocfs2/dlmfs/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -28,6 +28,7 @@ */ #include <linux/signal.h> +#include <linux/sched/signal.h> #include <linux/module.h> #include <linux/fs.h> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 77d1632e905d..3b7c937a36b5 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -33,6 +33,7 @@ #include <linux/seq_file.h> #include <linux/time.h> #include <linux/quotaops.h> +#include <linux/sched/signal.h> #define MLOG_MASK_PREFIX ML_DLM_GLUE #include <cluster/masklog.h> @@ -532,6 +533,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) init_waitqueue_head(&res->l_event); INIT_LIST_HEAD(&res->l_blocked_list); INIT_LIST_HEAD(&res->l_mask_waiters); + INIT_LIST_HEAD(&res->l_holders); } void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, @@ -749,6 +751,50 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res) res->l_flags = 0UL; } +/* + * Keep a list of processes who have interest in a lockres. + * Note: this is now only uesed for check recursive cluster locking. + */ +static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres, + struct ocfs2_lock_holder *oh) +{ + INIT_LIST_HEAD(&oh->oh_list); + oh->oh_owner_pid = get_pid(task_pid(current)); + + spin_lock(&lockres->l_lock); + list_add_tail(&oh->oh_list, &lockres->l_holders); + spin_unlock(&lockres->l_lock); +} + +static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, + struct ocfs2_lock_holder *oh) +{ + spin_lock(&lockres->l_lock); + list_del(&oh->oh_list); + spin_unlock(&lockres->l_lock); + + put_pid(oh->oh_owner_pid); +} + +static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_lock_holder *oh; + struct pid *pid; + + /* look in the list of holders for one with the current task as owner */ + spin_lock(&lockres->l_lock); + pid = task_pid(current); + list_for_each_entry(oh, &lockres->l_holders, oh_list) { + if (oh->oh_owner_pid == pid) { + spin_unlock(&lockres->l_lock); + return 1; + } + } + spin_unlock(&lockres->l_lock); + + return 0; +} + static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, int level) { @@ -2333,8 +2379,9 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, goto getbh; } - if (ocfs2_mount_local(osb)) - goto local; + if ((arg_flags & OCFS2_META_LOCK_GETBH) || + ocfs2_mount_local(osb)) + goto update; if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) ocfs2_wait_for_recovery(osb); @@ -2363,7 +2410,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) ocfs2_wait_for_recovery(osb); -local: +update: /* * We only see this flag if we're being called from * ocfs2_read_locked_inode(). It means we're locking an inode @@ -2497,6 +2544,59 @@ void ocfs2_inode_unlock(struct inode *inode, ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); } +/* + * This _tracker variantes are introduced to deal with the recursive cluster + * locking issue. The idea is to keep track of a lock holder on the stack of + * the current process. If there's a lock holder on the stack, we know the + * task context is already protected by cluster locking. Currently, they're + * used in some VFS entry routines. + * + * return < 0 on error, return == 0 if there's no lock holder on the stack + * before this call, return == 1 if this call would be a recursive locking. + */ +int ocfs2_inode_lock_tracker(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + struct ocfs2_lock_holder *oh) +{ + int status; + int arg_flags = 0, has_locked; + struct ocfs2_lock_res *lockres; + + lockres = &OCFS2_I(inode)->ip_inode_lockres; + has_locked = ocfs2_is_locked_by_me(lockres); + /* Just get buffer head if the cluster lock has been taken */ + if (has_locked) + arg_flags = OCFS2_META_LOCK_GETBH; + + if (likely(!has_locked || ret_bh)) { + status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + return status; + } + } + if (!has_locked) + ocfs2_add_holder(lockres, oh); + + return has_locked; +} + +void ocfs2_inode_unlock_tracker(struct inode *inode, + int ex, + struct ocfs2_lock_holder *oh, + int had_lock) +{ + struct ocfs2_lock_res *lockres; + + lockres = &OCFS2_I(inode)->ip_inode_lockres; + if (!had_lock) { + ocfs2_remove_holder(lockres, oh); + ocfs2_inode_unlock(inode, ex); + } +} + int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) { struct ocfs2_lock_res *lockres; diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d293a22c32c5..a7fc18ba0dc1 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -70,6 +70,11 @@ struct ocfs2_orphan_scan_lvb { __be32 lvb_os_seqno; }; +struct ocfs2_lock_holder { + struct list_head oh_list; + struct pid *oh_owner_pid; +}; + /* ocfs2_inode_lock_full() 'arg_flags' flags */ /* don't wait on recovery. */ #define OCFS2_META_LOCK_RECOVERY (0x01) @@ -77,6 +82,8 @@ struct ocfs2_orphan_scan_lvb { #define OCFS2_META_LOCK_NOQUEUE (0x02) /* don't block waiting for the downconvert thread, instead return -EAGAIN */ #define OCFS2_LOCK_NONBLOCK (0x04) +/* just get back disk inode bh if we've got cluster lock. */ +#define OCFS2_META_LOCK_GETBH (0x08) /* Locking subclasses of inode cluster lock */ enum { @@ -170,4 +177,15 @@ void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); /* To set the locking protocol on module initialization */ void ocfs2_set_locking_protocol(void); + +/* The _tracker pair is used to avoid cluster recursive locking */ +int ocfs2_inode_lock_tracker(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + struct ocfs2_lock_holder *oh); +void ocfs2_inode_unlock_tracker(struct inode *inode, + int ex, + struct ocfs2_lock_holder *oh, + int had_lock); + #endif /* DLMGLUE_H */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c4889655d32b..bfeb647459d9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -808,7 +808,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, /* We know that zero_from is block aligned */ for (block_start = zero_from; block_start < zero_to; block_start = block_end) { - block_end = block_start + (1 << inode->i_blkbits); + block_end = block_start + i_blocksize(inode); /* * block_start is block-aligned. Bump it by one to force @@ -1138,6 +1138,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) handle_t *handle = NULL; struct dquot *transfer_to[MAXQUOTAS] = { }; int qtype; + int had_lock; + struct ocfs2_lock_holder oh; trace_ocfs2_setattr(inode, dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -1173,11 +1175,30 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } } - status = ocfs2_inode_lock(inode, &bh, 1); - if (status < 0) { - if (status != -ENOENT) - mlog_errno(status); + had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); + if (had_lock < 0) { + status = had_lock; goto bail_unlock_rw; + } else if (had_lock) { + /* + * As far as we know, ocfs2_setattr() could only be the first + * VFS entry point in the call chain of recursive cluster + * locking issue. + * + * For instance: + * chmod_common() + * notify_change() + * ocfs2_setattr() + * posix_acl_chmod() + * ocfs2_iop_get_acl() + * + * But, we're not 100% sure if it's always true, because the + * ordering of the VFS entry points in the call chain is out + * of our control. So, we'd better dump the stack here to + * catch the other cases of recursive locking. + */ + mlog(ML_ERROR, "Another case of recursive locking:\n"); + dump_stack(); } inode_locked = 1; @@ -1260,8 +1281,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: - if (status) { - ocfs2_inode_unlock(inode, 1); + if (status && inode_locked) { + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); inode_locked = 0; } bail_unlock_rw: @@ -1279,22 +1300,21 @@ bail: mlog_errno(status); } if (inode_locked) - ocfs2_inode_unlock(inode, 1); + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); brelse(bh); return status; } -int ocfs2_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) +int ocfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); - struct super_block *sb = dentry->d_sb; + struct inode *inode = d_inode(path->dentry); + struct super_block *sb = path->dentry->d_sb; struct ocfs2_super *osb = sb->s_fs_info; int err; - err = ocfs2_inode_revalidate(dentry); + err = ocfs2_inode_revalidate(path->dentry); if (err) { if (err != -ENOENT) mlog_errno(err); @@ -1320,21 +1340,32 @@ bail: int ocfs2_permission(struct inode *inode, int mask) { - int ret; + int ret, had_lock; + struct ocfs2_lock_holder oh; if (mask & MAY_NOT_BLOCK) return -ECHILD; - ret = ocfs2_inode_lock(inode, NULL, 0); - if (ret) { - if (ret != -ENOENT) - mlog_errno(ret); + had_lock = ocfs2_inode_lock_tracker(inode, NULL, 0, &oh); + if (had_lock < 0) { + ret = had_lock; goto out; + } else if (had_lock) { + /* See comments in ocfs2_setattr() for details. + * The call chain of this case could be: + * do_sys_open() + * may_open() + * inode_permission() + * ocfs2_permission() + * ocfs2_iop_get_acl() + */ + mlog(ML_ERROR, "Another case of recursive locking:\n"); + dump_stack(); } ret = generic_permission(inode, mask); - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); out: return ret; } diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 897fd9a2e51d..1fdc9839cd93 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -68,8 +68,8 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, u32 clusters_to_add, int mark_unwritten); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); -int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int ocfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ocfs2_permission(struct inode *inode, int mask); int ocfs2_should_update_atime(struct inode *inode, diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 429088786e93..098f5c712569 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -44,17 +44,18 @@ #include "ocfs2_trace.h" -static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) +static int ocfs2_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; sigset_t oldset; int ret; ocfs2_block_signals(&oldset); - ret = filemap_fault(area, vmf); + ret = filemap_fault(vmf); ocfs2_unblock_signals(&oldset); - trace_ocfs2_fault(OCFS2_I(area->vm_file->f_mapping->host)->ip_blkno, - area, vmf->page, vmf->pgoff); + trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno, + vma, vmf->page, vmf->pgoff); return ret; } @@ -127,10 +128,10 @@ out: return ret; } -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ocfs2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct buffer_head *di_bh = NULL; sigset_t oldset; int ret; @@ -160,7 +161,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ down_write(&OCFS2_I(inode)->ip_alloc_sem); - ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page); + ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page); up_write(&OCFS2_I(inode)->ip_alloc_sem); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7e5958b0be6b..0c39d71c67a1 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -172,6 +172,7 @@ struct ocfs2_lock_res { struct list_head l_blocked_list; struct list_head l_mask_waiters; + struct list_head l_holders; unsigned long l_flags; char l_name[OCFS2_LOCK_ID_MAX_LEN]; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a24e42f95341..ca1646fbcaef 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -42,6 +42,7 @@ #include <linux/seq_file.h> #include <linux/quotaops.h> #include <linux/cleancache.h> +#include <linux/signal.h> #define CREATE_TRACE_POINTS #include "ocfs2_trace.h" |