diff options
Diffstat (limited to 'fs/xfs/xfs_inode.c')
| -rw-r--r-- | fs/xfs/xfs_inode.c | 144 | 
1 files changed, 62 insertions, 82 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8cd6e8d8fe9c..d53c39de7d05 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1940,10 +1940,10 @@ xfs_ifree_cluster(  	int			blks_per_cluster;  	int			nbufs;  	int			ninodes; -	int			i, j, found, pre_flushed; +	int			i, j;  	xfs_daddr_t		blkno;  	xfs_buf_t		*bp; -	xfs_inode_t		*ip, **ip_found; +	xfs_inode_t		*ip;  	xfs_inode_log_item_t	*iip;  	xfs_log_item_t		*lip;  	struct xfs_perag	*pag; @@ -1960,114 +1960,97 @@ xfs_ifree_cluster(  		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;  	} -	ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS); -  	for (j = 0; j < nbufs; j++, inum += ninodes) { +		int	found = 0; +  		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),  					 XFS_INO_TO_AGBNO(mp, inum)); +		/* +		 * We obtain and lock the backing buffer first in the process +		 * here, as we have to ensure that any dirty inode that we +		 * can't get the flush lock on is attached to the buffer. +		 * If we scan the in-memory inodes first, then buffer IO can +		 * complete before we get a lock on it, and hence we may fail +		 * to mark all the active inodes on the buffer stale. +		 */ +		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, +					mp->m_bsize * blks_per_cluster, +					XBF_LOCK); + +		/* +		 * Walk the inodes already attached to the buffer and mark them +		 * stale. These will all have the flush locks held, so an +		 * in-memory inode walk can't lock them. +		 */ +		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); +		while (lip) { +			if (lip->li_type == XFS_LI_INODE) { +				iip = (xfs_inode_log_item_t *)lip; +				ASSERT(iip->ili_logged == 1); +				lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; +				xfs_trans_ail_copy_lsn(mp->m_ail, +							&iip->ili_flush_lsn, +							&iip->ili_item.li_lsn); +				xfs_iflags_set(iip->ili_inode, XFS_ISTALE); +				found++; +			} +			lip = lip->li_bio_list; +		}  		/* -		 * Look for each inode in memory and attempt to lock it, -		 * we can be racing with flush and tail pushing here. -		 * any inode we get the locks on, add to an array of -		 * inode items to process later. +		 * For each inode in memory attempt to add it to the inode +		 * buffer and set it up for being staled on buffer IO +		 * completion.  This is safe as we've locked out tail pushing +		 * and flushing by locking the buffer.  		 * -		 * The get the buffer lock, we could beat a flush -		 * or tail pushing thread to the lock here, in which -		 * case they will go looking for the inode buffer -		 * and fail, we need some other form of interlock -		 * here. +		 * We have already marked every inode that was part of a +		 * transaction stale above, which means there is no point in +		 * even trying to lock them.  		 */ -		found = 0;  		for (i = 0; i < ninodes; i++) {  			read_lock(&pag->pag_ici_lock);  			ip = radix_tree_lookup(&pag->pag_ici_root,  					XFS_INO_TO_AGINO(mp, (inum + i))); -			/* Inode not in memory or we found it already, -			 * nothing to do -			 */ +			/* Inode not in memory or stale, nothing to do */  			if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {  				read_unlock(&pag->pag_ici_lock);  				continue;  			} -			if (xfs_inode_clean(ip)) { -				read_unlock(&pag->pag_ici_lock); -				continue; -			} - -			/* If we can get the locks then add it to the -			 * list, otherwise by the time we get the bp lock -			 * below it will already be attached to the -			 * inode buffer. -			 */ - -			/* This inode will already be locked - by us, lets -			 * keep it that way. -			 */ - -			if (ip == free_ip) { -				if (xfs_iflock_nowait(ip)) { -					xfs_iflags_set(ip, XFS_ISTALE); -					if (xfs_inode_clean(ip)) { -						xfs_ifunlock(ip); -					} else { -						ip_found[found++] = ip; -					} -				} +			/* don't try to lock/unlock the current inode */ +			if (ip != free_ip && +			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {  				read_unlock(&pag->pag_ici_lock);  				continue;  			} +			read_unlock(&pag->pag_ici_lock); -			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { -				if (xfs_iflock_nowait(ip)) { -					xfs_iflags_set(ip, XFS_ISTALE); - -					if (xfs_inode_clean(ip)) { -						xfs_ifunlock(ip); -						xfs_iunlock(ip, XFS_ILOCK_EXCL); -					} else { -						ip_found[found++] = ip; -					} -				} else { +			if (!xfs_iflock_nowait(ip)) { +				if (ip != free_ip)  					xfs_iunlock(ip, XFS_ILOCK_EXCL); -				} +				continue;  			} -			read_unlock(&pag->pag_ici_lock); -		} -		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,  -					mp->m_bsize * blks_per_cluster, -					XBF_LOCK); - -		pre_flushed = 0; -		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); -		while (lip) { -			if (lip->li_type == XFS_LI_INODE) { -				iip = (xfs_inode_log_item_t *)lip; -				ASSERT(iip->ili_logged == 1); -				lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; -				xfs_trans_ail_copy_lsn(mp->m_ail, -							&iip->ili_flush_lsn, -							&iip->ili_item.li_lsn); -				xfs_iflags_set(iip->ili_inode, XFS_ISTALE); -				pre_flushed++; +			xfs_iflags_set(ip, XFS_ISTALE); +			if (xfs_inode_clean(ip)) { +				ASSERT(ip != free_ip); +				xfs_ifunlock(ip); +				xfs_iunlock(ip, XFS_ILOCK_EXCL); +				continue;  			} -			lip = lip->li_bio_list; -		} -		for (i = 0; i < found; i++) { -			ip = ip_found[i];  			iip = ip->i_itemp; -  			if (!iip) { +				/* inode with unlogged changes only */ +				ASSERT(ip != free_ip);  				ip->i_update_core = 0;  				xfs_ifunlock(ip);  				xfs_iunlock(ip, XFS_ILOCK_EXCL);  				continue;  			} +			found++;  			iip->ili_last_fields = iip->ili_format.ilf_fields;  			iip->ili_format.ilf_fields = 0; @@ -2078,17 +2061,16 @@ xfs_ifree_cluster(  			xfs_buf_attach_iodone(bp,  				(void(*)(xfs_buf_t*,xfs_log_item_t*))  				xfs_istale_done, (xfs_log_item_t *)iip); -			if (ip != free_ip) { + +			if (ip != free_ip)  				xfs_iunlock(ip, XFS_ILOCK_EXCL); -			}  		} -		if (found || pre_flushed) +		if (found)  			xfs_trans_stale_inode_buf(tp, bp);  		xfs_trans_binval(tp, bp);  	} -	kmem_free(ip_found);  	xfs_perag_put(pag);  } @@ -2649,8 +2631,6 @@ xfs_iflush_cluster(  	int			i;  	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); -	ASSERT(pag->pagi_inodeok); -	ASSERT(pag->pag_ici_init);  	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;  	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);  | 
