diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 20:49:22 +0300 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 20:49:22 +0300 | 
| commit | 07be1337b9e8bfcd855c6e9175b5066a30ac609b (patch) | |
| tree | e40ad01dc89f6eb17d461939b809fea3387fc2a5 /fs/btrfs/compression.c | |
| parent | 63d222b9d277c4d7bf08afd1631a7f8e327a825c (diff) | |
| parent | c315ef8d9db7f1a0ebd023a395ebdfde1c68057e (diff) | |
| download | linux-07be1337b9e8bfcd855c6e9175b5066a30ac609b.tar.xz | |
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This has our merge window series of cleanups and fixes.  These target
  a wide range of issues, but do include some important fixes for
  qgroups, O_DIRECT, and fsync handling.  Jeff Mahoney moved around a
  few definitions to make them easier for userland to consume.
  Also whiteout support is included now that issues with overlayfs have
  been cleared up.
  I have one more fix pending for page faults during btrfs_copy_from_user,
  but I wanted to get this bulk out the door first"
* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
  btrfs: fix memory leak during RAID 5/6 device replacement
  Btrfs: add semaphore to synchronize direct IO writes with fsync
  Btrfs: fix race between block group relocation and nocow writes
  Btrfs: fix race between fsync and direct IO writes for prealloc extents
  Btrfs: fix number of transaction units for renames with whiteout
  Btrfs: pin logs earlier when doing a rename exchange operation
  Btrfs: unpin logs if rename exchange operation fails
  Btrfs: fix inode leak on failure to setup whiteout inode in rename
  btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
  Btrfs: pin log earlier when renaming
  Btrfs: unpin log if rename operation fails
  Btrfs: don't do unnecessary delalloc flushes when relocating
  Btrfs: don't wait for unrelated IO to finish before relocation
  Btrfs: fix empty symlink after creating symlink and fsync parent dir
  Btrfs: fix for incorrect directory entries after fsync log replay
  btrfs: build fixup for qgroup_account_snapshot
  btrfs: qgroup: Fix qgroup accounting when creating snapshot
  Btrfs: fix fspath error deallocation
  btrfs: make find_workspace warn if there are no workspaces
  btrfs: make find_workspace always succeed
  ...
Diffstat (limited to 'fs/btrfs/compression.c')
| -rw-r--r-- | fs/btrfs/compression.c | 85 | 
1 files changed, 61 insertions, 24 deletions
| diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ff61a41ac90b..658c39b70fba 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -743,8 +743,11 @@ out:  static struct {  	struct list_head idle_ws;  	spinlock_t ws_lock; -	int num_ws; -	atomic_t alloc_ws; +	/* Number of free workspaces */ +	int free_ws; +	/* Total number of allocated workspaces */ +	atomic_t total_ws; +	/* Waiters for a free workspace */  	wait_queue_head_t ws_wait;  } btrfs_comp_ws[BTRFS_COMPRESS_TYPES]; @@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)  	int i;  	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { +		struct list_head *workspace; +  		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);  		spin_lock_init(&btrfs_comp_ws[i].ws_lock); -		atomic_set(&btrfs_comp_ws[i].alloc_ws, 0); +		atomic_set(&btrfs_comp_ws[i].total_ws, 0);  		init_waitqueue_head(&btrfs_comp_ws[i].ws_wait); + +		/* +		 * Preallocate one workspace for each compression type so +		 * we can guarantee forward progress in the worst case +		 */ +		workspace = btrfs_compress_op[i]->alloc_workspace(); +		if (IS_ERR(workspace)) { +			printk(KERN_WARNING +	"BTRFS: cannot preallocate compression workspace, will try later"); +		} else { +			atomic_set(&btrfs_comp_ws[i].total_ws, 1); +			btrfs_comp_ws[i].free_ws = 1; +			list_add(workspace, &btrfs_comp_ws[i].idle_ws); +		}  	}  }  /* - * this finds an available workspace or allocates a new one - * ERR_PTR is returned if things go bad. + * This finds an available workspace or allocates a new one. + * If it's not possible to allocate a new one, waits until there's one. + * Preallocation makes a forward progress guarantees and we do not return + * errors.   */  static struct list_head *find_workspace(int type)  { @@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)  	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;  	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock; -	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws; +	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;  	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait; -	int *num_ws			= &btrfs_comp_ws[idx].num_ws; +	int *free_ws			= &btrfs_comp_ws[idx].free_ws;  again:  	spin_lock(ws_lock);  	if (!list_empty(idle_ws)) {  		workspace = idle_ws->next;  		list_del(workspace); -		(*num_ws)--; +		(*free_ws)--;  		spin_unlock(ws_lock);  		return workspace;  	} -	if (atomic_read(alloc_ws) > cpus) { +	if (atomic_read(total_ws) > cpus) {  		DEFINE_WAIT(wait);  		spin_unlock(ws_lock);  		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE); -		if (atomic_read(alloc_ws) > cpus && !*num_ws) +		if (atomic_read(total_ws) > cpus && !*free_ws)  			schedule();  		finish_wait(ws_wait, &wait);  		goto again;  	} -	atomic_inc(alloc_ws); +	atomic_inc(total_ws);  	spin_unlock(ws_lock);  	workspace = btrfs_compress_op[idx]->alloc_workspace();  	if (IS_ERR(workspace)) { -		atomic_dec(alloc_ws); +		atomic_dec(total_ws);  		wake_up(ws_wait); + +		/* +		 * Do not return the error but go back to waiting. There's a +		 * workspace preallocated for each type and the compression +		 * time is bounded so we get to a workspace eventually. This +		 * makes our caller's life easier. +		 * +		 * To prevent silent and low-probability deadlocks (when the +		 * initial preallocation fails), check if there are any +		 * workspaces at all. +		 */ +		if (atomic_read(total_ws) == 0) { +			static DEFINE_RATELIMIT_STATE(_rs, +					/* once per minute */ 60 * HZ, +					/* no burst */ 1); + +			if (__ratelimit(&_rs)) { +				printk(KERN_WARNING +			    "no compression workspaces, low memory, retrying"); +			} +		} +		goto again;  	}  	return workspace;  } @@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)  	int idx = type - 1;  	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;  	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock; -	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws; +	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;  	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait; -	int *num_ws			= &btrfs_comp_ws[idx].num_ws; +	int *free_ws			= &btrfs_comp_ws[idx].free_ws;  	spin_lock(ws_lock); -	if (*num_ws < num_online_cpus()) { +	if (*free_ws < num_online_cpus()) {  		list_add(workspace, idle_ws); -		(*num_ws)++; +		(*free_ws)++;  		spin_unlock(ws_lock);  		goto wake;  	}  	spin_unlock(ws_lock);  	btrfs_compress_op[idx]->free_workspace(workspace); -	atomic_dec(alloc_ws); +	atomic_dec(total_ws);  wake:  	/*  	 * Make sure counter is updated before we wake up waiters. @@ -857,7 +900,7 @@ static void free_workspaces(void)  			workspace = btrfs_comp_ws[i].idle_ws.next;  			list_del(workspace);  			btrfs_compress_op[i]->free_workspace(workspace); -			atomic_dec(&btrfs_comp_ws[i].alloc_ws); +			atomic_dec(&btrfs_comp_ws[i].total_ws);  		}  	}  } @@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,  	int ret;  	workspace = find_workspace(type); -	if (IS_ERR(workspace)) -		return PTR_ERR(workspace);  	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,  						      start, len, pages, @@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,  	int ret;  	workspace = find_workspace(type); -	if (IS_ERR(workspace)) -		return PTR_ERR(workspace);  	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,  							 disk_start, @@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,  	int ret;  	workspace = find_workspace(type); -	if (IS_ERR(workspace)) -		return PTR_ERR(workspace);  	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,  						  dest_page, start_byte, | 
