From ebd8fef304f99da84d4a52ad056f6137ac9652d4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:29 +0900 Subject: percpu_counter: make percpu_counters_lock irq-safe percpu_counter is scheduled to grow @gfp support to allow atomic initialization. This patch makes percpu_counters_lock irq-safe so that it can be safely used from atomic contexts. Signed-off-by: Tejun Heo --- lib/percpu_counter.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 7dd33577b905..3fde78275cd1 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -115,6 +115,8 @@ EXPORT_SYMBOL(__percpu_counter_sum); int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, struct lock_class_key *key) { + unsigned long flags __maybe_unused; + raw_spin_lock_init(&fbc->lock); lockdep_set_class(&fbc->lock, key); fbc->count = amount; @@ -126,9 +128,9 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, #ifdef CONFIG_HOTPLUG_CPU INIT_LIST_HEAD(&fbc->list); - spin_lock(&percpu_counters_lock); + spin_lock_irqsave(&percpu_counters_lock, flags); list_add(&fbc->list, &percpu_counters); - spin_unlock(&percpu_counters_lock); + spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif return 0; } @@ -136,15 +138,17 @@ EXPORT_SYMBOL(__percpu_counter_init); void percpu_counter_destroy(struct percpu_counter *fbc) { + unsigned long flags __maybe_unused; + if (!fbc->counters) return; debug_percpu_counter_deactivate(fbc); #ifdef CONFIG_HOTPLUG_CPU - spin_lock(&percpu_counters_lock); + spin_lock_irqsave(&percpu_counters_lock, flags); list_del(&fbc->list); - spin_unlock(&percpu_counters_lock); + spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif free_percpu(fbc->counters); fbc->counters = NULL; @@ -173,7 +177,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, return NOTIFY_OK; cpu = (unsigned long)hcpu; - spin_lock(&percpu_counters_lock); + spin_lock_irq(&percpu_counters_lock); list_for_each_entry(fbc, &percpu_counters, list) { s32 *pcount; unsigned long flags; @@ -184,7 +188,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, *pcount = 0; raw_spin_unlock_irqrestore(&fbc->lock, flags); } - spin_unlock(&percpu_counters_lock); + spin_unlock_irq(&percpu_counters_lock); #endif return NOTIFY_OK; } -- cgit v1.2.3 From 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:29 +0900 Subject: percpu_counter: add @gfp to percpu_counter_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_counter_init() so that !GFP_KERNEL allocation masks can be used with percpu_counters too. We could have left percpu_counter_init() alone and added percpu_counter_init_gfp(); however, the number of users isn't that high and introducing _gfp variants to all percpu data structures would be quite ugly, so let's just do the conversion. This is the one with the most users. Other percpu data structures are a lot easier to convert. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Acked-by: Jan Kara Acked-by: "David S. 
Miller" Cc: x86@kernel.org Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andrew Morton --- arch/x86/kvm/mmu.c | 2 +- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent-tree.c | 2 +- fs/ext2/super.c | 6 +++--- fs/ext3/super.c | 6 +++--- fs/ext4/super.c | 14 +++++++++----- fs/file_table.c | 2 +- fs/quota/dquot.c | 2 +- fs/super.c | 3 ++- include/linux/percpu_counter.h | 10 ++++++---- include/net/dst_ops.h | 2 +- include/net/inet_frag.h | 2 +- lib/flex_proportions.c | 4 ++-- lib/percpu_counter.c | 4 ++-- lib/proportions.c | 6 +++--- mm/backing-dev.c | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 2 +- mm/shmem.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_memcontrol.c | 2 +- net/sctp/protocol.c | 2 +- 23 files changed, 49 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da7..5bd53f206f4f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4534,7 +4534,7 @@ int kvm_mmu_module_init(void) if (!mmu_page_header_cache) goto nomem; - if (percpu_counter_init(&kvm_total_used_mmu_pages, 0)) + if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) goto nomem; register_shrinker(&mmu_shrinker); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 08e65e9cf2aa..61dae01788d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1180,7 +1180,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) if (!writers) return ERR_PTR(-ENOMEM); - ret = percpu_counter_init(&writers->counter, 0); + ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL); if (ret < 0) { kfree(writers); return ERR_PTR(ret); @@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb, goto fail_srcu; } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_bdi; @@ -2193,13 +2193,13 @@ int open_ctree(struct super_block *sb, fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * (1 + ilog2(nr_cpu_ids)); - ret = percpu_counter_init(&fs_info->delalloc_bytes, 0); + ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_dirty_metadata_bytes; } - ret = percpu_counter_init(&fs_info->bio_counter, 0); + ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_delalloc_bytes; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813537f362f9..94ec71eda86b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3493,7 +3493,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (!found) return -ENOMEM; - ret = percpu_counter_init(&found->total_bytes_pinned, 0); + ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL); if (ret) { kfree(found); return ret; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b88edc05c230..170dc41e8bf4 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1067,14 +1067,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext2_count_free_blocks(sb)); + ext2_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext2_count_free_inodes(sb)); + ext2_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext2_count_dirs(sb)); + ext2_count_dirs(sb), GFP_KERNEL); } if (err) { ext2_msg(sb, KERN_ERR, 
"error: insufficient memory"); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 08cdfe5461e3..eba021b88440 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2039,14 +2039,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount2; } err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); + ext3_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); + ext3_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); + ext3_count_dirs(sb), GFP_KERNEL); } if (err) { ext3_msg(sb, KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b9..e25ca8fdde7d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3891,7 +3891,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Register extent status tree shrinker */ ext4_es_register_shrinker(sbi); - if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) { + err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); + if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount3; } @@ -4105,17 +4106,20 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); - err = percpu_counter_init(&sbi->s_freeclusters_counter, block); + err = percpu_counter_init(&sbi->s_freeclusters_counter, block, + GFP_KERNEL); if (!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); - err = percpu_counter_init(&sbi->s_freeinodes_counter, freei); + err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, + GFP_KERNEL); } if (!err) err = percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); + ext4_count_dirs(sb), GFP_KERNEL); if (!err) - err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); + err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, + GFP_KERNEL); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount6; diff --git a/fs/file_table.c b/fs/file_table.c index 385bfd31512a..0bab12b20460 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -331,5 +331,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - percpu_counter_init(&nr_files, 0); + percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f2d0eee9d1f1..8b663b2d9562 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2725,7 +2725,7 @@ static int __init dquot_init(void) panic("Cannot create dquot hash table"); for (i = 0; i < _DQST_DQSTAT_LAST; i++) { - ret = percpu_counter_init(&dqstats.counter[i], 0); + ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL); if (ret) panic("Cannot create dquot stat counters"); } diff --git a/fs/super.c b/fs/super.c index b9a214d2fe98..1b836107acee 100644 --- a/fs/super.c +++ b/fs/super.c @@ -175,7 +175,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) goto fail; for (i = 0; i < SB_FREEZE_LEVELS; i++) { - if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) + if (percpu_counter_init(&s->s_writers.counter[i], 0, + GFP_KERNEL) < 0) goto fail; lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], &type->s_writers_key[i], 0); diff --git a/include/linux/percpu_counter.h 
b/include/linux/percpu_counter.h index d5dd4657c8d6..50e50095c8d1 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_SMP @@ -26,14 +27,14 @@ struct percpu_counter { extern int percpu_counter_batch; -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key); -#define percpu_counter_init(fbc, value) \ +#define percpu_counter_init(fbc, value, gfp) \ ({ \ static struct lock_class_key __key; \ \ - __percpu_counter_init(fbc, value, &__key); \ + __percpu_counter_init(fbc, value, gfp, &__key); \ }) void percpu_counter_destroy(struct percpu_counter *fbc); @@ -89,7 +90,8 @@ struct percpu_counter { s64 count; }; -static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount) +static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, + gfp_t gfp) { fbc->count = amount; return 0; diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 2f26dfb8450e..1f99a1de0e4f 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -63,7 +63,7 @@ static inline void dst_entries_add(struct dst_ops *dst, int val) static inline int dst_entries_init(struct dst_ops *dst) { - return percpu_counter_init(&dst->pcpuc_entries, 0); + return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL); } static inline void dst_entries_destroy(struct dst_ops *dst) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 65a8855e99fe..8d1765577acc 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -151,7 +151,7 @@ static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) static inline void init_frag_mem_limit(struct netns_frags *nf) { - percpu_counter_init(&nf->mem, 0); + percpu_counter_init(&nf->mem, 0, GFP_KERNEL); } static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index ebf3bac460b0..b9d026bfcf38 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -40,7 +40,7 @@ int fprop_global_init(struct fprop_global *p) p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... 
*/ - err = percpu_counter_init(&p->events, 1); + err = percpu_counter_init(&p->events, 1, GFP_KERNEL); if (err) return err; seqcount_init(&p->sequence); @@ -172,7 +172,7 @@ int fprop_local_init_percpu(struct fprop_local_percpu *pl) { int err; - err = percpu_counter_init(&pl->events, 0); + err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); if (err) return err; pl->period = 0; diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 3fde78275cd1..48144cdae819 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -112,7 +112,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) } EXPORT_SYMBOL(__percpu_counter_sum); -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { unsigned long flags __maybe_unused; @@ -120,7 +120,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, raw_spin_lock_init(&fbc->lock); lockdep_set_class(&fbc->lock, key); fbc->count = amount; - fbc->counters = alloc_percpu(s32); + fbc->counters = alloc_percpu_gfp(s32, gfp); if (!fbc->counters) return -ENOMEM; diff --git a/lib/proportions.c b/lib/proportions.c index 05df84801b56..ca95f8d54384 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0); + err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0); + err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -193,7 +193,7 @@ int prop_local_init_percpu(struct prop_local_percpu *pl) raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0); + return percpu_counter_init(&pl->events, 0, GFP_KERNEL); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f0..f19a818be2d3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -455,7 +455,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { - err = percpu_counter_init(&bdi->bdi_stat[i], 0); + err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL); if (err) goto err; } diff --git a/mm/mmap.c b/mm/mmap.c index c1f2ea4a0b99..d7ec93e25fa1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3196,7 +3196,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index a881d9673c6b..bd1808e194a7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -539,7 +539,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } diff --git a/mm/shmem.c b/mm/shmem.c index 0e5fb225007c..d4bc55d3f107 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2993,7 +2993,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #endif spin_lock_init(&sbinfo->stat_lock); - if (percpu_counter_init(&sbinfo->used_blocks, 0)) + if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; diff --git a/net/dccp/proto.c 
b/net/dccp/proto.c index de2c1e719305..e421eddf67b4 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1115,7 +1115,7 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0); + rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); if (rc) goto out_fail; rc = -ENOBUFS; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541f26a67ba2..d59c2604c247 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3188,8 +3188,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); - percpu_counter_init(&tcp_sockets_allocated, 0); - percpu_counter_init(&tcp_orphan_count, 0); + percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); + percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 3af522622fad..1d191357bf88 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -32,7 +32,7 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) res_parent = &parent_cg->memory_allocated; res_counter_init(&cg_proto->memory_allocated, res_parent); - percpu_counter_init(&cg_proto->sockets_allocated, 0); + percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6240834f4b95..f00a85a3fddd 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1341,7 +1341,7 @@ static __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - status = percpu_counter_init(&sctp_sockets_allocated, 0); + status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL); if (status) goto err_percpu_counter_init; -- cgit v1.2.3 From 20ae00792c6f1f18fc4fc5965445a145df92827e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: proportions: add @gfp to init functions Percpu allocator now supports allocation mask. Add @gfp to [flex_]proportions init functions so that !GFP_KERNEL allocation masks can be used with them too. This patch doesn't make any functional difference. 
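As a rough illustration (the caller names below are made up, not taken from the tree), converted initializers are simply handed an explicit mask; existing users keep passing GFP_KERNEL, while a caller in atomic context could now pass e.g. GFP_NOWAIT:

	#include <linux/flex_proportions.h>
	#include <linux/gfp.h>

	static struct fprop_global demo_completions;
	static struct fprop_local_percpu demo_events;

	static int demo_init(gfp_t gfp)
	{
		int err;

		/* @gfp is forwarded to the underlying percpu_counter_init() */
		err = fprop_global_init(&demo_completions, gfp);
		if (err)
			return err;

		err = fprop_local_init_percpu(&demo_events, gfp);
		if (err)
			fprop_global_destroy(&demo_completions);
		return err;
	}

bdi_init() and page_writeback_init() below are converted exactly this way, passing GFP_KERNEL.
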
Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Peter Zijlstra --- include/linux/flex_proportions.h | 5 +++-- include/linux/proportions.h | 5 +++-- lib/flex_proportions.c | 8 ++++---- lib/proportions.c | 10 +++++----- mm/backing-dev.c | 2 +- mm/page-writeback.c | 2 +- 6 files changed, 17 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index 4ebc49fae391..0d348e011a6e 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * When maximum proportion of some event type is specified, this is the @@ -32,7 +33,7 @@ struct fprop_global { seqcount_t sequence; }; -int fprop_global_init(struct fprop_global *p); +int fprop_global_init(struct fprop_global *p, gfp_t gfp); void fprop_global_destroy(struct fprop_global *p); bool fprop_new_period(struct fprop_global *p, int periods); @@ -79,7 +80,7 @@ struct fprop_local_percpu { raw_spinlock_t lock; /* Protect period and numerator */ }; -int fprop_local_init_percpu(struct fprop_local_percpu *pl); +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 26a8a4ed9b07..00e8e8fa7358 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -12,6 +12,7 @@ #include #include #include +#include struct prop_global { /* @@ -40,7 +41,7 @@ struct prop_descriptor { struct mutex mutex; /* serialize the prop_global switch */ }; -int prop_descriptor_init(struct prop_descriptor *pd, int shift); +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp); void prop_change_shift(struct prop_descriptor *pd, int new_shift); /* @@ -61,7 +62,7 @@ struct prop_local_percpu { raw_spinlock_t lock; /* protect the snapshot state */ }; -int prop_local_init_percpu(struct prop_local_percpu *pl); +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp); void prop_local_destroy_percpu(struct prop_local_percpu *pl); void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl); void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl, diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index b9d026bfcf38..8f25652f40d4 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -34,13 +34,13 @@ */ #include -int fprop_global_init(struct fprop_global *p) +int fprop_global_init(struct fprop_global *p, gfp_t gfp) { int err; p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... 
*/ - err = percpu_counter_init(&p->events, 1, GFP_KERNEL); + err = percpu_counter_init(&p->events, 1, gfp); if (err) return err; seqcount_init(&p->sequence); @@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p, */ #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int fprop_local_init_percpu(struct fprop_local_percpu *pl) +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp) { int err; - err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); + err = percpu_counter_init(&pl->events, 0, gfp); if (err) return err; pl->period = 0; diff --git a/lib/proportions.c b/lib/proportions.c index ca95f8d54384..6f724298f67a 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -73,7 +73,7 @@ #include #include -int prop_descriptor_init(struct prop_descriptor *pd, int shift) +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp) { int err; @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[0].events, 0, gfp); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[1].events, 0, gfp); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int prop_local_init_percpu(struct prop_local_percpu *pl) +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp) { raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0, GFP_KERNEL); + return percpu_counter_init(&pl->events, 0, gfp); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f19a818be2d3..64ec49d1772b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -470,7 +470,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->write_bandwidth = INIT_BW; bdi->avg_write_bandwidth = INIT_BW; - err = fprop_local_init_percpu(&bdi->completions); + err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL); if (err) { err: diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 91d73ef1744d..508599403721 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1777,7 +1777,7 @@ void __init page_writeback_init(void) writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); - fprop_global_init(&writeout_completions); + fprop_global_init(&writeout_completions, GFP_KERNEL); } /** -- cgit v1.2.3 From a34375ef9e65340a138fc0be287de5c940d260fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: percpu-refcount: add @gfp to percpu_ref_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_ref_init() so that !GFP_KERNEL allocation masks can be used with percpu_refs too. This patch doesn't make any functional difference. v2: blk-mq conversion was missing. Updated. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Benjamin LaHaise Cc: Li Zefan Cc: Nicholas A. 
Bellinger Cc: Jens Axboe --- block/blk-mq.c | 3 ++- drivers/target/target_core_tpg.c | 3 ++- fs/aio.c | 4 ++-- include/linux/percpu-refcount.h | 3 ++- kernel/cgroup.c | 6 +++--- lib/percpu-refcount.c | 6 ++++-- 6 files changed, 15 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5189cb1e478a..702df07b980d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1776,7 +1776,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, + GFP_KERNEL)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index fddfae61222f..4ab6da338585 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -819,7 +819,8 @@ int core_tpg_add_lun( { int ret; - ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, + GFP_KERNEL); if (ret < 0) return ret; diff --git a/fs/aio.c b/fs/aio.c index bd7ec2cc2674..93fbcc0f5696 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -666,10 +666,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) INIT_LIST_HEAD(&ctx->active_reqs); - if (percpu_ref_init(&ctx->users, free_ioctx_users)) + if (percpu_ref_init(&ctx->users, free_ioctx_users, GFP_KERNEL)) goto err; - if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, GFP_KERNEL)) goto err; ctx->cpu = alloc_percpu(struct kioctx_cpu); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3dfbf237cd8f..ee8325122dbd 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -49,6 +49,7 @@ #include #include #include +#include struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); @@ -66,7 +67,7 @@ struct percpu_ref { }; int __must_check percpu_ref_init(struct percpu_ref *ref, - percpu_ref_func_t *release); + percpu_ref_func_t *release, gfp_t gfp); void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7dc8788cfd52..589b4d89a0a5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1628,7 +1628,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) goto out; root_cgrp->id = ret; - ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release); + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, GFP_KERNEL); if (ret) goto out; @@ -4487,7 +4487,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, init_and_link_css(css, ss, cgrp); - err = percpu_ref_init(&css->refcnt, css_release); + err = percpu_ref_init(&css->refcnt, css_release, GFP_KERNEL); if (err) goto err_free_css; @@ -4555,7 +4555,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, goto out_unlock; } - ret = percpu_ref_init(&cgrp->self.refcnt, css_release); + ret = percpu_ref_init(&cgrp->self.refcnt, css_release, GFP_KERNEL); if (ret) goto out_free_cgrp; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe5a3342e960..ff9903264a91 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -40,6 +40,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * 
percpu_ref_init - initialize a percpu refcount * @ref: percpu_ref to initialize * @release: function which will be called when refcount hits 0 + * @gfp: allocation mask to use * * Initializes the refcount in single atomic counter mode with a refcount of 1; * analagous to atomic_set(ref, 1). @@ -47,11 +48,12 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * Note that @release must not sleep - it may potentially be called from RCU * callback context by percpu_ref_kill(). */ -int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) +int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, + gfp_t gfp) { atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); - ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned); + ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned, gfp); if (!ref->pcpu_count_ptr) return -ENOMEM; -- cgit v1.2.3 From 4843c3320c3d23ab4ecf520f5eaf485aff8c7252 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 20 Sep 2014 01:27:24 -0400 Subject: percpu-refcount: improve WARN messages percpu_ref's WARN messages can be a lot more helpful by indicating who's the culprit. Make them report the release function that the offending percpu-refcount is associated with. This should make it a lot easier to track down the reported invalid refcnting operations. Signed-off-by: Tejun Heo Cc: Kent Overstreet --- lib/percpu-refcount.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index ff9903264a91..70d28c91f35a 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -145,8 +145,9 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); - WARN_ONCE(atomic_read(&ref->count) <= 0, "percpu ref <= 0 (%i)", - atomic_read(&ref->count)); + WARN_ONCE(atomic_read(&ref->count) <= 0, + "percpu ref (%pf) <= 0 (%i) after killed", + ref->release, atomic_read(&ref->count)); /* @ref is viewed as dead on all CPUs, send out kill confirmation */ if (ref->confirm_kill) @@ -178,7 +179,8 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, - "percpu_ref_kill() called more than once!\n"); + "percpu_ref_kill() called more than once on %pf!", + ref->release); ref->pcpu_count_ptr |= PCPU_REF_DEAD; ref->confirm_kill = confirm_kill; -- cgit v1.2.3 From e625305b390790717cf2cccf61efb81299647028 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 20 Sep 2014 01:27:25 -0400 Subject: percpu-refcount: make percpu_ref based on longs instead of ints percpu_ref is currently based on ints and the number of refs it can cover is (1 << 31). This makes it impossible to use a percpu_ref to count memory objects or pages on 64bit machines as it may overflow. This forces those users to somehow aggregate the references before contributing to the percpu_ref which is often cumbersome and sometimes challenging to get the same level of performance as using the percpu_ref directly. While using ints for the percpu counters makes them pack tighter on 64bit machines, the possible gain from using ints instead of longs is extremely small compared to the overall gain from per-cpu operation. This patch makes percpu_ref based on longs so that it can be used to directly count memory objects or pages. 
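For reference, a minimal usage sketch with made-up names (the @gfp argument comes from an earlier patch in this series; the counter now has the full range of an atomic_long_t):

	#include <linux/percpu-refcount.h>
	#include <linux/gfp.h>

	static struct percpu_ref demo_ref;

	/* must not sleep: may run from RCU callback context */
	static void demo_release(struct percpu_ref *ref)
	{
		/* last reference is gone; free whatever @ref protects */
	}

	static int demo_setup(void)
	{
		/* refcount starts at 1, analogous to atomic_long_set(&count, 1) */
		return percpu_ref_init(&demo_ref, demo_release, GFP_KERNEL);
	}

	static void demo_use(void)
	{
		percpu_ref_get(&demo_ref);	/* per-cpu fast path while alive */
		/* ... */
		percpu_ref_put(&demo_ref);
	}

	static void demo_teardown(void)
	{
		/* drop the initial ref; release fires once all users are done */
		percpu_ref_kill(&demo_ref);
	}

percpu_ref_exit(&demo_ref) would then free the percpu counters once the ref is no longer in use.
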
Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Johannes Weiner --- include/linux/percpu-refcount.h | 24 ++++++++++++------------ lib/percpu-refcount.c | 37 +++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index ee8325122dbd..5df6784bd9d2 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -55,7 +55,7 @@ struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); struct percpu_ref { - atomic_t count; + atomic_long_t count; /* * The low bit of the pointer indicates whether the ref is in percpu * mode; if set, then get/put will manipulate the atomic_t. @@ -97,7 +97,7 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) * branches as it can't assume that @ref->pcpu_count is not NULL. */ static inline bool __pcpu_ref_alive(struct percpu_ref *ref, - unsigned __percpu **pcpu_countp) + unsigned long __percpu **pcpu_countp) { unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr); @@ -107,7 +107,7 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref, if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) return false; - *pcpu_countp = (unsigned __percpu *)pcpu_ptr; + *pcpu_countp = (unsigned long __percpu *)pcpu_ptr; return true; } @@ -119,14 +119,14 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref, */ static inline void percpu_ref_get(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count; + unsigned long __percpu *pcpu_count; rcu_read_lock_sched(); if (__pcpu_ref_alive(ref, &pcpu_count)) this_cpu_inc(*pcpu_count); else - atomic_inc(&ref->count); + atomic_long_inc(&ref->count); rcu_read_unlock_sched(); } @@ -142,7 +142,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) */ static inline bool percpu_ref_tryget(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count; + unsigned long __percpu *pcpu_count; int ret = false; rcu_read_lock_sched(); @@ -151,7 +151,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) this_cpu_inc(*pcpu_count); ret = true; } else { - ret = atomic_inc_not_zero(&ref->count); + ret = atomic_long_inc_not_zero(&ref->count); } rcu_read_unlock_sched(); @@ -175,7 +175,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count; + unsigned long __percpu *pcpu_count; int ret = false; rcu_read_lock_sched(); @@ -199,13 +199,13 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) */ static inline void percpu_ref_put(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count; + unsigned long __percpu *pcpu_count; rcu_read_lock_sched(); if (__pcpu_ref_alive(ref, &pcpu_count)) this_cpu_dec(*pcpu_count); - else if (unlikely(atomic_dec_and_test(&ref->count))) + else if (unlikely(atomic_long_dec_and_test(&ref->count))) ref->release(ref); rcu_read_unlock_sched(); @@ -219,11 +219,11 @@ static inline void percpu_ref_put(struct percpu_ref *ref) */ static inline bool percpu_ref_is_zero(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count; + unsigned long __percpu *pcpu_count; if (__pcpu_ref_alive(ref, &pcpu_count)) return false; - return !atomic_read(&ref->count); + return !atomic_long_read(&ref->count); } #endif diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 70d28c91f35a..559ee0b20318 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -25,15 +25,15 @@ * works. 
* * Converting to non percpu mode is done with some RCUish stuff in - * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t - * can't hit 0 before we've added up all the percpu refs. + * percpu_ref_kill. Additionally, we need a bias value so that the + * atomic_long_t can't hit 0 before we've added up all the percpu refs. */ -#define PCPU_COUNT_BIAS (1U << 31) +#define PCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) -static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) +static unsigned long __percpu *pcpu_count_ptr(struct percpu_ref *ref) { - return (unsigned __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD); + return (unsigned long __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD); } /** @@ -43,7 +43,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * @gfp: allocation mask to use * * Initializes the refcount in single atomic counter mode with a refcount of 1; - * analagous to atomic_set(ref, 1). + * analagous to atomic_long_set(ref, 1). * * Note that @release must not sleep - it may potentially be called from RCU * callback context by percpu_ref_kill(). @@ -51,9 +51,9 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, gfp_t gfp) { - atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); + atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS); - ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned, gfp); + ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned long, gfp); if (!ref->pcpu_count_ptr) return -ENOMEM; @@ -75,13 +75,13 @@ EXPORT_SYMBOL_GPL(percpu_ref_init); */ void percpu_ref_reinit(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); + unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref); int cpu; BUG_ON(!pcpu_count); WARN_ON(!percpu_ref_is_zero(ref)); - atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); + atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS); /* * Restore per-cpu operation. 
smp_store_release() is paired with @@ -109,7 +109,7 @@ EXPORT_SYMBOL_GPL(percpu_ref_reinit); */ void percpu_ref_exit(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); + unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref); if (pcpu_count) { free_percpu(pcpu_count); @@ -121,14 +121,15 @@ EXPORT_SYMBOL_GPL(percpu_ref_exit); static void percpu_ref_kill_rcu(struct rcu_head *rcu) { struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); - unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); - unsigned count = 0; + unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref); + unsigned long count = 0; int cpu; for_each_possible_cpu(cpu) count += *per_cpu_ptr(pcpu_count, cpu); - pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); + pr_debug("global %ld pcpu %ld", + atomic_long_read(&ref->count), (long)count); /* * It's crucial that we sum the percpu counters _before_ adding the sum @@ -143,11 +144,11 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) * time is equivalent and saves us atomic operations: */ - atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); + atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count); - WARN_ONCE(atomic_read(&ref->count) <= 0, - "percpu ref (%pf) <= 0 (%i) after killed", - ref->release, atomic_read(&ref->count)); + WARN_ONCE(atomic_long_read(&ref->count) <= 0, + "percpu ref (%pf) <= 0 (%ld) after killed", + ref->release, atomic_long_read(&ref->count)); /* @ref is viewed as dead on all CPUs, send out kill confirmation */ if (ref->confirm_kill) -- cgit v1.2.3 From 9eca80461a45177e456219a9cd944c27675d6512 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:07:33 -0400 Subject: Revert "blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during probe" This reverts commit 0a30288da1aec914e158c2d7a3482a85f632750f, which was a temporary fix for SCSI blk-mq stall issue. The following patches will fix the issue properly by introducing atomic mode to percpu_ref. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Jens Axboe Cc: Christoph Hellwig --- block/blk-mq.c | 11 +---------- include/linux/percpu-refcount.h | 1 - lib/percpu-refcount.c | 16 ---------------- 3 files changed, 1 insertion(+), 27 deletions(-) (limited to 'lib') diff --git a/block/blk-mq.c b/block/blk-mq.c index 255d79c14dc1..44a78ae3f899 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -119,16 +119,7 @@ void blk_mq_freeze_queue(struct request_queue *q) spin_unlock_irq(q->queue_lock); if (freeze) { - /* - * XXX: Temporary kludge to work around SCSI blk-mq stall. - * SCSI synchronously creates and destroys many queues - * back-to-back during probe leading to lengthy stalls. - * This will be fixed by keeping ->mq_usage_counter in - * atomic mode until genhd registration, but, for now, - * let's work around using expedited synchronization. 
- */ - __percpu_ref_kill_expedited(&q->mq_usage_counter); - + percpu_ref_kill(&q->mq_usage_counter); blk_mq_run_queues(q, false); } wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 11b38ceca7e2..5df6784bd9d2 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -72,7 +72,6 @@ void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); -void __percpu_ref_kill_expedited(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index c6c31e2829b1..559ee0b20318 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -189,19 +189,3 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); } EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); - -/* - * XXX: Temporary kludge to work around SCSI blk-mq stall. Used only by - * block/blk-mq.c::blk_mq_freeze_queue(). Will be removed during v3.18 - * devel cycle. Do not use anywhere else. - */ -void __percpu_ref_kill_expedited(struct percpu_ref *ref) -{ - WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, - "percpu_ref_kill() called more than once on %pf!", - ref->release); - - ref->pcpu_count_ptr |= PCPU_REF_DEAD; - synchronize_sched_expedited(); - percpu_ref_kill_rcu(&ref->rcu); -} -- cgit v1.2.3 From a2237370194484ee6aeeff04b617e4b14d178966 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:48 -0400 Subject: percpu_ref: relocate percpu_ref_reinit() percpu_ref is gonna go through restructuring. Move percpu_ref_reinit() after percpu_ref_kill_and_confirm(). This will make later changes easier to follow and result in cleaner organization. Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet --- include/linux/percpu-refcount.h | 2 +- lib/percpu-refcount.c | 70 ++++++++++++++++++++--------------------- 2 files changed, 36 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 5df6784bd9d2..f015f139d491 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -68,10 +68,10 @@ struct percpu_ref { int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, gfp_t gfp); -void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); +void percpu_ref_reinit(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 559ee0b20318..070dab5e7d77 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -62,41 +62,6 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, } EXPORT_SYMBOL_GPL(percpu_ref_init); -/** - * percpu_ref_reinit - re-initialize a percpu refcount - * @ref: perpcu_ref to re-initialize - * - * Re-initialize @ref so that it's in the same state as when it finished - * percpu_ref_init(). @ref must have been initialized successfully, killed - * and reached 0 but not exited. - * - * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while - * this function is in progress. 
- */ -void percpu_ref_reinit(struct percpu_ref *ref) -{ - unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref); - int cpu; - - BUG_ON(!pcpu_count); - WARN_ON(!percpu_ref_is_zero(ref)); - - atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS); - - /* - * Restore per-cpu operation. smp_store_release() is paired with - * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees - * that the zeroing is visible to all percpu accesses which can see - * the following PCPU_REF_DEAD clearing. - */ - for_each_possible_cpu(cpu) - *per_cpu_ptr(pcpu_count, cpu) = 0; - - smp_store_release(&ref->pcpu_count_ptr, - ref->pcpu_count_ptr & ~PCPU_REF_DEAD); -} -EXPORT_SYMBOL_GPL(percpu_ref_reinit); - /** * percpu_ref_exit - undo percpu_ref_init() * @ref: percpu_ref to exit @@ -189,3 +154,38 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); } EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); + +/** + * percpu_ref_reinit - re-initialize a percpu refcount + * @ref: perpcu_ref to re-initialize + * + * Re-initialize @ref so that it's in the same state as when it finished + * percpu_ref_init(). @ref must have been initialized successfully, killed + * and reached 0 but not exited. + * + * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while + * this function is in progress. + */ +void percpu_ref_reinit(struct percpu_ref *ref) +{ + unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref); + int cpu; + + BUG_ON(!pcpu_count); + WARN_ON(!percpu_ref_is_zero(ref)); + + atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS); + + /* + * Restore per-cpu operation. smp_store_release() is paired with + * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees + * that the zeroing is visible to all percpu accesses which can see + * the following PCPU_REF_DEAD clearing. + */ + for_each_possible_cpu(cpu) + *per_cpu_ptr(pcpu_count, cpu) = 0; + + smp_store_release(&ref->pcpu_count_ptr, + ref->pcpu_count_ptr & ~PCPU_REF_DEAD); +} +EXPORT_SYMBOL_GPL(percpu_ref_reinit); -- cgit v1.2.3 From 6251f9976af7656b6970a8820153f356430f5de2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:48 -0400 Subject: percpu_ref: minor code and comment updates * Some comments became stale. Updated. * percpu_ref_tryget() unnecessarily initializes @ret. Removed. * A blank line removed from percpu_ref_kill_rcu(). * Explicit function name in a WARN format string replaced with __func__. * WARN_ON() in percpu_ref_reinit() converted to WARN_ON_ONCE(). Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet --- include/linux/percpu-refcount.h | 25 ++++++++++++++++--------- lib/percpu-refcount.c | 14 ++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index f015f139d491..d44b027f74fd 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -115,8 +115,10 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref, * percpu_ref_get - increment a percpu refcount * @ref: percpu_ref to get * - * Analagous to atomic_inc(). - */ + * Analagous to atomic_long_inc(). + * + * This function is safe to call as long as @ref is between init and exit. + */ static inline void percpu_ref_get(struct percpu_ref *ref) { unsigned long __percpu *pcpu_count; @@ -138,12 +140,12 @@ static inline void percpu_ref_get(struct percpu_ref *ref) * Increment a percpu refcount unless its count already reached zero. 
* Returns %true on success; %false on failure. * - * The caller is responsible for ensuring that @ref stays accessible. + * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget(struct percpu_ref *ref) { unsigned long __percpu *pcpu_count; - int ret = false; + int ret; rcu_read_lock_sched(); @@ -166,12 +168,13 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) * Increment a percpu refcount unless it has already been killed. Returns * %true on success; %false on failure. * - * Completion of percpu_ref_kill() in itself doesn't guarantee that tryget - * will fail. For such guarantee, percpu_ref_kill_and_confirm() should be - * used. After the confirm_kill callback is invoked, it's guaranteed that - * no new reference will be given out by percpu_ref_tryget(). + * Completion of percpu_ref_kill() in itself doesn't guarantee that this + * function will fail. For such guarantee, percpu_ref_kill_and_confirm() + * should be used. After the confirm_kill callback is invoked, it's + * guaranteed that no new reference will be given out by + * percpu_ref_tryget_live(). * - * The caller is responsible for ensuring that @ref stays accessible. + * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { @@ -196,6 +199,8 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) * * Decrement the refcount, and if 0, call the release function (which was passed * to percpu_ref_init()) + * + * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_put(struct percpu_ref *ref) { @@ -216,6 +221,8 @@ static inline void percpu_ref_put(struct percpu_ref *ref) * @ref: percpu_ref to test * * Returns %true if @ref reached zero. + * + * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_is_zero(struct percpu_ref *ref) { diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 070dab5e7d77..8ef3f5c20df6 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -108,7 +108,6 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) * reaching 0 before we add the percpu counts. But doing it at the same * time is equivalent and saves us atomic operations: */ - atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count); WARN_ONCE(atomic_long_read(&ref->count) <= 0, @@ -120,8 +119,8 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) ref->confirm_kill(ref); /* - * Now we're in single atomic_t mode with a consistent refcount, so it's - * safe to drop our initial ref: + * Now we're in single atomic_long_t mode with a consistent + * refcount, so it's safe to drop our initial ref: */ percpu_ref_put(ref); } @@ -134,8 +133,8 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) * Equivalent to percpu_ref_kill() but also schedules kill confirmation if * @confirm_kill is not NULL. @confirm_kill, which may not block, will be * called after @ref is seen as dead from all CPUs - all further - * invocations of percpu_ref_tryget() will fail. See percpu_ref_tryget() - * for more details. + * invocations of percpu_ref_tryget_live() will fail. See + * percpu_ref_tryget_live() for more details. 
* * Due to the way percpu_ref is implemented, @confirm_kill will be called * after at least one full RCU grace period has passed but this is an @@ -145,8 +144,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, - "percpu_ref_kill() called more than once on %pf!", - ref->release); + "%s called more than once on %pf!", __func__, ref->release); ref->pcpu_count_ptr |= PCPU_REF_DEAD; ref->confirm_kill = confirm_kill; @@ -172,7 +170,7 @@ void percpu_ref_reinit(struct percpu_ref *ref) int cpu; BUG_ON(!pcpu_count); - WARN_ON(!percpu_ref_is_zero(ref)); + WARN_ON_ONCE(!percpu_ref_is_zero(ref)); atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS); -- cgit v1.2.3 From eecc16ba9a49b05dd847a317af166a6728eb56ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:48 -0400 Subject: percpu_ref: replace pcpu_ prefix with percpu_ percpu_ref uses pcpu_ prefix for internal stuff and percpu_ for externally visible ones. This is the same convention used in the percpu allocator implementation. It works fine there but percpu_ref doesn't have too much internal-only stuff and scattered usages of pcpu_ prefix are confusing than helpful. This patch replaces all pcpu_ prefixes with percpu_. This is pure rename and there's no functional change. Note that PCPU_REF_DEAD is renamed to __PERCPU_REF_DEAD to signify that the flag is internal. Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet --- include/linux/percpu-refcount.h | 46 ++++++++++++++++----------------- lib/percpu-refcount.c | 56 +++++++++++++++++++++-------------------- 2 files changed, 52 insertions(+), 50 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index d44b027f74fd..3d463a39e0f7 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -13,7 +13,7 @@ * * The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less * than an atomic_t - this is because of the way shutdown works, see - * percpu_ref_kill()/PCPU_COUNT_BIAS. + * percpu_ref_kill()/PERCPU_COUNT_BIAS. * * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill() @@ -60,7 +60,7 @@ struct percpu_ref { * The low bit of the pointer indicates whether the ref is in percpu * mode; if set, then get/put will manipulate the atomic_t. */ - unsigned long pcpu_count_ptr; + unsigned long percpu_count_ptr; percpu_ref_func_t *release; percpu_ref_func_t *confirm_kill; struct rcu_head rcu; @@ -88,26 +88,26 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) return percpu_ref_kill_and_confirm(ref, NULL); } -#define PCPU_REF_DEAD 1 +#define __PERCPU_REF_DEAD 1 /* * Internal helper. Don't use outside percpu-refcount proper. The * function doesn't return the pointer and let the caller test it for NULL * because doing so forces the compiler to generate two conditional - * branches as it can't assume that @ref->pcpu_count is not NULL. + * branches as it can't assume that @ref->percpu_count is not NULL. 
*/ -static inline bool __pcpu_ref_alive(struct percpu_ref *ref, - unsigned long __percpu **pcpu_countp) +static inline bool __percpu_ref_alive(struct percpu_ref *ref, + unsigned long __percpu **percpu_countp) { - unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr); + unsigned long percpu_ptr = ACCESS_ONCE(ref->percpu_count_ptr); /* paired with smp_store_release() in percpu_ref_reinit() */ smp_read_barrier_depends(); - if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) + if (unlikely(percpu_ptr & __PERCPU_REF_DEAD)) return false; - *pcpu_countp = (unsigned long __percpu *)pcpu_ptr; + *percpu_countp = (unsigned long __percpu *)percpu_ptr; return true; } @@ -121,12 +121,12 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref, */ static inline void percpu_ref_get(struct percpu_ref *ref) { - unsigned long __percpu *pcpu_count; + unsigned long __percpu *percpu_count; rcu_read_lock_sched(); - if (__pcpu_ref_alive(ref, &pcpu_count)) - this_cpu_inc(*pcpu_count); + if (__percpu_ref_alive(ref, &percpu_count)) + this_cpu_inc(*percpu_count); else atomic_long_inc(&ref->count); @@ -144,13 +144,13 @@ static inline void percpu_ref_get(struct percpu_ref *ref) */ static inline bool percpu_ref_tryget(struct percpu_ref *ref) { - unsigned long __percpu *pcpu_count; + unsigned long __percpu *percpu_count; int ret; rcu_read_lock_sched(); - if (__pcpu_ref_alive(ref, &pcpu_count)) { - this_cpu_inc(*pcpu_count); + if (__percpu_ref_alive(ref, &percpu_count)) { + this_cpu_inc(*percpu_count); ret = true; } else { ret = atomic_long_inc_not_zero(&ref->count); @@ -178,13 +178,13 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { - unsigned long __percpu *pcpu_count; + unsigned long __percpu *percpu_count; int ret = false; rcu_read_lock_sched(); - if (__pcpu_ref_alive(ref, &pcpu_count)) { - this_cpu_inc(*pcpu_count); + if (__percpu_ref_alive(ref, &percpu_count)) { + this_cpu_inc(*percpu_count); ret = true; } @@ -204,12 +204,12 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) */ static inline void percpu_ref_put(struct percpu_ref *ref) { - unsigned long __percpu *pcpu_count; + unsigned long __percpu *percpu_count; rcu_read_lock_sched(); - if (__pcpu_ref_alive(ref, &pcpu_count)) - this_cpu_dec(*pcpu_count); + if (__percpu_ref_alive(ref, &percpu_count)) + this_cpu_dec(*percpu_count); else if (unlikely(atomic_long_dec_and_test(&ref->count))) ref->release(ref); @@ -226,9 +226,9 @@ static inline void percpu_ref_put(struct percpu_ref *ref) */ static inline bool percpu_ref_is_zero(struct percpu_ref *ref) { - unsigned long __percpu *pcpu_count; + unsigned long __percpu *percpu_count; - if (__pcpu_ref_alive(ref, &pcpu_count)) + if (__percpu_ref_alive(ref, &percpu_count)) return false; return !atomic_long_read(&ref->count); } diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 8ef3f5c20df6..5aea6b7356c7 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -11,8 +11,8 @@ * percpu counters will all sum to the correct value * * (More precisely: because moduler arithmatic is commutative the sum of all the - * pcpu_count vars will be equal to what it would have been if all the gets and - * puts were done to a single integer, even if some of the percpu integers + * percpu_count vars will be equal to what it would have been if all the gets + * and puts were done to a single integer, even if some of the percpu integers * overflow or underflow). * * The real trick to implementing percpu refcounts is shutdown. 
We can't detect @@ -29,11 +29,12 @@ * atomic_long_t can't hit 0 before we've added up all the percpu refs. */ -#define PCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) +#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) -static unsigned long __percpu *pcpu_count_ptr(struct percpu_ref *ref) +static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) { - return (unsigned long __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD); + return (unsigned long __percpu *) + (ref->percpu_count_ptr & ~__PERCPU_REF_DEAD); } /** @@ -51,10 +52,11 @@ static unsigned long __percpu *pcpu_count_ptr(struct percpu_ref *ref) int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, gfp_t gfp) { - atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS); + atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS); - ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned long, gfp); - if (!ref->pcpu_count_ptr) + ref->percpu_count_ptr = + (unsigned long)alloc_percpu_gfp(unsigned long, gfp); + if (!ref->percpu_count_ptr) return -ENOMEM; ref->release = release; @@ -74,11 +76,11 @@ EXPORT_SYMBOL_GPL(percpu_ref_init); */ void percpu_ref_exit(struct percpu_ref *ref) { - unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref); + unsigned long __percpu *percpu_count = percpu_count_ptr(ref); - if (pcpu_count) { - free_percpu(pcpu_count); - ref->pcpu_count_ptr = PCPU_REF_DEAD; + if (percpu_count) { + free_percpu(percpu_count); + ref->percpu_count_ptr = __PERCPU_REF_DEAD; } } EXPORT_SYMBOL_GPL(percpu_ref_exit); @@ -86,14 +88,14 @@ EXPORT_SYMBOL_GPL(percpu_ref_exit); static void percpu_ref_kill_rcu(struct rcu_head *rcu) { struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); - unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref); + unsigned long __percpu *percpu_count = percpu_count_ptr(ref); unsigned long count = 0; int cpu; for_each_possible_cpu(cpu) - count += *per_cpu_ptr(pcpu_count, cpu); + count += *per_cpu_ptr(percpu_count, cpu); - pr_debug("global %ld pcpu %ld", + pr_debug("global %ld percpu %ld", atomic_long_read(&ref->count), (long)count); /* @@ -108,7 +110,7 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) * reaching 0 before we add the percpu counts. But doing it at the same * time is equivalent and saves us atomic operations: */ - atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count); + atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count); WARN_ONCE(atomic_long_read(&ref->count) <= 0, "percpu ref (%pf) <= 0 (%ld) after killed", @@ -143,10 +145,10 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { - WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, + WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, "%s called more than once on %pf!", __func__, ref->release); - ref->pcpu_count_ptr |= PCPU_REF_DEAD; + ref->percpu_count_ptr |= __PERCPU_REF_DEAD; ref->confirm_kill = confirm_kill; call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); @@ -166,24 +168,24 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); */ void percpu_ref_reinit(struct percpu_ref *ref) { - unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref); + unsigned long __percpu *percpu_count = percpu_count_ptr(ref); int cpu; - BUG_ON(!pcpu_count); + BUG_ON(!percpu_count); WARN_ON_ONCE(!percpu_ref_is_zero(ref)); - atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS); + atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS); /* * Restore per-cpu operation. 
smp_store_release() is paired with - * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees - * that the zeroing is visible to all percpu accesses which can see - * the following PCPU_REF_DEAD clearing. + * smp_read_barrier_depends() in __percpu_ref_alive() and + * guarantees that the zeroing is visible to all percpu accesses + * which can see the following __PERCPU_REF_DEAD clearing. */ for_each_possible_cpu(cpu) - *per_cpu_ptr(pcpu_count, cpu) = 0; + *per_cpu_ptr(percpu_count, cpu) = 0; - smp_store_release(&ref->pcpu_count_ptr, - ref->pcpu_count_ptr & ~PCPU_REF_DEAD); + smp_store_release(&ref->percpu_count_ptr, + ref->percpu_count_ptr & ~__PERCPU_REF_DEAD); } EXPORT_SYMBOL_GPL(percpu_ref_reinit); -- cgit v1.2.3 From 9e804d1f58da1eca079f796347c1cf1d1df564e2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:48 -0400 Subject: percpu_ref: rename things to prepare for decoupling percpu/atomic mode switch percpu_ref will be restructured so that percpu/atomic mode switching and reference killing are dedoupled. In preparation, do the following renames. * percpu_ref->confirm_kill -> percpu_ref->confirm_switch * __PERCPU_REF_DEAD -> __PERCPU_REF_ATOMIC * __percpu_ref_alive() -> __ref_is_percpu() This patch is pure rename and doesn't introduce any functional changes. Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet --- include/linux/percpu-refcount.h | 25 ++++++++++++++----------- lib/percpu-refcount.c | 22 +++++++++++----------- 2 files changed, 25 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3d463a39e0f7..910e5f72055d 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -54,6 +54,11 @@ struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); +/* flags set in the lower bits of percpu_ref->percpu_count_ptr */ +enum { + __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */ +}; + struct percpu_ref { atomic_long_t count; /* @@ -62,7 +67,7 @@ struct percpu_ref { */ unsigned long percpu_count_ptr; percpu_ref_func_t *release; - percpu_ref_func_t *confirm_kill; + percpu_ref_func_t *confirm_switch; struct rcu_head rcu; }; @@ -88,23 +93,21 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) return percpu_ref_kill_and_confirm(ref, NULL); } -#define __PERCPU_REF_DEAD 1 - /* * Internal helper. Don't use outside percpu-refcount proper. The * function doesn't return the pointer and let the caller test it for NULL * because doing so forces the compiler to generate two conditional * branches as it can't assume that @ref->percpu_count is not NULL. 
*/ -static inline bool __percpu_ref_alive(struct percpu_ref *ref, - unsigned long __percpu **percpu_countp) +static inline bool __ref_is_percpu(struct percpu_ref *ref, + unsigned long __percpu **percpu_countp) { unsigned long percpu_ptr = ACCESS_ONCE(ref->percpu_count_ptr); /* paired with smp_store_release() in percpu_ref_reinit() */ smp_read_barrier_depends(); - if (unlikely(percpu_ptr & __PERCPU_REF_DEAD)) + if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; @@ -125,7 +128,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) rcu_read_lock_sched(); - if (__percpu_ref_alive(ref, &percpu_count)) + if (__ref_is_percpu(ref, &percpu_count)) this_cpu_inc(*percpu_count); else atomic_long_inc(&ref->count); @@ -149,7 +152,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) rcu_read_lock_sched(); - if (__percpu_ref_alive(ref, &percpu_count)) { + if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_inc(*percpu_count); ret = true; } else { @@ -183,7 +186,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) rcu_read_lock_sched(); - if (__percpu_ref_alive(ref, &percpu_count)) { + if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_inc(*percpu_count); ret = true; } @@ -208,7 +211,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) rcu_read_lock_sched(); - if (__percpu_ref_alive(ref, &percpu_count)) + if (__ref_is_percpu(ref, &percpu_count)) this_cpu_dec(*percpu_count); else if (unlikely(atomic_long_dec_and_test(&ref->count))) ref->release(ref); @@ -228,7 +231,7 @@ static inline bool percpu_ref_is_zero(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - if (__percpu_ref_alive(ref, &percpu_count)) + if (__ref_is_percpu(ref, &percpu_count)) return false; return !atomic_long_read(&ref->count); } diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 5aea6b7356c7..7aef590c1ef8 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -34,7 +34,7 @@ static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) { return (unsigned long __percpu *) - (ref->percpu_count_ptr & ~__PERCPU_REF_DEAD); + (ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); } /** @@ -80,7 +80,7 @@ void percpu_ref_exit(struct percpu_ref *ref) if (percpu_count) { free_percpu(percpu_count); - ref->percpu_count_ptr = __PERCPU_REF_DEAD; + ref->percpu_count_ptr = __PERCPU_REF_ATOMIC; } } EXPORT_SYMBOL_GPL(percpu_ref_exit); @@ -117,8 +117,8 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) ref->release, atomic_long_read(&ref->count)); /* @ref is viewed as dead on all CPUs, send out kill confirmation */ - if (ref->confirm_kill) - ref->confirm_kill(ref); + if (ref->confirm_switch) + ref->confirm_switch(ref); /* * Now we're in single atomic_long_t mode with a consistent @@ -145,11 +145,11 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { - WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, + WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC, "%s called more than once on %pf!", __func__, ref->release); - ref->percpu_count_ptr |= __PERCPU_REF_DEAD; - ref->confirm_kill = confirm_kill; + ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; + ref->confirm_switch = confirm_kill; call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); } @@ -178,14 +178,14 @@ void percpu_ref_reinit(struct percpu_ref *ref) /* * Restore per-cpu operation. 
smp_store_release() is paired with - * smp_read_barrier_depends() in __percpu_ref_alive() and - * guarantees that the zeroing is visible to all percpu accesses - * which can see the following __PERCPU_REF_DEAD clearing. + * smp_read_barrier_depends() in __ref_is_percpu() and guarantees + * that the zeroing is visible to all percpu accesses which can see + * the following __PERCPU_REF_ATOMIC clearing. */ for_each_possible_cpu(cpu) *per_cpu_ptr(percpu_count, cpu) = 0; smp_store_release(&ref->percpu_count_ptr, - ref->percpu_count_ptr & ~__PERCPU_REF_DEAD); + ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); } EXPORT_SYMBOL_GPL(percpu_ref_reinit); -- cgit v1.2.3 From 27344a9017cdaff82a167827da3001a0918afdc3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:49 -0400 Subject: percpu_ref: add PCPU_REF_DEAD percpu_ref will be restructured so that percpu/atomic mode switching and reference killing are dedoupled. In preparation, add PCPU_REF_DEAD and PCPU_REF_ATOMIC_DEAD which is OR of ATOMIC and DEAD. For now, ATOMIC and DEAD are changed together and all PCPU_REF_ATOMIC uses are converted to PCPU_REF_ATOMIC_DEAD without causing any behavior changes. percpu_ref_init() now specifies an explicit alignment when allocating the percpu counters so that the pointer has enough unused low bits to accomodate the flags. Note that one flag was fine as min alignment for percpu memory is 2 bytes but two flags are already too many for the natural alignment of unsigned longs on archs like cris and m68k. v2: The original patch had BUILD_BUG_ON() which triggers if unsigned long's alignment isn't enough to accomodate the flags, which triggered on cris and m64k. percpu_ref_init() updated to specify the required alignment explicitly. Reported by Fengguang. Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet Cc: kbuild test robot --- include/linux/percpu-refcount.h | 6 +++++- lib/percpu-refcount.c | 19 +++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 910e5f72055d..bd9483d390b4 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -57,6 +57,10 @@ typedef void (percpu_ref_func_t)(struct percpu_ref *); /* flags set in the lower bits of percpu_ref->percpu_count_ptr */ enum { __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */ + __PERCPU_REF_DEAD = 1LU << 1, /* (being) killed */ + __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD, + + __PERCPU_REF_FLAG_BITS = 2, }; struct percpu_ref { @@ -107,7 +111,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, /* paired with smp_store_release() in percpu_ref_reinit() */ smp_read_barrier_depends(); - if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC)) + if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 7aef590c1ef8..e2ff19f970cf 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -34,7 +34,7 @@ static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) { return (unsigned long __percpu *) - (ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); + (ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD); } /** @@ -52,10 +52,13 @@ static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, gfp_t gfp) { + size_t align = max_t(size_t, 1 << 
__PERCPU_REF_FLAG_BITS, + __alignof__(unsigned long)); + atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS); - ref->percpu_count_ptr = - (unsigned long)alloc_percpu_gfp(unsigned long, gfp); + ref->percpu_count_ptr = (unsigned long) + __alloc_percpu_gfp(sizeof(unsigned long), align, gfp); if (!ref->percpu_count_ptr) return -ENOMEM; @@ -80,7 +83,7 @@ void percpu_ref_exit(struct percpu_ref *ref) if (percpu_count) { free_percpu(percpu_count); - ref->percpu_count_ptr = __PERCPU_REF_ATOMIC; + ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; } } EXPORT_SYMBOL_GPL(percpu_ref_exit); @@ -145,10 +148,10 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { - WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC, + WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC_DEAD, "%s called more than once on %pf!", __func__, ref->release); - ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; + ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC_DEAD; ref->confirm_switch = confirm_kill; call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); @@ -180,12 +183,12 @@ void percpu_ref_reinit(struct percpu_ref *ref) * Restore per-cpu operation. smp_store_release() is paired with * smp_read_barrier_depends() in __ref_is_percpu() and guarantees * that the zeroing is visible to all percpu accesses which can see - * the following __PERCPU_REF_ATOMIC clearing. + * the following __PERCPU_REF_ATOMIC_DEAD clearing. */ for_each_possible_cpu(cpu) *per_cpu_ptr(percpu_count, cpu) = 0; smp_store_release(&ref->percpu_count_ptr, - ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); + ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD); } EXPORT_SYMBOL_GPL(percpu_ref_reinit); -- cgit v1.2.3 From 490c79a65708873228cf114cf00e32c204e4e907 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:49 -0400 Subject: percpu_ref: decouple switching to atomic mode and killing percpu_ref has treated the dropping of the base reference and switching to atomic mode as an integral operation; however, there's nothing inherent tying the two together. The use cases for percpu_ref have been expanding continuously. While the current init/kill/reinit/exit model can cover a lot, the coupling of kill/reinit with atomic/percpu mode switching is turning out to be too restrictive for use cases where many percpu_refs are created and destroyed back-to-back with only some of them reaching extended operation. The coupling also makes implementing always-atomic debug mode difficult. This patch separates out atomic mode switching into percpu_ref_switch_to_atomic() and reimplements percpu_ref_kill_and_confirm() on top of it. * The handling of __PERCPU_REF_ATOMIC and __PERCPU_REF_DEAD is now differentiated. Among get/put operations, percpu_ref_tryget_live() is the only one which cares about DEAD. * percpu_ref_switch_to_atomic() can be called multiple times on the same ref. This means that multiple @confirm_switch may get queued up which we can't do reliably without extra memory area. This is handled by making the later invocation synchronously wait for the completion of the previous one. This isn't particularly desirable but such synchronous waits shouldn't happen in most cases. 
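For illustration only (not part of this patch): a minimal sketch of how a caller might pair percpu_ref_switch_to_atomic() with a confirmation callback. The my_gate structure and all my_* names are hypothetical, and the completion is assumed to have been initialized at setup time.

  #include <linux/percpu-refcount.h>
  #include <linux/completion.h>

  struct my_gate {
          struct percpu_ref       ref;
          struct completion       switched;       /* init_completion() at setup */
  };

  /* runs from RCU callback context once all CPUs see atomic mode; must not block */
  static void my_confirm_atomic(struct percpu_ref *ref)
  {
          struct my_gate *g = container_of(ref, struct my_gate, ref);

          complete(&g->switched);
  }

  static void my_quiesce(struct my_gate *g)
  {
          reinit_completion(&g->switched);
          percpu_ref_switch_to_atomic(&g->ref, my_confirm_atomic);
          wait_for_completion(&g->switched);
          /* from here on, gets and puts hit the single atomic counter */
  }

percpu_ref_kill_and_confirm() is reimplemented below on top of exactly this switching path, with the DEAD flag set in addition.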
Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet Cc: Jens Axboe Cc: Christoph Hellwig Cc: Johannes Weiner --- include/linux/percpu-refcount.h | 8 ++- lib/percpu-refcount.c | 141 +++++++++++++++++++++++++++++++--------- 2 files changed, 116 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index bd9483d390b4..d1252e1335e8 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -78,9 +78,11 @@ struct percpu_ref { int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, gfp_t gfp); void percpu_ref_exit(struct percpu_ref *ref); +void percpu_ref_switch_to_atomic(struct percpu_ref *ref, + percpu_ref_func_t *confirm_switch); +void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); -void percpu_ref_reinit(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref @@ -111,7 +113,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, /* paired with smp_store_release() in percpu_ref_reinit() */ smp_read_barrier_depends(); - if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)) + if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; @@ -193,6 +195,8 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_inc(*percpu_count); ret = true; + } else if (!(ACCESS_ONCE(ref->percpu_count_ptr) & __PERCPU_REF_DEAD)) { + ret = atomic_long_inc_not_zero(&ref->count); } rcu_read_unlock_sched(); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index e2ff19f970cf..6e0d14366c5d 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -1,6 +1,8 @@ #define pr_fmt(fmt) "%s: " fmt "\n", __func__ #include +#include +#include #include /* @@ -31,6 +33,8 @@ #define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) +static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq); + static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) { return (unsigned long __percpu *) @@ -88,7 +92,19 @@ void percpu_ref_exit(struct percpu_ref *ref) } EXPORT_SYMBOL_GPL(percpu_ref_exit); -static void percpu_ref_kill_rcu(struct rcu_head *rcu) +static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu) +{ + struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); + + ref->confirm_switch(ref); + ref->confirm_switch = NULL; + wake_up_all(&percpu_ref_switch_waitq); + + /* drop ref from percpu_ref_switch_to_atomic() */ + percpu_ref_put(ref); +} + +static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) { struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); unsigned long __percpu *percpu_count = percpu_count_ptr(ref); @@ -116,47 +132,79 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count); WARN_ONCE(atomic_long_read(&ref->count) <= 0, - "percpu ref (%pf) <= 0 (%ld) after killed", + "percpu ref (%pf) <= 0 (%ld) after switching to atomic", ref->release, atomic_long_read(&ref->count)); - /* @ref is viewed as dead on all CPUs, send out kill confirmation */ - if (ref->confirm_switch) - ref->confirm_switch(ref); + /* @ref is viewed as dead on all CPUs, send out switch confirmation */ + percpu_ref_call_confirm_rcu(rcu); +} - /* - * Now we're in single atomic_long_t mode with a consistent - * refcount, so it's safe to drop our initial ref: - */ - 
percpu_ref_put(ref); +static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref) +{ +} + +static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, + percpu_ref_func_t *confirm_switch) +{ + if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) { + /* switching from percpu to atomic */ + ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; + + /* + * Non-NULL ->confirm_switch is used to indicate that + * switching is in progress. Use noop one if unspecified. + */ + WARN_ON_ONCE(ref->confirm_switch); + ref->confirm_switch = + confirm_switch ?: percpu_ref_noop_confirm_switch; + + percpu_ref_get(ref); /* put after confirmation */ + call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu); + } else if (confirm_switch) { + /* + * Somebody already set ATOMIC. Switching may still be in + * progress. @confirm_switch must be invoked after the + * switching is complete and a full sched RCU grace period + * has passed. Wait synchronously for the previous + * switching and schedule @confirm_switch invocation. + */ + wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); + ref->confirm_switch = confirm_switch; + + percpu_ref_get(ref); /* put after confirmation */ + call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu); + } } /** - * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation - * @ref: percpu_ref to kill - * @confirm_kill: optional confirmation callback + * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode + * @ref: percpu_ref to switch to atomic mode + * @confirm_switch: optional confirmation callback * - * Equivalent to percpu_ref_kill() but also schedules kill confirmation if - * @confirm_kill is not NULL. @confirm_kill, which may not block, will be - * called after @ref is seen as dead from all CPUs - all further - * invocations of percpu_ref_tryget_live() will fail. See - * percpu_ref_tryget_live() for more details. + * There's no reason to use this function for the usual reference counting. + * Use percpu_ref_kill[_and_confirm](). + * + * Schedule switching of @ref to atomic mode. All its percpu counts will + * be collected to the main atomic counter. On completion, when all CPUs + * are guaraneed to be in atomic mode, @confirm_switch, which may not + * block, is invoked. This function may be invoked concurrently with all + * the get/put operations and can safely be mixed with kill and reinit + * operations. * - * Due to the way percpu_ref is implemented, @confirm_kill will be called - * after at least one full RCU grace period has passed but this is an - * implementation detail and callers must not depend on it. + * This function normally doesn't block and can be called from any context + * but it may block if @confirm_kill is specified and @ref is already in + * the process of switching to atomic mode. In such cases, @confirm_switch + * will be invoked after the switching is complete. + * + * Due to the way percpu_ref is implemented, @confirm_switch will be called + * after at least one full sched RCU grace period has passed but this is an + * implementation detail and must not be depended upon. 
*/ -void percpu_ref_kill_and_confirm(struct percpu_ref *ref, - percpu_ref_func_t *confirm_kill) +void percpu_ref_switch_to_atomic(struct percpu_ref *ref, + percpu_ref_func_t *confirm_switch) { - WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC_DEAD, - "%s called more than once on %pf!", __func__, ref->release); - - ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC_DEAD; - ref->confirm_switch = confirm_kill; - - call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); + __percpu_ref_switch_to_atomic(ref, confirm_switch); } -EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); /** * percpu_ref_reinit - re-initialize a percpu refcount @@ -192,3 +240,34 @@ void percpu_ref_reinit(struct percpu_ref *ref) ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD); } EXPORT_SYMBOL_GPL(percpu_ref_reinit); + +/** + * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation + * @ref: percpu_ref to kill + * @confirm_kill: optional confirmation callback + * + * Equivalent to percpu_ref_kill() but also schedules kill confirmation if + * @confirm_kill is not NULL. @confirm_kill, which may not block, will be + * called after @ref is seen as dead from all CPUs at which point all + * further invocations of percpu_ref_tryget_live() will fail. See + * percpu_ref_tryget_live() for details. + * + * This function normally doesn't block and can be called from any context + * but it may block if @confirm_kill is specified and @ref is already in + * the process of switching to atomic mode by percpu_ref_switch_atomic(). + * + * Due to the way percpu_ref is implemented, @confirm_switch will be called + * after at least one full sched RCU grace period has passed but this is an + * implementation detail and must not be depended upon. + */ +void percpu_ref_kill_and_confirm(struct percpu_ref *ref, + percpu_ref_func_t *confirm_kill) +{ + WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, + "%s called more than once on %pf!", __func__, ref->release); + + ref->percpu_count_ptr |= __PERCPU_REF_DEAD; + __percpu_ref_switch_to_atomic(ref, confirm_kill); + percpu_ref_put(ref); +} +EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); -- cgit v1.2.3 From f47ad45784611297b699f3dffb6c7222b76afe64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:49 -0400 Subject: percpu_ref: decouple switching to percpu mode and reinit percpu_ref has treated the dropping of the base reference and switching to atomic mode as an integral operation; however, there's nothing inherent tying the two together. The use cases for percpu_ref have been expanding continuously. While the current init/kill/reinit/exit model can cover a lot, the coupling of kill/reinit with atomic/percpu mode switching is turning out to be too restrictive for use cases where many percpu_refs are created and destroyed back-to-back with only some of them reaching extended operation. The coupling also makes implementing always-atomic debug mode difficult. This patch separates out percpu mode switching into percpu_ref_switch_to_percpu() and reimplements percpu_ref_reinit() on top of it. * DEAD still requires ATOMIC. A dead ref can't be switched to percpu mode w/o going through reinit. v2: __percpu_ref_switch_to_percpu() was missing static. Fixed. Reported by Fengguang aka kbuild test robot. 
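Purely illustrative (reusing the hypothetical my_gate from the earlier sketch): with both directions now decoupled from kill/reinit, a live ref can be bounced between modes, e.g. to inspect the consolidated count, without ever being killed.

  static void my_audit_window(struct my_gate *g)
  {
          /* asynchronous: atomic mode takes effect after a sched-RCU grace period */
          percpu_ref_switch_to_atomic(&g->ref, NULL);

          /* ... */

          /* may block until the pending switch above has been confirmed */
          percpu_ref_switch_to_percpu(&g->ref);
  }

A ref that has already been killed is skipped by percpu_ref_switch_to_percpu(); it has to be revived with percpu_ref_reinit() instead.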
Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet Cc: Jens Axboe Cc: Christoph Hellwig Cc: Johannes Weiner Cc: kbuild test robot --- include/linux/percpu-refcount.h | 3 +- lib/percpu-refcount.c | 73 ++++++++++++++++++++++++++++++----------- 2 files changed, 56 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index d1252e1335e8..cd7e20f0fe47 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -80,9 +80,10 @@ int __must_check percpu_ref_init(struct percpu_ref *ref, void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch); -void percpu_ref_reinit(struct percpu_ref *ref); +void percpu_ref_switch_to_percpu(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); +void percpu_ref_reinit(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 6e0d14366c5d..5a6d43baccc5 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -206,40 +206,54 @@ void percpu_ref_switch_to_atomic(struct percpu_ref *ref, __percpu_ref_switch_to_atomic(ref, confirm_switch); } -/** - * percpu_ref_reinit - re-initialize a percpu refcount - * @ref: perpcu_ref to re-initialize - * - * Re-initialize @ref so that it's in the same state as when it finished - * percpu_ref_init(). @ref must have been initialized successfully, killed - * and reached 0 but not exited. - * - * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while - * this function is in progress. - */ -void percpu_ref_reinit(struct percpu_ref *ref) +static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) { unsigned long __percpu *percpu_count = percpu_count_ptr(ref); int cpu; BUG_ON(!percpu_count); - WARN_ON_ONCE(!percpu_ref_is_zero(ref)); - atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS); + if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) + return; + + wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); + + atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); /* * Restore per-cpu operation. smp_store_release() is paired with * smp_read_barrier_depends() in __ref_is_percpu() and guarantees * that the zeroing is visible to all percpu accesses which can see - * the following __PERCPU_REF_ATOMIC_DEAD clearing. + * the following __PERCPU_REF_ATOMIC clearing. */ for_each_possible_cpu(cpu) *per_cpu_ptr(percpu_count, cpu) = 0; smp_store_release(&ref->percpu_count_ptr, - ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD); + ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); +} + +/** + * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode + * @ref: percpu_ref to switch to percpu mode + * + * There's no reason to use this function for the usual reference counting. + * To re-use an expired ref, use percpu_ref_reinit(). + * + * Switch @ref to percpu mode. This function may be invoked concurrently + * with all the get/put operations and can safely be mixed with kill and + * reinit operations. + * + * This function normally doesn't block and can be called from any context + * but it may block if @ref is in the process of switching to atomic mode + * by percpu_ref_switch_atomic(). 
+ */ +void percpu_ref_switch_to_percpu(struct percpu_ref *ref) +{ + /* a dying or dead ref can't be switched to percpu mode w/o reinit */ + if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) + __percpu_ref_switch_to_percpu(ref); } -EXPORT_SYMBOL_GPL(percpu_ref_reinit); /** * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation @@ -253,8 +267,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_reinit); * percpu_ref_tryget_live() for details. * * This function normally doesn't block and can be called from any context - * but it may block if @confirm_kill is specified and @ref is already in - * the process of switching to atomic mode by percpu_ref_switch_atomic(). + * but it may block if @confirm_kill is specified and @ref is in the + * process of switching to atomic mode by percpu_ref_switch_atomic(). * * Due to the way percpu_ref is implemented, @confirm_switch will be called * after at least one full sched RCU grace period has passed but this is an @@ -271,3 +285,24 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_put(ref); } EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); + +/** + * percpu_ref_reinit - re-initialize a percpu refcount + * @ref: perpcu_ref to re-initialize + * + * Re-initialize @ref so that it's in the same state as when it finished + * percpu_ref_init(). @ref must have been initialized successfully and + * reached 0 but not exited. + * + * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while + * this function is in progress. + */ +void percpu_ref_reinit(struct percpu_ref *ref) +{ + WARN_ON_ONCE(!percpu_ref_is_zero(ref)); + + ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; + percpu_ref_get(ref); + __percpu_ref_switch_to_percpu(ref); +} +EXPORT_SYMBOL_GPL(percpu_ref_reinit); -- cgit v1.2.3 From 2aad2a86f6685c10360ec8a5a55eb9ab7059cb72 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:50 -0400 Subject: percpu_ref: add PERCPU_REF_INIT_* flags With the recent addition of percpu_ref_reinit(), percpu_ref now can be used as a persistent switch which can be turned on and off repeatedly where turning off maps to killing the ref and waiting for it to drain; however, there currently isn't a way to initialize a percpu_ref in its off (killed and drained) state, which can be inconvenient for certain persistent switch use cases. Similarly, percpu_ref_switch_to_atomic/percpu() allow dynamic selection of operation mode; however, currently a newly initialized percpu_ref is always in percpu mode making it impossible to avoid the latency overhead of switching to atomic mode. This patch adds @flags to percpu_ref_init() and implements the following flags. * PERCPU_REF_INIT_ATOMIC : start ref in atomic mode * PERCPU_REF_INIT_DEAD : start ref killed and drained These flags should be able to serve the above two use cases. v2: target_core_tpg.c conversion was missing. Fixed. 
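An illustrative sketch of the two flags (my_gate and my_release are hypothetical names, not taken from the patch):

  static void my_release(struct percpu_ref *ref)
  {
          /* last reference has been dropped; must not sleep */
  }

  static int my_gate_setup(struct my_gate *g)
  {
          /*
           * Start "off": killed and drained, ref == 0, atomic mode implied.
           * PERCPU_REF_INIT_ATOMIC would instead start live but atomic.
           */
          return percpu_ref_init(&g->ref, my_release, PERCPU_REF_INIT_DEAD,
                                 GFP_KERNEL);
  }

  static void my_gate_turn_on(struct my_gate *g)
  {
          percpu_ref_reinit(&g->ref);     /* ref == 1, percpu mode */
  }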
Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet Cc: Jens Axboe Cc: Christoph Hellwig Cc: Johannes Weiner --- block/blk-mq.c | 2 +- drivers/target/target_core_tpg.c | 2 +- fs/aio.c | 4 ++-- include/linux/percpu-refcount.h | 18 +++++++++++++++++- kernel/cgroup.c | 7 ++++--- lib/percpu-refcount.c | 23 ++++++++++++++++++----- 6 files changed, 43 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/block/blk-mq.c b/block/blk-mq.c index 44a78ae3f899..d85fe01c44ef 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1796,7 +1796,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) goto err_hctxs; if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, - GFP_KERNEL)) + 0, GFP_KERNEL)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 4ab6da338585..be783f717f19 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -819,7 +819,7 @@ int core_tpg_add_lun( { int ret; - ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, 0, GFP_KERNEL); if (ret < 0) return ret; diff --git a/fs/aio.c b/fs/aio.c index 8d217ed04e6e..84a751005f5b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -661,10 +661,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) INIT_LIST_HEAD(&ctx->active_reqs); - if (percpu_ref_init(&ctx->users, free_ioctx_users, GFP_KERNEL)) + if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL)) goto err; - if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, GFP_KERNEL)) + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL)) goto err; ctx->cpu = alloc_percpu(struct kioctx_cpu); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index cd7e20f0fe47..b0293f268cd2 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -63,6 +63,21 @@ enum { __PERCPU_REF_FLAG_BITS = 2, }; +/* @flags for percpu_ref_init() */ +enum { + /* + * Start w/ ref == 1 in atomic mode. Can be switched to percpu + * operation using percpu_ref_switch_to_percpu(). + */ + PERCPU_REF_INIT_ATOMIC = 1 << 0, + + /* + * Start dead w/ ref == 0 in atomic mode. Must be revived with + * percpu_ref_reinit() before used. Implies INIT_ATOMIC. 
+ */ + PERCPU_REF_INIT_DEAD = 1 << 1, +}; + struct percpu_ref { atomic_long_t count; /* @@ -76,7 +91,8 @@ struct percpu_ref { }; int __must_check percpu_ref_init(struct percpu_ref *ref, - percpu_ref_func_t *release, gfp_t gfp); + percpu_ref_func_t *release, unsigned int flags, + gfp_t gfp); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a99d504294de..753df01a9831 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1634,7 +1634,8 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) goto out; root_cgrp->id = ret; - ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, GFP_KERNEL); + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0, + GFP_KERNEL); if (ret) goto out; @@ -4510,7 +4511,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, init_and_link_css(css, ss, cgrp); - err = percpu_ref_init(&css->refcnt, css_release, GFP_KERNEL); + err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL); if (err) goto err_free_css; @@ -4583,7 +4584,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, goto out_unlock; } - ret = percpu_ref_init(&cgrp->self.refcnt, css_release, GFP_KERNEL); + ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL); if (ret) goto out_free_cgrp; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 5a6d43baccc5..ed280fb1e5b5 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -45,27 +45,40 @@ static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) * percpu_ref_init - initialize a percpu refcount * @ref: percpu_ref to initialize * @release: function which will be called when refcount hits 0 + * @flags: PERCPU_REF_INIT_* flags * @gfp: allocation mask to use * - * Initializes the refcount in single atomic counter mode with a refcount of 1; - * analagous to atomic_long_set(ref, 1). + * Initializes @ref. If @flags is zero, @ref starts in percpu mode with a + * refcount of 1; analagous to atomic_long_set(ref, 1). See the + * definitions of PERCPU_REF_INIT_* flags for flag behaviors. * * Note that @release must not sleep - it may potentially be called from RCU * callback context by percpu_ref_kill(). */ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, - gfp_t gfp) + unsigned int flags, gfp_t gfp) { size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS, __alignof__(unsigned long)); - - atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS); + unsigned long start_count = 0; ref->percpu_count_ptr = (unsigned long) __alloc_percpu_gfp(sizeof(unsigned long), align, gfp); if (!ref->percpu_count_ptr) return -ENOMEM; + if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) + ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; + else + start_count += PERCPU_COUNT_BIAS; + + if (flags & PERCPU_REF_INIT_DEAD) + ref->percpu_count_ptr |= __PERCPU_REF_DEAD; + else + start_count++; + + atomic_long_set(&ref->count, start_count); + ref->release = release; return 0; } -- cgit v1.2.3 From 1cae13e75b7a7848c03138636d4eb8d8a5054dd5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2014 13:31:50 -0400 Subject: percpu_ref: make INIT_ATOMIC and switch_to_atomic() sticky Currently, a percpu_ref which is initialized with PERPCU_REF_INIT_ATOMIC or switched to atomic mode via switch_to_atomic() automatically reverts to percpu mode on the first percpu_ref_reinit(). 
This makes the atomic mode difficult to use for cases where a percpu_ref is used as a persistent on/off switch which may be cycled multiple times. This patch makes such atomic state sticky so that it survives through kill/reinit cycles. After this patch, atomic state is cleared only by an explicit percpu_ref_switch_to_percpu() call. Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet Cc: Jens Axboe Cc: Christoph Hellwig Cc: Johannes Weiner --- include/linux/percpu-refcount.h | 5 ++++- lib/percpu-refcount.c | 20 +++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b0293f268cd2..00c01fc6d88c 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -67,7 +67,9 @@ enum { enum { /* * Start w/ ref == 1 in atomic mode. Can be switched to percpu - * operation using percpu_ref_switch_to_percpu(). + * operation using percpu_ref_switch_to_percpu(). If initialized + * with this flag, the ref will stay in atomic mode until + * percpu_ref_switch_to_percpu() is invoked on it. */ PERCPU_REF_INIT_ATOMIC = 1 << 0, @@ -87,6 +89,7 @@ struct percpu_ref { unsigned long percpu_count_ptr; percpu_ref_func_t *release; percpu_ref_func_t *confirm_switch; + bool force_atomic:1; struct rcu_head rcu; }; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index ed280fb1e5b5..6111bcb28376 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -67,6 +67,8 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, if (!ref->percpu_count_ptr) return -ENOMEM; + ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC; + if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; else @@ -202,7 +204,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, * are guaraneed to be in atomic mode, @confirm_switch, which may not * block, is invoked. This function may be invoked concurrently with all * the get/put operations and can safely be mixed with kill and reinit - * operations. + * operations. Note that @ref will stay in atomic mode across kill/reinit + * cycles until percpu_ref_switch_to_percpu() is called. * * This function normally doesn't block and can be called from any context * but it may block if @confirm_kill is specified and @ref is already in @@ -216,6 +219,7 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch) { + ref->force_atomic = true; __percpu_ref_switch_to_atomic(ref, confirm_switch); } @@ -255,7 +259,10 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) * * Switch @ref to percpu mode. This function may be invoked concurrently * with all the get/put operations and can safely be mixed with kill and - * reinit operations. + * reinit operations. This function reverses the sticky atomic state set + * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic(). If @ref is + * dying or dead, the actual switching takes place on the following + * percpu_ref_reinit(). 
* * This function normally doesn't block and can be called from any context * but it may block if @ref is in the process of switching to atomic mode @@ -263,6 +270,8 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) */ void percpu_ref_switch_to_percpu(struct percpu_ref *ref) { + ref->force_atomic = false; + /* a dying or dead ref can't be switched to percpu mode w/o reinit */ if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) __percpu_ref_switch_to_percpu(ref); @@ -304,8 +313,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); * @ref: perpcu_ref to re-initialize * * Re-initialize @ref so that it's in the same state as when it finished - * percpu_ref_init(). @ref must have been initialized successfully and - * reached 0 but not exited. + * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD. @ref must have been + * initialized successfully and reached 0 but not exited. * * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while * this function is in progress. @@ -316,6 +325,7 @@ void percpu_ref_reinit(struct percpu_ref *ref) ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; percpu_ref_get(ref); - __percpu_ref_switch_to_percpu(ref); + if (!ref->force_atomic) + __percpu_ref_switch_to_percpu(ref); } EXPORT_SYMBOL_GPL(percpu_ref_reinit); -- cgit v1.2.3
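To round off the series, an illustrative sketch (same hypothetical my_* names as above) of the sticky atomic behaviour introduced by this last patch:

  static int my_gate_setup_atomic(struct my_gate *g)
  {
          /* atomic mode is now sticky: it survives kill/reinit cycles */
          return percpu_ref_init(&g->ref, my_release, PERCPU_REF_INIT_ATOMIC,
                                 GFP_KERNEL);
  }

  static void my_gate_cycle(struct my_gate *g)
  {
          percpu_ref_kill(&g->ref);
          /* ... wait for my_release() to run, i.e. for the ref to drain ... */
          percpu_ref_reinit(&g->ref);             /* still in atomic mode */

          /* only an explicit call clears the sticky state */
          percpu_ref_switch_to_percpu(&g->ref);
  }

This is what makes persistent on/off switches, such as the blk-mq usage counter converted earlier in the series, practical to keep permanently in atomic mode when the percpu fast path isn't worth the switching cost.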