diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-19 20:06:06 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-19 20:06:06 +0300 |
commit | 5f4fc6d440d77a2cf74fe4ea56955674ac7e35e7 (patch) | |
tree | 87dedefb73acda80d945a84603c687002c0ce1d5 /net/rds/ib_rdma.c | |
parent | 249be8511b269495bc95cb8bdfdd5840b2ba73c0 (diff) | |
parent | 8d650cdedaabb33e85e9b7c517c0c71fcecc1de9 (diff) | |
download | linux-5f4fc6d440d77a2cf74fe4ea56955674ac7e35e7.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Fix AF_XDP cq entry leak, from Ilya Maximets.
2) Fix handling of PHY power-down on RTL8411B, from Heiner Kallweit.
3) Add some new PCI IDs to iwlwifi, from Ihab Zhaika.
4) Fix handling of neigh timers wrt. entries added by userspace, from
Lorenzo Bianconi.
5) Various cases of missing of_node_put(), from Nishka Dasgupta.
6) The new NET_ACT_CT needs to depend upon NF_NAT, from Yue Haibing.
7) Various RDS layer fixes, from Gerd Rausch.
8) Fix some more fallout from TCQ_F_CAN_BYPASS generalization, from
Cong Wang.
9) Fix FIB source validation checks over loopback, also from Cong Wang.
10) Use promisc for unsupported number of filters, from Justin Chen.
11) Missing sibling route unlink on failure in ipv6, from Ido Schimmel.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (90 commits)
tcp: fix tcp_set_congestion_control() use from bpf hook
ag71xx: fix return value check in ag71xx_probe()
ag71xx: fix error return code in ag71xx_probe()
usb: qmi_wwan: add D-Link DWM-222 A2 device ID
bnxt_en: Fix VNIC accounting when enabling aRFS on 57500 chips.
net: dsa: sja1105: Fix missing unlock on error in sk_buff()
gve: replace kfree with kvfree
selftests/bpf: fix test_xdp_noinline on s390
selftests/bpf: fix "valid read map access into a read-only array 1" on s390
net/mlx5: Replace kfree with kvfree
MAINTAINERS: update netsec driver
ipv6: Unlink sibling route in case of failure
liquidio: Replace vmalloc + memset with vzalloc
udp: Fix typo in net/ipv4/udp.c
net: bcmgenet: use promisc for unsupported filters
ipv6: rt6_check should return NULL if 'from' is NULL
tipc: initialize 'validated' field of received packets
selftests: add a test case for rp_filter
fib: relax source validation check for loopback packets
mlxsw: spectrum: Do not process learned records with a dummy FID
...
Diffstat (limited to 'net/rds/ib_rdma.c')
-rw-r--r-- | net/rds/ib_rdma.c | 60 |
1 files changed, 20 insertions, 40 deletions
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 0b347f46b2f4..c8c1e3ae8d84 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -40,9 +40,6 @@ struct workqueue_struct *rds_ib_mr_wq; -static DEFINE_PER_CPU(unsigned long, clean_list_grace); -#define CLEAN_LIST_BUSY_BIT 0 - static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) { struct rds_ib_device *rds_ibdev; @@ -195,12 +192,11 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; struct llist_node *ret; - unsigned long *flag; + unsigned long flags; - preempt_disable(); - flag = this_cpu_ptr(&clean_list_grace); - set_bit(CLEAN_LIST_BUSY_BIT, flag); + spin_lock_irqsave(&pool->clean_lock, flags); ret = llist_del_first(&pool->clean_list); + spin_unlock_irqrestore(&pool->clean_lock, flags); if (ret) { ibmr = llist_entry(ret, struct rds_ib_mr, llnode); if (pool->pool_type == RDS_IB_MR_8K_POOL) @@ -209,23 +205,9 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) rds_ib_stats_inc(s_ib_rdma_mr_1m_reused); } - clear_bit(CLEAN_LIST_BUSY_BIT, flag); - preempt_enable(); return ibmr; } -static inline void wait_clean_list_grace(void) -{ - int cpu; - unsigned long *flag; - - for_each_online_cpu(cpu) { - flag = &per_cpu(clean_list_grace, cpu); - while (test_bit(CLEAN_LIST_BUSY_BIT, flag)) - cpu_relax(); - } -} - void rds_ib_sync_mr(void *trans_private, int direction) { struct rds_ib_mr *ibmr = trans_private; @@ -324,8 +306,7 @@ static unsigned int llist_append_to_list(struct llist_head *llist, * of clusters. Each cluster has linked llist nodes of * MR_CLUSTER_SIZE mrs that are ready for reuse. */ -static void list_to_llist_nodes(struct rds_ib_mr_pool *pool, - struct list_head *list, +static void list_to_llist_nodes(struct list_head *list, struct llist_node **nodes_head, struct llist_node **nodes_tail) { @@ -402,8 +383,13 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, */ dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list); dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list); - if (free_all) + if (free_all) { + unsigned long flags; + + spin_lock_irqsave(&pool->clean_lock, flags); llist_append_to_list(&pool->clean_list, &unmap_list); + spin_unlock_irqrestore(&pool->clean_lock, flags); + } free_goal = rds_ib_flush_goal(pool, free_all); @@ -416,27 +402,20 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal); if (!list_empty(&unmap_list)) { - /* we have to make sure that none of the things we're about - * to put on the clean list would race with other cpus trying - * to pull items off. The llist would explode if we managed to - * remove something from the clean list and then add it back again - * while another CPU was spinning on that same item in llist_del_first. - * - * This is pretty unlikely, but just in case wait for an llist grace period - * here before adding anything back into the clean list. - */ - wait_clean_list_grace(); - - list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); + unsigned long flags; + + list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) { *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); clean_nodes = clean_nodes->next; } /* more than one entry in llist nodes */ - if (clean_nodes) + if (clean_nodes) { + spin_lock_irqsave(&pool->clean_lock, flags); llist_add_batch(clean_nodes, clean_tail, &pool->clean_list); - + spin_unlock_irqrestore(&pool->clean_lock, flags); + } } atomic_sub(unpinned, &pool->free_pinned); @@ -471,7 +450,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted); else rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted); - return ERR_PTR(-EAGAIN); + break; } /* We do have some empty MRs. Flush them out. */ @@ -485,7 +464,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) return ibmr; } - return ibmr; + return NULL; } static void rds_ib_mr_pool_flush_worker(struct work_struct *work) @@ -610,6 +589,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, init_llist_head(&pool->free_list); init_llist_head(&pool->drop_list); init_llist_head(&pool->clean_list); + spin_lock_init(&pool->clean_lock); mutex_init(&pool->flush_lock); init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); |