From f2c3037811381f9149243828c7eb9a1631df9f9c Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 11 Jul 2023 12:52:52 -0500 Subject: RDMA/irdma: Fix data race on CQP completion stats CQP completion statistics is read lockesly in irdma_wait_event and irdma_check_cqp_progress while it can be updated in the completion thread irdma_sc_ccq_get_cqe_info on another CPU as KCSAN reports. Make completion statistics an atomic variable to reflect coherent updates to it. This will also avoid load/store tearing logic bug potentially possible by compiler optimizations. [77346.170861] BUG: KCSAN: data-race in irdma_handle_cqp_op [irdma] / irdma_sc_ccq_get_cqe_info [irdma] [77346.171383] write to 0xffff8a3250b108e0 of 8 bytes by task 9544 on cpu 4: [77346.171483] irdma_sc_ccq_get_cqe_info+0x27a/0x370 [irdma] [77346.171658] irdma_cqp_ce_handler+0x164/0x270 [irdma] [77346.171835] cqp_compl_worker+0x1b/0x20 [irdma] [77346.172009] process_one_work+0x4d1/0xa40 [77346.172024] worker_thread+0x319/0x700 [77346.172037] kthread+0x180/0x1b0 [77346.172054] ret_from_fork+0x22/0x30 [77346.172136] read to 0xffff8a3250b108e0 of 8 bytes by task 9838 on cpu 2: [77346.172234] irdma_handle_cqp_op+0xf4/0x4b0 [irdma] [77346.172413] irdma_cqp_aeq_cmd+0x75/0xa0 [irdma] [77346.172592] irdma_create_aeq+0x390/0x45a [irdma] [77346.172769] irdma_rt_init_hw.cold+0x212/0x85d [irdma] [77346.172944] irdma_probe+0x54f/0x620 [irdma] [77346.173122] auxiliary_bus_probe+0x66/0xa0 [77346.173137] really_probe+0x140/0x540 [77346.173154] __driver_probe_device+0xc7/0x220 [77346.173173] driver_probe_device+0x5f/0x140 [77346.173190] __driver_attach+0xf0/0x2c0 [77346.173208] bus_for_each_dev+0xa8/0xf0 [77346.173225] driver_attach+0x29/0x30 [77346.173240] bus_add_driver+0x29c/0x2f0 [77346.173255] driver_register+0x10f/0x1a0 [77346.173272] __auxiliary_driver_register+0xbc/0x140 [77346.173287] irdma_init_module+0x55/0x1000 [irdma] [77346.173460] do_one_initcall+0x7d/0x410 [77346.173475] do_init_module+0x81/0x2c0 [77346.173491] load_module+0x1232/0x12c0 [77346.173506] __do_sys_finit_module+0x101/0x180 [77346.173522] __x64_sys_finit_module+0x3c/0x50 [77346.173538] do_syscall_64+0x39/0x90 [77346.173553] entry_SYSCALL_64_after_hwframe+0x63/0xcd [77346.173634] value changed: 0x0000000000000094 -> 0x0000000000000095 Fixes: 915cc7ac0f8e ("RDMA/irdma: Add miscellaneous utility definitions") Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230711175253.1289-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/irdma/utils.c') diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 71e1c5d34709..775a79946f7d 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -567,7 +567,7 @@ static int irdma_wait_event(struct irdma_pci_f *rf, bool cqp_error = false; int err_code = 0; - cqp_timeout.compl_cqp_cmds = rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]; + cqp_timeout.compl_cqp_cmds = atomic64_read(&rf->sc_dev.cqp->completed_ops); do { irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); if (wait_event_timeout(cqp_request->waitq, -- cgit v1.2.3 From f0842bb3d38863777e3454da5653d80b5fde6321 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 11 Jul 2023 12:52:53 -0500 Subject: RDMA/irdma: Fix data race on CQP request done KCSAN detects a data race on cqp_request->request_done memory location which is accessed locklessly in irdma_handle_cqp_op while being updated in irdma_cqp_ce_handler. Annotate lockless intent with READ_ONCE/WRITE_ONCE to avoid any compiler optimizations like load fusing and/or KCSAN warning. [222808.417128] BUG: KCSAN: data-race in irdma_cqp_ce_handler [irdma] / irdma_wait_event [irdma] [222808.417532] write to 0xffff8e44107019dc of 1 bytes by task 29658 on cpu 5: [222808.417610] irdma_cqp_ce_handler+0x21e/0x270 [irdma] [222808.417725] cqp_compl_worker+0x1b/0x20 [irdma] [222808.417827] process_one_work+0x4d1/0xa40 [222808.417835] worker_thread+0x319/0x700 [222808.417842] kthread+0x180/0x1b0 [222808.417852] ret_from_fork+0x22/0x30 [222808.417918] read to 0xffff8e44107019dc of 1 bytes by task 29688 on cpu 1: [222808.417995] irdma_wait_event+0x1e2/0x2c0 [irdma] [222808.418099] irdma_handle_cqp_op+0xae/0x170 [irdma] [222808.418202] irdma_cqp_cq_destroy_cmd+0x70/0x90 [irdma] [222808.418308] irdma_puda_dele_rsrc+0x46d/0x4d0 [irdma] [222808.418411] irdma_rt_deinit_hw+0x179/0x1d0 [irdma] [222808.418514] irdma_ib_dealloc_device+0x11/0x40 [irdma] [222808.418618] ib_dealloc_device+0x2a/0x120 [ib_core] [222808.418823] __ib_unregister_device+0xde/0x100 [ib_core] [222808.418981] ib_unregister_device+0x22/0x40 [ib_core] [222808.419142] irdma_ib_unregister_device+0x70/0x90 [irdma] [222808.419248] i40iw_close+0x6f/0xc0 [irdma] [222808.419352] i40e_client_device_unregister+0x14a/0x180 [i40e] [222808.419450] i40iw_remove+0x21/0x30 [irdma] [222808.419554] auxiliary_bus_remove+0x31/0x50 [222808.419563] device_remove+0x69/0xb0 [222808.419572] device_release_driver_internal+0x293/0x360 [222808.419582] driver_detach+0x7c/0xf0 [222808.419592] bus_remove_driver+0x8c/0x150 [222808.419600] driver_unregister+0x45/0x70 [222808.419610] auxiliary_driver_unregister+0x16/0x30 [222808.419618] irdma_exit_module+0x18/0x1e [irdma] [222808.419733] __do_sys_delete_module.constprop.0+0x1e2/0x310 [222808.419745] __x64_sys_delete_module+0x1b/0x30 [222808.419755] do_syscall_64+0x39/0x90 [222808.419763] entry_SYSCALL_64_after_hwframe+0x63/0xcd [222808.419829] value changed: 0x01 -> 0x03 Fixes: 915cc7ac0f8e ("RDMA/irdma: Add miscellaneous utility definitions") Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230711175253.1289-4-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 2 +- drivers/infiniband/hw/irdma/main.h | 2 +- drivers/infiniband/hw/irdma/utils.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw/irdma/utils.c') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 795f7fd4f257..1cfc03da89e7 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -2075,7 +2075,7 @@ void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) cqp_request->compl_info.error = info.error; if (cqp_request->waiting) { - cqp_request->request_done = true; + WRITE_ONCE(cqp_request->request_done, true); wake_up(&cqp_request->waitq); irdma_put_cqp_request(&rf->cqp, cqp_request); } else { diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index def6dd58dcd4..2323962cdeac 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -161,8 +161,8 @@ struct irdma_cqp_request { void (*callback_fcn)(struct irdma_cqp_request *cqp_request); void *param; struct irdma_cqp_compl_info compl_info; + bool request_done; /* READ/WRITE_ONCE macros operate on it */ bool waiting:1; - bool request_done:1; bool dynamic:1; }; diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 775a79946f7d..eb083f70b09f 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -481,7 +481,7 @@ void irdma_free_cqp_request(struct irdma_cqp *cqp, if (cqp_request->dynamic) { kfree(cqp_request); } else { - cqp_request->request_done = false; + WRITE_ONCE(cqp_request->request_done, false); cqp_request->callback_fcn = NULL; cqp_request->waiting = false; @@ -515,7 +515,7 @@ irdma_free_pending_cqp_request(struct irdma_cqp *cqp, { if (cqp_request->waiting) { cqp_request->compl_info.error = true; - cqp_request->request_done = true; + WRITE_ONCE(cqp_request->request_done, true); wake_up(&cqp_request->waitq); } wait_event_timeout(cqp->remove_wq, @@ -571,7 +571,7 @@ static int irdma_wait_event(struct irdma_pci_f *rf, do { irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); if (wait_event_timeout(cqp_request->waitq, - cqp_request->request_done, + READ_ONCE(cqp_request->request_done), msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS))) break; -- cgit v1.2.3