Diffstat (limited to 'drivers/infiniband')
82 files changed, 1803 insertions, 974 deletions
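The first hunk below (cma.c) replaces the unconditional rdma_restrack_kadd() call in _cma_attach_to_dev() with a branch on res.kern_name, so kernel-owned and user-owned CM IDs now register through separate restrack paths. A minimal user-space C sketch of that dispatch pattern follows; the struct and helpers are hypothetical stand-ins for the kernel's rdma_restrack_entry, rdma_restrack_kadd() and rdma_restrack_uadd(), not kernel code:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for struct rdma_restrack_entry. */
struct restrack_entry {
	const char *kern_name;	/* set only for kernel-owned resources */
	bool user;		/* filled in by the add helpers below */
};

/* Stand-in for rdma_restrack_kadd(): kernel resource, no owning task. */
static void restrack_kadd(struct restrack_entry *res)
{
	res->user = false;
	printf("kadd: kernel owner \"%s\"\n", res->kern_name);
}

/* Stand-in for rdma_restrack_uadd(): user resource, tracked per task. */
static void restrack_uadd(struct restrack_entry *res)
{
	res->user = true;
	printf("uadd: user-owned resource\n");
}

/* Mirrors the branch _cma_attach_to_dev() gains in the hunk below. */
static void attach_to_dev(struct restrack_entry *res)
{
	if (res->kern_name)
		restrack_kadd(res);
	else
		restrack_uadd(res);
}

int main(void)
{
	struct restrack_entry kres = { .kern_name = "nvme-rdma" };	/* illustrative name */
	struct restrack_entry ures = { .kern_name = NULL };

	attach_to_dev(&kres);
	attach_to_dev(&ures);
	return 0;
}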
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 8cd113b0ddfb..e15546ae4d0f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -494,7 +494,10 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - rdma_restrack_kadd(&id_priv->res); + if (id_priv->res.kern_name) + rdma_restrack_kadd(&id_priv->res); + else + rdma_restrack_uadd(&id_priv->res); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, @@ -4613,85 +4616,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data) kfree(cma_dev); } -static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct nlmsghdr *nlh; - struct rdma_cm_id_stats *id_stats; - struct rdma_id_private *id_priv; - struct rdma_cm_id *id = NULL; - struct cma_device *cma_dev; - int i_dev = 0, i_id = 0; - - /* - * We export all of the IDs as a sequence of messages. Each - * ID gets its own netlink message. - */ - mutex_lock(&lock); - - list_for_each_entry(cma_dev, &dev_list, list) { - if (i_dev < cb->args[0]) { - i_dev++; - continue; - } - - i_id = 0; - list_for_each_entry(id_priv, &cma_dev->id_list, list) { - if (i_id < cb->args[1]) { - i_id++; - continue; - } - - id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq, - sizeof *id_stats, RDMA_NL_RDMA_CM, - RDMA_NL_RDMA_CM_ID_STATS, - NLM_F_MULTI); - if (!id_stats) - goto out; - - memset(id_stats, 0, sizeof *id_stats); - id = &id_priv->id; - id_stats->node_type = id->route.addr.dev_addr.dev_type; - id_stats->port_num = id->port_num; - id_stats->bound_dev_if = - id->route.addr.dev_addr.bound_dev_if; - - if (ibnl_put_attr(skb, nlh, - rdma_addr_size(cma_src_addr(id_priv)), - cma_src_addr(id_priv), - RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) - goto out; - if (ibnl_put_attr(skb, nlh, - rdma_addr_size(cma_dst_addr(id_priv)), - cma_dst_addr(id_priv), - RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) - goto out; - - id_stats->pid = task_pid_vnr(id_priv->res.task); - id_stats->port_space = id->ps; - id_stats->cm_state = id_priv->state; - id_stats->qp_num = id_priv->qp_num; - id_stats->qp_type = id->qp_type; - - i_id++; - nlmsg_end(skb, nlh); - } - - cb->args[1] = 0; - i_dev++; - } - -out: - mutex_unlock(&lock); - cb->args[0] = i_dev; - cb->args[1] = i_id; - - return skb->len; -} - -static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = { - [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats}, -}; - static int cma_init_net(struct net *net) { struct cma_pernet *pernet = cma_pernet(net); @@ -4740,7 +4664,6 @@ static int __init cma_init(void) if (ret) goto err; - rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table); cma_configfs_init(); return 0; @@ -4756,7 +4679,6 @@ err_wq: static void __exit cma_cleanup(void) { cma_configfs_exit(); - rdma_nl_unregister(RDMA_NL_RDMA_CM); ib_unregister_client(&cma_client); unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); @@ -4764,7 +4686,5 @@ static void __exit cma_cleanup(void) destroy_workqueue(cma_wq); } -MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1); - module_init(cma_init); module_exit(cma_cleanup); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 42a49982f66e..bcb3e3029a9b 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -270,7 +270,6 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, 
#endif struct ib_device *ib_device_get_by_index(u32 ifindex); -void ib_device_put(struct ib_device *device); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 200431c540f2..55221990d946 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -121,13 +121,12 @@ static int ib_device_check_mandatory(struct ib_device *device) }; int i; + device->kverbs_provider = true; for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { if (!*(void **) ((void *) &device->ops + mandatory_table[i].offset)) { - dev_warn(&device->dev, - "Device is missing mandatory function %s\n", - mandatory_table[i].name); - return -EINVAL; + device->kverbs_provider = false; + break; } } @@ -156,19 +155,26 @@ struct ib_device *ib_device_get_by_index(u32 index) down_read(&lists_rwsem); device = __ib_device_get_by_index(index); if (device) { - /* Do not return a device if unregistration has started. */ - if (!refcount_inc_not_zero(&device->refcount)) + if (!ib_device_try_get(device)) device = NULL; } up_read(&lists_rwsem); return device; } +/** + * ib_device_put - Release IB device reference + * @device: device whose reference is to be released + * + * ib_device_put() releases a reference to the IB device to allow it to be + * unregistered and eventually freed. + */ void ib_device_put(struct ib_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->unreg_completion); } +EXPORT_SYMBOL(ib_device_put); static struct ib_device *__ib_device_get_by_name(const char *name) { @@ -268,7 +274,7 @@ static struct class ib_class = { }; /** - * ib_alloc_device - allocate an IB device struct + * _ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate * * Low-level drivers should use ib_alloc_device() to allocate &struct * @@ -277,7 +283,7 @@ static struct class ib_class = { * ib_dealloc_device() must be used to free structures allocated with * ib_alloc_device(). 
*/ -struct ib_device *ib_alloc_device(size_t size) +struct ib_device *_ib_alloc_device(size_t size) { struct ib_device *device; @@ -288,7 +294,7 @@ struct ib_device *ib_alloc_device(size_t size) if (!device) return NULL; - rdma_restrack_init(&device->res); + rdma_restrack_init(device); device->dev.class = &ib_class; device_initialize(&device->dev); @@ -298,12 +304,11 @@ struct ib_device *ib_alloc_device(size_t size) rwlock_init(&device->client_data_lock); INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->port_list); - refcount_set(&device->refcount, 1); init_completion(&device->unreg_completion); return device; } -EXPORT_SYMBOL(ib_alloc_device); +EXPORT_SYMBOL(_ib_alloc_device); /** * ib_dealloc_device - free an IB device struct @@ -316,7 +321,7 @@ void ib_dealloc_device(struct ib_device *device) WARN_ON(!list_empty(&device->client_data_list)); WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && device->reg_state != IB_DEV_UNINITIALIZED); - rdma_restrack_clean(&device->res); + rdma_restrack_clean(device); put_device(&device->dev); } EXPORT_SYMBOL(ib_dealloc_device); @@ -325,6 +330,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client { struct ib_client_data *context; + if (!device->kverbs_provider && !client->no_kverbs_req) + return -EOPNOTSUPP; + context = kmalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; @@ -608,6 +616,7 @@ int ib_register_device(struct ib_device *device, const char *name) goto cg_cleanup; } + refcount_set(&device->refcount, 1); device->reg_state = IB_DEV_REGISTERED; list_for_each_entry(client, &client_list, list) @@ -1261,6 +1270,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, disassociate_ucontext); SET_DEVICE_OP(dev_ops, drain_rq); SET_DEVICE_OP(dev_ops, drain_sq); + SET_DEVICE_OP(dev_ops, fill_res_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 476abc74178e..350ea2bab78a 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -87,7 +87,8 @@ static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, - [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}, + [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb} }; static struct workqueue_struct *iwcm_wq; @@ -525,6 +526,8 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) cm_id->mapped = true; pm_msg.loc_addr = cm_id->local_addr; pm_msg.rem_addr = cm_id->remote_addr; + pm_msg.flags = (cm_id->device->iwcm->driver_flags & IW_F_NO_PORT_MAP) ? 
+ IWPM_FLAGS_NO_PORT_MAP : 0; if (active) status = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_IWCM); @@ -543,7 +546,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) return iwpm_create_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr, - RDMA_NL_IWCM); + RDMA_NL_IWCM, pm_msg.flags); } /* diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 8861c052155a..2452b0ddcf0d 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -34,18 +34,25 @@ #include "iwpm_util.h" static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser"; -static int iwpm_ulib_version = 3; +u16 iwpm_ulib_version = IWPM_UABI_VERSION_MIN; static int iwpm_user_pid = IWPM_PID_UNDEFINED; static atomic_t echo_nlmsg_seq; +/** + * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid + * + * Returns true if the pid is greater than zero, otherwise returns false + */ int iwpm_valid_pid(void) { return iwpm_user_pid > 0; } -/* - * iwpm_register_pid - Send a netlink query to user space - * for the iwarp port mapper pid +/** + * iwpm_register_pid - Send a netlink query to userspace + * to get the iwarp port mapper pid + * @pm_msg: Contains driver info to send to the userspace port mapper + * @nl_client: The index of the netlink client * * nlmsg attributes: * [IWPM_NLA_REG_PID_SEQ] @@ -124,12 +131,19 @@ pid_query_error: return ret; } -/* - * iwpm_add_mapping - Send a netlink add mapping message - * to the port mapper +/** + * iwpm_add_mapping - Send a netlink add mapping request to + * the userspace port mapper + * @pm_msg: Contains the local ip/tcp address info to send + * @nl_client: The index of the netlink client + * * nlmsg attributes: * [IWPM_NLA_MANAGE_MAPPING_SEQ] * [IWPM_NLA_MANAGE_ADDR] + * [IWPM_NLA_MANAGE_FLAGS] + * + * If the request is successful, the pm_msg stores + * the port mapper response (mapped address info) */ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) { @@ -173,6 +187,18 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) if (ret) goto add_mapping_error; + /* If flags are required and we're not V4, then return a quiet error */ + if (pm_msg->flags && iwpm_ulib_version == IWPM_UABI_VERSION_MIN) { + ret = -EINVAL; + goto add_mapping_error_nowarn; + } + if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) { + ret = ibnl_put_attr(skb, nlh, sizeof(u32), &pm_msg->flags, + IWPM_NLA_MANAGE_FLAGS); + if (ret) + goto add_mapping_error; + } + nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; @@ -187,6 +213,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) return ret; add_mapping_error: pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +add_mapping_error_nowarn: if (skb) dev_kfree_skb(skb); if (nlmsg_request) @@ -194,13 +221,17 @@ add_mapping_error: return ret; } -/* - * iwpm_add_and_query_mapping - Send a netlink add and query - * mapping message to the port mapper +/** + * iwpm_add_and_query_mapping - Send a netlink add and query mapping + * request to the userspace port mapper + * @pm_msg: Contains the local ip/tcp address info to send + * @nl_client: The index of the netlink client + * * nlmsg attributes: * [IWPM_NLA_QUERY_MAPPING_SEQ] * [IWPM_NLA_QUERY_LOCAL_ADDR] * [IWPM_NLA_QUERY_REMOTE_ADDR] + * [IWPM_NLA_QUERY_FLAGS] */ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) { @@ -251,6 +282,18 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) if (ret) goto query_mapping_error; + /* If flags
are required and we're not V4, then return a quiet error */ + if (pm_msg->flags && iwpm_ulib_version == IWPM_UABI_VERSION_MIN) { + ret = -EINVAL; + goto query_mapping_error_nowarn; + } + if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) { + ret = ibnl_put_attr(skb, nlh, sizeof(u32), &pm_msg->flags, + IWPM_NLA_QUERY_FLAGS); + if (ret) + goto query_mapping_error; + } + nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; @@ -264,6 +307,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) return ret; query_mapping_error: pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +query_mapping_error_nowarn: if (skb) dev_kfree_skb(skb); if (nlmsg_request) @@ -271,9 +315,13 @@ query_mapping_error: return ret; } -/* - * iwpm_remove_mapping - Send a netlink remove mapping message - * to the port mapper +/** + * iwpm_remove_mapping - Send a netlink remove mapping request + * to the userspace port mapper + * + * @local_addr: Local ip/tcp address to remove + * @nl_client: The index of the netlink client + * * nlmsg attributes: * [IWPM_NLA_MANAGE_MAPPING_SEQ] * [IWPM_NLA_MANAGE_ADDR] @@ -344,9 +392,14 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = { [IWPM_NLA_RREG_PID_ERR] = { .type = NLA_U16 } }; -/* - * iwpm_register_pid_cb - Process a port mapper response to - * iwpm_register_pid() +/** + * iwpm_register_pid_cb - Process the port mapper response to + * iwpm_register_pid query + * @skb: + * @cb: Contains the received message (payload and netlink header) + * + * If successful, the function receives the userspace port mapper pid + * which is used in future communication with the port mapper */ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) { @@ -379,7 +432,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) /* check device name, ulib name and version */ if (strcmp(pm_msg->dev_name, dev_name) || strcmp(iwpm_ulib_name, iwpm_name) || - iwpm_version != iwpm_ulib_version) { + iwpm_version < IWPM_UABI_VERSION_MIN) { pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n", __func__, dev_name, iwpm_name, iwpm_version); goto register_pid_response_exit; } iwpm_user_pid = cb->nlh->nlmsg_pid; + iwpm_ulib_version = iwpm_version; + if (iwpm_ulib_version < IWPM_UABI_VERSION) + pr_warn_once("%s: Down level iwpmd/pid %u. 
Continuing...", + __func__, iwpm_user_pid); atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", __func__, iwpm_user_pid); @@ -403,15 +460,19 @@ register_pid_response_exit: /* netlink attribute policy for the received response to add mapping request */ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = { - [IWPM_NLA_MANAGE_MAPPING_SEQ] = { .type = NLA_U32 }, - [IWPM_NLA_MANAGE_ADDR] = { .len = sizeof(struct sockaddr_storage) }, - [IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) }, - [IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 } + [IWPM_NLA_RMANAGE_MAPPING_SEQ] = { .type = NLA_U32 }, + [IWPM_NLA_RMANAGE_ADDR] = { + .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR] = { + .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 } }; -/* - * iwpm_add_mapping_cb - Process a port mapper response to - * iwpm_add_mapping() +/** + * iwpm_add_mapping_cb - Process the port mapper response to + * iwpm_add_mapping request + * @skb: + * @cb: Contains the received message (payload and netlink header) */ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) { @@ -430,7 +491,7 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); - msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]); + msg_seq = nla_get_u32(nltb[IWPM_NLA_RMANAGE_MAPPING_SEQ]); nlmsg_request = iwpm_find_nlmsg_request(msg_seq); if (!nlmsg_request) { pr_info("%s: Could not find a matching request (seq = %u)\n", @@ -439,9 +500,9 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) } pm_msg = nlmsg_request->req_buffer; local_sockaddr = (struct sockaddr_storage *) - nla_data(nltb[IWPM_NLA_MANAGE_ADDR]); + nla_data(nltb[IWPM_NLA_RMANAGE_ADDR]); mapped_sockaddr = (struct sockaddr_storage *) - nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]); + nla_data(nltb[IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR]); if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) { nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; @@ -472,17 +533,23 @@ add_mapping_response_exit: /* netlink attribute policy for the response to add and query mapping request * and response with remote address info */ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = { - [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 }, - [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) }, - [IWPM_NLA_QUERY_REMOTE_ADDR] = { .len = sizeof(struct sockaddr_storage) }, - [IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) }, - [IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RQUERY_MAPPING_SEQ] = { .type = NLA_U32 }, + [IWPM_NLA_RQUERY_LOCAL_ADDR] = { + .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RQUERY_REMOTE_ADDR] = { + .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { + .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { + .len = sizeof(struct sockaddr_storage) }, [IWPM_NLA_RQUERY_MAPPING_ERR] = { .type = NLA_U16 } }; -/* - * iwpm_add_and_query_mapping_cb - Process a port mapper response to - * iwpm_add_and_query_mapping() +/** + * iwpm_add_and_query_mapping_cb - Process the port mapper response to + * iwpm_add_and_query_mapping request + * @skb: + * @cb: Contains the received message (payload and netlink header) */ int 
iwpm_add_and_query_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -502,7 +569,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, return -EINVAL; atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); - msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]); + msg_seq = nla_get_u32(nltb[IWPM_NLA_RQUERY_MAPPING_SEQ]); nlmsg_request = iwpm_find_nlmsg_request(msg_seq); if (!nlmsg_request) { pr_info("%s: Could not find a matching request (seq = %u)\n", @@ -511,9 +578,9 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, } pm_msg = nlmsg_request->req_buffer; local_sockaddr = (struct sockaddr_storage *) - nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); + nla_data(nltb[IWPM_NLA_RQUERY_LOCAL_ADDR]); remote_sockaddr = (struct sockaddr_storage *) - nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); + nla_data(nltb[IWPM_NLA_RQUERY_REMOTE_ADDR]); mapped_loc_sockaddr = (struct sockaddr_storage *) nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); mapped_rem_sockaddr = (struct sockaddr_storage *) @@ -560,9 +627,13 @@ query_mapping_response_exit: return 0; } -/* - * iwpm_remote_info_cb - Process a port mapper message, containing - * the remote connecting peer address info +/** + * iwpm_remote_info_cb - Process remote connecting peer address info, which + * the port mapper has received from the connecting peer + * @skb: + * @cb: Contains the received message (payload and netlink header) + * + * Stores the IPv4/IPv6 address info in a hash table */ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) { @@ -588,9 +659,9 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); local_sockaddr = (struct sockaddr_storage *) - nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); + nla_data(nltb[IWPM_NLA_RQUERY_LOCAL_ADDR]); remote_sockaddr = (struct sockaddr_storage *) - nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); + nla_data(nltb[IWPM_NLA_RQUERY_REMOTE_ADDR]); mapped_loc_sockaddr = (struct sockaddr_storage *) nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); mapped_rem_sockaddr = (struct sockaddr_storage *) @@ -635,8 +706,14 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { [IWPM_NLA_MAPINFO_ULIB_VER] = { .type = NLA_U16 } }; -/* - * iwpm_mapping_info_cb - Process a port mapper request for mapping info +/** + * iwpm_mapping_info_cb - Process a notification that the userspace + * port mapper daemon is started + * @skb: + * @cb: Contains the received message (payload and netlink header) + * + * Using the received port mapper pid, send all the local mapping + * info records to the userspace port mapper */ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) { @@ -655,7 +732,7 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]); iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]); if (strcmp(iwpm_ulib_name, iwpm_name) || - iwpm_version != iwpm_ulib_version) { + iwpm_version < IWPM_UABI_VERSION_MIN) { pr_info("%s: Invalid port mapper name = %s version = %d\n", __func__, iwpm_name, iwpm_version); return ret; @@ -669,6 +746,11 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) iwpm_set_registration(nl_client, IWPM_REG_INCOMPL); atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); iwpm_user_pid = cb->nlh->nlmsg_pid; + + if (iwpm_ulib_version < IWPM_UABI_VERSION) + pr_warn_once("%s: Down level iwpmd/pid %u. 
Continuing...", + __func__, iwpm_user_pid); + if (!iwpm_mapinfo_available()) return 0; pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", @@ -684,9 +766,11 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = { [IWPM_NLA_MAPINFO_ACK_NUM] = { .type = NLA_U32 } }; -/* - * iwpm_ack_mapping_info_cb - Process a port mapper ack for - * the provided mapping info records +/** + * iwpm_ack_mapping_info_cb - Process the port mapper ack for + * the provided local mapping info records + * @skb: + * @cb: Contains the received message (payload and netlink header) */ int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) { @@ -712,8 +796,11 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = { [IWPM_NLA_ERR_CODE] = { .type = NLA_U16 }, }; -/* - * iwpm_mapping_error_cb - Process a port mapper error message +/** + * iwpm_mapping_error_cb - Process port mapper notification for error + * + * @skb: + * @cb: Contains the received message (payload and netlink header) */ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) { @@ -748,3 +835,46 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) up(&nlmsg_request->sem); return 0; } + +/* netlink attribute policy for the received hello request */ +static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = { + [IWPM_NLA_HELLO_ABI_VERSION] = { .type = NLA_U16 } +}; + +/** + * iwpm_hello_cb - Process a hello message from iwpmd + * + * @skb: + * @cb: Contains the received message (payload and netlink header) + * + * Using the received port mapper pid, send the kernel's abi_version + * after adjusting it to support the iwpmd version. + */ +int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *nltb[IWPM_NLA_HELLO_MAX]; + const char *msg_type = "Hello request"; + u8 nl_client; + u16 abi_version; + int ret = -EINVAL; + + if (iwpm_parse_nlmsg(cb, IWPM_NLA_HELLO_MAX, hello_policy, nltb, + msg_type)) { + pr_info("%s: Unable to parse nlmsg\n", __func__); + return ret; + } + abi_version = nla_get_u16(nltb[IWPM_NLA_HELLO_ABI_VERSION]); + nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid port mapper client = %d\n", + __func__, nl_client); + return ret; + } + iwpm_set_registration(nl_client, IWPM_REG_INCOMPL); + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + iwpm_ulib_version = min_t(u16, IWPM_UABI_VERSION, abi_version); + pr_debug("Using ABI version %u\n", iwpm_ulib_version); + iwpm_user_pid = cb->nlh->nlmsg_pid; + ret = iwpm_send_hello(nl_client, iwpm_user_pid, iwpm_ulib_version); + return ret; +} diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index cdb63f3f4de7..7b97f6e2075f 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -51,6 +51,12 @@ static DEFINE_SPINLOCK(iwpm_reminfo_lock); static DEFINE_MUTEX(iwpm_admin_lock); static struct iwpm_admin_data iwpm_admin; +/** + * iwpm_init - Allocate resources for the iwarp port mapper + * @nl_client: The index of the netlink client + * + * Should be called when network interface goes up. + */ int iwpm_init(u8 nl_client) { int ret = 0; @@ -87,6 +93,12 @@ init_exit: static void free_hash_bucket(void); static void free_reminfo_bucket(void); +/** + * iwpm_exit - Deallocate resources for the iwarp port mapper + * @nl_client: The index of the netlink client + * + * Should be called when network interface goes down. 
+ */ int iwpm_exit(u8 nl_client) { @@ -112,9 +124,17 @@ int iwpm_exit(u8 nl_client) static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, struct sockaddr_storage *); +/** + * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address + * info in a hash table + * @local_addr: Local ip/tcp address + * @mapped_addr: Mapped local ip/tcp address + * @nl_client: The index of the netlink client + * @map_flags: IWPM mapping flags + */ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, struct sockaddr_storage *mapped_sockaddr, - u8 nl_client) + u8 nl_client, u32 map_flags) { struct hlist_head *hash_bucket_head = NULL; struct iwpm_mapping_info *map_info; @@ -132,6 +152,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, memcpy(&map_info->mapped_sockaddr, mapped_sockaddr, sizeof(struct sockaddr_storage)); map_info->nl_client = nl_client; + map_info->map_flags = map_flags; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { @@ -150,6 +171,15 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, return ret; } +/** + * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address + * info from the hash table + * @local_addr: Local ip/tcp address + * @mapped_local_addr: Mapped local ip/tcp address + * + * Returns err code if mapping info is not found in the hash table, + * otherwise returns 0 + */ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, struct sockaddr_storage *mapped_local_addr) { @@ -250,6 +280,17 @@ void iwpm_add_remote_info(struct iwpm_remote_info *rem_info) spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); } +/** + * iwpm_get_remote_info - Get the remote connecting peer address info + * + * @mapped_loc_addr: Mapped local address of the listening peer + * @mapped_rem_addr: Mapped remote address of the connecting peer + * @remote_addr: To store the remote address of the connecting peer + * @nl_client: The index of the netlink client + * + * The remote address info is retrieved and provided to the client in + * the remote_addr. 
After that it is removed from the hash table + */ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, struct sockaddr_storage *mapped_rem_addr, struct sockaddr_storage *remote_addr, @@ -686,6 +727,14 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) if (ret) goto send_mapping_info_unlock; + if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) { + ret = ibnl_put_attr(skb, nlh, sizeof(u32), + &map_info->map_flags, + IWPM_NLA_MAPINFO_FLAGS); + if (ret) + goto send_mapping_info_unlock; + } + nlmsg_end(skb, nlh); iwpm_print_sockaddr(&map_info->local_sockaddr, @@ -754,3 +803,40 @@ int iwpm_mapinfo_available(void) spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return full_bucket; } + +int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + u32 msg_seq; + const char *err_str = ""; + int ret = -EINVAL; + + skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client); + if (!skb) { + err_str = "Unable to create a nlmsg"; + goto hello_num_error; + } + nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); + msg_seq = 0; + err_str = "Unable to put attribute of abi_version into nlmsg"; + ret = ibnl_put_attr(skb, nlh, sizeof(u16), &abi_version, + IWPM_NLA_HELLO_ABI_VERSION); + if (ret) + goto hello_num_error; + nlmsg_end(skb, nlh); + + ret = rdma_nl_unicast(skb, iwpm_pid); + if (ret) { + skb = NULL; + err_str = "Unable to send a nlmsg"; + goto hello_num_error; + } + pr_debug("%s: Sent hello abi_version = %u\n", __func__, abi_version); + return 0; +hello_num_error: + pr_info("%s: %s\n", __func__, err_str); + if (skb) + dev_kfree_skb(skb); + return ret; +} diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index af1fc14a0d3d..7e2bcc72f66c 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -78,6 +78,7 @@ struct iwpm_mapping_info { struct sockaddr_storage local_sockaddr; struct sockaddr_storage mapped_sockaddr; u8 nl_client; + u32 map_flags; }; struct iwpm_remote_info { @@ -266,4 +267,15 @@ int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max, * @msg: Message to print */ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg); + +/** + * iwpm_send_hello - Send hello response to iwpmd + * + * @nl_client: The index of the netlink client + * @abi_version: The kernel's abi_version + * + * Returns 0 on success or a negative error code + */ +int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version); +extern u16 iwpm_ulib_version; #endif diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 724f5a62e82f..eecfc0b377c9 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -56,7 +56,6 @@ EXPORT_SYMBOL(rdma_nl_chk_listeners); static bool is_nl_msg_valid(unsigned int type, unsigned int op) { static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS] = { - [RDMA_NL_RDMA_CM] = RDMA_NL_RDMA_CM_NUM_OPS, [RDMA_NL_IWCM] = RDMA_NL_IWPM_NUM_OPS, [RDMA_NL_LS] = RDMA_NL_LS_NUM_OPS, [RDMA_NL_NLDEV] = RDMA_NLDEV_NUM_OPS, @@ -181,8 +180,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } /* FIXME: Convert IWCM to properly handle doit callbacks */ - if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM || - index == RDMA_NL_IWCM) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) { struct netlink_dump_control c = { .dump = cb_table[op].dump, }; diff --git a/drivers/infiniband/core/nldev.c 
b/drivers/infiniband/core/nldev.c index e600fc23ae62..5601fa968244 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -314,7 +314,6 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) [RDMA_RESTRACK_CTX] = "ctx", }; - struct rdma_restrack_root *res = &device->res; struct nlattr *table_attr; int ret, i, curr; @@ -328,7 +327,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; - curr = rdma_restrack_count(res, i, task_active_pid_ns(current)); + curr = rdma_restrack_count(device, i, + task_active_pid_ns(current)); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; @@ -361,11 +361,19 @@ static int fill_res_name_pid(struct sk_buff *msg, return 0; } -static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, +static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (!dev->ops.fill_res_entry) + return false; + return dev->ops.fill_res_entry(msg, res); +} + +static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_qp *qp = container_of(res, struct ib_qp, res); - struct rdma_restrack_root *resroot = &qp->device->res; + struct ib_device *dev = qp->device; struct ib_qp_init_attr qp_init_attr; struct nlattr *entry_attr; struct ib_qp_attr qp_attr; @@ -415,7 +423,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -427,13 +435,12 @@ out: return -EMSGSIZE; } -static int fill_res_cm_id_entry(struct sk_buff *msg, - struct netlink_callback *cb, +static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct rdma_id_private *id_priv = container_of(res, struct rdma_id_private, res); - struct rdma_restrack_root *resroot = &id_priv->id.device->res; + struct ib_device *dev = id_priv->id.device; struct rdma_cm_id *cm_id = &id_priv->id; struct nlattr *entry_attr; @@ -475,7 +482,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -487,11 +494,11 @@ out: return -EMSGSIZE; } -static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, +static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_cq *cq = container_of(res, struct ib_cq, res); - struct rdma_restrack_root *resroot = &cq->device->res; + struct ib_device *dev = cq->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY); @@ -512,7 +519,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -524,18 +531,18 @@ out: return -EMSGSIZE; } -static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, +static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_mr *mr = container_of(res, struct ib_mr, res); 
- struct rdma_restrack_root *resroot = &mr->pd->device->res; + struct ib_device *dev = mr->pd->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY); if (!entry_attr) goto out; - if (netlink_capable(cb->skb, CAP_NET_ADMIN)) { + if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) goto err; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) @@ -549,7 +556,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -561,18 +568,18 @@ out: return -EMSGSIZE; } -static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, +static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); - struct rdma_restrack_root *resroot = &pd->device->res; + struct ib_device *dev = pd->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY); if (!entry_attr) goto out; - if (netlink_capable(cb->skb, CAP_NET_ADMIN)) { + if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, pd->local_dma_lkey)) goto err; @@ -584,15 +591,11 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD)) goto err; - if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, - pd->unsafe_global_rkey)) - goto err; if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -909,7 +912,7 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, } struct nldev_fill_res_entry { - int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb, + int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, u32 port); enum rdma_nldev_attr nldev_attr; enum rdma_nldev_command nldev_cmd; @@ -943,6 +946,17 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { }, }; +static bool is_visible_in_pid_ns(struct rdma_restrack_entry *res) +{ + /* + * 1. Kern resources should be visible in init name space only + * 2. Present only resources visible in the current namespace + */ + if (rdma_is_kernel_res(res)) + return task_active_pid_ns(current) == &init_pid_ns; + return task_active_pid_ns(current) == task_active_pid_ns(res->task); +} + static int res_get_common_dumpit(struct sk_buff *skb, struct netlink_callback *cb, enum rdma_restrack_type res_type) @@ -954,6 +968,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, struct nlattr *table_attr; struct ib_device *device; int start = cb->args[0]; + bool has_cap_net_admin; struct nlmsghdr *nlh; u32 index, port = 0; bool filled = false; @@ -1002,21 +1017,14 @@ static int res_get_common_dumpit(struct sk_buff *skb, goto err; } + has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); + down_read(&device->res.rwsem); hash_for_each_possible(device->res.hash, res, node, res_type) { if (idx < start) goto next; - if ((rdma_is_kernel_res(res) && - task_active_pid_ns(current) != &init_pid_ns) || - (!rdma_is_kernel_res(res) && task_active_pid_ns(current) != - task_active_pid_ns(res->task))) - /* - * 1. 
Kern resources should be visible in init - * namspace only - * 2. Present only resources visible in the current - * namespace - */ + if (!is_visible_in_pid_ns(res)) goto next; if (!rdma_restrack_get(res)) @@ -1030,7 +1038,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, filled = true; up_read(&device->res.rwsem); - ret = fe->fill_res_func(skb, cb, res, port); + ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); down_read(&device->res.rwsem); /* * Return resource back, but it won't be released till @@ -1077,35 +1085,18 @@ err_index: return ret; } -static int nldev_res_get_qp_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP); -} - -static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID); -} - -static int nldev_res_get_cq_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ); -} - -static int nldev_res_get_mr_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR); -} +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ + struct netlink_callback *cb) \ + { \ + return res_get_common_dumpit(skb, cb, type); \ + } -static int nldev_res_get_pd_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD); -} +RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); +RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); +RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); +RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); +RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index be6b8e1257d0..69f8db66925e 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -106,6 +106,8 @@ int uverbs_finalize_object(struct ib_uobject *uobj, enum uverbs_obj_access access, bool commit); +int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); + void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 46a5c553c624..f80b37d437ac 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -12,16 +12,15 @@ #include "cma_priv.h" -static int fill_res_noop(struct sk_buff *msg, - struct rdma_restrack_entry *entry) +/** + * rdma_restrack_init() - initialize resource tracking + * @dev: IB device + */ +void rdma_restrack_init(struct ib_device *dev) { - return 0; -} + struct rdma_restrack_root *res = &dev->res; -void rdma_restrack_init(struct rdma_restrack_root *res) -{ init_rwsem(&res->rwsem); - res->fill_res_entry = fill_res_noop; } static const char *type2str(enum rdma_restrack_type type) @@ -38,11 +37,15 @@ static const char *type2str(enum rdma_restrack_type type) return names[type]; }; -void rdma_restrack_clean(struct rdma_restrack_root *res) +/** + * rdma_restrack_clean() - clean resource tracking + * @dev: IB device + */ +void rdma_restrack_clean(struct ib_device *dev) { + struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_entry *e; char buf[TASK_COMM_LEN]; - struct ib_device *dev; const char *owner; int bkt; @@ -72,10 +75,16 @@ void 
rdma_restrack_clean(struct rdma_restrack_root *res) pr_err("restrack: %s", CUT_HERE); } -int rdma_restrack_count(struct rdma_restrack_root *res, - enum rdma_restrack_type type, +/** + * rdma_restrack_count() - the current usage of specific object + * @dev: IB device + * @type: actual type of object to operate + * @ns: PID namespace + */ +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns) { + struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_entry *e; u32 cnt = 0; @@ -161,17 +170,6 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) if (!dev) return; - if (res->type != RDMA_RESTRACK_CM_ID || rdma_is_kernel_res(res)) - res->task = NULL; - - if (!rdma_is_kernel_res(res)) { - if (!res->task) - rdma_restrack_set_task(res, NULL); - res->kern_name = NULL; - } else { - set_kern_name(res); - } - kref_init(&res->kref); init_completion(&res->comp); res->valid = true; @@ -187,6 +185,8 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) */ void rdma_restrack_kadd(struct rdma_restrack_entry *res) { + res->task = NULL; + set_kern_name(res); res->user = false; rdma_restrack_add(res); } @@ -198,6 +198,13 @@ EXPORT_SYMBOL(rdma_restrack_kadd); */ void rdma_restrack_uadd(struct rdma_restrack_entry *res) { + if (res->type != RDMA_RESTRACK_CM_ID) + res->task = NULL; + + if (!res->task) + rdma_restrack_set_task(res, NULL); + res->kern_name = NULL; + res->user = true; rdma_restrack_add(res); } diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index d22c4a2ebac6..89a5be3a2f97 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -179,7 +179,6 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, struct scatterlist *sg, u32 sg_cnt, u32 offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { - struct ib_device *dev = qp->pd->device; u32 max_sge = dir == DMA_TO_DEVICE ? 
qp->max_write_sge : qp->max_read_sge; struct ib_sge *sge; @@ -209,8 +208,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, rdma_wr->wr.sg_list = sge; for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) { - sge->addr = ib_sg_dma_address(dev, sg) + offset; - sge->length = ib_sg_dma_len(dev, sg) - offset; + sge->addr = sg_dma_address(sg) + offset; + sge->length = sg_dma_len(sg) - offset; sge->lkey = qp->pd->local_dma_lkey; total_len += sge->length; @@ -236,14 +235,13 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { - struct ib_device *dev = qp->pd->device; struct ib_rdma_wr *rdma_wr = &ctx->single.wr; ctx->nr_ops = 1; ctx->single.sge.lkey = qp->pd->local_dma_lkey; - ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset; - ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset; + ctx->single.sge.addr = sg_dma_address(sg) + offset; + ctx->single.sge.length = sg_dma_len(sg) - offset; memset(rdma_wr, 0, sizeof(*rdma_wr)); if (dir == DMA_TO_DEVICE) @@ -294,7 +292,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, * Skip to the S/G entry that sg_offset falls into: */ for (;;) { - u32 len = ib_sg_dma_len(dev, sg); + u32 len = sg_dma_len(sg); if (sg_offset < len) break; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 1efe0a74e06b..b69d3efa8712 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -165,15 +165,12 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - down_write(&mm->mmap_sem); - if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) || - (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) { - up_write(&mm->mmap_sem); + new_pinned = atomic64_add_return(npages, &mm->pinned_vm); + if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) { + atomic64_sub(npages, &mm->pinned_vm); ret = -ENOMEM; goto out; } - mm->pinned_vm = new_pinned; - up_write(&mm->mmap_sem); cur_base = addr & PAGE_MASK; @@ -233,9 +230,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, umem_release: __ib_umem_release(context->device, umem, 0); vma: - down_write(&mm->mmap_sem); - mm->pinned_vm -= ib_umem_num_pages(umem); - up_write(&mm->mmap_sem); + atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm); out: if (vma_list) free_page((unsigned long) vma_list); @@ -258,25 +253,12 @@ static void __ib_umem_release_tail(struct ib_umem *umem) kfree(umem); } -static void ib_umem_release_defer(struct work_struct *work) -{ - struct ib_umem *umem = container_of(work, struct ib_umem, work); - - down_write(&umem->owning_mm->mmap_sem); - umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem); - up_write(&umem->owning_mm->mmap_sem); - - __ib_umem_release_tail(umem); -} - /** * ib_umem_release - release memory pinned with ib_umem_get * @umem: umem struct to release */ void ib_umem_release(struct ib_umem *umem) { - struct ib_ucontext *context = umem->context; - if (umem->is_odp) { ib_umem_odp_release(to_ib_umem_odp(umem)); __ib_umem_release_tail(umem); @@ -285,26 +267,7 @@ void ib_umem_release(struct ib_umem *umem) __ib_umem_release(umem->context->device, umem, 1); - /* - * We may be called with the mm's mmap_sem already held. This - * can happen when a userspace munmap() is the call that drops - * the last reference to our file and calls our release - * method. 
If there are memory regions to destroy, we'll end - * up here and not be able to take the mmap_sem. In that case - * we defer the vm_locked accounting a workqueue. - */ - if (context->closing) { - if (!down_write_trylock(&umem->owning_mm->mmap_sem)) { - INIT_WORK(&umem->work, ib_umem_release_defer); - queue_work(ib_wq, &umem->work); - return; - } - } else { - down_write(&umem->owning_mm->mmap_sem); - } - umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem); - up_write(&umem->owning_mm->mmap_sem); - + atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm); __ib_umem_release_tail(umem); } EXPORT_SYMBOL(ib_umem_release); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index eb8a5eb65bfa..012044f16d1c 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -353,6 +353,8 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, umem->writable = root->umem.writable; umem->is_odp = 1; odp_data->per_mm = per_mm; + umem->owning_mm = per_mm->mm; + mmgrab(umem->owning_mm); mutex_init(&odp_data->umem_mutex); init_completion(&odp_data->notifier_completion); @@ -385,6 +387,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, out_page_list: vfree(odp_data->page_list); out_odp_data: + mmdrop(umem->owning_mm); kfree(odp_data); return ERR_PTR(ret); } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d4f1a2ef5015..aa260cafbd85 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -60,6 +60,10 @@ static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp, { int ret; + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT)) + return uverbs_copy_to_struct_or_zero( + attrs, UVERBS_ATTR_CORE_OUT, resp, resp_len); + if (copy_to_user(attrs->ucore.outbuf, resp, min(attrs->ucore.outlen, resp_len))) return -EFAULT; @@ -1178,6 +1182,9 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) goto out_put; } + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT)) + ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT); + ret = 0; out_put: @@ -2009,8 +2016,10 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) return -ENOMEM; qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); - if (!qp) + if (!qp) { + ret = -EINVAL; goto out; + } is_ud = qp->qp_type == IB_QPT_UD; sg_ind = 0; @@ -3613,6 +3622,7 @@ static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs) attr.odp_caps.per_transport_caps.uc_odp_caps; resp.odp_caps.per_transport_caps.ud_odp_caps = attr.odp_caps.per_transport_caps.ud_odp_caps; + resp.xrc_odp_caps = attr.odp_caps.per_transport_caps.xrc_odp_caps; resp.timestamp_mask = attr.timestamp_mask; resp.hca_core_clock = attr.hca_core_clock; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8c81ff698052..0ca04d224015 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -144,6 +144,21 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } +static int uverbs_set_output(const struct uverbs_attr_bundle *bundle, + const struct uverbs_attr *attr) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + u16 flags; + + flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | + UVERBS_ATTR_F_VALID_OUTPUT; + if (put_user(flags, + &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) + return 
-EFAULT; + return 0; +} + static int uverbs_process_idrs_array(struct bundle_priv *pbundle, const struct uverbs_api_attr *attr_uapi, struct uverbs_objs_arr_attr *attr, @@ -456,6 +471,19 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, } /* + * Until the drivers are revised to use the bundle directly we have to + * assume that the driver wrote to its UHW_OUT and flag userspace + * appropriately. + */ + if (!ret && pbundle->method_elm->has_udata) { + const struct uverbs_attr *attr = + uverbs_attr_get(&pbundle->bundle, UVERBS_ATTR_UHW_OUT); + + if (!IS_ERR(attr)) + ret = uverbs_set_output(&pbundle->bundle, attr); + } + + /* * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can * not invoke the method because the request is not supported. No * other cases should return this code. @@ -706,10 +734,7 @@ void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, const void *from, size_t size) { - struct bundle_priv *pbundle = - container_of(bundle, struct bundle_priv, bundle); const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); - u16 flags; size_t min_size; if (IS_ERR(attr)) @@ -719,16 +744,25 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) return -EFAULT; - flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | - UVERBS_ATTR_F_VALID_OUTPUT; - if (put_user(flags, - &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) - return -EFAULT; - - return 0; + return uverbs_set_output(bundle, attr); } EXPORT_SYMBOL(uverbs_copy_to); + +/* + * This is only used if the caller has directly used copy_to_user to write the + * data. It signals to user space that the buffer is filled in. 
+ */ +int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + return uverbs_set_output(bundle, attr); +} + int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val) @@ -757,8 +791,10 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, { const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); - if (clear_user(u64_to_user_ptr(attr->ptr_attr.data), - attr->ptr_attr.len)) - return -EFAULT; + if (size < attr->ptr_attr.len) { + if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size, + attr->ptr_attr.len - size)) + return -EFAULT; + } return uverbs_copy_to(bundle, idx, from, size); } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 996f167d1436..accc61cc93ac 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -228,6 +228,9 @@ void ib_uverbs_release_file(struct kref *ref) if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); + if (file->async_file) + kref_put(&file->async_file->ref, + ib_uverbs_release_async_event_file); put_device(&file->device->dev); kfree(file); } @@ -714,6 +717,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, buf += sizeof(hdr); + memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); bundle.ufile = file; if (!method_elm->is_ex) { size_t in_len = hdr.in_words * 4 - sizeof(hdr); @@ -987,11 +991,19 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) /* Get an arbitrary mm pointer that hasn't been cleaned yet */ mutex_lock(&ufile->umap_lock); - if (!list_empty(&ufile->umaps)) { - mm = list_first_entry(&ufile->umaps, - struct rdma_umap_priv, list) - ->vma->vm_mm; - mmget(mm); + while (!list_empty(&ufile->umaps)) { + int ret; + + priv = list_first_entry(&ufile->umaps, + struct rdma_umap_priv, list); + mm = priv->vma->vm_mm; + ret = mmget_not_zero(mm); + if (!ret) { + list_del_init(&priv->list); + mm = NULL; + continue; + } + break; } mutex_unlock(&ufile->umap_lock); if (!mm) @@ -1119,10 +1131,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) list_del_init(&file->list); mutex_unlock(&file->device->lists_mutex); - if (file->async_file) - kref_put(&file->async_file->ref, - ib_uverbs_release_async_event_file); - kref_put(&file->ref, ib_uverbs_release_file); return 0; @@ -1151,6 +1159,7 @@ static const struct file_operations uverbs_mmap_fops = { static struct ib_client uverbs_client = { .name = "uverbs", + .no_kverbs_req = true, .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one }; diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 5030ec480370..2a3f2f01028d 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -168,12 +168,18 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr, static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( struct uverbs_attr_bundle *attrs) { - struct ib_device *ib_dev = attrs->ufile->device->ib_dev; + struct ib_device *ib_dev; struct ib_port_attr attr = {}; struct ib_uverbs_query_port_resp_ex resp = {}; + struct ib_ucontext *ucontext; int ret; u8 port_num; + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = 
ucontext->device; + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ if (!ib_dev->ops.query_port) return -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index 18f5ed082f41..be2fdad248eb 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,5 +1,6 @@ config INFINIBAND_BNXT_RE tristate "Broadcom Netxtreme HCA support" + depends on 64BIT depends on ETHERNET && NETDEVICES && PCI && INET && DCB depends on MAY_USE_DEVLINK select NET_VENDOR_BROADCOM diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 31baa8939a4f..e55a1666c0cd 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -124,6 +124,7 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 struct net_device *netdev; unsigned int version, major, minor; + struct bnxt_qplib_chip_ctx chip_ctx; struct bnxt_en_dev *en_dev; struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX]; int num_msix; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9bc637e49faa..1d7469e23cde 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -663,17 +663,36 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) return 0; } +static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) +{ + u8 nw_type; + + switch (ntype) { + case RDMA_NETWORK_IPV4: + nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4; + break; + case RDMA_NETWORK_IPV6: + nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6; + break; + default: + nw_type = CMDQ_CREATE_AH_TYPE_V1; + break; + } + return nw_type; +} + struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct bnxt_re_dev *rdev = pd->rdev; + const struct ib_gid_attr *sgid_attr; struct bnxt_re_ah *ah; - const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - int rc; u8 nw_type; + int rc; if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); @@ -700,28 +719,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, ah->qplib_ah.flow_label = grh->flow_label; ah->qplib_ah.hop_limit = grh->hop_limit; ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr); - if (udata && - !rdma_is_multicast_addr((struct in6_addr *) - grh->dgid.raw) && - !rdma_link_local_addr((struct in6_addr *) - grh->dgid.raw)) { - const struct ib_gid_attr *sgid_attr; - sgid_attr = grh->sgid_attr; - /* Get network header type for this GID */ - nw_type = rdma_gid_attr_network_type(sgid_attr); - switch (nw_type) { - case RDMA_NETWORK_IPV4: - ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4; - break; - case RDMA_NETWORK_IPV6: - ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6; - break; - default: - ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1; - break; - } - } + sgid_attr = grh->sgid_attr; + /* Get network header type for this GID */ + nw_type = rdma_gid_attr_network_type(sgid_attr); + ah->qplib_ah.nw_type = bnxt_re_stack_to_dev_nw_type(nw_type); memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, @@ -882,7 +884,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, struct bnxt_re_qp_req ureq; struct bnxt_qplib_qp *qplib_qp = 
&qp->qplib_qp; struct ib_umem *umem; - int bytes = 0; + int bytes = 0, psn_sz; struct ib_ucontext *context = pd->ib_pd.uobject->context; struct bnxt_re_ucontext *cntx = container_of(context, struct bnxt_re_ucontext, @@ -892,8 +894,12 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE); /* Consider mapping PSN search memory only for RC QPs. */ - if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) - bytes += (qplib_qp->sq.max_wqe * sizeof(struct sq_psn_search)); + if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { + psn_sz = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + sizeof(struct sq_psn_search_ext) : + sizeof(struct sq_psn_search); + bytes += (qplib_qp->sq.max_wqe * psn_sz); + } bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) @@ -1065,12 +1071,17 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, qp->qplib_qp.pd = &pd->qplib_pd; qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp); qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type); + + if (qp_init_attr->qp_type == IB_QPT_GSI && + bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) + qp->qplib_qp.type = CMDQ_CREATE_QP_TYPE_GSI; if (qp->qplib_qp.type == IB_QPT_MAX) { dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported", qp->qplib_qp.type); rc = -EINVAL; goto fail; } + qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data; qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false); @@ -1131,7 +1142,8 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); - if (qp_init_attr->qp_type == IB_QPT_GSI) { + if (qp_init_attr->qp_type == IB_QPT_GSI && + !(bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))) { /* Allocate 1 more than what's provided */ entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1); qp->qplib_qp.sq.max_wqe = min_t(u32, entries, @@ -1644,6 +1656,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, __from_ib_access_flags(qp_attr->qp_access_flags); /* LOCAL_WRITE access must be set to allow RC receive */ qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE; + /* Temp: Set all params on QP as of now */ + qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE; + qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ; } if (qp_attr_mask & IB_QP_PKEY_INDEX) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY; @@ -2091,7 +2106,8 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp, static int is_ud_qp(struct bnxt_re_qp *qp) { - return qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD; + return (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD || + qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI); } static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, @@ -2395,7 +2411,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: - if (ib_qp->qp_type == IB_QPT_GSI) { + if (qp->qplib_qp.type == CMDQ_CREATE_QP1_TYPE_GSI) { rc = bnxt_re_build_qp1_send_v2(qp, wr, &wqe, payload_sz); if (rc) @@ -2525,7 +2541,8 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, wqe.wr_id = wr->wr_id; wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV; - if (ib_qp->qp_type == IB_QPT_GSI) + if (ib_qp->qp_type == IB_QPT_GSI && + qp->qplib_qp.type != CMDQ_CREATE_QP_TYPE_GSI) rc = bnxt_re_build_qp1_shadow_qp_recv(qp, wr, &wqe, payload_sz); if 
(!rc) @@ -3120,19 +3137,33 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp, } } -static void bnxt_re_process_res_ud_wc(struct ib_wc *wc, +static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, + struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { + u8 nw_type; + wc->opcode = IB_WC_RECV; wc->status = __rc_to_ib_wc_status(cqe->status); - if (cqe->flags & CQ_RES_RC_FLAGS_IMM) + if (cqe->flags & CQ_RES_UD_FLAGS_IMM) wc->wc_flags |= IB_WC_WITH_IMM; - if (cqe->flags & CQ_RES_RC_FLAGS_INV) - wc->wc_flags |= IB_WC_WITH_INVALIDATE; - if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) == - (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) - wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + /* report only on GSI QP for Thor */ + if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI) { + wc->wc_flags |= IB_WC_GRH; + memcpy(wc->smac, cqe->smac, ETH_ALEN); + wc->wc_flags |= IB_WC_WITH_SMAC; + if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) { + wc->vlan_id = (cqe->cfa_meta & 0xFFF); + if (wc->vlan_id < 0x1000) + wc->wc_flags |= IB_WC_WITH_VLAN; + } + nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >> + CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT; + wc->network_hdr_type = bnxt_re_to_ib_nw_type(nw_type); + wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; + } + } static int send_phantom_wqe(struct bnxt_re_qp *qp) @@ -3224,7 +3255,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) switch (cqe->opcode) { case CQ_BASE_CQE_TYPE_REQ: - if (qp->qplib_qp.id == + if (qp->rdev->qp1_sqp && qp->qplib_qp.id == qp->rdev->qp1_sqp->qplib_qp.id) { /* Handle this completion with * the stored completion @@ -3259,7 +3290,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) bnxt_re_process_res_rc_wc(wc, cqe); break; case CQ_BASE_CQE_TYPE_RES_UD: - if (qp->qplib_qp.id == + if (qp->rdev->qp1_sqp && qp->qplib_qp.id == qp->rdev->qp1_sqp->qplib_qp.id) { /* Handle this completion with * the stored completion @@ -3272,7 +3303,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) break; } } - bnxt_re_process_res_ud_wc(wc, cqe); + bnxt_re_process_res_ud_wc(qp, wc, cqe); break; default: dev_err(rdev_to_dev(cq->rdev), @@ -3299,10 +3330,10 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq, spin_lock_irqsave(&cq->cq_lock, flags); /* Trigger on the very next completion */ if (ib_cqn_flags & IB_CQ_NEXT_COMP) - type = DBR_DBR_TYPE_CQ_ARMALL; + type = DBC_DBC_TYPE_CQ_ARMALL; /* Trigger on the next solicited completion */ else if (ib_cqn_flags & IB_CQ_SOLICITED) - type = DBR_DBR_TYPE_CQ_ARMSE; + type = DBC_DBC_TYPE_CQ_ARMSE; /* Poll to see if there are missed events */ if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) && @@ -3661,9 +3692,10 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; struct bnxt_re_uctx_resp resp; struct bnxt_re_ucontext *uctx; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + u32 chip_met_rev_num = 0; int rc; dev_dbg(rdev_to_dev(rdev), "ABI version requested %d", @@ -3688,14 +3720,24 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev, } spin_lock_init(&uctx->sh_lock); - resp.dev_id = rdev->en_dev->pdev->devfn; /*Temp, Use idr_alloc instead*/ + resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_CCTX; + chip_met_rev_num = rdev->chip_ctx.chip_num; + chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_rev & 0xFF) << + BNXT_RE_CHIP_ID0_CHIP_REV_SFT; + 
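/* chip_id0 packs chip_num in the low bits, with the rev and metal bytes stacked above it */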
chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_metal & 0xFF) << + BNXT_RE_CHIP_ID0_CHIP_MET_SFT; + resp.chip_id0 = chip_met_rev_num; + /* Future extension of chip info */ + resp.chip_id1 = 0; + /*Temp, Use idr_alloc instead */ + resp.dev_id = rdev->en_dev->pdev->devfn; resp.max_qp = rdev->qplib_ctx.qpc_count; resp.pg_size = PAGE_SIZE; resp.cqe_sz = sizeof(struct cq_base); resp.max_cqd = dev_attr->max_cq_wqes; resp.rsvd = 0; - rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to copy user context"); rc = -EFAULT; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 16eecfa5882c..0d40a930c192 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -80,6 +80,29 @@ static DEFINE_MUTEX(bnxt_re_dev_lock); static struct workqueue_struct *bnxt_re_wq; static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev); +static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) +{ + rdev->rcfw.res = NULL; + rdev->qplib_res.cctx = NULL; +} + +static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) +{ + struct bnxt_en_dev *en_dev; + struct bnxt *bp; + + en_dev = rdev->en_dev; + bp = netdev_priv(en_dev->net); + + rdev->chip_ctx.chip_num = bp->chip_num; + /* rest members to follow eventually */ + + rdev->qplib_res.cctx = &rdev->chip_ctx; + rdev->rcfw.res = &rdev->qplib_res; + + return 0; +} + /* SR-IOV helper functions */ static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev) @@ -278,6 +301,7 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP, &bnxt_re_ulp_ops, rdev); + rdev->qplib_res.pdev = rdev->en_dev->pdev; return rc; } @@ -345,7 +369,8 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, fw_msg->timeout = timeout; } -static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) +static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, + u16 fw_ring_id, int type) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_ring_free_input req = {0}; @@ -359,7 +384,7 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1); - req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; + req.ring_type = type; req.ring_id = cpu_to_le16(fw_ring_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); @@ -396,7 +421,7 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, /* Association of ring index with doorbell index and MSIX number */ req.logical_id = cpu_to_le16(map_index); req.length = cpu_to_le32(ring_mask + 1); - req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; + req.ring_type = type; req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); @@ -688,7 +713,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev, struct bnxt_re_dev *rdev; /* Allocate bnxt_re_dev instance here */ - rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev)); + rdev = ib_alloc_device(bnxt_re_dev, ibdev); if (!rdev) { dev_err(NULL, "%s: bnxt_re_dev allocation failure!", ROCE_DRV_MODULE_NAME); @@ -860,6 +885,12 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } +static u32 
bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) +{ + return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + 0x10000 : rdev->msix_entries[indx].db_offset; +} + static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { int i; @@ -873,18 +904,18 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) static int bnxt_re_init_res(struct bnxt_re_dev *rdev) { - int rc = 0, i; int num_vec_enabled = 0; + int rc = 0, i; + u32 db_offt; bnxt_qplib_init_res(&rdev->qplib_res); for (i = 1; i < rdev->num_msix ; i++) { + db_offt = bnxt_re_get_nqdb_offset(rdev, i); rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], i - 1, rdev->msix_entries[i].vector, - rdev->msix_entries[i].db_offset, - &bnxt_re_cqn_handler, + db_offt, &bnxt_re_cqn_handler, &bnxt_re_srqn_handler); - if (rc) { dev_err(rdev_to_dev(rdev), "Failed to enable NQ with rc = 0x%x", rc); @@ -896,16 +927,18 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) fail: for (i = num_vec_enabled; i >= 0; i--) bnxt_qplib_disable_nq(&rdev->nq[i]); - return rc; } static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) { + u8 type; int i; for (i = 0; i < rdev->num_msix - 1; i++) { - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); + rdev->nq[i].res = NULL; bnxt_qplib_free_nq(&rdev->nq[i]); } } @@ -927,8 +960,11 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev) static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) { - int rc = 0, i; int num_vec_created = 0; + dma_addr_t *pg_map; + int rc = 0, i; + int pages; + u8 type; /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; @@ -949,6 +985,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) goto dealloc_res; for (i = 0; i < rdev->num_msix - 1; i++) { + rdev->nq[i].res = &rdev->qplib_res; rdev->nq[i].hwq.max_elements = BNXT_RE_MAX_CQ_COUNT + BNXT_RE_MAX_SRQC_COUNT + 2; rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq[i]); @@ -957,13 +994,13 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) i, rc); goto free_nq; } - rc = bnxt_re_net_ring_alloc - (rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr, - rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count, - HWRM_RING_ALLOC_CMPL, - BNXT_QPLIB_NQE_MAX_CNT - 1, - rdev->msix_entries[i + 1].ring_idx, - &rdev->nq[i].ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + pg_map = rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr; + pages = rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count; + rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type, + BNXT_QPLIB_NQE_MAX_CNT - 1, + rdev->msix_entries[i + 1].ring_idx, + &rdev->nq[i].ring_id); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate NQ fw id with rc = 0x%x", @@ -976,7 +1013,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) return 0; free_nq: for (i = num_vec_created; i >= 0; i--) { - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); bnxt_qplib_free_nq(&rdev->nq[i]); } bnxt_qplib_dealloc_dpi(&rdev->qplib_res, @@ -1230,6 +1268,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) { + u8 type; int rc; if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { @@ -1253,7 +1292,8 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); 
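/* Teardown proceeds in reverse order of bnxt_re_ib_reg(): HW context, RCFW channel, then the CREQ ring */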
bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type); bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) { @@ -1262,6 +1302,8 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) dev_warn(rdev_to_dev(rdev), "Failed to free MSI-X vectors: %#x", rc); } + + bnxt_re_destroy_chip_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) { rc = bnxt_re_unregister_netdev(rdev); if (rc) @@ -1282,9 +1324,12 @@ static void bnxt_re_worker(struct work_struct *work) static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) { - int rc; - + dma_addr_t *pg_map; + u32 db_offt, ridx; + int pages, vid; bool locked; + u8 type; + int rc; /* Acquire rtnl lock throughout this function */ rtnl_lock(); @@ -1299,6 +1344,12 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); + rc = bnxt_re_setup_chip_ctx(rdev); + if (rc) { + dev_err(rdev_to_dev(rdev), "Failed to get chip context\n"); + return -EINVAL; + } + /* Check whether VF or PF */ bnxt_re_get_sriov_func_type(rdev); @@ -1322,21 +1373,22 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) pr_err("Failed to allocate RCFW Channel: %#x\n", rc); goto fail; } - rc = bnxt_re_net_ring_alloc - (rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr, - rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count, - HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1, - rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx, - &rdev->rcfw.creq_ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + pg_map = rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr; + pages = rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count; + ridx = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; + rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type, + BNXT_QPLIB_CREQE_MAX_CNT - 1, + ridx, &rdev->rcfw.creq_ring_id); if (rc) { pr_err("Failed to allocate CREQ: %#x\n", rc); goto free_rcfw; } - rc = bnxt_qplib_enable_rcfw_channel - (rdev->en_dev->pdev, &rdev->rcfw, - rdev->msix_entries[BNXT_RE_AEQ_IDX].vector, - rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset, - rdev->is_virtfn, &bnxt_re_aeq_handler); + db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX); + vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector; + rc = bnxt_qplib_enable_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + vid, db_offt, rdev->is_virtfn, + &bnxt_re_aeq_handler); if (rc) { pr_err("Failed to enable RCFW channel: %#x\n", rc); goto free_ring; @@ -1349,7 +1401,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) if (!rdev->is_virtfn) bnxt_re_set_resource_limits(rdev); - rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0); + rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0, + bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)); if (rc) { pr_err("Failed to allocate QPLIB context: %#x\n", rc); goto disable_rcfw; @@ -1420,7 +1473,8 @@ free_ctx: disable_rcfw: bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); free_ring: - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type); free_rcfw: bnxt_qplib_free_rcfw_channel(&rdev->rcfw); fail: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index b98b054148cd..77eb3d556006 100644 ---
a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -44,6 +44,7 @@ #include <linux/slab.h> #include <linux/pci.h> #include <linux/prefetch.h> +#include <linux/if_ether.h> #include "roce_hsi.h" @@ -244,6 +245,7 @@ static void bnxt_qplib_service_nq(unsigned long data) u16 type; int budget = nq->budget; uintptr_t q_handle; + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); /* Service the NQ until empty */ raw_cons = hwq->cons; @@ -290,7 +292,7 @@ static void bnxt_qplib_service_nq(unsigned long data) q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high) << 32; bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle, - DBR_DBR_TYPE_SRQ_ARMENA); + DBC_DBC_TYPE_SRQ_ARMENA); if (!nq->srqn_handler(nq, (struct bnxt_qplib_srq *)q_handle, nqsrqe->event)) @@ -312,7 +314,9 @@ static void bnxt_qplib_service_nq(unsigned long data) } if (hwq->cons != raw_cons) { hwq->cons = raw_cons; - NQ_DB_REARM(nq->bar_reg_iomem, hwq->cons, hwq->max_elements); + bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, hwq->cons, + hwq->max_elements, nq->ring_id, + gen_p5); } } @@ -336,9 +340,11 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) { + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); tasklet_disable(&nq->worker); /* Mask h/w interrupt */ - NQ_DB(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements); + bnxt_qplib_ring_nq_db(nq->bar_reg_iomem, nq->hwq.cons, + nq->hwq.max_elements, nq->ring_id, gen_p5); /* Sync with last running IRQ handler */ synchronize_irq(nq->vector); if (kill) @@ -373,6 +379,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, int msix_vector, bool need_init) { + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); int rc; if (nq->requested) @@ -399,7 +406,8 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, nq->vector, nq_indx); } nq->requested = true; - NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements); + bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, nq->hwq.cons, + nq->hwq.max_elements, nq->ring_id, gen_p5); return rc; } @@ -433,7 +441,8 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, rc = -ENOMEM; goto fail; } - nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 4); + /* Unconditionally map 8 bytes to support 57500 series */ + nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 8); if (!nq->bar_reg_iomem) { rc = -ENOMEM; goto fail; @@ -462,15 +471,17 @@ void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq) int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) { + u8 hwq_type; + nq->pdev = pdev; if (!nq->hwq.max_elements || nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT) nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; - + hwq_type = bnxt_qplib_get_hwq_type(nq->res); if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0, &nq->hwq.max_elements, BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_L2_CMPL)) + PAGE_SIZE, hwq_type)) return -ENOMEM; nq->budget = 8; @@ -481,21 +492,19 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type) { struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - struct dbr_dbr db_msg = { 0 }; void __iomem *db; - u32 sw_prod = 0; + u32 sw_prod; + u64 val = 0; /* Ring DB */ - sw_prod = (arm_type == DBR_DBR_TYPE_SRQ_ARM) ? 
srq->threshold : - HWQ_CMP(srq_hwq->prod, srq_hwq); - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = cpu_to_le32(((srq->id << DBR_DBR_XID_SFT) & - DBR_DBR_XID_MASK) | arm_type); - db = (arm_type == DBR_DBR_TYPE_SRQ_ARMENA) ? - srq->dbr_base : srq->dpi->dbr; - wmb(); /* barrier before db ring */ - __iowrite64_copy(db, &db_msg, sizeof(db_msg) / sizeof(u64)); + sw_prod = (arm_type == DBC_DBC_TYPE_SRQ_ARM) ? + srq->threshold : HWQ_CMP(srq_hwq->prod, srq_hwq); + db = (arm_type == DBC_DBC_TYPE_SRQ_ARMENA) ? srq->dbr_base : + srq->dpi->dbr; + val = ((srq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type; + val <<= 32; + val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; + writeq(val, db); } int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, @@ -590,7 +599,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->id = le32_to_cpu(resp.xid); srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem; if (srq->threshold) - bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARMENA); + bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARMENA); srq->arm_req = false; return 0; @@ -614,7 +623,7 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, srq_hwq->max_elements - sw_cons + sw_prod; if (count > srq->threshold) { srq->arm_req = false; - bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM); + bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM); } else { /* Deferred arming */ srq->arm_req = true; @@ -702,10 +711,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, srq_hwq->max_elements - sw_cons + sw_prod; spin_unlock(&srq_hwq->lock); /* Ring DB */ - bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ); + bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ); if (srq->arm_req == true && count > srq->threshold) { srq->arm_req = false; - bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM); + bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM); } done: return rc; @@ -861,10 +870,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) unsigned long int psn_search, poff = 0; struct bnxt_qplib_q *sq = &qp->sq; struct bnxt_qplib_q *rq = &qp->rq; + int i, rc, req_size, psn_sz = 0; struct bnxt_qplib_hwq *xrrq; - int i, rc, req_size, psn_sz; u16 cmd_flags = 0, max_ssge; u32 sw_prod, qp_flags = 0; + u16 max_rsge; RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); @@ -874,8 +884,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req.qp_handle = cpu_to_le64(qp->qp_handle); /* SQ */ - psn_sz = (qp->type == CMDQ_CREATE_QP_TYPE_RC) ? - sizeof(struct sq_psn_search) : 0; + if (qp->type == CMDQ_CREATE_QP_TYPE_RC) { + psn_sz = bnxt_qplib_is_chip_gen_p5(res->cctx) ? + sizeof(struct sq_psn_search_ext) : + sizeof(struct sq_psn_search); + } sq->hwq.max_elements = sq->max_wqe; rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist, sq->nmap, &sq->hwq.max_elements, @@ -905,10 +918,16 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) poff = (psn_search & ~PAGE_MASK) / BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE; } - for (i = 0; i < sq->hwq.max_elements; i++) + for (i = 0; i < sq->hwq.max_elements; i++) { sq->swq[i].psn_search = &psn_search_ptr[get_psne_pg(i + poff)] [get_psne_idx(i + poff)]; + /* psn_ext is used only on gen P5 chips.
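Both pointers alias the same PSN search slot; gen P5 hardware consumes the 16-byte extended layout, older chips the 8-byte one.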
*/ + sq->swq[i].psn_ext = + (struct sq_psn_search_ext *) + &psn_search_ptr[get_psne_pg(i + poff)] + [get_psne_idx(i + poff)]; + } } pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); @@ -1007,8 +1026,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req.sq_fwo_sq_sge = cpu_to_le16( ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK) << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); + max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge; req.rq_fwo_rq_sge = cpu_to_le16( - ((rq->max_sge & CMDQ_CREATE_QP_RQ_SGE_MASK) + ((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK) << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); /* ORRQ and IRRQ */ if (psn_sz) { @@ -1053,6 +1073,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; + qp->cctx = res->cctx; INIT_LIST_HEAD(&qp->sq_flush); INIT_LIST_HEAD(&qp->rq_flush); rcfw->qp_tbl[qp->id].qp_id = qp->id; @@ -1494,19 +1515,16 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; - struct dbr_dbr db_msg = { 0 }; u32 sw_prod; + u64 val = 0; + val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | + DBC_DBC_TYPE_SQ); + val <<= 32; sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_SQ); + val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; /* Flush all the WQE writes to HW */ - wmb(); - __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); + writeq(val, qp->dpi->dbr); } int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, @@ -1617,7 +1635,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, ((offsetof(typeof(*sqe), data) + 15) >> 4); sqe->inv_key_or_imm_data = cpu_to_le32( wqe->send.inv_key); - if (qp->type == CMDQ_CREATE_QP_TYPE_UD) { + if (qp->type == CMDQ_CREATE_QP_TYPE_UD || + qp->type == CMDQ_CREATE_QP_TYPE_GSI) { sqe->q_key = cpu_to_le32(wqe->send.q_key); sqe->dst_qp = cpu_to_le32( wqe->send.dst_qp & SQ_SEND_DST_QP_MASK); @@ -1741,14 +1760,26 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } swq->next_psn = sq->psn & BTH_PSN_MASK; if (swq->psn_search) { - swq->psn_search->opcode_start_psn = cpu_to_le32( - ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) & - SQ_PSN_SEARCH_START_PSN_MASK) | - ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) & - SQ_PSN_SEARCH_OPCODE_MASK)); - swq->psn_search->flags_next_psn = cpu_to_le32( - ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & - SQ_PSN_SEARCH_NEXT_PSN_MASK)); + u32 opcd_spsn; + u32 flg_npsn; + + opcd_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) & + SQ_PSN_SEARCH_START_PSN_MASK); + opcd_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) & + SQ_PSN_SEARCH_OPCODE_MASK); + flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & + SQ_PSN_SEARCH_NEXT_PSN_MASK); + if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { + swq->psn_ext->opcode_start_psn = + cpu_to_le32(opcd_spsn); + swq->psn_ext->flags_next_psn = + cpu_to_le32(flg_npsn); + } else { + swq->psn_search->opcode_start_psn = + cpu_to_le32(opcd_spsn); + swq->psn_search->flags_next_psn = + cpu_to_le32(flg_npsn); + } } queue_err: if (sch_handler) { @@ -1785,19 +1816,16 @@ done: void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - struct dbr_dbr db_msg = { 0 }; u32 sw_prod; + u64 val = 0; + 
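/* 64-bit doorbell format: type and xid in the high dword, producer index in the low dword */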
val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | + DBC_DBC_TYPE_RQ); + val <<= 32; sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_RQ); - + val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; /* Flush the writes to the HW Rx WQE before ringing the Rx DB */ - wmb(); - __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); + writeq(val, qp->dpi->dbr); } int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, @@ -1881,32 +1909,28 @@ done: /* Spinlock must be held */ static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq) { - struct dbr_dbr db_msg = { 0 }; + u64 val = 0; - db_msg.type_xid = - cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_CQ_ARMENA); + val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | + DBC_DBC_TYPE_CQ_ARMENA; + val <<= 32; /* Flush memory writes before enabling the CQ */ - wmb(); - __iowrite64_copy(cq->dbr_base, &db_msg, sizeof(db_msg) / sizeof(u64)); + writeq(val, cq->dbr_base); } static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type) { struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; - struct dbr_dbr db_msg = { 0 }; u32 sw_cons; + u64 val = 0; /* Ring DB */ + val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type; + val <<= 32; sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq); - db_msg.index = cpu_to_le32((sw_cons << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - arm_type); + val |= (sw_cons << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; /* flush memory writes before arming the CQ */ - wmb(); - __iowrite64_copy(cq->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); + writeq(val, cq->dpi->dbr); } int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) @@ -2053,6 +2077,7 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, opcode = CQ_BASE_CQE_TYPE_RES_RC; break; case CMDQ_CREATE_QP_TYPE_UD: + case CMDQ_CREATE_QP_TYPE_GSI: opcode = CQ_BASE_CQE_TYPE_RES_UD; break; } @@ -2125,7 +2150,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, sq->send_phantom = true; /* TODO: Only ARM if the previous SQE is ARMALL */ - bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL); + bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ_ARMALL); rc = -EAGAIN; goto out; @@ -2410,12 +2435,14 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; - cqe->length = le32_to_cpu(hwcqe->length); + cqe->length = (u32)le16_to_cpu(hwcqe->length); + cqe->cfa_meta = le16_to_cpu(hwcqe->cfa_metadata); cqe->invrkey = le32_to_cpu(hwcqe->imm_data); cqe->flags = le16_to_cpu(hwcqe->flags); cqe->status = hwcqe->status; cqe->qp_handle = (u64)(unsigned long)qp; - memcpy(cqe->smac, hwcqe->src_mac, 6); + /* FIXME: Endianness fix needed for smac */ + memcpy(cqe->smac, hwcqe->src_mac, ETH_ALEN); wr_id_idx = le32_to_cpu(hwcqe->src_qp_high_srq_or_rq_wr_id) & CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK; cqe->src_qp = le16_to_cpu(hwcqe->src_qp_low) | @@ -2794,7 +2821,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, } if (cq->hwq.cons != raw_cons) { cq->hwq.cons = raw_cons; - bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ); + bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ); } exit: return num_cqes - budget; diff --git
a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 72352ca80ace..3f618b5f1f06 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -106,6 +106,7 @@ struct bnxt_qplib_swq { u32 start_psn; u32 next_psn; struct sq_psn_search *psn_search; + struct sq_psn_search_ext *psn_ext; }; struct bnxt_qplib_swqe { @@ -254,6 +255,7 @@ struct bnxt_qplib_q { struct bnxt_qplib_qp { struct bnxt_qplib_pd *pd; struct bnxt_qplib_dpi *dpi; + struct bnxt_qplib_chip_ctx *cctx; u64 qp_handle; #define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF u32 id; @@ -347,6 +349,7 @@ struct bnxt_qplib_cqe { u8 type; u8 opcode; u32 length; + u16 cfa_meta; u64 wr_id; union { __be32 immdata; @@ -432,13 +435,47 @@ struct bnxt_qplib_cq { #define NQ_DB_CP_FLAGS (NQ_DB_KEY_CP | \ NQ_DB_IDX_VALID | \ NQ_DB_IRQ_DIS) -#define NQ_DB_REARM(db, raw_cons, cp_bit) \ - writel(NQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db) -#define NQ_DB(db, raw_cons, cp_bit) \ - writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db) + +static inline void bnxt_qplib_ring_nq_db64(void __iomem *db, u32 index, + u32 xid, bool arm) +{ + u64 val; + + val = xid & DBC_DBC_XID_MASK; + val |= DBC_DBC_PATH_ROCE; + val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; + val <<= 32; + val |= index & DBC_DBC_INDEX_MASK; + writeq(val, db); +} + +static inline void bnxt_qplib_ring_nq_db_rearm(void __iomem *db, u32 raw_cons, + u32 max_elements, u32 xid, + bool gen_p5) +{ + u32 index = raw_cons & (max_elements - 1); + + if (gen_p5) + bnxt_qplib_ring_nq_db64(db, index, xid, true); + else + writel(NQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), db); +} + +static inline void bnxt_qplib_ring_nq_db(void __iomem *db, u32 raw_cons, + u32 max_elements, u32 xid, + bool gen_p5) +{ + u32 index = raw_cons & (max_elements - 1); + + if (gen_p5) + bnxt_qplib_ring_nq_db64(db, index, xid, false); + else + writel(NQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), db); +} struct bnxt_qplib_nq { struct pci_dev *pdev; + struct bnxt_qplib_res *res; int vector; cpumask_t mask; @@ -448,7 +485,7 @@ struct bnxt_qplib_nq { struct bnxt_qplib_hwq hwq; u16 bar_reg; - u16 bar_reg_off; + u32 bar_reg_off; u16 ring_id; void __iomem *bar_reg_iomem; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 65e17de220f6..c6461e957078 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -359,11 +359,12 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, static void bnxt_qplib_service_creq(unsigned long data) { struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data; + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); struct bnxt_qplib_hwq *creq = &rcfw->creq; + u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct creq_base *creqe, **creq_ptr; u32 sw_cons, raw_cons; unsigned long flags; - u32 type, budget = CREQ_ENTRY_POLL_BUDGET; /* Service the CREQ until budget is over */ spin_lock_irqsave(&creq->lock, flags); @@ -407,8 +408,9 @@ static void bnxt_qplib_service_creq(unsigned long data) if (creq->cons != raw_cons) { creq->cons = raw_cons; - CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons, - creq->max_elements); + bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem, + raw_cons, creq->max_elements, + rcfw->creq_ring_id, gen_p5); } spin_unlock_irqrestore(&creq->lock, flags); } @@ -480,11 +482,13 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, req.log2_dbr_pg_size = 
cpu_to_le16(PAGE_SHIFT - RCFW_DBR_BASE_PAGE_SHIFT); /* - * VFs need not setup the HW context area, PF + * Gen P5 devices don't require this allocation, + * as the L2 driver does the same for RoCE as well. + * Also, VFs need not set up the HW context area; the PF * shall setup this area for VF. Skipping the * HW programming */ - if (is_virtfn) + if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) goto skip_ctx_setup; level = ctx->qpc_tbl.level; @@ -560,12 +564,15 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_ctx *ctx, int qp_tbl_sz) { + u8 hwq_type; + rcfw->pdev = pdev; rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT; + hwq_type = bnxt_qplib_get_hwq_type(rcfw->res); if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0, &rcfw->creq.max_elements, - BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE, - HWQ_TYPE_L2_CMPL)) { + BNXT_QPLIB_CREQE_UNITS, + 0, PAGE_SIZE, hwq_type)) { dev_err(&rcfw->pdev->dev, "HW channel CREQ allocation failed\n"); goto fail; @@ -607,10 +614,13 @@ fail: void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) { + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); + tasklet_disable(&rcfw->worker); /* Mask h/w interrupts */ - CREQ_DB(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, - rcfw->creq.max_elements); + bnxt_qplib_ring_creq_db(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, + rcfw->creq.max_elements, rcfw->creq_ring_id, + gen_p5); /* Sync with last running IRQ-handler */ synchronize_irq(rcfw->vector); if (kill) @@ -647,6 +657,7 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, bool need_init) { + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); int rc; if (rcfw->requested) @@ -663,8 +674,9 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, if (rc) return rc; rcfw->requested = true; - CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, - rcfw->creq.max_elements); + bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem, + rcfw->creq.cons, rcfw->creq.max_elements, + rcfw->creq_ring_id, gen_p5); return 0; } @@ -717,8 +729,9 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, dev_err(&rcfw->pdev->dev, "CREQ BAR region %d resc start is 0!\n", rcfw->creq_bar_reg); + /* Unconditionally map 8 bytes to support 57500 series */ rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off, - 4); + 8); if (!rcfw->creq_bar_reg_iomem) { dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n", rcfw->creq_bar_reg); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index be0ef0e8c53e..2138533bb642 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -157,10 +157,46 @@ static inline u32 get_creq_idx(u32 val) #define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \ CREQ_DB_IDX_VALID | \ CREQ_DB_IRQ_DIS) -#define CREQ_DB_REARM(db, raw_cons, cp_bit) \ - writel(CREQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db) -#define CREQ_DB(db, raw_cons, cp_bit) \ - writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db) + +static inline void bnxt_qplib_ring_creq_db64(void __iomem *db, u32 index, + u32 xid, bool arm) +{ + u64 val = 0; + + val = xid & DBC_DBC_XID_MASK; + val |= DBC_DBC_PATH_ROCE; + val |= arm ?
DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; + val <<= 32; + val |= index & DBC_DBC_INDEX_MASK; + + writeq(val, db); +} + +static inline void bnxt_qplib_ring_creq_db_rearm(void __iomem *db, u32 raw_cons, + u32 max_elements, u32 xid, + bool gen_p5) +{ + u32 index = raw_cons & (max_elements - 1); + + if (gen_p5) + bnxt_qplib_ring_creq_db64(db, index, xid, true); + else + writel(CREQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), + db); +} + +static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons, + u32 max_elements, u32 xid, + bool gen_p5) +{ + u32 index = raw_cons & (max_elements - 1); + + if (gen_p5) + bnxt_qplib_ring_creq_db64(db, index, xid, false); + else + writel(CREQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), + db); +} #define CREQ_ENTRY_POLL_BUDGET 0x100 @@ -187,6 +223,7 @@ struct bnxt_qplib_qp_node { /* RCFW Communication Channels */ struct bnxt_qplib_rcfw { struct pci_dev *pdev; + struct bnxt_qplib_res *res; int vector; struct tasklet_struct worker; bool requested; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 57d4951679cb..c8502c2844a2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -330,13 +330,13 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev, */ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, struct bnxt_qplib_ctx *ctx, - bool virt_fn) + bool virt_fn, bool is_p5) { int i, j, k, rc = 0; int fnz_idx = -1; __le64 **pbl_ptr; - if (virt_fn) + if (virt_fn || is_p5) goto stats_alloc; /* QPC Tables */ @@ -762,7 +762,11 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, { memset(stats, 0, sizeof(*stats)); stats->fw_id = -1; - stats->size = sizeof(struct ctx_hw_stats); + /* 128-byte aligned context memory is required only for 57500. + * However, making this unconditional does not harm previous + * generations. + */ + stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128); stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, &stats->dma_map, GFP_KERNEL); if (!stats->dma) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 1e80aa7bbcce..32cebd0f1436 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -180,12 +180,20 @@ struct bnxt_qplib_ctx { u64 hwrm_intf_ver; }; +struct bnxt_qplib_chip_ctx { + u16 chip_num; + u8 chip_rev; + u8 chip_metal; +}; + +#define CHIP_NUM_57500 0x1750 + struct bnxt_qplib_res { struct pci_dev *pdev; + struct bnxt_qplib_chip_ctx *cctx; struct net_device *netdev; struct bnxt_qplib_rcfw *rcfw; - struct bnxt_qplib_pd_tbl pd_tbl; struct bnxt_qplib_sgid_tbl sgid_tbl; struct bnxt_qplib_pkey_tbl pkey_tbl; @@ -193,6 +201,24 @@ struct bnxt_qplib_res { bool prio; }; +static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) +{ + return (cctx->chip_num == CHIP_NUM_57500); +} + +static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) +{ + return bnxt_qplib_is_chip_gen_p5(res->cctx) ? + HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL; +} + +static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) +{ + return bnxt_qplib_is_chip_gen_p5(cctx) ?
+ RING_ALLOC_REQ_RING_TYPE_NQ : + RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; +} + #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) @@ -226,5 +252,5 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev, struct bnxt_qplib_ctx *ctx); int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, struct bnxt_qplib_ctx *ctx, - bool virt_fn); + bool virt_fn, bool is_p5); #endif /* __BNXT_QPLIB_RES_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index efa0f2949dc7..e9c53e406404 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -119,7 +119,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * reporting the max number */ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS; - attr->max_qp_sges = sb->max_sge; + attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? + 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); attr->max_cq_wqes = le32_to_cpu(sb->max_cqe); attr->max_cq_sges = attr->max_qp_sges; diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 8a9ead419ac2..e4b09e7c2175 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -49,11 +49,11 @@ struct cmpl_doorbell { #define CMPL_DOORBELL_IDX_SFT 0 #define CMPL_DOORBELL_RESERVED_MASK 0x3000000UL #define CMPL_DOORBELL_RESERVED_SFT 24 - #define CMPL_DOORBELL_IDX_VALID 0x4000000UL + #define CMPL_DOORBELL_IDX_VALID 0x4000000UL #define CMPL_DOORBELL_MASK 0x8000000UL #define CMPL_DOORBELL_KEY_MASK 0xf0000000UL #define CMPL_DOORBELL_KEY_SFT 28 - #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28) + #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28) }; /* Status Door Bell Format (4 bytes) */ @@ -71,46 +71,56 @@ struct status_doorbell { /* RoCE Host Structures */ /* Doorbell Structures */ -/* 64b Doorbell Format (8 bytes) */ -struct dbr_dbr { - __le32 index; - #define DBR_DBR_INDEX_MASK 0xfffffUL - #define DBR_DBR_INDEX_SFT 0 - #define DBR_DBR_RESERVED12_MASK 0xfff00000UL - #define DBR_DBR_RESERVED12_SFT 20 - __le32 type_xid; - #define DBR_DBR_XID_MASK 0xfffffUL - #define DBR_DBR_XID_SFT 0 - #define DBR_DBR_RESERVED8_MASK 0xff00000UL - #define DBR_DBR_RESERVED8_SFT 20 - #define DBR_DBR_TYPE_MASK 0xf0000000UL - #define DBR_DBR_TYPE_SFT 28 - #define DBR_DBR_TYPE_SQ (0x0UL << 28) - #define DBR_DBR_TYPE_RQ (0x1UL << 28) - #define DBR_DBR_TYPE_SRQ (0x2UL << 28) - #define DBR_DBR_TYPE_SRQ_ARM (0x3UL << 28) - #define DBR_DBR_TYPE_CQ (0x4UL << 28) - #define DBR_DBR_TYPE_CQ_ARMSE (0x5UL << 28) - #define DBR_DBR_TYPE_CQ_ARMALL (0x6UL << 28) - #define DBR_DBR_TYPE_CQ_ARMENA (0x7UL << 28) - #define DBR_DBR_TYPE_SRQ_ARMENA (0x8UL << 28) - #define DBR_DBR_TYPE_CQ_CUTOFF_ACK (0x9UL << 28) - #define DBR_DBR_TYPE_NULL (0xfUL << 28) -}; - -/* 32b Doorbell Format (4 bytes) */ -struct dbr_dbr32 { - __le32 type_abs_incr_xid; - #define DBR_DBR32_XID_MASK 0xfffffUL - #define DBR_DBR32_XID_SFT 0 - #define DBR_DBR32_RESERVED4_MASK 0xf00000UL - #define DBR_DBR32_RESERVED4_SFT 20 - #define DBR_DBR32_INCR_MASK 0xf000000UL - #define DBR_DBR32_INCR_SFT 24 - #define DBR_DBR32_ABS 0x10000000UL - #define DBR_DBR32_TYPE_MASK 0xe0000000UL - #define DBR_DBR32_TYPE_SFT 29 - #define DBR_DBR32_TYPE_SQ (0x0UL << 29) +/* dbc_dbc (size:64b/8B) */ +struct dbc_dbc { + __le32 index; + #define DBC_DBC_INDEX_MASK 0xffffffUL + #define DBC_DBC_INDEX_SFT 0 + __le32 type_path_xid; + #define DBC_DBC_XID_MASK 0xfffffUL + #define DBC_DBC_XID_SFT 0 + #define DBC_DBC_PATH_MASK 0x3000000UL + 
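/* PATH selects which block consumes the doorbell: RoCE, L2 or engine */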
#define DBC_DBC_PATH_SFT 24 + #define DBC_DBC_PATH_ROCE (0x0UL << 24) + #define DBC_DBC_PATH_L2 (0x1UL << 24) + #define DBC_DBC_PATH_ENGINE (0x2UL << 24) + #define DBC_DBC_PATH_LAST DBC_DBC_PATH_ENGINE + #define DBC_DBC_DEBUG_TRACE 0x8000000UL + #define DBC_DBC_TYPE_MASK 0xf0000000UL + #define DBC_DBC_TYPE_SFT 28 + #define DBC_DBC_TYPE_SQ (0x0UL << 28) + #define DBC_DBC_TYPE_RQ (0x1UL << 28) + #define DBC_DBC_TYPE_SRQ (0x2UL << 28) + #define DBC_DBC_TYPE_SRQ_ARM (0x3UL << 28) + #define DBC_DBC_TYPE_CQ (0x4UL << 28) + #define DBC_DBC_TYPE_CQ_ARMSE (0x5UL << 28) + #define DBC_DBC_TYPE_CQ_ARMALL (0x6UL << 28) + #define DBC_DBC_TYPE_CQ_ARMENA (0x7UL << 28) + #define DBC_DBC_TYPE_SRQ_ARMENA (0x8UL << 28) + #define DBC_DBC_TYPE_CQ_CUTOFF_ACK (0x9UL << 28) + #define DBC_DBC_TYPE_NQ (0xaUL << 28) + #define DBC_DBC_TYPE_NQ_ARM (0xbUL << 28) + #define DBC_DBC_TYPE_NULL (0xfUL << 28) + #define DBC_DBC_TYPE_LAST DBC_DBC_TYPE_NULL +}; + +/* dbc_dbc32 (size:32b/4B) */ +struct dbc_dbc32 { + __le32 type_abs_incr_xid; + #define DBC_DBC32_XID_MASK 0xfffffUL + #define DBC_DBC32_XID_SFT 0 + #define DBC_DBC32_PATH_MASK 0xc00000UL + #define DBC_DBC32_PATH_SFT 22 + #define DBC_DBC32_PATH_ROCE (0x0UL << 22) + #define DBC_DBC32_PATH_L2 (0x1UL << 22) + #define DBC_DBC32_PATH_LAST DBC_DBC32_PATH_L2 + #define DBC_DBC32_INCR_MASK 0xf000000UL + #define DBC_DBC32_INCR_SFT 24 + #define DBC_DBC32_ABS 0x10000000UL + #define DBC_DBC32_TYPE_MASK 0xe0000000UL + #define DBC_DBC32_TYPE_SFT 29 + #define DBC_DBC32_TYPE_SQ (0x0UL << 29) + #define DBC_DBC32_TYPE_LAST DBC_DBC32_TYPE_SQ }; /* SQ WQE Structures */ @@ -149,7 +159,24 @@ struct sq_psn_search { #define SQ_PSN_SEARCH_NEXT_PSN_MASK 0xffffffUL #define SQ_PSN_SEARCH_NEXT_PSN_SFT 0 #define SQ_PSN_SEARCH_FLAGS_MASK 0xff000000UL - #define SQ_PSN_SEARCH_FLAGS_SFT 24 + #define SQ_PSN_SEARCH_FLAGS_SFT 24 +}; + +/* sq_psn_search_ext (size:128b/16B) */ +struct sq_psn_search_ext { + __le32 opcode_start_psn; + #define SQ_PSN_SEARCH_EXT_START_PSN_MASK 0xffffffUL + #define SQ_PSN_SEARCH_EXT_START_PSN_SFT 0 + #define SQ_PSN_SEARCH_EXT_OPCODE_MASK 0xff000000UL + #define SQ_PSN_SEARCH_EXT_OPCODE_SFT 24 + __le32 flags_next_psn; + #define SQ_PSN_SEARCH_EXT_NEXT_PSN_MASK 0xffffffUL + #define SQ_PSN_SEARCH_EXT_NEXT_PSN_SFT 0 + #define SQ_PSN_SEARCH_EXT_FLAGS_MASK 0xff000000UL + #define SQ_PSN_SEARCH_EXT_FLAGS_SFT 24 + __le16 start_slot_idx; + __le16 reserved16; + __le32 reserved32; }; /* Send SQ WQE (40 bytes) */ @@ -505,22 +532,24 @@ struct cq_res_rc { /* Responder UD CQE (32 bytes) */ struct cq_res_ud { - __le32 length; + __le16 length; #define CQ_RES_UD_LENGTH_MASK 0x3fffUL #define CQ_RES_UD_LENGTH_SFT 0 - #define CQ_RES_UD_RESERVED18_MASK 0xffffc000UL - #define CQ_RES_UD_RESERVED18_SFT 14 + __le16 cfa_metadata; + #define CQ_RES_UD_CFA_METADATA_VID_MASK 0xfffUL + #define CQ_RES_UD_CFA_METADATA_VID_SFT 0 + #define CQ_RES_UD_CFA_METADATA_DE 0x1000UL + #define CQ_RES_UD_CFA_METADATA_PRI_MASK 0xe000UL + #define CQ_RES_UD_CFA_METADATA_PRI_SFT 13 __le32 imm_data; __le64 qp_handle; __le16 src_mac[3]; __le16 src_qp_low; u8 cqe_type_toggle; - #define CQ_RES_UD_TOGGLE 0x1UL - #define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL - #define CQ_RES_UD_CQE_TYPE_SFT 1 + #define CQ_RES_UD_TOGGLE 0x1UL + #define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL + #define CQ_RES_UD_CQE_TYPE_SFT 1 #define CQ_RES_UD_CQE_TYPE_RES_UD (0x2UL << 1) - #define CQ_RES_UD_RESERVED3_MASK 0xe0UL - #define CQ_RES_UD_RESERVED3_SFT 5 u8 status; #define CQ_RES_UD_STATUS_OK 0x0UL #define CQ_RES_UD_STATUS_LOCAL_ACCESS_ERROR 0x1UL @@ -536,18 +565,30 @@ struct 
cq_res_ud { #define CQ_RES_UD_FLAGS_SRQ_SRQ (0x1UL << 0) #define CQ_RES_UD_FLAGS_SRQ_LAST CQ_RES_UD_FLAGS_SRQ_SRQ #define CQ_RES_UD_FLAGS_IMM 0x2UL - #define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0xcUL - #define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 2 - #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 2) - #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 2) - #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 2) + #define CQ_RES_UD_FLAGS_UNUSED_MASK 0xcUL + #define CQ_RES_UD_FLAGS_UNUSED_SFT 2 + #define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0x30UL + #define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 4 + #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4) + #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4) + #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4) #define CQ_RES_UD_FLAGS_ROCE_IP_VER_LAST \ CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 + #define CQ_RES_UD_FLAGS_META_FORMAT_MASK 0x3c0UL + #define CQ_RES_UD_FLAGS_META_FORMAT_SFT 6 + #define CQ_RES_UD_FLAGS_META_FORMAT_NONE (0x0UL << 6) + #define CQ_RES_UD_FLAGS_META_FORMAT_VLAN (0x1UL << 6) + #define CQ_RES_UD_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6) + #define CQ_RES_UD_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6) + #define CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6) + #define CQ_RES_UD_FLAGS_META_FORMAT_LAST \ + CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET + #define CQ_RES_UD_FLAGS_EXT_META_FORMAT_MASK 0xc00UL + #define CQ_RES_UD_FLAGS_EXT_META_FORMAT_SFT 10 + __le32 src_qp_high_srq_or_rq_wr_id; #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_SFT 0 - #define CQ_RES_UD_RESERVED4_MASK 0xf00000UL - #define CQ_RES_UD_RESERVED4_SFT 20 #define CQ_RES_UD_SRC_QP_HIGH_MASK 0xff000000UL #define CQ_RES_UD_SRC_QP_HIGH_SFT 24 }; @@ -983,6 +1024,7 @@ struct cmdq_create_qp { #define CMDQ_CREATE_QP_TYPE_RC 0x2UL #define CMDQ_CREATE_QP_TYPE_UD 0x4UL #define CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE 0x6UL + #define CMDQ_CREATE_QP_TYPE_GSI 0x7UL u8 sq_pg_size_sq_lvl; #define CMDQ_CREATE_QP_SQ_LVL_MASK 0xfUL #define CMDQ_CREATE_QP_SQ_LVL_SFT 0 @@ -2719,6 +2761,8 @@ struct creq_query_func_resp_sb { __le16 max_srq; __le32 max_gid; __le32 tqm_alloc_reqs[12]; + __le32 max_dpi; + __le32 reserved_32; }; /* Set resources command response (16 bytes) */ diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 591de319c178..fb03bc492ef7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -146,7 +146,7 @@ static void open_rnic_dev(struct t3cdev *tdev) pr_debug("%s t3cdev %p\n", __func__, tdev); pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); - rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); + rnicp = ib_alloc_device(iwch_dev, ibdev); if (!rnicp) { pr_err("Cannot allocate ib device\n"); return; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 07c20cd07f33..804c1fc7bfc1 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1393,7 +1393,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; - dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) return -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 9c10fff6dcfb..4b4e2464b705 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ 
b/drivers/infiniband/hw/cxgb4/device.c @@ -966,7 +966,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pr_info("%s: On-Chip Queues not supported on this device\n", pci_name(infop->pdev)); - devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); + devp = ib_alloc_device(c4iw_dev, ibdev); if (!devp) { pr_err("Cannot allocate ib device\n"); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index f977f8e7e162..f59bf7e5a589 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -549,6 +549,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .destroy_cq = c4iw_destroy_cq, .destroy_qp = c4iw_destroy_qp, .destroy_srq = c4iw_destroy_srq, + .fill_res_entry = fill_res_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, @@ -615,7 +616,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; - dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) { ret = -ENOMEM; goto err_dealloc_ctx; @@ -629,7 +630,6 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; dev->ibdev.iwcm->get_qp = c4iw_get_qp; - dev->ibdev.res.fill_res_entry = fill_res_entry; memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, sizeof(dev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c22ebc774a6a..f9a7e9d29c8b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -488,7 +488,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vmf = 1; break; case STATUS: - if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) { + if (flags & VM_WRITE) { ret = -EPERM; goto done; } diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index c98d94cda15e..f88ad425664a 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -967,7 +967,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = packet->ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index e341e6dcc388..24b592c6522e 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -91,9 +91,7 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, /* Convert to number of pages */ size = DIV_ROUND_UP(size, PAGE_SIZE); - down_read(&mm->mmap_sem); - pinned = mm->pinned_vm; - up_read(&mm->mmap_sem); + pinned = atomic64_read(&mm->pinned_vm); /* First, check the absolute limit against all pinned pages. 
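An unlocked atomic64_read() is sufficient here; pinned_vm is now an atomic64_t and the limit check tolerates a racy snapshot.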
*/ if (pinned + npages >= ulimit && !can_lock) @@ -111,9 +109,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np if (ret < 0) return ret; - down_write(&mm->mmap_sem); - mm->pinned_vm += ret; - up_write(&mm->mmap_sem); + atomic64_add(ret, &mm->pinned_vm); return ret; } @@ -130,8 +126,6 @@ void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, } if (mm) { /* during close after signal, mm can be NULL */ - down_write(&mm->mmap_sem); - mm->pinned_vm -= npages; - up_write(&mm->mmap_sem); + atomic64_sub(npages, &mm->pinned_vm); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index a0ba19d4a10e..2acf946d02e5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -176,17 +176,33 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, unsigned long timeout) { - if (hr_dev->is_reset) - return 0; + int ret; + + if (hr_dev->hw->rst_prc_mbox) { + ret = hr_dev->hw->rst_prc_mbox(hr_dev); + if (ret == CMD_RST_PRC_SUCCESS) + return 0; + else if (ret == CMD_RST_PRC_EBUSY) + return -EBUSY; + } if (hr_dev->cmd.use_events) - return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - timeout); + ret = hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + timeout); else - return hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - timeout); + ret = hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + timeout); + + if (ret == CMD_RST_PRC_EBUSY) + return -EBUSY; + + if (ret && (hr_dev->hw->rst_prc_mbox && + hr_dev->hw->rst_prc_mbox(hr_dev) == CMD_RST_PRC_SUCCESS)) + return 0; + + return ret; } EXPORT_SYMBOL_GPL(hns_roce_cmd_mbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6fde434b22fd..8ca8d74dfb6a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -217,6 +217,32 @@ enum { HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4 }; +enum hns_roce_reset_stage { + HNS_ROCE_STATE_NON_RST, + HNS_ROCE_STATE_RST_BEF_DOWN, + HNS_ROCE_STATE_RST_DOWN, + HNS_ROCE_STATE_RST_UNINIT, + HNS_ROCE_STATE_RST_INIT, + HNS_ROCE_STATE_RST_INITED, +}; + +enum hns_roce_instance_state { + HNS_ROCE_STATE_NON_INIT, + HNS_ROCE_STATE_INIT, + HNS_ROCE_STATE_INITED, + HNS_ROCE_STATE_UNINIT, +}; + +enum { + HNS_ROCE_RST_DIRECT_RETURN = 0, +}; + +enum { + CMD_RST_PRC_OTHERS, + CMD_RST_PRC_SUCCESS, + CMD_RST_PRC_EBUSY, +}; + #define HNS_ROCE_CMD_SUCCESS 1 #define HNS_ROCE_PORT_DOWN 0 @@ -854,6 +880,7 @@ struct hns_roce_hw { u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event); int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); + int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); @@ -920,6 +947,8 @@ struct hns_roce_dev { spinlock_t bt_cmd_lock; bool active; bool is_reset; + bool dis_db; + unsigned long reset_cnt; struct hns_roce_ib_iboe iboe; struct list_head pgdir_list; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index b74c742b000c..fa08c22aad66 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -5002,7 +5002,7 @@ static int hns_roce_probe(struct platform_device *pdev) struct hns_roce_dev *hr_dev; struct device *dev = &pdev->dev; - hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + hr_dev = ib_alloc_device(hns_roce_dev, ib_dev); if (!hr_dev) return -ENOMEM; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5c483b437bdd..19fefff4f699 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -587,7 +587,7 @@ out: roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, V2_DB_PARAMETER_SL_S, qp->sl); - hns_roce_write64_k((__le32 *)&sq_db, qp->sq.db_reg_l); + hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l); qp->sq_next_wqe = ind; qp->next_sge = sge_ind; @@ -712,6 +712,113 @@ out: return ret; } +static int hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev, + unsigned long instance_stage, + unsigned long reset_stage) +{ + /* When hardware reset has been completed once or more, we should stop + * sending mailbox&cmq&doorbell to hardware. If now in .init_instance() + * function, we should exit with error. If now at HNAE3_INIT_CLIENT + * stage of soft reset process, we should exit with error, and then + * HNAE3_INIT_CLIENT related process can roll back the operation like + * notifying hardware to free resources, HNAE3_INIT_CLIENT related + * process will exit with error to notify NIC driver to reschedule soft + * reset process once again. + */ + hr_dev->is_reset = true; + hr_dev->dis_db = true; + + if (reset_stage == HNS_ROCE_STATE_RST_INIT || + instance_stage == HNS_ROCE_STATE_INIT) + return CMD_RST_PRC_EBUSY; + + return CMD_RST_PRC_SUCCESS; +} + +static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, + unsigned long instance_stage, + unsigned long reset_stage) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + + /* When hardware reset is detected, we should stop sending mailbox&cmq& + * doorbell to hardware. If now in .init_instance() function, we should + * exit with error. If now at HNAE3_INIT_CLIENT stage of soft reset + * process, we should exit with error, and then HNAE3_INIT_CLIENT + * related process can roll back the operation like notifying hardware + * to free resources, HNAE3_INIT_CLIENT related process will exit with + * error to notify NIC driver to reschedule soft reset process once + * again. + */ + hr_dev->dis_db = true; + if (!ops->get_hw_reset_stat(handle)) + hr_dev->is_reset = true; + + if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT || + instance_stage == HNS_ROCE_STATE_INIT) + return CMD_RST_PRC_EBUSY; + + return CMD_RST_PRC_SUCCESS; +} + +static int hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + + /* When software reset is detected at .init_instance() function, we + * should stop sending mailbox&cmq&doorbell to hardware, and exit + * with error.
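+ * The NIC driver will then reschedule its soft reset, and the RoCE + * client is reinitialized later through the reset notify callbacks.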
+ */ + hr_dev->dis_db = true; + if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) + hr_dev->is_reset = true; + + return CMD_RST_PRC_EBUSY; +} + +static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + unsigned long instance_stage; /* the current instance stage */ + unsigned long reset_stage; /* the current reset stage */ + unsigned long reset_cnt; + bool sw_resetting; + bool hw_resetting; + + if (hr_dev->is_reset) + return CMD_RST_PRC_SUCCESS; + + /* Get reset information from the NIC driver or from the RoCE driver + * itself; the meanings of the following variables obtained from the + * NIC driver are described below: + * reset_cnt -- The count value of completed hardware reset. + * hw_resetting -- Whether hardware device is resetting now. + * sw_resetting -- Whether NIC's software reset process is running now. + */ + instance_stage = handle->rinfo.instance_state; + reset_stage = handle->rinfo.reset_state; + reset_cnt = ops->ae_dev_reset_cnt(handle); + hw_resetting = ops->get_hw_reset_stat(handle); + sw_resetting = ops->ae_dev_resetting(handle); + + if (reset_cnt != hr_dev->reset_cnt) + return hns_roce_v2_cmd_hw_reseted(hr_dev, instance_stage, + reset_stage); + else if (hw_resetting) + return hns_roce_v2_cmd_hw_resetting(hr_dev, instance_stage, + reset_stage); + else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT) + return hns_roce_v2_cmd_sw_resetting(hr_dev); + + return 0; +} + static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring) { int ntu = ring->next_to_use; @@ -892,8 +999,8 @@ static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) return clean; } -static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, - struct hns_roce_cmq_desc *desc, int num) +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) { struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; @@ -905,9 +1012,6 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, int ret = 0; int ntc; - if (hr_dev->is_reset) - return 0; - spin_lock_bh(&csq->lock); if (num > hns_roce_cmq_space(csq)) { @@ -982,6 +1086,30 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; } +int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) +{ + int retval; + int ret; + + ret = hns_roce_v2_rst_process_cmd(hr_dev); + if (ret == CMD_RST_PRC_SUCCESS) + return 0; + if (ret == CMD_RST_PRC_EBUSY) + return ret; + + ret = __hns_roce_cmq_send(hr_dev, desc, num); + if (ret) { + retval = hns_roce_v2_rst_process_cmd(hr_dev); + if (retval == CMD_RST_PRC_SUCCESS) + return 0; + else if (retval == CMD_RST_PRC_EBUSY) + return retval; + } + + return ret; +} + static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) { struct hns_roce_query_version *resp; @@ -1857,6 +1985,9 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, status = hns_roce_v2_cmd_complete(hr_dev); if (status != 0x1) { + if (status == CMD_RST_PRC_EBUSY) + return status; + dev_err(dev, "mailbox status 0x%x!\n", status); return -EBUSY; } @@ -2367,6 +2498,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq
= to_hr_cq(ibcq); u32 notification_flag; u32 doorbell[2]; @@ -2392,7 +2524,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S, notification_flag); - hns_roce_write64_k(doorbell, hr_cq->cq_db_l); + hns_roce_write64(hr_dev, doorbell, hr_cq->cq_db_l); return 0; } @@ -4381,7 +4513,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_sccc_clr_done *rst, *resp; + struct hns_roce_sccc_clr_done *resp; struct hns_roce_sccc_clr *clr; struct hns_roce_cmq_desc desc; int ret, i; @@ -4390,7 +4522,6 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, /* set scc ctx clear done flag */ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); - rst = (struct hns_roce_sccc_clr_done *)desc.data; ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { dev_err(hr_dev->dev, "Reset SCC ctx failed(%d)\n", ret); @@ -4636,6 +4767,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { + struct hns_roce_dev *hr_dev = eq->hr_dev; u32 doorbell[2]; doorbell[0] = 0; @@ -4662,7 +4794,7 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq) HNS_ROCE_V2_EQ_DB_PARA_S, (eq->cons_index & HNS_ROCE_V2_CONS_IDX_M)); - hns_roce_write64_k(doorbell, eq->doorbell); + hns_roce_write64(hr_dev, doorbell, eq->doorbell); } static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry) @@ -5819,7 +5951,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, return 0; } -int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); @@ -5884,6 +6016,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_db srq_db; @@ -5945,7 +6078,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn; srq_db.parameter = srq->head; - hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l); + hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); } @@ -5978,6 +6111,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .hw_exit = hns_roce_v2_exit, .post_mbox = hns_roce_v2_post_mbox, .chk_mbox = hns_roce_v2_chk_mbox, + .rst_prc_mbox = hns_roce_v2_rst_process_cmd, .set_gid = hns_roce_v2_set_gid, .set_mac = hns_roce_v2_set_mac, .write_mtpt = hns_roce_v2_write_mtpt, @@ -6021,6 +6155,7 @@ MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { + struct hns_roce_v2_priv *priv = hr_dev->priv; const struct pci_device_id *id; int i; @@ -6051,15 +6186,18 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->cmd_mod = 1; hr_dev->loop_idc = 0; + hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle); + priv->handle = handle; + return 0; } -static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) +static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) { struct hns_roce_dev *hr_dev; int ret; - hr_dev = (struct hns_roce_dev 
*)ib_alloc_device(sizeof(*hr_dev)); + hr_dev = ib_alloc_device(hns_roce_dev, ib_dev); if (!hr_dev) return -ENOMEM; @@ -6071,7 +6209,6 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) hr_dev->pci_dev = handle->pdev; hr_dev->dev = &handle->pdev->dev; - handle->priv = hr_dev; ret = hns_roce_hw_v2_get_cfg(hr_dev, handle); if (ret) { @@ -6085,6 +6222,8 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_get_cfg; } + handle->priv = hr_dev; + return 0; error_failed_get_cfg: @@ -6096,7 +6235,7 @@ error_failed_kzalloc: return ret; } -static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, +static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset) { struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; @@ -6104,24 +6243,79 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, if (!hr_dev) return; + handle->priv = NULL; hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); } +static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) +{ + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + struct device *dev = &handle->pdev->dev; + int ret; + + handle->rinfo.instance_state = HNS_ROCE_STATE_INIT; + + if (ops->ae_dev_resetting(handle) || ops->get_hw_reset_stat(handle)) { + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; + goto reset_chk_err; + } + + ret = __hns_roce_hw_v2_init_instance(handle); + if (ret) { + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; + dev_err(dev, "RoCE instance init failed! ret = %d\n", ret); + if (ops->ae_dev_resetting(handle) || + ops->get_hw_reset_stat(handle)) + goto reset_chk_err; + else + return ret; + } + + handle->rinfo.instance_state = HNS_ROCE_STATE_INITED; + + + return 0; + +reset_chk_err: + dev_err(dev, "Device is busy in resetting state.\n" + "please retry later.\n"); + + return -EBUSY; +} + +static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, + bool reset) +{ + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) + return; + + handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT; + + __hns_roce_hw_v2_uninit_instance(handle, reset); + + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; +} static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) { - struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + struct hns_roce_dev *hr_dev; struct ib_event event; - if (!hr_dev) { - dev_err(&handle->pdev->dev, - "Input parameter handle->priv is NULL!\n"); - return -EINVAL; + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) { + set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); + return 0; } + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_DOWN; + clear_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); + + hr_dev = (struct hns_roce_dev *)handle->priv; + if (!hr_dev) + return 0; + hr_dev->active = false; - hr_dev->is_reset = true; + hr_dev->dis_db = true; event.event = IB_EVENT_DEVICE_FATAL; event.device = &hr_dev->ib_dev; @@ -6133,17 +6327,29 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle) { + struct device *dev = &handle->pdev->dev; int ret; - ret = hns_roce_hw_v2_init_instance(handle); + if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN, + &handle->rinfo.state)) { + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; + return 0; + } + + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INIT; + + 
dev_info(&handle->pdev->dev, "In reset process RoCE client reinit.\n"); + ret = __hns_roce_hw_v2_init_instance(handle); if (ret) { /* When the reset notify type is HNAE3_INIT_CLIENT, the RoCE engine * is reinitialized in the reset notify callback function. If the * RoCE reinit fails, we should inform the NIC driver. */ handle->priv = NULL; - dev_err(&handle->pdev->dev, - "In reset process RoCE reinit failed %d.\n", ret); + dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret); + } else { + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; + dev_info(dev, "Reset done, RoCE client reinit finished.\n"); } return ret; @@ -6151,8 +6357,14 @@ static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle) { + if (test_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state)) + return 0; + + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT; + dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n"); msleep(100); - hns_roce_hw_v2_uninit_instance(handle, false); + __hns_roce_hw_v2_uninit_instance(handle, false); + return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 242eeaeba761..6b0486fcbc59 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -96,6 +96,8 @@ #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 +#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000 + #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_SCCC_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 @@ -1602,6 +1604,7 @@ struct hns_roce_link_table_entry { #define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20) struct hns_roce_v2_priv { + struct hnae3_handle *handle; struct hns_roce_v2_cmq cmq; struct hns_roce_link_table tsq; struct hns_roce_link_table tpq; @@ -1796,4 +1799,15 @@ struct hns_roce_sccc_clr_done { __le32 rsv[5]; }; +static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], + void __iomem *dest) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + + if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle)) + hns_roce_write64_k(val, dest); +} + #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 8975f858b36f..a8ee2f6da967 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -78,9 +78,9 @@ static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd, - struct hns_roce_mtt *hr_mtt, u64 db_rec_addr, - struct hns_roce_srq *srq) +static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, + u16 xrcd, struct hns_roce_mtt *hr_mtt, + u64 db_rec_addr, struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; struct hns_roce_cmd_mailbox *mailbox; @@ -155,7 +155,8 @@ err_out: return ret; } -void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; @@ -210,6 +211,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_ib_create_srq_resp resp = {};
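+ /* resp is copied back to user space below, truncated to udata->outlen */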
struct hns_roce_srq *srq; int srq_desc_size; int srq_buf_size; @@ -377,16 +379,21 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, srq->event = hns_roce_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->srqn; + resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp)))) { ret = -EFAULT; - goto err_wrid; + goto err_srqc_alloc; } } return &srq->ibsrq; +err_srqc_alloc: + hns_roce_srq_free(hr_dev, srq); + err_wrid: kvfree(srq->wrid); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 12b31a8440be..d4ab46dd9e6c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2762,7 +2762,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev struct net_device *netdev = iwdev->netdev; struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context; - iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev)); + iwibdev = ib_alloc_device(i40iw_ib_device, ibdev); if (!iwibdev) { i40iw_pr_err("iwdev == NULL\n"); return NULL; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 25439da8976c..936ee1314bcd 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); + mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1902,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_SEND: - rdma_destroy_ah(sqp->tx_ring[wc.wr_id & + mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; @@ -1931,7 +1931,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) " status = %d, wrid = 0x%llx\n", ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - rdma_destroy_ah(sqp->tx_ring[wc.wr_id & + mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index dc2ffd293a11..d66002a31000 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2635,7 +2635,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (num_ports == 0) return NULL; - ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); + ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev); if (!ibdev) { dev_err(&dev->persist->pdev->dev, "Device struct alloc failed\n"); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index c283c32f30fe..18704e503508 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -187,8 +187,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wqe_ctr = be16_to_cpu(cqe->wqe_counter); wc->wr_id = srq->wrid[wqe_ctr]; mlx5_ib_free_srq_wqe(srq, wqe_ctr); - if (msrq && atomic_dec_and_test(&msrq->refcount)) - complete(&msrq->free); + if (msrq) + mlx5_core_res_put(&msrq->common); } } else { wq = &qp->rq; 
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 12de63c63fa0..cd43e39ced87 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1107,12 +1107,10 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu) static void devx_cleanup_mkey(struct devx_obj *obj) { struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table; - struct mlx5_core_mkey *del_mkey; unsigned long flags; write_lock_irqsave(&table->lock, flags); - del_mkey = radix_tree_delete(&table->tree, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); + radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key)); write_unlock_irqrestore(&table->lock, flags); } diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index e8a1e4498e3f..798591a18484 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -630,8 +630,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), UAPI_DEF_CHAIN_OBJ_TREE( UVERBS_OBJECT_FLOW, - &mlx5_ib_fs, - UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + &mlx5_ib_fs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_actions), {}, diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 46a9ddc8ca56..6d7b8bad4b61 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -70,7 +70,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *ibdev; - ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); + ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2aaa196c2653..76d6c2557d0c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -476,24 +476,51 @@ out: return err; } +struct mlx5_ib_vlan_info { + u16 vlan_id; + bool vlan; +}; + +static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) { + struct mlx5_ib_vlan_info *vlan_info = data; + + if (is_vlan_dev(lower_dev)) { + vlan_info->vlan = true; + vlan_info->vlan_id = vlan_dev_vlan_id(lower_dev); + } + /* We are interested only in the first-level vlan device, so + * always return 1 to stop iterating over next level devices. + */ + return 1; +} + static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum ib_gid_type gid_type = IB_GID_TYPE_IB; + struct mlx5_ib_vlan_info vlan_info = { }; u8 roce_version = 0; u8 roce_l3_type = 0; - bool vlan = false; u8 mac[ETH_ALEN]; - u16 vlan_id = 0; if (gid) { gid_type = attr->gid_type; ether_addr_copy(mac, attr->ndev->dev_addr); if (is_vlan_dev(attr->ndev)) { - vlan = true; - vlan_id = vlan_dev_vlan_id(attr->ndev); + vlan_info.vlan = true; + vlan_info.vlan_id = vlan_dev_vlan_id(attr->ndev); + } else { + /* If the netdev is an upper device and its lower + * device is a vlan device, consider the vlan id of + * the lower vlan device for this gid entry.
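+ * The walk below runs under rcu_read_lock() and stops at the + * first vlan device found (get_lower_dev_vlan() returns 1).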
+ */ + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(attr->ndev, + get_lower_dev_vlan, &vlan_info); + rcu_read_unlock(); } } @@ -514,8 +541,9 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, } return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, - roce_l3_type, gid->raw, mac, vlan, - vlan_id, port_num); + roce_l3_type, gid->raw, mac, + vlan_info.vlan, vlan_info.vlan_id, + port_num); } static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, @@ -6176,7 +6204,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } -void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_odp_cleanup_one(dev); } @@ -6508,7 +6536,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) return mlx5_ib_add_slave_port(mdev); - dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return NULL; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7fcc3f095371..f9817284c7a3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1075,9 +1075,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length, - struct mlx5_ib_qp_base *base); +int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + int buflen, size_t *bc); +int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + int buflen, size_t *bc); +int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, + void *buffer, int buflen, size_t *bc); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3e0d5885c026..335fd0c6ea2a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -315,6 +315,9 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send)) caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND; + if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive)) + caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND; @@ -330,6 +333,27 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic)) 
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && MLX5_CAP_GEN(dev->mdev, null_mkey) && MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) @@ -869,7 +893,6 @@ srcu_unlock: /** * Parse a series of data segments for page fault handling. * - * @qp the QP on which the fault occurred. * @pfault contains page fault information. * @wqe points at the first data segment in the WQE. * @wqe_end points after the end of the WQE. @@ -886,7 +909,7 @@ srcu_unlock: */ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, - struct mlx5_ib_qp *qp, void *wqe, + void *wqe, void *wqe_end, u32 *bytes_mapped, u32 *total_wqe_bytes, int receive_queue) { @@ -897,10 +920,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, size_t bcnt; int inline_segment; - /* Skip SRQ next-WQE segment. */ - if (receive_queue && qp->ibqp.srq) - wqe += sizeof(struct mlx5_wqe_srq_next_seg); - if (bytes_mapped) *bytes_mapped = 0; if (total_wqe_bytes) @@ -1025,6 +1044,10 @@ static int mlx5_ib_mr_initiator_pfault_handler( MLX5_WQE_CTRL_OPCODE_MASK; switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_INI: + *wqe += sizeof(struct mlx5_wqe_xrc_seg); + transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps; + break; case IB_QPT_RC: transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps; break; @@ -1044,7 +1067,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( return -EFAULT; } - if (qp->ibqp.qp_type != IB_QPT_RC) { + if (qp->ibqp.qp_type == IB_QPT_UD) { av = *wqe; if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); @@ -1069,21 +1092,34 @@ static int mlx5_ib_mr_initiator_pfault_handler( } /* - * Parse responder WQE. Advances the wqe pointer to point at the - * scatter-gather list, and set wqe_end to the end of the WQE. + * Parse responder WQE and set wqe_end to the end of the WQE. 
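+ * For SRQs, the wqe pointer is also advanced past the next-WQE segment.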
*/ -static int mlx5_ib_mr_responder_pfault_handler( - struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, - struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length) +static int mlx5_ib_mr_responder_pfault_handler_srq(struct mlx5_ib_dev *dev, + struct mlx5_ib_srq *srq, + void **wqe, void **wqe_end, + int wqe_length) { - struct mlx5_ib_wq *wq = &qp->rq; - int wqe_size = 1 << wq->wqe_shift; + int wqe_size = 1 << srq->msrq.wqe_shift; - if (qp->ibqp.srq) { - mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n"); + if (wqe_size > wqe_length) { + mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n"); return -EFAULT; } + *wqe_end = *wqe + wqe_size; + *wqe += sizeof(struct mlx5_wqe_srq_next_seg); + + return 0; +} + +static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + void *wqe, void **wqe_end, + int wqe_length) +{ + struct mlx5_ib_wq *wq = &qp->rq; + int wqe_size = 1 << wq->wqe_shift; + if (qp->wq_sig) { mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n"); return -EFAULT; @@ -1107,7 +1143,7 @@ invalid_transport_or_opcode: return -EFAULT; } - *wqe_end = *wqe + wqe_size; + *wqe_end = wqe + wqe_size; return 0; } @@ -1115,22 +1151,25 @@ invalid_transport_or_opcode: static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev, u32 wq_num, int pf_type) { - enum mlx5_res_type res_type; + struct mlx5_core_rsc_common *common = NULL; + struct mlx5_core_srq *srq; switch (pf_type) { case MLX5_WQE_PF_TYPE_RMP: - res_type = MLX5_RES_SRQ; + srq = mlx5_cmd_get_srq(dev, wq_num); + if (srq) + common = &srq->common; break; case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE: case MLX5_WQE_PF_TYPE_RESP: case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC: - res_type = MLX5_RES_QP; + common = mlx5_core_res_hold(dev->mdev, wq_num, MLX5_RES_QP); break; default: - return NULL; + break; } - return mlx5_core_res_hold(dev->mdev, wq_num, res_type); + return common; } static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) @@ -1140,6 +1179,14 @@ static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) return to_mibqp(mqp); } +static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res) +{ + struct mlx5_core_srq *msrq = + container_of(res, struct mlx5_core_srq, common); + + return to_mibsrq(msrq); +} + static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { @@ -1150,8 +1197,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, int resume_with_error = 1; u16 wqe_index = pfault->wqe.wqe_index; int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; - struct mlx5_core_rsc_common *res; - struct mlx5_ib_qp *qp; + struct mlx5_core_rsc_common *res = NULL; + struct mlx5_ib_qp *qp = NULL; + struct mlx5_ib_srq *srq = NULL; + size_t bytes_copied; res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); if (!res) { @@ -1163,6 +1212,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, case MLX5_RES_QP: qp = res_to_qp(res); break; + case MLX5_RES_SRQ: + case MLX5_RES_XSRQ: + srq = res_to_srq(res); + break; default: mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); goto resolve_page_fault; @@ -1174,9 +1227,23 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } - ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, - PAGE_SIZE, &qp->trans_qp.base); - if (ret < 0) { + if (qp) { + if (requestor) { + ret = mlx5_ib_read_user_wqe_sq(qp, 
wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } else { + ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } + } else { + ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } + + if (ret) { mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n", ret, wqe_index, pfault->token); goto resolve_page_fault; @@ -1184,11 +1251,18 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, wqe = buffer; if (requestor) - ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe, - &wqe_end, ret); + ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, + &wqe, &wqe_end, + bytes_copied); + else if (qp) + ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp, + wqe, &wqe_end, + bytes_copied); else - ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe, - &wqe_end, ret); + ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq, + &wqe, &wqe_end, + bytes_copied); + if (ret < 0) goto resolve_page_fault; @@ -1197,7 +1271,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } - ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end, + ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, &bytes_mapped, &total_wqe_bytes, !requestor); if (ret == -EAGAIN) { @@ -1611,10 +1685,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work) struct prefetch_mr_work *w = container_of(work, struct prefetch_mr_work, work); - if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED) + if (ib_device_try_get(&w->dev->ib_dev)) { mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, w->num_sge); - + ib_device_put(&w->dev->ib_dev); + } + put_device(&w->dev->ib_dev.dev); kfree(w); } @@ -1633,15 +1709,13 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, num_sge); - if (dev->ib_dev.reg_state != IB_DEV_REGISTERED) - return -ENODEV; - work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); if (!work) return -ENOMEM; memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); + get_device(&dev->ib_dev.dev); work->dev = dev; work->pf_flags = pf_flags; work->num_sge = num_sge; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 529e76f67cb6..4f2bc101b061 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -109,75 +109,173 @@ static int is_sqp(enum ib_qp_type qp_type) } /** - * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. + * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of one) from a user WQ + * to a kernel buffer * - * @qp: QP to copy from. - * @send: copy from the send queue when non-zero, use the receive queue - * otherwise. - * @wqe_index: index to start copying from. For send work queues, the - * wqe_index is in units of MLX5_SEND_WQE_BB. - * For receive work queue, it is the number of work queue - * element in the queue. - * @buffer: destination buffer. - * @length: maximum number of bytes to copy. + * @umem: User space memory where the WQ is + * @buffer: buffer to copy to + * @buflen: buffer length + * @wqe_index: index of WQE to copy from + * @wq_offset: offset to start of WQ + * @wq_wqe_cnt: number of WQEs in WQ + * @wq_wqe_shift: log2 of WQE size + * @bcnt: number of bytes to copy + * @bytes_copied: number of bytes actually copied (return value) * - * Copies at least a single WQE, but may copy more data. + * Copies at most bcnt bytes from the start of the WQE.
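+ * A single call never reads past the end of the WQ; for a wrapped-around + * WQE, mlx5_ib_read_user_wqe_sq() reads the remainder starting from wqe_index 0.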
+ * Does not guarantee that the entire WQE is copied. * - * Return: the number of bytes copied, or an error code. + * Return: zero on success, or an error code. */ -int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length, - struct mlx5_ib_qp_base *base) +static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, + void *buffer, + u32 buflen, + int wqe_index, + int wq_offset, + int wq_wqe_cnt, + int wq_wqe_shift, + int bcnt, + size_t *bytes_copied) +{ + size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift); + size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift); + size_t copy_length; + int ret; + + /* don't copy more than requested, more than the buffer length or + * beyond the WQ end + */ + copy_length = min_t(u32, buflen, wq_end - offset); + copy_length = min_t(u32, copy_length, bcnt); + + ret = ib_umem_copy_from(buffer, umem, offset, copy_length); + if (ret) + return ret; + + if (!ret && bytes_copied) + *bytes_copied = copy_length; + + return 0; +} + +int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) { - struct ib_device *ibdev = qp->ibqp.device; - struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq; - size_t offset; - size_t wq_end; + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct ib_umem *umem = base->ubuffer.umem; - u32 first_copy_length; - int wqe_length; + struct mlx5_ib_wq *wq = &qp->sq; + struct mlx5_wqe_ctrl_seg *ctrl; + size_t bytes_copied; + size_t bytes_copied2; + size_t wqe_length; int ret; + int ds; - if (wq->wqe_cnt == 0) { - mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n", - qp->ibqp.qp_type); + if (buflen < sizeof(*ctrl)) return -EINVAL; - } - offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift); - wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift); + /* at first, read as much as possible */ + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + buflen, + &bytes_copied); + if (ret) + return ret; - if (send && length < sizeof(struct mlx5_wqe_ctrl_seg)) + /* we need at least the control segment size to proceed */ + if (bytes_copied < sizeof(*ctrl)) return -EINVAL; - if (offset > umem->length || - (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length)) - return -EINVAL; + ctrl = buffer; + ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; + wqe_length = ds * MLX5_WQE_DS_UNITS; + + /* if we copied enough then we are done */ + if (bytes_copied >= wqe_length) { + *bc = bytes_copied; + return 0; + } + + /* otherwise this is a wrapped-around wqe, + * so read the remaining bytes starting + * from wqe_index 0 + */ + ret = mlx5_ib_read_user_wqe_common(umem, + buffer + bytes_copied, + buflen - bytes_copied, + 0, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + wqe_length - bytes_copied, + &bytes_copied2); - first_copy_length = min_t(u32, offset + length, wq_end) - offset; - ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length); if (ret) return ret; + *bc = bytes_copied + bytes_copied2; + return 0; +} - if (send) { - struct mlx5_wqe_ctrl_seg *ctrl = buffer; - int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; - - wqe_length = ds * MLX5_WQE_DS_UNITS; - } else { - wqe_length = 1 << wq->wqe_shift; - } - - if (wqe_length <= first_copy_length) - return first_copy_length; +int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) +{ + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; + struct
ib_umem *umem = base->ubuffer.umem; + struct mlx5_ib_wq *wq = &qp->rq; + size_t bytes_copied; + int ret; - if (wqe_length <= first_copy_length) - return first_copy_length; + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + buflen, + &bytes_copied); - ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset, - wqe_length - first_copy_length); if (ret) return ret; + *bc = bytes_copied; + return 0; +} + +int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) +{ + struct ib_umem *umem = srq->umem; + size_t bytes_copied; + int ret; + + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + 0, + srq->msrq.max, + srq->msrq.wqe_shift, + buflen, + &bytes_copied); - return wqe_length; + if (ret) + return ret; + *bc = bytes_copied; + return 0; } static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) @@ -435,9 +533,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, return -EINVAL; } - if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) { - mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n", - ucmd->sq_wqe_count, ucmd->sq_wqe_count); + if (ucmd->sq_wqe_count && !is_power_of_2(ucmd->sq_wqe_count)) { + mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n", + ucmd->sq_wqe_count); return -EINVAL; } @@ -1908,14 +2006,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } if (!check_flags_mask(ucmd.flags, + MLX5_QP_FLAG_ALLOW_SCATTER_CQE | + MLX5_QP_FLAG_BFREG_INDEX | + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE | + MLX5_QP_FLAG_SCATTER_CQE | MLX5_QP_FLAG_SIGNATURE | - MLX5_QP_FLAG_SCATTER_CQE | - MLX5_QP_FLAG_TUNNEL_OFFLOADS | - MLX5_QP_FLAG_BFREG_INDEX | - MLX5_QP_FLAG_TYPE_DCT | - MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_ALLOW_SCATTER_CQE | - MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)) + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_TYPE_DCI | + MLX5_QP_FLAG_TYPE_DCT)) return -EINVAL; err = get_qp_user_index(to_mucontext(pd->uobject->context), @@ -2647,10 +2747,10 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp) static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, - int attr_mask, __be32 *hw_access_flags) + int attr_mask, __be32 *hw_access_flags_be) { u8 dest_rd_atomic; - u32 access_flags; + u32 access_flags, hw_access_flags = 0; struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); @@ -2668,7 +2768,7 @@ static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, access_flags &= IB_ACCESS_REMOTE_WRITE; if (access_flags & IB_ACCESS_REMOTE_READ) - *hw_access_flags |= MLX5_QP_BIT_RRE; + hw_access_flags |= MLX5_QP_BIT_RRE; if (access_flags & IB_ACCESS_REMOTE_ATOMIC) { int atomic_mode; @@ -2676,14 +2776,14 @@ static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, if (atomic_mode < 0) return -EOPNOTSUPP; - *hw_access_flags |= MLX5_QP_BIT_RAE; - *hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET; + hw_access_flags |= MLX5_QP_BIT_RAE; + hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET; } if (access_flags & IB_ACCESS_REMOTE_WRITE) - *hw_access_flags |= MLX5_QP_BIT_RWE; + hw_access_flags |= MLX5_QP_BIT_RWE; - *hw_access_flags = cpu_to_be32(*hw_access_flags); + *hw_access_flags_be = cpu_to_be32(hw_access_flags); return 0; } @@ -3384,7 +3484,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { - 
__be32 access_flags = 0; + __be32 access_flags; err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags); if (err) diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 75eb5839ae95..c330af35ff10 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -46,8 +46,6 @@ struct mlx5_core_srq { int wqe_shift; void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e); - atomic_t refcount; - struct completion free; u16 uid; }; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 7aaaffbd4afa..63ac38bb3498 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -87,7 +87,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) srq = radix_tree_lookup(&table->tree, srqn); if (srq) - atomic_inc(&srq->refcount); + atomic_inc(&srq->common.refcount); spin_unlock(&table->lock); @@ -594,8 +594,8 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, if (err) return err; - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); + atomic_set(&srq->common.refcount, 1); + init_completion(&srq->common.free); spin_lock_irq(&table->lock); err = radix_tree_insert(&table->tree, srq->srqn, srq); @@ -627,9 +627,8 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) if (err) return err; - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - wait_for_completion(&srq->free); + mlx5_core_res_put(&srq->common); + wait_for_completion(&srq->common.free); return 0; } @@ -685,7 +684,7 @@ static int srq_event_notifier(struct notifier_block *nb, srq = radix_tree_lookup(&table->tree, srqn); if (srq) - atomic_inc(&srq->refcount); + atomic_inc(&srq->common.refcount); spin_unlock(&table->lock); @@ -694,8 +693,7 @@ static int srq_event_notifier(struct notifier_block *nb, srq->event(srq, eqe->type); - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); + mlx5_core_res_put(&srq->common); return NOTIFY_OK; } diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 92c49bff22bc..fe9654a7af71 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -961,7 +961,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) /* We can handle large RDMA requests, so allow larger segments. 
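 * (the maximum DMA segment size is raised to 1 GiB below)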
*/ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); - mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev); + mdev = ib_alloc_device(mthca_dev, ib_dev); if (!mdev) { dev_err(&pdev->dev, "Device struct alloc failed, " "aborting.\n"); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 63003b4d2485..1bb67562c8c8 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -534,7 +534,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, { struct mthca_ucontext *context; - qp = kmalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); @@ -600,7 +600,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, if (udata) return ERR_PTR(-EINVAL); - qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); + qp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 034156f7e9ed..6eb991d40035 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3669,7 +3669,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; - nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device)); + nesibdev = ib_alloc_device(nes_ib_device, ibdev); if (nesibdev == NULL) { return NULL; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b0491b9ecfe4..88970a6bb555 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -297,7 +297,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) u8 lstate = 0; struct ocrdma_dev *dev; - dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); + dev = ib_alloc_device(ocrdma_dev, ibdev); if (!dev) { pr_err("Unable to allocate ib device\n"); return NULL; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index f85e72b65a10..878e9e23652b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -853,7 +853,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, struct qedr_dev *dev; int rc = 0; - dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(qedr_dev, ibdev); if (!dev) { pr_err("Unable to allocate ib device\n"); return NULL; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 6668bbf4e96d..5cdedba2d164 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -508,7 +508,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 075f09fb7ce3..ef8bcf366ddc 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -49,43 +49,6 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, } } -/* - * Call with current->mm->mmap_sem held. 
- */ -static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, - struct page **p) -{ - unsigned long lock_limit; - size_t got; - int ret; - - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) { - ret = -ENOMEM; - goto bail; - } - - for (got = 0; got < num_pages; got += ret) { - ret = get_user_pages_longterm(start_page + got * PAGE_SIZE, - num_pages - got, - FOLL_WRITE | FOLL_FORCE, - p + got, NULL); - if (ret < 0) - goto bail_release; - } - - current->mm->pinned_vm += num_pages; - - ret = 0; - goto bail; - -bail_release: - __qib_release_user_pages(p, got, 0); -bail: - return ret; -} - /** * qib_map_page - a safety wrapper around pci_map_page() * @@ -137,26 +100,44 @@ int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr) int qib_get_user_pages(unsigned long start_page, size_t num_pages, struct page **p) { + unsigned long locked, lock_limit; + size_t got; int ret; - down_write(¤t->mm->mmap_sem); + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + locked = atomic64_add_return(num_pages, ¤t->mm->pinned_vm); - ret = __qib_get_user_pages(start_page, num_pages, p); + if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) { + ret = -ENOMEM; + goto bail; + } - up_write(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); + for (got = 0; got < num_pages; got += ret) { + ret = get_user_pages_longterm(start_page + got * PAGE_SIZE, + num_pages - got, + FOLL_WRITE | FOLL_FORCE, + p + got, NULL); + if (ret < 0) { + up_read(¤t->mm->mmap_sem); + goto bail_release; + } + } + up_read(¤t->mm->mmap_sem); + return 0; +bail_release: + __qib_release_user_pages(p, got, 0); +bail: + atomic64_sub(num_pages, ¤t->mm->pinned_vm); return ret; } void qib_release_user_pages(struct page **p, size_t num_pages) { - if (current->mm) /* during close after signal, mm can be NULL */ - down_write(¤t->mm->mmap_sem); - __qib_release_user_pages(p, num_pages, 1); - if (current->mm) { - current->mm->pinned_vm -= num_pages; - up_write(¤t->mm->mmap_sem); - } + /* during close after signal, mm can be NULL */ + if (current->mm) + atomic64_sub(num_pages, ¤t->mm->pinned_vm); } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 3201dd1899c7..577d9301251a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -333,32 +333,25 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str) static const struct ib_device_ops usnic_dev_ops = { .alloc_pd = usnic_ib_alloc_pd, .alloc_ucontext = usnic_ib_alloc_ucontext, - .create_ah = usnic_ib_create_ah, .create_cq = usnic_ib_create_cq, .create_qp = usnic_ib_create_qp, .dealloc_pd = usnic_ib_dealloc_pd, .dealloc_ucontext = usnic_ib_dealloc_ucontext, .dereg_mr = usnic_ib_dereg_mr, - .destroy_ah = usnic_ib_destroy_ah, .destroy_cq = usnic_ib_destroy_cq, .destroy_qp = usnic_ib_destroy_qp, .get_dev_fw_str = usnic_get_dev_fw_str, - .get_dma_mr = usnic_ib_get_dma_mr, .get_link_layer = usnic_ib_port_link_layer, .get_netdev = usnic_get_netdev, .get_port_immutable = usnic_port_immutable, .mmap = usnic_ib_mmap, .modify_qp = usnic_ib_modify_qp, - .poll_cq = usnic_ib_poll_cq, - .post_recv = usnic_ib_post_recv, - .post_send = usnic_ib_post_send, .query_device = usnic_ib_query_device, .query_gid = usnic_ib_query_gid, .query_pkey = usnic_ib_query_pkey, .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, - .req_notify_cq = usnic_ib_req_notify_cq, }; /* 
Start of PF discovery section */ @@ -372,7 +365,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) usnic_dbg("\n"); netdev = pci_get_drvdata(dev); - us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); + us_ibdev = ib_alloc_device(usnic_ib_dev, ib_dev); if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); @@ -691,7 +684,6 @@ out_unreg_netdev_notifier: out_pci_unreg: pci_unregister_driver(&usnic_ib_pci_driver); out_umem_fini: - usnic_uiom_fini(); return err; } @@ -704,7 +696,6 @@ static void __exit usnic_ib_destroy(void) unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier); unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); pci_unregister_driver(&usnic_ib_pci_driver); - usnic_uiom_fini(); } MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver"); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 432e6f6599fa..9dea18106247 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -760,57 +760,4 @@ int usnic_ib_mmap(struct ib_ucontext *context, return -EINVAL; } -/* In ib callbacks section - Start of stub funcs */ -struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata) - -{ - usnic_dbg("\n"); - return ERR_PTR(-EPERM); -} - -int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, - struct ib_wc *wc) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -int usnic_ib_req_notify_cq(struct ib_cq *cq, - enum ib_cq_notify_flags flags) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc) -{ - usnic_dbg("\n"); - return ERR_PTR(-ENOMEM); -} - - -/* In ib callbacks section - End of stub funcs */ /* End of ib callbacks section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index e33144261b9a..99a6d81c2bcd 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -75,19 +75,4 @@ struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata); - -int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags); -int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); -int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, - struct ib_wc *wc); -int usnic_ib_req_notify_cq(struct ib_cq *cq, - enum ib_cq_notify_flags flags); -struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc); #endif /* !USNIC_IB_VERBS_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index ce01a59fccc4..06862a6af185 100644 --- 
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index ce01a59fccc4..06862a6af185 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -47,8 +47,6 @@
 #include "usnic_uiom.h"
 #include "usnic_uiom_interval_tree.h"
 
-static struct workqueue_struct *usnic_uiom_wq;
-
 #define USNIC_UIOM_PAGE_CHUNK						\
 	((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list))	/\
 	((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] -	\
@@ -127,9 +125,9 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
 
 	uiomr->owning_mm = mm = current->mm;
-	down_write(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 
-	locked = npages + current->mm->pinned_vm;
+	locked = atomic64_add_return(npages, &current->mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -184,14 +182,13 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	}
 
 out:
-	if (ret < 0)
+	if (ret < 0) {
 		usnic_uiom_put_pages(chunk_list, 0);
-	else {
-		mm->pinned_vm = locked;
+		atomic64_sub(npages, &current->mm->pinned_vm);
+	} else
 		mmgrab(uiomr->owning_mm);
-	}
 
-	up_write(&mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	free_page((unsigned long) page_list);
 	return ret;
 }
@@ -435,43 +432,12 @@ static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr)
 	return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
 }
 
-static void usnic_uiom_release_defer(struct work_struct *work)
-{
-	struct usnic_uiom_reg *uiomr =
-		container_of(work, struct usnic_uiom_reg, work);
-
-	down_write(&uiomr->owning_mm->mmap_sem);
-	uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
-	up_write(&uiomr->owning_mm->mmap_sem);
-
-	__usnic_uiom_release_tail(uiomr);
-}
-
 void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
 			    struct ib_ucontext *context)
 {
 	__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
 
-	/*
-	 * We may be called with the mm's mmap_sem already held. This
-	 * can happen when a userspace munmap() is the call that drops
-	 * the last reference to our file and calls our release
-	 * method. If there are memory regions to destroy, we'll end
-	 * up here and not be able to take the mmap_sem. In that case
-	 * we defer the vm_locked accounting to a workqueue.
-	 */
-	if (context->closing) {
-		if (!down_write_trylock(&uiomr->owning_mm->mmap_sem)) {
-			INIT_WORK(&uiomr->work, usnic_uiom_release_defer);
-			queue_work(usnic_uiom_wq, &uiomr->work);
-			return;
-		}
-	} else {
-		down_write(&uiomr->owning_mm->mmap_sem);
-	}
-
-	uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
-	up_write(&uiomr->owning_mm->mmap_sem);
-
+	atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
 	__usnic_uiom_release_tail(uiomr);
 }
 
@@ -600,17 +566,5 @@ int usnic_uiom_init(char *drv_name)
 		return -EPERM;
 	}
 
-	usnic_uiom_wq = create_workqueue(drv_name);
-	if (!usnic_uiom_wq) {
-		usnic_err("Unable to alloc wq for drv %s\n", drv_name);
-		return -ENOMEM;
-	}
-
 	return 0;
 }
-
-void usnic_uiom_fini(void)
-{
-	flush_workqueue(usnic_uiom_wq);
-	destroy_workqueue(usnic_uiom_wq);
-}
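With the counter atomic, the usnic release path above reduces to a single atomic64_sub() that is safe even when the caller already holds mmap_sem, which is why the context->closing trylock and workqueue deferral could be deleted outright. On the pin side the semaphore is still needed, but only around the page-table walk and only for read. A sketch of that shape, with illustrative names and short-pin handling elided:

	#include <linux/mm.h>
	#include <linux/sched/mm.h>

	/* Illustrative only: mmap_sem is held (shared) just for the gup
	 * call; the pinned_vm accounting itself is lock-free. */
	static long demo_pin_user_range(unsigned long start, size_t npages,
					struct page **pages)
	{
		struct mm_struct *mm = current->mm;
		long pinned;

		atomic64_add(npages, &mm->pinned_vm);

		down_read(&mm->mmap_sem);
		pinned = get_user_pages_longterm(start, npages,
						 FOLL_WRITE | FOLL_FORCE,
						 pages, NULL);
		up_read(&mm->mmap_sem);

		if (pinned < 0)
			atomic64_sub(npages, &mm->pinned_vm); /* unwind */

		return pinned;
	}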
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index b86a9731071b..c88cfa087e3a 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -93,5 +93,4 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
 void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
 			    struct ib_ucontext *ucontext);
 int usnic_uiom_init(char *drv_name);
-void usnic_uiom_fini(void);
 #endif /* USNIC_UIOM_H_ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 42b8685c997e..3c633ab58052 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -427,7 +427,40 @@ static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state)
 
 static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op)
 {
-	return (enum pvrdma_wr_opcode)op;
+	switch (op) {
+	case IB_WR_RDMA_WRITE:
+		return PVRDMA_WR_RDMA_WRITE;
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		return PVRDMA_WR_RDMA_WRITE_WITH_IMM;
+	case IB_WR_SEND:
+		return PVRDMA_WR_SEND;
+	case IB_WR_SEND_WITH_IMM:
+		return PVRDMA_WR_SEND_WITH_IMM;
+	case IB_WR_RDMA_READ:
+		return PVRDMA_WR_RDMA_READ;
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+		return PVRDMA_WR_ATOMIC_CMP_AND_SWP;
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		return PVRDMA_WR_ATOMIC_FETCH_AND_ADD;
+	case IB_WR_LSO:
+		return PVRDMA_WR_LSO;
+	case IB_WR_SEND_WITH_INV:
+		return PVRDMA_WR_SEND_WITH_INV;
+	case IB_WR_RDMA_READ_WITH_INV:
+		return PVRDMA_WR_RDMA_READ_WITH_INV;
+	case IB_WR_LOCAL_INV:
+		return PVRDMA_WR_LOCAL_INV;
+	case IB_WR_REG_MR:
+		return PVRDMA_WR_FAST_REG_MR;
+	case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+		return PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP;
+	case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
+		return PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD;
+	case IB_WR_REG_SIG_MR:
+		return PVRDMA_WR_REG_SIG_MR;
+	default:
+		return PVRDMA_WR_ERROR;
+	}
 }
 
 static inline enum ib_wc_status pvrdma_wc_status_to_ib(
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index a5f02276d903..e582beaf9430 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -795,7 +795,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
 	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));
 
 	/* Allocate zero-out device */
-	dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
+	dev = ib_alloc_device(pvrdma_dev, ib_dev);
 	if (!dev) {
 		dev_err(&pdev->dev, "failed to allocate IB device\n");
 		return -ENOMEM;
 	}
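The pvrdma.h hunk replaces a bare enum cast with an explicit translation. The IB core and device opcode enums merely happen to agree on the values both sides know about; once either enum grows, a cast silently forwards a number the device may misinterpret, while the switch funnels anything unrecognized into the PVRDMA_WR_ERROR sentinel that pvrdma_post_send() (next hunk) rejects with -EINVAL. The hazard in miniature, with invented enums:

	/* Two enums that agree on their first values and then diverge.
	 * Everything here is invented purely for illustration. */
	enum abi_op { ABI_OP_WRITE, ABI_OP_SEND, ABI_OP_NEWFANGLED };
	enum dev_op { DEV_OP_WRITE, DEV_OP_SEND, DEV_OP_ERROR };

	static enum dev_op abi_op_to_dev(enum abi_op op)
	{
		/* (enum dev_op)op would hand ABI_OP_NEWFANGLED to the
		 * device as whatever value happens to share its slot;
		 * the switch makes the unknown case explicit instead. */
		switch (op) {
		case ABI_OP_WRITE:
			return DEV_OP_WRITE;
		case ABI_OP_SEND:
			return DEV_OP_SEND;
		default:
			return DEV_OP_ERROR; /* caller must check */
		}
	}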
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 5fc444cef011..08f4257169bd 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -719,6 +719,12 @@ int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
 			wqe_hdr->ex.imm_data = wr->ex.imm_data;
 
+		if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) {
+			*bad_wr = wr;
+			ret = -EINVAL;
+			goto out;
+		}
+
 		switch (qp->ibqp.qp_type) {
 		case IB_QPT_GSI:
 		case IB_QPT_UD:
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 14ec2577bcaa..6b1b2b75ef60 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -2912,6 +2912,8 @@ send:
 			goto op_err;
 		if (!ret)
 			goto rnr_nak;
+		if (wqe->length > qp->r_len)
+			goto inv_err;
 		break;
 
 	case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -3058,7 +3060,10 @@ op_err:
 	goto err;
 
 inv_err:
-	send_status = IB_WC_REM_INV_REQ_ERR;
+	send_status =
+		sqp->ibqp.qp_type == IB_QPT_RC ?
+			IB_WC_REM_INV_REQ_ERR :
+			IB_WC_SUCCESS;
 	wc.status = IB_WC_LOC_QP_OP_ERR;
 	goto err;
 
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 7de7389d0235..b3f0c5578925 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -91,7 +91,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
 {
 	struct rvt_dev_info *rdi;
 
-	rdi = (struct rvt_dev_info *)ib_alloc_device(size);
+	rdi = container_of(_ib_alloc_device(size), struct rvt_dev_info, ibdev);
 	if (!rdi)
 		return rdi;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 26fe8d7dbc55..81ee756c19b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -34,6 +34,13 @@
 #include "rxe.h"
 #include "rxe_loc.h"
 
+void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
+{
+	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
+	rxe_av_fill_ip_info(av, attr);
+	memcpy(av->dmac, attr->roce.dmac, ETH_ALEN);
+}
+
 int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
 {
 	struct rxe_port *port;
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a65fbd0ff42e..b517a6c86922 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -35,6 +35,7 @@
 #define RXE_LOC_H
 
 /* rxe_av.c */
+void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
 int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8fd03ae20efc..3b162e92e8e8 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -384,9 +384,6 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb,
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
-		pkt->mask |= RXE_LOOPBACK_MASK;
-
 	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
 			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
@@ -411,9 +408,6 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb,
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
-		pkt->mask |= RXE_LOOPBACK_MASK;
-
 	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
 			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
@@ -437,6 +431,9 @@ int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
 
 	*crc = rxe_icrc_hdr(pkt, skb);
 
+	if (ether_addr_equal(skb->dev->dev_addr, av->dmac))
+		pkt->mask |= RXE_LOOPBACK_MASK;
+
 	return err;
 }
 
@@ -555,7 +552,7 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev)
 	int err;
 	struct rxe_dev *rxe = NULL;
 
-	rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
+	rxe = ib_alloc_device(rxe_dev, ib_dev);
 	if (!rxe)
 		return NULL;
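Two cooperating rxe changes above: rxe_init_av() now also copies the resolved destination MAC into the AV, and loopback detection moves out of the address-family-specific prepare4()/prepare6() into rxe_prepare(), keyed on that MAC. Comparing the AV's dmac against the egress netdev's own address classifies self-addressed traffic once, for both IPv4 and IPv6. Roughly, as a hedged sketch (the demo_ helper is invented):

	#include <linux/etherdevice.h>
	#include <linux/netdevice.h>

	/* Illustrative only: a packet whose destination MAC is the sending
	 * netdev's own address can never leave the host, so treat it as
	 * loopback and short-circuit it in software. */
	static bool demo_is_loopback(const struct net_device *ndev,
				     const u8 dmac[ETH_ALEN])
	{
		return ether_addr_equal(ndev->dev_addr, dmac);
	}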
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index fd86fd2fbb26..be5d76b2bcca 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -631,14 +631,11 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 		qp->attr.qkey = attr->qkey;
 
 	if (mask & IB_QP_AV) {
-		rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr);
-		rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr);
+		rxe_init_av(&attr->ah_attr, &qp->pri_av);
 	}
 
 	if (mask & IB_QP_ALT_PATH) {
-		rxe_av_from_attr(attr->alt_port_num, &qp->alt_av,
-				 &attr->alt_ah_attr);
-		rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr);
+		rxe_init_av(&attr->alt_ah_attr, &qp->alt_av);
 		qp->attr.alt_port_num = attr->alt_port_num;
 		qp->attr.alt_pkey_index = attr->alt_pkey_index;
 		qp->attr.alt_timeout = attr->alt_timeout;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 3d01247a28db..cc5a05124ece 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -210,13 +210,6 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd)
 	return 0;
 }
 
-static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
-			struct rxe_av *av)
-{
-	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
-	rxe_av_fill_ip_info(av, attr);
-}
-
 static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
 				   struct rdma_ah_attr *attr,
 				   u32 flags,
@@ -239,7 +232,7 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
 	rxe_add_ref(pd);
 	ah->pd = pd;
 
-	rxe_init_av(rxe, attr, &ah->av);
+	rxe_init_av(attr, &ah->av);
 	return &ah->ibah;
 }
 
@@ -253,7 +246,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 	if (err)
 		return err;
 
-	rxe_init_av(rxe, attr, &ah->av);
+	rxe_init_av(attr, &ah->av);
 	return 0;
 }
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 5941d660add1..2aa3457a30ce 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -248,7 +248,6 @@ struct ipoib_cm_tx {
 	struct list_head     list;
 	struct net_device   *dev;
 	struct ipoib_neigh  *neigh;
-	struct ipoib_path   *path;
 	struct ipoib_tx_buf *tx_ring;
 	unsigned int	     tx_head;
 	unsigned int	     tx_tail;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0428e01e8f69..aa9dcfc36cd3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1312,7 +1312,6 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
 	neigh->cm = tx;
 	tx->neigh = neigh;
-	tx->path = path;
 	tx->dev = dev;
 	list_add(&tx->list, &priv->cm.start_list);
 	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
@@ -1371,7 +1370,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 				   neigh->daddr + QPN_AND_OPTIONS_OFFSET);
 			goto free_neigh;
 		}
-		memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
+		memcpy(&pathrec, &path->pathrec, sizeof(pathrec));
 
 		spin_unlock_irqrestore(&priv->lock, flags);
 		netif_tx_unlock_bh(dev);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 394d1b9c2ff7..2ba70729d7b0 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -145,9 +145,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
 	for_each_sg(data->sg, sg, data->dma_nents, i)
 		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
 			 "off:0x%x sz:0x%x dma_len:0x%x\n",
-			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
-			 sg_page(sg), sg->offset,
-			 sg->length, ib_sg_dma_len(ibdev, sg));
+			 i, (unsigned long)sg_dma_address(sg),
+			 sg_page(sg), sg->offset, sg->length, sg_dma_len(sg));
 }
 
 static void iser_dump_page_vec(struct iser_page_vec *page_vec)
@@ -204,8 +203,8 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
 		reg->rkey = device->pd->unsafe_global_rkey;
 	else
 		reg->rkey = 0;
-	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
-	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
+	reg->sge.addr = sg_dma_address(&sg[0]);
+	reg->sge.length = sg_dma_len(&sg[0]);
 
 	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
 		 " length=0x%x\n",
 		 reg->sge.lkey, reg->rkey,
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 0f855294ff3f..84184910f038 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1600,9 +1600,8 @@ static int srp_map_sg_entry(struct srp_map_state *state,
 {
 	struct srp_target_port *target = ch->target;
 	struct srp_device *dev = target->srp_host->srp_dev;
-	struct ib_device *ibdev = dev->dev;
-	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
-	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+	dma_addr_t dma_addr = sg_dma_address(sg);
+	unsigned int dma_len = sg_dma_len(sg);
 	unsigned int len = 0;
 	int ret;
 
@@ -1696,13 +1695,11 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
 			  int count)
 {
 	struct srp_target_port *target = ch->target;
-	struct srp_device *dev = target->srp_host->srp_dev;
 	struct scatterlist *sg;
 	int i;
 
 	for_each_sg(scat, sg, count, i) {
-		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
-			     ib_sg_dma_len(dev->dev, sg),
+		srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg),
 			     target->global_rkey);
 	}
 
@@ -1852,8 +1849,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 		buf->len = cpu_to_be32(data_len);
 		WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
 		for_each_sg(scat, sg, count, i) {
-			sge[i].addr = ib_sg_dma_address(ibdev, sg);
-			sge[i].length = ib_sg_dma_len(ibdev, sg);
+			sge[i].addr = sg_dma_address(sg);
+			sge[i].length = sg_dma_len(sg);
 			sge[i].lkey = target->lkey;
 		}
 		req->cmd->num_sge += count;
@@ -1874,9 +1871,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 		struct srp_direct_buf *buf;
 
 		buf = (void *)cmd->add_data + cmd->add_cdb_len;
-		buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
+		buf->va = cpu_to_be64(sg_dma_address(scat));
 		buf->key = cpu_to_be32(target->global_rkey);
-		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
+		buf->len = cpu_to_be32(sg_dma_len(scat));
 
 		req->nmdesc = 0;
 		goto map_complete;
@@ -3823,6 +3820,7 @@ static ssize_t srp_create_target(struct device *dev,
 	target_host->max_id = 1;
 	target_host->max_lun = -1LL;
 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
+	target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
 
 	target = host_to_target(target_host);
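The iser and srp conversions are mechanical: the ib_sg_dma_address()/ib_sg_dma_len() wrappers were thin indirections over the generic scatterlist accessors, so the ULPs can read sg_dma_address()/sg_dma_len() directly once the list has been DMA-mapped. srp additionally propagates the HCA's maximum DMA segment size to the SCSI host so the block layer never builds a segment the device cannot map. A sketch of the access pattern, with invented names, assuming the list was already mapped with ib_dma_map_sg():

	#include <linux/scatterlist.h>
	#include <rdma/ib_verbs.h>

	/* Illustrative only: fill an SGE array straight from a DMA-mapped
	 * scatterlist, as the iser/srp hunks above now do. */
	static void demo_fill_sge(struct scatterlist *sgl, int dma_nents,
				  struct ib_sge *sge, u32 lkey)
	{
		struct scatterlist *sg;
		int i;

		for_each_sg(sgl, sg, dma_nents, i) {
			sge[i].addr = sg_dma_address(sg);
			sge[i].length = sg_dma_len(sg);
			sge[i].lkey = lkey;
		}
	}

Note that dma_nents must be the count returned by the mapping call, not the original entry count, since the IOMMU may have coalesced entries.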